Hard Clipping no longer leaves indels on the tails

The clipper could leave an insertion or deletion as the start or end of a read after hardclipping a read if the element adjacent to the clipping point was an indel. Fixed.
This commit is contained in:
Mauricio Carneiro 2011-08-29 01:51:57 -04:00
parent 943876c6eb
commit 6f9264d2b3
2 changed files with 84 additions and 23 deletions

View File

@ -5,15 +5,11 @@ import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import net.sf.samtools.SAMRecord;
import org.apache.poi.hssf.record.PageBreakRecord;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import java.io.PipedOutputStream;
import java.lang.reflect.Array;
import java.util.Iterator;
import java.util.List;
import java.util.Stack;
import java.util.Vector;
/**
@ -256,16 +252,18 @@ public class ClippingOp {
if (start == 0 && stop == read.getReadLength() -1)
return new SAMRecord(read.getHeader());
int newLength = read.getReadLength() - (stop - start + 1);
CigarShift cigarShift = hardClipCigar(read.getCigar(), start, stop);
// the cigar may force a shift left or right (or both) in case we are left with insertions
// starting or ending the read after applying the hard clip on start/stop.
int newLength = read.getReadLength() - (stop - start + 1) - cigarShift.shiftFromStart - cigarShift.shiftFromEnd;
byte [] newBases = new byte[newLength];
byte [] newQuals = new byte[newLength];
int copyStart = (start == 0) ? stop + 1 : 0;
int copyStart = (start == 0) ? stop + 1 + cigarShift.shiftFromStart : cigarShift.shiftFromStart;
System.arraycopy(read.getReadBases(), copyStart, newBases, 0, newLength);
System.arraycopy(read.getBaseQualities(), copyStart, newQuals, 0, newLength);
Cigar newCigar = hardClipCigar(read.getCigar(), start, stop);
SAMRecord hardClippedRead;
try {
hardClippedRead = (SAMRecord) read.clone();
@ -275,16 +273,16 @@ public class ClippingOp {
hardClippedRead.setBaseQualities(newQuals);
hardClippedRead.setReadBases(newBases);
hardClippedRead.setCigar(newCigar);
hardClippedRead.setCigar(cigarShift.cigar);
if (start == 0)
hardClippedRead.setAlignmentStart(read.getAlignmentStart() + calculateAlignmentStartShift(read.getCigar(), newCigar));
hardClippedRead.setAlignmentStart(read.getAlignmentStart() + calculateAlignmentStartShift(read.getCigar(), cigarShift.cigar));
return hardClippedRead;
}
@Requires({"!cigar.isEmpty()"})
private Cigar hardClipCigar (Cigar cigar, int start, int stop) {
private CigarShift hardClipCigar (Cigar cigar, int start, int stop) {
Cigar newCigar = new Cigar();
int index = 0;
int totalHardClipCount = stop - start + 1;
@ -373,7 +371,59 @@ public class ClippingOp {
}
newCigar.add(new CigarElement(totalHardClipCount + alignmentShift, CigarOperator.HARD_CLIP));
}
return newCigar;
return cleanHardClippedCigar(newCigar);
}
/**
* Checks if a hard clipped cigar left a read starting or ending with insertions/deletions
* and cleans it up accordingly.
*
* @param cigar
* @return
*/
private CigarShift cleanHardClippedCigar(Cigar cigar) {
Cigar cleanCigar = new Cigar();
int shiftFromStart = 0;
int shiftFromEnd = 0;
Stack<CigarElement> cigarStack = new Stack<CigarElement>();
Stack<CigarElement> inverseCigarStack = new Stack<CigarElement>();
for (CigarElement cigarElement : cigar.getCigarElements())
cigarStack.push(cigarElement);
for (int i = 1; i <= 2; i++) {
int shift = 0;
boolean readHasStarted = false;
while(!cigarStack.empty()) {
CigarElement cigarElement = cigarStack.pop();
if ( !readHasStarted &&
cigarElement.getOperator() != CigarOperator.INSERTION &&
cigarElement.getOperator() != CigarOperator.DELETION &&
cigarElement.getOperator() != CigarOperator.HARD_CLIP)
readHasStarted = true;
else if ( !readHasStarted && cigarElement.getOperator() == CigarOperator.INSERTION)
shift += cigarElement.getLength();
if (readHasStarted || cigarElement.getOperator() == CigarOperator.HARD_CLIP) {
if (i==1)
inverseCigarStack.push(cigarElement);
else
cleanCigar.add(cigarElement);
}
}
// first pass (i=1) is from end to start of the cigar elements
if (i == 1) {
shiftFromEnd = shift;
cigarStack = inverseCigarStack;
}
// second pass (i=2) is from start to end with the end already cleaned
else {
shiftFromStart = shift;
}
}
return new CigarShift(cleanCigar, shiftFromStart, shiftFromEnd);
}
private int calculateAlignmentStartShift(Cigar oldCigar, Cigar newCigar) {
@ -395,11 +445,9 @@ public class ClippingOp {
break;
}
return shift;
}
private int calculateHardClippingAlignmentShift(CigarElement cigarElement, int clippedLength) {
if (cigarElement.getOperator() == CigarOperator.INSERTION) {
int cigarElementLength = cigarElement.getLength();
@ -414,4 +462,16 @@ public class ClippingOp {
return 0;
}
private class CigarShift {
private Cigar cigar;
private int shiftFromStart;
private int shiftFromEnd;
private CigarShift(Cigar cigar, int shiftFromStart, int shiftFromEnd) {
this.cigar = cigar;
this.shiftFromStart = shiftFromStart;
this.shiftFromEnd = shiftFromEnd;
}
}
}

View File

@ -1,12 +1,9 @@
package org.broadinstitute.sting.utils.clipreads;
import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement;
import com.google.java.contract.Requires;
import net.sf.samtools.SAMRecord;
import org.broad.tribble.util.PositionalStream;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import org.jets3t.service.multi.ThreadedStorageService;
import java.util.ArrayList;
import java.util.List;
@ -68,7 +65,9 @@ public class ReadClipper {
//System.out.println("Clipping start/stop: " + start + "/" + stop);
this.addOp(new ClippingOp(start, stop));
return clipRead(ClippingRepresentation.HARDCLIP_BASES);
SAMRecord clippedRead = clipRead(ClippingRepresentation.HARDCLIP_BASES);
this.ops = null;
return clippedRead;
}
public SAMRecord hardClipByReadCoordinates(int start, int stop) {
@ -76,10 +75,12 @@ public class ReadClipper {
return clipRead(ClippingRepresentation.HARDCLIP_BASES);
}
@Requires("left <= right")
public SAMRecord hardClipBothEndsByReferenceCoordinates(int left, int right) {
this.read = hardClipByReferenceCoordinates(-1, left);
this.ops = null; // reset the operations
return hardClipByReferenceCoordinates(right, -1);
if (left == right)
return new SAMRecord(read.getHeader());
this.read = hardClipByReferenceCoordinates(right, -1);
return hardClipByReferenceCoordinates(-1, left);
}
public SAMRecord hardClipLowQualEnds(byte lowQual) {