Hard Clipping no longer leaves indels on the tails

The clipper could leave an insertion or deletion as the start or end of a read after hardclipping a read if the element adjacent to the clipping point was an indel. Fixed.
This commit is contained in:
Mauricio Carneiro 2011-08-29 01:51:57 -04:00
parent 943876c6eb
commit 6f9264d2b3
2 changed files with 84 additions and 23 deletions

View File

@ -5,15 +5,11 @@ import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement; import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator; import net.sf.samtools.CigarOperator;
import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMRecord;
import org.apache.poi.hssf.record.PageBreakRecord;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import java.io.PipedOutputStream;
import java.lang.reflect.Array;
import java.util.Iterator; import java.util.Iterator;
import java.util.List; import java.util.Stack;
import java.util.Vector; import java.util.Vector;
/** /**
@ -256,16 +252,18 @@ public class ClippingOp {
if (start == 0 && stop == read.getReadLength() -1) if (start == 0 && stop == read.getReadLength() -1)
return new SAMRecord(read.getHeader()); return new SAMRecord(read.getHeader());
int newLength = read.getReadLength() - (stop - start + 1); CigarShift cigarShift = hardClipCigar(read.getCigar(), start, stop);
// the cigar may force a shift left or right (or both) in case we are left with insertions
// starting or ending the read after applying the hard clip on start/stop.
int newLength = read.getReadLength() - (stop - start + 1) - cigarShift.shiftFromStart - cigarShift.shiftFromEnd;
byte [] newBases = new byte[newLength]; byte [] newBases = new byte[newLength];
byte [] newQuals = new byte[newLength]; byte [] newQuals = new byte[newLength];
int copyStart = (start == 0) ? stop + 1 : 0; int copyStart = (start == 0) ? stop + 1 + cigarShift.shiftFromStart : cigarShift.shiftFromStart;
System.arraycopy(read.getReadBases(), copyStart, newBases, 0, newLength); System.arraycopy(read.getReadBases(), copyStart, newBases, 0, newLength);
System.arraycopy(read.getBaseQualities(), copyStart, newQuals, 0, newLength); System.arraycopy(read.getBaseQualities(), copyStart, newQuals, 0, newLength);
Cigar newCigar = hardClipCigar(read.getCigar(), start, stop);
SAMRecord hardClippedRead; SAMRecord hardClippedRead;
try { try {
hardClippedRead = (SAMRecord) read.clone(); hardClippedRead = (SAMRecord) read.clone();
@ -275,16 +273,16 @@ public class ClippingOp {
hardClippedRead.setBaseQualities(newQuals); hardClippedRead.setBaseQualities(newQuals);
hardClippedRead.setReadBases(newBases); hardClippedRead.setReadBases(newBases);
hardClippedRead.setCigar(newCigar); hardClippedRead.setCigar(cigarShift.cigar);
if (start == 0) if (start == 0)
hardClippedRead.setAlignmentStart(read.getAlignmentStart() + calculateAlignmentStartShift(read.getCigar(), newCigar)); hardClippedRead.setAlignmentStart(read.getAlignmentStart() + calculateAlignmentStartShift(read.getCigar(), cigarShift.cigar));
return hardClippedRead; return hardClippedRead;
} }
@Requires({"!cigar.isEmpty()"}) @Requires({"!cigar.isEmpty()"})
private Cigar hardClipCigar (Cigar cigar, int start, int stop) { private CigarShift hardClipCigar (Cigar cigar, int start, int stop) {
Cigar newCigar = new Cigar(); Cigar newCigar = new Cigar();
int index = 0; int index = 0;
int totalHardClipCount = stop - start + 1; int totalHardClipCount = stop - start + 1;
@ -373,7 +371,59 @@ public class ClippingOp {
} }
newCigar.add(new CigarElement(totalHardClipCount + alignmentShift, CigarOperator.HARD_CLIP)); newCigar.add(new CigarElement(totalHardClipCount + alignmentShift, CigarOperator.HARD_CLIP));
} }
return newCigar; return cleanHardClippedCigar(newCigar);
}
/**
* Checks if a hard clipped cigar left a read starting or ending with insertions/deletions
* and cleans it up accordingly.
*
* @param cigar
* @return
*/
private CigarShift cleanHardClippedCigar(Cigar cigar) {
Cigar cleanCigar = new Cigar();
int shiftFromStart = 0;
int shiftFromEnd = 0;
Stack<CigarElement> cigarStack = new Stack<CigarElement>();
Stack<CigarElement> inverseCigarStack = new Stack<CigarElement>();
for (CigarElement cigarElement : cigar.getCigarElements())
cigarStack.push(cigarElement);
for (int i = 1; i <= 2; i++) {
int shift = 0;
boolean readHasStarted = false;
while(!cigarStack.empty()) {
CigarElement cigarElement = cigarStack.pop();
if ( !readHasStarted &&
cigarElement.getOperator() != CigarOperator.INSERTION &&
cigarElement.getOperator() != CigarOperator.DELETION &&
cigarElement.getOperator() != CigarOperator.HARD_CLIP)
readHasStarted = true;
else if ( !readHasStarted && cigarElement.getOperator() == CigarOperator.INSERTION)
shift += cigarElement.getLength();
if (readHasStarted || cigarElement.getOperator() == CigarOperator.HARD_CLIP) {
if (i==1)
inverseCigarStack.push(cigarElement);
else
cleanCigar.add(cigarElement);
}
}
// first pass (i=1) is from end to start of the cigar elements
if (i == 1) {
shiftFromEnd = shift;
cigarStack = inverseCigarStack;
}
// second pass (i=2) is from start to end with the end already cleaned
else {
shiftFromStart = shift;
}
}
return new CigarShift(cleanCigar, shiftFromStart, shiftFromEnd);
} }
private int calculateAlignmentStartShift(Cigar oldCigar, Cigar newCigar) { private int calculateAlignmentStartShift(Cigar oldCigar, Cigar newCigar) {
@ -395,11 +445,9 @@ public class ClippingOp {
break; break;
} }
return shift; return shift;
} }
private int calculateHardClippingAlignmentShift(CigarElement cigarElement, int clippedLength) { private int calculateHardClippingAlignmentShift(CigarElement cigarElement, int clippedLength) {
if (cigarElement.getOperator() == CigarOperator.INSERTION) { if (cigarElement.getOperator() == CigarOperator.INSERTION) {
int cigarElementLength = cigarElement.getLength(); int cigarElementLength = cigarElement.getLength();
@ -414,4 +462,16 @@ public class ClippingOp {
return 0; return 0;
} }
private class CigarShift {
private Cigar cigar;
private int shiftFromStart;
private int shiftFromEnd;
private CigarShift(Cigar cigar, int shiftFromStart, int shiftFromEnd) {
this.cigar = cigar;
this.shiftFromStart = shiftFromStart;
this.shiftFromEnd = shiftFromEnd;
}
}
} }

View File

@ -1,12 +1,9 @@
package org.broadinstitute.sting.utils.clipreads; package org.broadinstitute.sting.utils.clipreads;
import net.sf.samtools.Cigar; import com.google.java.contract.Requires;
import net.sf.samtools.CigarElement;
import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMRecord;
import org.broad.tribble.util.PositionalStream;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.sam.ReadUtils; import org.broadinstitute.sting.utils.sam.ReadUtils;
import org.jets3t.service.multi.ThreadedStorageService;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
@ -68,7 +65,9 @@ public class ReadClipper {
//System.out.println("Clipping start/stop: " + start + "/" + stop); //System.out.println("Clipping start/stop: " + start + "/" + stop);
this.addOp(new ClippingOp(start, stop)); this.addOp(new ClippingOp(start, stop));
return clipRead(ClippingRepresentation.HARDCLIP_BASES); SAMRecord clippedRead = clipRead(ClippingRepresentation.HARDCLIP_BASES);
this.ops = null;
return clippedRead;
} }
public SAMRecord hardClipByReadCoordinates(int start, int stop) { public SAMRecord hardClipByReadCoordinates(int start, int stop) {
@ -76,10 +75,12 @@ public class ReadClipper {
return clipRead(ClippingRepresentation.HARDCLIP_BASES); return clipRead(ClippingRepresentation.HARDCLIP_BASES);
} }
@Requires("left <= right")
public SAMRecord hardClipBothEndsByReferenceCoordinates(int left, int right) { public SAMRecord hardClipBothEndsByReferenceCoordinates(int left, int right) {
this.read = hardClipByReferenceCoordinates(-1, left); if (left == right)
this.ops = null; // reset the operations return new SAMRecord(read.getHeader());
return hardClipByReferenceCoordinates(right, -1); this.read = hardClipByReferenceCoordinates(right, -1);
return hardClipByReferenceCoordinates(-1, left);
} }
public SAMRecord hardClipLowQualEnds(byte lowQual) { public SAMRecord hardClipLowQualEnds(byte lowQual) {