diff --git a/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java b/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java index 0184c8413..faa3cf34e 100644 --- a/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java @@ -5,15 +5,11 @@ import net.sf.samtools.Cigar; import net.sf.samtools.CigarElement; import net.sf.samtools.CigarOperator; import net.sf.samtools.SAMRecord; -import org.apache.poi.hssf.record.PageBreakRecord; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.sam.ReadUtils; -import java.io.PipedOutputStream; -import java.lang.reflect.Array; import java.util.Iterator; -import java.util.List; +import java.util.Stack; import java.util.Vector; /** @@ -256,16 +252,18 @@ public class ClippingOp { if (start == 0 && stop == read.getReadLength() -1) return new SAMRecord(read.getHeader()); - int newLength = read.getReadLength() - (stop - start + 1); + CigarShift cigarShift = hardClipCigar(read.getCigar(), start, stop); + + // the cigar may force a shift left or right (or both) in case we are left with insertions + // starting or ending the read after applying the hard clip on start/stop. + int newLength = read.getReadLength() - (stop - start + 1) - cigarShift.shiftFromStart - cigarShift.shiftFromEnd; byte [] newBases = new byte[newLength]; byte [] newQuals = new byte[newLength]; - int copyStart = (start == 0) ? stop + 1 : 0; + int copyStart = (start == 0) ? stop + 1 + cigarShift.shiftFromStart : cigarShift.shiftFromStart; System.arraycopy(read.getReadBases(), copyStart, newBases, 0, newLength); System.arraycopy(read.getBaseQualities(), copyStart, newQuals, 0, newLength); - Cigar newCigar = hardClipCigar(read.getCigar(), start, stop); - SAMRecord hardClippedRead; try { hardClippedRead = (SAMRecord) read.clone(); @@ -275,16 +273,16 @@ public class ClippingOp { hardClippedRead.setBaseQualities(newQuals); hardClippedRead.setReadBases(newBases); - hardClippedRead.setCigar(newCigar); + hardClippedRead.setCigar(cigarShift.cigar); if (start == 0) - hardClippedRead.setAlignmentStart(read.getAlignmentStart() + calculateAlignmentStartShift(read.getCigar(), newCigar)); + hardClippedRead.setAlignmentStart(read.getAlignmentStart() + calculateAlignmentStartShift(read.getCigar(), cigarShift.cigar)); return hardClippedRead; } @Requires({"!cigar.isEmpty()"}) - private Cigar hardClipCigar (Cigar cigar, int start, int stop) { + private CigarShift hardClipCigar (Cigar cigar, int start, int stop) { Cigar newCigar = new Cigar(); int index = 0; int totalHardClipCount = stop - start + 1; @@ -373,7 +371,59 @@ public class ClippingOp { } newCigar.add(new CigarElement(totalHardClipCount + alignmentShift, CigarOperator.HARD_CLIP)); } - return newCigar; + return cleanHardClippedCigar(newCigar); + } + + /** + * Checks if a hard clipped cigar left a read starting or ending with insertions/deletions + * and cleans it up accordingly. + * + * @param cigar + * @return + */ + private CigarShift cleanHardClippedCigar(Cigar cigar) { + Cigar cleanCigar = new Cigar(); + int shiftFromStart = 0; + int shiftFromEnd = 0; + Stack cigarStack = new Stack(); + Stack inverseCigarStack = new Stack(); + + for (CigarElement cigarElement : cigar.getCigarElements()) + cigarStack.push(cigarElement); + + for (int i = 1; i <= 2; i++) { + int shift = 0; + boolean readHasStarted = false; + + while(!cigarStack.empty()) { + CigarElement cigarElement = cigarStack.pop(); + + if ( !readHasStarted && + cigarElement.getOperator() != CigarOperator.INSERTION && + cigarElement.getOperator() != CigarOperator.DELETION && + cigarElement.getOperator() != CigarOperator.HARD_CLIP) + readHasStarted = true; + else if ( !readHasStarted && cigarElement.getOperator() == CigarOperator.INSERTION) + shift += cigarElement.getLength(); + + if (readHasStarted || cigarElement.getOperator() == CigarOperator.HARD_CLIP) { + if (i==1) + inverseCigarStack.push(cigarElement); + else + cleanCigar.add(cigarElement); + } + } + // first pass (i=1) is from end to start of the cigar elements + if (i == 1) { + shiftFromEnd = shift; + cigarStack = inverseCigarStack; + } + // second pass (i=2) is from start to end with the end already cleaned + else { + shiftFromStart = shift; + } + } + return new CigarShift(cleanCigar, shiftFromStart, shiftFromEnd); } private int calculateAlignmentStartShift(Cigar oldCigar, Cigar newCigar) { @@ -395,11 +445,9 @@ public class ClippingOp { break; } - return shift; } - private int calculateHardClippingAlignmentShift(CigarElement cigarElement, int clippedLength) { if (cigarElement.getOperator() == CigarOperator.INSERTION) { int cigarElementLength = cigarElement.getLength(); @@ -414,4 +462,16 @@ public class ClippingOp { return 0; } + + private class CigarShift { + private Cigar cigar; + private int shiftFromStart; + private int shiftFromEnd; + + private CigarShift(Cigar cigar, int shiftFromStart, int shiftFromEnd) { + this.cigar = cigar; + this.shiftFromStart = shiftFromStart; + this.shiftFromEnd = shiftFromEnd; + } + } } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java b/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java index d83cd8c0e..be045309a 100644 --- a/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java @@ -1,12 +1,9 @@ package org.broadinstitute.sting.utils.clipreads; -import net.sf.samtools.Cigar; -import net.sf.samtools.CigarElement; +import com.google.java.contract.Requires; import net.sf.samtools.SAMRecord; -import org.broad.tribble.util.PositionalStream; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.sam.ReadUtils; -import org.jets3t.service.multi.ThreadedStorageService; import java.util.ArrayList; import java.util.List; @@ -68,7 +65,9 @@ public class ReadClipper { //System.out.println("Clipping start/stop: " + start + "/" + stop); this.addOp(new ClippingOp(start, stop)); - return clipRead(ClippingRepresentation.HARDCLIP_BASES); + SAMRecord clippedRead = clipRead(ClippingRepresentation.HARDCLIP_BASES); + this.ops = null; + return clippedRead; } public SAMRecord hardClipByReadCoordinates(int start, int stop) { @@ -76,10 +75,12 @@ public class ReadClipper { return clipRead(ClippingRepresentation.HARDCLIP_BASES); } + @Requires("left <= right") public SAMRecord hardClipBothEndsByReferenceCoordinates(int left, int right) { - this.read = hardClipByReferenceCoordinates(-1, left); - this.ops = null; // reset the operations - return hardClipByReferenceCoordinates(right, -1); + if (left == right) + return new SAMRecord(read.getHeader()); + this.read = hardClipByReferenceCoordinates(right, -1); + return hardClipByReferenceCoordinates(-1, left); } public SAMRecord hardClipLowQualEnds(byte lowQual) {