diff --git a/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java b/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java index 5789b606a..7d351686f 100644 --- a/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java @@ -4,6 +4,7 @@ import net.sf.samtools.Cigar; import net.sf.samtools.CigarElement; import net.sf.samtools.CigarOperator; import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.sam.ReadUtils; @@ -17,22 +18,14 @@ import java.util.Vector; * according to the wishes of the supplid ClippingAlgorithm enum. */ public class ClippingOp { - public final ClippingType type; public final int start, stop; // inclusive - public final Object extraInfo; public ClippingOp(int start, int stop) { - this(null, start, stop, null); - } - - public ClippingOp(ClippingType type, int start, int stop, Object extraInfo) { - // todo -- remove type and extra info - this.type = type; this.start = start; this.stop = stop; - this.extraInfo = extraInfo; } + public int getLength() { return stop - start + 1; } @@ -119,15 +112,6 @@ public class ClippingOp { return clippedRead; } - /** - * What is the type of a ClippingOp? - */ - public enum ClippingType { - LOW_Q_SCORES, - WITHIN_CLIP_RANGE, - MATCHES_CLIP_SEQ - } - /** * Given a cigar string, get the number of bases hard or soft clipped at the start */ @@ -196,7 +180,7 @@ public class ClippingOp { Vector newElements = new Vector(); for (CigarElement curElem : __cigar.getCigarElements()) { if (!curElem.getOperator().consumesReadBases()) { - if (curLength > __startClipEnd && curLength < __endClipBegin) { + if (curElem.getOperator() == CigarOperator.HARD_CLIP || curLength > __startClipEnd && curLength < __endClipBegin) { newElements.add(new CigarElement(curElem.getLength(), curElem.getOperator())); } continue; diff --git a/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java b/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java index 988d297f6..1307f1b6f 100644 --- a/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java @@ -1,6 +1,10 @@ package org.broadinstitute.sting.utils.clipreads; +import net.sf.samtools.Cigar; +import net.sf.samtools.CigarElement; import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.utils.sam.ReadUtils; +import org.jets3t.service.multi.ThreadedStorageService; import java.util.ArrayList; import java.util.List; @@ -43,6 +47,23 @@ public class ReadClipper { return read; } + public SAMRecord hardClipByReferenceCoordinates(int refStart, int refStop) { + int start = ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStart); + int stop = ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop); + this.addOp(new ClippingOp(start, stop)); + return clipRead(ClippingRepresentation.HARDCLIP_BASES); + } + + public SAMRecord hardClipByReadCoordinates(int start, int stop) { + this.addOp(new ClippingOp(start, stop)); + return clipRead(ClippingRepresentation.HARDCLIP_BASES); + } + + public SAMRecord hardClipBothEndsByReferenceCoordinates(int left, int right) { + this.read = hardClipByReferenceCoordinates(read.getUnclippedStart(), left); + this.ops = null; // reset the operations + return hardClipByReferenceCoordinates(right, read.getUnclippedEnd()); + } /** * Return a new read corresponding to this.read that's been clipped according to ops, if any are present. diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java index 16a02abdc..3d803804a 100644 --- a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java @@ -432,16 +432,34 @@ public class ReadUtils { keepEnd = rec.getReadLength() - l - 1; newCigarElements.add(new CigarElement(l, CigarOperator.HARD_CLIP)); break; - case H: - // TODO -- must be handled specially - throw new ReviewedStingException("BUG: tell mark he forgot to implement this"); + default: newCigarElements.add(ce); break; } } - return hardClipBases(rec, keepStart, keepEnd, newCigarElements); + // Merges tandem cigar elements like 5H10H or 2S5S to 15H or 7S + // this will happen if you soft clip a read that has been hard clipped before + // like: 5H20S => 5H20H + List mergedCigarElements = new LinkedList(); + Iterator cigarElementIterator = newCigarElements.iterator(); + CigarOperator currentOperator = null; + int currentOperatorLength = 0; + while (cigarElementIterator.hasNext()) { + CigarElement cigarElement = cigarElementIterator.next(); + if (currentOperator != cigarElement.getOperator()) { + if (currentOperator != null) + mergedCigarElements.add(new CigarElement(currentOperatorLength, currentOperator)); + currentOperator = cigarElement.getOperator(); + currentOperatorLength = cigarElement.getLength(); + } + else + currentOperatorLength += cigarElement.getLength(); + } + mergedCigarElements.add(new CigarElement(currentOperatorLength, currentOperator)); + + return hardClipBases(rec, keepStart, keepEnd, mergedCigarElements); } /** @@ -460,8 +478,7 @@ public class ReadUtils { "keepEnd < rec.getReadLength()", "rec.getReadUnmappedFlag() || newCigarElements != null"}) @Ensures("result != null") - public static SAMRecord hardClipBases(SAMRecord rec, int keepStart, int keepEnd, - List newCigarElements) { + public static SAMRecord hardClipBases(SAMRecord rec, int keepStart, int keepEnd, List newCigarElements) { int newLength = keepEnd - keepStart + 1; if ( newLength != rec.getReadLength() ) { try { @@ -633,7 +650,43 @@ public class ReadUtils { return ReadAndIntervalOverlap.RIGHT_OVERLAP; } + @Requires({"refCoord >= read.getUnclippedStart()", "refCoord <= read.getUnclippedEnd()"}) + @Ensures({"result >= 0", "result < read.getReadLength()"}) + public static int getReadCoordinateForReferenceCoordinate(SAMRecord read, int refCoord) { + int readBases = 0; + int refBases = 0; + int goal = refCoord - read.getUnclippedStart(); // read coords are 0-based! + boolean goalReached = false; + Iterator cigarElementIterator = read.getCigar().getCigarElements().iterator(); + while (!goalReached && cigarElementIterator.hasNext()) { + CigarElement cigarElement = cigarElementIterator.next(); + int shift = 0; + if (refBases == 0 && readBases == 0 && cigarElement.getOperator() == CigarOperator.HARD_CLIP) { + goal -= cigarElement.getLength(); + } + + if (cigarElement.getOperator().consumesReferenceBases()) { + if (refBases + cigarElement.getLength() < goal) { + shift = cigarElement.getLength(); + } + else { + shift = goal - refBases; + } + refBases += shift; + } + goalReached = refBases == goal; + + if (cigarElement.getOperator().consumesReadBases()) { + readBases += goalReached ? shift : cigarElement.getLength(); + } + } + + if (!goalReached) + throw new ReviewedStingException("Somehow the requested coordinate is not covered by the read. Too many deletions?"); + + return readBases; + } }