diff --git a/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java b/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java index 81d00d9d7..be7dc52f7 100644 --- a/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java @@ -249,7 +249,7 @@ public class ClippingOp { @Requires({"start <= stop", "start == 0 || stop == read.getReadLength() - 1", "!read.getReadUnmappedFlag()"}) private SAMRecord hardClip (SAMRecord read, int start, int stop) { - if (start == 0 && stop == read.getReadLength() -1) + if (start == 0 && stop == read.getReadLength() - 1) return new SAMRecord(read.getHeader()); // If the read is unmapped there is no Cigar string and neither should we create a new cigar string diff --git a/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java b/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java index 3b83617cf..83db20238 100644 --- a/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java @@ -4,6 +4,7 @@ import com.google.java.contract.Requires; import net.sf.samtools.CigarElement; import net.sf.samtools.CigarOperator; import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.sam.ReadUtils; @@ -68,25 +69,15 @@ public class ReadClipper { } private SAMRecord hardClipByReferenceCoordinates(int refStart, int refStop) { - int start = (refStart < 0) ? 0 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStart); - int stop = (refStop < 0) ? read.getReadLength() - 1 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop); + int start = (refStart < 0) ? 0 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStart, ReadUtils.ClippingTail.RIGHT_TAIL); + int stop = (refStop < 0) ? read.getReadLength() - 1 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop, ReadUtils.ClippingTail.LEFT_TAIL); if (start < 0 || stop > read.getReadLength() - 1) throw new ReviewedStingException("Trying to clip before the start or after the end of a read"); - if ( start > stop ) { -// stop = ReadUtils.getReadCoordinateForReferenceCoordinate(read, ReadUtils.getRefCoordSoftUnclippedEnd(read)); + if ( start > stop ) throw new ReviewedStingException("START > STOP -- this should never happen -- call Mauricio!"); - } - //This tries to fix the bug where the deletion is counted a read base and as a result, the hardCLipper runs into - //an endless loop when hard clipping the cigar string because the read coordinates are not covered by the read -// stop -= numDeletions(read); -// if ( start > stop ) -// start -= numDeletions(read); - - - //System.out.println("Clipping start/stop: " + start + "/" + stop); this.addOp(new ClippingOp(start, stop)); SAMRecord clippedRead = clipRead(ClippingRepresentation.HARDCLIP_BASES); this.ops = null; diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentStartWithNoTiesComparator.java b/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentStartWithNoTiesComparator.java new file mode 100644 index 000000000..02512c8dc --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentStartWithNoTiesComparator.java @@ -0,0 +1,46 @@ +package org.broadinstitute.sting.utils.sam; + +import com.google.java.contract.Ensures; +import com.google.java.contract.Requires; +import net.sf.samtools.SAMRecord; + +import java.util.Comparator; + +public class AlignmentStartWithNoTiesComparator implements Comparator { + @Requires("c1 >= 0 && c2 >= 0") + @Ensures("result == 0 || result == 1 || result == -1") + private int compareContigs(int c1, int c2) { + if (c1 == c2) + return 0; + else if (c1 > c2) + return 1; + return -1; + } + + @Requires("r1 != null && r2 != null") + @Ensures("result == 0 || result == 1 || result == -1") + public int compare(SAMRecord r1, SAMRecord r2) { + int result; + + if (r1 == r2) + result = 0; + + else if (r1.getReadUnmappedFlag()) + result = 1; + else if (r2.getReadUnmappedFlag()) + result = -1; + else { + final int cmpContig = compareContigs(r1.getReferenceIndex(), r2.getReferenceIndex()); + + if (cmpContig != 0) + result = cmpContig; + + else { + if (r1.getAlignmentStart() < r2.getAlignmentStart()) result = -1; + else result = 1; + } + } + + return result; + } +} \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java index 8beb1b21a..5fa410922 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java @@ -780,11 +780,56 @@ public class ReadUtils { } + public enum ClippingTail { + LEFT_TAIL, + RIGHT_TAIL + } + + /** + * Pre-processes the results of getReadCoordinateForReferenceCoordinate(SAMRecord, int) in case it falls in + * a deletion following the typical clipping needs. If clipping the left tail (beginning of the read) returns + * the base prior to the deletion. If clipping the right tail (end of the read) returns the base after the + * deletion. + * + * @param read + * @param refCoord + * @param tail + * @return the read coordinate corresponding to the requested reference coordinate for clipping. + */ @Requires({"refCoord >= read.getUnclippedStart()", "refCoord <= read.getUnclippedEnd()"}) @Ensures({"result >= 0", "result < read.getReadLength()"}) - public static int getReadCoordinateForReferenceCoordinate(SAMRecord read, int refCoord) { + public static int getReadCoordinateForReferenceCoordinate(SAMRecord read, int refCoord, ClippingTail tail) { + Pair result = getReadCoordinateForReferenceCoordinate(read, refCoord); + int readCoord = result.getFirst(); + + if (result.getSecond() && tail == ClippingTail.RIGHT_TAIL) + readCoord++; + + return readCoord; + } + + /** + * Returns the read coordinate corresponding to the requested reference coordinate. + * + * WARNING: if the requested reference coordinate happens to fall inside a deletion in the read, this function + * will return the last read base before the deletion. This function returns a + * Pair(int readCoord, boolean fallsInsideDeletion) so you can choose which readCoordinate to use when faced with + * a deletion. + * + * SUGGESTION: Use getReadCoordinateForReferenceCoordinate(SAMRecord, int, ClippingTail) instead to get a + * pre-processed result according to normal clipping needs. Or you can use this function and tailor the + * behavior to your needs. + * + * @param read + * @param refCoord + * @return the read coordinate corresponding to the requested reference coordinate. (see warning!) + */ + @Requires({"refCoord >= read.getUnclippedStart()", "refCoord <= read.getUnclippedEnd()"}) + @Ensures({"result >= 0", "result < read.getReadLength()"}) + public static Pair getReadCoordinateForReferenceCoordinate(SAMRecord read, int refCoord) { int readBases = 0; int refBases = 0; + boolean fallsInsideDeletion = false; if (refCoord < read.getAlignmentStart()) { readBases = getReadCoordinateForReferenceCoordinateBeforeAlignmentStart(read, refCoord); @@ -806,26 +851,56 @@ public class ReadUtils { int shift = 0; if (cigarElement.getOperator().consumesReferenceBases()) { - if (refBases + cigarElement.getLength() < goal) { + if (refBases + cigarElement.getLength() < goal) shift = cigarElement.getLength(); - } - else { + else shift = goal - refBases; - } + refBases += shift; } goalReached = refBases == goal; - if (cigarElement.getOperator().consumesReadBases()) { - readBases += goalReached ? shift : cigarElement.getLength(); - } + if (!goalReached && cigarElement.getOperator().consumesReadBases()) + readBases += cigarElement.getLength(); + + if (goalReached) { + // Is this base's reference position within this cigar element? Or did we use it all? + boolean endsWithinCigar = shift < cigarElement.getLength(); + + // If it isn't, we need to check the next one. There should *ALWAYS* be a next one + // since we checked if the goal coordinate is within the read length, so this is just a sanity check. + if (!endsWithinCigar && !cigarElementIterator.hasNext()) + throw new ReviewedStingException("Reference coordinate corresponds to a non-existent base in the read. This should never happen -- call Mauricio"); + + CigarElement nextCigarElement; + + // if we end inside the current cigar element, we just have to check if it is a deletion + if (endsWithinCigar) + fallsInsideDeletion = cigarElement.getOperator() == CigarOperator.DELETION; + + // if we end outside the current cigar element, we need to check if the next element is a deletion. + else { + nextCigarElement = cigarElementIterator.next(); + fallsInsideDeletion = nextCigarElement.getOperator() == CigarOperator.DELETION; + } + + // If we reached our goal outside a deletion, add the shift + if (!fallsInsideDeletion && cigarElement.getOperator().consumesReadBases()) + readBases += shift; + + // If we reached our goal inside a deletion, but the deletion is the next cigar element then we need + // to add the shift of the current cigar element but go back to it's last element to return the last + // base before the deletion (see warning in function contracts) + else if (fallsInsideDeletion && !endsWithinCigar) + readBases += shift - 1; + } } if (!goalReached) throw new ReviewedStingException("Somehow the requested coordinate is not covered by the read. Too many deletions?"); } - return readBases; + return new Pair(readBases, fallsInsideDeletion); } public static SAMRecord unclipSoftClippedBases(SAMRecord rec) { @@ -871,4 +946,19 @@ public class ReadUtils { return rec; } + + /** + * Compares two SAMRecords only the basis on alignment start. Note that + * comparisons are performed ONLY on the basis of alignment start; any + * two SAM records with the same alignment start will be considered equal. + * + * Unmapped alignments will all be considered equal. + */ + + @Requires({"read1 != null", "read2 != null"}) + @Ensures("result == 0 || result == 1 || result == -1") + public static int compareSAMRecords(SAMRecord read1, SAMRecord read2) { + AlignmentStartComparator comp = new AlignmentStartComparator(); + return comp.compare(read1, read2); + } }