From 7532be7f5ae13bd1e4cea7295d1938f66d6ef13a Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Sun, 28 Aug 2011 16:06:41 -0400 Subject: [PATCH] Allowing to clip after AlignmentEnd if end is soft clipped. Read clipper now identifies and clips even if the requested coordinate is outside the alignment but the read contains soft clipped bases in that region. --- .../sting/utils/sam/ReadUtils.java | 113 ++++++++++++------ 1 file changed, 78 insertions(+), 35 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java index 6be37cf52..4a7cec2fd 100644 --- a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java @@ -632,7 +632,7 @@ public class ReadUtils { public static ReadAndIntervalOverlap getReadAndIntervalOverlapType(SAMRecord read, GenomeLoc interval) { int start = getRefCoordSoftUnclippedStart(read); - int stop = getRefCoordSoftUnclippedStop(read); + int stop = getRefCoordSoftUnclippedEnd(read); if ( !read.getReferenceName().equals(interval.getContig()) ) return ReadAndIntervalOverlap.NO_OVERLAP_CONTIG; @@ -658,6 +658,7 @@ public class ReadUtils { return ReadAndIntervalOverlap.OVERLAP_RIGHT; } + @Ensures({"result >= read.getUnclippedStart()", "result <= read.getUnclippedEnd()"}) public static int getRefCoordSoftUnclippedStart(SAMRecord read) { int start = read.getUnclippedStart(); for (CigarElement cigarElement : read.getCigar().getCigarElements()) { @@ -669,51 +670,93 @@ public class ReadUtils { return start; } - public static int getRefCoordSoftUnclippedStop(SAMRecord read) { - int stop = read.getAlignmentEnd(); - List cigarElementList = read.getCigar().getCigarElements(); - CigarElement lastCigarElement = cigarElementList.get(cigarElementList.size()-1); - if (lastCigarElement.getOperator() == CigarOperator.SOFT_CLIP) - stop += lastCigarElement.getLength(); - return stop; + @Ensures({"result >= read.getUnclippedStart()", "result <= read.getUnclippedEnd()"}) + public static int getRefCoordSoftUnclippedEnd(SAMRecord read) { + int stop = read.getUnclippedStart(); + int shift = 0; + for (CigarElement cigarElement : read.getCigar().getCigarElements()) + if (cigarElement.getOperator().consumesReferenceBases() || cigarElement.getOperator() == CigarOperator.SOFT_CLIP) + shift += cigarElement.getLength(); + + return (shift == 0) ? stop : stop+shift-1; + } + + /** + * Looks for a read coordinate that corresponds to the reference coordinate in the soft clipped region before + * the alignment start of the read. + * + * @param read + * @param refCoord + * @return the corresponding read coordinate or -1 if it failed to find it (it has been hard clipped before) + */ + @Requires({"refCoord >= read.getUnclippedStart()", "refCoord < read.getAlignmentStart()"}) + private static int getReadCoordinateForReferenceCoordinateBeforeAlignmentStart(SAMRecord read, int refCoord) { + if (getRefCoordSoftUnclippedStart(read) <= refCoord) + return refCoord - getRefCoordSoftUnclippedStart(read) + 1; + return -1; } + /** + * Looks for a read coordinate that corresponds to the reference coordinate in the soft clipped region after + * the alignment end of the read. + * + * @param read + * @param refCoord + * @return the corresponding read coordinate or -1 if it failed to find it (it has been hard clipped before) + */ + @Requires({"refCoord <= read.getUnclippedEnd()", "refCoord > read.getAlignmentEnd()"}) + private static int getReadCoordinateForReferenceCoordinateBeforeAlignmentEnd(SAMRecord read, int refCoord) { + if (getRefCoordSoftUnclippedEnd(read) > refCoord) + return refCoord - read.getAlignmentEnd() + 1; + return -1; + } - @Requires({"refCoord >= read.getAlignmentStart()", "refCoord <= read.getAlignmentEnd()"}) + + @Requires({"refCoord >= read.getUnclippedStart()", "refCoord <= read.getUnclippedEnd()"}) @Ensures({"result >= 0", "result < read.getReadLength()"}) public static int getReadCoordinateForReferenceCoordinate(SAMRecord read, int refCoord) { int readBases = 0; int refBases = 0; - int goal = refCoord - read.getAlignmentStart(); // The goal is to move this many reference bases - boolean goalReached = refBases == goal; - Iterator cigarElementIterator = read.getCigar().getCigarElements().iterator(); - while (!goalReached && cigarElementIterator.hasNext()) { - CigarElement cigarElement = cigarElementIterator.next(); - int shift = 0; - //if (refBases == 0 && readBases == 0 && cigarElement.getOperator() == CigarOperator.HARD_CLIP) { - // goal -= cigarElement.getLength(); - //} - - if (cigarElement.getOperator().consumesReferenceBases()) { - if (refBases + cigarElement.getLength() < goal) { - shift = cigarElement.getLength(); - } - else { - shift = goal - refBases; - } - refBases += shift; - } - goalReached = refBases == goal; - - if (cigarElement.getOperator().consumesReadBases()) { - readBases += goalReached ? shift : cigarElement.getLength(); - } + if (refCoord < read.getAlignmentStart()) { + readBases = getReadCoordinateForReferenceCoordinateBeforeAlignmentStart(read, refCoord); + if (readBases < 0) + throw new ReviewedStingException("Requested a coordinate in a hard clipped area of the read. No equivalent read coordinate."); } + else if (refCoord > read.getAlignmentEnd()) { + readBases = getReadCoordinateForReferenceCoordinateBeforeAlignmentEnd(read, refCoord); + if (readBases < 0) + throw new ReviewedStingException("Requested a coordinate in a hard clipped area of the read. No equivalent read coordinate."); + } + else { + int goal = refCoord - read.getAlignmentStart(); // The goal is to move this many reference bases + boolean goalReached = refBases == goal; - if (!goalReached) - throw new ReviewedStingException("Somehow the requested coordinate is not covered by the read. Too many deletions?"); + Iterator cigarElementIterator = read.getCigar().getCigarElements().iterator(); + while (!goalReached && cigarElementIterator.hasNext()) { + CigarElement cigarElement = cigarElementIterator.next(); + int shift = 0; + + if (cigarElement.getOperator().consumesReferenceBases()) { + if (refBases + cigarElement.getLength() < goal) { + shift = cigarElement.getLength(); + } + else { + shift = goal - refBases; + } + refBases += shift; + } + goalReached = refBases == goal; + + if (cigarElement.getOperator().consumesReadBases()) { + readBases += goalReached ? shift : cigarElement.getLength(); + } + } + + if (!goalReached) + throw new ReviewedStingException("Somehow the requested coordinate is not covered by the read. Too many deletions?"); + } return readBases; }