diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java index d3a52167a..e8df869e6 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java @@ -24,10 +24,8 @@ package org.broadinstitute.sting.utils.sam; -import net.sf.samtools.BAMRecord; -import net.sf.samtools.SAMFileHeader; -import net.sf.samtools.SAMReadGroupRecord; -import net.sf.samtools.SAMRecord; +import com.google.java.contract.Ensures; +import net.sf.samtools.*; import org.broadinstitute.sting.utils.NGSPlatform; import java.util.HashMap; @@ -273,5 +271,52 @@ public class GATKSAMRecord extends BAMRecord { setReadGroup(rg); } + /** + * Calculates the reference coordinate for the beginning of the read taking into account soft clips but not hard clips. + * + * Note: getUnclippedStart() adds soft and hard clips, this function only adds soft clips. + * + * @return the unclipped start of the read taking soft clips (but not hard clips) into account + */ + @Ensures({"result >= getUnclippedStart()", "result <= getUnclippedEnd() || ReadUtils.readIsEntirelyInsertion(this)"}) + public int getSoftStart() { + int start = this.getUnclippedStart(); + for (CigarElement cigarElement : this.getCigar().getCigarElements()) { + if (cigarElement.getOperator() == CigarOperator.HARD_CLIP) + start += cigarElement.getLength(); + else + break; + } + return start; + } + + /** + * Calculates the reference coordinate for the end of the read taking into account soft clips but not hard clips. + * + * Note: getUnclippedStart() adds soft and hard clips, this function only adds soft clips. + * + * @return the unclipped end of the read taking soft clips (but not hard clips) into account + */ + @Ensures({"result >= getUnclippedStart()", "result <= getUnclippedEnd() || ReadUtils.readIsEntirelyInsertion(this)"}) + public int getSoftEnd() { + int stop = this.getUnclippedStart(); + + if (ReadUtils.readIsEntirelyInsertion(this)) + return stop; + + int shift = 0; + CigarOperator lastOperator = null; + for (CigarElement cigarElement : this.getCigar().getCigarElements()) { + stop += shift; + lastOperator = cigarElement.getOperator(); + if (cigarElement.getOperator().consumesReferenceBases() || cigarElement.getOperator() == CigarOperator.SOFT_CLIP || cigarElement.getOperator() == CigarOperator.HARD_CLIP) + shift = cigarElement.getLength(); + else + shift = 0; + } + return (lastOperator == CigarOperator.HARD_CLIP) ? stop-1 : stop+shift-1 ; + } + + } diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java index 0c3433eba..24fd48387 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java @@ -299,8 +299,8 @@ public class ReadUtils { */ public static ReadAndIntervalOverlap getReadAndIntervalOverlapType(GATKSAMRecord read, GenomeLoc interval) { - int sStart = getRefCoordSoftUnclippedStart(read); - int sStop = getRefCoordSoftUnclippedEnd(read); + int sStart = read.getSoftStart(); + int sStop = read.getSoftEnd(); int uStart = read.getUnclippedStart(); int uStop = read.getUnclippedEnd(); @@ -334,40 +334,6 @@ public class ReadUtils { return ReadAndIntervalOverlap.OVERLAP_RIGHT; } - - @Ensures({"result >= read.getUnclippedStart()", "result <= read.getUnclippedEnd() || readIsEntirelyInsertion(read)"}) - public static int getRefCoordSoftUnclippedStart(GATKSAMRecord read) { - int start = read.getUnclippedStart(); - for (CigarElement cigarElement : read.getCigar().getCigarElements()) { - if (cigarElement.getOperator() == CigarOperator.HARD_CLIP) - start += cigarElement.getLength(); - else - break; - } - return start; - } - - @Ensures({"result >= read.getUnclippedStart()", "result <= read.getUnclippedEnd() || readIsEntirelyInsertion(read)"}) - public static int getRefCoordSoftUnclippedEnd(GATKSAMRecord read) { - int stop = read.getUnclippedStart(); - - if (readIsEntirelyInsertion(read)) - return stop; - - int shift = 0; - CigarOperator lastOperator = null; - for (CigarElement cigarElement : read.getCigar().getCigarElements()) { - stop += shift; - lastOperator = cigarElement.getOperator(); - if (cigarElement.getOperator().consumesReferenceBases() || cigarElement.getOperator() == CigarOperator.SOFT_CLIP || cigarElement.getOperator() == CigarOperator.HARD_CLIP) - shift = cigarElement.getLength(); - else - shift = 0; - } - return (lastOperator == CigarOperator.HARD_CLIP) ? stop-1 : stop+shift-1 ; - } - - /** * Pre-processes the results of getReadCoordinateForReferenceCoordinate(GATKSAMRecord, int) in case it falls in * a deletion following the typical clipping needs. If clipping the left tail (beginning of the read) returns @@ -407,14 +373,14 @@ public class ReadUtils { * @param refCoord * @return the read coordinate corresponding to the requested reference coordinate. (see warning!) */ - @Requires({"refCoord >= getRefCoordSoftUnclippedStart(read)", "refCoord <= getRefCoordSoftUnclippedEnd(read)"}) + @Requires({"refCoord >= read.getSoftStart()", "refCoord <= read.getSoftEnd()"}) @Ensures({"result.getFirst() >= 0", "result.getFirst() < read.getReadLength()"}) public static Pair getReadCoordinateForReferenceCoordinate(GATKSAMRecord read, int refCoord) { int readBases = 0; int refBases = 0; boolean fallsInsideDeletion = false; - int goal = refCoord - getRefCoordSoftUnclippedStart(read); // The goal is to move this many reference bases + int goal = refCoord - read.getSoftStart(); // The goal is to move this many reference bases boolean goalReached = refBases == goal; Iterator cigarElementIterator = read.getCigar().getCigarElements().iterator(); diff --git a/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperUnitTest.java index e228762b7..d1d5ddd8f 100644 --- a/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperUnitTest.java @@ -32,7 +32,6 @@ import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; -import org.broadinstitute.sting.utils.sam.ReadUtils; import org.testng.Assert; import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; @@ -90,13 +89,13 @@ public class ReadClipperUnitTest extends BaseTest { int alnStart = read.getAlignmentStart(); int alnEnd = read.getAlignmentEnd(); for (int i=alnStart; i<=alnEnd; i++) { - if (ReadUtils.getRefCoordSoftUnclippedStart(read) == alnStart) { // we can't test left clipping if the read has hanging soft clips on the left side + if (read.getSoftStart() == alnStart) { // we can't test left clipping if the read has hanging soft clips on the left side GATKSAMRecord clipLeft = (new ReadClipper(read)).hardClipByReferenceCoordinates(alnStart, i); if (!clipLeft.isEmpty()) Assert.assertTrue(clipLeft.getAlignmentStart() >= i + 1, String.format("Clipped alignment start (%d) is less the expected (%d): %s -> %s", clipLeft.getAlignmentStart(), i + 1, read.getCigarString(), clipLeft.getCigarString())); } - if (ReadUtils.getRefCoordSoftUnclippedEnd(read) == alnEnd) { // we can't test right clipping if the read has hanging soft clips on the right side + if (read.getSoftEnd() == alnEnd) { // we can't test right clipping if the read has hanging soft clips on the right side GATKSAMRecord clipRight = (new ReadClipper(read)).hardClipByReferenceCoordinates(i, alnEnd); if (!clipRight.isEmpty() && clipRight.getAlignmentStart() <= clipRight.getAlignmentEnd()) // alnStart > alnEnd if the entire read is a soft clip now. We can't test those. Assert.assertTrue(clipRight.getAlignmentEnd() <= i - 1, String.format("Clipped alignment end (%d) is greater than expected (%d): %s -> %s", clipRight.getAlignmentEnd(), i - 1, read.getCigarString(), clipRight.getCigarString())); @@ -111,7 +110,7 @@ public class ReadClipperUnitTest extends BaseTest { GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar); int alnStart = read.getAlignmentStart(); int alnEnd = read.getAlignmentEnd(); - if (ReadUtils.getRefCoordSoftUnclippedStart(read) == alnStart) { // we can't test left clipping if the read has hanging soft clips on the left side + if (read.getSoftStart() == alnStart) { // we can't test left clipping if the read has hanging soft clips on the left side for (int i=alnStart; i<=alnEnd; i++) { GATKSAMRecord clipLeft = (new ReadClipper(read)).hardClipByReferenceCoordinates(alnStart, i); if (!clipLeft.isEmpty()) @@ -127,7 +126,7 @@ public class ReadClipperUnitTest extends BaseTest { GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar); int alnStart = read.getAlignmentStart(); int alnEnd = read.getAlignmentEnd(); - if (ReadUtils.getRefCoordSoftUnclippedEnd(read) == alnEnd) { // we can't test right clipping if the read has hanging soft clips on the right side + if (read.getSoftEnd() == alnEnd) { // we can't test right clipping if the read has hanging soft clips on the right side for (int i=alnStart; i<=alnEnd; i++) { GATKSAMRecord clipRight = (new ReadClipper(read)).hardClipByReferenceCoordinates(i, alnEnd); if (!clipRight.isEmpty() && clipRight.getAlignmentStart() <= clipRight.getAlignmentEnd()) // alnStart > alnEnd if the entire read is a soft clip now. We can't test those.