diff --git a/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java b/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java index 4a253b217..06985041e 100644 --- a/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java @@ -247,7 +247,7 @@ public class ClippingOp { return newCigar; } - @Requires({"start <= stop", "start == 0 || stop == read.getReadLength() - 1", "!read.getReadUnmappedFlag()"}) + @Requires({"start <= stop", "start == 0 || stop == read.getReadLength() - 1"}) private GATKSAMRecord hardClip (GATKSAMRecord read, int start, int stop) { if (start == 0 && stop == read.getReadLength() - 1) return new GATKSAMRecord(read.getHeader()); @@ -373,6 +373,10 @@ public class ClippingOp { while(cigarElementIterator.hasNext()) { cigarElement = cigarElementIterator.next(); alignmentShift += calculateHardClippingAlignmentShift(cigarElement, cigarElement.getLength()); + + // if the read had a HardClip operator in the end, combine it with the Hard Clip we are adding + if (cigarElement.getOperator() == CigarOperator.HARD_CLIP) + totalHardClipCount += cigarElement.getLength(); } newCigar.add(new CigarElement(totalHardClipCount + alignmentShift, CigarOperator.HARD_CLIP)); } diff --git a/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java b/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java index c3684034c..4df899888 100644 --- a/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java @@ -58,6 +58,7 @@ public class ReadClipper { return hardClipByReferenceCoordinates(refStart, -1); } + @Requires("!read.getReadUnmappedFlag()") protected GATKSAMRecord hardClipByReferenceCoordinates(int refStart, int refStop) { int start = (refStart < 0) ? 0 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStart, ReadUtils.ClippingTail.RIGHT_TAIL); int stop = (refStop < 0) ? read.getReadLength() - 1 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop, ReadUtils.ClippingTail.LEFT_TAIL); diff --git a/public/java/test/org/broadinstitute/sting/utils/clipreads/ReadClipperUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/clipreads/ReadClipperUnitTest.java index 650d3f26e..f998a4e78 100644 --- a/public/java/test/org/broadinstitute/sting/utils/clipreads/ReadClipperUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/clipreads/ReadClipperUnitTest.java @@ -252,77 +252,56 @@ public class ReadClipperUnitTest extends BaseTest { } - @Test(enabled = false) + @Test(enabled = true) public void testHardClipSoftClippedBases() { // Generate a list of cigars to test for (Cigar cigar : ClipReadsTestUtils.generateCigars()) { - //logger.warn("Testing Cigar: "+cigar.toString()); - readClipper = new ReadClipper(ClipReadsTestUtils.makeReadFromCigar(cigar)); + GATKSAMRecord read = ClipReadsTestUtils.makeReadFromCigar(cigar); + readClipper = new ReadClipper(read); + GATKSAMRecord clippedRead = readClipper.hardClipSoftClippedBases(); - int clipStart = 0; - int clipEnd = 0; - boolean expectEmptyRead = false; + int sumHardClips = 0; + int sumMatches = 0; - List cigarElements = cigar.getCigarElements(); - int CigarListLength = cigarElements.size(); + boolean tail = true; + for (CigarElement element : read.getCigar().getCigarElements()) { + // Assuming cigars are well formed, if we see S or H, it means we're on the tail (left or right) + if (element.getOperator() == CigarOperator.HARD_CLIP || element.getOperator() == CigarOperator.SOFT_CLIP) + tail = true; - // It will know what needs to be clipped based on the start and end of the string, hardclips and softclips - // are added to the amount to clip - if (cigarElements.get(0).getOperator() == CigarOperator.HARD_CLIP) { - //clipStart += cigarElements.get(0).getLength(); - if (cigarElements.get(1).getOperator() == CigarOperator.SOFT_CLIP) { - clipStart += cigarElements.get(1).getLength(); - // Check for leading indel - if (cigarElements.get(2).getOperator() == CigarOperator.INSERTION) { - expectEmptyRead = true; - } - } - // Check for leading indel - else if (cigarElements.get(1).getOperator() == CigarOperator.INSERTION) { - expectEmptyRead = true; - } - } else if (cigarElements.get(0).getOperator() == CigarOperator.SOFT_CLIP) { - clipStart += cigarElements.get(0).getLength(); - // Check for leading indel - if (cigarElements.get(1).getOperator() == CigarOperator.INSERTION) { - expectEmptyRead = true; - } - } - //Check for leading indel - else if (cigarElements.get(0).getOperator() == CigarOperator.INSERTION) { - expectEmptyRead = true; + // Adds all H, S and D's (next to hard/soft clips). + // All these should be hard clips after clipping. + if (tail && (element.getOperator() == CigarOperator.HARD_CLIP || element.getOperator() == CigarOperator.SOFT_CLIP || element.getOperator() == CigarOperator.DELETION)) + sumHardClips += element.getLength(); + + // this means we're no longer on the tail (insertions can still potentially be the tail because + // of the current contract of clipping out hanging insertions + else if (element.getOperator() != CigarOperator.INSERTION) + tail = false; + + // Adds all matches to verify that they remain the same after clipping + if (element.getOperator() == CigarOperator.MATCH_OR_MISMATCH) + sumMatches += element.getLength(); } - if (cigarElements.get(CigarListLength - 1).getOperator() == CigarOperator.HARD_CLIP) { - //clipEnd += cigarElements.get(CigarListLength - 1).getLength(); - if (cigarElements.get(CigarListLength - 2).getOperator() == CigarOperator.SOFT_CLIP) - clipEnd += cigarElements.get(CigarListLength - 2).getLength(); - } else if (cigarElements.get(CigarListLength - 1).getOperator() == CigarOperator.SOFT_CLIP) - clipEnd += cigarElements.get(CigarListLength - 1).getLength(); + for (CigarElement element : clippedRead.getCigar().getCigarElements()) { + // Test if clipped read has Soft Clips (shouldn't have any!) + Assert.assertTrue( element.getOperator() != CigarOperator.SOFT_CLIP, String.format("Cigar %s -> %s -- FAILED (resulting cigar has soft clips)", read.getCigarString(), clippedRead.getCigarString())); - String readBases = readClipper.read.getReadString(); - String baseQuals = readClipper.read.getBaseQualityString(); + // Keep track of the total number of Hard Clips after clipping to make sure everything was accounted for + if (element.getOperator() == CigarOperator.HARD_CLIP) + sumHardClips -= element.getLength(); - // "*" is the default empty-sequence-string and for our test it needs to be changed to "" - if (readBases.equals("*")) - readBases = ""; - if (baseQuals.equals("*")) - baseQuals = ""; + // Make sure all matches are still there + if (element.getOperator() == CigarOperator.MATCH_OR_MISMATCH) + sumMatches -= element.getLength(); + } + Assert.assertTrue( sumHardClips == 0, String.format("Cigar %s -> %s -- FAILED (number of hard clips mismatched by %d)", read.getCigarString(), clippedRead.getCigarString(), sumHardClips)); + Assert.assertTrue( sumMatches == 0, String.format("Cigar %s -> %s -- FAILED (number of matches mismatched by %d)", read.getCigarString(), clippedRead.getCigarString(), sumMatches)); - logger.warn(String.format("Testing cigar %s, expecting Base: %s and Qual: %s", - cigar.toString(), readBases.substring(clipStart, readBases.length() - clipEnd), - baseQuals.substring(clipStart, baseQuals.length() - clipEnd))); - //if (expectEmptyRead) - // testBaseQual( readClipper.hardClipSoftClippedBases(), new byte[0], new byte[0] ); - //else - ClipReadsTestUtils.testBaseQual(readClipper.hardClipSoftClippedBases(), - readBases.substring(clipStart, readBases.length() - clipEnd).getBytes(), - baseQuals.substring(clipStart, baseQuals.length() - clipEnd).getBytes()); - logger.warn("Cigar: " + cigar.toString() + " PASSED!"); + + logger.warn(String.format("Cigar %s -> %s -- PASSED!", read.getCigarString(), clippedRead.getCigarString())); } - // We will use testParameter in the following way - // Right tail, left tail, - } } \ No newline at end of file