From 6555361742e64829183c9cb056795f0fc0b43443 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 31 May 2013 15:21:12 -0400 Subject: [PATCH] Fix error in merging code in HC -- Ultimately this was caused by an underlying bug in the reverting of soft clipped bases in the read clipper. The read clipper would fail to properly set the alignment start for reads that were 100% clipped before reverting, such as 10H2S5H => 10H2M5H. This has been fixed and unit tested. -- Update 1 ReduceReads MD5, which was due to cases where we were clipping away all of the MATCH part of the read, leaving a cigar like 50H11S and the revert soft clips was failing to properly revert the bases. -- delivers #50655421 --- .../ReduceReadsIntegrationTest.java | 2 +- .../sting/utils/clipping/ClippingOp.java | 47 +++++++++++-------- .../utils/clipping/ReadClipperUnitTest.java | 9 +++- 3 files changed, 37 insertions(+), 21 deletions(-) diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java index 405e616f1..4fbbe1d0c 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java @@ -260,7 +260,7 @@ public class ReduceReadsIntegrationTest extends WalkerTest { public void testDivideByZero() { String base = String.format("-T ReduceReads %s -npt -R %s -I %s", DIVIDEBYZERO_L, REF, DIVIDEBYZERO_BAM) + " -o %s "; // we expect to lose coverage due to the downsampling so don't run the systematic tests - executeTestWithoutAdditionalRRTests("testDivideByZero", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("c459a6153a17c2cbf8441e1918fda9c8"))); + executeTestWithoutAdditionalRRTests("testDivideByZero", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("4f0ef477c0417d1eb602b323474ef377"))); } /** diff --git a/public/java/src/org/broadinstitute/sting/utils/clipping/ClippingOp.java b/public/java/src/org/broadinstitute/sting/utils/clipping/ClippingOp.java index f51881e0b..2c2cbd98f 100644 --- a/public/java/src/org/broadinstitute/sting/utils/clipping/ClippingOp.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipping/ClippingOp.java @@ -35,6 +35,7 @@ import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import java.util.Iterator; +import java.util.List; import java.util.Stack; import java.util.Vector; @@ -559,26 +560,34 @@ public class ClippingOp { return new CigarShift(cleanCigar, shiftFromStart, shiftFromEnd); } + /** + * Compute the offset of the first "real" position in the cigar on the genome + * + * This is defined as a first position after a run of Hs followed by a run of Ss + * + * @param cigar A non-null cigar + * @return the offset (from 0) of the first on-genome base + */ + private int calcHardSoftOffset(final Cigar cigar) { + final List elements = cigar.getCigarElements(); + + int size = 0; + int i = 0; + while ( i < elements.size() && elements.get(i).getOperator() == CigarOperator.HARD_CLIP ) { + size += elements.get(i).getLength(); + i++; + } + while ( i < elements.size() && elements.get(i).getOperator() == CigarOperator.SOFT_CLIP ) { + size += elements.get(i).getLength(); + i++; + } + + return size; + } + private int calculateAlignmentStartShift(Cigar oldCigar, Cigar newCigar) { - int newShift = 0; - int oldShift = 0; - - boolean readHasStarted = false; // if the new cigar is composed of S and H only, we have to traverse the entire old cigar to calculate the shift - for (CigarElement cigarElement : newCigar.getCigarElements()) { - if (cigarElement.getOperator() == CigarOperator.HARD_CLIP || cigarElement.getOperator() == CigarOperator.SOFT_CLIP) - newShift += cigarElement.getLength(); - else { - readHasStarted = true; - break; - } - } - - for (CigarElement cigarElement : oldCigar.getCigarElements()) { - if (cigarElement.getOperator() == CigarOperator.HARD_CLIP || cigarElement.getOperator() == CigarOperator.SOFT_CLIP) - oldShift += cigarElement.getLength(); - else if (readHasStarted) - break; - } + final int newShift = calcHardSoftOffset(newCigar); + final int oldShift = calcHardSoftOffset(oldCigar); return newShift - oldShift; } diff --git a/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperUnitTest.java index 6ec4336b0..0b4153535 100644 --- a/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperUnitTest.java @@ -48,7 +48,7 @@ import java.util.List; public class ReadClipperUnitTest extends BaseTest { List cigarList; - int maximumCigarSize = 6; // 6 is the minimum necessary number to try all combinations of cigar types with guarantee of clipping an element with length = 2 + int maximumCigarSize = 10; // 6 is the minimum necessary number to try all combinations of cigar types with guarantee of clipping an element with length = 2 @BeforeClass public void init() { @@ -391,4 +391,11 @@ public class ReadClipperUnitTest extends BaseTest { } } + @Test(enabled = true) + public void testRevertEntirelySoftclippedReads() { + GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar("2H1S3H"); + GATKSAMRecord clippedRead = ReadClipper.revertSoftClippedBases(read); + Assert.assertEquals(clippedRead.getAlignmentStart(), read.getSoftStart()); + } + } \ No newline at end of file