From 70479cb71dfd0c016d7901ec473a04dae0251ef8 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 19 Dec 2012 10:59:07 -0500 Subject: [PATCH] RR bug fix: we were failing when a read started with an insertion just at the edge of the consensus region. The weird part is that the comments claimed it was doing what it was supposed to, but it didn't actually do it. Now we maintain the last header element of the consensus (but without bases and quals) if it adjoins an element with an insertion. Added the user's test file as an integration test. --- .../reducereads/HeaderElement.java | 21 ++++++++++++++++++- .../reducereads/SlidingWindow.java | 11 ++++++++-- .../ReduceReadsIntegrationTest.java | 7 +++++++ 3 files changed, 36 insertions(+), 3 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/HeaderElement.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/HeaderElement.java index 3097c2ee9..bebc27221 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/HeaderElement.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/HeaderElement.java @@ -39,16 +39,27 @@ public class HeaderElement { * * @param location the reference location for the new element */ - public HeaderElement(int location) { + public HeaderElement(final int location) { this(new BaseAndQualsCounts(), new BaseAndQualsCounts(), 0, 0, location, new LinkedList()); } + /** + * Creates a new HeaderElement with the following default values: - empty consensusBaseCounts - empty + * filteredBaseCounts - empty mappingQuality list + * + * @param location the reference location for the new element + */ + public HeaderElement(final int location, final int insertionsToTheRight) { + this(new BaseAndQualsCounts(), new BaseAndQualsCounts(), insertionsToTheRight, 0, location, new LinkedList()); + } + /** * Creates a new HeaderElement with all given parameters * * @param consensusBaseCounts the BaseCounts object for the running consensus synthetic read * @param filteredBaseCounts the BaseCounts object for the filtered data synthetic read * @param insertionsToTheRight number of insertions to the right of this HeaderElement + * @param nSoftClippedBases number of softclipped bases of this HeaderElement * @param location the reference location of this reference element * @param mappingQuality the list of mapping quality values of all reads that contributed to this * HeaderElement @@ -151,6 +162,14 @@ public class HeaderElement { throw new ReviewedStingException("Removed too many insertions, header is now negative!"); } + public boolean hasInsertionToTheRight() { + return insertionsToTheRight > 0; + } + + public int numInsertionsToTheRight() { + return insertionsToTheRight; + } + /** * Whether or not the HeaderElement is variant due to excess insertions * diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java index fff1c20a5..9af54b4a8 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java @@ -645,8 +645,15 @@ public class SlidingWindow { } } - for (int i = 0; i <= lastStop; i++) // clean up the window header elements up until the end of the variant region. (we keep the last element in case the following element had a read that started with insertion) - windowHeader.remove(); + // clean up the window header elements up until the end of the variant region. + // note that we keep the last element of the region in the event that the following element has a read that starts with insertion. + if ( lastStop >= 0 ) { + for (int i = 0; i < lastStop; i++) + windowHeader.remove(); + final HeaderElement lastOfRegion = windowHeader.remove(); + if ( lastOfRegion.hasInsertionToTheRight() ) + windowHeader.addFirst(new HeaderElement(lastOfRegion.getLocation(), lastOfRegion.numInsertionsToTheRight())); + } } return allReads; } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java index 446c3cfc3..7e662d3b2 100755 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java @@ -18,6 +18,7 @@ public class ReduceReadsIntegrationTest extends WalkerTest { final String COREDUCTION_BAM_B = validationDataLocation + "coreduction.test.B.bam"; final String COREDUCTION_L = " -L 1:1,853,860-1,854,354 -L 1:1,884,131-1,892,057"; final String OFFCONTIG_BAM = privateTestDir + "readOffb37contigMT.bam"; + final String INSERTIONS_AT_EDGE_OF_CONSENSUS_BAM = privateTestDir + "rr-too-many-insertions.bam"; private void RRTest(String testName, String args, String md5) { String base = String.format("-T ReduceReads -npt -R %s -I %s ", REF, BAM) + " -o %s "; @@ -30,6 +31,12 @@ public class ReduceReadsIntegrationTest extends WalkerTest { RRTest("testDefaultCompression ", L, "98080d3c53f441564796fc143cf510da"); } + @Test(enabled = true) + public void testInsertionsAtEdgeOfConsensus() { + String base = String.format("-T ReduceReads -npt -R %s -I %s ", REF, INSERTIONS_AT_EDGE_OF_CONSENSUS_BAM) + " -o %s "; + executeTest("testInsertionsAtEdgeOfConsensus", new WalkerTestSpec(base, Arrays.asList("2a6e08a0206bd8ec7671224c4a55dae0"))); + } + @Test(enabled = true) public void testMultipleIntervals() { String intervals = "-L 20:10,100,000-10,100,500 -L 20:10,200,000-10,200,500 -L 20:10,300,000-10,300,500 -L 20:10,400,000-10,500,000 -L 20:10,500,050-10,500,060 -L 20:10,600,000-10,600,015 -L 20:10,700,000-10,700,110";