diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java index e15c68774..964c6b401 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java @@ -494,10 +494,10 @@ public class SlidingWindow { */ private void genericAddBaseToConsensus(final SyntheticRead syntheticRead, final BaseAndQualsCounts baseCounts) { final BaseIndex base = baseCounts.baseIndexWithMostProbability(); - byte count = (byte) Math.min(baseCounts.countOfBase(base), Byte.MAX_VALUE); - byte qual = baseCounts.averageQualsOfBase(base); - byte insQual = baseCounts.averageInsertionQualsOfBase(base); - byte delQual = baseCounts.averageDeletionQualsOfBase(base); + final int count = baseCounts.countOfBase(base); + final byte qual = baseCounts.averageQualsOfBase(base); + final byte insQual = baseCounts.averageInsertionQualsOfBase(base); + final byte delQual = baseCounts.averageDeletionQualsOfBase(base); syntheticRead.add(base, count, qual, insQual, delQual, baseCounts.getRMS()); } @@ -533,20 +533,24 @@ public class SlidingWindow { final int refStart = windowHeader.get(start).getLocation(); final int refStop = windowHeader.get(stop).getLocation(); - final ObjectList toRemove = new ObjectArrayList<>(); + final ObjectList toRemoveFromWindow = new ObjectArrayList<>(); + final ObjectList toEmit = new ObjectArrayList<>(); for ( final GATKSAMRecord read : readsInWindow ) { if ( read.getSoftStart() <= refStop ) { if ( read.getAlignmentEnd() >= refStart ) { - allReads.reads.add(read); + toEmit.add(read); removeFromHeader(windowHeader, read); } - toRemove.add(read); + toRemoveFromWindow.add(read); } } // remove all used reads - for ( final GATKSAMRecord read : toRemove ) + for ( final GATKSAMRecord read : toRemoveFromWindow ) readsInWindow.remove(read); + + // down-sample the unreduced reads if needed + allReads.reads.addAll(downsampleCoverage > 0 ? downsampleVariantRegion(toEmit) : toEmit); } return allReads; @@ -632,12 +636,8 @@ public class SlidingWindow { @Ensures("result != null") protected CloseVariantRegionResult closeVariantRegion(final int start, final int stop, final ObjectSortedSet knownSnpPositions) { final CloseVariantRegionResult allReads = compressVariantRegion(start, stop, knownSnpPositions); - - final CloseVariantRegionResult result = new CloseVariantRegionResult(allReads.stopPerformed); - result.reads.addAll(downsampleCoverage > 0 ? downsampleVariantRegion(allReads.reads) : allReads.reads); - result.reads.addAll(addAllSyntheticReadTypes(0, allReads.stopPerformed + 1)); - - return result; // finalized reads will be downsampled if necessary + allReads.reads.addAll(addAllSyntheticReadTypes(0, allReads.stopPerformed + 1)); + return allReads; } /** diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java index 9942821e1..067f36d58 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java @@ -46,9 +46,14 @@ package org.broadinstitute.sting.gatk.walkers.compression.reducereads; +import net.sf.samtools.SAMFileReader; +import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.WalkerTest; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; +import org.broadinstitute.sting.utils.sam.GATKSamRecordFactory; +import org.testng.Assert; import org.testng.annotations.Test; import java.io.File; @@ -70,6 +75,8 @@ public class ReduceReadsIntegrationTest extends WalkerTest { final String COREDUCTION_BAM_B = validationDataLocation + "coreduction.test.B.bam"; final String COREDUCTION_L = " -L 1:1,853,860-1,854,354 -L 1:1,884,131-1,892,057"; final String OFFCONTIG_BAM = privateTestDir + "readOffb37contigMT.bam"; + final String HIGH_COVERAGE_BAM = privateTestDir + "NA20313.highCoverageRegion.bam"; + final String HIGH_COVERAGE_L = " -L 1:1650830-1650870"; final String BOTH_ENDS_OF_PAIR_IN_VARIANT_REGION_BAM = privateTestDir + "bothEndsOfPairInVariantRegion.bam"; final String INSERTIONS_AT_EDGE_OF_CONSENSUS_BAM = privateTestDir + "rr-too-many-insertions.bam"; @@ -221,13 +228,13 @@ public class ReduceReadsIntegrationTest extends WalkerTest { @Test(enabled = true) public void testCoReduction() { String base = String.format("-T ReduceReads %s --cancer_mode -npt -R %s -I %s -I %s", COREDUCTION_L, REF, COREDUCTION_BAM_A, COREDUCTION_BAM_B) + " -o %s "; - executeTest("testCoReduction", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("2fdc77ff5139f62db9697427b559f866"))); + executeTest("testCoReduction", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("58c2bae5a339af2ea3c22a46ce8faa68"))); } @Test(enabled = true) public void testCoReductionWithKnowns() { String base = String.format("-T ReduceReads %s --cancer_mode -npt -R %s -I %s -I %s -known %s", COREDUCTION_L, REF, COREDUCTION_BAM_A, COREDUCTION_BAM_B, DBSNP) + " -o %s "; - executeTest("testCoReductionWithKnowns", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("6db7fca364ba64f7db9510b412d731f0"))); + executeTest("testCoReductionWithKnowns", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("5c251932b49d99a810581e3a6f762878"))); } @Test(enabled = true) @@ -258,7 +265,7 @@ public class ReduceReadsIntegrationTest extends WalkerTest { public void testDivideByZero() { String base = String.format("-T ReduceReads %s -npt -R %s -I %s", DIVIDEBYZERO_L, REF, DIVIDEBYZERO_BAM) + " -o %s "; // we expect to lose coverage due to the downsampling so don't run the systematic tests - executeTestWithoutAdditionalRRTests("testDivideByZero", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("82758efda419011642cb468809a50bf9"))); + executeTestWithoutAdditionalRRTests("testDivideByZero", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("7dfe2647992ce1154db340fc742d523a"))); } /** @@ -299,5 +306,42 @@ public class ReduceReadsIntegrationTest extends WalkerTest { String cmd = "-T ReduceReads -npt -R " + b37KGReference + " -I " + privateTestDir + "rr_multisample.bam -o /dev/null"; executeTestWithoutAdditionalRRTests("testMultiSampleDoesNotFailWithFlag", new WalkerTestSpec(cmd, 0, UserException.BadInput.class)); } + + /** + * Confirm that compression is not capping coverage counts to max byte + */ + @Test(enabled = true) + public void testCompressionWorksForHighDepth() { + final String base = String.format("-T ReduceReads -npt -R %s -I %s %s", b37KGReference, HIGH_COVERAGE_BAM, HIGH_COVERAGE_L) + " -o %s"; + final File outputBam = executeTestWithoutAdditionalRRTests("testCompressionWorksForHighDepth", + new WalkerTestSpec(base, 1, Arrays.asList(""))).first.get(0); // No MD5s; we only want to check the coverage + + boolean sawHighCoveragePosition = false; + final SAMFileReader reader = new SAMFileReader(outputBam); + reader.setSAMRecordFactory(new GATKSamRecordFactory()); + + for ( final SAMRecord rawRead : reader ) { + final GATKSAMRecord read = (GATKSAMRecord)rawRead; + read.setAttribute(GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG, rawRead.getByteArrayAttribute(GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG)); + + if ( ! read.isReducedRead() ) + continue; + + final int[] decodedCounts = read.getReducedReadCounts(); + for ( final int count : decodedCounts ) { + if ( count > Byte.MAX_VALUE ) { + sawHighCoveragePosition = true; + break; + } + } + + if ( sawHighCoveragePosition ) + break; + } + + reader.close(); + + Assert.assertTrue(sawHighCoveragePosition, "No positions were found with coverage over max byte (127); the coverage is incorrectly being capped somewhere!"); + } } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindowUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindowUnitTest.java index 3534284cd..720f00f98 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindowUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindowUnitTest.java @@ -619,6 +619,76 @@ public class SlidingWindowUnitTest extends BaseTest { Assert.assertEquals(result.size(), Math.min(dcov, basicReads.size())); } + @DataProvider(name = "DownsamplingFromClose") + public Object[][] createDownsamplingFromCloseTestData() { + + final ObjectList myReads = new ObjectArrayList<>(20); + for ( int i = 0; i < 21; i++ ) { + final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "read" + i, 0, globalStartPosition, readLength); + final byte[] bases = Utils.dupBytes((byte) 'A', readLength); + if ( i < 5 ) + bases[50] = 'C'; + read.setReadBases(bases); + read.setBaseQualities(Utils.dupBytes((byte)30, readLength)); + read.setMappingQuality(30); + read.setReadNegativeStrandFlag(false); + myReads.add(read); + } + + List tests = new ArrayList<>(); + + for ( int i = 1; i < 25; i++ ) + tests.add(new Object[]{myReads, i}); + + return tests.toArray(new Object[][]{}); + } + + @Test(dataProvider = "DownsamplingFromClose", enabled = true) + public void testDownsamplingTestFromClose(final ObjectList myReads, final int dcov) { + + final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 0.05, 20, 20, dcov, ReduceReads.DownsampleStrategy.Normal, false); + for ( final GATKSAMRecord read : myReads ) + slidingWindow.addRead(read); + Pair, CompressionStash> result = slidingWindow.close(new ObjectAVLTreeSet()); // no het compression + + Assert.assertEquals(result.getFirst().size(), Math.min(dcov, myReads.size()), "Down-sampling was not performed correctly"); + } + + @DataProvider(name = "NoDownsamplingForConsensusReads") + public Object[][] createNoDownsamplingForConsensusReadsData() { + + final ObjectList myReads = new ObjectArrayList<>(20); + for ( int i = 0; i < 30; i++ ) { + final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "read" + i, 0, globalStartPosition, readLength); + final byte[] bases = Utils.dupBytes((byte) 'A', readLength); + if ( i < 10 ) + bases[50] = 'C'; + read.setReadBases(bases); + read.setBaseQualities(Utils.dupBytes((byte)30, readLength)); + read.setMappingQuality(30); + read.setReadNegativeStrandFlag(false); + read.setReadNegativeStrandFlag(i % 2 == 0); + myReads.add(read); + } + + List tests = new ArrayList<>(); + + for ( int i = 0; i < 5; i++ ) + tests.add(new Object[]{myReads, i}); + + return tests.toArray(new Object[][]{}); + } + + @Test(dataProvider = "NoDownsamplingForConsensusReads", enabled = true) + public void testNoDownsamplingForConsensusReads(final ObjectList myReads, final int dcov) { + + final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 0.05, 20, 20, dcov, ReduceReads.DownsampleStrategy.Normal, false); + for ( final GATKSAMRecord read : myReads ) + slidingWindow.addRead(read); + Pair, CompressionStash> result = slidingWindow.close(null); // allow het compression (so we expect 4 reads) + + Assert.assertEquals(result.getFirst().size(), 4, "Down-sampling was performed on consensus reads!"); + } ////////////////////////////////////////////////////////////// //// This section tests the consensus base quals accuracy ////