Merge pull request #354 from broadinstitute/eb_fix_rr_count_encoding

Two reduce reads updates/fixes
This commit is contained in:
Eric Banks 2013-08-01 12:34:19 -07:00
commit 0b062e7f22
3 changed files with 131 additions and 17 deletions

View File

@ -494,10 +494,10 @@ public class SlidingWindow {
*/ */
private void genericAddBaseToConsensus(final SyntheticRead syntheticRead, final BaseAndQualsCounts baseCounts) { private void genericAddBaseToConsensus(final SyntheticRead syntheticRead, final BaseAndQualsCounts baseCounts) {
final BaseIndex base = baseCounts.baseIndexWithMostProbability(); final BaseIndex base = baseCounts.baseIndexWithMostProbability();
byte count = (byte) Math.min(baseCounts.countOfBase(base), Byte.MAX_VALUE); final int count = baseCounts.countOfBase(base);
byte qual = baseCounts.averageQualsOfBase(base); final byte qual = baseCounts.averageQualsOfBase(base);
byte insQual = baseCounts.averageInsertionQualsOfBase(base); final byte insQual = baseCounts.averageInsertionQualsOfBase(base);
byte delQual = baseCounts.averageDeletionQualsOfBase(base); final byte delQual = baseCounts.averageDeletionQualsOfBase(base);
syntheticRead.add(base, count, qual, insQual, delQual, baseCounts.getRMS()); syntheticRead.add(base, count, qual, insQual, delQual, baseCounts.getRMS());
} }
@ -533,20 +533,24 @@ public class SlidingWindow {
final int refStart = windowHeader.get(start).getLocation(); final int refStart = windowHeader.get(start).getLocation();
final int refStop = windowHeader.get(stop).getLocation(); final int refStop = windowHeader.get(stop).getLocation();
final ObjectList<GATKSAMRecord> toRemove = new ObjectArrayList<>(); final ObjectList<GATKSAMRecord> toRemoveFromWindow = new ObjectArrayList<>();
final ObjectList<GATKSAMRecord> toEmit = new ObjectArrayList<>();
for ( final GATKSAMRecord read : readsInWindow ) { for ( final GATKSAMRecord read : readsInWindow ) {
if ( read.getSoftStart() <= refStop ) { if ( read.getSoftStart() <= refStop ) {
if ( read.getAlignmentEnd() >= refStart ) { if ( read.getAlignmentEnd() >= refStart ) {
allReads.reads.add(read); toEmit.add(read);
removeFromHeader(windowHeader, read); removeFromHeader(windowHeader, read);
} }
toRemove.add(read); toRemoveFromWindow.add(read);
} }
} }
// remove all used reads // remove all used reads
for ( final GATKSAMRecord read : toRemove ) for ( final GATKSAMRecord read : toRemoveFromWindow )
readsInWindow.remove(read); readsInWindow.remove(read);
// down-sample the unreduced reads if needed
allReads.reads.addAll(downsampleCoverage > 0 ? downsampleVariantRegion(toEmit) : toEmit);
} }
return allReads; return allReads;
@ -632,12 +636,8 @@ public class SlidingWindow {
@Ensures("result != null") @Ensures("result != null")
protected CloseVariantRegionResult closeVariantRegion(final int start, final int stop, final ObjectSortedSet<GenomeLoc> knownSnpPositions) { protected CloseVariantRegionResult closeVariantRegion(final int start, final int stop, final ObjectSortedSet<GenomeLoc> knownSnpPositions) {
final CloseVariantRegionResult allReads = compressVariantRegion(start, stop, knownSnpPositions); final CloseVariantRegionResult allReads = compressVariantRegion(start, stop, knownSnpPositions);
allReads.reads.addAll(addAllSyntheticReadTypes(0, allReads.stopPerformed + 1));
final CloseVariantRegionResult result = new CloseVariantRegionResult(allReads.stopPerformed); return allReads;
result.reads.addAll(downsampleCoverage > 0 ? downsampleVariantRegion(allReads.reads) : allReads.reads);
result.reads.addAll(addAllSyntheticReadTypes(0, allReads.stopPerformed + 1));
return result; // finalized reads will be downsampled if necessary
} }
/** /**

View File

@ -46,9 +46,14 @@
package org.broadinstitute.sting.gatk.walkers.compression.reducereads; package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
import net.sf.samtools.SAMFileReader;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.WalkerTest; import org.broadinstitute.sting.WalkerTest;
import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.GATKSamRecordFactory;
import org.testng.Assert;
import org.testng.annotations.Test; import org.testng.annotations.Test;
import java.io.File; import java.io.File;
@ -70,6 +75,8 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
final String COREDUCTION_BAM_B = validationDataLocation + "coreduction.test.B.bam"; final String COREDUCTION_BAM_B = validationDataLocation + "coreduction.test.B.bam";
final String COREDUCTION_L = " -L 1:1,853,860-1,854,354 -L 1:1,884,131-1,892,057"; final String COREDUCTION_L = " -L 1:1,853,860-1,854,354 -L 1:1,884,131-1,892,057";
final String OFFCONTIG_BAM = privateTestDir + "readOffb37contigMT.bam"; final String OFFCONTIG_BAM = privateTestDir + "readOffb37contigMT.bam";
final String HIGH_COVERAGE_BAM = privateTestDir + "NA20313.highCoverageRegion.bam";
final String HIGH_COVERAGE_L = " -L 1:1650830-1650870";
final String BOTH_ENDS_OF_PAIR_IN_VARIANT_REGION_BAM = privateTestDir + "bothEndsOfPairInVariantRegion.bam"; final String BOTH_ENDS_OF_PAIR_IN_VARIANT_REGION_BAM = privateTestDir + "bothEndsOfPairInVariantRegion.bam";
final String INSERTIONS_AT_EDGE_OF_CONSENSUS_BAM = privateTestDir + "rr-too-many-insertions.bam"; final String INSERTIONS_AT_EDGE_OF_CONSENSUS_BAM = privateTestDir + "rr-too-many-insertions.bam";
@ -221,13 +228,13 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
@Test(enabled = true) @Test(enabled = true)
public void testCoReduction() { public void testCoReduction() {
String base = String.format("-T ReduceReads %s --cancer_mode -npt -R %s -I %s -I %s", COREDUCTION_L, REF, COREDUCTION_BAM_A, COREDUCTION_BAM_B) + " -o %s "; String base = String.format("-T ReduceReads %s --cancer_mode -npt -R %s -I %s -I %s", COREDUCTION_L, REF, COREDUCTION_BAM_A, COREDUCTION_BAM_B) + " -o %s ";
executeTest("testCoReduction", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("2fdc77ff5139f62db9697427b559f866"))); executeTest("testCoReduction", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("58c2bae5a339af2ea3c22a46ce8faa68")));
} }
@Test(enabled = true) @Test(enabled = true)
public void testCoReductionWithKnowns() { public void testCoReductionWithKnowns() {
String base = String.format("-T ReduceReads %s --cancer_mode -npt -R %s -I %s -I %s -known %s", COREDUCTION_L, REF, COREDUCTION_BAM_A, COREDUCTION_BAM_B, DBSNP) + " -o %s "; String base = String.format("-T ReduceReads %s --cancer_mode -npt -R %s -I %s -I %s -known %s", COREDUCTION_L, REF, COREDUCTION_BAM_A, COREDUCTION_BAM_B, DBSNP) + " -o %s ";
executeTest("testCoReductionWithKnowns", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("6db7fca364ba64f7db9510b412d731f0"))); executeTest("testCoReductionWithKnowns", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("5c251932b49d99a810581e3a6f762878")));
} }
@Test(enabled = true) @Test(enabled = true)
@ -258,7 +265,7 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
public void testDivideByZero() { public void testDivideByZero() {
String base = String.format("-T ReduceReads %s -npt -R %s -I %s", DIVIDEBYZERO_L, REF, DIVIDEBYZERO_BAM) + " -o %s "; String base = String.format("-T ReduceReads %s -npt -R %s -I %s", DIVIDEBYZERO_L, REF, DIVIDEBYZERO_BAM) + " -o %s ";
// we expect to lose coverage due to the downsampling so don't run the systematic tests // we expect to lose coverage due to the downsampling so don't run the systematic tests
executeTestWithoutAdditionalRRTests("testDivideByZero", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("82758efda419011642cb468809a50bf9"))); executeTestWithoutAdditionalRRTests("testDivideByZero", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("7dfe2647992ce1154db340fc742d523a")));
} }
/** /**
@ -299,5 +306,42 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
String cmd = "-T ReduceReads -npt -R " + b37KGReference + " -I " + privateTestDir + "rr_multisample.bam -o /dev/null"; String cmd = "-T ReduceReads -npt -R " + b37KGReference + " -I " + privateTestDir + "rr_multisample.bam -o /dev/null";
executeTestWithoutAdditionalRRTests("testMultiSampleDoesNotFailWithFlag", new WalkerTestSpec(cmd, 0, UserException.BadInput.class)); executeTestWithoutAdditionalRRTests("testMultiSampleDoesNotFailWithFlag", new WalkerTestSpec(cmd, 0, UserException.BadInput.class));
} }
/**
* Confirm that compression is not capping coverage counts to max byte
*/
@Test(enabled = true)
public void testCompressionWorksForHighDepth() {
final String base = String.format("-T ReduceReads -npt -R %s -I %s %s", b37KGReference, HIGH_COVERAGE_BAM, HIGH_COVERAGE_L) + " -o %s";
final File outputBam = executeTestWithoutAdditionalRRTests("testCompressionWorksForHighDepth",
new WalkerTestSpec(base, 1, Arrays.asList(""))).first.get(0); // No MD5s; we only want to check the coverage
boolean sawHighCoveragePosition = false;
final SAMFileReader reader = new SAMFileReader(outputBam);
reader.setSAMRecordFactory(new GATKSamRecordFactory());
for ( final SAMRecord rawRead : reader ) {
final GATKSAMRecord read = (GATKSAMRecord)rawRead;
read.setAttribute(GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG, rawRead.getByteArrayAttribute(GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG));
if ( ! read.isReducedRead() )
continue;
final int[] decodedCounts = read.getReducedReadCounts();
for ( final int count : decodedCounts ) {
if ( count > Byte.MAX_VALUE ) {
sawHighCoveragePosition = true;
break;
}
}
if ( sawHighCoveragePosition )
break;
}
reader.close();
Assert.assertTrue(sawHighCoveragePosition, "No positions were found with coverage over max byte (127); the coverage is incorrectly being capped somewhere!");
}
} }

View File

@ -619,6 +619,76 @@ public class SlidingWindowUnitTest extends BaseTest {
Assert.assertEquals(result.size(), Math.min(dcov, basicReads.size())); Assert.assertEquals(result.size(), Math.min(dcov, basicReads.size()));
} }
@DataProvider(name = "DownsamplingFromClose")
public Object[][] createDownsamplingFromCloseTestData() {
final ObjectList<GATKSAMRecord> myReads = new ObjectArrayList<>(20);
for ( int i = 0; i < 21; i++ ) {
final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "read" + i, 0, globalStartPosition, readLength);
final byte[] bases = Utils.dupBytes((byte) 'A', readLength);
if ( i < 5 )
bases[50] = 'C';
read.setReadBases(bases);
read.setBaseQualities(Utils.dupBytes((byte)30, readLength));
read.setMappingQuality(30);
read.setReadNegativeStrandFlag(false);
myReads.add(read);
}
List<Object[]> tests = new ArrayList<>();
for ( int i = 1; i < 25; i++ )
tests.add(new Object[]{myReads, i});
return tests.toArray(new Object[][]{});
}
@Test(dataProvider = "DownsamplingFromClose", enabled = true)
public void testDownsamplingTestFromClose(final ObjectList<GATKSAMRecord> myReads, final int dcov) {
final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 0.05, 20, 20, dcov, ReduceReads.DownsampleStrategy.Normal, false);
for ( final GATKSAMRecord read : myReads )
slidingWindow.addRead(read);
Pair<ObjectSet<GATKSAMRecord>, CompressionStash> result = slidingWindow.close(new ObjectAVLTreeSet<GenomeLoc>()); // no het compression
Assert.assertEquals(result.getFirst().size(), Math.min(dcov, myReads.size()), "Down-sampling was not performed correctly");
}
@DataProvider(name = "NoDownsamplingForConsensusReads")
public Object[][] createNoDownsamplingForConsensusReadsData() {
final ObjectList<GATKSAMRecord> myReads = new ObjectArrayList<>(20);
for ( int i = 0; i < 30; i++ ) {
final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "read" + i, 0, globalStartPosition, readLength);
final byte[] bases = Utils.dupBytes((byte) 'A', readLength);
if ( i < 10 )
bases[50] = 'C';
read.setReadBases(bases);
read.setBaseQualities(Utils.dupBytes((byte)30, readLength));
read.setMappingQuality(30);
read.setReadNegativeStrandFlag(false);
read.setReadNegativeStrandFlag(i % 2 == 0);
myReads.add(read);
}
List<Object[]> tests = new ArrayList<>();
for ( int i = 0; i < 5; i++ )
tests.add(new Object[]{myReads, i});
return tests.toArray(new Object[][]{});
}
@Test(dataProvider = "NoDownsamplingForConsensusReads", enabled = true)
public void testNoDownsamplingForConsensusReads(final ObjectList<GATKSAMRecord> myReads, final int dcov) {
final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 0.05, 20, 20, dcov, ReduceReads.DownsampleStrategy.Normal, false);
for ( final GATKSAMRecord read : myReads )
slidingWindow.addRead(read);
Pair<ObjectSet<GATKSAMRecord>, CompressionStash> result = slidingWindow.close(null); // allow het compression (so we expect 4 reads)
Assert.assertEquals(result.getFirst().size(), 4, "Down-sampling was performed on consensus reads!");
}
////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////
//// This section tests the consensus base quals accuracy //// //// This section tests the consensus base quals accuracy ////