Merge pull request #354 from broadinstitute/eb_fix_rr_count_encoding

Two reduce reads updates/fixes
This commit is contained in:
Eric Banks 2013-08-01 12:34:19 -07:00
commit 0b062e7f22
3 changed files with 131 additions and 17 deletions

View File

@ -494,10 +494,10 @@ public class SlidingWindow {
*/
private void genericAddBaseToConsensus(final SyntheticRead syntheticRead, final BaseAndQualsCounts baseCounts) {
final BaseIndex base = baseCounts.baseIndexWithMostProbability();
byte count = (byte) Math.min(baseCounts.countOfBase(base), Byte.MAX_VALUE);
byte qual = baseCounts.averageQualsOfBase(base);
byte insQual = baseCounts.averageInsertionQualsOfBase(base);
byte delQual = baseCounts.averageDeletionQualsOfBase(base);
final int count = baseCounts.countOfBase(base);
final byte qual = baseCounts.averageQualsOfBase(base);
final byte insQual = baseCounts.averageInsertionQualsOfBase(base);
final byte delQual = baseCounts.averageDeletionQualsOfBase(base);
syntheticRead.add(base, count, qual, insQual, delQual, baseCounts.getRMS());
}
@ -533,20 +533,24 @@ public class SlidingWindow {
final int refStart = windowHeader.get(start).getLocation();
final int refStop = windowHeader.get(stop).getLocation();
final ObjectList<GATKSAMRecord> toRemove = new ObjectArrayList<>();
final ObjectList<GATKSAMRecord> toRemoveFromWindow = new ObjectArrayList<>();
final ObjectList<GATKSAMRecord> toEmit = new ObjectArrayList<>();
for ( final GATKSAMRecord read : readsInWindow ) {
if ( read.getSoftStart() <= refStop ) {
if ( read.getAlignmentEnd() >= refStart ) {
allReads.reads.add(read);
toEmit.add(read);
removeFromHeader(windowHeader, read);
}
toRemove.add(read);
toRemoveFromWindow.add(read);
}
}
// remove all used reads
for ( final GATKSAMRecord read : toRemove )
for ( final GATKSAMRecord read : toRemoveFromWindow )
readsInWindow.remove(read);
// down-sample the unreduced reads if needed
allReads.reads.addAll(downsampleCoverage > 0 ? downsampleVariantRegion(toEmit) : toEmit);
}
return allReads;
@ -632,12 +636,8 @@ public class SlidingWindow {
@Ensures("result != null")
protected CloseVariantRegionResult closeVariantRegion(final int start, final int stop, final ObjectSortedSet<GenomeLoc> knownSnpPositions) {
final CloseVariantRegionResult allReads = compressVariantRegion(start, stop, knownSnpPositions);
final CloseVariantRegionResult result = new CloseVariantRegionResult(allReads.stopPerformed);
result.reads.addAll(downsampleCoverage > 0 ? downsampleVariantRegion(allReads.reads) : allReads.reads);
result.reads.addAll(addAllSyntheticReadTypes(0, allReads.stopPerformed + 1));
return result; // finalized reads will be downsampled if necessary
allReads.reads.addAll(addAllSyntheticReadTypes(0, allReads.stopPerformed + 1));
return allReads;
}
/**

View File

@ -46,9 +46,14 @@
package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
import net.sf.samtools.SAMFileReader;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.WalkerTest;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.GATKSamRecordFactory;
import org.testng.Assert;
import org.testng.annotations.Test;
import java.io.File;
@ -70,6 +75,8 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
final String COREDUCTION_BAM_B = validationDataLocation + "coreduction.test.B.bam";
final String COREDUCTION_L = " -L 1:1,853,860-1,854,354 -L 1:1,884,131-1,892,057";
final String OFFCONTIG_BAM = privateTestDir + "readOffb37contigMT.bam";
final String HIGH_COVERAGE_BAM = privateTestDir + "NA20313.highCoverageRegion.bam";
final String HIGH_COVERAGE_L = " -L 1:1650830-1650870";
final String BOTH_ENDS_OF_PAIR_IN_VARIANT_REGION_BAM = privateTestDir + "bothEndsOfPairInVariantRegion.bam";
final String INSERTIONS_AT_EDGE_OF_CONSENSUS_BAM = privateTestDir + "rr-too-many-insertions.bam";
@ -221,13 +228,13 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
@Test(enabled = true)
public void testCoReduction() {
String base = String.format("-T ReduceReads %s --cancer_mode -npt -R %s -I %s -I %s", COREDUCTION_L, REF, COREDUCTION_BAM_A, COREDUCTION_BAM_B) + " -o %s ";
executeTest("testCoReduction", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("2fdc77ff5139f62db9697427b559f866")));
executeTest("testCoReduction", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("58c2bae5a339af2ea3c22a46ce8faa68")));
}
@Test(enabled = true)
public void testCoReductionWithKnowns() {
String base = String.format("-T ReduceReads %s --cancer_mode -npt -R %s -I %s -I %s -known %s", COREDUCTION_L, REF, COREDUCTION_BAM_A, COREDUCTION_BAM_B, DBSNP) + " -o %s ";
executeTest("testCoReductionWithKnowns", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("6db7fca364ba64f7db9510b412d731f0")));
executeTest("testCoReductionWithKnowns", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("5c251932b49d99a810581e3a6f762878")));
}
@Test(enabled = true)
@ -258,7 +265,7 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
public void testDivideByZero() {
String base = String.format("-T ReduceReads %s -npt -R %s -I %s", DIVIDEBYZERO_L, REF, DIVIDEBYZERO_BAM) + " -o %s ";
// we expect to lose coverage due to the downsampling so don't run the systematic tests
executeTestWithoutAdditionalRRTests("testDivideByZero", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("82758efda419011642cb468809a50bf9")));
executeTestWithoutAdditionalRRTests("testDivideByZero", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("7dfe2647992ce1154db340fc742d523a")));
}
/**
@ -299,5 +306,42 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
String cmd = "-T ReduceReads -npt -R " + b37KGReference + " -I " + privateTestDir + "rr_multisample.bam -o /dev/null";
executeTestWithoutAdditionalRRTests("testMultiSampleDoesNotFailWithFlag", new WalkerTestSpec(cmd, 0, UserException.BadInput.class));
}
/**
* Confirm that compression is not capping coverage counts to max byte
*/
@Test(enabled = true)
public void testCompressionWorksForHighDepth() {
final String base = String.format("-T ReduceReads -npt -R %s -I %s %s", b37KGReference, HIGH_COVERAGE_BAM, HIGH_COVERAGE_L) + " -o %s";
final File outputBam = executeTestWithoutAdditionalRRTests("testCompressionWorksForHighDepth",
new WalkerTestSpec(base, 1, Arrays.asList(""))).first.get(0); // No MD5s; we only want to check the coverage
boolean sawHighCoveragePosition = false;
final SAMFileReader reader = new SAMFileReader(outputBam);
reader.setSAMRecordFactory(new GATKSamRecordFactory());
for ( final SAMRecord rawRead : reader ) {
final GATKSAMRecord read = (GATKSAMRecord)rawRead;
read.setAttribute(GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG, rawRead.getByteArrayAttribute(GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG));
if ( ! read.isReducedRead() )
continue;
final int[] decodedCounts = read.getReducedReadCounts();
for ( final int count : decodedCounts ) {
if ( count > Byte.MAX_VALUE ) {
sawHighCoveragePosition = true;
break;
}
}
if ( sawHighCoveragePosition )
break;
}
reader.close();
Assert.assertTrue(sawHighCoveragePosition, "No positions were found with coverage over max byte (127); the coverage is incorrectly being capped somewhere!");
}
}

View File

@ -619,6 +619,76 @@ public class SlidingWindowUnitTest extends BaseTest {
Assert.assertEquals(result.size(), Math.min(dcov, basicReads.size()));
}
@DataProvider(name = "DownsamplingFromClose")
public Object[][] createDownsamplingFromCloseTestData() {
final ObjectList<GATKSAMRecord> myReads = new ObjectArrayList<>(20);
for ( int i = 0; i < 21; i++ ) {
final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "read" + i, 0, globalStartPosition, readLength);
final byte[] bases = Utils.dupBytes((byte) 'A', readLength);
if ( i < 5 )
bases[50] = 'C';
read.setReadBases(bases);
read.setBaseQualities(Utils.dupBytes((byte)30, readLength));
read.setMappingQuality(30);
read.setReadNegativeStrandFlag(false);
myReads.add(read);
}
List<Object[]> tests = new ArrayList<>();
for ( int i = 1; i < 25; i++ )
tests.add(new Object[]{myReads, i});
return tests.toArray(new Object[][]{});
}
@Test(dataProvider = "DownsamplingFromClose", enabled = true)
public void testDownsamplingTestFromClose(final ObjectList<GATKSAMRecord> myReads, final int dcov) {
final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 0.05, 20, 20, dcov, ReduceReads.DownsampleStrategy.Normal, false);
for ( final GATKSAMRecord read : myReads )
slidingWindow.addRead(read);
Pair<ObjectSet<GATKSAMRecord>, CompressionStash> result = slidingWindow.close(new ObjectAVLTreeSet<GenomeLoc>()); // no het compression
Assert.assertEquals(result.getFirst().size(), Math.min(dcov, myReads.size()), "Down-sampling was not performed correctly");
}
@DataProvider(name = "NoDownsamplingForConsensusReads")
public Object[][] createNoDownsamplingForConsensusReadsData() {
final ObjectList<GATKSAMRecord> myReads = new ObjectArrayList<>(20);
for ( int i = 0; i < 30; i++ ) {
final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "read" + i, 0, globalStartPosition, readLength);
final byte[] bases = Utils.dupBytes((byte) 'A', readLength);
if ( i < 10 )
bases[50] = 'C';
read.setReadBases(bases);
read.setBaseQualities(Utils.dupBytes((byte)30, readLength));
read.setMappingQuality(30);
read.setReadNegativeStrandFlag(false);
read.setReadNegativeStrandFlag(i % 2 == 0);
myReads.add(read);
}
List<Object[]> tests = new ArrayList<>();
for ( int i = 0; i < 5; i++ )
tests.add(new Object[]{myReads, i});
return tests.toArray(new Object[][]{});
}
@Test(dataProvider = "NoDownsamplingForConsensusReads", enabled = true)
public void testNoDownsamplingForConsensusReads(final ObjectList<GATKSAMRecord> myReads, final int dcov) {
final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 0.05, 20, 20, dcov, ReduceReads.DownsampleStrategy.Normal, false);
for ( final GATKSAMRecord read : myReads )
slidingWindow.addRead(read);
Pair<ObjectSet<GATKSAMRecord>, CompressionStash> result = slidingWindow.close(null); // allow het compression (so we expect 4 reads)
Assert.assertEquals(result.getFirst().size(), 4, "Down-sampling was performed on consensus reads!");
}
//////////////////////////////////////////////////////////////
//// This section tests the consensus base quals accuracy ////