Merge pull request #354 from broadinstitute/eb_fix_rr_count_encoding
Two reduce reads updates/fixes
This commit is contained in:
commit
0b062e7f22
|
|
@ -494,10 +494,10 @@ public class SlidingWindow {
|
||||||
*/
|
*/
|
||||||
private void genericAddBaseToConsensus(final SyntheticRead syntheticRead, final BaseAndQualsCounts baseCounts) {
|
private void genericAddBaseToConsensus(final SyntheticRead syntheticRead, final BaseAndQualsCounts baseCounts) {
|
||||||
final BaseIndex base = baseCounts.baseIndexWithMostProbability();
|
final BaseIndex base = baseCounts.baseIndexWithMostProbability();
|
||||||
byte count = (byte) Math.min(baseCounts.countOfBase(base), Byte.MAX_VALUE);
|
final int count = baseCounts.countOfBase(base);
|
||||||
byte qual = baseCounts.averageQualsOfBase(base);
|
final byte qual = baseCounts.averageQualsOfBase(base);
|
||||||
byte insQual = baseCounts.averageInsertionQualsOfBase(base);
|
final byte insQual = baseCounts.averageInsertionQualsOfBase(base);
|
||||||
byte delQual = baseCounts.averageDeletionQualsOfBase(base);
|
final byte delQual = baseCounts.averageDeletionQualsOfBase(base);
|
||||||
syntheticRead.add(base, count, qual, insQual, delQual, baseCounts.getRMS());
|
syntheticRead.add(base, count, qual, insQual, delQual, baseCounts.getRMS());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -533,20 +533,24 @@ public class SlidingWindow {
|
||||||
final int refStart = windowHeader.get(start).getLocation();
|
final int refStart = windowHeader.get(start).getLocation();
|
||||||
final int refStop = windowHeader.get(stop).getLocation();
|
final int refStop = windowHeader.get(stop).getLocation();
|
||||||
|
|
||||||
final ObjectList<GATKSAMRecord> toRemove = new ObjectArrayList<>();
|
final ObjectList<GATKSAMRecord> toRemoveFromWindow = new ObjectArrayList<>();
|
||||||
|
final ObjectList<GATKSAMRecord> toEmit = new ObjectArrayList<>();
|
||||||
for ( final GATKSAMRecord read : readsInWindow ) {
|
for ( final GATKSAMRecord read : readsInWindow ) {
|
||||||
if ( read.getSoftStart() <= refStop ) {
|
if ( read.getSoftStart() <= refStop ) {
|
||||||
if ( read.getAlignmentEnd() >= refStart ) {
|
if ( read.getAlignmentEnd() >= refStart ) {
|
||||||
allReads.reads.add(read);
|
toEmit.add(read);
|
||||||
removeFromHeader(windowHeader, read);
|
removeFromHeader(windowHeader, read);
|
||||||
}
|
}
|
||||||
toRemove.add(read);
|
toRemoveFromWindow.add(read);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// remove all used reads
|
// remove all used reads
|
||||||
for ( final GATKSAMRecord read : toRemove )
|
for ( final GATKSAMRecord read : toRemoveFromWindow )
|
||||||
readsInWindow.remove(read);
|
readsInWindow.remove(read);
|
||||||
|
|
||||||
|
// down-sample the unreduced reads if needed
|
||||||
|
allReads.reads.addAll(downsampleCoverage > 0 ? downsampleVariantRegion(toEmit) : toEmit);
|
||||||
}
|
}
|
||||||
|
|
||||||
return allReads;
|
return allReads;
|
||||||
|
|
@ -632,12 +636,8 @@ public class SlidingWindow {
|
||||||
@Ensures("result != null")
|
@Ensures("result != null")
|
||||||
protected CloseVariantRegionResult closeVariantRegion(final int start, final int stop, final ObjectSortedSet<GenomeLoc> knownSnpPositions) {
|
protected CloseVariantRegionResult closeVariantRegion(final int start, final int stop, final ObjectSortedSet<GenomeLoc> knownSnpPositions) {
|
||||||
final CloseVariantRegionResult allReads = compressVariantRegion(start, stop, knownSnpPositions);
|
final CloseVariantRegionResult allReads = compressVariantRegion(start, stop, knownSnpPositions);
|
||||||
|
allReads.reads.addAll(addAllSyntheticReadTypes(0, allReads.stopPerformed + 1));
|
||||||
final CloseVariantRegionResult result = new CloseVariantRegionResult(allReads.stopPerformed);
|
return allReads;
|
||||||
result.reads.addAll(downsampleCoverage > 0 ? downsampleVariantRegion(allReads.reads) : allReads.reads);
|
|
||||||
result.reads.addAll(addAllSyntheticReadTypes(0, allReads.stopPerformed + 1));
|
|
||||||
|
|
||||||
return result; // finalized reads will be downsampled if necessary
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
||||||
|
|
@ -46,9 +46,14 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
|
package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
|
||||||
|
|
||||||
|
import net.sf.samtools.SAMFileReader;
|
||||||
|
import net.sf.samtools.SAMRecord;
|
||||||
import org.broadinstitute.sting.WalkerTest;
|
import org.broadinstitute.sting.WalkerTest;
|
||||||
import org.broadinstitute.sting.utils.collections.Pair;
|
import org.broadinstitute.sting.utils.collections.Pair;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
|
import org.broadinstitute.sting.utils.sam.GATKSamRecordFactory;
|
||||||
|
import org.testng.Assert;
|
||||||
import org.testng.annotations.Test;
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
|
|
@ -70,6 +75,8 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
|
||||||
final String COREDUCTION_BAM_B = validationDataLocation + "coreduction.test.B.bam";
|
final String COREDUCTION_BAM_B = validationDataLocation + "coreduction.test.B.bam";
|
||||||
final String COREDUCTION_L = " -L 1:1,853,860-1,854,354 -L 1:1,884,131-1,892,057";
|
final String COREDUCTION_L = " -L 1:1,853,860-1,854,354 -L 1:1,884,131-1,892,057";
|
||||||
final String OFFCONTIG_BAM = privateTestDir + "readOffb37contigMT.bam";
|
final String OFFCONTIG_BAM = privateTestDir + "readOffb37contigMT.bam";
|
||||||
|
final String HIGH_COVERAGE_BAM = privateTestDir + "NA20313.highCoverageRegion.bam";
|
||||||
|
final String HIGH_COVERAGE_L = " -L 1:1650830-1650870";
|
||||||
final String BOTH_ENDS_OF_PAIR_IN_VARIANT_REGION_BAM = privateTestDir + "bothEndsOfPairInVariantRegion.bam";
|
final String BOTH_ENDS_OF_PAIR_IN_VARIANT_REGION_BAM = privateTestDir + "bothEndsOfPairInVariantRegion.bam";
|
||||||
final String INSERTIONS_AT_EDGE_OF_CONSENSUS_BAM = privateTestDir + "rr-too-many-insertions.bam";
|
final String INSERTIONS_AT_EDGE_OF_CONSENSUS_BAM = privateTestDir + "rr-too-many-insertions.bam";
|
||||||
|
|
||||||
|
|
@ -221,13 +228,13 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
|
||||||
@Test(enabled = true)
|
@Test(enabled = true)
|
||||||
public void testCoReduction() {
|
public void testCoReduction() {
|
||||||
String base = String.format("-T ReduceReads %s --cancer_mode -npt -R %s -I %s -I %s", COREDUCTION_L, REF, COREDUCTION_BAM_A, COREDUCTION_BAM_B) + " -o %s ";
|
String base = String.format("-T ReduceReads %s --cancer_mode -npt -R %s -I %s -I %s", COREDUCTION_L, REF, COREDUCTION_BAM_A, COREDUCTION_BAM_B) + " -o %s ";
|
||||||
executeTest("testCoReduction", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("2fdc77ff5139f62db9697427b559f866")));
|
executeTest("testCoReduction", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("58c2bae5a339af2ea3c22a46ce8faa68")));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(enabled = true)
|
@Test(enabled = true)
|
||||||
public void testCoReductionWithKnowns() {
|
public void testCoReductionWithKnowns() {
|
||||||
String base = String.format("-T ReduceReads %s --cancer_mode -npt -R %s -I %s -I %s -known %s", COREDUCTION_L, REF, COREDUCTION_BAM_A, COREDUCTION_BAM_B, DBSNP) + " -o %s ";
|
String base = String.format("-T ReduceReads %s --cancer_mode -npt -R %s -I %s -I %s -known %s", COREDUCTION_L, REF, COREDUCTION_BAM_A, COREDUCTION_BAM_B, DBSNP) + " -o %s ";
|
||||||
executeTest("testCoReductionWithKnowns", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("6db7fca364ba64f7db9510b412d731f0")));
|
executeTest("testCoReductionWithKnowns", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("5c251932b49d99a810581e3a6f762878")));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(enabled = true)
|
@Test(enabled = true)
|
||||||
|
|
@ -258,7 +265,7 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
|
||||||
public void testDivideByZero() {
|
public void testDivideByZero() {
|
||||||
String base = String.format("-T ReduceReads %s -npt -R %s -I %s", DIVIDEBYZERO_L, REF, DIVIDEBYZERO_BAM) + " -o %s ";
|
String base = String.format("-T ReduceReads %s -npt -R %s -I %s", DIVIDEBYZERO_L, REF, DIVIDEBYZERO_BAM) + " -o %s ";
|
||||||
// we expect to lose coverage due to the downsampling so don't run the systematic tests
|
// we expect to lose coverage due to the downsampling so don't run the systematic tests
|
||||||
executeTestWithoutAdditionalRRTests("testDivideByZero", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("82758efda419011642cb468809a50bf9")));
|
executeTestWithoutAdditionalRRTests("testDivideByZero", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("7dfe2647992ce1154db340fc742d523a")));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -299,5 +306,42 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
|
||||||
String cmd = "-T ReduceReads -npt -R " + b37KGReference + " -I " + privateTestDir + "rr_multisample.bam -o /dev/null";
|
String cmd = "-T ReduceReads -npt -R " + b37KGReference + " -I " + privateTestDir + "rr_multisample.bam -o /dev/null";
|
||||||
executeTestWithoutAdditionalRRTests("testMultiSampleDoesNotFailWithFlag", new WalkerTestSpec(cmd, 0, UserException.BadInput.class));
|
executeTestWithoutAdditionalRRTests("testMultiSampleDoesNotFailWithFlag", new WalkerTestSpec(cmd, 0, UserException.BadInput.class));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Confirm that compression is not capping coverage counts to max byte
|
||||||
|
*/
|
||||||
|
@Test(enabled = true)
|
||||||
|
public void testCompressionWorksForHighDepth() {
|
||||||
|
final String base = String.format("-T ReduceReads -npt -R %s -I %s %s", b37KGReference, HIGH_COVERAGE_BAM, HIGH_COVERAGE_L) + " -o %s";
|
||||||
|
final File outputBam = executeTestWithoutAdditionalRRTests("testCompressionWorksForHighDepth",
|
||||||
|
new WalkerTestSpec(base, 1, Arrays.asList(""))).first.get(0); // No MD5s; we only want to check the coverage
|
||||||
|
|
||||||
|
boolean sawHighCoveragePosition = false;
|
||||||
|
final SAMFileReader reader = new SAMFileReader(outputBam);
|
||||||
|
reader.setSAMRecordFactory(new GATKSamRecordFactory());
|
||||||
|
|
||||||
|
for ( final SAMRecord rawRead : reader ) {
|
||||||
|
final GATKSAMRecord read = (GATKSAMRecord)rawRead;
|
||||||
|
read.setAttribute(GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG, rawRead.getByteArrayAttribute(GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG));
|
||||||
|
|
||||||
|
if ( ! read.isReducedRead() )
|
||||||
|
continue;
|
||||||
|
|
||||||
|
final int[] decodedCounts = read.getReducedReadCounts();
|
||||||
|
for ( final int count : decodedCounts ) {
|
||||||
|
if ( count > Byte.MAX_VALUE ) {
|
||||||
|
sawHighCoveragePosition = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( sawHighCoveragePosition )
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
reader.close();
|
||||||
|
|
||||||
|
Assert.assertTrue(sawHighCoveragePosition, "No positions were found with coverage over max byte (127); the coverage is incorrectly being capped somewhere!");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -619,6 +619,76 @@ public class SlidingWindowUnitTest extends BaseTest {
|
||||||
Assert.assertEquals(result.size(), Math.min(dcov, basicReads.size()));
|
Assert.assertEquals(result.size(), Math.min(dcov, basicReads.size()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@DataProvider(name = "DownsamplingFromClose")
|
||||||
|
public Object[][] createDownsamplingFromCloseTestData() {
|
||||||
|
|
||||||
|
final ObjectList<GATKSAMRecord> myReads = new ObjectArrayList<>(20);
|
||||||
|
for ( int i = 0; i < 21; i++ ) {
|
||||||
|
final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "read" + i, 0, globalStartPosition, readLength);
|
||||||
|
final byte[] bases = Utils.dupBytes((byte) 'A', readLength);
|
||||||
|
if ( i < 5 )
|
||||||
|
bases[50] = 'C';
|
||||||
|
read.setReadBases(bases);
|
||||||
|
read.setBaseQualities(Utils.dupBytes((byte)30, readLength));
|
||||||
|
read.setMappingQuality(30);
|
||||||
|
read.setReadNegativeStrandFlag(false);
|
||||||
|
myReads.add(read);
|
||||||
|
}
|
||||||
|
|
||||||
|
List<Object[]> tests = new ArrayList<>();
|
||||||
|
|
||||||
|
for ( int i = 1; i < 25; i++ )
|
||||||
|
tests.add(new Object[]{myReads, i});
|
||||||
|
|
||||||
|
return tests.toArray(new Object[][]{});
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(dataProvider = "DownsamplingFromClose", enabled = true)
|
||||||
|
public void testDownsamplingTestFromClose(final ObjectList<GATKSAMRecord> myReads, final int dcov) {
|
||||||
|
|
||||||
|
final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 0.05, 20, 20, dcov, ReduceReads.DownsampleStrategy.Normal, false);
|
||||||
|
for ( final GATKSAMRecord read : myReads )
|
||||||
|
slidingWindow.addRead(read);
|
||||||
|
Pair<ObjectSet<GATKSAMRecord>, CompressionStash> result = slidingWindow.close(new ObjectAVLTreeSet<GenomeLoc>()); // no het compression
|
||||||
|
|
||||||
|
Assert.assertEquals(result.getFirst().size(), Math.min(dcov, myReads.size()), "Down-sampling was not performed correctly");
|
||||||
|
}
|
||||||
|
|
||||||
|
@DataProvider(name = "NoDownsamplingForConsensusReads")
|
||||||
|
public Object[][] createNoDownsamplingForConsensusReadsData() {
|
||||||
|
|
||||||
|
final ObjectList<GATKSAMRecord> myReads = new ObjectArrayList<>(20);
|
||||||
|
for ( int i = 0; i < 30; i++ ) {
|
||||||
|
final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "read" + i, 0, globalStartPosition, readLength);
|
||||||
|
final byte[] bases = Utils.dupBytes((byte) 'A', readLength);
|
||||||
|
if ( i < 10 )
|
||||||
|
bases[50] = 'C';
|
||||||
|
read.setReadBases(bases);
|
||||||
|
read.setBaseQualities(Utils.dupBytes((byte)30, readLength));
|
||||||
|
read.setMappingQuality(30);
|
||||||
|
read.setReadNegativeStrandFlag(false);
|
||||||
|
read.setReadNegativeStrandFlag(i % 2 == 0);
|
||||||
|
myReads.add(read);
|
||||||
|
}
|
||||||
|
|
||||||
|
List<Object[]> tests = new ArrayList<>();
|
||||||
|
|
||||||
|
for ( int i = 0; i < 5; i++ )
|
||||||
|
tests.add(new Object[]{myReads, i});
|
||||||
|
|
||||||
|
return tests.toArray(new Object[][]{});
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(dataProvider = "NoDownsamplingForConsensusReads", enabled = true)
|
||||||
|
public void testNoDownsamplingForConsensusReads(final ObjectList<GATKSAMRecord> myReads, final int dcov) {
|
||||||
|
|
||||||
|
final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 0.05, 20, 20, dcov, ReduceReads.DownsampleStrategy.Normal, false);
|
||||||
|
for ( final GATKSAMRecord read : myReads )
|
||||||
|
slidingWindow.addRead(read);
|
||||||
|
Pair<ObjectSet<GATKSAMRecord>, CompressionStash> result = slidingWindow.close(null); // allow het compression (so we expect 4 reads)
|
||||||
|
|
||||||
|
Assert.assertEquals(result.getFirst().size(), 4, "Down-sampling was performed on consensus reads!");
|
||||||
|
}
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////
|
||||||
//// This section tests the consensus base quals accuracy ////
|
//// This section tests the consensus base quals accuracy ////
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue