Merge pull request #354 from broadinstitute/eb_fix_rr_count_encoding
Two reduce reads updates/fixes
This commit is contained in:
commit
0b062e7f22
|
|
@ -494,10 +494,10 @@ public class SlidingWindow {
|
|||
*/
|
||||
private void genericAddBaseToConsensus(final SyntheticRead syntheticRead, final BaseAndQualsCounts baseCounts) {
|
||||
final BaseIndex base = baseCounts.baseIndexWithMostProbability();
|
||||
byte count = (byte) Math.min(baseCounts.countOfBase(base), Byte.MAX_VALUE);
|
||||
byte qual = baseCounts.averageQualsOfBase(base);
|
||||
byte insQual = baseCounts.averageInsertionQualsOfBase(base);
|
||||
byte delQual = baseCounts.averageDeletionQualsOfBase(base);
|
||||
final int count = baseCounts.countOfBase(base);
|
||||
final byte qual = baseCounts.averageQualsOfBase(base);
|
||||
final byte insQual = baseCounts.averageInsertionQualsOfBase(base);
|
||||
final byte delQual = baseCounts.averageDeletionQualsOfBase(base);
|
||||
syntheticRead.add(base, count, qual, insQual, delQual, baseCounts.getRMS());
|
||||
}
|
||||
|
||||
|
|
@ -533,20 +533,24 @@ public class SlidingWindow {
|
|||
final int refStart = windowHeader.get(start).getLocation();
|
||||
final int refStop = windowHeader.get(stop).getLocation();
|
||||
|
||||
final ObjectList<GATKSAMRecord> toRemove = new ObjectArrayList<>();
|
||||
final ObjectList<GATKSAMRecord> toRemoveFromWindow = new ObjectArrayList<>();
|
||||
final ObjectList<GATKSAMRecord> toEmit = new ObjectArrayList<>();
|
||||
for ( final GATKSAMRecord read : readsInWindow ) {
|
||||
if ( read.getSoftStart() <= refStop ) {
|
||||
if ( read.getAlignmentEnd() >= refStart ) {
|
||||
allReads.reads.add(read);
|
||||
toEmit.add(read);
|
||||
removeFromHeader(windowHeader, read);
|
||||
}
|
||||
toRemove.add(read);
|
||||
toRemoveFromWindow.add(read);
|
||||
}
|
||||
}
|
||||
|
||||
// remove all used reads
|
||||
for ( final GATKSAMRecord read : toRemove )
|
||||
for ( final GATKSAMRecord read : toRemoveFromWindow )
|
||||
readsInWindow.remove(read);
|
||||
|
||||
// down-sample the unreduced reads if needed
|
||||
allReads.reads.addAll(downsampleCoverage > 0 ? downsampleVariantRegion(toEmit) : toEmit);
|
||||
}
|
||||
|
||||
return allReads;
|
||||
|
|
@ -632,12 +636,8 @@ public class SlidingWindow {
|
|||
@Ensures("result != null")
|
||||
protected CloseVariantRegionResult closeVariantRegion(final int start, final int stop, final ObjectSortedSet<GenomeLoc> knownSnpPositions) {
|
||||
final CloseVariantRegionResult allReads = compressVariantRegion(start, stop, knownSnpPositions);
|
||||
|
||||
final CloseVariantRegionResult result = new CloseVariantRegionResult(allReads.stopPerformed);
|
||||
result.reads.addAll(downsampleCoverage > 0 ? downsampleVariantRegion(allReads.reads) : allReads.reads);
|
||||
result.reads.addAll(addAllSyntheticReadTypes(0, allReads.stopPerformed + 1));
|
||||
|
||||
return result; // finalized reads will be downsampled if necessary
|
||||
allReads.reads.addAll(addAllSyntheticReadTypes(0, allReads.stopPerformed + 1));
|
||||
return allReads;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -46,9 +46,14 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
|
||||
|
||||
import net.sf.samtools.SAMFileReader;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.WalkerTest;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSamRecordFactory;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.io.File;
|
||||
|
|
@ -70,6 +75,8 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
|
|||
final String COREDUCTION_BAM_B = validationDataLocation + "coreduction.test.B.bam";
|
||||
final String COREDUCTION_L = " -L 1:1,853,860-1,854,354 -L 1:1,884,131-1,892,057";
|
||||
final String OFFCONTIG_BAM = privateTestDir + "readOffb37contigMT.bam";
|
||||
final String HIGH_COVERAGE_BAM = privateTestDir + "NA20313.highCoverageRegion.bam";
|
||||
final String HIGH_COVERAGE_L = " -L 1:1650830-1650870";
|
||||
final String BOTH_ENDS_OF_PAIR_IN_VARIANT_REGION_BAM = privateTestDir + "bothEndsOfPairInVariantRegion.bam";
|
||||
final String INSERTIONS_AT_EDGE_OF_CONSENSUS_BAM = privateTestDir + "rr-too-many-insertions.bam";
|
||||
|
||||
|
|
@ -221,13 +228,13 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
|
|||
@Test(enabled = true)
|
||||
public void testCoReduction() {
|
||||
String base = String.format("-T ReduceReads %s --cancer_mode -npt -R %s -I %s -I %s", COREDUCTION_L, REF, COREDUCTION_BAM_A, COREDUCTION_BAM_B) + " -o %s ";
|
||||
executeTest("testCoReduction", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("2fdc77ff5139f62db9697427b559f866")));
|
||||
executeTest("testCoReduction", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("58c2bae5a339af2ea3c22a46ce8faa68")));
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testCoReductionWithKnowns() {
|
||||
String base = String.format("-T ReduceReads %s --cancer_mode -npt -R %s -I %s -I %s -known %s", COREDUCTION_L, REF, COREDUCTION_BAM_A, COREDUCTION_BAM_B, DBSNP) + " -o %s ";
|
||||
executeTest("testCoReductionWithKnowns", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("6db7fca364ba64f7db9510b412d731f0")));
|
||||
executeTest("testCoReductionWithKnowns", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("5c251932b49d99a810581e3a6f762878")));
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
|
|
@ -258,7 +265,7 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
|
|||
public void testDivideByZero() {
|
||||
String base = String.format("-T ReduceReads %s -npt -R %s -I %s", DIVIDEBYZERO_L, REF, DIVIDEBYZERO_BAM) + " -o %s ";
|
||||
// we expect to lose coverage due to the downsampling so don't run the systematic tests
|
||||
executeTestWithoutAdditionalRRTests("testDivideByZero", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("82758efda419011642cb468809a50bf9")));
|
||||
executeTestWithoutAdditionalRRTests("testDivideByZero", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("7dfe2647992ce1154db340fc742d523a")));
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -299,5 +306,42 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
|
|||
String cmd = "-T ReduceReads -npt -R " + b37KGReference + " -I " + privateTestDir + "rr_multisample.bam -o /dev/null";
|
||||
executeTestWithoutAdditionalRRTests("testMultiSampleDoesNotFailWithFlag", new WalkerTestSpec(cmd, 0, UserException.BadInput.class));
|
||||
}
|
||||
|
||||
/**
|
||||
* Confirm that compression is not capping coverage counts to max byte
|
||||
*/
|
||||
@Test(enabled = true)
|
||||
public void testCompressionWorksForHighDepth() {
|
||||
final String base = String.format("-T ReduceReads -npt -R %s -I %s %s", b37KGReference, HIGH_COVERAGE_BAM, HIGH_COVERAGE_L) + " -o %s";
|
||||
final File outputBam = executeTestWithoutAdditionalRRTests("testCompressionWorksForHighDepth",
|
||||
new WalkerTestSpec(base, 1, Arrays.asList(""))).first.get(0); // No MD5s; we only want to check the coverage
|
||||
|
||||
boolean sawHighCoveragePosition = false;
|
||||
final SAMFileReader reader = new SAMFileReader(outputBam);
|
||||
reader.setSAMRecordFactory(new GATKSamRecordFactory());
|
||||
|
||||
for ( final SAMRecord rawRead : reader ) {
|
||||
final GATKSAMRecord read = (GATKSAMRecord)rawRead;
|
||||
read.setAttribute(GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG, rawRead.getByteArrayAttribute(GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG));
|
||||
|
||||
if ( ! read.isReducedRead() )
|
||||
continue;
|
||||
|
||||
final int[] decodedCounts = read.getReducedReadCounts();
|
||||
for ( final int count : decodedCounts ) {
|
||||
if ( count > Byte.MAX_VALUE ) {
|
||||
sawHighCoveragePosition = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if ( sawHighCoveragePosition )
|
||||
break;
|
||||
}
|
||||
|
||||
reader.close();
|
||||
|
||||
Assert.assertTrue(sawHighCoveragePosition, "No positions were found with coverage over max byte (127); the coverage is incorrectly being capped somewhere!");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -619,6 +619,76 @@ public class SlidingWindowUnitTest extends BaseTest {
|
|||
Assert.assertEquals(result.size(), Math.min(dcov, basicReads.size()));
|
||||
}
|
||||
|
||||
@DataProvider(name = "DownsamplingFromClose")
|
||||
public Object[][] createDownsamplingFromCloseTestData() {
|
||||
|
||||
final ObjectList<GATKSAMRecord> myReads = new ObjectArrayList<>(20);
|
||||
for ( int i = 0; i < 21; i++ ) {
|
||||
final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "read" + i, 0, globalStartPosition, readLength);
|
||||
final byte[] bases = Utils.dupBytes((byte) 'A', readLength);
|
||||
if ( i < 5 )
|
||||
bases[50] = 'C';
|
||||
read.setReadBases(bases);
|
||||
read.setBaseQualities(Utils.dupBytes((byte)30, readLength));
|
||||
read.setMappingQuality(30);
|
||||
read.setReadNegativeStrandFlag(false);
|
||||
myReads.add(read);
|
||||
}
|
||||
|
||||
List<Object[]> tests = new ArrayList<>();
|
||||
|
||||
for ( int i = 1; i < 25; i++ )
|
||||
tests.add(new Object[]{myReads, i});
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@Test(dataProvider = "DownsamplingFromClose", enabled = true)
|
||||
public void testDownsamplingTestFromClose(final ObjectList<GATKSAMRecord> myReads, final int dcov) {
|
||||
|
||||
final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 0.05, 20, 20, dcov, ReduceReads.DownsampleStrategy.Normal, false);
|
||||
for ( final GATKSAMRecord read : myReads )
|
||||
slidingWindow.addRead(read);
|
||||
Pair<ObjectSet<GATKSAMRecord>, CompressionStash> result = slidingWindow.close(new ObjectAVLTreeSet<GenomeLoc>()); // no het compression
|
||||
|
||||
Assert.assertEquals(result.getFirst().size(), Math.min(dcov, myReads.size()), "Down-sampling was not performed correctly");
|
||||
}
|
||||
|
||||
@DataProvider(name = "NoDownsamplingForConsensusReads")
|
||||
public Object[][] createNoDownsamplingForConsensusReadsData() {
|
||||
|
||||
final ObjectList<GATKSAMRecord> myReads = new ObjectArrayList<>(20);
|
||||
for ( int i = 0; i < 30; i++ ) {
|
||||
final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "read" + i, 0, globalStartPosition, readLength);
|
||||
final byte[] bases = Utils.dupBytes((byte) 'A', readLength);
|
||||
if ( i < 10 )
|
||||
bases[50] = 'C';
|
||||
read.setReadBases(bases);
|
||||
read.setBaseQualities(Utils.dupBytes((byte)30, readLength));
|
||||
read.setMappingQuality(30);
|
||||
read.setReadNegativeStrandFlag(false);
|
||||
read.setReadNegativeStrandFlag(i % 2 == 0);
|
||||
myReads.add(read);
|
||||
}
|
||||
|
||||
List<Object[]> tests = new ArrayList<>();
|
||||
|
||||
for ( int i = 0; i < 5; i++ )
|
||||
tests.add(new Object[]{myReads, i});
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@Test(dataProvider = "NoDownsamplingForConsensusReads", enabled = true)
|
||||
public void testNoDownsamplingForConsensusReads(final ObjectList<GATKSAMRecord> myReads, final int dcov) {
|
||||
|
||||
final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 0.05, 20, 20, dcov, ReduceReads.DownsampleStrategy.Normal, false);
|
||||
for ( final GATKSAMRecord read : myReads )
|
||||
slidingWindow.addRead(read);
|
||||
Pair<ObjectSet<GATKSAMRecord>, CompressionStash> result = slidingWindow.close(null); // allow het compression (so we expect 4 reads)
|
||||
|
||||
Assert.assertEquals(result.getFirst().size(), 4, "Down-sampling was performed on consensus reads!");
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////
|
||||
//// This section tests the consensus base quals accuracy ////
|
||||
|
|
|
|||
Loading…
Reference in New Issue