LIBS bugfix: kept reads now only (correctly) includes reads that at least passed the reservoir
-- Added unit tests to ensure this behavior is correct
This commit is contained in:
parent
83fcc06e28
commit
19288b007d
|
|
@ -195,7 +195,15 @@ final class ReadStateManager implements Iterable<Map.Entry<String, ReadStateMana
|
||||||
|
|
||||||
for (final String sample : samples) {
|
for (final String sample : samples) {
|
||||||
final Collection<GATKSAMRecord> newReads = samplePartitioner.getReadsForSample(sample);
|
final Collection<GATKSAMRecord> newReads = samplePartitioner.getReadsForSample(sample);
|
||||||
PerSampleReadStateManager statesBySample = readStatesBySample.get(sample);
|
|
||||||
|
// // if we're keeping reads, take the (potentially downsampled) list of new reads for this sample
|
||||||
|
// // and add to the list of reads. Note this may reorder the list of reads someone (it groups them
|
||||||
|
// // by sample, but it cannot change their absolute position on the genome as they all must
|
||||||
|
// // start at the current location
|
||||||
|
if ( keepSubmittedReads )
|
||||||
|
submittedReads.addAll(newReads);
|
||||||
|
|
||||||
|
final PerSampleReadStateManager statesBySample = readStatesBySample.get(sample);
|
||||||
addReadsToSample(statesBySample, newReads);
|
addReadsToSample(statesBySample, newReads);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -208,8 +216,6 @@ final class ReadStateManager implements Iterable<Map.Entry<String, ReadStateMana
|
||||||
*/
|
*/
|
||||||
@Requires("read != null")
|
@Requires("read != null")
|
||||||
protected void submitRead(final GATKSAMRecord read) {
|
protected void submitRead(final GATKSAMRecord read) {
|
||||||
if ( keepSubmittedReads )
|
|
||||||
submittedReads.add(read);
|
|
||||||
samplePartitioner.submitRead(read);
|
samplePartitioner.submitRead(read);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -32,6 +32,7 @@ import org.broadinstitute.sting.gatk.ReadProperties;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.downsampling.DownsampleType;
|
import org.broadinstitute.sting.gatk.downsampling.DownsampleType;
|
||||||
import org.broadinstitute.sting.gatk.downsampling.DownsamplingMethod;
|
import org.broadinstitute.sting.gatk.downsampling.DownsamplingMethod;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.NGSPlatform;
|
import org.broadinstitute.sting.utils.NGSPlatform;
|
||||||
import org.broadinstitute.sting.utils.QualityUtils;
|
import org.broadinstitute.sting.utils.QualityUtils;
|
||||||
import org.broadinstitute.sting.utils.Utils;
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
|
|
@ -350,7 +351,7 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest {
|
||||||
// Arrays.asList(3));
|
// Arrays.asList(3));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(enabled = true, dataProvider = "LIBSTest")
|
@Test(enabled = true && ! DEBUG, dataProvider = "LIBSTest")
|
||||||
public void testLIBS(LIBSTest params) {
|
public void testLIBS(LIBSTest params) {
|
||||||
// create the iterator by state with the fake reads and fake records
|
// create the iterator by state with the fake reads and fake records
|
||||||
final GATKSAMRecord read = params.makeRead();
|
final GATKSAMRecord read = params.makeRead();
|
||||||
|
|
@ -406,22 +407,25 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest {
|
||||||
//
|
//
|
||||||
// ------------------------------------------------------------
|
// ------------------------------------------------------------
|
||||||
|
|
||||||
@DataProvider(name = "LIBSKeepSubmittedReads")
|
@DataProvider(name = "LIBS_ComplexPileupTests")
|
||||||
public Object[][] makeLIBSKeepSubmittedReads() {
|
public Object[][] makeLIBS_ComplexPileupTests() {
|
||||||
final List<Object[]> tests = new LinkedList<Object[]>();
|
final List<Object[]> tests = new LinkedList<Object[]>();
|
||||||
|
|
||||||
for ( final boolean doSampling : Arrays.asList(true, false) ) {
|
for ( final int downsampleTo : Arrays.asList(-1, 1, 2, 5, 10, 30)) {
|
||||||
for ( final int nReadsPerLocus : Arrays.asList(1, 10) ) {
|
for ( final int nReadsPerLocus : Arrays.asList(1, 10, 60) ) {
|
||||||
for ( final int nLoci : Arrays.asList(1, 10, 25) ) {
|
for ( final int nLoci : Arrays.asList(1, 10, 25) ) {
|
||||||
for ( final int nSamples : Arrays.asList(1, 2, 10) ) {
|
for ( final int nSamples : Arrays.asList(1, 2, 10) ) {
|
||||||
for ( final boolean keepReads : Arrays.asList(true, false) ) {
|
for ( final boolean keepReads : Arrays.asList(true, false) ) {
|
||||||
for ( final boolean grabReadsAfterEachCycle : Arrays.asList(true, false) ) {
|
for ( final boolean grabReadsAfterEachCycle : Arrays.asList(true, false) ) {
|
||||||
// for ( final int nReadsPerLocus : Arrays.asList(1) ) {
|
// for ( final int downsampleTo : Arrays.asList(1)) {
|
||||||
// for ( final int nLoci : Arrays.asList(1) ) {
|
// for ( final int nReadsPerLocus : Arrays.asList(10) ) {
|
||||||
// for ( final int nSamples : Arrays.asList(1) ) {
|
// for ( final int nLoci : Arrays.asList(25) ) {
|
||||||
// for ( final boolean keepReads : Arrays.asList(true) ) {
|
// for ( final int nSamples : Arrays.asList(1) ) {
|
||||||
// for ( final boolean grabReadsAfterEachCycle : Arrays.asList(true) ) {
|
// for ( final boolean keepReads : Arrays.asList(true) ) {
|
||||||
tests.add(new Object[]{nReadsPerLocus, nLoci, nSamples, keepReads, grabReadsAfterEachCycle, doSampling});
|
// for ( final boolean grabReadsAfterEachCycle : Arrays.asList(true) ) {
|
||||||
|
tests.add(new Object[]{nReadsPerLocus, nLoci, nSamples,
|
||||||
|
keepReads, grabReadsAfterEachCycle,
|
||||||
|
downsampleTo});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -432,14 +436,15 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest {
|
||||||
return tests.toArray(new Object[][]{});
|
return tests.toArray(new Object[][]{});
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(enabled = true && ! DEBUG, dataProvider = "LIBSKeepSubmittedReads")
|
//@Test(enabled = true && ! DEBUG, dataProvider = "LIBS_ComplexPileupTests")
|
||||||
public void testLIBSKeepSubmittedReads(final int nReadsPerLocus,
|
@Test(enabled = true && ! DEBUG, dataProvider = "LIBS_ComplexPileupTests")
|
||||||
final int nLoci,
|
public void testLIBS_ComplexPileupTests(final int nReadsPerLocus,
|
||||||
final int nSamples,
|
final int nLoci,
|
||||||
final boolean keepReads,
|
final int nSamples,
|
||||||
final boolean grabReadsAfterEachCycle,
|
final boolean keepReads,
|
||||||
final boolean downsample) {
|
final boolean grabReadsAfterEachCycle,
|
||||||
logger.warn(String.format("testLIBSKeepSubmittedReads %d %d %d %b %b %b", nReadsPerLocus, nLoci, nSamples, keepReads, grabReadsAfterEachCycle, downsample));
|
final int downsampleTo) {
|
||||||
|
//logger.warn(String.format("testLIBSKeepSubmittedReads %d %d %d %b %b %b", nReadsPerLocus, nLoci, nSamples, keepReads, grabReadsAfterEachCycle, downsample));
|
||||||
final int readLength = 10;
|
final int readLength = 10;
|
||||||
|
|
||||||
final SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 100000);
|
final SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 100000);
|
||||||
|
|
@ -453,10 +458,9 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest {
|
||||||
header.addReadGroup(rg);
|
header.addReadGroup(rg);
|
||||||
}
|
}
|
||||||
|
|
||||||
final int maxCoveragePerSampleAtLocus = nReadsPerLocus * readLength / 2;
|
final boolean downsample = downsampleTo != -1;
|
||||||
final int maxDownsampledCoverage = Math.max(maxCoveragePerSampleAtLocus / 2, 1);
|
|
||||||
final DownsamplingMethod downsampler = downsample
|
final DownsamplingMethod downsampler = downsample
|
||||||
? new DownsamplingMethod(DownsampleType.BY_SAMPLE, maxDownsampledCoverage, null, false)
|
? new DownsamplingMethod(DownsampleType.BY_SAMPLE, downsampleTo, null, false)
|
||||||
: new DownsamplingMethod(DownsampleType.NONE, null, null, false);
|
: new DownsamplingMethod(DownsampleType.NONE, null, null, false);
|
||||||
final List<GATKSAMRecord> reads = ArtificialSAMUtils.createReadStream(nReadsPerLocus, nLoci, header, 1, readLength);
|
final List<GATKSAMRecord> reads = ArtificialSAMUtils.createReadStream(nReadsPerLocus, nLoci, header, 1, readLength);
|
||||||
li = new LocusIteratorByState(new FakeCloseableIterator<GATKSAMRecord>(reads.iterator()),
|
li = new LocusIteratorByState(new FakeCloseableIterator<GATKSAMRecord>(reads.iterator()),
|
||||||
|
|
@ -472,6 +476,8 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest {
|
||||||
final AlignmentContext alignmentContext = li.next();
|
final AlignmentContext alignmentContext = li.next();
|
||||||
final ReadBackedPileup p = alignmentContext.getBasePileup();
|
final ReadBackedPileup p = alignmentContext.getBasePileup();
|
||||||
|
|
||||||
|
AssertWellOrderedPileup(p);
|
||||||
|
|
||||||
if ( downsample ) {
|
if ( downsample ) {
|
||||||
// just not a safe test
|
// just not a safe test
|
||||||
//Assert.assertTrue(p.getNumberOfElements() <= maxDownsampledCoverage * nSamples, "Too many reads at locus after downsampling");
|
//Assert.assertTrue(p.getNumberOfElements() <= maxDownsampledCoverage * nSamples, "Too many reads at locus after downsampling");
|
||||||
|
|
@ -480,22 +486,29 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest {
|
||||||
Assert.assertTrue(p.getNumberOfElements() >= minPileupSize);
|
Assert.assertTrue(p.getNumberOfElements() >= minPileupSize);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// the number of reads starting here
|
||||||
|
int nReadsStartingHere = 0;
|
||||||
|
for ( final GATKSAMRecord read : p.getReads() )
|
||||||
|
if ( read.getAlignmentStart() == alignmentContext.getPosition() )
|
||||||
|
nReadsStartingHere++;
|
||||||
|
|
||||||
|
// we can have no more than maxDownsampledCoverage per sample
|
||||||
|
final int maxCoveragePerLocus = downsample ? downsampleTo : nReadsPerLocus;
|
||||||
|
Assert.assertTrue(nReadsStartingHere <= maxCoveragePerLocus * nSamples);
|
||||||
|
|
||||||
seenSoFar.addAll(p.getReads());
|
seenSoFar.addAll(p.getReads());
|
||||||
if ( keepReads && grabReadsAfterEachCycle ) {
|
if ( keepReads && grabReadsAfterEachCycle ) {
|
||||||
final List<GATKSAMRecord> locusReads = li.transferReadsFromAllPreviousPileups();
|
final List<GATKSAMRecord> locusReads = li.transferReadsFromAllPreviousPileups();
|
||||||
|
|
||||||
// the number of reads starting here
|
|
||||||
int nReadsStartingHere = 0;
|
|
||||||
for ( final GATKSAMRecord read : p.getReads() )
|
|
||||||
if ( read.getAlignmentStart() == alignmentContext.getPosition() )
|
|
||||||
nReadsStartingHere++;
|
|
||||||
|
|
||||||
if ( downsample )
|
if ( downsample ) {
|
||||||
// with downsampling we might have some reads here that were downsampled away
|
// with downsampling we might have some reads here that were downsampled away
|
||||||
// in the pileup
|
// in the pileup. We want to ensure that no more than the max coverage per sample is added
|
||||||
Assert.assertTrue(locusReads.size() >= nReadsStartingHere);
|
Assert.assertTrue(locusReads.size() >= nReadsStartingHere);
|
||||||
else
|
Assert.assertTrue(locusReads.size() <= maxCoveragePerLocus * nSamples);
|
||||||
|
} else {
|
||||||
Assert.assertEquals(locusReads.size(), nReadsStartingHere);
|
Assert.assertEquals(locusReads.size(), nReadsStartingHere);
|
||||||
|
}
|
||||||
keptReads.addAll(locusReads);
|
keptReads.addAll(locusReads);
|
||||||
|
|
||||||
// check that all reads we've seen so far are in our keptReads
|
// check that all reads we've seen so far are in our keptReads
|
||||||
|
|
@ -543,6 +556,26 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest {
|
||||||
for ( final GATKSAMRecord read : seenSoFar ) {
|
for ( final GATKSAMRecord read : seenSoFar ) {
|
||||||
Assert.assertTrue(keptReads.contains(read), "A read that appeared in a pileup wasn't found in the kept reads: " + read);
|
Assert.assertTrue(keptReads.contains(read), "A read that appeared in a pileup wasn't found in the kept reads: " + read);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ( ! downsample ) {
|
||||||
|
// check that every read in the list of keep reads occurred at least once in one of the pileups
|
||||||
|
for ( final GATKSAMRecord keptRead : keptReads ) {
|
||||||
|
Assert.assertTrue(seenSoFar.contains(keptRead), "There's a read " + keptRead + " in our keptReads list that never appeared in any pileup");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void AssertWellOrderedPileup(final ReadBackedPileup pileup) {
|
||||||
|
if ( ! pileup.isEmpty() ) {
|
||||||
|
int leftMostPos = -1;
|
||||||
|
|
||||||
|
for ( final PileupElement pe : pileup ) {
|
||||||
|
Assert.assertTrue(pileup.getLocation().getContig().equals(pe.getRead().getReferenceName()), "ReadBackedPileup contains an element " + pe + " that's on a different contig than the pileup itself");
|
||||||
|
Assert.assertTrue(pe.getRead().getAlignmentStart() >= leftMostPos,
|
||||||
|
"ReadBackedPileup contains an element " + pe + " whose read's alignment start " + pe.getRead().getAlignmentStart()
|
||||||
|
+ " occurs before the leftmost position we've seen previously " + leftMostPos);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue