LIBS bugfix: kept reads now only (correctly) includes reads that at least passed the reservoir
-- Added unit tests to ensure this behavior is correct
This commit is contained in:
parent
83fcc06e28
commit
19288b007d
|
|
@ -195,7 +195,15 @@ final class ReadStateManager implements Iterable<Map.Entry<String, ReadStateMana
|
|||
|
||||
for (final String sample : samples) {
|
||||
final Collection<GATKSAMRecord> newReads = samplePartitioner.getReadsForSample(sample);
|
||||
PerSampleReadStateManager statesBySample = readStatesBySample.get(sample);
|
||||
|
||||
// // if we're keeping reads, take the (potentially downsampled) list of new reads for this sample
|
||||
// // and add to the list of reads. Note this may reorder the list of reads someone (it groups them
|
||||
// // by sample, but it cannot change their absolute position on the genome as they all must
|
||||
// // start at the current location
|
||||
if ( keepSubmittedReads )
|
||||
submittedReads.addAll(newReads);
|
||||
|
||||
final PerSampleReadStateManager statesBySample = readStatesBySample.get(sample);
|
||||
addReadsToSample(statesBySample, newReads);
|
||||
}
|
||||
|
||||
|
|
@ -208,8 +216,6 @@ final class ReadStateManager implements Iterable<Map.Entry<String, ReadStateMana
|
|||
*/
|
||||
@Requires("read != null")
|
||||
protected void submitRead(final GATKSAMRecord read) {
|
||||
if ( keepSubmittedReads )
|
||||
submittedReads.add(read);
|
||||
samplePartitioner.submitRead(read);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -32,6 +32,7 @@ import org.broadinstitute.sting.gatk.ReadProperties;
|
|||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.downsampling.DownsampleType;
|
||||
import org.broadinstitute.sting.gatk.downsampling.DownsamplingMethod;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.NGSPlatform;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
|
|
@ -350,7 +351,7 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest {
|
|||
// Arrays.asList(3));
|
||||
}
|
||||
|
||||
@Test(enabled = true, dataProvider = "LIBSTest")
|
||||
@Test(enabled = true && ! DEBUG, dataProvider = "LIBSTest")
|
||||
public void testLIBS(LIBSTest params) {
|
||||
// create the iterator by state with the fake reads and fake records
|
||||
final GATKSAMRecord read = params.makeRead();
|
||||
|
|
@ -406,22 +407,25 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest {
|
|||
//
|
||||
// ------------------------------------------------------------
|
||||
|
||||
@DataProvider(name = "LIBSKeepSubmittedReads")
|
||||
public Object[][] makeLIBSKeepSubmittedReads() {
|
||||
@DataProvider(name = "LIBS_ComplexPileupTests")
|
||||
public Object[][] makeLIBS_ComplexPileupTests() {
|
||||
final List<Object[]> tests = new LinkedList<Object[]>();
|
||||
|
||||
for ( final boolean doSampling : Arrays.asList(true, false) ) {
|
||||
for ( final int nReadsPerLocus : Arrays.asList(1, 10) ) {
|
||||
for ( final int downsampleTo : Arrays.asList(-1, 1, 2, 5, 10, 30)) {
|
||||
for ( final int nReadsPerLocus : Arrays.asList(1, 10, 60) ) {
|
||||
for ( final int nLoci : Arrays.asList(1, 10, 25) ) {
|
||||
for ( final int nSamples : Arrays.asList(1, 2, 10) ) {
|
||||
for ( final boolean keepReads : Arrays.asList(true, false) ) {
|
||||
for ( final boolean grabReadsAfterEachCycle : Arrays.asList(true, false) ) {
|
||||
// for ( final int nReadsPerLocus : Arrays.asList(1) ) {
|
||||
// for ( final int nLoci : Arrays.asList(1) ) {
|
||||
// for ( final int nSamples : Arrays.asList(1) ) {
|
||||
// for ( final boolean keepReads : Arrays.asList(true) ) {
|
||||
// for ( final boolean grabReadsAfterEachCycle : Arrays.asList(true) ) {
|
||||
tests.add(new Object[]{nReadsPerLocus, nLoci, nSamples, keepReads, grabReadsAfterEachCycle, doSampling});
|
||||
// for ( final int downsampleTo : Arrays.asList(1)) {
|
||||
// for ( final int nReadsPerLocus : Arrays.asList(10) ) {
|
||||
// for ( final int nLoci : Arrays.asList(25) ) {
|
||||
// for ( final int nSamples : Arrays.asList(1) ) {
|
||||
// for ( final boolean keepReads : Arrays.asList(true) ) {
|
||||
// for ( final boolean grabReadsAfterEachCycle : Arrays.asList(true) ) {
|
||||
tests.add(new Object[]{nReadsPerLocus, nLoci, nSamples,
|
||||
keepReads, grabReadsAfterEachCycle,
|
||||
downsampleTo});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -432,14 +436,15 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest {
|
|||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@Test(enabled = true && ! DEBUG, dataProvider = "LIBSKeepSubmittedReads")
|
||||
public void testLIBSKeepSubmittedReads(final int nReadsPerLocus,
|
||||
final int nLoci,
|
||||
final int nSamples,
|
||||
final boolean keepReads,
|
||||
final boolean grabReadsAfterEachCycle,
|
||||
final boolean downsample) {
|
||||
logger.warn(String.format("testLIBSKeepSubmittedReads %d %d %d %b %b %b", nReadsPerLocus, nLoci, nSamples, keepReads, grabReadsAfterEachCycle, downsample));
|
||||
//@Test(enabled = true && ! DEBUG, dataProvider = "LIBS_ComplexPileupTests")
|
||||
@Test(enabled = true && ! DEBUG, dataProvider = "LIBS_ComplexPileupTests")
|
||||
public void testLIBS_ComplexPileupTests(final int nReadsPerLocus,
|
||||
final int nLoci,
|
||||
final int nSamples,
|
||||
final boolean keepReads,
|
||||
final boolean grabReadsAfterEachCycle,
|
||||
final int downsampleTo) {
|
||||
//logger.warn(String.format("testLIBSKeepSubmittedReads %d %d %d %b %b %b", nReadsPerLocus, nLoci, nSamples, keepReads, grabReadsAfterEachCycle, downsample));
|
||||
final int readLength = 10;
|
||||
|
||||
final SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 100000);
|
||||
|
|
@ -453,10 +458,9 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest {
|
|||
header.addReadGroup(rg);
|
||||
}
|
||||
|
||||
final int maxCoveragePerSampleAtLocus = nReadsPerLocus * readLength / 2;
|
||||
final int maxDownsampledCoverage = Math.max(maxCoveragePerSampleAtLocus / 2, 1);
|
||||
final boolean downsample = downsampleTo != -1;
|
||||
final DownsamplingMethod downsampler = downsample
|
||||
? new DownsamplingMethod(DownsampleType.BY_SAMPLE, maxDownsampledCoverage, null, false)
|
||||
? new DownsamplingMethod(DownsampleType.BY_SAMPLE, downsampleTo, null, false)
|
||||
: new DownsamplingMethod(DownsampleType.NONE, null, null, false);
|
||||
final List<GATKSAMRecord> reads = ArtificialSAMUtils.createReadStream(nReadsPerLocus, nLoci, header, 1, readLength);
|
||||
li = new LocusIteratorByState(new FakeCloseableIterator<GATKSAMRecord>(reads.iterator()),
|
||||
|
|
@ -472,6 +476,8 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest {
|
|||
final AlignmentContext alignmentContext = li.next();
|
||||
final ReadBackedPileup p = alignmentContext.getBasePileup();
|
||||
|
||||
AssertWellOrderedPileup(p);
|
||||
|
||||
if ( downsample ) {
|
||||
// just not a safe test
|
||||
//Assert.assertTrue(p.getNumberOfElements() <= maxDownsampledCoverage * nSamples, "Too many reads at locus after downsampling");
|
||||
|
|
@ -480,22 +486,29 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest {
|
|||
Assert.assertTrue(p.getNumberOfElements() >= minPileupSize);
|
||||
}
|
||||
|
||||
// the number of reads starting here
|
||||
int nReadsStartingHere = 0;
|
||||
for ( final GATKSAMRecord read : p.getReads() )
|
||||
if ( read.getAlignmentStart() == alignmentContext.getPosition() )
|
||||
nReadsStartingHere++;
|
||||
|
||||
// we can have no more than maxDownsampledCoverage per sample
|
||||
final int maxCoveragePerLocus = downsample ? downsampleTo : nReadsPerLocus;
|
||||
Assert.assertTrue(nReadsStartingHere <= maxCoveragePerLocus * nSamples);
|
||||
|
||||
seenSoFar.addAll(p.getReads());
|
||||
if ( keepReads && grabReadsAfterEachCycle ) {
|
||||
final List<GATKSAMRecord> locusReads = li.transferReadsFromAllPreviousPileups();
|
||||
|
||||
// the number of reads starting here
|
||||
int nReadsStartingHere = 0;
|
||||
for ( final GATKSAMRecord read : p.getReads() )
|
||||
if ( read.getAlignmentStart() == alignmentContext.getPosition() )
|
||||
nReadsStartingHere++;
|
||||
|
||||
if ( downsample )
|
||||
if ( downsample ) {
|
||||
// with downsampling we might have some reads here that were downsampled away
|
||||
// in the pileup
|
||||
// in the pileup. We want to ensure that no more than the max coverage per sample is added
|
||||
Assert.assertTrue(locusReads.size() >= nReadsStartingHere);
|
||||
else
|
||||
Assert.assertTrue(locusReads.size() <= maxCoveragePerLocus * nSamples);
|
||||
} else {
|
||||
Assert.assertEquals(locusReads.size(), nReadsStartingHere);
|
||||
}
|
||||
keptReads.addAll(locusReads);
|
||||
|
||||
// check that all reads we've seen so far are in our keptReads
|
||||
|
|
@ -543,6 +556,26 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest {
|
|||
for ( final GATKSAMRecord read : seenSoFar ) {
|
||||
Assert.assertTrue(keptReads.contains(read), "A read that appeared in a pileup wasn't found in the kept reads: " + read);
|
||||
}
|
||||
|
||||
if ( ! downsample ) {
|
||||
// check that every read in the list of keep reads occurred at least once in one of the pileups
|
||||
for ( final GATKSAMRecord keptRead : keptReads ) {
|
||||
Assert.assertTrue(seenSoFar.contains(keptRead), "There's a read " + keptRead + " in our keptReads list that never appeared in any pileup");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void AssertWellOrderedPileup(final ReadBackedPileup pileup) {
|
||||
if ( ! pileup.isEmpty() ) {
|
||||
int leftMostPos = -1;
|
||||
|
||||
for ( final PileupElement pe : pileup ) {
|
||||
Assert.assertTrue(pileup.getLocation().getContig().equals(pe.getRead().getReferenceName()), "ReadBackedPileup contains an element " + pe + " that's on a different contig than the pileup itself");
|
||||
Assert.assertTrue(pe.getRead().getAlignmentStart() >= leftMostPos,
|
||||
"ReadBackedPileup contains an element " + pe + " whose read's alignment start " + pe.getRead().getAlignmentStart()
|
||||
+ " occurs before the leftmost position we've seen previously " + leftMostPos);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue