LIBS can now (optionally) track the unique reads it uses from the underlying read iterator
-- This capability is essential to provide an ordered set of used reads to downstream users of LIBS, such as ART, who want an efficient way to get the reads used in LIBS -- Vastly expanded the multi-read, multi-sample LIBS unit tests to make sure this capability is working -- Added createReadStream to ArtificialSAMUtils that makes it relatively easy to create multi-read, multi-sample read streams for testing
This commit is contained in:
parent
b3ecfbfce8
commit
0ac4352614
|
|
@ -52,6 +52,7 @@ import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
|
|||
import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation;
|
||||
import org.broadinstitute.sting.gatk.samples.SampleDB;
|
||||
import org.broadinstitute.sting.gatk.samples.SampleDBBuilder;
|
||||
import org.broadinstitute.sting.gatk.traversals.TraverseActiveRegions;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
||||
|
|
@ -842,6 +843,8 @@ public class GenomeAnalysisEngine {
|
|||
if (argCollection.keepProgramRecords)
|
||||
removeProgramRecords = false;
|
||||
|
||||
final boolean keepReadsInLIBS = walker instanceof ActiveRegionWalker && TraverseActiveRegions.KEEP_READS_IN_LIBS;
|
||||
|
||||
return new SAMDataSource(
|
||||
samReaderIDs,
|
||||
threadAllocation,
|
||||
|
|
@ -856,7 +859,8 @@ public class GenomeAnalysisEngine {
|
|||
readTransformers,
|
||||
includeReadsWithDeletionAtLoci(),
|
||||
argCollection.defaultBaseQualities,
|
||||
removeProgramRecords);
|
||||
removeProgramRecords,
|
||||
keepReadsInLIBS);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -61,6 +61,7 @@ public class ReadProperties {
|
|||
private final ValidationExclusion exclusionList;
|
||||
private final Collection<ReadFilter> supplementalFilters;
|
||||
private final List<ReadTransformer> readTransformers;
|
||||
private final boolean keepUniqueReadListInLIBS;
|
||||
private final boolean includeReadsWithDeletionAtLoci;
|
||||
private final boolean useOriginalBaseQualities;
|
||||
private final byte defaultBaseQualities;
|
||||
|
|
@ -74,6 +75,10 @@ public class ReadProperties {
|
|||
return includeReadsWithDeletionAtLoci;
|
||||
}
|
||||
|
||||
public boolean keepUniqueReadListInLIBS() {
|
||||
return keepUniqueReadListInLIBS;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a list of the files acting as sources of reads.
|
||||
* @return A list of files storing reads data.
|
||||
|
|
@ -161,6 +166,8 @@ public class ReadProperties {
|
|||
* will explicitly list reads with deletion over the current reference base; otherwise, only observed
|
||||
* bases will be seen in the pileups, and the deletions will be skipped silently.
|
||||
* @param defaultBaseQualities if the reads have incomplete quality scores, set them all to defaultBaseQuality.
|
||||
* @param keepUniqueReadListInLIBS If true, we will tell LocusIteratorByState to track the unique reads it sees
|
||||
* This is really useful for ActiveRegionTraversals
|
||||
*/
|
||||
public ReadProperties( Collection<SAMReaderID> samFiles,
|
||||
SAMFileHeader header,
|
||||
|
|
@ -172,7 +179,8 @@ public class ReadProperties {
|
|||
Collection<ReadFilter> supplementalFilters,
|
||||
List<ReadTransformer> readTransformers,
|
||||
boolean includeReadsWithDeletionAtLoci,
|
||||
byte defaultBaseQualities) {
|
||||
byte defaultBaseQualities,
|
||||
final boolean keepUniqueReadListInLIBS) {
|
||||
this.readers = samFiles;
|
||||
this.header = header;
|
||||
this.sortOrder = sortOrder;
|
||||
|
|
@ -184,5 +192,6 @@ public class ReadProperties {
|
|||
this.includeReadsWithDeletionAtLoci = includeReadsWithDeletionAtLoci;
|
||||
this.useOriginalBaseQualities = useOriginalBaseQualities;
|
||||
this.defaultBaseQualities = defaultBaseQualities;
|
||||
this.keepUniqueReadListInLIBS = keepUniqueReadListInLIBS;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -158,6 +158,9 @@ public class SAMDataSource {
|
|||
|
||||
/**
|
||||
* Create a new SAM data source given the supplied read metadata.
|
||||
*
|
||||
* For testing purposes
|
||||
*
|
||||
* @param samFiles list of reads files.
|
||||
*/
|
||||
public SAMDataSource(Collection<SAMReaderID> samFiles, ThreadAllocation threadAllocation, Integer numFileHandles, GenomeLocParser genomeLocParser) {
|
||||
|
|
@ -177,6 +180,8 @@ public class SAMDataSource {
|
|||
|
||||
/**
|
||||
* See complete constructor. Does not enable BAQ by default.
|
||||
*
|
||||
* For testing purposes
|
||||
*/
|
||||
public SAMDataSource(
|
||||
Collection<SAMReaderID> samFiles,
|
||||
|
|
@ -203,6 +208,7 @@ public class SAMDataSource {
|
|||
Collections.<ReadTransformer>emptyList(),
|
||||
includeReadsWithDeletionAtLoci,
|
||||
(byte) -1,
|
||||
false,
|
||||
false);
|
||||
}
|
||||
|
||||
|
|
@ -219,6 +225,7 @@ public class SAMDataSource {
|
|||
* will explicitly list reads with deletion over the current reference base; otherwise, only observed
|
||||
* bases will be seen in the pileups, and the deletions will be skipped silently.
|
||||
* @param defaultBaseQualities if the reads have incomplete quality scores, set them all to defaultBaseQuality.
|
||||
* @param keepReadsInLIBS should we keep a unique list of reads in LIBS?
|
||||
*/
|
||||
public SAMDataSource(
|
||||
Collection<SAMReaderID> samFiles,
|
||||
|
|
@ -234,7 +241,8 @@ public class SAMDataSource {
|
|||
List<ReadTransformer> readTransformers,
|
||||
boolean includeReadsWithDeletionAtLoci,
|
||||
byte defaultBaseQualities,
|
||||
boolean removeProgramRecords) {
|
||||
boolean removeProgramRecords,
|
||||
final boolean keepReadsInLIBS) {
|
||||
this.readMetrics = new ReadMetrics();
|
||||
this.genomeLocParser = genomeLocParser;
|
||||
|
||||
|
|
@ -306,7 +314,8 @@ public class SAMDataSource {
|
|||
supplementalFilters,
|
||||
readTransformers,
|
||||
includeReadsWithDeletionAtLoci,
|
||||
defaultBaseQualities);
|
||||
defaultBaseQualities,
|
||||
keepReadsInLIBS);
|
||||
|
||||
// cache the read group id (original) -> read group id (merged)
|
||||
// and read group id (merged) -> read group id (original) mappings.
|
||||
|
|
|
|||
|
|
@ -51,6 +51,11 @@ import java.util.*;
|
|||
*/
|
||||
|
||||
public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegionWalker<M,T>,LocusShardDataProvider> {
|
||||
// TODO
|
||||
// TODO -- remove me when ART uses the LIBS traversal
|
||||
// TODO
|
||||
public static final boolean KEEP_READS_IN_LIBS = false;
|
||||
|
||||
/**
|
||||
* our log, which we want to capture anything from this class
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@
|
|||
|
||||
package org.broadinstitute.sting.utils.locusiterator;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import net.sf.samtools.CigarElement;
|
||||
import net.sf.samtools.CigarOperator;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
|
@ -63,7 +64,6 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
private final GenomeLocParser genomeLocParser;
|
||||
private final ArrayList<String> samples;
|
||||
private final ReadStateManager readStates;
|
||||
private final boolean keepSubmittedReads;
|
||||
private final boolean includeReadsWithDeletionAtLoci;
|
||||
|
||||
private AlignmentContext nextAlignmentContext;
|
||||
|
|
@ -82,19 +82,20 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
toDownsamplingInfo(readInformation),
|
||||
readInformation.includeReadsWithDeletionAtLoci(),
|
||||
genomeLocParser,
|
||||
samples);
|
||||
samples,
|
||||
readInformation.keepUniqueReadListInLIBS());
|
||||
}
|
||||
|
||||
protected LocusIteratorByState(final Iterator<SAMRecord> samIterator,
|
||||
final LIBSDownsamplingInfo downsamplingInfo,
|
||||
final boolean includeReadsWithDeletionAtLoci,
|
||||
final GenomeLocParser genomeLocParser,
|
||||
final Collection<String> samples) {
|
||||
final Collection<String> samples,
|
||||
final boolean maintainUniqueReadsList ) {
|
||||
this.includeReadsWithDeletionAtLoci = includeReadsWithDeletionAtLoci;
|
||||
this.genomeLocParser = genomeLocParser;
|
||||
this.samples = new ArrayList<String>(samples);
|
||||
this.keepSubmittedReads = false; // TODO -- HOOK UP SYSTEM
|
||||
this.readStates = new ReadStateManager(samIterator, this.samples, downsamplingInfo, keepSubmittedReads);
|
||||
this.readStates = new ReadStateManager(samIterator, this.samples, downsamplingInfo, maintainUniqueReadsList);
|
||||
|
||||
// currently the GATK expects this LocusIteratorByState to accept empty sample lists, when
|
||||
// there's no read data. So we need to throw this error only when samIterator.hasNext() is true
|
||||
|
|
@ -237,6 +238,51 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
}
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// getting the list of reads
|
||||
//
|
||||
// -----------------------------------------------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Transfer current list of all unique reads that have ever been used in any pileup, clearing old list
|
||||
*
|
||||
* This list is guaranteed to only contain unique reads, even across calls to this function. It is
|
||||
* literally the unique set of reads ever seen.
|
||||
*
|
||||
* The list occurs in the same order as they are encountered in the underlying iterator.
|
||||
*
|
||||
* Takes the maintained list of submitted reads, and transfers it to the caller of this
|
||||
* function. The old list is set to a new, cleanly allocated list so the caller officially
|
||||
* owns the list returned by this call. This is the only way to clear the tracking
|
||||
* of submitted reads, if enabled.
|
||||
*
|
||||
* The purpose of this function is to allow users of LIBS to keep track of all of the reads pulled off the
|
||||
* underlying SAMRecord iterator and that appeared at any point in the list of SAMRecordAlignmentState for
|
||||
* any reads. This function is intended to allow users to efficiently reconstruct the unique set of reads
|
||||
* used across all pileups. This is necessary for LIBS to handle because attempting to do
|
||||
* so from the pileups coming out of LIBS is extremely expensive.
|
||||
*
|
||||
* This functionality is only available if LIBS was created with the argument to track the reads
|
||||
*
|
||||
* @throws UnsupportedOperationException if called when keepSubmittedReads is false in the underlying ReadStateManager
|
||||
*
|
||||
* @return the current list
|
||||
*/
|
||||
@Ensures("result != null")
|
||||
public List<SAMRecord> transferReadsFromAllPreviousPileups() {
|
||||
return readStates.transferSubmittedReads();
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the underlying list of tracked reads. For testing only
|
||||
* @return a non-null list
|
||||
*/
|
||||
@Ensures("result != null")
|
||||
protected List<SAMRecord> getReadsFromAllPreviousPileups() {
|
||||
return readStates.getSubmittedReads();
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// utility functions
|
||||
|
|
|
|||
|
|
@ -206,7 +206,7 @@ class ReadStateManager {
|
|||
* interact with ReadStateManager in some way to make work unit
|
||||
* readsUsedInPileup = transferSubmittedReads)
|
||||
*
|
||||
* @throws UnsupportedOperationException if called when keepingSubmittedReads is false
|
||||
* @throws UnsupportedOperationException if called when keepSubmittedReads is false
|
||||
*
|
||||
* @return the current list of submitted reads
|
||||
*/
|
||||
|
|
@ -223,6 +223,14 @@ class ReadStateManager {
|
|||
return prevSubmittedReads;
|
||||
}
|
||||
|
||||
/**
|
||||
* Are we keeping submitted reads, or not?
|
||||
* @return true if we are keeping them, false otherwise
|
||||
*/
|
||||
public boolean isKeepingSubmittedReads() {
|
||||
return keepSubmittedReads;
|
||||
}
|
||||
|
||||
/**
|
||||
* Obtain a pointer to the list of submitted reads.
|
||||
*
|
||||
|
|
@ -232,11 +240,11 @@ class ReadStateManager {
|
|||
*
|
||||
* For testing purposes only.
|
||||
*
|
||||
* Will always be empty if we are are not keepingSubmittedReads
|
||||
* Will always be empty if keepSubmittedReads is false
|
||||
*
|
||||
* @return a non-null list of reads that have been submitted to this ReadStateManager
|
||||
*/
|
||||
@Ensures({"result != null","keepingSubmittedReads || result.isEmpty()"})
|
||||
@Ensures({"result != null","keepSubmittedReads || result.isEmpty()"})
|
||||
protected List<SAMRecord> getSubmittedReads() {
|
||||
return submittedReads;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -335,16 +335,17 @@ public class ArtificialSAMUtils {
|
|||
* @return a collection of stackSize reads all sharing the above properties
|
||||
*/
|
||||
public static List<SAMRecord> createReadStream( final int nReadsPerLocus,
|
||||
final int nLoci,
|
||||
final SAMFileHeader header,
|
||||
final int alignmentStart,
|
||||
final int length ) {
|
||||
final String name = "readName";
|
||||
final int nLoci,
|
||||
final SAMFileHeader header,
|
||||
final int alignmentStart,
|
||||
final int length ) {
|
||||
final String baseName = "read";
|
||||
List<SAMRecord> reads = new ArrayList<SAMRecord>(nReadsPerLocus*nLoci);
|
||||
for ( int locus = 0; locus < nLoci; locus++ ) {
|
||||
for ( int readI = 0; readI < nReadsPerLocus; readI++ ) {
|
||||
for ( final SAMReadGroupRecord rg : header.getReadGroups() ) {
|
||||
final GATKSAMRecord read = createArtificialRead(header, name, 0, alignmentStart, length);
|
||||
final String readName = String.format("%s.%d.%d.%s", baseName, locus, readI, rg.getId());
|
||||
final GATKSAMRecord read = createArtificialRead(header, readName, 0, alignmentStart + locus, length);
|
||||
read.setReadGroup(new GATKSAMReadGroupRecord(rg));
|
||||
reads.add(read);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -80,7 +80,8 @@ public class DownsamplerBenchmark extends ReadProcessingBenchmark {
|
|||
Collections.<ReadFilter>emptyList(),
|
||||
Collections.<ReadTransformer>emptyList(),
|
||||
false,
|
||||
(byte)0);
|
||||
(byte)0,
|
||||
false);
|
||||
|
||||
GenomeLocParser genomeLocParser = new GenomeLocParser(reader.getFileHeader().getSequenceDictionary());
|
||||
// Filter unmapped reads. TODO: is this always strictly necessary? Who in the GATK normally filters these out?
|
||||
|
|
|
|||
|
|
@ -182,7 +182,8 @@ public class SAMDataSourceUnitTest extends BaseTest {
|
|||
Collections.<ReadTransformer>emptyList(),
|
||||
false,
|
||||
(byte) -1,
|
||||
removeProgramRecords);
|
||||
removeProgramRecords,
|
||||
false);
|
||||
|
||||
List<SAMProgramRecord> dontRemoveProgramRecords = data.getHeader().getProgramRecords();
|
||||
assertEquals(dontRemoveProgramRecords, defaultProgramRecords, "testRemoveProgramRecords: default program records differ from removeProgramRecords = false");
|
||||
|
|
@ -201,7 +202,8 @@ public class SAMDataSourceUnitTest extends BaseTest {
|
|||
Collections.<ReadTransformer>emptyList(),
|
||||
false,
|
||||
(byte) -1,
|
||||
removeProgramRecords);
|
||||
removeProgramRecords,
|
||||
false);
|
||||
|
||||
List<SAMProgramRecord> doRemoveProgramRecords = data.getHeader().getProgramRecords();
|
||||
assertTrue(doRemoveProgramRecords.isEmpty(), "testRemoveProgramRecords: program records not cleared when removeProgramRecords = true");
|
||||
|
|
|
|||
|
|
@ -82,10 +82,10 @@ public class LocusIteratorByStateBaseTest extends BaseTest {
|
|||
}
|
||||
|
||||
protected static ReadProperties createTestReadProperties() {
|
||||
return createTestReadProperties(null);
|
||||
return createTestReadProperties(null, false);
|
||||
}
|
||||
|
||||
protected static ReadProperties createTestReadProperties( DownsamplingMethod downsamplingMethod ) {
|
||||
protected static ReadProperties createTestReadProperties( DownsamplingMethod downsamplingMethod, final boolean keepReads ) {
|
||||
return new ReadProperties(
|
||||
Collections.<SAMReaderID>emptyList(),
|
||||
new SAMFileHeader(),
|
||||
|
|
@ -97,8 +97,8 @@ public class LocusIteratorByStateBaseTest extends BaseTest {
|
|||
Collections.<ReadFilter>emptyList(),
|
||||
Collections.<ReadTransformer>emptyList(),
|
||||
false,
|
||||
(byte) -1
|
||||
);
|
||||
(byte) -1,
|
||||
keepReads);
|
||||
}
|
||||
|
||||
protected static class FakeCloseableIterator<T> implements CloseableIterator<T> {
|
||||
|
|
|
|||
|
|
@ -28,6 +28,8 @@ package org.broadinstitute.sting.utils.locusiterator;
|
|||
import net.sf.samtools.*;
|
||||
import org.broadinstitute.sting.gatk.ReadProperties;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.downsampling.DownsampleType;
|
||||
import org.broadinstitute.sting.gatk.downsampling.DownsamplingMethod;
|
||||
import org.broadinstitute.sting.utils.NGSPlatform;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
|
|
@ -345,11 +347,20 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest {
|
|||
public Object[][] makeLIBSKeepSubmittedReads() {
|
||||
final List<Object[]> tests = new LinkedList<Object[]>();
|
||||
|
||||
for ( final int nReadsPerLocus : Arrays.asList(1, 10) ) {
|
||||
for ( final int nLoci : Arrays.asList(1, 10, 100, 1000) ) {
|
||||
for ( final int nSamples : Arrays.asList(1, 2, 100) ) {
|
||||
for ( final boolean keepReads : Arrays.asList(true, false) ) {
|
||||
tests.add(new Object[]{nReadsPerLocus, nLoci, nSamples, keepReads});
|
||||
for ( final boolean doSampling : Arrays.asList(true, false) ) {
|
||||
for ( final int nReadsPerLocus : Arrays.asList(1, 10) ) {
|
||||
for ( final int nLoci : Arrays.asList(1, 10, 25) ) {
|
||||
for ( final int nSamples : Arrays.asList(1, 2, 10) ) {
|
||||
for ( final boolean keepReads : Arrays.asList(true, false) ) {
|
||||
for ( final boolean grabReadsAfterEachCycle : Arrays.asList(true, false) ) {
|
||||
// for ( final int nReadsPerLocus : Arrays.asList(1) ) {
|
||||
// for ( final int nLoci : Arrays.asList(10) ) {
|
||||
// for ( final int nSamples : Arrays.asList(1) ) {
|
||||
// for ( final boolean keepReads : Arrays.asList(true) ) {
|
||||
// for ( final boolean grabReadsAfterEachCycle : Arrays.asList(true) ) {
|
||||
tests.add(new Object[]{nReadsPerLocus, nLoci, nSamples, keepReads, grabReadsAfterEachCycle, doSampling});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -358,27 +369,117 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest {
|
|||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@Test(enabled = false, dataProvider = "LIBSKeepSubmittedReads")
|
||||
public void testLIBSKeepSubmittedReads(final int nReadsPerLocus, final int nLoci, final int nSamples, final boolean keepReads) {
|
||||
@Test(enabled = true, dataProvider = "LIBSKeepSubmittedReads")
|
||||
public void testLIBSKeepSubmittedReads(final int nReadsPerLocus,
|
||||
final int nLoci,
|
||||
final int nSamples,
|
||||
final boolean keepReads,
|
||||
final boolean grabReadsAfterEachCycle,
|
||||
final boolean downsample) {
|
||||
logger.warn(String.format("testLIBSKeepSubmittedReads %d %d %d %b %b %b", nReadsPerLocus, nLoci, nSamples, keepReads, grabReadsAfterEachCycle, downsample));
|
||||
final int readLength = 10;
|
||||
|
||||
final SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 100000);
|
||||
final List<String> samples = new ArrayList<String>(nSamples);
|
||||
for ( int i = 0; i < nSamples; i++ ) {
|
||||
final GATKSAMReadGroupRecord rg = new GATKSAMReadGroupRecord("rg" + i);
|
||||
rg.setSample("sample" + i);
|
||||
final String sample = "sample" + i;
|
||||
samples.add(sample);
|
||||
rg.setSample(sample);
|
||||
rg.setPlatform(NGSPlatform.ILLUMINA.getDefaultPlatform());
|
||||
header.addReadGroup(rg);
|
||||
}
|
||||
|
||||
final int maxCoveragePerSampleAtLocus = nReadsPerLocus * readLength / 2;
|
||||
final int maxDownsampledCoverage = Math.max(maxCoveragePerSampleAtLocus / 2, 1);
|
||||
final DownsamplingMethod downsampler = downsample
|
||||
? new DownsamplingMethod(DownsampleType.BY_SAMPLE, maxDownsampledCoverage, null, false)
|
||||
: new DownsamplingMethod(DownsampleType.NONE, null, null, false);
|
||||
final List<SAMRecord> reads = ArtificialSAMUtils.createReadStream(nReadsPerLocus, nLoci, header, 1, readLength);
|
||||
li = makeLTBS(reads, createTestReadProperties());
|
||||
li = new LocusIteratorByState(new FakeCloseableIterator<SAMRecord>(reads.iterator()),
|
||||
createTestReadProperties(downsampler, keepReads),
|
||||
genomeLocParser,
|
||||
samples);
|
||||
|
||||
final Set<SAMRecord> seenSoFar = new HashSet<SAMRecord>();
|
||||
final Set<SAMRecord> keptReads = new HashSet<SAMRecord>();
|
||||
int bpVisited = 0;
|
||||
while ( li.hasNext() ) {
|
||||
bpVisited++;
|
||||
final AlignmentContext alignmentContext = li.next();
|
||||
final ReadBackedPileup p = alignmentContext.getBasePileup();
|
||||
|
||||
if ( downsample ) {
|
||||
// just not a safe test
|
||||
//Assert.assertTrue(p.getNumberOfElements() <= maxDownsampledCoverage * nSamples, "Too many reads at locus after downsampling");
|
||||
} else {
|
||||
final int minPileupSize = nReadsPerLocus * nSamples;
|
||||
Assert.assertTrue(p.getNumberOfElements() >= minPileupSize);
|
||||
}
|
||||
|
||||
seenSoFar.addAll(p.getReads());
|
||||
if ( keepReads && grabReadsAfterEachCycle ) {
|
||||
final List<SAMRecord> locusReads = li.transferReadsFromAllPreviousPileups();
|
||||
|
||||
// the number of reads starting here
|
||||
int nReadsStartingHere = 0;
|
||||
for ( final SAMRecord read : p.getReads() )
|
||||
if ( read.getAlignmentStart() == alignmentContext.getPosition() )
|
||||
nReadsStartingHere++;
|
||||
|
||||
if ( downsample )
|
||||
// with downsampling we might have some reads here that were downsampled away
|
||||
// in the pileup
|
||||
Assert.assertTrue(locusReads.size() >= nReadsStartingHere);
|
||||
else
|
||||
Assert.assertEquals(locusReads.size(), nReadsStartingHere);
|
||||
keptReads.addAll(locusReads);
|
||||
|
||||
// check that all reads we've seen so far are in our keptReads
|
||||
for ( final SAMRecord read : seenSoFar ) {
|
||||
Assert.assertTrue(keptReads.contains(read), "A read that appeared in a pileup wasn't found in the kept reads: " + read);
|
||||
}
|
||||
}
|
||||
|
||||
if ( ! keepReads )
|
||||
Assert.assertTrue(li.getReadsFromAllPreviousPileups().isEmpty(), "Not keeping reads but the underlying list of reads isn't empty");
|
||||
}
|
||||
|
||||
final int expectedBpToVisit = nLoci + readLength;
|
||||
Assert.assertEquals(bpVisited, expectedBpToVisit, "Didn't visit the expected number of bp");
|
||||
if ( keepReads && ! grabReadsAfterEachCycle )
|
||||
keptReads.addAll(li.transferReadsFromAllPreviousPileups());
|
||||
|
||||
if ( ! downsample ) { // downsampling may drop loci
|
||||
final int expectedBpToVisit = nLoci + readLength - 1;
|
||||
Assert.assertEquals(bpVisited, expectedBpToVisit, "Didn't visit the expected number of bp");
|
||||
}
|
||||
|
||||
if ( keepReads ) {
|
||||
// check we have the right number of reads
|
||||
final int totalReads = nLoci * nReadsPerLocus * nSamples;
|
||||
if ( ! downsample ) { // downsampling may drop reads
|
||||
Assert.assertEquals(keptReads.size(), totalReads, "LIBS didn't keep the right number of reads during the traversal");
|
||||
|
||||
// check that the order of reads is the same as in our read list
|
||||
for ( int i = 0; i < reads.size(); i++ ) {
|
||||
final SAMRecord inputRead = reads.get(i);
|
||||
final SAMRecord keptRead = reads.get(i);
|
||||
Assert.assertSame(keptRead, inputRead, "Input reads and kept reads differ at position " + i);
|
||||
}
|
||||
} else {
|
||||
Assert.assertTrue(keptReads.size() <= totalReads, "LIBS didn't keep the right number of reads during the traversal");
|
||||
}
|
||||
|
||||
// check uniqueness
|
||||
final Set<String> readNames = new HashSet<String>();
|
||||
for ( final SAMRecord read : keptReads ) {
|
||||
Assert.assertFalse(readNames.contains(read.getReadName()), "Found duplicate reads in the kept reads");
|
||||
readNames.add(read.getReadName());
|
||||
}
|
||||
|
||||
// check that all reads we've seen are in our keptReads
|
||||
for ( final SAMRecord read : seenSoFar ) {
|
||||
Assert.assertTrue(keptReads.contains(read), "A read that appeared in a pileup wasn't found in the kept reads: " + read);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -25,25 +25,10 @@
|
|||
|
||||
package org.broadinstitute.sting.utils.locusiterator;
|
||||
|
||||
import net.sf.samtools.*;
|
||||
import net.sf.samtools.util.CloseableIterator;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.gatk.ReadProperties;
|
||||
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
|
||||
import org.broadinstitute.sting.gatk.downsampling.DownsamplingMethod;
|
||||
import org.broadinstitute.sting.gatk.filters.ReadFilter;
|
||||
import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
|
|
@ -79,14 +64,9 @@ public class ReadStateManagerUnitTest extends LocusIteratorByStateBaseTest {
|
|||
public void run() {
|
||||
final List<String> samples = sampleListForSAMWithoutReadGroups();
|
||||
final Iterator<SAMRecord> iterator = new LinkedList<SAMRecord>().iterator();
|
||||
ReadStateManager readStateManager = new ReadStateManager(iterator, samples, LIBSDownsamplingInfo.NO_DOWNSAMPLING);
|
||||
ReadStateManager readStateManager = new ReadStateManager(iterator, samples, LIBSDownsamplingInfo.NO_DOWNSAMPLING, false);
|
||||
ReadStateManager.PerSampleReadStateManager perSampleReadStateManager = readStateManager.new PerSampleReadStateManager(LIBSDownsamplingInfo.NO_DOWNSAMPLING);
|
||||
|
||||
// ReadStateManager readStateManager =
|
||||
// libs.new ReadStateManager(new ArrayList<SAMRecord>().iterator());
|
||||
// ReadStateManager.PerSampleReadStateManager perSampleReadStateManager =
|
||||
// readStateManager.new PerSampleReadStateManager();
|
||||
|
||||
makeReads();
|
||||
|
||||
for ( ArrayList<SAMRecordAlignmentState> stackRecordStates : recordStatesByAlignmentStart ) {
|
||||
|
|
|
|||
Loading…
Reference in New Issue