Checking in downsampling iterator alongside LocusIteratorByState, and removing
the reference implementation. Also implemented a heap size monitor that can be used to programmatically report the current heap size.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3367 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
b7d21627ab
commit
0791beab8f
|
|
@ -138,6 +138,9 @@ public class GenomeAnalysisEngine {
|
||||||
* @return the value of this traversal.
|
* @return the value of this traversal.
|
||||||
*/
|
*/
|
||||||
public Object execute(GATKArgumentCollection args, Walker<?, ?> my_walker, Collection<SamRecordFilter> filters) {
|
public Object execute(GATKArgumentCollection args, Walker<?, ?> my_walker, Collection<SamRecordFilter> filters) {
|
||||||
|
//HeapSizeMonitor monitor = new HeapSizeMonitor();
|
||||||
|
//monitor.start();
|
||||||
|
|
||||||
// validate our parameters
|
// validate our parameters
|
||||||
if (args == null) {
|
if (args == null) {
|
||||||
throw new StingException("The GATKArgumentCollection passed to GenomeAnalysisEngine can not be null.");
|
throw new StingException("The GATKArgumentCollection passed to GenomeAnalysisEngine can not be null.");
|
||||||
|
|
@ -169,7 +172,12 @@ public class GenomeAnalysisEngine {
|
||||||
readsDataSource != null ? readsDataSource.getReadsInfo().getValidationExclusionList() : null);
|
readsDataSource != null ? readsDataSource.getReadsInfo().getValidationExclusionList() : null);
|
||||||
|
|
||||||
// execute the microscheduler, storing the results
|
// execute the microscheduler, storing the results
|
||||||
return microScheduler.execute(my_walker, shardStrategy, argCollection.maximumEngineIterations);
|
Object result = microScheduler.execute(my_walker, shardStrategy, argCollection.maximumEngineIterations);
|
||||||
|
|
||||||
|
//monitor.stop();
|
||||||
|
//logger.info(String.format("Maximum heap size consumed: %d",monitor.getMaxMemoryUsed()));
|
||||||
|
|
||||||
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -694,7 +702,7 @@ public class GenomeAnalysisEngine {
|
||||||
else
|
else
|
||||||
throw new StingException("The GATK cannot currently process unindexed BAM files");
|
throw new StingException("The GATK cannot currently process unindexed BAM files");
|
||||||
|
|
||||||
return new MonolithicShardStrategy(shardType);
|
return new (shardType);
|
||||||
}
|
}
|
||||||
|
|
||||||
ShardStrategy shardStrategy = null;
|
ShardStrategy shardStrategy = null;
|
||||||
|
|
|
||||||
|
|
@ -32,7 +32,6 @@ import org.broadinstitute.sting.gatk.Reads;
|
||||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.utils.*;
|
import org.broadinstitute.sting.utils.*;
|
||||||
import org.broadinstitute.sting.utils.sam.AlignmentStartComparator;
|
|
||||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||||
import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement;
|
import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement;
|
||||||
|
|
@ -41,27 +40,20 @@ import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
/** Iterator that traverses a SAM File, accumulating information on a per-locus basis */
|
/** Iterator that traverses a SAM File, accumulating information on a per-locus basis */
|
||||||
public class DownsamplingReferenceImplementation extends LocusIterator {
|
public class DownsamplingLocusIteratorByState extends LocusIterator {
|
||||||
// TODO: Reintegrate LocusOverflowTracker
|
|
||||||
|
|
||||||
/** our log, which we want to capture anything from this class */
|
/** our log, which we want to capture anything from this class */
|
||||||
private static Logger logger = Logger.getLogger(DownsamplingReferenceImplementation.class);
|
private static Logger logger = Logger.getLogger(LocusIteratorByState.class);
|
||||||
|
|
||||||
/**
|
|
||||||
* Store a random number generator with a consistent seed for consistent downsampling from run to run.
|
|
||||||
* Note that each shard will be initialized with the same random seed; this will ensure consistent results
|
|
||||||
* across parallelized runs, at the expense of decreasing our level of randomness.
|
|
||||||
*/
|
|
||||||
private Random downsampleRandomizer = new Random(38148309L);
|
|
||||||
|
|
||||||
// -----------------------------------------------------------------------------------------------------------------
|
// -----------------------------------------------------------------------------------------------------------------
|
||||||
//
|
//
|
||||||
// member fields
|
// member fields
|
||||||
//
|
//
|
||||||
// -----------------------------------------------------------------------------------------------------------------
|
// -----------------------------------------------------------------------------------------------------------------
|
||||||
private final PeekableIterator<Collection<SAMRecord>> downsamplingIterator;
|
|
||||||
private boolean hasExtendedEvents = false; // will be set to true if at least one read had an indel right before the current position
|
private boolean hasExtendedEvents = false; // will be set to true if at least one read had an indel right before the current position
|
||||||
private Collection<String> sampleNames = new ArrayList<String>();
|
|
||||||
|
private final Collection<String> sampleNames = new ArrayList<String>();
|
||||||
|
private final ReadStateManager readStates;
|
||||||
|
|
||||||
private class SAMRecordState {
|
private class SAMRecordState {
|
||||||
SAMRecord read;
|
SAMRecord read;
|
||||||
|
|
@ -252,28 +244,24 @@ public class DownsamplingReferenceImplementation extends LocusIterator {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private LinkedList<SAMRecordState> readStates = new LinkedList<SAMRecordState>();
|
|
||||||
//final boolean DEBUG = false;
|
//final boolean DEBUG = false;
|
||||||
//final boolean DEBUG2 = false && DEBUG;
|
//final boolean DEBUG2 = false && DEBUG;
|
||||||
private Reads readInfo;
|
private Reads readInfo;
|
||||||
|
private AlignmentContext nextAlignmentContext;
|
||||||
|
|
||||||
// -----------------------------------------------------------------------------------------------------------------
|
// -----------------------------------------------------------------------------------------------------------------
|
||||||
//
|
//
|
||||||
// constructors and other basic operations
|
// constructors and other basic operations
|
||||||
//
|
//
|
||||||
// -----------------------------------------------------------------------------------------------------------------
|
// -----------------------------------------------------------------------------------------------------------------
|
||||||
public DownsamplingReferenceImplementation(final Iterator<SAMRecord> samIterator, Reads readInformation) {
|
public DownsamplingLocusIteratorByState(final Iterator<SAMRecord> samIterator, Reads readInformation) {
|
||||||
ReservoirDownsampler<SAMRecord> downsampler = new ReservoirDownsampler<SAMRecord>(samIterator,
|
|
||||||
new AlignmentStartComparator(),
|
|
||||||
new SampleNamePartitioner(),
|
|
||||||
readInformation.getMaxReadsAtLocus());
|
|
||||||
this.downsamplingIterator = new PeekableIterator<Collection<SAMRecord>>(downsampler);
|
|
||||||
this.readInfo = readInformation;
|
|
||||||
|
|
||||||
// Aggregate all sample names.
|
// Aggregate all sample names.
|
||||||
// TODO: Push in header via constructor
|
// TODO: Push in header via constructor
|
||||||
if(GenomeAnalysisEngine.instance.getDataSource() != null)
|
if(GenomeAnalysisEngine.instance.getDataSource() != null)
|
||||||
sampleNames.addAll(SampleUtils.getSAMFileSamples(GenomeAnalysisEngine.instance.getSAMFileHeader()));
|
sampleNames.addAll(SampleUtils.getSAMFileSamples(GenomeAnalysisEngine.instance.getSAMFileHeader()));
|
||||||
|
readStates = new ReadStateManager(samIterator,sampleNames,readInformation.getMaxReadsAtLocus());
|
||||||
|
this.readInfo = readInformation;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public Iterator<AlignmentContext> iterator() {
|
public Iterator<AlignmentContext> iterator() {
|
||||||
|
|
@ -285,9 +273,9 @@ public class DownsamplingReferenceImplementation extends LocusIterator {
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean hasNext() {
|
public boolean hasNext() {
|
||||||
boolean r = ! readStates.isEmpty() || downsamplingIterator.hasNext();
|
lazyLoadNextAlignmentContext();
|
||||||
|
boolean r = (nextAlignmentContext != null);
|
||||||
//if ( DEBUG ) System.out.printf("hasNext() = %b%n", r);
|
//if ( DEBUG ) System.out.printf("hasNext() = %b%n", r);
|
||||||
|
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -300,11 +288,6 @@ public class DownsamplingReferenceImplementation extends LocusIterator {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void clear() {
|
|
||||||
logger.debug(String.format(("clear() called")));
|
|
||||||
readStates.clear();
|
|
||||||
}
|
|
||||||
|
|
||||||
private GenomeLoc getLocation() {
|
private GenomeLoc getLocation() {
|
||||||
return readStates.isEmpty() ? null : readStates.getFirst().getLocation();
|
return readStates.isEmpty() ? null : readStates.getFirst().getLocation();
|
||||||
}
|
}
|
||||||
|
|
@ -315,12 +298,20 @@ public class DownsamplingReferenceImplementation extends LocusIterator {
|
||||||
//
|
//
|
||||||
// -----------------------------------------------------------------------------------------------------------------
|
// -----------------------------------------------------------------------------------------------------------------
|
||||||
public AlignmentContext next() {
|
public AlignmentContext next() {
|
||||||
// keep iterating forward until we encounter a reference position that has something "real" hanging over it
|
lazyLoadNextAlignmentContext();
|
||||||
// (i.e. either a real base, or a real base or a deletion if includeReadsWithDeletion is true)
|
if(!hasNext())
|
||||||
|
throw new NoSuchElementException("LocusIteratorByState: out of elements.");
|
||||||
|
AlignmentContext currentAlignmentContext = nextAlignmentContext;
|
||||||
while(true) {
|
nextAlignmentContext = null;
|
||||||
|
return currentAlignmentContext;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates the next alignment context from the given state. Note that this is implemented as a lazy load method.
|
||||||
|
* nextAlignmentContext MUST BE null in order for this method to advance to the next entry.
|
||||||
|
*/
|
||||||
|
private void lazyLoadNextAlignmentContext() {
|
||||||
|
while(nextAlignmentContext == null && readStates.hasNext()) {
|
||||||
// this call will set hasExtendedEvents to true if it picks up a read with indel right before the current position on the ref:
|
// this call will set hasExtendedEvents to true if it picks up a read with indel right before the current position on the ref:
|
||||||
collectPendingReads(readInfo.getMaxReadsAtLocus());
|
collectPendingReads(readInfo.getMaxReadsAtLocus());
|
||||||
|
|
||||||
|
|
@ -335,7 +326,7 @@ public class DownsamplingReferenceImplementation extends LocusIterator {
|
||||||
// i.e. the one right *before* the indel) and do NOT shift the current position on the ref.
|
// i.e. the one right *before* the indel) and do NOT shift the current position on the ref.
|
||||||
// In this case, the subsequent call to next() will emit the normal pileup at the current base
|
// In this case, the subsequent call to next() will emit the normal pileup at the current base
|
||||||
// and shift the position.
|
// and shift the position.
|
||||||
if ( readInfo.generateExtendedEvents() && hasExtendedEvents ) {
|
if (readInfo.generateExtendedEvents() && hasExtendedEvents) {
|
||||||
ArrayList<ExtendedEventPileupElement> indelPile = new ArrayList<ExtendedEventPileupElement>(readStates.size());
|
ArrayList<ExtendedEventPileupElement> indelPile = new ArrayList<ExtendedEventPileupElement>(readStates.size());
|
||||||
|
|
||||||
int maxDeletionLength = 0;
|
int maxDeletionLength = 0;
|
||||||
|
|
@ -382,12 +373,10 @@ public class DownsamplingReferenceImplementation extends LocusIterator {
|
||||||
GenomeLoc loc = GenomeLocParser.incPos(our1stState.getLocation(),-1);
|
GenomeLoc loc = GenomeLocParser.incPos(our1stState.getLocation(),-1);
|
||||||
// System.out.println("Indel(s) at "+loc);
|
// System.out.println("Indel(s) at "+loc);
|
||||||
// for ( ExtendedEventPileupElement pe : indelPile ) { if ( pe.isIndel() ) System.out.println(" "+pe.toString()); }
|
// for ( ExtendedEventPileupElement pe : indelPile ) { if ( pe.isIndel() ) System.out.println(" "+pe.toString()); }
|
||||||
return new AlignmentContext(loc, new ReadBackedExtendedEventPileup(loc, indelPile, size, maxDeletionLength, nInsertions, nDeletions, nMQ0Reads));
|
nextAlignmentContext = new AlignmentContext(loc, new ReadBackedExtendedEventPileup(loc, indelPile, size, maxDeletionLength, nInsertions, nDeletions, nMQ0Reads));
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
ArrayList<PileupElement> pile = new ArrayList<PileupElement>(readStates.size());
|
ArrayList<PileupElement> pile = new ArrayList<PileupElement>(readStates.size());
|
||||||
|
|
||||||
|
|
||||||
// todo -- performance problem -- should be lazy, really
|
// todo -- performance problem -- should be lazy, really
|
||||||
for ( SAMRecordState state : readStates ) {
|
for ( SAMRecordState state : readStates ) {
|
||||||
if ( state.getCurrentCigarOperator() != CigarOperator.D && state.getCurrentCigarOperator() != CigarOperator.N ) {
|
if ( state.getCurrentCigarOperator() != CigarOperator.D && state.getCurrentCigarOperator() != CigarOperator.N ) {
|
||||||
|
|
@ -410,9 +399,8 @@ public class DownsamplingReferenceImplementation extends LocusIterator {
|
||||||
GenomeLoc loc = getLocation();
|
GenomeLoc loc = getLocation();
|
||||||
updateReadStates(); // critical - must be called after we get the current state offsets and location
|
updateReadStates(); // critical - must be called after we get the current state offsets and location
|
||||||
// if we got reads with non-D/N over the current position, we are done
|
// if we got reads with non-D/N over the current position, we are done
|
||||||
if ( pile.size() != 0 ) return new AlignmentContext(loc, new ReadBackedPileup(loc, pile, size, nDeletions, nMQ0Reads));
|
if ( pile.size() != 0 ) nextAlignmentContext = new AlignmentContext(loc, new ReadBackedPileup(loc, pile, size, nDeletions, nMQ0Reads));
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -455,125 +443,10 @@ public class DownsamplingReferenceImplementation extends LocusIterator {
|
||||||
// }
|
// }
|
||||||
// }
|
// }
|
||||||
|
|
||||||
private void collectPendingReads(int maxReadsPerSample) {
|
private void collectPendingReads(int maxReads) {
|
||||||
if(maxReadsPerSample <= 0)
|
readStates.collectPendingReads();
|
||||||
throw new StingException("maxReadsPerSample is too low; it is " + maxReadsPerSample + ", but must be greater than 0");
|
|
||||||
|
|
||||||
while (downsamplingIterator.hasNext()) {
|
|
||||||
Collection<SAMRecord> reads = downsamplingIterator.peek();
|
|
||||||
if(!reads.isEmpty() && !readIsPastCurrentPosition(reads.iterator().next())) {
|
|
||||||
// Consume the collection of reads.
|
|
||||||
downsamplingIterator.next();
|
|
||||||
|
|
||||||
for(String sampleName: sampleNames) {
|
|
||||||
LinkedList<SAMRecord> newReads = getReadsForGivenSample(reads,sampleName);
|
|
||||||
LinkedList<SAMRecordState> existingReadStates = getReadStateForGivenSample(readStates,sampleName);
|
|
||||||
|
|
||||||
if(existingReadStates.size()+newReads.size() <= maxReadsPerSample) {
|
|
||||||
for(SAMRecord read: newReads) {
|
|
||||||
SAMRecordState state = new SAMRecordState(read, readInfo.generateExtendedEvents());
|
|
||||||
state.stepForwardOnGenome();
|
|
||||||
readStates.add(state);
|
|
||||||
// TODO: What if we downsample the extended events away?
|
|
||||||
if (state.hadIndel()) hasExtendedEvents = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
// If we've reached this point, the active list of read states needs to be pruned. Start by
|
|
||||||
// pruning one off each alignment start, working backward. Repeat until there's either < 1
|
|
||||||
// read available at any locus or
|
|
||||||
|
|
||||||
// readStatesAtAlignmentStart stores a full complement of reads starting at a given locus.
|
|
||||||
List<SAMRecordState> readStatesAtAlignmentStart = new ArrayList<SAMRecordState>();
|
|
||||||
List<SAMRecordState> readStatesToPrune = new LinkedList<SAMRecordState>();
|
|
||||||
|
|
||||||
while((existingReadStates.size()-readStatesToPrune.size()+newReads.size())>maxReadsPerSample) {
|
|
||||||
readStatesToPrune.clear();
|
|
||||||
Iterator<SAMRecordState> descendingIterator = existingReadStates.descendingIterator();
|
|
||||||
while(descendingIterator.hasNext()) {
|
|
||||||
// Accumulate all reads at a given alignment start.
|
|
||||||
SAMRecordState currentState = descendingIterator.next();
|
|
||||||
if(readStatesAtAlignmentStart.isEmpty() ||
|
|
||||||
readStatesAtAlignmentStart.get(0).getRead().getAlignmentStart()==currentState.getRead().getAlignmentStart())
|
|
||||||
readStatesAtAlignmentStart.add(currentState);
|
|
||||||
else {
|
|
||||||
if(readStatesAtAlignmentStart.size() > 1) {
|
|
||||||
SAMRecordState stateToRemove = readStatesAtAlignmentStart.get(downsampleRandomizer.nextInt(readStatesAtAlignmentStart.size()));
|
|
||||||
readStatesToPrune.add(stateToRemove);
|
|
||||||
if((existingReadStates.size()-readStatesToPrune.size()+newReads.size())<=maxReadsPerSample)
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
readStatesAtAlignmentStart.clear();
|
|
||||||
readStatesAtAlignmentStart.add(currentState);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Cleanup on last locus viewed.
|
|
||||||
if(readStatesAtAlignmentStart.size() > 1 && (existingReadStates.size()-readStatesToPrune.size()+newReads.size())>maxReadsPerSample) {
|
|
||||||
SAMRecordState stateToRemove = readStatesAtAlignmentStart.get(downsampleRandomizer.nextInt(readStatesAtAlignmentStart.size()));
|
|
||||||
readStatesToPrune.add(stateToRemove);
|
|
||||||
}
|
|
||||||
readStatesAtAlignmentStart.clear();
|
|
||||||
|
|
||||||
// Nothing left to prune. Break out to avoid infinite loop.
|
|
||||||
if(readStatesToPrune.isEmpty())
|
|
||||||
break;
|
|
||||||
|
|
||||||
// Get rid of all the chosen reads.
|
|
||||||
existingReadStates.removeAll(readStatesToPrune);
|
|
||||||
readStates.removeAll(readStatesToPrune);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Still no space available? Prune the leftmost read.
|
|
||||||
if(existingReadStates.size() >= maxReadsPerSample) {
|
|
||||||
SAMRecordState initialReadState = existingReadStates.remove();
|
|
||||||
readStates.remove(initialReadState);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Fill from the list of new reads until we're either out of new reads or at capacity.
|
|
||||||
for(SAMRecord read: newReads) {
|
|
||||||
SAMRecordState state = new SAMRecordState(read, readInfo.generateExtendedEvents());
|
|
||||||
state.stepForwardOnGenome();
|
|
||||||
existingReadStates.add(state);
|
|
||||||
readStates.add(state);
|
|
||||||
// TODO: What if we downsample the extended events away?
|
|
||||||
if (state.hadIndel()) hasExtendedEvents = true;
|
|
||||||
if(existingReadStates.size()>=maxReadsPerSample)
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
//if (DEBUG) logger.debug(String.format(" ... added read %s", read.getReadName()));
|
|
||||||
}
|
|
||||||
else if(readIsPastCurrentPosition(reads.iterator().next()))
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private LinkedList<SAMRecord> getReadsForGivenSample(final Collection<SAMRecord> reads, final String sampleName) {
|
|
||||||
// TODO: What about files with no read groups? What about files with no samples?
|
|
||||||
LinkedList<SAMRecord> readsForGivenSample = new LinkedList<SAMRecord>();
|
|
||||||
for(SAMRecord read: reads) {
|
|
||||||
Object readSampleName = read.getReadGroup().getSample();
|
|
||||||
if(readSampleName != null && readSampleName.equals(sampleName))
|
|
||||||
readsForGivenSample.add(read);
|
|
||||||
}
|
|
||||||
return readsForGivenSample;
|
|
||||||
}
|
|
||||||
|
|
||||||
private LinkedList<SAMRecordState> getReadStateForGivenSample(final Collection<SAMRecordState> readStates, final String sampleName) {
|
|
||||||
// TODO: What about files with no read groups? What about files with no samples?
|
|
||||||
LinkedList<SAMRecordState> readStatesForGivenSample = new LinkedList<SAMRecordState>();
|
|
||||||
for(SAMRecordState readState: readStates) {
|
|
||||||
Object readSampleName = readState.getRead().getReadGroup().getSample();
|
|
||||||
if(readSampleName != null && readSampleName.equals(sampleName))
|
|
||||||
readStatesForGivenSample.add(readState);
|
|
||||||
}
|
|
||||||
return readStatesForGivenSample;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// fast testing of position
|
// fast testing of position
|
||||||
private boolean readIsPastCurrentPosition(SAMRecord read) {
|
private boolean readIsPastCurrentPosition(SAMRecord read) {
|
||||||
if ( readStates.isEmpty() )
|
if ( readStates.isEmpty() )
|
||||||
|
|
@ -632,14 +505,245 @@ public class DownsamplingReferenceImplementation extends LocusIterator {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
private class ReadStateManager implements Iterable<SAMRecordState> {
|
||||||
* Partitions a dataset by sample name.
|
private final PeekableIterator<SAMRecord> iterator;
|
||||||
*/
|
private final Map<String,ReservoirDownsampler<SAMRecord>> downsamplersBySampleName = new HashMap<String,ReservoirDownsampler<SAMRecord>>();
|
||||||
private class SampleNamePartitioner implements ReservoirDownsampler.Partitioner<SAMRecord> {
|
private final int maxReadsPerSample;
|
||||||
public Object partition(SAMRecord read) {
|
|
||||||
if(read.getReadGroup() != null && read.getReadGroup().getAttribute("SM") != null)
|
private final Deque<Map<String,List<SAMRecordState>>> readStatesByAlignmentStart;
|
||||||
return read.getReadGroup().getAttribute("SM");
|
|
||||||
return null;
|
/**
|
||||||
|
* Store a random number generator with a consistent seed for consistent downsampling from run to run.
|
||||||
|
* Note that each shard will be initialized with the same random seed; this will ensure consistent results
|
||||||
|
* across parallelized runs, at the expense of decreasing our level of randomness.
|
||||||
|
*/
|
||||||
|
private Random downsampleRandomizer = new Random(38148309L);
|
||||||
|
|
||||||
|
public ReadStateManager(Iterator<SAMRecord> source, Collection<String> sampleNames, int maxReadsPerSample) {
|
||||||
|
this.iterator = new PeekableIterator<SAMRecord>(source);
|
||||||
|
this.maxReadsPerSample = maxReadsPerSample;
|
||||||
|
for(String sampleName: sampleNames)
|
||||||
|
downsamplersBySampleName.put(sampleName,new ReservoirDownsampler<SAMRecord>(maxReadsPerSample));
|
||||||
|
this.readStatesByAlignmentStart = new LinkedList<Map<String,List<SAMRecordState>>>();
|
||||||
|
}
|
||||||
|
|
||||||
|
public Iterator<SAMRecordState> iterator() {
|
||||||
|
return new Iterator<SAMRecordState>() {
|
||||||
|
private final Iterator<Map<String,List<SAMRecordState>>> alignmentStartIterator;
|
||||||
|
private Iterator<List<SAMRecordState>> sampleIterator;
|
||||||
|
private Iterator<SAMRecordState> readStateIterator;
|
||||||
|
private SAMRecordState nextReadState;
|
||||||
|
private int readsInHanger = countReadsInHanger();
|
||||||
|
|
||||||
|
{
|
||||||
|
pruneEmptyElementsInHanger();
|
||||||
|
alignmentStartIterator = readStatesByAlignmentStart.iterator();
|
||||||
|
sampleIterator = alignmentStartIterator.hasNext() ? alignmentStartIterator.next().values().iterator() : null;
|
||||||
|
readStateIterator = (sampleIterator!=null && sampleIterator.hasNext()) ? sampleIterator.next().iterator() : null;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean hasNext() {
|
||||||
|
return readsInHanger > 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
public SAMRecordState next() {
|
||||||
|
advance();
|
||||||
|
if(nextReadState==null) throw new NoSuchElementException("reader is out of elements");
|
||||||
|
try {
|
||||||
|
return nextReadState;
|
||||||
|
}
|
||||||
|
finally {
|
||||||
|
nextReadState = null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void remove() {
|
||||||
|
if(readStateIterator == null)
|
||||||
|
throw new StingException("Attempted to remove read, but no previous read was found.");
|
||||||
|
readStateIterator.remove();
|
||||||
|
}
|
||||||
|
|
||||||
|
private void advance() {
|
||||||
|
nextReadState = null;
|
||||||
|
if(readStateIterator!=null && readStateIterator.hasNext())
|
||||||
|
nextReadState = readStateIterator.next();
|
||||||
|
else if(sampleIterator!=null && sampleIterator.hasNext()) {
|
||||||
|
readStateIterator = sampleIterator.next().iterator();
|
||||||
|
nextReadState = readStateIterator.hasNext() ? readStateIterator.next() : null;
|
||||||
|
}
|
||||||
|
else if(alignmentStartIterator!=null && alignmentStartIterator.hasNext()) {
|
||||||
|
sampleIterator = alignmentStartIterator.next().values().iterator();
|
||||||
|
readStateIterator = sampleIterator.hasNext() ? sampleIterator.next().iterator() : null;
|
||||||
|
nextReadState = (readStateIterator!=null && readStateIterator.hasNext()) ? readStateIterator.next() : null;
|
||||||
|
}
|
||||||
|
|
||||||
|
if(nextReadState != null) readsInHanger--;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isEmpty() {
|
||||||
|
pruneEmptyElementsInHanger();
|
||||||
|
return readStatesByAlignmentStart.isEmpty();
|
||||||
|
}
|
||||||
|
|
||||||
|
public int size() {
|
||||||
|
int size = 0;
|
||||||
|
for(Map<String,List<SAMRecordState>> readStatesBySample: readStatesByAlignmentStart) {
|
||||||
|
for(Collection<SAMRecordState> readStates: readStatesBySample.values())
|
||||||
|
size += readStates.size();
|
||||||
|
}
|
||||||
|
return size;
|
||||||
|
}
|
||||||
|
|
||||||
|
public SAMRecordState getFirst() {
|
||||||
|
return iterator().next();
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean hasNext() {
|
||||||
|
pruneEmptyElementsInHanger();
|
||||||
|
return !readStatesByAlignmentStart.isEmpty() || iterator.hasNext();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void collectPendingReads() {
|
||||||
|
while (iterator.hasNext() && !readIsPastCurrentPosition(iterator.peek())) {
|
||||||
|
SAMRecord read = iterator.next();
|
||||||
|
downsamplersBySampleName.get(read.getReadGroup().getSample()).add(read);
|
||||||
|
}
|
||||||
|
|
||||||
|
Map<String,List<SAMRecordState>> culledReadStatesBySample = new HashMap<String,List<SAMRecordState>>();
|
||||||
|
|
||||||
|
for(Map.Entry<String,ReservoirDownsampler<SAMRecord>> entry: downsamplersBySampleName.entrySet()) {
|
||||||
|
String sampleName = entry.getKey();
|
||||||
|
ReservoirDownsampler<SAMRecord> downsampler = entry.getValue();
|
||||||
|
|
||||||
|
Collection<SAMRecord> newReads = downsampler.getDownsampledContents();
|
||||||
|
downsampler.clear();
|
||||||
|
int readsInHanger = countReadsInHanger(sampleName);
|
||||||
|
|
||||||
|
if(readsInHanger+newReads.size() <= maxReadsPerSample)
|
||||||
|
addReadsToHanger(culledReadStatesBySample,sampleName,newReads,newReads.size());
|
||||||
|
else {
|
||||||
|
Iterator<Map<String,List<SAMRecordState>>> backIterator = readStatesByAlignmentStart.descendingIterator();
|
||||||
|
boolean readPruned = true;
|
||||||
|
while(readsInHanger+newReads.size()>maxReadsPerSample && readPruned) {
|
||||||
|
readPruned = false;
|
||||||
|
while(readsInHanger+newReads.size()>maxReadsPerSample && backIterator.hasNext()) {
|
||||||
|
List<SAMRecordState> readsAtLocus = backIterator.next().get(sampleName);
|
||||||
|
if(readsAtLocus.size() > 1) {
|
||||||
|
readsAtLocus.remove(downsampleRandomizer.nextInt(readsAtLocus.size()));
|
||||||
|
readPruned = true;
|
||||||
|
readsInHanger--;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if(readsInHanger == maxReadsPerSample) {
|
||||||
|
Collection<SAMRecordState> firstHangerForSample = readStatesByAlignmentStart.getFirst().get(sampleName);
|
||||||
|
readsInHanger -= firstHangerForSample.size();
|
||||||
|
firstHangerForSample.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
addReadsToHanger(culledReadStatesBySample,sampleName,newReads,maxReadsPerSample-readsInHanger);
|
||||||
|
}
|
||||||
|
|
||||||
|
readStatesByAlignmentStart.add(culledReadStatesBySample);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* else {
|
||||||
|
if() {
|
||||||
|
// Consume the collection of reads.
|
||||||
|
downsamplingIterator.next();
|
||||||
|
|
||||||
|
Map<String,Collection<SAMRecord>> newReadsBySample = new HashMap<String,Collection<SAMRecord>>();
|
||||||
|
Map<String,List<SAMRecordState>> culledReadStatesBySample = new HashMap<String,List<SAMRecordState>>();
|
||||||
|
|
||||||
|
for(String sampleName: sampleNames)
|
||||||
|
newReadsBySample.put(sampleName,getReadsForGivenSample(reads,sampleName));
|
||||||
|
|
||||||
|
for(String sampleName: newReadsBySample.keySet()) {
|
||||||
|
Collection<SAMRecord> newReads = newReadsBySample.get(sampleName);
|
||||||
|
int readsInHanger = countReadsInHanger(sampleName);
|
||||||
|
|
||||||
|
//if(readsInHanger+newReads.size() <= maxReadsPerSample)
|
||||||
|
addReadsToHanger(culledReadStatesBySample,sampleName,newReads,newReads.size());
|
||||||
|
Iterator<Map<String,List<SAMRecordState>>> backIterator = readStatesByAlignmentStart.descendingIterator();
|
||||||
|
boolean readPruned = true;
|
||||||
|
while(readsInHanger+newReads.size()>maxReadsPerSample && readPruned) {
|
||||||
|
readPruned = false;
|
||||||
|
while(readsInHanger+newReads.size()>maxReadsPerSample && backIterator.hasNext()) {
|
||||||
|
List<SAMRecordState> readsAtLocus = backIterator.next().get(sampleName);
|
||||||
|
if(readsAtLocus.size() > 1) {
|
||||||
|
readsAtLocus.remove(downsampleRandomizer.nextInt(readsAtLocus.size()));
|
||||||
|
readPruned = true;
|
||||||
|
readsInHanger--;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if(readsInHanger == maxReadsPerSample) {
|
||||||
|
Collection<SAMRecordState> firstHangerForSample = readStatesByAlignmentStart.getFirst().get(sampleName);
|
||||||
|
readsInHanger -= firstHangerForSample.size();
|
||||||
|
firstHangerForSample.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
addReadsToHanger(culledReadStatesBySample,sampleName,newReads,maxReadsPerSample-readsInHanger);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
readStatesByAlignmentStart.add(culledReadStatesBySample);
|
||||||
|
}
|
||||||
|
else if(readIsPastCurrentPosition(reads.iterator().next()))
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
}
|
||||||
|
|
||||||
|
private int countReadsInHanger() {
|
||||||
|
int count = 0;
|
||||||
|
for(Map<String,List<SAMRecordState>> hangerEntry: readStatesByAlignmentStart) {
|
||||||
|
for(List<SAMRecordState> reads: hangerEntry.values())
|
||||||
|
count += reads.size();
|
||||||
|
}
|
||||||
|
return count;
|
||||||
|
}
|
||||||
|
|
||||||
|
private int countReadsInHanger(final String sampleName) {
|
||||||
|
int count = 0;
|
||||||
|
for(Map<String,List<SAMRecordState>> hangerEntry: readStatesByAlignmentStart) {
|
||||||
|
if(sampleName == null && hangerEntry.containsKey(sampleName))
|
||||||
|
count += hangerEntry.get(sampleName).size();
|
||||||
|
}
|
||||||
|
return count;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void addReadsToHanger(final Map<String,List<SAMRecordState>> newHanger, final String sampleName, final Collection<SAMRecord> reads, final int maxReads) {
|
||||||
|
List<SAMRecordState> hangerEntry = new LinkedList<SAMRecordState>();
|
||||||
|
int readCount = 0;
|
||||||
|
for(SAMRecord read: reads) {
|
||||||
|
if(readCount >= maxReads)
|
||||||
|
break;
|
||||||
|
SAMRecordState state = new SAMRecordState(read, readInfo.generateExtendedEvents());
|
||||||
|
state.stepForwardOnGenome();
|
||||||
|
hangerEntry.add(state);
|
||||||
|
// TODO: What if we downsample the extended events away?
|
||||||
|
if (state.hadIndel()) hasExtendedEvents = true;
|
||||||
|
readCount++;
|
||||||
|
}
|
||||||
|
newHanger.put(sampleName,hangerEntry);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void pruneEmptyElementsInHanger() {
|
||||||
|
Iterator<Map<String,List<SAMRecordState>>> hangerIterator = readStatesByAlignmentStart.iterator();
|
||||||
|
while(hangerIterator.hasNext()) {
|
||||||
|
Map<String,List<SAMRecordState>> hangerEntry = hangerIterator.next();
|
||||||
|
for(String sampleName: sampleNames) {
|
||||||
|
if(hangerEntry.containsKey(sampleName) && hangerEntry.get(sampleName).size() == 0)
|
||||||
|
hangerEntry.remove(sampleName);
|
||||||
|
}
|
||||||
|
if(hangerEntry.size() == 0)
|
||||||
|
hangerIterator.remove();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -0,0 +1,80 @@
|
||||||
|
package org.broadinstitute.sting.utils;
|
||||||
|
|
||||||
|
import java.lang.management.ManagementFactory;
|
||||||
|
import java.lang.management.MemoryMXBean;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Monitor the current heap size, allowing the application to programmatically
|
||||||
|
* access the data.
|
||||||
|
*
|
||||||
|
* @author mhanna
|
||||||
|
* @version 0.1
|
||||||
|
*/
|
||||||
|
public class HeapSizeMonitor {
|
||||||
|
private final int monitorFrequencyMillis;
|
||||||
|
private final MonitorRunnable monitorRunnable;
|
||||||
|
|
||||||
|
private Thread monitorThread;
|
||||||
|
|
||||||
|
public HeapSizeMonitor() {
|
||||||
|
this(1000);
|
||||||
|
}
|
||||||
|
|
||||||
|
public HeapSizeMonitor(final int monitorFrequencyMillis) {
|
||||||
|
this.monitorFrequencyMillis = monitorFrequencyMillis;
|
||||||
|
this.monitorRunnable = new MonitorRunnable();
|
||||||
|
}
|
||||||
|
|
||||||
|
public long getMaxMemoryUsed() {
|
||||||
|
return monitorRunnable.getMaxMemoryUsed();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void start() {
|
||||||
|
monitorThread = new Thread(monitorRunnable);
|
||||||
|
monitorThread.start();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void stop() {
|
||||||
|
monitorRunnable.stop = true;
|
||||||
|
try {
|
||||||
|
monitorThread.join();
|
||||||
|
}
|
||||||
|
catch(InterruptedException ex) {
|
||||||
|
throw new StingException("Unable to connect to monitor thread");
|
||||||
|
}
|
||||||
|
monitorThread = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
private class MonitorRunnable implements Runnable {
|
||||||
|
private MemoryMXBean monitor;
|
||||||
|
|
||||||
|
private long maxMemoryUsed;
|
||||||
|
private boolean stop;
|
||||||
|
|
||||||
|
public MonitorRunnable() {
|
||||||
|
monitor = ManagementFactory.getMemoryMXBean();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void reset() {
|
||||||
|
maxMemoryUsed = 0L;
|
||||||
|
stop = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
public long getMaxMemoryUsed() {
|
||||||
|
return maxMemoryUsed;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void run() {
|
||||||
|
while(!stop) {
|
||||||
|
System.gc();
|
||||||
|
maxMemoryUsed = Math.max(monitor.getHeapMemoryUsage().getUsed(),maxMemoryUsed);
|
||||||
|
try {
|
||||||
|
Thread.sleep(monitorFrequencyMillis);
|
||||||
|
}
|
||||||
|
catch(InterruptedException ex) {
|
||||||
|
throw new StingException("Unable to continue monitoring heap consumption",ex);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -9,37 +9,20 @@ import java.util.*;
|
||||||
* naive implementation of reservoir downsampling as described in "Random Downsampling
|
* naive implementation of reservoir downsampling as described in "Random Downsampling
|
||||||
* with a Reservoir" (Vitter 1985). At time of writing, this paper is located here:
|
* with a Reservoir" (Vitter 1985). At time of writing, this paper is located here:
|
||||||
* http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.138.784&rep=rep1&type=pdf
|
* http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.138.784&rep=rep1&type=pdf
|
||||||
*
|
|
||||||
* Contains an enhancement allowing users to partition downsampled data. If a partitioner
|
|
||||||
* is used, each partition will be allowed to contain maxElements elements.
|
|
||||||
*
|
|
||||||
* Note that using the ReservoirDownsampler will leave the given iterator in an undefined
|
|
||||||
* state. Do not attempt to use the iterator (other than closing it) after the Downsampler
|
|
||||||
* completes.
|
|
||||||
*
|
|
||||||
* @author mhanna
|
* @author mhanna
|
||||||
* @version 0.1
|
* @version 0.1
|
||||||
*/
|
*/
|
||||||
public class ReservoirDownsampler<T> implements Iterator<Collection<T>> {
|
public class ReservoirDownsampler<T> implements Collection<T> {
|
||||||
/**
|
/**
|
||||||
* Create a random number generator with a random, but reproducible, seed.
|
* Create a random number generator with a random, but reproducible, seed.
|
||||||
*/
|
*/
|
||||||
private final Random random = new Random(47382911L);
|
private final Random random = new Random(47382911L);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The data source, wrapped in a peekable input stream.
|
* The reservoir of elements tracked by this downsampler.
|
||||||
*/
|
*/
|
||||||
private final PeekableIterator<T> iterator;
|
private final ArrayList<T> reservoir;
|
||||||
|
|
||||||
/**
|
|
||||||
* Used to identify whether two elements are 'equal' in the eyes of the downsampler.
|
|
||||||
*/
|
|
||||||
private final Comparator<T> comparator;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Partitions the elements into subsets, each having an equal number of maxElements.
|
|
||||||
*/
|
|
||||||
private final Partitioner<T> partitioner;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* What is the maximum number of reads that can be returned in a single batch.
|
* What is the maximum number of reads that can be returned in a single batch.
|
||||||
|
|
@ -48,138 +31,105 @@ public class ReservoirDownsampler<T> implements Iterator<Collection<T>> {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a new downsampler with the given source iterator and given comparator.
|
* Create a new downsampler with the given source iterator and given comparator.
|
||||||
* @param iterator Source of the data stream.
|
|
||||||
* @param comparator Used to compare two records to see whether they're 'equal' at this position.
|
|
||||||
* @param maxElements What is the maximum number of reads that can be returned in any partition of any call of this iterator.
|
|
||||||
*/
|
|
||||||
public ReservoirDownsampler(final Iterator<T> iterator, final Comparator<T> comparator, final int maxElements) {
|
|
||||||
this(iterator,comparator,null,maxElements);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Create a new downsampler with the given source iterator and given comparator.
|
|
||||||
* @param iterator Source of the data stream.
|
|
||||||
* @param comparator Used to compare two records to see whether they're 'equal' at this position.
|
|
||||||
* @param partitioner Used to divide the elements into bins. Each bin can have maxElements elements.
|
|
||||||
* @param maxElements What is the maximum number of reads that can be returned in any call of this
|
* @param maxElements What is the maximum number of reads that can be returned in any call of this
|
||||||
*/
|
*/
|
||||||
public ReservoirDownsampler(final Iterator<T> iterator, final Comparator<T> comparator, final Partitioner<T> partitioner, final int maxElements) {
|
public ReservoirDownsampler(final int maxElements) {
|
||||||
this.iterator = new PeekableIterator<T>(iterator);
|
|
||||||
this.comparator = comparator;
|
|
||||||
this.partitioner = partitioner;
|
|
||||||
if(maxElements < 0)
|
if(maxElements < 0)
|
||||||
throw new StingException("Unable to work with an negative size collection of elements");
|
throw new StingException("Unable to work with an negative size collection of elements");
|
||||||
|
this.reservoir = new ArrayList<T>(maxElements);
|
||||||
this.maxElements = maxElements;
|
this.maxElements = maxElements;
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean hasNext() {
|
@Override
|
||||||
return iterator.hasNext();
|
public boolean add(T element) {
|
||||||
}
|
if(maxElements <= 0)
|
||||||
|
return false;
|
||||||
/**
|
else if(reservoir.size() < maxElements) {
|
||||||
* Gets a collection of 'equal' elements, as judged by the comparator. If the number of equal elements
|
reservoir.add(element);
|
||||||
* is greater than the maximum, then the elements in the collection should be a truly random sampling.
|
return true;
|
||||||
* @return Collection of equal elements.
|
|
||||||
*/
|
|
||||||
public Collection<T> next() {
|
|
||||||
if(!hasNext())
|
|
||||||
throw new NoSuchElementException("No next element is present.");
|
|
||||||
|
|
||||||
Map<Object,Partition<T>> partitions = new HashMap<Object,Partition<T>>();
|
|
||||||
|
|
||||||
// Determine our basis of equality.
|
|
||||||
T first = iterator.next();
|
|
||||||
|
|
||||||
if(maxElements > 0)
|
|
||||||
getPartitionForEntry(partitions,first).add(first);
|
|
||||||
|
|
||||||
while(iterator.hasNext() && comparator.compare(first,iterator.peek()) == 0) {
|
|
||||||
T candidate = iterator.next();
|
|
||||||
getPartitionForEntry(partitions,candidate).add(candidate);
|
|
||||||
}
|
}
|
||||||
|
else {
|
||||||
LinkedList<T> batch = new LinkedList<T>();
|
// Get a uniformly distributed int. If the chosen slot lives within the partition, replace the entry in that slot with the newest entry.
|
||||||
for(Partition<T> partition: partitions.values())
|
int slot = random.nextInt(maxElements);
|
||||||
batch.addAll(partition.elements);
|
if(slot >= 0 && slot < maxElements) {
|
||||||
|
reservoir.set(slot,element);
|
||||||
return batch;
|
return true;
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Gets the appropriate partition for the given entry from storage.
|
|
||||||
* @param partitions List of partitions from which to choose.
|
|
||||||
* @param entry Entry for which to compute the partition.
|
|
||||||
* @return The partition associated with this entry. Will be created if not present.
|
|
||||||
*/
|
|
||||||
private Partition<T> getPartitionForEntry(final Map<Object,Partition<T>> partitions, final T entry) {
|
|
||||||
Object partition = partitioner!=null ? partitioner.partition(entry) : null;
|
|
||||||
if(!partitions.containsKey(partition))
|
|
||||||
partitions.put(partition,new Partition<T>(maxElements));
|
|
||||||
return partitions.get(partition);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Unsupported; throws exception to that effect.
|
|
||||||
*/
|
|
||||||
public void remove() {
|
|
||||||
throw new UnsupportedOperationException("Cannot remove from a ReservoirDownsampler.");
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* A common interface for a functor that can take data of
|
|
||||||
* some type and return an object that can be used to partition
|
|
||||||
* that data in some way. Really just a declaration of a
|
|
||||||
* specialized map function.
|
|
||||||
*/
|
|
||||||
public interface Partitioner<T> {
|
|
||||||
public Object partition(T input);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Models a partition of a given set of elements. Knows how to select
|
|
||||||
* random elements with replacement.
|
|
||||||
* @param <T> Data type for the elements of the partition.
|
|
||||||
*/
|
|
||||||
private class Partition<T> {
|
|
||||||
/**
|
|
||||||
* How large can this partition grow?
|
|
||||||
*/
|
|
||||||
private final int partitionSize;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* The elements of the partition.
|
|
||||||
*/
|
|
||||||
private List<T> elements = new ArrayList<T>();
|
|
||||||
|
|
||||||
/**
|
|
||||||
* The total number of elements seen.
|
|
||||||
*/
|
|
||||||
private long elementsSeen = 0;
|
|
||||||
|
|
||||||
public Partition(final int partitionSize) {
|
|
||||||
this.partitionSize = partitionSize;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Add a new element to this collection, downsampling as necessary so that the partition
|
|
||||||
* stays under partitionSize elements.
|
|
||||||
* @param element Element to conditionally add.
|
|
||||||
*/
|
|
||||||
public void add(T element) {
|
|
||||||
if(elements.size() < partitionSize)
|
|
||||||
elements.add(element);
|
|
||||||
else {
|
|
||||||
// Get a uniformly distributed long > 0 and remap it to the range from [0,elementsSeen).
|
|
||||||
long slot = random.nextLong();
|
|
||||||
while(slot == Long.MIN_VALUE)
|
|
||||||
slot = random.nextLong();
|
|
||||||
slot = (long)(((float)Math.abs(slot))/Long.MAX_VALUE * (elementsSeen-1));
|
|
||||||
|
|
||||||
// If the chosen slot lives within the partition, replace the entry in that slot with the newest entry.
|
|
||||||
if(slot >= 0 && slot < partitionSize)
|
|
||||||
elements.set((int)slot,element);
|
|
||||||
}
|
}
|
||||||
elementsSeen++;
|
else
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean addAll(Collection<? extends T> elements) {
|
||||||
|
boolean added = false;
|
||||||
|
for(T element: elements)
|
||||||
|
added |= add(element);
|
||||||
|
return added;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the contents of this reservoir, downsampled to the given value. Note that the return value
|
||||||
|
* @return The downsampled contents of this reservoir.
|
||||||
|
*/
|
||||||
|
public Collection<T> getDownsampledContents() {
|
||||||
|
return Collections.unmodifiableCollection(reservoir);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void clear() {
|
||||||
|
reservoir.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean isEmpty() {
|
||||||
|
return reservoir.isEmpty();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int size() {
|
||||||
|
return reservoir.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Iterator<T> iterator() {
|
||||||
|
return reservoir.iterator();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean contains(Object o) {
|
||||||
|
return reservoir.contains(o);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean containsAll(Collection<?> elements) {
|
||||||
|
return reservoir.containsAll(elements);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean retainAll(Collection<?> elements) {
|
||||||
|
return reservoir.retainAll(elements);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean remove(Object o) {
|
||||||
|
return reservoir.remove(o);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean removeAll(Collection<?> elements) {
|
||||||
|
return reservoir.removeAll(elements);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Object[] toArray() {
|
||||||
|
Object[] contents = new Object[reservoir.size()];
|
||||||
|
reservoir.toArray(contents);
|
||||||
|
return contents;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public <T> T[] toArray(T[] array) {
|
||||||
|
return reservoir.toArray(array);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,7 @@ package org.broadinstitute.sting.utils;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
import org.broadinstitute.sting.utils.sam.AlignmentStartComparator;
|
import org.broadinstitute.sting.utils.sam.AlignmentStartComparator;
|
||||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||||
|
import org.broadinstitute.sting.gatk.iterators.NullSAMIterator;
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
import net.sf.samtools.SAMFileHeader;
|
import net.sf.samtools.SAMFileHeader;
|
||||||
|
|
||||||
|
|
@ -24,19 +25,18 @@ public class ReservoirDownsamplerUnitTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testEmptyIterator() {
|
public void testEmptyIterator() {
|
||||||
ReservoirDownsampler<SAMRecord> downsampler = new ReservoirDownsampler<SAMRecord>(Collections.<SAMRecord>emptyList().iterator(),
|
ReservoirDownsampler<SAMRecord> downsampler = new ReservoirDownsampler<SAMRecord>(1);
|
||||||
new AlignmentStartComparator(),1);
|
Assert.assertTrue("Downsampler is not empty but should be.",downsampler.isEmpty());
|
||||||
Assert.assertFalse("Downsampler is not empty but should be.",downsampler.hasNext());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testOneElementWithPoolSizeOne() {
|
public void testOneElementWithPoolSizeOne() {
|
||||||
List<SAMRecord> reads = Collections.singletonList(ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,76));
|
List<SAMRecord> reads = Collections.singletonList(ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,76));
|
||||||
ReservoirDownsampler<SAMRecord> downsampler = new ReservoirDownsampler<SAMRecord>(reads.iterator(),
|
ReservoirDownsampler<SAMRecord> downsampler = new ReservoirDownsampler<SAMRecord>(1);
|
||||||
new AlignmentStartComparator(),1);
|
downsampler.addAll(reads);
|
||||||
|
|
||||||
Assert.assertTrue("Downsampler is empty but shouldn't be",downsampler.hasNext());
|
Assert.assertFalse("Downsampler is empty but shouldn't be",downsampler.isEmpty());
|
||||||
Collection<SAMRecord> batchedReads = downsampler.next();
|
Collection<SAMRecord> batchedReads = downsampler.getDownsampledContents();
|
||||||
Assert.assertEquals("Downsampler is returning the wrong number of reads",1,batchedReads.size());
|
Assert.assertEquals("Downsampler is returning the wrong number of reads",1,batchedReads.size());
|
||||||
Assert.assertSame("Downsampler is returning an incorrect read",reads.get(0),batchedReads.iterator().next());
|
Assert.assertSame("Downsampler is returning an incorrect read",reads.get(0),batchedReads.iterator().next());
|
||||||
}
|
}
|
||||||
|
|
@ -44,11 +44,11 @@ public class ReservoirDownsamplerUnitTest {
|
||||||
@Test
|
@Test
|
||||||
public void testOneElementWithPoolSizeGreaterThanOne() {
|
public void testOneElementWithPoolSizeGreaterThanOne() {
|
||||||
List<SAMRecord> reads = Collections.singletonList(ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,76));
|
List<SAMRecord> reads = Collections.singletonList(ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,76));
|
||||||
ReservoirDownsampler<SAMRecord> downsampler = new ReservoirDownsampler<SAMRecord>(reads.iterator(),
|
ReservoirDownsampler<SAMRecord> downsampler = new ReservoirDownsampler<SAMRecord>(5);
|
||||||
new AlignmentStartComparator(),5);
|
downsampler.addAll(reads);
|
||||||
|
|
||||||
Assert.assertTrue("Downsampler is empty but shouldn't be",downsampler.hasNext());
|
Assert.assertFalse("Downsampler is empty but shouldn't be",downsampler.isEmpty());
|
||||||
Collection<SAMRecord> batchedReads = downsampler.next();
|
Collection<SAMRecord> batchedReads = downsampler.getDownsampledContents();
|
||||||
Assert.assertEquals("Downsampler is returning the wrong number of reads",1,batchedReads.size());
|
Assert.assertEquals("Downsampler is returning the wrong number of reads",1,batchedReads.size());
|
||||||
Assert.assertSame("Downsampler is returning an incorrect read",reads.get(0),batchedReads.iterator().next());
|
Assert.assertSame("Downsampler is returning an incorrect read",reads.get(0),batchedReads.iterator().next());
|
||||||
|
|
||||||
|
|
@ -60,11 +60,11 @@ public class ReservoirDownsamplerUnitTest {
|
||||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,76));
|
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,76));
|
||||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read2",0,1,76));
|
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read2",0,1,76));
|
||||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read3",0,1,76));
|
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read3",0,1,76));
|
||||||
ReservoirDownsampler<SAMRecord> downsampler = new ReservoirDownsampler<SAMRecord>(reads.iterator(),
|
ReservoirDownsampler<SAMRecord> downsampler = new ReservoirDownsampler<SAMRecord>(5);
|
||||||
new AlignmentStartComparator(),5);
|
downsampler.addAll(reads);
|
||||||
|
|
||||||
Assert.assertTrue("Downsampler is empty but shouldn't be",downsampler.hasNext());
|
Assert.assertFalse("Downsampler is empty but shouldn't be",downsampler.isEmpty());
|
||||||
List<SAMRecord> batchedReads = new ArrayList<SAMRecord>(downsampler.next());
|
List<SAMRecord> batchedReads = new ArrayList<SAMRecord>(downsampler.getDownsampledContents());
|
||||||
Assert.assertEquals("Downsampler is returning the wrong number of reads",3,batchedReads.size());
|
Assert.assertEquals("Downsampler is returning the wrong number of reads",3,batchedReads.size());
|
||||||
|
|
||||||
Assert.assertSame("Downsampler read 1 is incorrect",reads.get(0),batchedReads.get(0));
|
Assert.assertSame("Downsampler read 1 is incorrect",reads.get(0),batchedReads.get(0));
|
||||||
|
|
@ -80,11 +80,11 @@ public class ReservoirDownsamplerUnitTest {
|
||||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read3",0,1,76));
|
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read3",0,1,76));
|
||||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read4",0,1,76));
|
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read4",0,1,76));
|
||||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read5",0,1,76));
|
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read5",0,1,76));
|
||||||
ReservoirDownsampler<SAMRecord> downsampler = new ReservoirDownsampler<SAMRecord>(reads.iterator(),
|
ReservoirDownsampler<SAMRecord> downsampler = new ReservoirDownsampler<SAMRecord>(5);
|
||||||
new AlignmentStartComparator(),5);
|
downsampler.addAll(reads);
|
||||||
|
|
||||||
Assert.assertTrue("Downsampler is empty but shouldn't be",downsampler.hasNext());
|
Assert.assertFalse("Downsampler is empty but shouldn't be",downsampler.isEmpty());
|
||||||
List<SAMRecord> batchedReads = new ArrayList<SAMRecord>(downsampler.next());
|
List<SAMRecord> batchedReads = new ArrayList<SAMRecord>(downsampler.getDownsampledContents());
|
||||||
Assert.assertEquals("Downsampler is returning the wrong number of reads",5,batchedReads.size());
|
Assert.assertEquals("Downsampler is returning the wrong number of reads",5,batchedReads.size());
|
||||||
Assert.assertSame("Downsampler is returning an incorrect read",reads.get(0),batchedReads.iterator().next());
|
Assert.assertSame("Downsampler is returning an incorrect read",reads.get(0),batchedReads.iterator().next());
|
||||||
|
|
||||||
|
|
@ -101,13 +101,12 @@ public class ReservoirDownsamplerUnitTest {
|
||||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,76));
|
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,76));
|
||||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read2",0,1,76));
|
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read2",0,1,76));
|
||||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read3",0,1,76));
|
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read3",0,1,76));
|
||||||
ReservoirDownsampler<SAMRecord> downsampler = new ReservoirDownsampler<SAMRecord>(reads.iterator(),
|
ReservoirDownsampler<SAMRecord> downsampler = new ReservoirDownsampler<SAMRecord>(0);
|
||||||
new AlignmentStartComparator(),0);
|
downsampler.addAll(reads);
|
||||||
|
|
||||||
Assert.assertTrue("Downsampler is empty but shouldn't be",downsampler.hasNext());
|
Assert.assertTrue("Downsampler isn't empty but should be",downsampler.isEmpty());
|
||||||
List<SAMRecord> batchedReads = new ArrayList<SAMRecord>(downsampler.next());
|
List<SAMRecord> batchedReads = new ArrayList<SAMRecord>(downsampler.getDownsampledContents());
|
||||||
Assert.assertEquals("Downsampler is returning the wrong number of reads",0,batchedReads.size());
|
Assert.assertEquals("Downsampler is returning the wrong number of reads",0,batchedReads.size());
|
||||||
Assert.assertFalse("Downsampler is not empty but should be",downsampler.hasNext());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
@ -118,73 +117,52 @@ public class ReservoirDownsamplerUnitTest {
|
||||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read3",0,1,76));
|
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read3",0,1,76));
|
||||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read4",0,1,76));
|
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read4",0,1,76));
|
||||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read5",0,1,76));
|
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read5",0,1,76));
|
||||||
ReservoirDownsampler<SAMRecord> downsampler = new ReservoirDownsampler<SAMRecord>(reads.iterator(),
|
ReservoirDownsampler<SAMRecord> downsampler = new ReservoirDownsampler<SAMRecord>(1);
|
||||||
new AlignmentStartComparator(),1);
|
downsampler.addAll(reads);
|
||||||
|
|
||||||
Assert.assertTrue("Downsampler is empty but shouldn't be",downsampler.hasNext());
|
Assert.assertFalse("Downsampler is empty but shouldn't be",downsampler.isEmpty());
|
||||||
List<SAMRecord> batchedReads = new ArrayList<SAMRecord>(downsampler.next());
|
List<SAMRecord> batchedReads = new ArrayList<SAMRecord>(downsampler.getDownsampledContents());
|
||||||
Assert.assertEquals("Downsampler is returning the wrong number of reads",1,batchedReads.size());
|
Assert.assertEquals("Downsampler is returning the wrong number of reads",1,batchedReads.size());
|
||||||
Assert.assertTrue("Downsampler is returning a bad read.",reads.contains(batchedReads.get(0))) ;
|
Assert.assertTrue("Downsampler is returning a bad read.",reads.contains(batchedReads.get(0))) ;
|
||||||
Assert.assertFalse("Downsampler is not empty but should be",downsampler.hasNext());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testFillingAcrossLoci() {
|
public void testFillingAcrossLoci() {
|
||||||
List<SAMRecord> reads = new ArrayList<SAMRecord>();
|
List<SAMRecord> reads = new ArrayList<SAMRecord>();
|
||||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,76));
|
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,76));
|
||||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read2",0,2,76));
|
ReservoirDownsampler<SAMRecord> downsampler = new ReservoirDownsampler<SAMRecord>(5);
|
||||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read3",0,2,76));
|
downsampler.addAll(reads);
|
||||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read4",0,3,76));
|
|
||||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read5",0,3,76));
|
|
||||||
ReservoirDownsampler<SAMRecord> downsampler = new ReservoirDownsampler<SAMRecord>(reads.iterator(),
|
|
||||||
new AlignmentStartComparator(),5);
|
|
||||||
|
|
||||||
Assert.assertTrue("Downsampler is empty but shouldn't be",downsampler.hasNext());
|
Assert.assertFalse("Downsampler is empty but shouldn't be",downsampler.isEmpty());
|
||||||
List<SAMRecord> batchedReads = new ArrayList<SAMRecord>(downsampler.next());
|
List<SAMRecord> batchedReads = new ArrayList<SAMRecord>(downsampler.getDownsampledContents());
|
||||||
Assert.assertEquals("Downsampler is returning the wrong number of reads",1,batchedReads.size());
|
Assert.assertEquals("Downsampler is returning the wrong number of reads",1,batchedReads.size());
|
||||||
Assert.assertEquals("Downsampler is returning an incorrect read.",reads.get(0),batchedReads.get(0));
|
Assert.assertEquals("Downsampler is returning an incorrect read.",reads.get(0),batchedReads.get(0));
|
||||||
|
|
||||||
Assert.assertTrue("Downsampler is empty but shouldn't be",downsampler.hasNext());
|
reads.clear();
|
||||||
batchedReads = new ArrayList<SAMRecord>(downsampler.next());
|
|
||||||
Assert.assertEquals("Downsampler is returning the wrong number of reads",2,batchedReads.size());
|
|
||||||
Assert.assertEquals("Downsampler is returning an incorrect read.",reads.get(1),batchedReads.get(0));
|
|
||||||
Assert.assertEquals("Downsampler is returning an incorrect read.",reads.get(2),batchedReads.get(1));
|
|
||||||
|
|
||||||
Assert.assertTrue("Downsampler is empty but shouldn't be",downsampler.hasNext());
|
|
||||||
batchedReads = new ArrayList<SAMRecord>(downsampler.next());
|
|
||||||
Assert.assertEquals("Downsampler is returning the wrong number of reads",2,batchedReads.size());
|
|
||||||
Assert.assertEquals("Downsampler is returning an incorrect read.",reads.get(3),batchedReads.get(0));
|
|
||||||
Assert.assertEquals("Downsampler is returning an incorrect read.",reads.get(4),batchedReads.get(1));
|
|
||||||
|
|
||||||
Assert.assertFalse("Downsampler is not empty but should be",downsampler.hasNext());
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testDownsamplingAcrossLoci() {
|
|
||||||
List<SAMRecord> reads = new ArrayList<SAMRecord>();
|
|
||||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,76));
|
|
||||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read2",0,2,76));
|
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read2",0,2,76));
|
||||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read3",0,2,76));
|
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read3",0,2,76));
|
||||||
|
|
||||||
|
downsampler.clear();
|
||||||
|
downsampler.addAll(reads);
|
||||||
|
|
||||||
|
Assert.assertFalse("Downsampler is empty but shouldn't be",downsampler.isEmpty());
|
||||||
|
batchedReads = new ArrayList<SAMRecord>(downsampler.getDownsampledContents());
|
||||||
|
Assert.assertEquals("Downsampler is returning the wrong number of reads",2,batchedReads.size());
|
||||||
|
Assert.assertEquals("Downsampler is returning an incorrect read.",reads.get(0),batchedReads.get(0));
|
||||||
|
Assert.assertEquals("Downsampler is returning an incorrect read.",reads.get(1),batchedReads.get(1));
|
||||||
|
|
||||||
|
reads.clear();
|
||||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read4",0,3,76));
|
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read4",0,3,76));
|
||||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read5",0,3,76));
|
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read5",0,3,76));
|
||||||
ReservoirDownsampler<SAMRecord> downsampler = new ReservoirDownsampler<SAMRecord>(reads.iterator(),
|
|
||||||
new AlignmentStartComparator(),1);
|
|
||||||
|
|
||||||
Assert.assertTrue("Downsampler is empty but shouldn't be",downsampler.hasNext());
|
downsampler.clear();
|
||||||
List<SAMRecord> batchedReads = new ArrayList<SAMRecord>(downsampler.next());
|
downsampler.addAll(reads);
|
||||||
Assert.assertEquals("Downsampler is returning the wrong number of reads",1,batchedReads.size());
|
|
||||||
|
Assert.assertFalse("Downsampler is empty but shouldn't be",downsampler.isEmpty());
|
||||||
|
batchedReads = new ArrayList<SAMRecord>(downsampler.getDownsampledContents());
|
||||||
|
Assert.assertEquals("Downsampler is returning the wrong number of reads",2,batchedReads.size());
|
||||||
Assert.assertEquals("Downsampler is returning an incorrect read.",reads.get(0),batchedReads.get(0));
|
Assert.assertEquals("Downsampler is returning an incorrect read.",reads.get(0),batchedReads.get(0));
|
||||||
|
Assert.assertEquals("Downsampler is returning an incorrect read.",reads.get(1),batchedReads.get(1));
|
||||||
Assert.assertTrue("Downsampler is empty but shouldn't be",downsampler.hasNext());
|
|
||||||
batchedReads = new ArrayList<SAMRecord>(downsampler.next());
|
|
||||||
Assert.assertEquals("Downsampler is returning the wrong number of reads",1,batchedReads.size());
|
|
||||||
Assert.assertTrue("Downsampler is returning an incorrect read.",batchedReads.get(0).equals(reads.get(1)) || batchedReads.get(0).equals(reads.get(2)));
|
|
||||||
|
|
||||||
Assert.assertTrue("Downsampler is empty but shouldn't be",downsampler.hasNext());
|
|
||||||
batchedReads = new ArrayList<SAMRecord>(downsampler.next());
|
|
||||||
Assert.assertEquals("Downsampler is returning the wrong number of reads",1,batchedReads.size());
|
|
||||||
Assert.assertTrue("Downsampler is returning an incorrect read.",batchedReads.get(0).equals(reads.get(3)) || batchedReads.get(0).equals(reads.get(4)));
|
|
||||||
|
|
||||||
Assert.assertFalse("Downsampler is not empty but should be",downsampler.hasNext());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue