Collapse the downsampling fork in the GATK engine
With LegacyLocusIteratorByState deleted, the legacy downsampling implementation was already non-functional. This commit removes all remaining code in the engine belonging to the legacy implementation.
This commit is contained in:
parent
5003deafb6
commit
3744d1a596
|
|
@ -434,12 +434,9 @@ public class GenomeAnalysisEngine {
|
|||
protected DownsamplingMethod getDownsamplingMethod() {
|
||||
GATKArgumentCollection argCollection = this.getArguments();
|
||||
|
||||
// Legacy downsampler can only be selected via the command line, not via walker annotations
|
||||
boolean useLegacyDownsampler = argCollection.useLegacyDownsampler;
|
||||
|
||||
DownsamplingMethod commandLineMethod = argCollection.getDownsamplingMethod();
|
||||
DownsamplingMethod walkerMethod = WalkerManager.getDownsamplingMethod(walker, useLegacyDownsampler);
|
||||
DownsamplingMethod defaultMethod = DownsamplingMethod.getDefaultDownsamplingMethod(walker, useLegacyDownsampler);
|
||||
DownsamplingMethod walkerMethod = WalkerManager.getDownsamplingMethod(walker);
|
||||
DownsamplingMethod defaultMethod = DownsamplingMethod.getDefaultDownsamplingMethod(walker);
|
||||
|
||||
DownsamplingMethod method = commandLineMethod != null ? commandLineMethod : (walkerMethod != null ? walkerMethod : defaultMethod);
|
||||
method.checkCompatibilityWithWalker(walker);
|
||||
|
|
@ -572,15 +569,10 @@ public class GenomeAnalysisEngine {
|
|||
throw new UserException.CommandLineException("Pairs traversal cannot be used in conjunction with intervals.");
|
||||
}
|
||||
|
||||
// Use the legacy ReadShardBalancer if legacy downsampling is enabled
|
||||
ShardBalancer readShardBalancer = downsamplingMethod != null && downsamplingMethod.useLegacyDownsampler ?
|
||||
new LegacyReadShardBalancer() :
|
||||
new ReadShardBalancer();
|
||||
|
||||
if(intervals == null)
|
||||
return readsDataSource.createShardIteratorOverAllReads(readShardBalancer);
|
||||
return readsDataSource.createShardIteratorOverAllReads(new ReadShardBalancer());
|
||||
else
|
||||
return readsDataSource.createShardIteratorOverIntervals(intervals, readShardBalancer);
|
||||
return readsDataSource.createShardIteratorOverIntervals(intervals, new ReadShardBalancer());
|
||||
}
|
||||
else
|
||||
throw new ReviewedStingException("Unable to determine walker type for walker " + walker.getClass().getName());
|
||||
|
|
@ -793,7 +785,7 @@ public class GenomeAnalysisEngine {
|
|||
DownsamplingMethod downsamplingMethod = getDownsamplingMethod();
|
||||
|
||||
// Synchronize the method back into the collection so that it shows up when
|
||||
// interrogating for the downsample method during command line recreation.
|
||||
// interrogating for the downsampling method during command line recreation.
|
||||
setDownsamplingMethod(downsamplingMethod);
|
||||
|
||||
logger.info(downsamplingMethod);
|
||||
|
|
|
|||
|
|
@ -306,11 +306,10 @@ public class WalkerManager extends PluginManager<Walker> {
|
|||
* downsampling method is specified on the command-line, the command-line version will
|
||||
* be used instead.
|
||||
* @param walker The walker to interrogate.
|
||||
* @param useLegacyDownsampler If true, use the legacy downsampling implementation
|
||||
* @return The downsampling method, as specified by the walker. Null if none exists.
|
||||
*/
|
||||
public static DownsamplingMethod getDownsamplingMethod(Walker walker, boolean useLegacyDownsampler) {
|
||||
return getDownsamplingMethod(walker.getClass(), useLegacyDownsampler);
|
||||
public static DownsamplingMethod getDownsamplingMethod( Walker walker ) {
|
||||
return getDownsamplingMethod(walker.getClass());
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -318,10 +317,9 @@ public class WalkerManager extends PluginManager<Walker> {
|
|||
* downsampling method is specified on the command-line, the command-line version will
|
||||
* be used instead.
|
||||
* @param walkerClass The class of the walker to interrogate.
|
||||
* @param useLegacyDownsampler If true, use the legacy downsampling implementation
|
||||
* @return The downsampling method, as specified by the walker. Null if none exists.
|
||||
*/
|
||||
public static DownsamplingMethod getDownsamplingMethod(Class<? extends Walker> walkerClass, boolean useLegacyDownsampler) {
|
||||
public static DownsamplingMethod getDownsamplingMethod( Class<? extends Walker> walkerClass ) {
|
||||
DownsamplingMethod downsamplingMethod = null;
|
||||
|
||||
if( walkerClass.isAnnotationPresent(Downsample.class) ) {
|
||||
|
|
@ -329,7 +327,7 @@ public class WalkerManager extends PluginManager<Walker> {
|
|||
DownsampleType type = downsampleParameters.by();
|
||||
Integer toCoverage = downsampleParameters.toCoverage() >= 0 ? downsampleParameters.toCoverage() : null;
|
||||
Double toFraction = downsampleParameters.toFraction() >= 0.0d ? downsampleParameters.toFraction() : null;
|
||||
downsamplingMethod = new DownsamplingMethod(type,toCoverage,toFraction,useLegacyDownsampler);
|
||||
downsamplingMethod = new DownsamplingMethod(type, toCoverage, toFraction);
|
||||
}
|
||||
|
||||
return downsamplingMethod;
|
||||
|
|
|
|||
|
|
@ -126,9 +126,6 @@ public class GATKArgumentCollection {
|
|||
@Argument(fullName = "downsample_to_coverage", shortName = "dcov", doc = "Coverage [integer] to downsample to at any given locus; note that downsampled reads are randomly selected from all possible reads at a locus. For non-locus-based traversals (eg., ReadWalkers), this sets the maximum number of reads at each alignment start position.", required = false)
|
||||
public Integer downsampleCoverage = null;
|
||||
|
||||
@Argument(fullName = "use_legacy_downsampler", shortName = "use_legacy_downsampler", doc = "Use the legacy downsampling implementation instead of the newer, less-tested implementation", required = false)
|
||||
public boolean useLegacyDownsampler = false;
|
||||
|
||||
/**
|
||||
* Gets the downsampling method explicitly specified by the user. If the user didn't specify
|
||||
* any downsampling arguments (type, coverage, or fraction), return null.
|
||||
|
|
@ -138,7 +135,7 @@ public class GATKArgumentCollection {
|
|||
if ( downsamplingType == null && downsampleFraction == null && downsampleCoverage == null )
|
||||
return null;
|
||||
|
||||
return new DownsamplingMethod(downsamplingType, downsampleCoverage, downsampleFraction, useLegacyDownsampler);
|
||||
return new DownsamplingMethod(downsamplingType, downsampleCoverage, downsampleFraction);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -152,7 +149,6 @@ public class GATKArgumentCollection {
|
|||
downsamplingType = method.type;
|
||||
downsampleCoverage = method.toCoverage;
|
||||
downsampleFraction = method.toFraction;
|
||||
useLegacyDownsampler = method.useLegacyDownsampler;
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -162,14 +162,6 @@ public abstract class LocusView extends LocusIterator implements View {
|
|||
// Cache the current and apply filtering.
|
||||
AlignmentContext current = nextLocus;
|
||||
|
||||
// The old ALL_READS downsampling implementation -- use only if legacy downsampling was requested:
|
||||
if ( sourceInfo.getDownsamplingMethod().useLegacyDownsampler &&
|
||||
sourceInfo.getDownsamplingMethod().type == DownsampleType.ALL_READS &&
|
||||
sourceInfo.getDownsamplingMethod().toCoverage != null ) {
|
||||
|
||||
current.downsampleToCoverage(sourceInfo.getDownsamplingMethod().toCoverage);
|
||||
}
|
||||
|
||||
// Indicate that the next operation will need to advance.
|
||||
nextLocus = null;
|
||||
|
||||
|
|
|
|||
|
|
@ -133,14 +133,7 @@ public class BAMScheduler implements Iterator<FilePointer> {
|
|||
|
||||
Map<SAMReaderID,GATKBAMFileSpan> currentPosition;
|
||||
|
||||
// Only use the deprecated SAMDataSource.getCurrentPosition() if we're not using experimental downsampling
|
||||
// TODO: clean this up once the experimental downsampling engine fork collapses
|
||||
if ( dataSource.getReadsInfo().getDownsamplingMethod() != null && dataSource.getReadsInfo().getDownsamplingMethod().useLegacyDownsampler ) {
|
||||
currentPosition = dataSource.getCurrentPosition();
|
||||
}
|
||||
else {
|
||||
currentPosition = dataSource.getInitialReaderPositions();
|
||||
}
|
||||
currentPosition = dataSource.getInitialReaderPositions();
|
||||
|
||||
for(SAMReaderID reader: dataSource.getReaderIDs())
|
||||
filePointer.addFileSpans(reader,createSpanToEndOfFile(currentPosition.get(reader).getGATKChunks().get(0).getChunkStart()));
|
||||
|
|
|
|||
|
|
@ -1,130 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.datasources.reads;
|
||||
|
||||
import net.sf.samtools.GATKBAMFileSpan;
|
||||
import net.sf.samtools.SAMFileSpan;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.Map;
|
||||
import java.util.NoSuchElementException;
|
||||
|
||||
/**
|
||||
* Divide up large file pointers containing reads into more manageable subcomponents.
|
||||
*
|
||||
* TODO: delete this class once the experimental downsampling engine fork collapses
|
||||
*/
|
||||
public class LegacyReadShardBalancer extends ShardBalancer {
|
||||
/**
|
||||
* Convert iterators of file pointers into balanced iterators of shards.
|
||||
* @return An iterator over balanced shards.
|
||||
*/
|
||||
public Iterator<Shard> iterator() {
|
||||
return new Iterator<Shard>() {
|
||||
/**
|
||||
* The cached shard to be returned next. Prefetched in the peekable iterator style.
|
||||
*/
|
||||
private Shard nextShard = null;
|
||||
|
||||
/**
|
||||
* The file pointer currently being processed.
|
||||
*/
|
||||
private FilePointer currentFilePointer;
|
||||
|
||||
/**
|
||||
* Ending position of the last shard in the file.
|
||||
*/
|
||||
private Map<SAMReaderID,GATKBAMFileSpan> position = readsDataSource.getCurrentPosition();
|
||||
|
||||
{
|
||||
if(filePointers.hasNext())
|
||||
currentFilePointer = filePointers.next();
|
||||
advance();
|
||||
}
|
||||
|
||||
public boolean hasNext() {
|
||||
return nextShard != null;
|
||||
}
|
||||
|
||||
public Shard next() {
|
||||
if(!hasNext())
|
||||
throw new NoSuchElementException("No next read shard available");
|
||||
Shard currentShard = nextShard;
|
||||
advance();
|
||||
return currentShard;
|
||||
}
|
||||
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException("Unable to remove from shard balancing iterator");
|
||||
}
|
||||
|
||||
private void advance() {
|
||||
Map<SAMReaderID,SAMFileSpan> shardPosition;
|
||||
nextShard = null;
|
||||
|
||||
Map<SAMReaderID,SAMFileSpan> selectedReaders = new HashMap<SAMReaderID,SAMFileSpan>();
|
||||
while(selectedReaders.size() == 0 && currentFilePointer != null) {
|
||||
shardPosition = currentFilePointer.fileSpans;
|
||||
|
||||
for(SAMReaderID id: shardPosition.keySet()) {
|
||||
SAMFileSpan fileSpan = new GATKBAMFileSpan(shardPosition.get(id).removeContentsBefore(position.get(id)));
|
||||
selectedReaders.put(id,fileSpan);
|
||||
}
|
||||
|
||||
if(!isEmpty(selectedReaders)) {
|
||||
Shard shard = new ReadShard(parser,readsDataSource,selectedReaders,currentFilePointer.locations,currentFilePointer.isRegionUnmapped);
|
||||
readsDataSource.fillShard(shard);
|
||||
|
||||
if(!shard.isBufferEmpty()) {
|
||||
nextShard = shard;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
selectedReaders.clear();
|
||||
currentFilePointer = filePointers.hasNext() ? filePointers.next() : null;
|
||||
}
|
||||
|
||||
position = readsDataSource.getCurrentPosition();
|
||||
}
|
||||
|
||||
/**
|
||||
* Detects whether the list of file spans contain any read data.
|
||||
* @param selectedSpans Mapping of readers to file spans.
|
||||
* @return True if file spans are completely empty; false otherwise.
|
||||
*/
|
||||
private boolean isEmpty(Map<SAMReaderID,SAMFileSpan> selectedSpans) {
|
||||
for(SAMFileSpan fileSpan: selectedSpans.values()) {
|
||||
if(!fileSpan.isEmpty())
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -97,13 +97,6 @@ public class SAMDataSource {
|
|||
*/
|
||||
private final Map<SAMReaderID,GATKBAMIndex> bamIndices = new HashMap<SAMReaderID,GATKBAMIndex>();
|
||||
|
||||
/**
|
||||
* How far along is each reader?
|
||||
*
|
||||
* TODO: delete this once the experimental downsampling engine fork collapses
|
||||
*/
|
||||
private final Map<SAMReaderID,GATKBAMFileSpan> readerPositions = new HashMap<SAMReaderID,GATKBAMFileSpan>();
|
||||
|
||||
/**
|
||||
* The merged header.
|
||||
*/
|
||||
|
|
@ -298,8 +291,6 @@ public class SAMDataSource {
|
|||
this.sortOrder = sortOrder;
|
||||
}
|
||||
|
||||
initializeReaderPositions(readers);
|
||||
|
||||
mergedHeader = readers.getMergedHeader();
|
||||
hasReadGroupCollisions = readers.hasReadGroupCollisions();
|
||||
|
||||
|
|
@ -387,17 +378,6 @@ public class SAMDataSource {
|
|||
return resourcePool.getReaderID(read.getFileSource().getReader());
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves the current position within the BAM file.
|
||||
* @return A mapping of reader to current position.
|
||||
*
|
||||
* TODO: delete this once the experimental downsampling engine fork collapses
|
||||
*/
|
||||
@Deprecated
|
||||
public Map<SAMReaderID,GATKBAMFileSpan> getCurrentPosition() {
|
||||
return readerPositions;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the merged header from the SAM file.
|
||||
* @return The merged header.
|
||||
|
|
@ -475,67 +455,6 @@ public class SAMDataSource {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Legacy method to fill the given buffering shard with reads.
|
||||
*
|
||||
* Shard.fill() is used instead of this method unless legacy downsampling is enabled
|
||||
*
|
||||
* TODO: delete this method once the experimental downsampling engine fork collapses
|
||||
*
|
||||
* @param shard Shard to fill.
|
||||
*/
|
||||
@Deprecated
|
||||
public void fillShard(Shard shard) {
|
||||
if(!shard.buffersReads())
|
||||
throw new ReviewedStingException("Attempting to fill a non-buffering shard.");
|
||||
|
||||
SAMReaders readers = resourcePool.getAvailableReaders();
|
||||
// Cache the most recently viewed read so that we can check whether we've reached the end of a pair.
|
||||
SAMRecord read = null;
|
||||
|
||||
Map<SAMFileReader,GATKBAMFileSpan> positionUpdates = new IdentityHashMap<SAMFileReader,GATKBAMFileSpan>();
|
||||
|
||||
CloseableIterator<SAMRecord> iterator = getIterator(readers,shard,sortOrder == SAMFileHeader.SortOrder.coordinate);
|
||||
while(!shard.isBufferFull() && iterator.hasNext()) {
|
||||
final SAMRecord nextRead = iterator.next();
|
||||
if ( read == null || (nextRead.getReferenceIndex().equals(read.getReferenceIndex())) ) {
|
||||
// only add reads to the shard if they are on the same contig
|
||||
read = nextRead;
|
||||
shard.addRead(read);
|
||||
noteFilePositionUpdate(positionUpdates,read);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// If the reads are sorted in queryname order, ensure that all reads
|
||||
// having the same queryname become part of the same shard.
|
||||
if(sortOrder == SAMFileHeader.SortOrder.queryname) {
|
||||
while(iterator.hasNext()) {
|
||||
SAMRecord nextRead = iterator.next();
|
||||
if(read == null || !read.getReadName().equals(nextRead.getReadName()))
|
||||
break;
|
||||
shard.addRead(nextRead);
|
||||
noteFilePositionUpdate(positionUpdates,nextRead);
|
||||
}
|
||||
}
|
||||
|
||||
iterator.close();
|
||||
|
||||
// Make the updates specified by the reader.
|
||||
for(Map.Entry<SAMFileReader,GATKBAMFileSpan> positionUpdate: positionUpdates.entrySet())
|
||||
readerPositions.put(readers.getReaderID(positionUpdate.getKey()),positionUpdate.getValue());
|
||||
}
|
||||
|
||||
/*
|
||||
* TODO: delete this method once the experimental downsampling engine fork collapses
|
||||
*/
|
||||
@Deprecated
|
||||
private void noteFilePositionUpdate(Map<SAMFileReader,GATKBAMFileSpan> positionMapping, SAMRecord read) {
|
||||
GATKBAMFileSpan endChunk = new GATKBAMFileSpan(read.getFileSource().getFilePointer().getContentsFollowing());
|
||||
positionMapping.put(read.getFileSource().getReader(),endChunk);
|
||||
}
|
||||
|
||||
public StingSAMIterator seek(Shard shard) {
|
||||
if(shard.buffersReads()) {
|
||||
return shard.iterator();
|
||||
|
|
@ -559,19 +478,6 @@ public class SAMDataSource {
|
|||
throw new ReviewedStingException("Unable to find id for reader associated with read " + read.getReadName());
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize the current reader positions
|
||||
*
|
||||
* TODO: delete this once the experimental downsampling engine fork collapses
|
||||
*
|
||||
* @param readers
|
||||
*/
|
||||
@Deprecated
|
||||
private void initializeReaderPositions(SAMReaders readers) {
|
||||
for(SAMReaderID id: getReaderIDs())
|
||||
readerPositions.put(id,new GATKBAMFileSpan(readers.getReader(id).getFilePointerSpanningReads()));
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the initial reader positions across all BAM files
|
||||
*
|
||||
|
|
@ -646,7 +552,6 @@ public class SAMDataSource {
|
|||
enableVerification,
|
||||
readProperties.useOriginalBaseQualities(),
|
||||
new ReleasingIterator(readers,StingSAMIteratorAdapter.adapt(mergingIterator)),
|
||||
readProperties.getDownsamplingMethod().toFraction,
|
||||
readProperties.getValidationExclusionList().contains(ValidationExclusion.TYPE.NO_READ_ORDER_VERIFICATION),
|
||||
readProperties.getSupplementalFilters(),
|
||||
readProperties.getReadTransformers(),
|
||||
|
|
@ -704,7 +609,6 @@ public class SAMDataSource {
|
|||
* @param enableVerification Verify the order of reads.
|
||||
* @param useOriginalBaseQualities True if original base qualities should be used.
|
||||
* @param wrappedIterator the raw data source.
|
||||
* @param downsamplingFraction whether and how much to downsample the reads themselves (not at a locus).
|
||||
* @param noValidationOfReadOrder Another trigger for the verifying iterator? TODO: look into this.
|
||||
* @param supplementalFilters additional filters to apply to the reads.
|
||||
* @param defaultBaseQualities if the reads have incomplete quality scores, set them all to defaultBaseQuality.
|
||||
|
|
@ -715,7 +619,6 @@ public class SAMDataSource {
|
|||
boolean enableVerification,
|
||||
boolean useOriginalBaseQualities,
|
||||
StingSAMIterator wrappedIterator,
|
||||
Double downsamplingFraction,
|
||||
Boolean noValidationOfReadOrder,
|
||||
Collection<ReadFilter> supplementalFilters,
|
||||
List<ReadTransformer> readTransformers,
|
||||
|
|
@ -727,30 +630,25 @@ public class SAMDataSource {
|
|||
// * (otherwise we will process something that we may end up throwing away) * //
|
||||
// ************************************************************************************************ //
|
||||
|
||||
// Filters:
|
||||
wrappedIterator = StingSAMIteratorAdapter.adapt(new CountingFilteringIterator(readMetrics,wrappedIterator,supplementalFilters));
|
||||
|
||||
// If we're using the new downsampling implementation, apply downsampling iterators at this
|
||||
// point in the read stream for most (but not all) cases
|
||||
if ( ! readProperties.getDownsamplingMethod().useLegacyDownsampler ) {
|
||||
// Downsampling:
|
||||
|
||||
// For locus traversals where we're downsampling to coverage by sample, assume that the downsamplers
|
||||
// will be invoked downstream from us in LocusIteratorByState. This improves performance by avoiding
|
||||
// splitting/re-assembly of the read stream at this stage, and also allows for partial downsampling
|
||||
// of individual reads.
|
||||
boolean assumeDownstreamLIBSDownsampling = isLocusBasedTraversal &&
|
||||
readProperties.getDownsamplingMethod().type == DownsampleType.BY_SAMPLE &&
|
||||
readProperties.getDownsamplingMethod().toCoverage != null;
|
||||
// For locus traversals where we're downsampling to coverage by sample, assume that the downsamplers
|
||||
// will be invoked downstream from us in LocusIteratorByState. This improves performance by avoiding
|
||||
// splitting/re-assembly of the read stream at this stage, and also allows for partial downsampling
|
||||
// of individual reads.
|
||||
boolean assumeDownstreamLIBSDownsampling = isLocusBasedTraversal &&
|
||||
readProperties.getDownsamplingMethod().type == DownsampleType.BY_SAMPLE &&
|
||||
readProperties.getDownsamplingMethod().toCoverage != null;
|
||||
|
||||
if ( ! assumeDownstreamLIBSDownsampling ) {
|
||||
wrappedIterator = applyDownsamplingIterator(wrappedIterator);
|
||||
}
|
||||
// Apply downsampling iterators here only in cases where we know that LocusIteratorByState won't be
|
||||
// doing any downsampling downstream of us
|
||||
if ( ! assumeDownstreamLIBSDownsampling ) {
|
||||
wrappedIterator = applyDownsamplingIterator(wrappedIterator);
|
||||
}
|
||||
|
||||
// Use the old fractional downsampler only if we're using legacy downsampling:
|
||||
// TODO: remove this statement (and associated classes) once the downsampling engine fork collapses
|
||||
if ( readProperties.getDownsamplingMethod().useLegacyDownsampler && downsamplingFraction != null )
|
||||
wrappedIterator = new LegacyDownsampleIterator(wrappedIterator, downsamplingFraction);
|
||||
|
||||
// unless they've said not to validate read ordering (!noValidationOfReadOrder) and we've enabled verification,
|
||||
// verify the read ordering by applying a sort order iterator
|
||||
if (!noValidationOfReadOrder && enableVerification)
|
||||
|
|
|
|||
|
|
@ -50,35 +50,43 @@ public class DownsamplingMethod {
|
|||
*/
|
||||
public final Double toFraction;
|
||||
|
||||
/**
|
||||
* Use the legacy downsampling implementation instead of the newer implementation?
|
||||
*/
|
||||
public final boolean useLegacyDownsampler;
|
||||
|
||||
/**
|
||||
* Expresses no downsampling applied at all.
|
||||
*/
|
||||
public static final DownsamplingMethod NONE = new DownsamplingMethod(DownsampleType.NONE,null,null,false);
|
||||
public static final DownsamplingMethod NONE = new DownsamplingMethod(DownsampleType.NONE, null, null);
|
||||
|
||||
/**
|
||||
* Default type to use if no type is specified
|
||||
*/
|
||||
public static DownsampleType DEFAULT_DOWNSAMPLING_TYPE = DownsampleType.BY_SAMPLE;
|
||||
public static final DownsampleType DEFAULT_DOWNSAMPLING_TYPE = DownsampleType.BY_SAMPLE;
|
||||
|
||||
/**
|
||||
* Default target coverage for locus-based traversals
|
||||
*/
|
||||
public static int DEFAULT_LOCUS_BASED_TRAVERSAL_DOWNSAMPLING_COVERAGE = 1000;
|
||||
public static final int DEFAULT_LOCUS_TRAVERSAL_DOWNSAMPLING_COVERAGE = 1000;
|
||||
|
||||
public DownsamplingMethod( DownsampleType type, Integer toCoverage, Double toFraction, boolean useLegacyDownsampler ) {
|
||||
/**
|
||||
* Default downsampling method for locus-based traversals
|
||||
*/
|
||||
public static final DownsamplingMethod DEFAULT_LOCUS_TRAVERSAL_DOWNSAMPLING_METHOD =
|
||||
new DownsamplingMethod(DEFAULT_DOWNSAMPLING_TYPE, DEFAULT_LOCUS_TRAVERSAL_DOWNSAMPLING_COVERAGE, null);
|
||||
|
||||
/**
|
||||
* Default downsampling method for read-based traversals
|
||||
*/
|
||||
public static final DownsamplingMethod DEFAULT_READ_TRAVERSAL_DOWNSAMPLING_METHOD = NONE;
|
||||
|
||||
|
||||
public DownsamplingMethod( DownsampleType type, Integer toCoverage, Double toFraction ) {
|
||||
this.type = type != null ? type : DEFAULT_DOWNSAMPLING_TYPE;
|
||||
this.toCoverage = toCoverage;
|
||||
this.toFraction = toFraction;
|
||||
this.useLegacyDownsampler = useLegacyDownsampler;
|
||||
|
||||
if ( type == DownsampleType.NONE ) {
|
||||
toCoverage = null;
|
||||
toFraction = null;
|
||||
this.toCoverage = null;
|
||||
this.toFraction = null;
|
||||
}
|
||||
else {
|
||||
this.toCoverage = toCoverage;
|
||||
this.toFraction = toFraction;
|
||||
}
|
||||
|
||||
validate();
|
||||
|
|
@ -87,34 +95,28 @@ public class DownsamplingMethod {
|
|||
private void validate() {
|
||||
// Can't leave toFraction and toCoverage null unless type is NONE
|
||||
if ( type != DownsampleType.NONE && toFraction == null && toCoverage == null )
|
||||
throw new UserException.CommandLineException("Must specify either toFraction or toCoverage when downsampling.");
|
||||
throw new UserException("Must specify either toFraction or toCoverage when downsampling.");
|
||||
|
||||
// Fraction and coverage cannot both be specified.
|
||||
if ( toFraction != null && toCoverage != null )
|
||||
throw new UserException.CommandLineException("Downsampling coverage and fraction are both specified. Please choose only one.");
|
||||
throw new UserException("Downsampling coverage and fraction are both specified. Please choose only one.");
|
||||
|
||||
// toCoverage must be > 0 when specified
|
||||
if ( toCoverage != null && toCoverage <= 0 ) {
|
||||
throw new UserException.CommandLineException("toCoverage must be > 0 when downsampling to coverage");
|
||||
throw new UserException("toCoverage must be > 0 when downsampling to coverage");
|
||||
}
|
||||
|
||||
// toFraction must be >= 0.0 and <= 1.0 when specified
|
||||
if ( toFraction != null && (toFraction < 0.0 || toFraction > 1.0) ) {
|
||||
throw new UserException.CommandLineException("toFraction must be >= 0.0 and <= 1.0 when downsampling to a fraction of reads");
|
||||
throw new UserException("toFraction must be >= 0.0 and <= 1.0 when downsampling to a fraction of reads");
|
||||
}
|
||||
}
|
||||
|
||||
public void checkCompatibilityWithWalker( Walker walker ) {
|
||||
boolean isLocusTraversal = walker instanceof LocusWalker || walker instanceof ActiveRegionWalker;
|
||||
|
||||
if ( ! isLocusTraversal && useLegacyDownsampler && toCoverage != null ) {
|
||||
throw new UserException.CommandLineException("Downsampling to coverage for read-based traversals (eg., ReadWalkers) is not supported in the legacy downsampling implementation. " +
|
||||
"The newer downsampling implementation does not have this limitation.");
|
||||
}
|
||||
|
||||
if ( isLocusTraversal && ! useLegacyDownsampler && type == DownsampleType.ALL_READS && toCoverage != null ) {
|
||||
throw new UserException.CommandLineException("Downsampling to coverage with the ALL_READS method for locus-based traversals (eg., LocusWalkers) is not yet supported in the new downsampling implementation (though it is supported for ReadWalkers). " +
|
||||
"You can run with --use_legacy_downsampler for a broken and poorly-maintained implementation of ALL_READS to-coverage downsampling, but this is not recommended.");
|
||||
if ( isLocusTraversal && type == DownsampleType.ALL_READS && toCoverage != null ) {
|
||||
throw new UserException("Downsampling to coverage with the ALL_READS method for locus-based traversals (eg., LocusWalkers) is not currently supported (though it is supported for ReadWalkers).");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -128,31 +130,22 @@ public class DownsamplingMethod {
|
|||
builder.append(String.format("Method: %s, ", type));
|
||||
|
||||
if ( toCoverage != null ) {
|
||||
builder.append(String.format("Target Coverage: %d, ", toCoverage));
|
||||
builder.append(String.format("Target Coverage: %d", toCoverage));
|
||||
}
|
||||
else {
|
||||
builder.append(String.format("Target Fraction: %.2f, ", toFraction));
|
||||
}
|
||||
|
||||
if ( useLegacyDownsampler ) {
|
||||
builder.append("Using the legacy downsampling implementation");
|
||||
}
|
||||
else {
|
||||
builder.append("Using the new downsampling implementation");
|
||||
builder.append(String.format("Target Fraction: %.2f", toFraction));
|
||||
}
|
||||
}
|
||||
|
||||
return builder.toString();
|
||||
}
|
||||
|
||||
public static DownsamplingMethod getDefaultDownsamplingMethod( Walker walker, boolean useLegacyDownsampler ) {
|
||||
public static DownsamplingMethod getDefaultDownsamplingMethod( Walker walker ) {
|
||||
if ( walker instanceof LocusWalker || walker instanceof ActiveRegionWalker ) {
|
||||
return new DownsamplingMethod(DEFAULT_DOWNSAMPLING_TYPE, DEFAULT_LOCUS_BASED_TRAVERSAL_DOWNSAMPLING_COVERAGE,
|
||||
null, useLegacyDownsampler);
|
||||
return DEFAULT_LOCUS_TRAVERSAL_DOWNSAMPLING_METHOD;
|
||||
}
|
||||
else {
|
||||
// Downsampling is off by default for non-locus-based traversals
|
||||
return new DownsamplingMethod(DownsampleType.NONE, null, null, useLegacyDownsampler);
|
||||
return DEFAULT_READ_TRAVERSAL_DOWNSAMPLING_METHOD;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -111,9 +111,6 @@ public class WindowMaker implements Iterable<WindowMaker.WindowMakerIterator>, I
|
|||
this.sourceInfo = shard.getReadProperties();
|
||||
this.readIterator = new GATKSAMIterator(iterator);
|
||||
|
||||
// Use the legacy version of LocusIteratorByState if legacy downsampling was requested:
|
||||
if ( sourceInfo.getDownsamplingMethod().useLegacyDownsampler )
|
||||
throw new IllegalArgumentException("legacy downsampler no longer supported in the window maker");
|
||||
this.libs = new LocusIteratorByState(readIterator,sourceInfo,genomeLocParser,sampleNames);
|
||||
this.sourceIterator = new PeekableIterator<AlignmentContext>(libs);
|
||||
|
||||
|
|
|
|||
|
|
@ -1,77 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.iterators;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
|
||||
import java.util.Iterator;
|
||||
|
||||
|
||||
public class LegacyDownsampleIterator implements StingSAMIterator {
|
||||
|
||||
StingSAMIterator it;
|
||||
int cutoff;
|
||||
SAMRecord next;
|
||||
|
||||
public LegacyDownsampleIterator(StingSAMIterator it, double fraction) {
|
||||
this.it = it;
|
||||
cutoff = (int)(fraction * 10000);
|
||||
next = getNextRecord();
|
||||
}
|
||||
|
||||
public boolean hasNext() {
|
||||
return next != null;
|
||||
}
|
||||
|
||||
public SAMRecord next() {
|
||||
SAMRecord result = next;
|
||||
next = getNextRecord();
|
||||
return result;
|
||||
}
|
||||
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!");
|
||||
}
|
||||
|
||||
private SAMRecord getNextRecord() {
|
||||
while ( true ) {
|
||||
if ( !it.hasNext() )
|
||||
return null;
|
||||
SAMRecord rec = it.next();
|
||||
if ( GenomeAnalysisEngine.getRandomGenerator().nextInt(10000) < cutoff )
|
||||
return rec;
|
||||
}
|
||||
}
|
||||
|
||||
public void close() {
|
||||
it.close();
|
||||
}
|
||||
|
||||
public Iterator<SAMRecord> iterator() {
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
|
@ -1,153 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils;
|
||||
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Iterator;
|
||||
|
||||
/**
|
||||
* THIS IMPLEMENTATION IS BROKEN AND WILL BE REMOVED ONCE THE DOWNSAMPLING ENGINE FORK COLLAPSES
|
||||
*
|
||||
* Randomly downsample from a stream of elements. This algorithm is a direct,
|
||||
* naive implementation of reservoir downsampling as described in "Random Downsampling
|
||||
* with a Reservoir" (Vitter 1985). At time of writing, this paper is located here:
|
||||
* http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.138.784&rep=rep1&type=pdf
|
||||
|
||||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public class LegacyReservoirDownsampler<T> {
|
||||
/**
|
||||
* The reservoir of elements tracked by this downsampler.
|
||||
*/
|
||||
private final ArrayList<T> reservoir;
|
||||
|
||||
/**
|
||||
* What is the maximum number of reads that can be returned in a single batch.
|
||||
*/
|
||||
private final int maxElements;
|
||||
|
||||
/**
|
||||
* Create a new downsampler with the given source iterator and given comparator.
|
||||
* @param maxElements What is the maximum number of reads that can be returned in any call of this
|
||||
*/
|
||||
public LegacyReservoirDownsampler(final int maxElements) {
|
||||
if(maxElements < 0)
|
||||
throw new ReviewedStingException("Unable to work with an negative size collection of elements");
|
||||
this.reservoir = new ArrayList<T>(maxElements);
|
||||
this.maxElements = maxElements;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the eliminated element.
|
||||
* @param element Eliminated element; null if no element has been eliminated.
|
||||
* @return
|
||||
*/
|
||||
public T add(T element) {
|
||||
if(maxElements <= 0)
|
||||
return element;
|
||||
else if(reservoir.size() < maxElements) {
|
||||
reservoir.add(element);
|
||||
return null;
|
||||
}
|
||||
else {
|
||||
// Get a uniformly distributed int. If the chosen slot lives within the partition, replace the entry in that slot with the newest entry.
|
||||
int slot = GenomeAnalysisEngine.getRandomGenerator().nextInt(maxElements);
|
||||
if(slot >= 0 && slot < maxElements) {
|
||||
T displaced = reservoir.get(slot);
|
||||
reservoir.set(slot,element);
|
||||
return displaced;
|
||||
}
|
||||
else
|
||||
return element;
|
||||
}
|
||||
}
|
||||
|
||||
public boolean addAll(Collection<? extends T> elements) {
|
||||
boolean added = false;
|
||||
for(T element: elements)
|
||||
added |= (add(element) != null);
|
||||
return added;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the contents of this reservoir, downsampled to the given value. Note that the return value
|
||||
* @return The downsampled contents of this reservoir.
|
||||
*/
|
||||
public Collection<T> getDownsampledContents() {
|
||||
return reservoir;
|
||||
}
|
||||
|
||||
public void clear() {
|
||||
reservoir.clear();
|
||||
}
|
||||
|
||||
public boolean isEmpty() {
|
||||
return reservoir.isEmpty();
|
||||
}
|
||||
|
||||
public int size() {
|
||||
return reservoir.size();
|
||||
}
|
||||
|
||||
public Iterator<T> iterator() {
|
||||
return reservoir.iterator();
|
||||
}
|
||||
|
||||
public boolean contains(Object o) {
|
||||
return reservoir.contains(o);
|
||||
}
|
||||
|
||||
public boolean containsAll(Collection<?> elements) {
|
||||
return reservoir.containsAll(elements);
|
||||
}
|
||||
|
||||
public boolean retainAll(Collection<?> elements) {
|
||||
return reservoir.retainAll(elements);
|
||||
}
|
||||
|
||||
public boolean remove(Object o) {
|
||||
return reservoir.remove(o);
|
||||
}
|
||||
|
||||
public boolean removeAll(Collection<?> elements) {
|
||||
return reservoir.removeAll(elements);
|
||||
}
|
||||
|
||||
public Object[] toArray() {
|
||||
Object[] contents = new Object[reservoir.size()];
|
||||
reservoir.toArray(contents);
|
||||
return contents;
|
||||
}
|
||||
|
||||
public <T> T[] toArray(T[] array) {
|
||||
return reservoir.toArray(array);
|
||||
}
|
||||
}
|
||||
|
|
@ -86,7 +86,7 @@ public class DownsamplerBenchmark extends ReadProcessingBenchmark {
|
|||
},
|
||||
PER_SAMPLE {
|
||||
@Override
|
||||
DownsamplingMethod create() { return DownsamplingMethod.getDefaultDownsamplingMethod(new CountLoci(), false); }
|
||||
DownsamplingMethod create() { return DownsamplingMethod.getDefaultDownsamplingMethod(new CountLoci()); }
|
||||
};
|
||||
abstract DownsamplingMethod create();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -52,7 +52,7 @@ public class ReadShardBalancerUnitTest extends BaseTest {
|
|||
* Tests to ensure that ReadShardBalancer works as expected and does not place shard boundaries
|
||||
* at inappropriate places, such as within an alignment start position
|
||||
*/
|
||||
private static class ExperimentalReadShardBalancerTest extends TestDataProvider {
|
||||
private static class ReadShardBalancerTest extends TestDataProvider {
|
||||
private int numContigs;
|
||||
private int numStacksPerContig;
|
||||
private int stackSize;
|
||||
|
|
@ -63,19 +63,19 @@ public class ReadShardBalancerUnitTest extends BaseTest {
|
|||
private SAMFileHeader header;
|
||||
private SAMReaderID testBAM;
|
||||
|
||||
public ExperimentalReadShardBalancerTest( int numContigs,
|
||||
int numStacksPerContig,
|
||||
int stackSize,
|
||||
int numUnmappedReads,
|
||||
int downsamplingTargetCoverage ) {
|
||||
super(ExperimentalReadShardBalancerTest.class);
|
||||
public ReadShardBalancerTest( int numContigs,
|
||||
int numStacksPerContig,
|
||||
int stackSize,
|
||||
int numUnmappedReads,
|
||||
int downsamplingTargetCoverage ) {
|
||||
super(ReadShardBalancerTest.class);
|
||||
|
||||
this.numContigs = numContigs;
|
||||
this.numStacksPerContig = numStacksPerContig;
|
||||
this.stackSize = stackSize;
|
||||
this.numUnmappedReads = numUnmappedReads;
|
||||
|
||||
this.downsamplingMethod = new DownsamplingMethod(DownsampleType.BY_SAMPLE, downsamplingTargetCoverage, null, false);
|
||||
this.downsamplingMethod = new DownsamplingMethod(DownsampleType.BY_SAMPLE, downsamplingTargetCoverage, null);
|
||||
this.expectedReadCount = Math.min(stackSize, downsamplingTargetCoverage) * numStacksPerContig * numContigs + numUnmappedReads;
|
||||
|
||||
setName(String.format("%s: numContigs=%d numStacksPerContig=%d stackSize=%d numUnmappedReads=%d downsamplingTargetCoverage=%d",
|
||||
|
|
@ -176,8 +176,8 @@ public class ReadShardBalancerUnitTest extends BaseTest {
|
|||
}
|
||||
}
|
||||
|
||||
@DataProvider(name = "ExperimentalReadShardBalancerTestDataProvider")
|
||||
public Object[][] createExperimentalReadShardBalancerTests() {
|
||||
@DataProvider(name = "ReadShardBalancerTestDataProvider")
|
||||
public Object[][] createReadShardBalancerTests() {
|
||||
for ( int numContigs = 1; numContigs <= 3; numContigs++ ) {
|
||||
for ( int numStacksPerContig : Arrays.asList(1, 2, 4) ) {
|
||||
// Use crucial read shard boundary values as the stack sizes
|
||||
|
|
@ -185,18 +185,18 @@ public class ReadShardBalancerUnitTest extends BaseTest {
|
|||
for ( int numUnmappedReads : Arrays.asList(0, ReadShard.DEFAULT_MAX_READS / 2, ReadShard.DEFAULT_MAX_READS * 2) ) {
|
||||
// The first value will result in no downsampling at all, the others in some downsampling
|
||||
for ( int downsamplingTargetCoverage : Arrays.asList(ReadShard.DEFAULT_MAX_READS * 10, ReadShard.DEFAULT_MAX_READS, ReadShard.DEFAULT_MAX_READS / 2) ) {
|
||||
new ExperimentalReadShardBalancerTest(numContigs, numStacksPerContig, stackSize, numUnmappedReads, downsamplingTargetCoverage);
|
||||
new ReadShardBalancerTest(numContigs, numStacksPerContig, stackSize, numUnmappedReads, downsamplingTargetCoverage);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return ExperimentalReadShardBalancerTest.getTests(ExperimentalReadShardBalancerTest.class);
|
||||
return ReadShardBalancerTest.getTests(ReadShardBalancerTest.class);
|
||||
}
|
||||
|
||||
@Test(dataProvider = "ExperimentalReadShardBalancerTestDataProvider")
|
||||
public void runExperimentalReadShardBalancerTest( ExperimentalReadShardBalancerTest test ) {
|
||||
@Test(dataProvider = "ReadShardBalancerTestDataProvider")
|
||||
public void runReadShardBalancerTest( ReadShardBalancerTest test ) {
|
||||
logger.warn("Running test: " + test);
|
||||
|
||||
test.run();
|
||||
|
|
|
|||
|
|
@ -31,10 +31,7 @@ import org.broadinstitute.sting.BaseTest;
|
|||
import org.broadinstitute.sting.commandline.Tags;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.ReadShardDataProvider;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.LegacyReadShardBalancer;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.SAMDataSource;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.*;
|
||||
import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.qc.CountReads;
|
||||
|
|
@ -139,7 +136,7 @@ public class TraverseReadsUnitTest extends BaseTest {
|
|||
@Test
|
||||
public void testUnmappedReadCount() {
|
||||
SAMDataSource dataSource = new SAMDataSource(bamList,new ThreadAllocation(),null,genomeLocParser);
|
||||
Iterable<Shard> shardStrategy = dataSource.createShardIteratorOverAllReads(new LegacyReadShardBalancer());
|
||||
Iterable<Shard> shardStrategy = dataSource.createShardIteratorOverAllReads(new ReadShardBalancer());
|
||||
|
||||
countReadWalker.initialize();
|
||||
Object accumulator = countReadWalker.reduceInit();
|
||||
|
|
|
|||
|
|
@ -1,191 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils;
|
||||
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.Test;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Basic tests to prove the integrity of the reservoir downsampler.
|
||||
* At the moment, always run tests on SAM records as that's the task
|
||||
* for which the downsampler was conceived.
|
||||
*
|
||||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public class LegacyReservoirDownsamplerUnitTest {
|
||||
private static final SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1,1,200);
|
||||
|
||||
|
||||
@Test
|
||||
public void testEmptyIterator() {
|
||||
LegacyReservoirDownsampler<SAMRecord> downsampler = new LegacyReservoirDownsampler<SAMRecord>(1);
|
||||
Assert.assertTrue(downsampler.isEmpty(),"Downsampler is not empty but should be.");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testOneElementWithPoolSizeOne() {
|
||||
List<GATKSAMRecord> reads = Collections.singletonList(ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,76));
|
||||
LegacyReservoirDownsampler<SAMRecord> downsampler = new LegacyReservoirDownsampler<SAMRecord>(1);
|
||||
downsampler.addAll(reads);
|
||||
|
||||
Assert.assertFalse(downsampler.isEmpty(),"Downsampler is empty but shouldn't be");
|
||||
Collection<SAMRecord> batchedReads = downsampler.getDownsampledContents();
|
||||
Assert.assertEquals(batchedReads.size(), 1, "Downsampler is returning the wrong number of reads");
|
||||
Assert.assertSame(batchedReads.iterator().next(), reads.get(0), "Downsampler is returning an incorrect read");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testOneElementWithPoolSizeGreaterThanOne() {
|
||||
List<GATKSAMRecord> reads = Collections.singletonList(ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,76));
|
||||
LegacyReservoirDownsampler<SAMRecord> downsampler = new LegacyReservoirDownsampler<SAMRecord>(5);
|
||||
downsampler.addAll(reads);
|
||||
|
||||
Assert.assertFalse(downsampler.isEmpty(),"Downsampler is empty but shouldn't be");
|
||||
Collection<SAMRecord> batchedReads = downsampler.getDownsampledContents();
|
||||
Assert.assertEquals(batchedReads.size(), 1, "Downsampler is returning the wrong number of reads");
|
||||
Assert.assertSame(batchedReads.iterator().next(), reads.get(0), "Downsampler is returning an incorrect read");
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testPoolFilledPartially() {
|
||||
List<SAMRecord> reads = new ArrayList<SAMRecord>();
|
||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,76));
|
||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read2",0,1,76));
|
||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read3",0,1,76));
|
||||
LegacyReservoirDownsampler<SAMRecord> downsampler = new LegacyReservoirDownsampler<SAMRecord>(5);
|
||||
downsampler.addAll(reads);
|
||||
|
||||
Assert.assertFalse(downsampler.isEmpty(),"Downsampler is empty but shouldn't be");
|
||||
List<SAMRecord> batchedReads = new ArrayList<SAMRecord>(downsampler.getDownsampledContents());
|
||||
Assert.assertEquals(batchedReads.size(), 3, "Downsampler is returning the wrong number of reads");
|
||||
|
||||
Assert.assertSame(batchedReads.get(0), reads.get(0), "Downsampler read 1 is incorrect");
|
||||
Assert.assertSame(batchedReads.get(1), reads.get(1), "Downsampler read 2 is incorrect");
|
||||
Assert.assertSame(batchedReads.get(2), reads.get(2), "Downsampler read 3 is incorrect");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testPoolFilledExactly() {
|
||||
List<SAMRecord> reads = new ArrayList<SAMRecord>();
|
||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,76));
|
||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read2",0,1,76));
|
||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read3",0,1,76));
|
||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read4",0,1,76));
|
||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read5",0,1,76));
|
||||
LegacyReservoirDownsampler<SAMRecord> downsampler = new LegacyReservoirDownsampler<SAMRecord>(5);
|
||||
downsampler.addAll(reads);
|
||||
|
||||
Assert.assertFalse(downsampler.isEmpty(),"Downsampler is empty but shouldn't be");
|
||||
List<SAMRecord> batchedReads = new ArrayList<SAMRecord>(downsampler.getDownsampledContents());
|
||||
Assert.assertEquals(batchedReads.size(), 5, "Downsampler is returning the wrong number of reads");
|
||||
Assert.assertSame(batchedReads.iterator().next(), reads.get(0), "Downsampler is returning an incorrect read");
|
||||
|
||||
Assert.assertSame(batchedReads.get(0), reads.get(0), "Downsampler read 1 is incorrect");
|
||||
Assert.assertSame(batchedReads.get(1), reads.get(1), "Downsampler read 2 is incorrect");
|
||||
Assert.assertSame(batchedReads.get(2), reads.get(2), "Downsampler read 3 is incorrect");
|
||||
Assert.assertSame(batchedReads.get(3), reads.get(3), "Downsampler read 4 is incorrect");
|
||||
Assert.assertSame(batchedReads.get(4), reads.get(4), "Downsampler read 5 is incorrect");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testLargerPileWithZeroElementPool() {
|
||||
List<SAMRecord> reads = new ArrayList<SAMRecord>();
|
||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,76));
|
||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read2",0,1,76));
|
||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read3",0,1,76));
|
||||
LegacyReservoirDownsampler<SAMRecord> downsampler = new LegacyReservoirDownsampler<SAMRecord>(0);
|
||||
downsampler.addAll(reads);
|
||||
|
||||
Assert.assertTrue(downsampler.isEmpty(),"Downsampler isn't empty but should be");
|
||||
List<SAMRecord> batchedReads = new ArrayList<SAMRecord>(downsampler.getDownsampledContents());
|
||||
Assert.assertEquals(batchedReads.size(), 0, "Downsampler is returning the wrong number of reads");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testLargerPileWithSingleElementPool() {
|
||||
List<SAMRecord> reads = new ArrayList<SAMRecord>();
|
||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,76));
|
||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read2",0,1,76));
|
||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read3",0,1,76));
|
||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read4",0,1,76));
|
||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read5",0,1,76));
|
||||
LegacyReservoirDownsampler<SAMRecord> downsampler = new LegacyReservoirDownsampler<SAMRecord>(1);
|
||||
downsampler.addAll(reads);
|
||||
|
||||
Assert.assertFalse(downsampler.isEmpty(),"Downsampler is empty but shouldn't be");
|
||||
List<SAMRecord> batchedReads = new ArrayList<SAMRecord>(downsampler.getDownsampledContents());
|
||||
Assert.assertEquals(batchedReads.size(), 1, "Downsampler is returning the wrong number of reads");
|
||||
Assert.assertTrue(reads.contains(batchedReads.get(0)),"Downsampler is returning a bad read.");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFillingAcrossLoci() {
|
||||
List<SAMRecord> reads = new ArrayList<SAMRecord>();
|
||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,76));
|
||||
LegacyReservoirDownsampler<SAMRecord> downsampler = new LegacyReservoirDownsampler<SAMRecord>(5);
|
||||
downsampler.addAll(reads);
|
||||
|
||||
Assert.assertFalse(downsampler.isEmpty(),"Downsampler is empty but shouldn't be");
|
||||
List<SAMRecord> batchedReads = new ArrayList<SAMRecord>(downsampler.getDownsampledContents());
|
||||
Assert.assertEquals(batchedReads.size(), 1, "Downsampler is returning the wrong number of reads");
|
||||
Assert.assertEquals(batchedReads.get(0), reads.get(0), "Downsampler is returning an incorrect read.");
|
||||
|
||||
reads.clear();
|
||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read2",0,2,76));
|
||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read3",0,2,76));
|
||||
|
||||
downsampler.clear();
|
||||
downsampler.addAll(reads);
|
||||
|
||||
Assert.assertFalse(downsampler.isEmpty(),"Downsampler is empty but shouldn't be");
|
||||
batchedReads = new ArrayList<SAMRecord>(downsampler.getDownsampledContents());
|
||||
Assert.assertEquals(batchedReads.size(), 2, "Downsampler is returning the wrong number of reads");
|
||||
Assert.assertEquals(batchedReads.get(0), reads.get(0), "Downsampler is returning an incorrect read.");
|
||||
Assert.assertEquals(batchedReads.get(1), reads.get(1), "Downsampler is returning an incorrect read.");
|
||||
|
||||
reads.clear();
|
||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read4",0,3,76));
|
||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read5",0,3,76));
|
||||
|
||||
downsampler.clear();
|
||||
downsampler.addAll(reads);
|
||||
|
||||
Assert.assertFalse(downsampler.isEmpty(),"Downsampler is empty but shouldn't be");
|
||||
batchedReads = new ArrayList<SAMRecord>(downsampler.getDownsampledContents());
|
||||
Assert.assertEquals(batchedReads.size(), 2, "Downsampler is returning the wrong number of reads");
|
||||
Assert.assertEquals(batchedReads.get(0), reads.get(0), "Downsampler is returning an incorrect read.");
|
||||
Assert.assertEquals(batchedReads.get(1), reads.get(1), "Downsampler is returning an incorrect read.");
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -474,8 +474,8 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest {
|
|||
|
||||
final boolean downsample = downsampleTo != -1;
|
||||
final DownsamplingMethod downsampler = downsample
|
||||
? new DownsamplingMethod(DownsampleType.BY_SAMPLE, downsampleTo, null, false)
|
||||
: new DownsamplingMethod(DownsampleType.NONE, null, null, false);
|
||||
? new DownsamplingMethod(DownsampleType.BY_SAMPLE, downsampleTo, null)
|
||||
: new DownsamplingMethod(DownsampleType.NONE, null, null);
|
||||
|
||||
final ArtificialBAMBuilder bamBuilder = new ArtificialBAMBuilder(header.getSequenceDictionary(), nReadsPerLocus, nLoci);
|
||||
bamBuilder.createAndSetHeader(nSamples).setReadLength(readLength).setAlignmentStart(1);
|
||||
|
|
@ -635,8 +635,8 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest {
|
|||
|
||||
final boolean downsample = downsampleTo != -1;
|
||||
final DownsamplingMethod downsampler = downsample
|
||||
? new DownsamplingMethod(DownsampleType.BY_SAMPLE, downsampleTo, null, false)
|
||||
: new DownsamplingMethod(DownsampleType.NONE, null, null, false);
|
||||
? new DownsamplingMethod(DownsampleType.BY_SAMPLE, downsampleTo, null)
|
||||
: new DownsamplingMethod(DownsampleType.NONE, null, null);
|
||||
|
||||
// final List<GATKSAMRecord> reads = ArtificialSAMUtils.createReadStream(nReadsPerLocus, nLoci, header, 1, readLength);
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue