Collapse the downsampling fork in the GATK engine
With LegacyLocusIteratorByState deleted, the legacy downsampling implementation was already non-functional. This commit removes all remaining code in the engine belonging to the legacy implementation.
This commit is contained in:
parent 5003deafb6
commit 3744d1a596
|
|
@ -434,12 +434,9 @@ public class GenomeAnalysisEngine {
|
||||||
protected DownsamplingMethod getDownsamplingMethod() {
|
protected DownsamplingMethod getDownsamplingMethod() {
|
||||||
GATKArgumentCollection argCollection = this.getArguments();
|
GATKArgumentCollection argCollection = this.getArguments();
|
||||||
|
|
||||||
// Legacy downsampler can only be selected via the command line, not via walker annotations
|
|
||||||
boolean useLegacyDownsampler = argCollection.useLegacyDownsampler;
|
|
||||||
|
|
||||||
DownsamplingMethod commandLineMethod = argCollection.getDownsamplingMethod();
|
DownsamplingMethod commandLineMethod = argCollection.getDownsamplingMethod();
|
||||||
DownsamplingMethod walkerMethod = WalkerManager.getDownsamplingMethod(walker, useLegacyDownsampler);
|
DownsamplingMethod walkerMethod = WalkerManager.getDownsamplingMethod(walker);
|
||||||
DownsamplingMethod defaultMethod = DownsamplingMethod.getDefaultDownsamplingMethod(walker, useLegacyDownsampler);
|
DownsamplingMethod defaultMethod = DownsamplingMethod.getDefaultDownsamplingMethod(walker);
|
||||||
|
|
||||||
DownsamplingMethod method = commandLineMethod != null ? commandLineMethod : (walkerMethod != null ? walkerMethod : defaultMethod);
|
DownsamplingMethod method = commandLineMethod != null ? commandLineMethod : (walkerMethod != null ? walkerMethod : defaultMethod);
|
||||||
method.checkCompatibilityWithWalker(walker);
|
method.checkCompatibilityWithWalker(walker);
|
||||||
|
|
@ -572,15 +569,10 @@ public class GenomeAnalysisEngine {
|
||||||
throw new UserException.CommandLineException("Pairs traversal cannot be used in conjunction with intervals.");
|
throw new UserException.CommandLineException("Pairs traversal cannot be used in conjunction with intervals.");
|
||||||
}
|
}
|
||||||
|
|
||||||
// Use the legacy ReadShardBalancer if legacy downsampling is enabled
|
|
||||||
ShardBalancer readShardBalancer = downsamplingMethod != null && downsamplingMethod.useLegacyDownsampler ?
|
|
||||||
new LegacyReadShardBalancer() :
|
|
||||||
new ReadShardBalancer();
|
|
||||||
|
|
||||||
if(intervals == null)
|
if(intervals == null)
|
||||||
return readsDataSource.createShardIteratorOverAllReads(readShardBalancer);
|
return readsDataSource.createShardIteratorOverAllReads(new ReadShardBalancer());
|
||||||
else
|
else
|
||||||
return readsDataSource.createShardIteratorOverIntervals(intervals, readShardBalancer);
|
return readsDataSource.createShardIteratorOverIntervals(intervals, new ReadShardBalancer());
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
throw new ReviewedStingException("Unable to determine walker type for walker " + walker.getClass().getName());
|
throw new ReviewedStingException("Unable to determine walker type for walker " + walker.getClass().getName());
|
||||||
|
|
@ -793,7 +785,7 @@ public class GenomeAnalysisEngine {
|
||||||
DownsamplingMethod downsamplingMethod = getDownsamplingMethod();
|
DownsamplingMethod downsamplingMethod = getDownsamplingMethod();
|
||||||
|
|
||||||
// Synchronize the method back into the collection so that it shows up when
|
// Synchronize the method back into the collection so that it shows up when
|
||||||
// interrogating for the downsample method during command line recreation.
|
// interrogating for the downsampling method during command line recreation.
|
||||||
setDownsamplingMethod(downsamplingMethod);
|
setDownsamplingMethod(downsamplingMethod);
|
||||||
|
|
||||||
logger.info(downsamplingMethod);
|
logger.info(downsamplingMethod);
|
||||||
|
|
|
||||||
|
|
@ -306,11 +306,10 @@ public class WalkerManager extends PluginManager<Walker> {
|
||||||
* downsampling method is specified on the command-line, the command-line version will
|
* downsampling method is specified on the command-line, the command-line version will
|
||||||
* be used instead.
|
* be used instead.
|
||||||
* @param walker The walker to interrogate.
|
* @param walker The walker to interrogate.
|
||||||
* @param useLegacyDownsampler If true, use the legacy downsampling implementation
|
|
||||||
* @return The downsampling method, as specified by the walker. Null if none exists.
|
* @return The downsampling method, as specified by the walker. Null if none exists.
|
||||||
*/
|
*/
|
||||||
public static DownsamplingMethod getDownsamplingMethod(Walker walker, boolean useLegacyDownsampler) {
|
public static DownsamplingMethod getDownsamplingMethod( Walker walker ) {
|
||||||
return getDownsamplingMethod(walker.getClass(), useLegacyDownsampler);
|
return getDownsamplingMethod(walker.getClass());
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -318,10 +317,9 @@ public class WalkerManager extends PluginManager<Walker> {
|
||||||
* downsampling method is specified on the command-line, the command-line version will
|
* downsampling method is specified on the command-line, the command-line version will
|
||||||
* be used instead.
|
* be used instead.
|
||||||
* @param walkerClass The class of the walker to interrogate.
|
* @param walkerClass The class of the walker to interrogate.
|
||||||
* @param useLegacyDownsampler If true, use the legacy downsampling implementation
|
|
||||||
* @return The downsampling method, as specified by the walker. Null if none exists.
|
* @return The downsampling method, as specified by the walker. Null if none exists.
|
||||||
*/
|
*/
|
||||||
public static DownsamplingMethod getDownsamplingMethod(Class<? extends Walker> walkerClass, boolean useLegacyDownsampler) {
|
public static DownsamplingMethod getDownsamplingMethod( Class<? extends Walker> walkerClass ) {
|
||||||
DownsamplingMethod downsamplingMethod = null;
|
DownsamplingMethod downsamplingMethod = null;
|
||||||
|
|
||||||
if( walkerClass.isAnnotationPresent(Downsample.class) ) {
|
if( walkerClass.isAnnotationPresent(Downsample.class) ) {
|
||||||
|
|
@ -329,7 +327,7 @@ public class WalkerManager extends PluginManager<Walker> {
|
||||||
DownsampleType type = downsampleParameters.by();
|
DownsampleType type = downsampleParameters.by();
|
||||||
Integer toCoverage = downsampleParameters.toCoverage() >= 0 ? downsampleParameters.toCoverage() : null;
|
Integer toCoverage = downsampleParameters.toCoverage() >= 0 ? downsampleParameters.toCoverage() : null;
|
||||||
Double toFraction = downsampleParameters.toFraction() >= 0.0d ? downsampleParameters.toFraction() : null;
|
Double toFraction = downsampleParameters.toFraction() >= 0.0d ? downsampleParameters.toFraction() : null;
|
||||||
downsamplingMethod = new DownsamplingMethod(type,toCoverage,toFraction,useLegacyDownsampler);
|
downsamplingMethod = new DownsamplingMethod(type, toCoverage, toFraction);
|
||||||
}
|
}
|
||||||
|
|
||||||
return downsamplingMethod;
|
return downsamplingMethod;
|
||||||
|
|
|
||||||
|
|
@ -126,9 +126,6 @@ public class GATKArgumentCollection {
|
||||||
@Argument(fullName = "downsample_to_coverage", shortName = "dcov", doc = "Coverage [integer] to downsample to at any given locus; note that downsampled reads are randomly selected from all possible reads at a locus. For non-locus-based traversals (eg., ReadWalkers), this sets the maximum number of reads at each alignment start position.", required = false)
|
@Argument(fullName = "downsample_to_coverage", shortName = "dcov", doc = "Coverage [integer] to downsample to at any given locus; note that downsampled reads are randomly selected from all possible reads at a locus. For non-locus-based traversals (eg., ReadWalkers), this sets the maximum number of reads at each alignment start position.", required = false)
|
||||||
public Integer downsampleCoverage = null;
|
public Integer downsampleCoverage = null;
|
||||||
|
|
||||||
@Argument(fullName = "use_legacy_downsampler", shortName = "use_legacy_downsampler", doc = "Use the legacy downsampling implementation instead of the newer, less-tested implementation", required = false)
|
|
||||||
public boolean useLegacyDownsampler = false;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets the downsampling method explicitly specified by the user. If the user didn't specify
|
* Gets the downsampling method explicitly specified by the user. If the user didn't specify
|
||||||
* a default downsampling mechanism, return the default.
|
* a default downsampling mechanism, return the default.
|
||||||
|
|
@ -138,7 +135,7 @@ public class GATKArgumentCollection {
|
||||||
if ( downsamplingType == null && downsampleFraction == null && downsampleCoverage == null )
|
if ( downsamplingType == null && downsampleFraction == null && downsampleCoverage == null )
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
return new DownsamplingMethod(downsamplingType, downsampleCoverage, downsampleFraction, useLegacyDownsampler);
|
return new DownsamplingMethod(downsamplingType, downsampleCoverage, downsampleFraction);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -152,7 +149,6 @@ public class GATKArgumentCollection {
|
||||||
downsamplingType = method.type;
|
downsamplingType = method.type;
|
||||||
downsampleCoverage = method.toCoverage;
|
downsampleCoverage = method.toCoverage;
|
||||||
downsampleFraction = method.toFraction;
|
downsampleFraction = method.toFraction;
|
||||||
useLegacyDownsampler = method.useLegacyDownsampler;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// --------------------------------------------------------------------------------------------------------------
|
// --------------------------------------------------------------------------------------------------------------
|
||||||
|
|
|
||||||
|
|
@ -162,14 +162,6 @@ public abstract class LocusView extends LocusIterator implements View {
|
||||||
// Cache the current and apply filtering.
|
// Cache the current and apply filtering.
|
||||||
AlignmentContext current = nextLocus;
|
AlignmentContext current = nextLocus;
|
||||||
|
|
||||||
// The old ALL_READS downsampling implementation -- use only if legacy downsampling was requested:
|
|
||||||
if ( sourceInfo.getDownsamplingMethod().useLegacyDownsampler &&
|
|
||||||
sourceInfo.getDownsamplingMethod().type == DownsampleType.ALL_READS &&
|
|
||||||
sourceInfo.getDownsamplingMethod().toCoverage != null ) {
|
|
||||||
|
|
||||||
current.downsampleToCoverage(sourceInfo.getDownsamplingMethod().toCoverage);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Indicate that the next operation will need to advance.
|
// Indicate that the next operation will need to advance.
|
||||||
nextLocus = null;
|
nextLocus = null;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -133,14 +133,7 @@ public class BAMScheduler implements Iterator<FilePointer> {
|
||||||
|
|
||||||
Map<SAMReaderID,GATKBAMFileSpan> currentPosition;
|
Map<SAMReaderID,GATKBAMFileSpan> currentPosition;
|
||||||
|
|
||||||
// Only use the deprecated SAMDataSource.getCurrentPosition() if we're not using experimental downsampling
|
|
||||||
// TODO: clean this up once the experimental downsampling engine fork collapses
|
|
||||||
if ( dataSource.getReadsInfo().getDownsamplingMethod() != null && dataSource.getReadsInfo().getDownsamplingMethod().useLegacyDownsampler ) {
|
|
||||||
currentPosition = dataSource.getCurrentPosition();
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
currentPosition = dataSource.getInitialReaderPositions();
|
currentPosition = dataSource.getInitialReaderPositions();
|
||||||
}
|
|
||||||
|
|
||||||
for(SAMReaderID reader: dataSource.getReaderIDs())
|
for(SAMReaderID reader: dataSource.getReaderIDs())
|
||||||
filePointer.addFileSpans(reader,createSpanToEndOfFile(currentPosition.get(reader).getGATKChunks().get(0).getChunkStart()));
|
filePointer.addFileSpans(reader,createSpanToEndOfFile(currentPosition.get(reader).getGATKChunks().get(0).getChunkStart()));
|
||||||
|
|
|
||||||
|
|
@ -1,130 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright (c) 2012 The Broad Institute
|
|
||||||
*
|
|
||||||
* Permission is hereby granted, free of charge, to any person
|
|
||||||
* obtaining a copy of this software and associated documentation
|
|
||||||
* files (the "Software"), to deal in the Software without
|
|
||||||
* restriction, including without limitation the rights to use,
|
|
||||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
||||||
* copies of the Software, and to permit persons to whom the
|
|
||||||
* Software is furnished to do so, subject to the following
|
|
||||||
* conditions:
|
|
||||||
*
|
|
||||||
* The above copyright notice and this permission notice shall be
|
|
||||||
* included in all copies or substantial portions of the Software.
|
|
||||||
*
|
|
||||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
||||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
|
||||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
||||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
|
||||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
||||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
||||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
|
||||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.datasources.reads;
|
|
||||||
|
|
||||||
import net.sf.samtools.GATKBAMFileSpan;
|
|
||||||
import net.sf.samtools.SAMFileSpan;
|
|
||||||
|
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.Iterator;
|
|
||||||
import java.util.Map;
|
|
||||||
import java.util.NoSuchElementException;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Divide up large file pointers containing reads into more manageable subcomponents.
|
|
||||||
*
|
|
||||||
* TODO: delete this class once the experimental downsampling engine fork collapses
|
|
||||||
*/
|
|
||||||
public class LegacyReadShardBalancer extends ShardBalancer {
|
|
||||||
/**
|
|
||||||
* Convert iterators of file pointers into balanced iterators of shards.
|
|
||||||
* @return An iterator over balanced shards.
|
|
||||||
*/
|
|
||||||
public Iterator<Shard> iterator() {
|
|
||||||
return new Iterator<Shard>() {
|
|
||||||
/**
|
|
||||||
* The cached shard to be returned next. Prefetched in the peekable iterator style.
|
|
||||||
*/
|
|
||||||
private Shard nextShard = null;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* The file pointer currently being processed.
|
|
||||||
*/
|
|
||||||
private FilePointer currentFilePointer;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Ending position of the last shard in the file.
|
|
||||||
*/
|
|
||||||
private Map<SAMReaderID,GATKBAMFileSpan> position = readsDataSource.getCurrentPosition();
|
|
||||||
|
|
||||||
{
|
|
||||||
if(filePointers.hasNext())
|
|
||||||
currentFilePointer = filePointers.next();
|
|
||||||
advance();
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean hasNext() {
|
|
||||||
return nextShard != null;
|
|
||||||
}
|
|
||||||
|
|
||||||
public Shard next() {
|
|
||||||
if(!hasNext())
|
|
||||||
throw new NoSuchElementException("No next read shard available");
|
|
||||||
Shard currentShard = nextShard;
|
|
||||||
advance();
|
|
||||||
return currentShard;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void remove() {
|
|
||||||
throw new UnsupportedOperationException("Unable to remove from shard balancing iterator");
|
|
||||||
}
|
|
||||||
|
|
||||||
private void advance() {
|
|
||||||
Map<SAMReaderID,SAMFileSpan> shardPosition;
|
|
||||||
nextShard = null;
|
|
||||||
|
|
||||||
Map<SAMReaderID,SAMFileSpan> selectedReaders = new HashMap<SAMReaderID,SAMFileSpan>();
|
|
||||||
while(selectedReaders.size() == 0 && currentFilePointer != null) {
|
|
||||||
shardPosition = currentFilePointer.fileSpans;
|
|
||||||
|
|
||||||
for(SAMReaderID id: shardPosition.keySet()) {
|
|
||||||
SAMFileSpan fileSpan = new GATKBAMFileSpan(shardPosition.get(id).removeContentsBefore(position.get(id)));
|
|
||||||
selectedReaders.put(id,fileSpan);
|
|
||||||
}
|
|
||||||
|
|
||||||
if(!isEmpty(selectedReaders)) {
|
|
||||||
Shard shard = new ReadShard(parser,readsDataSource,selectedReaders,currentFilePointer.locations,currentFilePointer.isRegionUnmapped);
|
|
||||||
readsDataSource.fillShard(shard);
|
|
||||||
|
|
||||||
if(!shard.isBufferEmpty()) {
|
|
||||||
nextShard = shard;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
selectedReaders.clear();
|
|
||||||
currentFilePointer = filePointers.hasNext() ? filePointers.next() : null;
|
|
||||||
}
|
|
||||||
|
|
||||||
position = readsDataSource.getCurrentPosition();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Detects whether the list of file spans contain any read data.
|
|
||||||
* @param selectedSpans Mapping of readers to file spans.
|
|
||||||
* @return True if file spans are completely empty; false otherwise.
|
|
||||||
*/
|
|
||||||
private boolean isEmpty(Map<SAMReaderID,SAMFileSpan> selectedSpans) {
|
|
||||||
for(SAMFileSpan fileSpan: selectedSpans.values()) {
|
|
||||||
if(!fileSpan.isEmpty())
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
@ -97,13 +97,6 @@ public class SAMDataSource {
|
||||||
*/
|
*/
|
||||||
private final Map<SAMReaderID,GATKBAMIndex> bamIndices = new HashMap<SAMReaderID,GATKBAMIndex>();
|
private final Map<SAMReaderID,GATKBAMIndex> bamIndices = new HashMap<SAMReaderID,GATKBAMIndex>();
|
||||||
|
|
||||||
/**
|
|
||||||
* How far along is each reader?
|
|
||||||
*
|
|
||||||
* TODO: delete this once the experimental downsampling engine fork collapses
|
|
||||||
*/
|
|
||||||
private final Map<SAMReaderID,GATKBAMFileSpan> readerPositions = new HashMap<SAMReaderID,GATKBAMFileSpan>();
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The merged header.
|
* The merged header.
|
||||||
*/
|
*/
|
||||||
|
|
@ -298,8 +291,6 @@ public class SAMDataSource {
|
||||||
this.sortOrder = sortOrder;
|
this.sortOrder = sortOrder;
|
||||||
}
|
}
|
||||||
|
|
||||||
initializeReaderPositions(readers);
|
|
||||||
|
|
||||||
mergedHeader = readers.getMergedHeader();
|
mergedHeader = readers.getMergedHeader();
|
||||||
hasReadGroupCollisions = readers.hasReadGroupCollisions();
|
hasReadGroupCollisions = readers.hasReadGroupCollisions();
|
||||||
|
|
||||||
|
|
@ -387,17 +378,6 @@ public class SAMDataSource {
|
||||||
return resourcePool.getReaderID(read.getFileSource().getReader());
|
return resourcePool.getReaderID(read.getFileSource().getReader());
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Retrieves the current position within the BAM file.
|
|
||||||
* @return A mapping of reader to current position.
|
|
||||||
*
|
|
||||||
* TODO: delete this once the experimental downsampling engine fork collapses
|
|
||||||
*/
|
|
||||||
@Deprecated
|
|
||||||
public Map<SAMReaderID,GATKBAMFileSpan> getCurrentPosition() {
|
|
||||||
return readerPositions;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets the merged header from the SAM file.
|
* Gets the merged header from the SAM file.
|
||||||
* @return The merged header.
|
* @return The merged header.
|
||||||
|
|
@ -475,67 +455,6 @@ public class SAMDataSource {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Legacy method to fill the given buffering shard with reads.
|
|
||||||
*
|
|
||||||
* Shard.fill() is used instead of this method unless legacy downsampling is enabled
|
|
||||||
*
|
|
||||||
* TODO: delete this method once the experimental downsampling engine fork collapses
|
|
||||||
*
|
|
||||||
* @param shard Shard to fill.
|
|
||||||
*/
|
|
||||||
@Deprecated
|
|
||||||
public void fillShard(Shard shard) {
|
|
||||||
if(!shard.buffersReads())
|
|
||||||
throw new ReviewedStingException("Attempting to fill a non-buffering shard.");
|
|
||||||
|
|
||||||
SAMReaders readers = resourcePool.getAvailableReaders();
|
|
||||||
// Cache the most recently viewed read so that we can check whether we've reached the end of a pair.
|
|
||||||
SAMRecord read = null;
|
|
||||||
|
|
||||||
Map<SAMFileReader,GATKBAMFileSpan> positionUpdates = new IdentityHashMap<SAMFileReader,GATKBAMFileSpan>();
|
|
||||||
|
|
||||||
CloseableIterator<SAMRecord> iterator = getIterator(readers,shard,sortOrder == SAMFileHeader.SortOrder.coordinate);
|
|
||||||
while(!shard.isBufferFull() && iterator.hasNext()) {
|
|
||||||
final SAMRecord nextRead = iterator.next();
|
|
||||||
if ( read == null || (nextRead.getReferenceIndex().equals(read.getReferenceIndex())) ) {
|
|
||||||
// only add reads to the shard if they are on the same contig
|
|
||||||
read = nextRead;
|
|
||||||
shard.addRead(read);
|
|
||||||
noteFilePositionUpdate(positionUpdates,read);
|
|
||||||
} else {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// If the reads are sorted in queryname order, ensure that all reads
|
|
||||||
// having the same queryname become part of the same shard.
|
|
||||||
if(sortOrder == SAMFileHeader.SortOrder.queryname) {
|
|
||||||
while(iterator.hasNext()) {
|
|
||||||
SAMRecord nextRead = iterator.next();
|
|
||||||
if(read == null || !read.getReadName().equals(nextRead.getReadName()))
|
|
||||||
break;
|
|
||||||
shard.addRead(nextRead);
|
|
||||||
noteFilePositionUpdate(positionUpdates,nextRead);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
iterator.close();
|
|
||||||
|
|
||||||
// Make the updates specified by the reader.
|
|
||||||
for(Map.Entry<SAMFileReader,GATKBAMFileSpan> positionUpdate: positionUpdates.entrySet())
|
|
||||||
readerPositions.put(readers.getReaderID(positionUpdate.getKey()),positionUpdate.getValue());
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* TODO: delete this method once the experimental downsampling engine fork collapses
|
|
||||||
*/
|
|
||||||
@Deprecated
|
|
||||||
private void noteFilePositionUpdate(Map<SAMFileReader,GATKBAMFileSpan> positionMapping, SAMRecord read) {
|
|
||||||
GATKBAMFileSpan endChunk = new GATKBAMFileSpan(read.getFileSource().getFilePointer().getContentsFollowing());
|
|
||||||
positionMapping.put(read.getFileSource().getReader(),endChunk);
|
|
||||||
}
|
|
||||||
|
|
||||||
public StingSAMIterator seek(Shard shard) {
|
public StingSAMIterator seek(Shard shard) {
|
||||||
if(shard.buffersReads()) {
|
if(shard.buffersReads()) {
|
||||||
return shard.iterator();
|
return shard.iterator();
|
||||||
|
|
@ -559,19 +478,6 @@ public class SAMDataSource {
|
||||||
throw new ReviewedStingException("Unable to find id for reader associated with read " + read.getReadName());
|
throw new ReviewedStingException("Unable to find id for reader associated with read " + read.getReadName());
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Initialize the current reader positions
|
|
||||||
*
|
|
||||||
* TODO: delete this once the experimental downsampling engine fork collapses
|
|
||||||
*
|
|
||||||
* @param readers
|
|
||||||
*/
|
|
||||||
@Deprecated
|
|
||||||
private void initializeReaderPositions(SAMReaders readers) {
|
|
||||||
for(SAMReaderID id: getReaderIDs())
|
|
||||||
readerPositions.put(id,new GATKBAMFileSpan(readers.getReader(id).getFilePointerSpanningReads()));
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get the initial reader positions across all BAM files
|
* Get the initial reader positions across all BAM files
|
||||||
*
|
*
|
||||||
|
|
@ -646,7 +552,6 @@ public class SAMDataSource {
|
||||||
enableVerification,
|
enableVerification,
|
||||||
readProperties.useOriginalBaseQualities(),
|
readProperties.useOriginalBaseQualities(),
|
||||||
new ReleasingIterator(readers,StingSAMIteratorAdapter.adapt(mergingIterator)),
|
new ReleasingIterator(readers,StingSAMIteratorAdapter.adapt(mergingIterator)),
|
||||||
readProperties.getDownsamplingMethod().toFraction,
|
|
||||||
readProperties.getValidationExclusionList().contains(ValidationExclusion.TYPE.NO_READ_ORDER_VERIFICATION),
|
readProperties.getValidationExclusionList().contains(ValidationExclusion.TYPE.NO_READ_ORDER_VERIFICATION),
|
||||||
readProperties.getSupplementalFilters(),
|
readProperties.getSupplementalFilters(),
|
||||||
readProperties.getReadTransformers(),
|
readProperties.getReadTransformers(),
|
||||||
|
|
@ -704,7 +609,6 @@ public class SAMDataSource {
|
||||||
* @param enableVerification Verify the order of reads.
|
* @param enableVerification Verify the order of reads.
|
||||||
* @param useOriginalBaseQualities True if original base qualities should be used.
|
* @param useOriginalBaseQualities True if original base qualities should be used.
|
||||||
* @param wrappedIterator the raw data source.
|
* @param wrappedIterator the raw data source.
|
||||||
* @param downsamplingFraction whether and how much to downsample the reads themselves (not at a locus).
|
|
||||||
* @param noValidationOfReadOrder Another trigger for the verifying iterator? TODO: look into this.
|
* @param noValidationOfReadOrder Another trigger for the verifying iterator? TODO: look into this.
|
||||||
* @param supplementalFilters additional filters to apply to the reads.
|
* @param supplementalFilters additional filters to apply to the reads.
|
||||||
* @param defaultBaseQualities if the reads have incomplete quality scores, set them all to defaultBaseQuality.
|
* @param defaultBaseQualities if the reads have incomplete quality scores, set them all to defaultBaseQuality.
|
||||||
|
|
@ -715,7 +619,6 @@ public class SAMDataSource {
|
||||||
boolean enableVerification,
|
boolean enableVerification,
|
||||||
boolean useOriginalBaseQualities,
|
boolean useOriginalBaseQualities,
|
||||||
StingSAMIterator wrappedIterator,
|
StingSAMIterator wrappedIterator,
|
||||||
Double downsamplingFraction,
|
|
||||||
Boolean noValidationOfReadOrder,
|
Boolean noValidationOfReadOrder,
|
||||||
Collection<ReadFilter> supplementalFilters,
|
Collection<ReadFilter> supplementalFilters,
|
||||||
List<ReadTransformer> readTransformers,
|
List<ReadTransformer> readTransformers,
|
||||||
|
|
@ -727,11 +630,10 @@ public class SAMDataSource {
|
||||||
// * (otherwise we will process something that we may end up throwing away) * //
|
// * (otherwise we will process something that we may end up throwing away) * //
|
||||||
// ************************************************************************************************ //
|
// ************************************************************************************************ //
|
||||||
|
|
||||||
|
// Filters:
|
||||||
wrappedIterator = StingSAMIteratorAdapter.adapt(new CountingFilteringIterator(readMetrics,wrappedIterator,supplementalFilters));
|
wrappedIterator = StingSAMIteratorAdapter.adapt(new CountingFilteringIterator(readMetrics,wrappedIterator,supplementalFilters));
|
||||||
|
|
||||||
// If we're using the new downsampling implementation, apply downsampling iterators at this
|
// Downsampling:
|
||||||
// point in the read stream for most (but not all) cases
|
|
||||||
if ( ! readProperties.getDownsamplingMethod().useLegacyDownsampler ) {
|
|
||||||
|
|
||||||
// For locus traversals where we're downsampling to coverage by sample, assume that the downsamplers
|
// For locus traversals where we're downsampling to coverage by sample, assume that the downsamplers
|
||||||
// will be invoked downstream from us in LocusIteratorByState. This improves performance by avoiding
|
// will be invoked downstream from us in LocusIteratorByState. This improves performance by avoiding
|
||||||
|
|
@ -741,15 +643,11 @@ public class SAMDataSource {
|
||||||
readProperties.getDownsamplingMethod().type == DownsampleType.BY_SAMPLE &&
|
readProperties.getDownsamplingMethod().type == DownsampleType.BY_SAMPLE &&
|
||||||
readProperties.getDownsamplingMethod().toCoverage != null;
|
readProperties.getDownsamplingMethod().toCoverage != null;
|
||||||
|
|
||||||
|
// Apply downsampling iterators here only in cases where we know that LocusIteratorByState won't be
|
||||||
|
// doing any downsampling downstream of us
|
||||||
if ( ! assumeDownstreamLIBSDownsampling ) {
|
if ( ! assumeDownstreamLIBSDownsampling ) {
|
||||||
wrappedIterator = applyDownsamplingIterator(wrappedIterator);
|
wrappedIterator = applyDownsamplingIterator(wrappedIterator);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
// Use the old fractional downsampler only if we're using legacy downsampling:
|
|
||||||
// TODO: remove this statement (and associated classes) once the downsampling engine fork collapses
|
|
||||||
if ( readProperties.getDownsamplingMethod().useLegacyDownsampler && downsamplingFraction != null )
|
|
||||||
wrappedIterator = new LegacyDownsampleIterator(wrappedIterator, downsamplingFraction);
|
|
||||||
|
|
||||||
// unless they've said not to validate read ordering (!noValidationOfReadOrder) and we've enabled verification,
|
// unless they've said not to validate read ordering (!noValidationOfReadOrder) and we've enabled verification,
|
||||||
// verify the read ordering by applying a sort order iterator
|
// verify the read ordering by applying a sort order iterator
|
||||||
|
|
|
||||||
|
|
@ -50,35 +50,43 @@ public class DownsamplingMethod {
|
||||||
*/
|
*/
|
||||||
public final Double toFraction;
|
public final Double toFraction;
|
||||||
|
|
||||||
/**
|
|
||||||
* Use the legacy downsampling implementation instead of the newer implementation?
|
|
||||||
*/
|
|
||||||
public final boolean useLegacyDownsampler;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Expresses no downsampling applied at all.
|
* Expresses no downsampling applied at all.
|
||||||
*/
|
*/
|
||||||
public static final DownsamplingMethod NONE = new DownsamplingMethod(DownsampleType.NONE,null,null,false);
|
public static final DownsamplingMethod NONE = new DownsamplingMethod(DownsampleType.NONE, null, null);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Default type to use if no type is specified
|
* Default type to use if no type is specified
|
||||||
*/
|
*/
|
||||||
public static DownsampleType DEFAULT_DOWNSAMPLING_TYPE = DownsampleType.BY_SAMPLE;
|
public static final DownsampleType DEFAULT_DOWNSAMPLING_TYPE = DownsampleType.BY_SAMPLE;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Default target coverage for locus-based traversals
|
* Default target coverage for locus-based traversals
|
||||||
*/
|
*/
|
||||||
public static int DEFAULT_LOCUS_BASED_TRAVERSAL_DOWNSAMPLING_COVERAGE = 1000;
|
public static final int DEFAULT_LOCUS_TRAVERSAL_DOWNSAMPLING_COVERAGE = 1000;
|
||||||
|
|
||||||
public DownsamplingMethod( DownsampleType type, Integer toCoverage, Double toFraction, boolean useLegacyDownsampler ) {
|
/**
|
||||||
|
* Default downsampling method for locus-based traversals
|
||||||
|
*/
|
||||||
|
public static final DownsamplingMethod DEFAULT_LOCUS_TRAVERSAL_DOWNSAMPLING_METHOD =
|
||||||
|
new DownsamplingMethod(DEFAULT_DOWNSAMPLING_TYPE, DEFAULT_LOCUS_TRAVERSAL_DOWNSAMPLING_COVERAGE, null);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Default downsampling method for read-based traversals
|
||||||
|
*/
|
||||||
|
public static final DownsamplingMethod DEFAULT_READ_TRAVERSAL_DOWNSAMPLING_METHOD = NONE;
|
||||||
|
|
||||||
|
|
||||||
|
public DownsamplingMethod( DownsampleType type, Integer toCoverage, Double toFraction ) {
|
||||||
this.type = type != null ? type : DEFAULT_DOWNSAMPLING_TYPE;
|
this.type = type != null ? type : DEFAULT_DOWNSAMPLING_TYPE;
|
||||||
this.toCoverage = toCoverage;
|
|
||||||
this.toFraction = toFraction;
|
|
||||||
this.useLegacyDownsampler = useLegacyDownsampler;
|
|
||||||
|
|
||||||
if ( type == DownsampleType.NONE ) {
|
if ( type == DownsampleType.NONE ) {
|
||||||
toCoverage = null;
|
this.toCoverage = null;
|
||||||
toFraction = null;
|
this.toFraction = null;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
this.toCoverage = toCoverage;
|
||||||
|
this.toFraction = toFraction;
|
||||||
}
|
}
|
||||||
|
|
||||||
validate();
|
validate();
|
||||||
|
|
@ -87,34 +95,28 @@ public class DownsamplingMethod {
|
||||||
private void validate() {
|
private void validate() {
|
||||||
// Can't leave toFraction and toCoverage null unless type is NONE
|
// Can't leave toFraction and toCoverage null unless type is NONE
|
||||||
if ( type != DownsampleType.NONE && toFraction == null && toCoverage == null )
|
if ( type != DownsampleType.NONE && toFraction == null && toCoverage == null )
|
||||||
throw new UserException.CommandLineException("Must specify either toFraction or toCoverage when downsampling.");
|
throw new UserException("Must specify either toFraction or toCoverage when downsampling.");
|
||||||
|
|
||||||
// Fraction and coverage cannot both be specified.
|
// Fraction and coverage cannot both be specified.
|
||||||
if ( toFraction != null && toCoverage != null )
|
if ( toFraction != null && toCoverage != null )
|
||||||
throw new UserException.CommandLineException("Downsampling coverage and fraction are both specified. Please choose only one.");
|
throw new UserException("Downsampling coverage and fraction are both specified. Please choose only one.");
|
||||||
|
|
||||||
// toCoverage must be > 0 when specified
|
// toCoverage must be > 0 when specified
|
||||||
if ( toCoverage != null && toCoverage <= 0 ) {
|
if ( toCoverage != null && toCoverage <= 0 ) {
|
||||||
throw new UserException.CommandLineException("toCoverage must be > 0 when downsampling to coverage");
|
throw new UserException("toCoverage must be > 0 when downsampling to coverage");
|
||||||
}
|
}
|
||||||
|
|
||||||
// toFraction must be >= 0.0 and <= 1.0 when specified
|
// toFraction must be >= 0.0 and <= 1.0 when specified
|
||||||
if ( toFraction != null && (toFraction < 0.0 || toFraction > 1.0) ) {
|
if ( toFraction != null && (toFraction < 0.0 || toFraction > 1.0) ) {
|
||||||
throw new UserException.CommandLineException("toFraction must be >= 0.0 and <= 1.0 when downsampling to a fraction of reads");
|
throw new UserException("toFraction must be >= 0.0 and <= 1.0 when downsampling to a fraction of reads");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void checkCompatibilityWithWalker( Walker walker ) {
|
public void checkCompatibilityWithWalker( Walker walker ) {
|
||||||
boolean isLocusTraversal = walker instanceof LocusWalker || walker instanceof ActiveRegionWalker;
|
boolean isLocusTraversal = walker instanceof LocusWalker || walker instanceof ActiveRegionWalker;
|
||||||
|
|
||||||
if ( ! isLocusTraversal && useLegacyDownsampler && toCoverage != null ) {
|
if ( isLocusTraversal && type == DownsampleType.ALL_READS && toCoverage != null ) {
|
||||||
throw new UserException.CommandLineException("Downsampling to coverage for read-based traversals (eg., ReadWalkers) is not supported in the legacy downsampling implementation. " +
|
throw new UserException("Downsampling to coverage with the ALL_READS method for locus-based traversals (eg., LocusWalkers) is not currently supported (though it is supported for ReadWalkers).");
|
||||||
"The newer downsampling implementation does not have this limitation.");
|
|
||||||
}
|
|
||||||
|
|
||||||
if ( isLocusTraversal && ! useLegacyDownsampler && type == DownsampleType.ALL_READS && toCoverage != null ) {
|
|
||||||
throw new UserException.CommandLineException("Downsampling to coverage with the ALL_READS method for locus-based traversals (eg., LocusWalkers) is not yet supported in the new downsampling implementation (though it is supported for ReadWalkers). " +
|
|
||||||
"You can run with --use_legacy_downsampler for a broken and poorly-maintained implementation of ALL_READS to-coverage downsampling, but this is not recommended.");
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -128,31 +130,22 @@ public class DownsamplingMethod {
|
||||||
builder.append(String.format("Method: %s, ", type));
|
builder.append(String.format("Method: %s, ", type));
|
||||||
|
|
||||||
if ( toCoverage != null ) {
|
if ( toCoverage != null ) {
|
||||||
builder.append(String.format("Target Coverage: %d, ", toCoverage));
|
builder.append(String.format("Target Coverage: %d", toCoverage));
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
builder.append(String.format("Target Fraction: %.2f, ", toFraction));
|
builder.append(String.format("Target Fraction: %.2f", toFraction));
|
||||||
}
|
|
||||||
|
|
||||||
if ( useLegacyDownsampler ) {
|
|
||||||
builder.append("Using the legacy downsampling implementation");
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
builder.append("Using the new downsampling implementation");
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return builder.toString();
|
return builder.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
public static DownsamplingMethod getDefaultDownsamplingMethod( Walker walker, boolean useLegacyDownsampler ) {
|
public static DownsamplingMethod getDefaultDownsamplingMethod( Walker walker ) {
|
||||||
if ( walker instanceof LocusWalker || walker instanceof ActiveRegionWalker ) {
|
if ( walker instanceof LocusWalker || walker instanceof ActiveRegionWalker ) {
|
||||||
return new DownsamplingMethod(DEFAULT_DOWNSAMPLING_TYPE, DEFAULT_LOCUS_BASED_TRAVERSAL_DOWNSAMPLING_COVERAGE,
|
return DEFAULT_LOCUS_TRAVERSAL_DOWNSAMPLING_METHOD;
|
||||||
null, useLegacyDownsampler);
|
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
// Downsampling is off by default for non-locus-based traversals
|
return DEFAULT_READ_TRAVERSAL_DOWNSAMPLING_METHOD;
|
||||||
return new DownsamplingMethod(DownsampleType.NONE, null, null, useLegacyDownsampler);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -111,9 +111,6 @@ public class WindowMaker implements Iterable<WindowMaker.WindowMakerIterator>, I
|
||||||
this.sourceInfo = shard.getReadProperties();
|
this.sourceInfo = shard.getReadProperties();
|
||||||
this.readIterator = new GATKSAMIterator(iterator);
|
this.readIterator = new GATKSAMIterator(iterator);
|
||||||
|
|
||||||
// Use the legacy version of LocusIteratorByState if legacy downsampling was requested:
|
|
||||||
if ( sourceInfo.getDownsamplingMethod().useLegacyDownsampler )
|
|
||||||
throw new IllegalArgumentException("legacy downsampler no longer supported in the window maker");
|
|
||||||
this.libs = new LocusIteratorByState(readIterator,sourceInfo,genomeLocParser,sampleNames);
|
this.libs = new LocusIteratorByState(readIterator,sourceInfo,genomeLocParser,sampleNames);
|
||||||
this.sourceIterator = new PeekableIterator<AlignmentContext>(libs);
|
this.sourceIterator = new PeekableIterator<AlignmentContext>(libs);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,77 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright (c) 2012 The Broad Institute
|
|
||||||
*
|
|
||||||
* Permission is hereby granted, free of charge, to any person
|
|
||||||
* obtaining a copy of this software and associated documentation
|
|
||||||
* files (the "Software"), to deal in the Software without
|
|
||||||
* restriction, including without limitation the rights to use,
|
|
||||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
||||||
* copies of the Software, and to permit persons to whom the
|
|
||||||
* Software is furnished to do so, subject to the following
|
|
||||||
* conditions:
|
|
||||||
*
|
|
||||||
* The above copyright notice and this permission notice shall be
|
|
||||||
* included in all copies or substantial portions of the Software.
|
|
||||||
*
|
|
||||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
||||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
|
||||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
||||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
|
||||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
||||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
||||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
|
||||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.iterators;
|
|
||||||
|
|
||||||
import net.sf.samtools.SAMRecord;
|
|
||||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
|
||||||
|
|
||||||
import java.util.Iterator;
|
|
||||||
|
|
||||||
|
|
||||||
public class LegacyDownsampleIterator implements StingSAMIterator {
|
|
||||||
|
|
||||||
StingSAMIterator it;
|
|
||||||
int cutoff;
|
|
||||||
SAMRecord next;
|
|
||||||
|
|
||||||
public LegacyDownsampleIterator(StingSAMIterator it, double fraction) {
|
|
||||||
this.it = it;
|
|
||||||
cutoff = (int)(fraction * 10000);
|
|
||||||
next = getNextRecord();
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean hasNext() {
|
|
||||||
return next != null;
|
|
||||||
}
|
|
||||||
|
|
||||||
public SAMRecord next() {
|
|
||||||
SAMRecord result = next;
|
|
||||||
next = getNextRecord();
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void remove() {
|
|
||||||
throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!");
|
|
||||||
}
|
|
||||||
|
|
||||||
private SAMRecord getNextRecord() {
|
|
||||||
while ( true ) {
|
|
||||||
if ( !it.hasNext() )
|
|
||||||
return null;
|
|
||||||
SAMRecord rec = it.next();
|
|
||||||
if ( GenomeAnalysisEngine.getRandomGenerator().nextInt(10000) < cutoff )
|
|
||||||
return rec;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public void close() {
|
|
||||||
it.close();
|
|
||||||
}
|
|
||||||
|
|
||||||
public Iterator<SAMRecord> iterator() {
|
|
||||||
return this;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -1,153 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright (c) 2012 The Broad Institute
|
|
||||||
*
|
|
||||||
* Permission is hereby granted, free of charge, to any person
|
|
||||||
* obtaining a copy of this software and associated documentation
|
|
||||||
* files (the "Software"), to deal in the Software without
|
|
||||||
* restriction, including without limitation the rights to use,
|
|
||||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
||||||
* copies of the Software, and to permit persons to whom the
|
|
||||||
* Software is furnished to do so, subject to the following
|
|
||||||
* conditions:
|
|
||||||
*
|
|
||||||
* The above copyright notice and this permission notice shall be
|
|
||||||
* included in all copies or substantial portions of the Software.
|
|
||||||
*
|
|
||||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
||||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
|
||||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
||||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
|
||||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
||||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
||||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
|
||||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package org.broadinstitute.sting.utils;
|
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
|
||||||
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.Collection;
|
|
||||||
import java.util.Iterator;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* THIS IMPLEMENTATION IS BROKEN AND WILL BE REMOVED ONCE THE DOWNSAMPLING ENGINE FORK COLLAPSES
|
|
||||||
*
|
|
||||||
* Randomly downsample from a stream of elements. This algorithm is a direct,
|
|
||||||
* naive implementation of reservoir downsampling as described in "Random Sampling
|
|
||||||
* with a Reservoir" (Vitter 1985). At time of writing, this paper is located here:
|
|
||||||
* http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.138.784&rep=rep1&type=pdf
|
|
||||||
|
|
||||||
* @author mhanna
|
|
||||||
* @version 0.1
|
|
||||||
*/
|
|
||||||
public class LegacyReservoirDownsampler<T> {
|
|
||||||
/**
|
|
||||||
* The reservoir of elements tracked by this downsampler.
|
|
||||||
*/
|
|
||||||
private final ArrayList<T> reservoir;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* The maximum number of elements that can be held in the reservoir and returned in a single batch.
|
|
||||||
*/
|
|
||||||
private final int maxElements;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Create a new, empty downsampler whose reservoir holds at most the given number of elements.
|
|
||||||
* @param maxElements The maximum number of elements the reservoir may hold; must not be negative.
|
|
||||||
*/
|
|
||||||
public LegacyReservoirDownsampler(final int maxElements) {
|
|
||||||
if(maxElements < 0)
|
|
||||||
throw new ReviewedStingException("Unable to work with an negative size collection of elements");
|
|
||||||
this.reservoir = new ArrayList<T>(maxElements);
|
|
||||||
this.maxElements = maxElements;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
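* Offers an element to the reservoir and returns whichever element was eliminated as a result.
|
|
||||||
* @param element The element to add to the reservoir.
|
|
||||||
* @return The eliminated element (the offered element itself if the reservoir has zero capacity); null if no element has been eliminated.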
|
|
||||||
*/
|
|
||||||
public T add(T element) {
|
|
||||||
if(maxElements <= 0)
|
|
||||||
return element;
|
|
||||||
else if(reservoir.size() < maxElements) {
|
|
||||||
reservoir.add(element);
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
// Get a uniformly distributed int. If the chosen slot lives within the partition, replace the entry in that slot with the newest entry.
|
|
||||||
int slot = GenomeAnalysisEngine.getRandomGenerator().nextInt(maxElements);
|
|
||||||
if(slot >= 0 && slot < maxElements) {
|
|
||||||
T displaced = reservoir.get(slot);
|
|
||||||
reservoir.set(slot,element);
|
|
||||||
return displaced;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
return element;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean addAll(Collection<? extends T> elements) {
|
|
||||||
boolean added = false;
|
|
||||||
for(T element: elements)
|
|
||||||
added |= (add(element) != null);
|
|
||||||
return added;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns the contents of this reservoir, which hold at most maxElements elements. Note that the return value is the reservoir's own backing list, not a copy.
|
|
||||||
* @return The downsampled contents of this reservoir.
|
|
||||||
*/
|
|
||||||
public Collection<T> getDownsampledContents() {
|
|
||||||
return reservoir;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void clear() {
|
|
||||||
reservoir.clear();
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean isEmpty() {
|
|
||||||
return reservoir.isEmpty();
|
|
||||||
}
|
|
||||||
|
|
||||||
public int size() {
|
|
||||||
return reservoir.size();
|
|
||||||
}
|
|
||||||
|
|
||||||
public Iterator<T> iterator() {
|
|
||||||
return reservoir.iterator();
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean contains(Object o) {
|
|
||||||
return reservoir.contains(o);
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean containsAll(Collection<?> elements) {
|
|
||||||
return reservoir.containsAll(elements);
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean retainAll(Collection<?> elements) {
|
|
||||||
return reservoir.retainAll(elements);
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean remove(Object o) {
|
|
||||||
return reservoir.remove(o);
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean removeAll(Collection<?> elements) {
|
|
||||||
return reservoir.removeAll(elements);
|
|
||||||
}
|
|
||||||
|
|
||||||
public Object[] toArray() {
|
|
||||||
Object[] contents = new Object[reservoir.size()];
|
|
||||||
reservoir.toArray(contents);
|
|
||||||
return contents;
|
|
||||||
}
|
|
||||||
|
|
||||||
public <T> T[] toArray(T[] array) {
|
|
||||||
return reservoir.toArray(array);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -86,7 +86,7 @@ public class DownsamplerBenchmark extends ReadProcessingBenchmark {
|
||||||
},
|
},
|
||||||
PER_SAMPLE {
|
PER_SAMPLE {
|
||||||
@Override
|
@Override
|
||||||
DownsamplingMethod create() { return DownsamplingMethod.getDefaultDownsamplingMethod(new CountLoci(), false); }
|
DownsamplingMethod create() { return DownsamplingMethod.getDefaultDownsamplingMethod(new CountLoci()); }
|
||||||
};
|
};
|
||||||
abstract DownsamplingMethod create();
|
abstract DownsamplingMethod create();
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -52,7 +52,7 @@ public class ReadShardBalancerUnitTest extends BaseTest {
|
||||||
* Tests to ensure that ReadShardBalancer works as expected and does not place shard boundaries
|
* Tests to ensure that ReadShardBalancer works as expected and does not place shard boundaries
|
||||||
* at inappropriate places, such as within an alignment start position
|
* at inappropriate places, such as within an alignment start position
|
||||||
*/
|
*/
|
||||||
private static class ExperimentalReadShardBalancerTest extends TestDataProvider {
|
private static class ReadShardBalancerTest extends TestDataProvider {
|
||||||
private int numContigs;
|
private int numContigs;
|
||||||
private int numStacksPerContig;
|
private int numStacksPerContig;
|
||||||
private int stackSize;
|
private int stackSize;
|
||||||
|
|
@ -63,19 +63,19 @@ public class ReadShardBalancerUnitTest extends BaseTest {
|
||||||
private SAMFileHeader header;
|
private SAMFileHeader header;
|
||||||
private SAMReaderID testBAM;
|
private SAMReaderID testBAM;
|
||||||
|
|
||||||
public ExperimentalReadShardBalancerTest( int numContigs,
|
public ReadShardBalancerTest( int numContigs,
|
||||||
int numStacksPerContig,
|
int numStacksPerContig,
|
||||||
int stackSize,
|
int stackSize,
|
||||||
int numUnmappedReads,
|
int numUnmappedReads,
|
||||||
int downsamplingTargetCoverage ) {
|
int downsamplingTargetCoverage ) {
|
||||||
super(ExperimentalReadShardBalancerTest.class);
|
super(ReadShardBalancerTest.class);
|
||||||
|
|
||||||
this.numContigs = numContigs;
|
this.numContigs = numContigs;
|
||||||
this.numStacksPerContig = numStacksPerContig;
|
this.numStacksPerContig = numStacksPerContig;
|
||||||
this.stackSize = stackSize;
|
this.stackSize = stackSize;
|
||||||
this.numUnmappedReads = numUnmappedReads;
|
this.numUnmappedReads = numUnmappedReads;
|
||||||
|
|
||||||
this.downsamplingMethod = new DownsamplingMethod(DownsampleType.BY_SAMPLE, downsamplingTargetCoverage, null, false);
|
this.downsamplingMethod = new DownsamplingMethod(DownsampleType.BY_SAMPLE, downsamplingTargetCoverage, null);
|
||||||
this.expectedReadCount = Math.min(stackSize, downsamplingTargetCoverage) * numStacksPerContig * numContigs + numUnmappedReads;
|
this.expectedReadCount = Math.min(stackSize, downsamplingTargetCoverage) * numStacksPerContig * numContigs + numUnmappedReads;
|
||||||
|
|
||||||
setName(String.format("%s: numContigs=%d numStacksPerContig=%d stackSize=%d numUnmappedReads=%d downsamplingTargetCoverage=%d",
|
setName(String.format("%s: numContigs=%d numStacksPerContig=%d stackSize=%d numUnmappedReads=%d downsamplingTargetCoverage=%d",
|
||||||
|
|
@ -176,8 +176,8 @@ public class ReadShardBalancerUnitTest extends BaseTest {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@DataProvider(name = "ExperimentalReadShardBalancerTestDataProvider")
|
@DataProvider(name = "ReadShardBalancerTestDataProvider")
|
||||||
public Object[][] createExperimentalReadShardBalancerTests() {
|
public Object[][] createReadShardBalancerTests() {
|
||||||
for ( int numContigs = 1; numContigs <= 3; numContigs++ ) {
|
for ( int numContigs = 1; numContigs <= 3; numContigs++ ) {
|
||||||
for ( int numStacksPerContig : Arrays.asList(1, 2, 4) ) {
|
for ( int numStacksPerContig : Arrays.asList(1, 2, 4) ) {
|
||||||
// Use crucial read shard boundary values as the stack sizes
|
// Use crucial read shard boundary values as the stack sizes
|
||||||
|
|
@ -185,18 +185,18 @@ public class ReadShardBalancerUnitTest extends BaseTest {
|
||||||
for ( int numUnmappedReads : Arrays.asList(0, ReadShard.DEFAULT_MAX_READS / 2, ReadShard.DEFAULT_MAX_READS * 2) ) {
|
for ( int numUnmappedReads : Arrays.asList(0, ReadShard.DEFAULT_MAX_READS / 2, ReadShard.DEFAULT_MAX_READS * 2) ) {
|
||||||
// The first value will result in no downsampling at all, the others in some downsampling
|
// The first value will result in no downsampling at all, the others in some downsampling
|
||||||
for ( int downsamplingTargetCoverage : Arrays.asList(ReadShard.DEFAULT_MAX_READS * 10, ReadShard.DEFAULT_MAX_READS, ReadShard.DEFAULT_MAX_READS / 2) ) {
|
for ( int downsamplingTargetCoverage : Arrays.asList(ReadShard.DEFAULT_MAX_READS * 10, ReadShard.DEFAULT_MAX_READS, ReadShard.DEFAULT_MAX_READS / 2) ) {
|
||||||
new ExperimentalReadShardBalancerTest(numContigs, numStacksPerContig, stackSize, numUnmappedReads, downsamplingTargetCoverage);
|
new ReadShardBalancerTest(numContigs, numStacksPerContig, stackSize, numUnmappedReads, downsamplingTargetCoverage);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return ExperimentalReadShardBalancerTest.getTests(ExperimentalReadShardBalancerTest.class);
|
return ReadShardBalancerTest.getTests(ReadShardBalancerTest.class);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(dataProvider = "ExperimentalReadShardBalancerTestDataProvider")
|
@Test(dataProvider = "ReadShardBalancerTestDataProvider")
|
||||||
public void runExperimentalReadShardBalancerTest( ExperimentalReadShardBalancerTest test ) {
|
public void runReadShardBalancerTest( ReadShardBalancerTest test ) {
|
||||||
logger.warn("Running test: " + test);
|
logger.warn("Running test: " + test);
|
||||||
|
|
||||||
test.run();
|
test.run();
|
||||||
|
|
|
||||||
|
|
@ -31,10 +31,7 @@ import org.broadinstitute.sting.BaseTest;
|
||||||
import org.broadinstitute.sting.commandline.Tags;
|
import org.broadinstitute.sting.commandline.Tags;
|
||||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||||
import org.broadinstitute.sting.gatk.datasources.providers.ReadShardDataProvider;
|
import org.broadinstitute.sting.gatk.datasources.providers.ReadShardDataProvider;
|
||||||
import org.broadinstitute.sting.gatk.datasources.reads.LegacyReadShardBalancer;
|
import org.broadinstitute.sting.gatk.datasources.reads.*;
|
||||||
import org.broadinstitute.sting.gatk.datasources.reads.SAMDataSource;
|
|
||||||
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
|
|
||||||
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
|
|
||||||
import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation;
|
import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation;
|
||||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.qc.CountReads;
|
import org.broadinstitute.sting.gatk.walkers.qc.CountReads;
|
||||||
|
|
@ -139,7 +136,7 @@ public class TraverseReadsUnitTest extends BaseTest {
|
||||||
@Test
|
@Test
|
||||||
public void testUnmappedReadCount() {
|
public void testUnmappedReadCount() {
|
||||||
SAMDataSource dataSource = new SAMDataSource(bamList,new ThreadAllocation(),null,genomeLocParser);
|
SAMDataSource dataSource = new SAMDataSource(bamList,new ThreadAllocation(),null,genomeLocParser);
|
||||||
Iterable<Shard> shardStrategy = dataSource.createShardIteratorOverAllReads(new LegacyReadShardBalancer());
|
Iterable<Shard> shardStrategy = dataSource.createShardIteratorOverAllReads(new ReadShardBalancer());
|
||||||
|
|
||||||
countReadWalker.initialize();
|
countReadWalker.initialize();
|
||||||
Object accumulator = countReadWalker.reduceInit();
|
Object accumulator = countReadWalker.reduceInit();
|
||||||
|
|
|
||||||
|
|
@ -1,191 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright (c) 2012 The Broad Institute
|
|
||||||
*
|
|
||||||
* Permission is hereby granted, free of charge, to any person
|
|
||||||
* obtaining a copy of this software and associated documentation
|
|
||||||
* files (the "Software"), to deal in the Software without
|
|
||||||
* restriction, including without limitation the rights to use,
|
|
||||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
||||||
* copies of the Software, and to permit persons to whom the
|
|
||||||
* Software is furnished to do so, subject to the following
|
|
||||||
* conditions:
|
|
||||||
*
|
|
||||||
* The above copyright notice and this permission notice shall be
|
|
||||||
* included in all copies or substantial portions of the Software.
|
|
||||||
*
|
|
||||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
||||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
|
||||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
||||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
|
||||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
||||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
||||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
|
||||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package org.broadinstitute.sting.utils;
|
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
|
||||||
import org.testng.Assert;
|
|
||||||
import org.testng.annotations.Test;
|
|
||||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
|
||||||
import net.sf.samtools.SAMRecord;
|
|
||||||
import net.sf.samtools.SAMFileHeader;
|
|
||||||
|
|
||||||
import java.util.*;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Basic tests to prove the integrity of the reservoir downsampler.
|
|
||||||
* At the moment, always run tests on SAM records as that's the task
|
|
||||||
* for which the downsampler was conceived.
|
|
||||||
*
|
|
||||||
* @author mhanna
|
|
||||||
* @version 0.1
|
|
||||||
*/
|
|
||||||
public class LegacyReservoirDownsamplerUnitTest {
|
|
||||||
private static final SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1,1,200);
|
|
||||||
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testEmptyIterator() {
|
|
||||||
LegacyReservoirDownsampler<SAMRecord> downsampler = new LegacyReservoirDownsampler<SAMRecord>(1);
|
|
||||||
Assert.assertTrue(downsampler.isEmpty(),"Downsampler is not empty but should be.");
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testOneElementWithPoolSizeOne() {
|
|
||||||
List<GATKSAMRecord> reads = Collections.singletonList(ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,76));
|
|
||||||
LegacyReservoirDownsampler<SAMRecord> downsampler = new LegacyReservoirDownsampler<SAMRecord>(1);
|
|
||||||
downsampler.addAll(reads);
|
|
||||||
|
|
||||||
Assert.assertFalse(downsampler.isEmpty(),"Downsampler is empty but shouldn't be");
|
|
||||||
Collection<SAMRecord> batchedReads = downsampler.getDownsampledContents();
|
|
||||||
Assert.assertEquals(batchedReads.size(), 1, "Downsampler is returning the wrong number of reads");
|
|
||||||
Assert.assertSame(batchedReads.iterator().next(), reads.get(0), "Downsampler is returning an incorrect read");
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testOneElementWithPoolSizeGreaterThanOne() {
|
|
||||||
List<GATKSAMRecord> reads = Collections.singletonList(ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,76));
|
|
||||||
LegacyReservoirDownsampler<SAMRecord> downsampler = new LegacyReservoirDownsampler<SAMRecord>(5);
|
|
||||||
downsampler.addAll(reads);
|
|
||||||
|
|
||||||
Assert.assertFalse(downsampler.isEmpty(),"Downsampler is empty but shouldn't be");
|
|
||||||
Collection<SAMRecord> batchedReads = downsampler.getDownsampledContents();
|
|
||||||
Assert.assertEquals(batchedReads.size(), 1, "Downsampler is returning the wrong number of reads");
|
|
||||||
Assert.assertSame(batchedReads.iterator().next(), reads.get(0), "Downsampler is returning an incorrect read");
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testPoolFilledPartially() {
|
|
||||||
List<SAMRecord> reads = new ArrayList<SAMRecord>();
|
|
||||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,76));
|
|
||||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read2",0,1,76));
|
|
||||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read3",0,1,76));
|
|
||||||
LegacyReservoirDownsampler<SAMRecord> downsampler = new LegacyReservoirDownsampler<SAMRecord>(5);
|
|
||||||
downsampler.addAll(reads);
|
|
||||||
|
|
||||||
Assert.assertFalse(downsampler.isEmpty(),"Downsampler is empty but shouldn't be");
|
|
||||||
List<SAMRecord> batchedReads = new ArrayList<SAMRecord>(downsampler.getDownsampledContents());
|
|
||||||
Assert.assertEquals(batchedReads.size(), 3, "Downsampler is returning the wrong number of reads");
|
|
||||||
|
|
||||||
Assert.assertSame(batchedReads.get(0), reads.get(0), "Downsampler read 1 is incorrect");
|
|
||||||
Assert.assertSame(batchedReads.get(1), reads.get(1), "Downsampler read 2 is incorrect");
|
|
||||||
Assert.assertSame(batchedReads.get(2), reads.get(2), "Downsampler read 3 is incorrect");
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testPoolFilledExactly() {
|
|
||||||
List<SAMRecord> reads = new ArrayList<SAMRecord>();
|
|
||||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,76));
|
|
||||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read2",0,1,76));
|
|
||||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read3",0,1,76));
|
|
||||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read4",0,1,76));
|
|
||||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read5",0,1,76));
|
|
||||||
LegacyReservoirDownsampler<SAMRecord> downsampler = new LegacyReservoirDownsampler<SAMRecord>(5);
|
|
||||||
downsampler.addAll(reads);
|
|
||||||
|
|
||||||
Assert.assertFalse(downsampler.isEmpty(),"Downsampler is empty but shouldn't be");
|
|
||||||
List<SAMRecord> batchedReads = new ArrayList<SAMRecord>(downsampler.getDownsampledContents());
|
|
||||||
Assert.assertEquals(batchedReads.size(), 5, "Downsampler is returning the wrong number of reads");
|
|
||||||
Assert.assertSame(batchedReads.iterator().next(), reads.get(0), "Downsampler is returning an incorrect read");
|
|
||||||
|
|
||||||
Assert.assertSame(batchedReads.get(0), reads.get(0), "Downsampler read 1 is incorrect");
|
|
||||||
Assert.assertSame(batchedReads.get(1), reads.get(1), "Downsampler read 2 is incorrect");
|
|
||||||
Assert.assertSame(batchedReads.get(2), reads.get(2), "Downsampler read 3 is incorrect");
|
|
||||||
Assert.assertSame(batchedReads.get(3), reads.get(3), "Downsampler read 4 is incorrect");
|
|
||||||
Assert.assertSame(batchedReads.get(4), reads.get(4), "Downsampler read 5 is incorrect");
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testLargerPileWithZeroElementPool() {
|
|
||||||
List<SAMRecord> reads = new ArrayList<SAMRecord>();
|
|
||||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,76));
|
|
||||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read2",0,1,76));
|
|
||||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read3",0,1,76));
|
|
||||||
LegacyReservoirDownsampler<SAMRecord> downsampler = new LegacyReservoirDownsampler<SAMRecord>(0);
|
|
||||||
downsampler.addAll(reads);
|
|
||||||
|
|
||||||
Assert.assertTrue(downsampler.isEmpty(),"Downsampler isn't empty but should be");
|
|
||||||
List<SAMRecord> batchedReads = new ArrayList<SAMRecord>(downsampler.getDownsampledContents());
|
|
||||||
Assert.assertEquals(batchedReads.size(), 0, "Downsampler is returning the wrong number of reads");
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
 * Checks that a downsampler constructed with 1 keeps exactly one of the offered reads.
 *
 * Five artificial reads are offered; the test requires the downsampler to be non-empty,
 * to return exactly one read, and for that read to be one of the five inputs. It does
 * not pin which of the five reads is retained.
 */
@Test
|
|
||||||
public void testLargerPileWithSingleElementPool() {
|
|
||||||
List<SAMRecord> reads = new ArrayList<SAMRecord>();
|
|
||||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,76));
|
|
||||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read2",0,1,76));
|
|
||||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read3",0,1,76));
|
|
||||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read4",0,1,76));
|
|
||||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read5",0,1,76));
|
|
||||||
// NOTE(review): the constructor argument (1) is presumably the maximum number of
|
|
||||||
// retained elements, per the test name -- confirm against LegacyReservoirDownsampler.
LegacyReservoirDownsampler<SAMRecord> downsampler = new LegacyReservoirDownsampler<SAMRecord>(1);
|
|
||||||
downsampler.addAll(reads);
|
|
||||||
|
|
||||||
Assert.assertFalse(downsampler.isEmpty(),"Downsampler is empty but shouldn't be");
|
|
||||||
// Copy the contents into a fresh list so it can be indexed with get(0).
List<SAMRecord> batchedReads = new ArrayList<SAMRecord>(downsampler.getDownsampledContents());
|
|
||||||
Assert.assertEquals(batchedReads.size(), 1, "Downsampler is returning the wrong number of reads");
|
|
||||||
// Membership check only: any one of the five inputs is accepted as the survivor.
Assert.assertTrue(reads.contains(batchedReads.get(0)),"Downsampler is returning a bad read.");
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
 * Checks that one downsampler instance can be reused across successive batches of reads
 * when it is cleared between batches.
 *
 * Three rounds are run against a single {@code LegacyReservoirDownsampler} built with the
 * argument 5, with batches of 1, 2 and 2 reads. Before the second and third rounds both
 * the input list and the downsampler are cleared. After each round the test requires the
 * downsampler to be non-empty and to return every read of that batch, in insertion order.
 */
@Test
|
|
||||||
public void testFillingAcrossLoci() {
|
|
||||||
List<SAMRecord> reads = new ArrayList<SAMRecord>();
|
|
||||||
// Round 1: a single read.
|
|
||||||
// NOTE(review): the fourth createArtificialRead argument changes 1 -> 2 -> 3 across
|
|
||||||
// rounds; presumably it is the alignment start (the "locus") -- confirm against
|
|
||||||
// ArtificialSAMUtils.
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,76));
|
|
||||||
LegacyReservoirDownsampler<SAMRecord> downsampler = new LegacyReservoirDownsampler<SAMRecord>(5);
|
|
||||||
downsampler.addAll(reads);
|
|
||||||
|
|
||||||
Assert.assertFalse(downsampler.isEmpty(),"Downsampler is empty but shouldn't be");
|
|
||||||
// batchedReads is a fresh copy, so the later reads.clear() does not alter it.
List<SAMRecord> batchedReads = new ArrayList<SAMRecord>(downsampler.getDownsampledContents());
|
|
||||||
Assert.assertEquals(batchedReads.size(), 1, "Downsampler is returning the wrong number of reads");
|
|
||||||
Assert.assertEquals(batchedReads.get(0), reads.get(0), "Downsampler is returning an incorrect read.");
|
|
||||||
|
|
||||||
// Round 2: replace the input with two new reads.
reads.clear();
|
|
||||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read2",0,2,76));
|
|
||||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read3",0,2,76));
|
|
||||||
|
|
||||||
// Clear the downsampler before refilling; the size assertion below (2) expects
|
|
||||||
// nothing from round 1 to remain.
downsampler.clear();
|
|
||||||
downsampler.addAll(reads);
|
|
||||||
|
|
||||||
Assert.assertFalse(downsampler.isEmpty(),"Downsampler is empty but shouldn't be");
|
|
||||||
batchedReads = new ArrayList<SAMRecord>(downsampler.getDownsampledContents());
|
|
||||||
Assert.assertEquals(batchedReads.size(), 2, "Downsampler is returning the wrong number of reads");
|
|
||||||
Assert.assertEquals(batchedReads.get(0), reads.get(0), "Downsampler is returning an incorrect read.");
|
|
||||||
Assert.assertEquals(batchedReads.get(1), reads.get(1), "Downsampler is returning an incorrect read.");
|
|
||||||
|
|
||||||
// Round 3: same procedure with another pair of reads.
reads.clear();
|
|
||||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read4",0,3,76));
|
|
||||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read5",0,3,76));
|
|
||||||
|
|
||||||
downsampler.clear();
|
|
||||||
downsampler.addAll(reads);
|
|
||||||
|
|
||||||
Assert.assertFalse(downsampler.isEmpty(),"Downsampler is empty but shouldn't be");
|
|
||||||
batchedReads = new ArrayList<SAMRecord>(downsampler.getDownsampledContents());
|
|
||||||
Assert.assertEquals(batchedReads.size(), 2, "Downsampler is returning the wrong number of reads");
|
|
||||||
Assert.assertEquals(batchedReads.get(0), reads.get(0), "Downsampler is returning an incorrect read.");
|
|
||||||
Assert.assertEquals(batchedReads.get(1), reads.get(1), "Downsampler is returning an incorrect read.");
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
@ -474,8 +474,8 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest {
|
||||||
|
|
||||||
final boolean downsample = downsampleTo != -1;
|
final boolean downsample = downsampleTo != -1;
|
||||||
final DownsamplingMethod downsampler = downsample
|
final DownsamplingMethod downsampler = downsample
|
||||||
? new DownsamplingMethod(DownsampleType.BY_SAMPLE, downsampleTo, null, false)
|
? new DownsamplingMethod(DownsampleType.BY_SAMPLE, downsampleTo, null)
|
||||||
: new DownsamplingMethod(DownsampleType.NONE, null, null, false);
|
: new DownsamplingMethod(DownsampleType.NONE, null, null);
|
||||||
|
|
||||||
final ArtificialBAMBuilder bamBuilder = new ArtificialBAMBuilder(header.getSequenceDictionary(), nReadsPerLocus, nLoci);
|
final ArtificialBAMBuilder bamBuilder = new ArtificialBAMBuilder(header.getSequenceDictionary(), nReadsPerLocus, nLoci);
|
||||||
bamBuilder.createAndSetHeader(nSamples).setReadLength(readLength).setAlignmentStart(1);
|
bamBuilder.createAndSetHeader(nSamples).setReadLength(readLength).setAlignmentStart(1);
|
||||||
|
|
@ -635,8 +635,8 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest {
|
||||||
|
|
||||||
final boolean downsample = downsampleTo != -1;
|
final boolean downsample = downsampleTo != -1;
|
||||||
final DownsamplingMethod downsampler = downsample
|
final DownsamplingMethod downsampler = downsample
|
||||||
? new DownsamplingMethod(DownsampleType.BY_SAMPLE, downsampleTo, null, false)
|
? new DownsamplingMethod(DownsampleType.BY_SAMPLE, downsampleTo, null)
|
||||||
: new DownsamplingMethod(DownsampleType.NONE, null, null, false);
|
: new DownsamplingMethod(DownsampleType.NONE, null, null);
|
||||||
|
|
||||||
// final List<GATKSAMRecord> reads = ArtificialSAMUtils.createReadStream(nReadsPerLocus, nLoci, header, 1, readLength);
|
// final List<GATKSAMRecord> reads = ArtificialSAMUtils.createReadStream(nReadsPerLocus, nLoci, header, 1, readLength);
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue