Collapse the downsampling fork in the GATK engine

With LegacyLocusIteratorByState deleted, the legacy downsampling implementation
was already non-functional. This commit removes all remaining code in the
engine belonging to the legacy implementation.
This commit is contained in:
David Roazen 2013-01-28 01:19:44 -05:00
parent 5003deafb6
commit 3744d1a596
16 changed files with 78 additions and 773 deletions

View File

@ -434,12 +434,9 @@ public class GenomeAnalysisEngine {
protected DownsamplingMethod getDownsamplingMethod() {
GATKArgumentCollection argCollection = this.getArguments();
// Legacy downsampler can only be selected via the command line, not via walker annotations
boolean useLegacyDownsampler = argCollection.useLegacyDownsampler;
DownsamplingMethod commandLineMethod = argCollection.getDownsamplingMethod();
DownsamplingMethod walkerMethod = WalkerManager.getDownsamplingMethod(walker, useLegacyDownsampler);
DownsamplingMethod defaultMethod = DownsamplingMethod.getDefaultDownsamplingMethod(walker, useLegacyDownsampler);
DownsamplingMethod walkerMethod = WalkerManager.getDownsamplingMethod(walker);
DownsamplingMethod defaultMethod = DownsamplingMethod.getDefaultDownsamplingMethod(walker);
DownsamplingMethod method = commandLineMethod != null ? commandLineMethod : (walkerMethod != null ? walkerMethod : defaultMethod);
method.checkCompatibilityWithWalker(walker);
@ -572,15 +569,10 @@ public class GenomeAnalysisEngine {
throw new UserException.CommandLineException("Pairs traversal cannot be used in conjunction with intervals.");
}
// Use the legacy ReadShardBalancer if legacy downsampling is enabled
ShardBalancer readShardBalancer = downsamplingMethod != null && downsamplingMethod.useLegacyDownsampler ?
new LegacyReadShardBalancer() :
new ReadShardBalancer();
if(intervals == null)
return readsDataSource.createShardIteratorOverAllReads(readShardBalancer);
return readsDataSource.createShardIteratorOverAllReads(new ReadShardBalancer());
else
return readsDataSource.createShardIteratorOverIntervals(intervals, readShardBalancer);
return readsDataSource.createShardIteratorOverIntervals(intervals, new ReadShardBalancer());
}
else
throw new ReviewedStingException("Unable to determine walker type for walker " + walker.getClass().getName());
@ -793,7 +785,7 @@ public class GenomeAnalysisEngine {
DownsamplingMethod downsamplingMethod = getDownsamplingMethod();
// Synchronize the method back into the collection so that it shows up when
// interrogating for the downsample method during command line recreation.
// interrogating for the downsampling method during command line recreation.
setDownsamplingMethod(downsamplingMethod);
logger.info(downsamplingMethod);

View File

@ -306,11 +306,10 @@ public class WalkerManager extends PluginManager<Walker> {
* downsampling method is specified on the command-line, the command-line version will
* be used instead.
* @param walker The walker to interrogate.
* @param useLegacyDownsampler If true, use the legacy downsampling implementation
* @return The downsampling method, as specified by the walker. Null if none exists.
*/
public static DownsamplingMethod getDownsamplingMethod(Walker walker, boolean useLegacyDownsampler) {
return getDownsamplingMethod(walker.getClass(), useLegacyDownsampler);
public static DownsamplingMethod getDownsamplingMethod( Walker walker ) {
return getDownsamplingMethod(walker.getClass());
}
/**
@ -318,10 +317,9 @@ public class WalkerManager extends PluginManager<Walker> {
* downsampling method is specified on the command-line, the command-line version will
* be used instead.
* @param walkerClass The class of the walker to interrogate.
* @param useLegacyDownsampler If true, use the legacy downsampling implementation
* @return The downsampling method, as specified by the walker. Null if none exists.
*/
public static DownsamplingMethod getDownsamplingMethod(Class<? extends Walker> walkerClass, boolean useLegacyDownsampler) {
public static DownsamplingMethod getDownsamplingMethod( Class<? extends Walker> walkerClass ) {
DownsamplingMethod downsamplingMethod = null;
if( walkerClass.isAnnotationPresent(Downsample.class) ) {
@ -329,7 +327,7 @@ public class WalkerManager extends PluginManager<Walker> {
DownsampleType type = downsampleParameters.by();
Integer toCoverage = downsampleParameters.toCoverage() >= 0 ? downsampleParameters.toCoverage() : null;
Double toFraction = downsampleParameters.toFraction() >= 0.0d ? downsampleParameters.toFraction() : null;
downsamplingMethod = new DownsamplingMethod(type,toCoverage,toFraction,useLegacyDownsampler);
downsamplingMethod = new DownsamplingMethod(type, toCoverage, toFraction);
}
return downsamplingMethod;

View File

@ -126,9 +126,6 @@ public class GATKArgumentCollection {
@Argument(fullName = "downsample_to_coverage", shortName = "dcov", doc = "Coverage [integer] to downsample to at any given locus; note that downsampled reads are randomly selected from all possible reads at a locus. For non-locus-based traversals (eg., ReadWalkers), this sets the maximum number of reads at each alignment start position.", required = false)
public Integer downsampleCoverage = null;
@Argument(fullName = "use_legacy_downsampler", shortName = "use_legacy_downsampler", doc = "Use the legacy downsampling implementation instead of the newer, less-tested implementation", required = false)
public boolean useLegacyDownsampler = false;
/**
* Gets the downsampling method explicitly specified by the user. If the user didn't specify
* a default downsampling mechanism, return the default.
@ -138,7 +135,7 @@ public class GATKArgumentCollection {
if ( downsamplingType == null && downsampleFraction == null && downsampleCoverage == null )
return null;
return new DownsamplingMethod(downsamplingType, downsampleCoverage, downsampleFraction, useLegacyDownsampler);
return new DownsamplingMethod(downsamplingType, downsampleCoverage, downsampleFraction);
}
/**
@ -152,7 +149,6 @@ public class GATKArgumentCollection {
downsamplingType = method.type;
downsampleCoverage = method.toCoverage;
downsampleFraction = method.toFraction;
useLegacyDownsampler = method.useLegacyDownsampler;
}
// --------------------------------------------------------------------------------------------------------------

View File

@ -162,14 +162,6 @@ public abstract class LocusView extends LocusIterator implements View {
// Cache the current and apply filtering.
AlignmentContext current = nextLocus;
// The old ALL_READS downsampling implementation -- use only if legacy downsampling was requested:
if ( sourceInfo.getDownsamplingMethod().useLegacyDownsampler &&
sourceInfo.getDownsamplingMethod().type == DownsampleType.ALL_READS &&
sourceInfo.getDownsamplingMethod().toCoverage != null ) {
current.downsampleToCoverage(sourceInfo.getDownsamplingMethod().toCoverage);
}
// Indicate that the next operation will need to advance.
nextLocus = null;

View File

@ -133,14 +133,7 @@ public class BAMScheduler implements Iterator<FilePointer> {
Map<SAMReaderID,GATKBAMFileSpan> currentPosition;
// Only use the deprecated SAMDataSource.getCurrentPosition() if we're not using experimental downsampling
// TODO: clean this up once the experimental downsampling engine fork collapses
if ( dataSource.getReadsInfo().getDownsamplingMethod() != null && dataSource.getReadsInfo().getDownsamplingMethod().useLegacyDownsampler ) {
currentPosition = dataSource.getCurrentPosition();
}
else {
currentPosition = dataSource.getInitialReaderPositions();
}
currentPosition = dataSource.getInitialReaderPositions();
for(SAMReaderID reader: dataSource.getReaderIDs())
filePointer.addFileSpans(reader,createSpanToEndOfFile(currentPosition.get(reader).getGATKChunks().get(0).getChunkStart()));

View File

@ -1,130 +0,0 @@
/*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.datasources.reads;
import net.sf.samtools.GATKBAMFileSpan;
import net.sf.samtools.SAMFileSpan;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.NoSuchElementException;
/**
* Divide up large file pointers containing reads into more manageable subcomponents.
*
* TODO: delete this class once the experimental downsampling engine fork collapses
*/
public class LegacyReadShardBalancer extends ShardBalancer {
/**
* Convert iterators of file pointers into balanced iterators of shards.
* @return An iterator over balanced shards.
*/
public Iterator<Shard> iterator() {
// Anonymous iterator that lazily converts the filePointers stream (a field of the
// ShardBalancer base class — not visible here) into filled read shards, prefetching
// one shard ahead so hasNext() can answer without doing work.
return new Iterator<Shard>() {
/**
* The cached shard to be returned next. Prefetched in the peekable iterator style.
*/
private Shard nextShard = null;
/**
* The file pointer currently being processed.
*/
private FilePointer currentFilePointer;
/**
* Ending position of the last shard in the file.
* Mapping of reader id to file span; refreshed from the data source at the end of
* every advance() so the next shard starts after the data already consumed.
*/
private Map<SAMReaderID,GATKBAMFileSpan> position = readsDataSource.getCurrentPosition();
// Instance initializer: load the first file pointer (if any) and prefetch the first shard.
{
if(filePointers.hasNext())
currentFilePointer = filePointers.next();
advance();
}
public boolean hasNext() {
return nextShard != null;
}
public Shard next() {
if(!hasNext())
throw new NoSuchElementException("No next read shard available");
Shard currentShard = nextShard;
// Prefetch the following shard before handing back the current one.
advance();
return currentShard;
}
public void remove() {
throw new UnsupportedOperationException("Unable to remove from shard balancing iterator");
}
// Computes the next non-empty shard into nextShard, leaving it null when the
// file pointer stream is exhausted.
private void advance() {
Map<SAMReaderID,SAMFileSpan> shardPosition;
nextShard = null;
Map<SAMReaderID,SAMFileSpan> selectedReaders = new HashMap<SAMReaderID,SAMFileSpan>();
// Keep consuming file pointers until a shard with buffered reads is produced
// or no file pointers remain.
while(selectedReaders.size() == 0 && currentFilePointer != null) {
shardPosition = currentFilePointer.fileSpans;
// Clip each reader's span so it begins after the portion recorded in `position`.
for(SAMReaderID id: shardPosition.keySet()) {
SAMFileSpan fileSpan = new GATKBAMFileSpan(shardPosition.get(id).removeContentsBefore(position.get(id)));
selectedReaders.put(id,fileSpan);
}
if(!isEmpty(selectedReaders)) {
Shard shard = new ReadShard(parser,readsDataSource,selectedReaders,currentFilePointer.locations,currentFilePointer.isRegionUnmapped);
readsDataSource.fillShard(shard);
// Only shards that actually buffered reads are handed out; empty ones are skipped.
if(!shard.isBufferEmpty()) {
nextShard = shard;
break;
}
}
selectedReaders.clear();
currentFilePointer = filePointers.hasNext() ? filePointers.next() : null;
}
// Record how far the data source has advanced for the next call to advance().
position = readsDataSource.getCurrentPosition();
}
/**
* Detects whether the list of file spans contain any read data.
* @param selectedSpans Mapping of readers to file spans.
* @return True if file spans are completely empty; false otherwise.
*/
private boolean isEmpty(Map<SAMReaderID,SAMFileSpan> selectedSpans) {
for(SAMFileSpan fileSpan: selectedSpans.values()) {
if(!fileSpan.isEmpty())
return false;
}
return true;
}
};
}
}

View File

@ -97,13 +97,6 @@ public class SAMDataSource {
*/
private final Map<SAMReaderID,GATKBAMIndex> bamIndices = new HashMap<SAMReaderID,GATKBAMIndex>();
/**
* How far along is each reader?
*
* TODO: delete this once the experimental downsampling engine fork collapses
*/
private final Map<SAMReaderID,GATKBAMFileSpan> readerPositions = new HashMap<SAMReaderID,GATKBAMFileSpan>();
/**
* The merged header.
*/
@ -298,8 +291,6 @@ public class SAMDataSource {
this.sortOrder = sortOrder;
}
initializeReaderPositions(readers);
mergedHeader = readers.getMergedHeader();
hasReadGroupCollisions = readers.hasReadGroupCollisions();
@ -387,17 +378,6 @@ public class SAMDataSource {
return resourcePool.getReaderID(read.getFileSource().getReader());
}
/**
* Retrieves the current position within the BAM file.
* @return A mapping of reader to current position.
*
* TODO: delete this once the experimental downsampling engine fork collapses
*/
@Deprecated
public Map<SAMReaderID,GATKBAMFileSpan> getCurrentPosition() {
return readerPositions;
}
/**
* Gets the merged header from the SAM file.
* @return The merged header.
@ -475,67 +455,6 @@ public class SAMDataSource {
}
}
/**
* Legacy method to fill the given buffering shard with reads.
*
* Shard.fill() is used instead of this method unless legacy downsampling is enabled
*
* TODO: delete this method once the experimental downsampling engine fork collapses
*
* @param shard Shard to fill.
*/
@Deprecated
public void fillShard(Shard shard) {
if(!shard.buffersReads())
throw new ReviewedStingException("Attempting to fill a non-buffering shard.");
SAMReaders readers = resourcePool.getAvailableReaders();
// Cache the most recently viewed read so that we can check whether we've reached the end of a pair.
SAMRecord read = null;
Map<SAMFileReader,GATKBAMFileSpan> positionUpdates = new IdentityHashMap<SAMFileReader,GATKBAMFileSpan>();
CloseableIterator<SAMRecord> iterator = getIterator(readers,shard,sortOrder == SAMFileHeader.SortOrder.coordinate);
while(!shard.isBufferFull() && iterator.hasNext()) {
final SAMRecord nextRead = iterator.next();
if ( read == null || (nextRead.getReferenceIndex().equals(read.getReferenceIndex())) ) {
// only add reads to the shard if they are on the same contig
read = nextRead;
shard.addRead(read);
noteFilePositionUpdate(positionUpdates,read);
} else {
break;
}
}
// If the reads are sorted in queryname order, ensure that all reads
// having the same queryname become part of the same shard.
if(sortOrder == SAMFileHeader.SortOrder.queryname) {
while(iterator.hasNext()) {
SAMRecord nextRead = iterator.next();
if(read == null || !read.getReadName().equals(nextRead.getReadName()))
break;
shard.addRead(nextRead);
noteFilePositionUpdate(positionUpdates,nextRead);
}
}
iterator.close();
// Make the updates specified by the reader.
for(Map.Entry<SAMFileReader,GATKBAMFileSpan> positionUpdate: positionUpdates.entrySet())
readerPositions.put(readers.getReaderID(positionUpdate.getKey()),positionUpdate.getValue());
}
/*
* TODO: delete this method once the experimental downsampling engine fork collapses
*/
@Deprecated
private void noteFilePositionUpdate(Map<SAMFileReader,GATKBAMFileSpan> positionMapping, SAMRecord read) {
GATKBAMFileSpan endChunk = new GATKBAMFileSpan(read.getFileSource().getFilePointer().getContentsFollowing());
positionMapping.put(read.getFileSource().getReader(),endChunk);
}
public StingSAMIterator seek(Shard shard) {
if(shard.buffersReads()) {
return shard.iterator();
@ -559,19 +478,6 @@ public class SAMDataSource {
throw new ReviewedStingException("Unable to find id for reader associated with read " + read.getReadName());
}
/**
* Initialize the current reader positions
*
* TODO: delete this once the experimental downsampling engine fork collapses
*
* @param readers
*/
@Deprecated
private void initializeReaderPositions(SAMReaders readers) {
for(SAMReaderID id: getReaderIDs())
readerPositions.put(id,new GATKBAMFileSpan(readers.getReader(id).getFilePointerSpanningReads()));
}
/**
* Get the initial reader positions across all BAM files
*
@ -646,7 +552,6 @@ public class SAMDataSource {
enableVerification,
readProperties.useOriginalBaseQualities(),
new ReleasingIterator(readers,StingSAMIteratorAdapter.adapt(mergingIterator)),
readProperties.getDownsamplingMethod().toFraction,
readProperties.getValidationExclusionList().contains(ValidationExclusion.TYPE.NO_READ_ORDER_VERIFICATION),
readProperties.getSupplementalFilters(),
readProperties.getReadTransformers(),
@ -704,7 +609,6 @@ public class SAMDataSource {
* @param enableVerification Verify the order of reads.
* @param useOriginalBaseQualities True if original base qualities should be used.
* @param wrappedIterator the raw data source.
* @param downsamplingFraction whether and how much to downsample the reads themselves (not at a locus).
* @param noValidationOfReadOrder Another trigger for the verifying iterator? TODO: look into this.
* @param supplementalFilters additional filters to apply to the reads.
* @param defaultBaseQualities if the reads have incomplete quality scores, set them all to defaultBaseQuality.
@ -715,7 +619,6 @@ public class SAMDataSource {
boolean enableVerification,
boolean useOriginalBaseQualities,
StingSAMIterator wrappedIterator,
Double downsamplingFraction,
Boolean noValidationOfReadOrder,
Collection<ReadFilter> supplementalFilters,
List<ReadTransformer> readTransformers,
@ -727,30 +630,25 @@ public class SAMDataSource {
// * (otherwise we will process something that we may end up throwing away) * //
// ************************************************************************************************ //
// Filters:
wrappedIterator = StingSAMIteratorAdapter.adapt(new CountingFilteringIterator(readMetrics,wrappedIterator,supplementalFilters));
// If we're using the new downsampling implementation, apply downsampling iterators at this
// point in the read stream for most (but not all) cases
if ( ! readProperties.getDownsamplingMethod().useLegacyDownsampler ) {
// Downsampling:
// For locus traversals where we're downsampling to coverage by sample, assume that the downsamplers
// will be invoked downstream from us in LocusIteratorByState. This improves performance by avoiding
// splitting/re-assembly of the read stream at this stage, and also allows for partial downsampling
// of individual reads.
boolean assumeDownstreamLIBSDownsampling = isLocusBasedTraversal &&
readProperties.getDownsamplingMethod().type == DownsampleType.BY_SAMPLE &&
readProperties.getDownsamplingMethod().toCoverage != null;
// For locus traversals where we're downsampling to coverage by sample, assume that the downsamplers
// will be invoked downstream from us in LocusIteratorByState. This improves performance by avoiding
// splitting/re-assembly of the read stream at this stage, and also allows for partial downsampling
// of individual reads.
boolean assumeDownstreamLIBSDownsampling = isLocusBasedTraversal &&
readProperties.getDownsamplingMethod().type == DownsampleType.BY_SAMPLE &&
readProperties.getDownsamplingMethod().toCoverage != null;
if ( ! assumeDownstreamLIBSDownsampling ) {
wrappedIterator = applyDownsamplingIterator(wrappedIterator);
}
// Apply downsampling iterators here only in cases where we know that LocusIteratorByState won't be
// doing any downsampling downstream of us
if ( ! assumeDownstreamLIBSDownsampling ) {
wrappedIterator = applyDownsamplingIterator(wrappedIterator);
}
// Use the old fractional downsampler only if we're using legacy downsampling:
// TODO: remove this statement (and associated classes) once the downsampling engine fork collapses
if ( readProperties.getDownsamplingMethod().useLegacyDownsampler && downsamplingFraction != null )
wrappedIterator = new LegacyDownsampleIterator(wrappedIterator, downsamplingFraction);
// unless they've said not to validate read ordering (!noValidationOfReadOrder) and we've enabled verification,
// verify the read ordering by applying a sort order iterator
if (!noValidationOfReadOrder && enableVerification)

View File

@ -50,35 +50,43 @@ public class DownsamplingMethod {
*/
public final Double toFraction;
/**
* Use the legacy downsampling implementation instead of the newer implementation?
*/
public final boolean useLegacyDownsampler;
/**
* Expresses no downsampling applied at all.
*/
public static final DownsamplingMethod NONE = new DownsamplingMethod(DownsampleType.NONE,null,null,false);
public static final DownsamplingMethod NONE = new DownsamplingMethod(DownsampleType.NONE, null, null);
/**
* Default type to use if no type is specified
*/
public static DownsampleType DEFAULT_DOWNSAMPLING_TYPE = DownsampleType.BY_SAMPLE;
public static final DownsampleType DEFAULT_DOWNSAMPLING_TYPE = DownsampleType.BY_SAMPLE;
/**
* Default target coverage for locus-based traversals
*/
public static int DEFAULT_LOCUS_BASED_TRAVERSAL_DOWNSAMPLING_COVERAGE = 1000;
public static final int DEFAULT_LOCUS_TRAVERSAL_DOWNSAMPLING_COVERAGE = 1000;
public DownsamplingMethod( DownsampleType type, Integer toCoverage, Double toFraction, boolean useLegacyDownsampler ) {
/**
* Default downsampling method for locus-based traversals
*/
public static final DownsamplingMethod DEFAULT_LOCUS_TRAVERSAL_DOWNSAMPLING_METHOD =
new DownsamplingMethod(DEFAULT_DOWNSAMPLING_TYPE, DEFAULT_LOCUS_TRAVERSAL_DOWNSAMPLING_COVERAGE, null);
/**
* Default downsampling method for read-based traversals
*/
public static final DownsamplingMethod DEFAULT_READ_TRAVERSAL_DOWNSAMPLING_METHOD = NONE;
public DownsamplingMethod( DownsampleType type, Integer toCoverage, Double toFraction ) {
this.type = type != null ? type : DEFAULT_DOWNSAMPLING_TYPE;
this.toCoverage = toCoverage;
this.toFraction = toFraction;
this.useLegacyDownsampler = useLegacyDownsampler;
if ( type == DownsampleType.NONE ) {
toCoverage = null;
toFraction = null;
this.toCoverage = null;
this.toFraction = null;
}
else {
this.toCoverage = toCoverage;
this.toFraction = toFraction;
}
validate();
@ -87,34 +95,28 @@ public class DownsamplingMethod {
private void validate() {
// Can't leave toFraction and toCoverage null unless type is NONE
if ( type != DownsampleType.NONE && toFraction == null && toCoverage == null )
throw new UserException.CommandLineException("Must specify either toFraction or toCoverage when downsampling.");
throw new UserException("Must specify either toFraction or toCoverage when downsampling.");
// Fraction and coverage cannot both be specified.
if ( toFraction != null && toCoverage != null )
throw new UserException.CommandLineException("Downsampling coverage and fraction are both specified. Please choose only one.");
throw new UserException("Downsampling coverage and fraction are both specified. Please choose only one.");
// toCoverage must be > 0 when specified
if ( toCoverage != null && toCoverage <= 0 ) {
throw new UserException.CommandLineException("toCoverage must be > 0 when downsampling to coverage");
throw new UserException("toCoverage must be > 0 when downsampling to coverage");
}
// toFraction must be >= 0.0 and <= 1.0 when specified
if ( toFraction != null && (toFraction < 0.0 || toFraction > 1.0) ) {
throw new UserException.CommandLineException("toFraction must be >= 0.0 and <= 1.0 when downsampling to a fraction of reads");
throw new UserException("toFraction must be >= 0.0 and <= 1.0 when downsampling to a fraction of reads");
}
}
public void checkCompatibilityWithWalker( Walker walker ) {
boolean isLocusTraversal = walker instanceof LocusWalker || walker instanceof ActiveRegionWalker;
if ( ! isLocusTraversal && useLegacyDownsampler && toCoverage != null ) {
throw new UserException.CommandLineException("Downsampling to coverage for read-based traversals (eg., ReadWalkers) is not supported in the legacy downsampling implementation. " +
"The newer downsampling implementation does not have this limitation.");
}
if ( isLocusTraversal && ! useLegacyDownsampler && type == DownsampleType.ALL_READS && toCoverage != null ) {
throw new UserException.CommandLineException("Downsampling to coverage with the ALL_READS method for locus-based traversals (eg., LocusWalkers) is not yet supported in the new downsampling implementation (though it is supported for ReadWalkers). " +
"You can run with --use_legacy_downsampler for a broken and poorly-maintained implementation of ALL_READS to-coverage downsampling, but this is not recommended.");
if ( isLocusTraversal && type == DownsampleType.ALL_READS && toCoverage != null ) {
throw new UserException("Downsampling to coverage with the ALL_READS method for locus-based traversals (eg., LocusWalkers) is not currently supported (though it is supported for ReadWalkers).");
}
}
@ -128,31 +130,22 @@ public class DownsamplingMethod {
builder.append(String.format("Method: %s, ", type));
if ( toCoverage != null ) {
builder.append(String.format("Target Coverage: %d, ", toCoverage));
builder.append(String.format("Target Coverage: %d", toCoverage));
}
else {
builder.append(String.format("Target Fraction: %.2f, ", toFraction));
}
if ( useLegacyDownsampler ) {
builder.append("Using the legacy downsampling implementation");
}
else {
builder.append("Using the new downsampling implementation");
builder.append(String.format("Target Fraction: %.2f", toFraction));
}
}
return builder.toString();
}
public static DownsamplingMethod getDefaultDownsamplingMethod( Walker walker, boolean useLegacyDownsampler ) {
public static DownsamplingMethod getDefaultDownsamplingMethod( Walker walker ) {
if ( walker instanceof LocusWalker || walker instanceof ActiveRegionWalker ) {
return new DownsamplingMethod(DEFAULT_DOWNSAMPLING_TYPE, DEFAULT_LOCUS_BASED_TRAVERSAL_DOWNSAMPLING_COVERAGE,
null, useLegacyDownsampler);
return DEFAULT_LOCUS_TRAVERSAL_DOWNSAMPLING_METHOD;
}
else {
// Downsampling is off by default for non-locus-based traversals
return new DownsamplingMethod(DownsampleType.NONE, null, null, useLegacyDownsampler);
return DEFAULT_READ_TRAVERSAL_DOWNSAMPLING_METHOD;
}
}
}

View File

@ -111,9 +111,6 @@ public class WindowMaker implements Iterable<WindowMaker.WindowMakerIterator>, I
this.sourceInfo = shard.getReadProperties();
this.readIterator = new GATKSAMIterator(iterator);
// Use the legacy version of LocusIteratorByState if legacy downsampling was requested:
if ( sourceInfo.getDownsamplingMethod().useLegacyDownsampler )
throw new IllegalArgumentException("legacy downsampler no longer supported in the window maker");
this.libs = new LocusIteratorByState(readIterator,sourceInfo,genomeLocParser,sampleNames);
this.sourceIterator = new PeekableIterator<AlignmentContext>(libs);

View File

@ -1,77 +0,0 @@
/*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.iterators;
import net.sf.samtools.SAMRecord;

import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;

import java.util.Iterator;
import java.util.NoSuchElementException;
public class LegacyDownsampleIterator implements StingSAMIterator {
    /** The underlying read stream being downsampled. */
    private final StingSAMIterator it;

    /** Acceptance threshold: a read survives when nextInt(10000) < cutoff. */
    private final int cutoff;

    /** The next surviving read, prefetched; null once the stream is exhausted. */
    private SAMRecord next;

    /**
     * Wraps the given iterator, keeping approximately the given fraction of reads.
     *
     * @param it       source read iterator
     * @param fraction fraction of reads to keep, expected in [0.0, 1.0]
     */
    public LegacyDownsampleIterator(StingSAMIterator it, double fraction) {
        this.it = it;
        // Scale the fraction to an integer threshold out of 10000 (0.01% resolution).
        cutoff = (int)(fraction * 10000);
        next = getNextRecord();
    }

    public boolean hasNext() {
        return next != null;
    }

    public SAMRecord next() {
        // Honor the Iterator contract instead of silently returning null past the end.
        if (next == null)
            throw new NoSuchElementException("LegacyDownsampleIterator exhausted");
        SAMRecord result = next;
        next = getNextRecord();
        return result;
    }

    public void remove() {
        throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!");
    }

    /**
     * Advances the underlying iterator until a read passes the random keep test.
     *
     * @return the next surviving read, or null if the source is exhausted
     */
    private SAMRecord getNextRecord() {
        while ( true ) {
            if ( !it.hasNext() )
                return null;
            SAMRecord rec = it.next();
            // Keep the read with probability cutoff/10000, drawn from the engine-wide RNG.
            if ( GenomeAnalysisEngine.getRandomGenerator().nextInt(10000) < cutoff )
                return rec;
        }
    }

    public void close() {
        it.close();
    }

    public Iterator<SAMRecord> iterator() {
        return this;
    }
}

View File

@ -1,153 +0,0 @@
/*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
/**
* THIS IMPLEMENTATION IS BROKEN AND WILL BE REMOVED ONCE THE DOWNSAMPLING ENGINE FORK COLLAPSES
*
* Randomly downsample from a stream of elements. This algorithm is a direct,
* naive implementation of reservoir downsampling as described in "Random Downsampling
* with a Reservoir" (Vitter 1985). At time of writing, this paper is located here:
* http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.138.784&rep=rep1&type=pdf
* @author mhanna
* @version 0.1
*/
public class LegacyReservoirDownsampler<T> {
    /**
     * The reservoir of elements tracked by this downsampler.
     */
    private final ArrayList<T> reservoir;

    /**
     * What is the maximum number of reads that can be returned in a single batch.
     */
    private final int maxElements;

    /**
     * Create a new downsampler retaining at most the given number of elements.
     *
     * @param maxElements maximum number of elements held in the reservoir; must be >= 0
     */
    public LegacyReservoirDownsampler(final int maxElements) {
        if(maxElements < 0)
            throw new ReviewedStingException("Unable to work with a negative size collection of elements");
        this.reservoir = new ArrayList<T>(maxElements);
        this.maxElements = maxElements;
    }

    /**
     * Offer an element to the reservoir, returning whichever element was displaced.
     *
     * NOTE(review): once the reservoir is full this evicts a uniformly random slot on
     * EVERY add, rather than replacing with probability maxElements/elementsSeen as true
     * reservoir sampling (Vitter 1985) requires, so later elements are over-represented.
     * The class header already declares this implementation broken and slated for
     * removal; the biased behavior is deliberately preserved here.
     *
     * @param element element to offer
     * @return the displaced element, or null if no element was displaced
     */
    public T add(T element) {
        if(maxElements <= 0)
            return element;             // zero-capacity reservoir rejects everything
        if(reservoir.size() < maxElements) {
            reservoir.add(element);     // still filling: always keep
            return null;
        }
        // Full: evict a uniformly random slot. nextInt(maxElements) always yields a
        // valid index in [0, maxElements), so no additional range check is needed.
        int slot = GenomeAnalysisEngine.getRandomGenerator().nextInt(maxElements);
        T displaced = reservoir.get(slot);
        reservoir.set(slot, element);
        return displaced;
    }

    /**
     * Offer every element of the given collection to the reservoir.
     *
     * @param elements elements to offer
     * @return true if any offered element displaced another (i.e. add() returned non-null)
     */
    public boolean addAll(Collection<? extends T> elements) {
        boolean displacedAny = false;
        for(T element: elements)
            displacedAny |= (add(element) != null);
        return displacedAny;
    }

    /**
     * Returns the contents of this reservoir, downsampled to the given value. Note that
     * the returned collection is the live reservoir, not a copy.
     * @return The downsampled contents of this reservoir.
     */
    public Collection<T> getDownsampledContents() {
        return reservoir;
    }

    public void clear() {
        reservoir.clear();
    }

    public boolean isEmpty() {
        return reservoir.isEmpty();
    }

    public int size() {
        return reservoir.size();
    }

    public Iterator<T> iterator() {
        return reservoir.iterator();
    }

    public boolean contains(Object o) {
        return reservoir.contains(o);
    }

    public boolean containsAll(Collection<?> elements) {
        return reservoir.containsAll(elements);
    }

    public boolean retainAll(Collection<?> elements) {
        return reservoir.retainAll(elements);
    }

    public boolean remove(Object o) {
        return reservoir.remove(o);
    }

    public boolean removeAll(Collection<?> elements) {
        return reservoir.removeAll(elements);
    }

    public Object[] toArray() {
        // Delegate instead of hand-rolling the copy; same result, less code.
        return reservoir.toArray();
    }

    // Method-local type parameter renamed to U so it no longer shadows the class's T
    // (identical erasure, so callers are unaffected).
    public <U> U[] toArray(U[] array) {
        return reservoir.toArray(array);
    }
}

View File

@@ -86,7 +86,7 @@ public class DownsamplerBenchmark extends ReadProcessingBenchmark {
},
PER_SAMPLE {
@Override
DownsamplingMethod create() { return DownsamplingMethod.getDefaultDownsamplingMethod(new CountLoci(), false); }
DownsamplingMethod create() { return DownsamplingMethod.getDefaultDownsamplingMethod(new CountLoci()); }
};
abstract DownsamplingMethod create();
}

View File

@@ -52,7 +52,7 @@ public class ReadShardBalancerUnitTest extends BaseTest {
* Tests to ensure that ReadShardBalancer works as expected and does not place shard boundaries
* at inappropriate places, such as within an alignment start position
*/
private static class ExperimentalReadShardBalancerTest extends TestDataProvider {
private static class ReadShardBalancerTest extends TestDataProvider {
private int numContigs;
private int numStacksPerContig;
private int stackSize;
@@ -63,19 +63,19 @@ public class ReadShardBalancerUnitTest extends BaseTest {
private SAMFileHeader header;
private SAMReaderID testBAM;
public ExperimentalReadShardBalancerTest( int numContigs,
int numStacksPerContig,
int stackSize,
int numUnmappedReads,
int downsamplingTargetCoverage ) {
super(ExperimentalReadShardBalancerTest.class);
public ReadShardBalancerTest( int numContigs,
int numStacksPerContig,
int stackSize,
int numUnmappedReads,
int downsamplingTargetCoverage ) {
super(ReadShardBalancerTest.class);
this.numContigs = numContigs;
this.numStacksPerContig = numStacksPerContig;
this.stackSize = stackSize;
this.numUnmappedReads = numUnmappedReads;
this.downsamplingMethod = new DownsamplingMethod(DownsampleType.BY_SAMPLE, downsamplingTargetCoverage, null, false);
this.downsamplingMethod = new DownsamplingMethod(DownsampleType.BY_SAMPLE, downsamplingTargetCoverage, null);
this.expectedReadCount = Math.min(stackSize, downsamplingTargetCoverage) * numStacksPerContig * numContigs + numUnmappedReads;
setName(String.format("%s: numContigs=%d numStacksPerContig=%d stackSize=%d numUnmappedReads=%d downsamplingTargetCoverage=%d",
@@ -176,8 +176,8 @@ public class ReadShardBalancerUnitTest extends BaseTest {
}
}
@DataProvider(name = "ExperimentalReadShardBalancerTestDataProvider")
public Object[][] createExperimentalReadShardBalancerTests() {
@DataProvider(name = "ReadShardBalancerTestDataProvider")
public Object[][] createReadShardBalancerTests() {
for ( int numContigs = 1; numContigs <= 3; numContigs++ ) {
for ( int numStacksPerContig : Arrays.asList(1, 2, 4) ) {
// Use crucial read shard boundary values as the stack sizes
@@ -185,18 +185,18 @@ public class ReadShardBalancerUnitTest extends BaseTest {
for ( int numUnmappedReads : Arrays.asList(0, ReadShard.DEFAULT_MAX_READS / 2, ReadShard.DEFAULT_MAX_READS * 2) ) {
// The first value will result in no downsampling at all, the others in some downsampling
for ( int downsamplingTargetCoverage : Arrays.asList(ReadShard.DEFAULT_MAX_READS * 10, ReadShard.DEFAULT_MAX_READS, ReadShard.DEFAULT_MAX_READS / 2) ) {
new ExperimentalReadShardBalancerTest(numContigs, numStacksPerContig, stackSize, numUnmappedReads, downsamplingTargetCoverage);
new ReadShardBalancerTest(numContigs, numStacksPerContig, stackSize, numUnmappedReads, downsamplingTargetCoverage);
}
}
}
}
}
return ExperimentalReadShardBalancerTest.getTests(ExperimentalReadShardBalancerTest.class);
return ReadShardBalancerTest.getTests(ReadShardBalancerTest.class);
}
@Test(dataProvider = "ExperimentalReadShardBalancerTestDataProvider")
public void runExperimentalReadShardBalancerTest( ExperimentalReadShardBalancerTest test ) {
@Test(dataProvider = "ReadShardBalancerTestDataProvider")
public void runReadShardBalancerTest( ReadShardBalancerTest test ) {
logger.warn("Running test: " + test);
test.run();

View File

@@ -31,10 +31,7 @@ import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.commandline.Tags;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.datasources.providers.ReadShardDataProvider;
import org.broadinstitute.sting.gatk.datasources.reads.LegacyReadShardBalancer;
import org.broadinstitute.sting.gatk.datasources.reads.SAMDataSource;
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
import org.broadinstitute.sting.gatk.datasources.reads.*;
import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.walkers.qc.CountReads;
@@ -139,7 +136,7 @@ public class TraverseReadsUnitTest extends BaseTest {
@Test
public void testUnmappedReadCount() {
SAMDataSource dataSource = new SAMDataSource(bamList,new ThreadAllocation(),null,genomeLocParser);
Iterable<Shard> shardStrategy = dataSource.createShardIteratorOverAllReads(new LegacyReadShardBalancer());
Iterable<Shard> shardStrategy = dataSource.createShardIteratorOverAllReads(new ReadShardBalancer());
countReadWalker.initialize();
Object accumulator = countReadWalker.reduceInit();

View File

@@ -1,191 +0,0 @@
/*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.testng.Assert;
import org.testng.annotations.Test;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMFileHeader;
import java.util.*;
/**
* Basic tests to prove the integrity of the reservoir downsampler.
* At the moment, always run tests on SAM records as that's the task
* for which the downsampler was conceived.
*
* @author mhanna
* @version 0.1
*/
public class LegacyReservoirDownsamplerUnitTest {
    private static final SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1,1,200);

    /** Convenience factory for a 76bp artificial read on contig 0 at the given start. */
    private static GATKSAMRecord makeRead(final String name, final int alignmentStart) {
        return ArtificialSAMUtils.createArtificialRead(header, name, 0, alignmentStart, 76);
    }

    /** Assert that the downsampler is non-empty and contains exactly the expected reads, in order. */
    private static void verifyContents(final LegacyReservoirDownsampler<SAMRecord> downsampler, final List<SAMRecord> expected) {
        Assert.assertFalse(downsampler.isEmpty(),"Downsampler is empty but shouldn't be");
        final List<SAMRecord> contents = new ArrayList<SAMRecord>(downsampler.getDownsampledContents());
        Assert.assertEquals(contents.size(), expected.size(), "Downsampler is returning the wrong number of reads");
        for ( int i = 0; i < expected.size(); i++ )
            Assert.assertEquals(contents.get(i), expected.get(i), "Downsampler is returning an incorrect read.");
    }

    @Test
    public void testEmptyIterator() {
        final LegacyReservoirDownsampler<SAMRecord> downsampler = new LegacyReservoirDownsampler<SAMRecord>(1);
        Assert.assertTrue(downsampler.isEmpty(),"Downsampler is not empty but should be.");
    }

    @Test
    public void testOneElementWithPoolSizeOne() {
        final GATKSAMRecord onlyRead = makeRead("read1", 1);
        final LegacyReservoirDownsampler<SAMRecord> downsampler = new LegacyReservoirDownsampler<SAMRecord>(1);
        downsampler.addAll(Collections.singletonList(onlyRead));
        Assert.assertFalse(downsampler.isEmpty(),"Downsampler is empty but shouldn't be");
        final Collection<SAMRecord> contents = downsampler.getDownsampledContents();
        Assert.assertEquals(contents.size(), 1, "Downsampler is returning the wrong number of reads");
        Assert.assertSame(contents.iterator().next(), onlyRead, "Downsampler is returning an incorrect read");
    }

    @Test
    public void testOneElementWithPoolSizeGreaterThanOne() {
        final GATKSAMRecord onlyRead = makeRead("read1", 1);
        final LegacyReservoirDownsampler<SAMRecord> downsampler = new LegacyReservoirDownsampler<SAMRecord>(5);
        downsampler.addAll(Collections.singletonList(onlyRead));
        Assert.assertFalse(downsampler.isEmpty(),"Downsampler is empty but shouldn't be");
        final Collection<SAMRecord> contents = downsampler.getDownsampledContents();
        Assert.assertEquals(contents.size(), 1, "Downsampler is returning the wrong number of reads");
        Assert.assertSame(contents.iterator().next(), onlyRead, "Downsampler is returning an incorrect read");
    }

    @Test
    public void testPoolFilledPartially() {
        final List<SAMRecord> offered = new ArrayList<SAMRecord>();
        for ( int i = 1; i <= 3; i++ )
            offered.add(makeRead("read" + i, 1));

        final LegacyReservoirDownsampler<SAMRecord> downsampler = new LegacyReservoirDownsampler<SAMRecord>(5);
        downsampler.addAll(offered);
        Assert.assertFalse(downsampler.isEmpty(),"Downsampler is empty but shouldn't be");
        final List<SAMRecord> contents = new ArrayList<SAMRecord>(downsampler.getDownsampledContents());
        Assert.assertEquals(contents.size(), 3, "Downsampler is returning the wrong number of reads");
        for ( int i = 0; i < 3; i++ )
            Assert.assertSame(contents.get(i), offered.get(i), "Downsampler read " + (i + 1) + " is incorrect");
    }

    @Test
    public void testPoolFilledExactly() {
        final List<SAMRecord> offered = new ArrayList<SAMRecord>();
        for ( int i = 1; i <= 5; i++ )
            offered.add(makeRead("read" + i, 1));

        final LegacyReservoirDownsampler<SAMRecord> downsampler = new LegacyReservoirDownsampler<SAMRecord>(5);
        downsampler.addAll(offered);
        Assert.assertFalse(downsampler.isEmpty(),"Downsampler is empty but shouldn't be");
        final List<SAMRecord> contents = new ArrayList<SAMRecord>(downsampler.getDownsampledContents());
        Assert.assertEquals(contents.size(), 5, "Downsampler is returning the wrong number of reads");
        Assert.assertSame(contents.iterator().next(), offered.get(0), "Downsampler is returning an incorrect read");
        for ( int i = 0; i < 5; i++ )
            Assert.assertSame(contents.get(i), offered.get(i), "Downsampler read " + (i + 1) + " is incorrect");
    }

    @Test
    public void testLargerPileWithZeroElementPool() {
        final List<SAMRecord> offered = new ArrayList<SAMRecord>();
        for ( int i = 1; i <= 3; i++ )
            offered.add(makeRead("read" + i, 1));

        final LegacyReservoirDownsampler<SAMRecord> downsampler = new LegacyReservoirDownsampler<SAMRecord>(0);
        downsampler.addAll(offered);
        Assert.assertTrue(downsampler.isEmpty(),"Downsampler isn't empty but should be");
        final List<SAMRecord> contents = new ArrayList<SAMRecord>(downsampler.getDownsampledContents());
        Assert.assertEquals(contents.size(), 0, "Downsampler is returning the wrong number of reads");
    }

    @Test
    public void testLargerPileWithSingleElementPool() {
        final List<SAMRecord> offered = new ArrayList<SAMRecord>();
        for ( int i = 1; i <= 5; i++ )
            offered.add(makeRead("read" + i, 1));

        final LegacyReservoirDownsampler<SAMRecord> downsampler = new LegacyReservoirDownsampler<SAMRecord>(1);
        downsampler.addAll(offered);
        Assert.assertFalse(downsampler.isEmpty(),"Downsampler is empty but shouldn't be");
        final List<SAMRecord> contents = new ArrayList<SAMRecord>(downsampler.getDownsampledContents());
        Assert.assertEquals(contents.size(), 1, "Downsampler is returning the wrong number of reads");
        Assert.assertTrue(offered.contains(contents.get(0)),"Downsampler is returning a bad read.");
    }

    @Test
    public void testFillingAcrossLoci() {
        final LegacyReservoirDownsampler<SAMRecord> downsampler = new LegacyReservoirDownsampler<SAMRecord>(5);

        // First locus: a single read.
        final List<SAMRecord> locusOne = Arrays.<SAMRecord>asList(makeRead("read1", 1));
        downsampler.addAll(locusOne);
        verifyContents(downsampler, locusOne);

        // Second locus: clear and refill with two new reads.
        final List<SAMRecord> locusTwo = Arrays.<SAMRecord>asList(makeRead("read2", 2), makeRead("read3", 2));
        downsampler.clear();
        downsampler.addAll(locusTwo);
        verifyContents(downsampler, locusTwo);

        // Third locus: clear and refill again.
        final List<SAMRecord> locusThree = Arrays.<SAMRecord>asList(makeRead("read4", 3), makeRead("read5", 3));
        downsampler.clear();
        downsampler.addAll(locusThree);
        verifyContents(downsampler, locusThree);
    }
}

View File

@@ -474,8 +474,8 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest {
final boolean downsample = downsampleTo != -1;
final DownsamplingMethod downsampler = downsample
? new DownsamplingMethod(DownsampleType.BY_SAMPLE, downsampleTo, null, false)
: new DownsamplingMethod(DownsampleType.NONE, null, null, false);
? new DownsamplingMethod(DownsampleType.BY_SAMPLE, downsampleTo, null)
: new DownsamplingMethod(DownsampleType.NONE, null, null);
final ArtificialBAMBuilder bamBuilder = new ArtificialBAMBuilder(header.getSequenceDictionary(), nReadsPerLocus, nLoci);
bamBuilder.createAndSetHeader(nSamples).setReadLength(readLength).setAlignmentStart(1);
@@ -635,8 +635,8 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest {
final boolean downsample = downsampleTo != -1;
final DownsamplingMethod downsampler = downsample
? new DownsamplingMethod(DownsampleType.BY_SAMPLE, downsampleTo, null, false)
: new DownsamplingMethod(DownsampleType.NONE, null, null, false);
? new DownsamplingMethod(DownsampleType.BY_SAMPLE, downsampleTo, null)
: new DownsamplingMethod(DownsampleType.NONE, null, null);
// final List<GATKSAMRecord> reads = ArtificialSAMUtils.createReadStream(nReadsPerLocus, nLoci, header, 1, readLength);