diff --git a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index 9b801be7d..389b1371a 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -434,12 +434,9 @@ public class GenomeAnalysisEngine { protected DownsamplingMethod getDownsamplingMethod() { GATKArgumentCollection argCollection = this.getArguments(); - // Legacy downsampler can only be selected via the command line, not via walker annotations - boolean useLegacyDownsampler = argCollection.useLegacyDownsampler; - DownsamplingMethod commandLineMethod = argCollection.getDownsamplingMethod(); - DownsamplingMethod walkerMethod = WalkerManager.getDownsamplingMethod(walker, useLegacyDownsampler); - DownsamplingMethod defaultMethod = DownsamplingMethod.getDefaultDownsamplingMethod(walker, useLegacyDownsampler); + DownsamplingMethod walkerMethod = WalkerManager.getDownsamplingMethod(walker); + DownsamplingMethod defaultMethod = DownsamplingMethod.getDefaultDownsamplingMethod(walker); DownsamplingMethod method = commandLineMethod != null ? commandLineMethod : (walkerMethod != null ? walkerMethod : defaultMethod); method.checkCompatibilityWithWalker(walker); @@ -572,15 +569,10 @@ public class GenomeAnalysisEngine { throw new UserException.CommandLineException("Pairs traversal cannot be used in conjunction with intervals."); } - // Use the legacy ReadShardBalancer if legacy downsampling is enabled - ShardBalancer readShardBalancer = downsamplingMethod != null && downsamplingMethod.useLegacyDownsampler ? - new LegacyReadShardBalancer() : - new ReadShardBalancer(); - if(intervals == null) - return readsDataSource.createShardIteratorOverAllReads(readShardBalancer); + return readsDataSource.createShardIteratorOverAllReads(new ReadShardBalancer()); else - return readsDataSource.createShardIteratorOverIntervals(intervals, readShardBalancer); + return readsDataSource.createShardIteratorOverIntervals(intervals, new ReadShardBalancer()); } else throw new ReviewedStingException("Unable to determine walker type for walker " + walker.getClass().getName()); @@ -793,7 +785,7 @@ public class GenomeAnalysisEngine { DownsamplingMethod downsamplingMethod = getDownsamplingMethod(); // Synchronize the method back into the collection so that it shows up when - // interrogating for the downsample method during command line recreation. + // interrogating for the downsampling method during command line recreation. setDownsamplingMethod(downsamplingMethod); logger.info(downsamplingMethod); diff --git a/public/java/src/org/broadinstitute/sting/gatk/WalkerManager.java b/public/java/src/org/broadinstitute/sting/gatk/WalkerManager.java index 62c2cdd13..6dfb2c2a5 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/WalkerManager.java +++ b/public/java/src/org/broadinstitute/sting/gatk/WalkerManager.java @@ -306,11 +306,10 @@ public class WalkerManager extends PluginManager { * downsampling method is specified on the command-line, the command-line version will * be used instead. * @param walker The walker to interrogate. - * @param useLegacyDownsampler If true, use the legacy downsampling implementation * @return The downsampling method, as specified by the walker. Null if none exists. */ - public static DownsamplingMethod getDownsamplingMethod(Walker walker, boolean useLegacyDownsampler) { - return getDownsamplingMethod(walker.getClass(), useLegacyDownsampler); + public static DownsamplingMethod getDownsamplingMethod( Walker walker ) { + return getDownsamplingMethod(walker.getClass()); } /** @@ -318,10 +317,9 @@ public class WalkerManager extends PluginManager { * downsampling method is specified on the command-line, the command-line version will * be used instead. * @param walkerClass The class of the walker to interrogate. - * @param useLegacyDownsampler If true, use the legacy downsampling implementation * @return The downsampling method, as specified by the walker. Null if none exists. */ - public static DownsamplingMethod getDownsamplingMethod(Class walkerClass, boolean useLegacyDownsampler) { + public static DownsamplingMethod getDownsamplingMethod( Class walkerClass ) { DownsamplingMethod downsamplingMethod = null; if( walkerClass.isAnnotationPresent(Downsample.class) ) { @@ -329,7 +327,7 @@ public class WalkerManager extends PluginManager { DownsampleType type = downsampleParameters.by(); Integer toCoverage = downsampleParameters.toCoverage() >= 0 ? downsampleParameters.toCoverage() : null; Double toFraction = downsampleParameters.toFraction() >= 0.0d ? downsampleParameters.toFraction() : null; - downsamplingMethod = new DownsamplingMethod(type,toCoverage,toFraction,useLegacyDownsampler); + downsamplingMethod = new DownsamplingMethod(type, toCoverage, toFraction); } return downsamplingMethod; diff --git a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java index 62ca38ad2..9cd88001c 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java @@ -126,9 +126,6 @@ public class GATKArgumentCollection { @Argument(fullName = "downsample_to_coverage", shortName = "dcov", doc = "Coverage [integer] to downsample to at any given locus; note that downsampled reads are randomly selected from all possible reads at a locus. For non-locus-based traversals (eg., ReadWalkers), this sets the maximum number of reads at each alignment start position.", required = false) public Integer downsampleCoverage = null; - @Argument(fullName = "use_legacy_downsampler", shortName = "use_legacy_downsampler", doc = "Use the legacy downsampling implementation instead of the newer, less-tested implementation", required = false) - public boolean useLegacyDownsampler = false; - /** * Gets the downsampling method explicitly specified by the user. If the user didn't specify * a default downsampling mechanism, return the default. @@ -138,7 +135,7 @@ public class GATKArgumentCollection { if ( downsamplingType == null && downsampleFraction == null && downsampleCoverage == null ) return null; - return new DownsamplingMethod(downsamplingType, downsampleCoverage, downsampleFraction, useLegacyDownsampler); + return new DownsamplingMethod(downsamplingType, downsampleCoverage, downsampleFraction); } /** @@ -152,7 +149,6 @@ public class GATKArgumentCollection { downsamplingType = method.type; downsampleCoverage = method.toCoverage; downsampleFraction = method.toFraction; - useLegacyDownsampler = method.useLegacyDownsampler; } // -------------------------------------------------------------------------------------------------------------- diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusView.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusView.java index 3ff6e34fb..6b3661baa 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusView.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusView.java @@ -162,14 +162,6 @@ public abstract class LocusView extends LocusIterator implements View { // Cache the current and apply filtering. AlignmentContext current = nextLocus; - // The old ALL_READS downsampling implementation -- use only if legacy downsampling was requested: - if ( sourceInfo.getDownsamplingMethod().useLegacyDownsampler && - sourceInfo.getDownsamplingMethod().type == DownsampleType.ALL_READS && - sourceInfo.getDownsamplingMethod().toCoverage != null ) { - - current.downsampleToCoverage(sourceInfo.getDownsamplingMethod().toCoverage); - } - // Indicate that the next operation will need to advance. nextLocus = null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BAMScheduler.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BAMScheduler.java index 0b41f15c0..8d7cfbaa7 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BAMScheduler.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BAMScheduler.java @@ -133,14 +133,7 @@ public class BAMScheduler implements Iterator { Map currentPosition; - // Only use the deprecated SAMDataSource.getCurrentPosition() if we're not using experimental downsampling - // TODO: clean this up once the experimental downsampling engine fork collapses - if ( dataSource.getReadsInfo().getDownsamplingMethod() != null && dataSource.getReadsInfo().getDownsamplingMethod().useLegacyDownsampler ) { - currentPosition = dataSource.getCurrentPosition(); - } - else { - currentPosition = dataSource.getInitialReaderPositions(); - } + currentPosition = dataSource.getInitialReaderPositions(); for(SAMReaderID reader: dataSource.getReaderIDs()) filePointer.addFileSpans(reader,createSpanToEndOfFile(currentPosition.get(reader).getGATKChunks().get(0).getChunkStart())); diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/LegacyReadShardBalancer.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/LegacyReadShardBalancer.java deleted file mode 100644 index 510398157..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/LegacyReadShardBalancer.java +++ /dev/null @@ -1,130 +0,0 @@ -/* -* Copyright (c) 2012 The Broad Institute -* -* Permission is hereby granted, free of charge, to any person -* obtaining a copy of this software and associated documentation -* files (the "Software"), to deal in the Software without -* restriction, including without limitation the rights to use, -* copy, modify, merge, publish, distribute, sublicense, and/or sell -* copies of the Software, and to permit persons to whom the -* Software is furnished to do so, subject to the following -* conditions: -* -* The above copyright notice and this permission notice shall be -* included in all copies or substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR -* THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - -package org.broadinstitute.sting.gatk.datasources.reads; - -import net.sf.samtools.GATKBAMFileSpan; -import net.sf.samtools.SAMFileSpan; - -import java.util.HashMap; -import java.util.Iterator; -import java.util.Map; -import java.util.NoSuchElementException; - -/** - * Divide up large file pointers containing reads into more manageable subcomponents. - * - * TODO: delete this class once the experimental downsampling engine fork collapses - */ -public class LegacyReadShardBalancer extends ShardBalancer { - /** - * Convert iterators of file pointers into balanced iterators of shards. - * @return An iterator over balanced shards. - */ - public Iterator iterator() { - return new Iterator() { - /** - * The cached shard to be returned next. Prefetched in the peekable iterator style. - */ - private Shard nextShard = null; - - /** - * The file pointer currently being processed. - */ - private FilePointer currentFilePointer; - - /** - * Ending position of the last shard in the file. - */ - private Map position = readsDataSource.getCurrentPosition(); - - { - if(filePointers.hasNext()) - currentFilePointer = filePointers.next(); - advance(); - } - - public boolean hasNext() { - return nextShard != null; - } - - public Shard next() { - if(!hasNext()) - throw new NoSuchElementException("No next read shard available"); - Shard currentShard = nextShard; - advance(); - return currentShard; - } - - public void remove() { - throw new UnsupportedOperationException("Unable to remove from shard balancing iterator"); - } - - private void advance() { - Map shardPosition; - nextShard = null; - - Map selectedReaders = new HashMap(); - while(selectedReaders.size() == 0 && currentFilePointer != null) { - shardPosition = currentFilePointer.fileSpans; - - for(SAMReaderID id: shardPosition.keySet()) { - SAMFileSpan fileSpan = new GATKBAMFileSpan(shardPosition.get(id).removeContentsBefore(position.get(id))); - selectedReaders.put(id,fileSpan); - } - - if(!isEmpty(selectedReaders)) { - Shard shard = new ReadShard(parser,readsDataSource,selectedReaders,currentFilePointer.locations,currentFilePointer.isRegionUnmapped); - readsDataSource.fillShard(shard); - - if(!shard.isBufferEmpty()) { - nextShard = shard; - break; - } - } - - selectedReaders.clear(); - currentFilePointer = filePointers.hasNext() ? filePointers.next() : null; - } - - position = readsDataSource.getCurrentPosition(); - } - - /** - * Detects whether the list of file spans contain any read data. - * @param selectedSpans Mapping of readers to file spans. - * @return True if file spans are completely empty; false otherwise. - */ - private boolean isEmpty(Map selectedSpans) { - for(SAMFileSpan fileSpan: selectedSpans.values()) { - if(!fileSpan.isEmpty()) - return false; - } - return true; - } - }; - } - -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java index c9a3b0df0..d52e55d6d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java @@ -97,13 +97,6 @@ public class SAMDataSource { */ private final Map bamIndices = new HashMap(); - /** - * How far along is each reader? - * - * TODO: delete this once the experimental downsampling engine fork collapses - */ - private final Map readerPositions = new HashMap(); - /** * The merged header. */ @@ -298,8 +291,6 @@ public class SAMDataSource { this.sortOrder = sortOrder; } - initializeReaderPositions(readers); - mergedHeader = readers.getMergedHeader(); hasReadGroupCollisions = readers.hasReadGroupCollisions(); @@ -387,17 +378,6 @@ public class SAMDataSource { return resourcePool.getReaderID(read.getFileSource().getReader()); } - /** - * Retrieves the current position within the BAM file. - * @return A mapping of reader to current position. - * - * TODO: delete this once the experimental downsampling engine fork collapses - */ - @Deprecated - public Map getCurrentPosition() { - return readerPositions; - } - /** * Gets the merged header from the SAM file. * @return The merged header. @@ -475,67 +455,6 @@ public class SAMDataSource { } } - /** - * Legacy method to fill the given buffering shard with reads. - * - * Shard.fill() is used instead of this method unless legacy downsampling is enabled - * - * TODO: delete this method once the experimental downsampling engine fork collapses - * - * @param shard Shard to fill. - */ - @Deprecated - public void fillShard(Shard shard) { - if(!shard.buffersReads()) - throw new ReviewedStingException("Attempting to fill a non-buffering shard."); - - SAMReaders readers = resourcePool.getAvailableReaders(); - // Cache the most recently viewed read so that we can check whether we've reached the end of a pair. - SAMRecord read = null; - - Map positionUpdates = new IdentityHashMap(); - - CloseableIterator iterator = getIterator(readers,shard,sortOrder == SAMFileHeader.SortOrder.coordinate); - while(!shard.isBufferFull() && iterator.hasNext()) { - final SAMRecord nextRead = iterator.next(); - if ( read == null || (nextRead.getReferenceIndex().equals(read.getReferenceIndex())) ) { - // only add reads to the shard if they are on the same contig - read = nextRead; - shard.addRead(read); - noteFilePositionUpdate(positionUpdates,read); - } else { - break; - } - } - - // If the reads are sorted in queryname order, ensure that all reads - // having the same queryname become part of the same shard. - if(sortOrder == SAMFileHeader.SortOrder.queryname) { - while(iterator.hasNext()) { - SAMRecord nextRead = iterator.next(); - if(read == null || !read.getReadName().equals(nextRead.getReadName())) - break; - shard.addRead(nextRead); - noteFilePositionUpdate(positionUpdates,nextRead); - } - } - - iterator.close(); - - // Make the updates specified by the reader. - for(Map.Entry positionUpdate: positionUpdates.entrySet()) - readerPositions.put(readers.getReaderID(positionUpdate.getKey()),positionUpdate.getValue()); - } - - /* - * TODO: delete this method once the experimental downsampling engine fork collapses - */ - @Deprecated - private void noteFilePositionUpdate(Map positionMapping, SAMRecord read) { - GATKBAMFileSpan endChunk = new GATKBAMFileSpan(read.getFileSource().getFilePointer().getContentsFollowing()); - positionMapping.put(read.getFileSource().getReader(),endChunk); - } - public StingSAMIterator seek(Shard shard) { if(shard.buffersReads()) { return shard.iterator(); @@ -559,19 +478,6 @@ public class SAMDataSource { throw new ReviewedStingException("Unable to find id for reader associated with read " + read.getReadName()); } - /** - * Initialize the current reader positions - * - * TODO: delete this once the experimental downsampling engine fork collapses - * - * @param readers - */ - @Deprecated - private void initializeReaderPositions(SAMReaders readers) { - for(SAMReaderID id: getReaderIDs()) - readerPositions.put(id,new GATKBAMFileSpan(readers.getReader(id).getFilePointerSpanningReads())); - } - /** * Get the initial reader positions across all BAM files * @@ -646,7 +552,6 @@ public class SAMDataSource { enableVerification, readProperties.useOriginalBaseQualities(), new ReleasingIterator(readers,StingSAMIteratorAdapter.adapt(mergingIterator)), - readProperties.getDownsamplingMethod().toFraction, readProperties.getValidationExclusionList().contains(ValidationExclusion.TYPE.NO_READ_ORDER_VERIFICATION), readProperties.getSupplementalFilters(), readProperties.getReadTransformers(), @@ -704,7 +609,6 @@ public class SAMDataSource { * @param enableVerification Verify the order of reads. * @param useOriginalBaseQualities True if original base qualities should be used. * @param wrappedIterator the raw data source. - * @param downsamplingFraction whether and how much to downsample the reads themselves (not at a locus). * @param noValidationOfReadOrder Another trigger for the verifying iterator? TODO: look into this. * @param supplementalFilters additional filters to apply to the reads. * @param defaultBaseQualities if the reads have incomplete quality scores, set them all to defaultBaseQuality. @@ -715,7 +619,6 @@ public class SAMDataSource { boolean enableVerification, boolean useOriginalBaseQualities, StingSAMIterator wrappedIterator, - Double downsamplingFraction, Boolean noValidationOfReadOrder, Collection supplementalFilters, List readTransformers, @@ -727,30 +630,25 @@ public class SAMDataSource { // * (otherwise we will process something that we may end up throwing away) * // // ************************************************************************************************ // + // Filters: wrappedIterator = StingSAMIteratorAdapter.adapt(new CountingFilteringIterator(readMetrics,wrappedIterator,supplementalFilters)); - // If we're using the new downsampling implementation, apply downsampling iterators at this - // point in the read stream for most (but not all) cases - if ( ! readProperties.getDownsamplingMethod().useLegacyDownsampler ) { + // Downsampling: - // For locus traversals where we're downsampling to coverage by sample, assume that the downsamplers - // will be invoked downstream from us in LocusIteratorByState. This improves performance by avoiding - // splitting/re-assembly of the read stream at this stage, and also allows for partial downsampling - // of individual reads. - boolean assumeDownstreamLIBSDownsampling = isLocusBasedTraversal && - readProperties.getDownsamplingMethod().type == DownsampleType.BY_SAMPLE && - readProperties.getDownsamplingMethod().toCoverage != null; + // For locus traversals where we're downsampling to coverage by sample, assume that the downsamplers + // will be invoked downstream from us in LocusIteratorByState. This improves performance by avoiding + // splitting/re-assembly of the read stream at this stage, and also allows for partial downsampling + // of individual reads. + boolean assumeDownstreamLIBSDownsampling = isLocusBasedTraversal && + readProperties.getDownsamplingMethod().type == DownsampleType.BY_SAMPLE && + readProperties.getDownsamplingMethod().toCoverage != null; - if ( ! assumeDownstreamLIBSDownsampling ) { - wrappedIterator = applyDownsamplingIterator(wrappedIterator); - } + // Apply downsampling iterators here only in cases where we know that LocusIteratorByState won't be + // doing any downsampling downstream of us + if ( ! assumeDownstreamLIBSDownsampling ) { + wrappedIterator = applyDownsamplingIterator(wrappedIterator); } - // Use the old fractional downsampler only if we're using legacy downsampling: - // TODO: remove this statement (and associated classes) once the downsampling engine fork collapses - if ( readProperties.getDownsamplingMethod().useLegacyDownsampler && downsamplingFraction != null ) - wrappedIterator = new LegacyDownsampleIterator(wrappedIterator, downsamplingFraction); - // unless they've said not to validate read ordering (!noValidationOfReadOrder) and we've enabled verification, // verify the read ordering by applying a sort order iterator if (!noValidationOfReadOrder && enableVerification) diff --git a/public/java/src/org/broadinstitute/sting/gatk/downsampling/DownsamplingMethod.java b/public/java/src/org/broadinstitute/sting/gatk/downsampling/DownsamplingMethod.java index bc32f9188..5aa27608d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/downsampling/DownsamplingMethod.java +++ b/public/java/src/org/broadinstitute/sting/gatk/downsampling/DownsamplingMethod.java @@ -50,35 +50,43 @@ public class DownsamplingMethod { */ public final Double toFraction; - /** - * Use the legacy downsampling implementation instead of the newer implementation? - */ - public final boolean useLegacyDownsampler; - /** * Expresses no downsampling applied at all. */ - public static final DownsamplingMethod NONE = new DownsamplingMethod(DownsampleType.NONE,null,null,false); + public static final DownsamplingMethod NONE = new DownsamplingMethod(DownsampleType.NONE, null, null); /** * Default type to use if no type is specified */ - public static DownsampleType DEFAULT_DOWNSAMPLING_TYPE = DownsampleType.BY_SAMPLE; + public static final DownsampleType DEFAULT_DOWNSAMPLING_TYPE = DownsampleType.BY_SAMPLE; /** * Default target coverage for locus-based traversals */ - public static int DEFAULT_LOCUS_BASED_TRAVERSAL_DOWNSAMPLING_COVERAGE = 1000; + public static final int DEFAULT_LOCUS_TRAVERSAL_DOWNSAMPLING_COVERAGE = 1000; - public DownsamplingMethod( DownsampleType type, Integer toCoverage, Double toFraction, boolean useLegacyDownsampler ) { + /** + * Default downsampling method for locus-based traversals + */ + public static final DownsamplingMethod DEFAULT_LOCUS_TRAVERSAL_DOWNSAMPLING_METHOD = + new DownsamplingMethod(DEFAULT_DOWNSAMPLING_TYPE, DEFAULT_LOCUS_TRAVERSAL_DOWNSAMPLING_COVERAGE, null); + + /** + * Default downsampling method for read-based traversals + */ + public static final DownsamplingMethod DEFAULT_READ_TRAVERSAL_DOWNSAMPLING_METHOD = NONE; + + + public DownsamplingMethod( DownsampleType type, Integer toCoverage, Double toFraction ) { this.type = type != null ? type : DEFAULT_DOWNSAMPLING_TYPE; - this.toCoverage = toCoverage; - this.toFraction = toFraction; - this.useLegacyDownsampler = useLegacyDownsampler; if ( type == DownsampleType.NONE ) { - toCoverage = null; - toFraction = null; + this.toCoverage = null; + this.toFraction = null; + } + else { + this.toCoverage = toCoverage; + this.toFraction = toFraction; } validate(); @@ -87,34 +95,28 @@ public class DownsamplingMethod { private void validate() { // Can't leave toFraction and toCoverage null unless type is NONE if ( type != DownsampleType.NONE && toFraction == null && toCoverage == null ) - throw new UserException.CommandLineException("Must specify either toFraction or toCoverage when downsampling."); + throw new UserException("Must specify either toFraction or toCoverage when downsampling."); // Fraction and coverage cannot both be specified. if ( toFraction != null && toCoverage != null ) - throw new UserException.CommandLineException("Downsampling coverage and fraction are both specified. Please choose only one."); + throw new UserException("Downsampling coverage and fraction are both specified. Please choose only one."); // toCoverage must be > 0 when specified if ( toCoverage != null && toCoverage <= 0 ) { - throw new UserException.CommandLineException("toCoverage must be > 0 when downsampling to coverage"); + throw new UserException("toCoverage must be > 0 when downsampling to coverage"); } // toFraction must be >= 0.0 and <= 1.0 when specified if ( toFraction != null && (toFraction < 0.0 || toFraction > 1.0) ) { - throw new UserException.CommandLineException("toFraction must be >= 0.0 and <= 1.0 when downsampling to a fraction of reads"); + throw new UserException("toFraction must be >= 0.0 and <= 1.0 when downsampling to a fraction of reads"); } } public void checkCompatibilityWithWalker( Walker walker ) { boolean isLocusTraversal = walker instanceof LocusWalker || walker instanceof ActiveRegionWalker; - if ( ! isLocusTraversal && useLegacyDownsampler && toCoverage != null ) { - throw new UserException.CommandLineException("Downsampling to coverage for read-based traversals (eg., ReadWalkers) is not supported in the legacy downsampling implementation. " + - "The newer downsampling implementation does not have this limitation."); - } - - if ( isLocusTraversal && ! useLegacyDownsampler && type == DownsampleType.ALL_READS && toCoverage != null ) { - throw new UserException.CommandLineException("Downsampling to coverage with the ALL_READS method for locus-based traversals (eg., LocusWalkers) is not yet supported in the new downsampling implementation (though it is supported for ReadWalkers). " + - "You can run with --use_legacy_downsampler for a broken and poorly-maintained implementation of ALL_READS to-coverage downsampling, but this is not recommended."); + if ( isLocusTraversal && type == DownsampleType.ALL_READS && toCoverage != null ) { + throw new UserException("Downsampling to coverage with the ALL_READS method for locus-based traversals (eg., LocusWalkers) is not currently supported (though it is supported for ReadWalkers)."); } } @@ -128,31 +130,22 @@ public class DownsamplingMethod { builder.append(String.format("Method: %s, ", type)); if ( toCoverage != null ) { - builder.append(String.format("Target Coverage: %d, ", toCoverage)); + builder.append(String.format("Target Coverage: %d", toCoverage)); } else { - builder.append(String.format("Target Fraction: %.2f, ", toFraction)); - } - - if ( useLegacyDownsampler ) { - builder.append("Using the legacy downsampling implementation"); - } - else { - builder.append("Using the new downsampling implementation"); + builder.append(String.format("Target Fraction: %.2f", toFraction)); } } return builder.toString(); } - public static DownsamplingMethod getDefaultDownsamplingMethod( Walker walker, boolean useLegacyDownsampler ) { + public static DownsamplingMethod getDefaultDownsamplingMethod( Walker walker ) { if ( walker instanceof LocusWalker || walker instanceof ActiveRegionWalker ) { - return new DownsamplingMethod(DEFAULT_DOWNSAMPLING_TYPE, DEFAULT_LOCUS_BASED_TRAVERSAL_DOWNSAMPLING_COVERAGE, - null, useLegacyDownsampler); + return DEFAULT_LOCUS_TRAVERSAL_DOWNSAMPLING_METHOD; } else { - // Downsampling is off by default for non-locus-based traversals - return new DownsamplingMethod(DownsampleType.NONE, null, null, useLegacyDownsampler); + return DEFAULT_READ_TRAVERSAL_DOWNSAMPLING_METHOD; } } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/WindowMaker.java b/public/java/src/org/broadinstitute/sting/gatk/executive/WindowMaker.java index f587442d7..e9a2b56c2 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/executive/WindowMaker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/executive/WindowMaker.java @@ -111,9 +111,6 @@ public class WindowMaker implements Iterable, I this.sourceInfo = shard.getReadProperties(); this.readIterator = new GATKSAMIterator(iterator); - // Use the legacy version of LocusIteratorByState if legacy downsampling was requested: - if ( sourceInfo.getDownsamplingMethod().useLegacyDownsampler ) - throw new IllegalArgumentException("legacy downsampler no longer supported in the window maker"); this.libs = new LocusIteratorByState(readIterator,sourceInfo,genomeLocParser,sampleNames); this.sourceIterator = new PeekableIterator(libs); diff --git a/public/java/src/org/broadinstitute/sting/gatk/iterators/LegacyDownsampleIterator.java b/public/java/src/org/broadinstitute/sting/gatk/iterators/LegacyDownsampleIterator.java deleted file mode 100644 index d0992149e..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/iterators/LegacyDownsampleIterator.java +++ /dev/null @@ -1,77 +0,0 @@ -/* -* Copyright (c) 2012 The Broad Institute -* -* Permission is hereby granted, free of charge, to any person -* obtaining a copy of this software and associated documentation -* files (the "Software"), to deal in the Software without -* restriction, including without limitation the rights to use, -* copy, modify, merge, publish, distribute, sublicense, and/or sell -* copies of the Software, and to permit persons to whom the -* Software is furnished to do so, subject to the following -* conditions: -* -* The above copyright notice and this permission notice shall be -* included in all copies or substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR -* THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - -package org.broadinstitute.sting.gatk.iterators; - -import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; - -import java.util.Iterator; - - -public class LegacyDownsampleIterator implements StingSAMIterator { - - StingSAMIterator it; - int cutoff; - SAMRecord next; - - public LegacyDownsampleIterator(StingSAMIterator it, double fraction) { - this.it = it; - cutoff = (int)(fraction * 10000); - next = getNextRecord(); - } - - public boolean hasNext() { - return next != null; - } - - public SAMRecord next() { - SAMRecord result = next; - next = getNextRecord(); - return result; - } - - public void remove() { - throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!"); - } - - private SAMRecord getNextRecord() { - while ( true ) { - if ( !it.hasNext() ) - return null; - SAMRecord rec = it.next(); - if ( GenomeAnalysisEngine.getRandomGenerator().nextInt(10000) < cutoff ) - return rec; - } - } - - public void close() { - it.close(); - } - - public Iterator iterator() { - return this; - } -} \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/utils/LegacyReservoirDownsampler.java b/public/java/src/org/broadinstitute/sting/utils/LegacyReservoirDownsampler.java deleted file mode 100644 index 2d590dd80..000000000 --- a/public/java/src/org/broadinstitute/sting/utils/LegacyReservoirDownsampler.java +++ /dev/null @@ -1,153 +0,0 @@ -/* -* Copyright (c) 2012 The Broad Institute -* -* Permission is hereby granted, free of charge, to any person -* obtaining a copy of this software and associated documentation -* files (the "Software"), to deal in the Software without -* restriction, including without limitation the rights to use, -* copy, modify, merge, publish, distribute, sublicense, and/or sell -* copies of the Software, and to permit persons to whom the -* Software is furnished to do so, subject to the following -* conditions: -* -* The above copyright notice and this permission notice shall be -* included in all copies or substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR -* THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - -package org.broadinstitute.sting.utils; - -import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; - -import java.util.ArrayList; -import java.util.Collection; -import java.util.Iterator; - -/** - * THIS IMPLEMENTATION IS BROKEN AND WILL BE REMOVED ONCE THE DOWNSAMPLING ENGINE FORK COLLAPSES - * - * Randomly downsample from a stream of elements. This algorithm is a direct, - * naive implementation of reservoir downsampling as described in "Random Downsampling - * with a Reservoir" (Vitter 1985). At time of writing, this paper is located here: - * http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.138.784&rep=rep1&type=pdf - - * @author mhanna - * @version 0.1 - */ -public class LegacyReservoirDownsampler { - /** - * The reservoir of elements tracked by this downsampler. - */ - private final ArrayList reservoir; - - /** - * What is the maximum number of reads that can be returned in a single batch. - */ - private final int maxElements; - - /** - * Create a new downsampler with the given source iterator and given comparator. - * @param maxElements What is the maximum number of reads that can be returned in any call of this - */ - public LegacyReservoirDownsampler(final int maxElements) { - if(maxElements < 0) - throw new ReviewedStingException("Unable to work with an negative size collection of elements"); - this.reservoir = new ArrayList(maxElements); - this.maxElements = maxElements; - } - - /** - * Returns the eliminated element. - * @param element Eliminated element; null if no element has been eliminated. - * @return - */ - public T add(T element) { - if(maxElements <= 0) - return element; - else if(reservoir.size() < maxElements) { - reservoir.add(element); - return null; - } - else { - // Get a uniformly distributed int. If the chosen slot lives within the partition, replace the entry in that slot with the newest entry. - int slot = GenomeAnalysisEngine.getRandomGenerator().nextInt(maxElements); - if(slot >= 0 && slot < maxElements) { - T displaced = reservoir.get(slot); - reservoir.set(slot,element); - return displaced; - } - else - return element; - } - } - - public boolean addAll(Collection elements) { - boolean added = false; - for(T element: elements) - added |= (add(element) != null); - return added; - } - - /** - * Returns the contents of this reservoir, downsampled to the given value. Note that the return value - * @return The downsampled contents of this reservoir. - */ - public Collection getDownsampledContents() { - return reservoir; - } - - public void clear() { - reservoir.clear(); - } - - public boolean isEmpty() { - return reservoir.isEmpty(); - } - - public int size() { - return reservoir.size(); - } - - public Iterator iterator() { - return reservoir.iterator(); - } - - public boolean contains(Object o) { - return reservoir.contains(o); - } - - public boolean containsAll(Collection elements) { - return reservoir.containsAll(elements); - } - - public boolean retainAll(Collection elements) { - return reservoir.retainAll(elements); - } - - public boolean remove(Object o) { - return reservoir.remove(o); - } - - public boolean removeAll(Collection elements) { - return reservoir.removeAll(elements); - } - - public Object[] toArray() { - Object[] contents = new Object[reservoir.size()]; - reservoir.toArray(contents); - return contents; - } - - public T[] toArray(T[] array) { - return reservoir.toArray(array); - } -} diff --git a/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/DownsamplerBenchmark.java b/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/DownsamplerBenchmark.java index d960177d9..00389be97 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/DownsamplerBenchmark.java +++ b/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/DownsamplerBenchmark.java @@ -86,7 +86,7 @@ public class DownsamplerBenchmark extends ReadProcessingBenchmark { }, PER_SAMPLE { @Override - DownsamplingMethod create() { return DownsamplingMethod.getDefaultDownsamplingMethod(new CountLoci(), false); } + DownsamplingMethod create() { return DownsamplingMethod.getDefaultDownsamplingMethod(new CountLoci()); } }; abstract DownsamplingMethod create(); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/ReadShardBalancerUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/ReadShardBalancerUnitTest.java index 049e8d6bb..40ec42ef1 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/ReadShardBalancerUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/ReadShardBalancerUnitTest.java @@ -52,7 +52,7 @@ public class ReadShardBalancerUnitTest extends BaseTest { * Tests to ensure that ReadShardBalancer works as expected and does not place shard boundaries * at inappropriate places, such as within an alignment start position */ - private static class ExperimentalReadShardBalancerTest extends TestDataProvider { + private static class ReadShardBalancerTest extends TestDataProvider { private int numContigs; private int numStacksPerContig; private int stackSize; @@ -63,19 +63,19 @@ public class ReadShardBalancerUnitTest extends BaseTest { private SAMFileHeader header; private SAMReaderID testBAM; - public ExperimentalReadShardBalancerTest( int numContigs, - int numStacksPerContig, - int stackSize, - int numUnmappedReads, - int downsamplingTargetCoverage ) { - super(ExperimentalReadShardBalancerTest.class); + public ReadShardBalancerTest( int numContigs, + int numStacksPerContig, + int stackSize, + int numUnmappedReads, + int downsamplingTargetCoverage ) { + super(ReadShardBalancerTest.class); this.numContigs = numContigs; this.numStacksPerContig = numStacksPerContig; this.stackSize = stackSize; this.numUnmappedReads = numUnmappedReads; - this.downsamplingMethod = new DownsamplingMethod(DownsampleType.BY_SAMPLE, downsamplingTargetCoverage, null, false); + this.downsamplingMethod = new DownsamplingMethod(DownsampleType.BY_SAMPLE, downsamplingTargetCoverage, null); this.expectedReadCount = Math.min(stackSize, downsamplingTargetCoverage) * numStacksPerContig * numContigs + numUnmappedReads; setName(String.format("%s: numContigs=%d numStacksPerContig=%d stackSize=%d numUnmappedReads=%d downsamplingTargetCoverage=%d", @@ -176,8 +176,8 @@ public class ReadShardBalancerUnitTest extends BaseTest { } } - @DataProvider(name = "ExperimentalReadShardBalancerTestDataProvider") - public Object[][] createExperimentalReadShardBalancerTests() { + @DataProvider(name = "ReadShardBalancerTestDataProvider") + public Object[][] createReadShardBalancerTests() { for ( int numContigs = 1; numContigs <= 3; numContigs++ ) { for ( int numStacksPerContig : Arrays.asList(1, 2, 4) ) { // Use crucial read shard boundary values as the stack sizes @@ -185,18 +185,18 @@ public class ReadShardBalancerUnitTest extends BaseTest { for ( int numUnmappedReads : Arrays.asList(0, ReadShard.DEFAULT_MAX_READS / 2, ReadShard.DEFAULT_MAX_READS * 2) ) { // The first value will result in no downsampling at all, the others in some downsampling for ( int downsamplingTargetCoverage : Arrays.asList(ReadShard.DEFAULT_MAX_READS * 10, ReadShard.DEFAULT_MAX_READS, ReadShard.DEFAULT_MAX_READS / 2) ) { - new ExperimentalReadShardBalancerTest(numContigs, numStacksPerContig, stackSize, numUnmappedReads, downsamplingTargetCoverage); + new ReadShardBalancerTest(numContigs, numStacksPerContig, stackSize, numUnmappedReads, downsamplingTargetCoverage); } } } } } - return ExperimentalReadShardBalancerTest.getTests(ExperimentalReadShardBalancerTest.class); + return ReadShardBalancerTest.getTests(ReadShardBalancerTest.class); } - @Test(dataProvider = "ExperimentalReadShardBalancerTestDataProvider") - public void runExperimentalReadShardBalancerTest( ExperimentalReadShardBalancerTest test ) { + @Test(dataProvider = "ReadShardBalancerTestDataProvider") + public void runReadShardBalancerTest( ReadShardBalancerTest test ) { logger.warn("Running test: " + test); test.run(); diff --git a/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsUnitTest.java index 4328e3047..8bc373fe8 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsUnitTest.java @@ -31,10 +31,7 @@ import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.commandline.Tags; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.datasources.providers.ReadShardDataProvider; -import org.broadinstitute.sting.gatk.datasources.reads.LegacyReadShardBalancer; -import org.broadinstitute.sting.gatk.datasources.reads.SAMDataSource; -import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID; -import org.broadinstitute.sting.gatk.datasources.reads.Shard; +import org.broadinstitute.sting.gatk.datasources.reads.*; import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation; import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.gatk.walkers.qc.CountReads; @@ -139,7 +136,7 @@ public class TraverseReadsUnitTest extends BaseTest { @Test public void testUnmappedReadCount() { SAMDataSource dataSource = new SAMDataSource(bamList,new ThreadAllocation(),null,genomeLocParser); - Iterable shardStrategy = dataSource.createShardIteratorOverAllReads(new LegacyReadShardBalancer()); + Iterable shardStrategy = dataSource.createShardIteratorOverAllReads(new ReadShardBalancer()); countReadWalker.initialize(); Object accumulator = countReadWalker.reduceInit(); diff --git a/public/java/test/org/broadinstitute/sting/utils/LegacyReservoirDownsamplerUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/LegacyReservoirDownsamplerUnitTest.java deleted file mode 100644 index bdcd4f600..000000000 --- a/public/java/test/org/broadinstitute/sting/utils/LegacyReservoirDownsamplerUnitTest.java +++ /dev/null @@ -1,191 +0,0 @@ -/* -* Copyright (c) 2012 The Broad Institute -* -* Permission is hereby granted, free of charge, to any person -* obtaining a copy of this software and associated documentation -* files (the "Software"), to deal in the Software without -* restriction, including without limitation the rights to use, -* copy, modify, merge, publish, distribute, sublicense, and/or sell -* copies of the Software, and to permit persons to whom the -* Software is furnished to do so, subject to the following -* conditions: -* -* The above copyright notice and this permission notice shall be -* included in all copies or substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR -* THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - -package org.broadinstitute.sting.utils; - -import org.broadinstitute.sting.utils.sam.GATKSAMRecord; -import org.testng.Assert; -import org.testng.annotations.Test; -import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; -import net.sf.samtools.SAMRecord; -import net.sf.samtools.SAMFileHeader; - -import java.util.*; - -/** - * Basic tests to prove the integrity of the reservoir downsampler. - * At the moment, always run tests on SAM records as that's the task - * for which the downsampler was conceived. - * - * @author mhanna - * @version 0.1 - */ -public class LegacyReservoirDownsamplerUnitTest { - private static final SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1,1,200); - - - @Test - public void testEmptyIterator() { - LegacyReservoirDownsampler downsampler = new LegacyReservoirDownsampler(1); - Assert.assertTrue(downsampler.isEmpty(),"Downsampler is not empty but should be."); - } - - @Test - public void testOneElementWithPoolSizeOne() { - List reads = Collections.singletonList(ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,76)); - LegacyReservoirDownsampler downsampler = new LegacyReservoirDownsampler(1); - downsampler.addAll(reads); - - Assert.assertFalse(downsampler.isEmpty(),"Downsampler is empty but shouldn't be"); - Collection batchedReads = downsampler.getDownsampledContents(); - Assert.assertEquals(batchedReads.size(), 1, "Downsampler is returning the wrong number of reads"); - Assert.assertSame(batchedReads.iterator().next(), reads.get(0), "Downsampler is returning an incorrect read"); - } - - @Test - public void testOneElementWithPoolSizeGreaterThanOne() { - List reads = Collections.singletonList(ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,76)); - LegacyReservoirDownsampler downsampler = new LegacyReservoirDownsampler(5); - downsampler.addAll(reads); - - Assert.assertFalse(downsampler.isEmpty(),"Downsampler is empty but shouldn't be"); - Collection batchedReads = downsampler.getDownsampledContents(); - Assert.assertEquals(batchedReads.size(), 1, "Downsampler is returning the wrong number of reads"); - Assert.assertSame(batchedReads.iterator().next(), reads.get(0), "Downsampler is returning an incorrect read"); - - } - - @Test - public void testPoolFilledPartially() { - List reads = new ArrayList(); - reads.add(ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,76)); - reads.add(ArtificialSAMUtils.createArtificialRead(header,"read2",0,1,76)); - reads.add(ArtificialSAMUtils.createArtificialRead(header,"read3",0,1,76)); - LegacyReservoirDownsampler downsampler = new LegacyReservoirDownsampler(5); - downsampler.addAll(reads); - - Assert.assertFalse(downsampler.isEmpty(),"Downsampler is empty but shouldn't be"); - List batchedReads = new ArrayList(downsampler.getDownsampledContents()); - Assert.assertEquals(batchedReads.size(), 3, "Downsampler is returning the wrong number of reads"); - - Assert.assertSame(batchedReads.get(0), reads.get(0), "Downsampler read 1 is incorrect"); - Assert.assertSame(batchedReads.get(1), reads.get(1), "Downsampler read 2 is incorrect"); - Assert.assertSame(batchedReads.get(2), reads.get(2), "Downsampler read 3 is incorrect"); - } - - @Test - public void testPoolFilledExactly() { - List reads = new ArrayList(); - reads.add(ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,76)); - reads.add(ArtificialSAMUtils.createArtificialRead(header,"read2",0,1,76)); - reads.add(ArtificialSAMUtils.createArtificialRead(header,"read3",0,1,76)); - reads.add(ArtificialSAMUtils.createArtificialRead(header,"read4",0,1,76)); - reads.add(ArtificialSAMUtils.createArtificialRead(header,"read5",0,1,76)); - LegacyReservoirDownsampler downsampler = new LegacyReservoirDownsampler(5); - downsampler.addAll(reads); - - Assert.assertFalse(downsampler.isEmpty(),"Downsampler is empty but shouldn't be"); - List batchedReads = new ArrayList(downsampler.getDownsampledContents()); - Assert.assertEquals(batchedReads.size(), 5, "Downsampler is returning the wrong number of reads"); - Assert.assertSame(batchedReads.iterator().next(), reads.get(0), "Downsampler is returning an incorrect read"); - - Assert.assertSame(batchedReads.get(0), reads.get(0), "Downsampler read 1 is incorrect"); - Assert.assertSame(batchedReads.get(1), reads.get(1), "Downsampler read 2 is incorrect"); - Assert.assertSame(batchedReads.get(2), reads.get(2), "Downsampler read 3 is incorrect"); - Assert.assertSame(batchedReads.get(3), reads.get(3), "Downsampler read 4 is incorrect"); - Assert.assertSame(batchedReads.get(4), reads.get(4), "Downsampler read 5 is incorrect"); - } - - @Test - public void testLargerPileWithZeroElementPool() { - List reads = new ArrayList(); - reads.add(ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,76)); - reads.add(ArtificialSAMUtils.createArtificialRead(header,"read2",0,1,76)); - reads.add(ArtificialSAMUtils.createArtificialRead(header,"read3",0,1,76)); - LegacyReservoirDownsampler downsampler = new LegacyReservoirDownsampler(0); - downsampler.addAll(reads); - - Assert.assertTrue(downsampler.isEmpty(),"Downsampler isn't empty but should be"); - List batchedReads = new ArrayList(downsampler.getDownsampledContents()); - Assert.assertEquals(batchedReads.size(), 0, "Downsampler is returning the wrong number of reads"); - } - - @Test - public void testLargerPileWithSingleElementPool() { - List reads = new ArrayList(); - reads.add(ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,76)); - reads.add(ArtificialSAMUtils.createArtificialRead(header,"read2",0,1,76)); - reads.add(ArtificialSAMUtils.createArtificialRead(header,"read3",0,1,76)); - reads.add(ArtificialSAMUtils.createArtificialRead(header,"read4",0,1,76)); - reads.add(ArtificialSAMUtils.createArtificialRead(header,"read5",0,1,76)); - LegacyReservoirDownsampler downsampler = new LegacyReservoirDownsampler(1); - downsampler.addAll(reads); - - Assert.assertFalse(downsampler.isEmpty(),"Downsampler is empty but shouldn't be"); - List batchedReads = new ArrayList(downsampler.getDownsampledContents()); - Assert.assertEquals(batchedReads.size(), 1, "Downsampler is returning the wrong number of reads"); - Assert.assertTrue(reads.contains(batchedReads.get(0)),"Downsampler is returning a bad read."); - } - - @Test - public void testFillingAcrossLoci() { - List reads = new ArrayList(); - reads.add(ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,76)); - LegacyReservoirDownsampler downsampler = new LegacyReservoirDownsampler(5); - downsampler.addAll(reads); - - Assert.assertFalse(downsampler.isEmpty(),"Downsampler is empty but shouldn't be"); - List batchedReads = new ArrayList(downsampler.getDownsampledContents()); - Assert.assertEquals(batchedReads.size(), 1, "Downsampler is returning the wrong number of reads"); - Assert.assertEquals(batchedReads.get(0), reads.get(0), "Downsampler is returning an incorrect read."); - - reads.clear(); - reads.add(ArtificialSAMUtils.createArtificialRead(header,"read2",0,2,76)); - reads.add(ArtificialSAMUtils.createArtificialRead(header,"read3",0,2,76)); - - downsampler.clear(); - downsampler.addAll(reads); - - Assert.assertFalse(downsampler.isEmpty(),"Downsampler is empty but shouldn't be"); - batchedReads = new ArrayList(downsampler.getDownsampledContents()); - Assert.assertEquals(batchedReads.size(), 2, "Downsampler is returning the wrong number of reads"); - Assert.assertEquals(batchedReads.get(0), reads.get(0), "Downsampler is returning an incorrect read."); - Assert.assertEquals(batchedReads.get(1), reads.get(1), "Downsampler is returning an incorrect read."); - - reads.clear(); - reads.add(ArtificialSAMUtils.createArtificialRead(header,"read4",0,3,76)); - reads.add(ArtificialSAMUtils.createArtificialRead(header,"read5",0,3,76)); - - downsampler.clear(); - downsampler.addAll(reads); - - Assert.assertFalse(downsampler.isEmpty(),"Downsampler is empty but shouldn't be"); - batchedReads = new ArrayList(downsampler.getDownsampledContents()); - Assert.assertEquals(batchedReads.size(), 2, "Downsampler is returning the wrong number of reads"); - Assert.assertEquals(batchedReads.get(0), reads.get(0), "Downsampler is returning an incorrect read."); - Assert.assertEquals(batchedReads.get(1), reads.get(1), "Downsampler is returning an incorrect read."); - } - -} diff --git a/public/java/test/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByStateUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByStateUnitTest.java index 4e7a783e5..eb7e61ed8 100644 --- a/public/java/test/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByStateUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByStateUnitTest.java @@ -474,8 +474,8 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest { final boolean downsample = downsampleTo != -1; final DownsamplingMethod downsampler = downsample - ? new DownsamplingMethod(DownsampleType.BY_SAMPLE, downsampleTo, null, false) - : new DownsamplingMethod(DownsampleType.NONE, null, null, false); + ? new DownsamplingMethod(DownsampleType.BY_SAMPLE, downsampleTo, null) + : new DownsamplingMethod(DownsampleType.NONE, null, null); final ArtificialBAMBuilder bamBuilder = new ArtificialBAMBuilder(header.getSequenceDictionary(), nReadsPerLocus, nLoci); bamBuilder.createAndSetHeader(nSamples).setReadLength(readLength).setAlignmentStart(1); @@ -635,8 +635,8 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest { final boolean downsample = downsampleTo != -1; final DownsamplingMethod downsampler = downsample - ? new DownsamplingMethod(DownsampleType.BY_SAMPLE, downsampleTo, null, false) - : new DownsamplingMethod(DownsampleType.NONE, null, null, false); + ? new DownsamplingMethod(DownsampleType.BY_SAMPLE, downsampleTo, null) + : new DownsamplingMethod(DownsampleType.NONE, null, null); // final List reads = ArtificialSAMUtils.createReadStream(nReadsPerLocus, nLoci, header, 1, readLength);