From 0b717e2e2e82f9e2a3e8c1db2c18ca9f947b27b6 Mon Sep 17 00:00:00 2001 From: Menachem Fromer Date: Mon, 10 Sep 2012 15:32:41 -0400 Subject: [PATCH] Separated out the DoC calculations from the XHMM pipeline, so that CalcDepthOfCoverage can be used for calculating joint coverage on a per-base accounting over multiple samples (e.g., family samples) --- .../IntervalOverlappingRODsFromStream.java | 143 ---- .../gatk/downsampling/DownsampleType.java | 14 - .../gatk/downsampling/DownsamplingMethod.java | 153 ----- .../FractionalDownsamplerFactory.java | 45 -- .../downsampling/LevelingDownsampler.java | 212 ------ .../PerSampleDownsamplingReadsIterator.java | 202 ------ .../downsampling/ReadsDownsamplerFactory.java | 37 - .../ReservoirDownsamplerFactory.java | 45 -- .../SimplePositionalDownsampler.java | 169 ----- .../SimplePositionalDownsamplerFactory.java | 45 -- .../iterators/LegacyDownsampleIterator.java | 52 -- .../LocusIteratorByStateExperimental.java | 649 ------------------ .../sting/gatk/iterators/ReadTransformer.java | 144 ---- .../gatk/iterators/ReadTransformersMode.java | 28 - .../sting/gatk/samples/Trio.java | 45 -- .../gatk/traversals/TraverseLociBase.java | 103 --- .../gatk/traversals/TraverseLociLinear.java | 47 -- .../gatk/traversals/TraverseLociNano.java | 205 ------ .../gatk/traversals/TraverseReadsNano.java | 234 ------- .../sting/gatk/walkers/NanoSchedulable.java | 31 - .../fasta/FastaAlternateReferenceMaker.java | 133 ---- .../walkers/fasta/FastaReferenceMaker.java | 127 ---- .../sting/utils/baq/BAQReadTransformer.java | 49 -- .../utils/baq/ReadTransformingIterator.java | 44 -- .../nanoScheduler/BlockingQueueValue.java | 82 --- .../utils/nanoScheduler/FutureValue.java | 45 -- .../utils/nanoScheduler/InputProducer.java | 62 -- .../sting/utils/nanoScheduler/MapResult.java | 36 - .../utils/nanoScheduler/NSMapFunction.java | 19 - .../nanoScheduler/NSProgressFunction.java | 12 - .../utils/nanoScheduler/NSReduceFunction.java | 18 - .../utils/nanoScheduler/NanoScheduler.java | 392 ----------- .../utils/nanoScheduler/ReducerThread.java | 65 -- .../sting/utils/recalibration/BQSRMode.java | 30 - .../recalibration/BQSRReadTransformer.java | 40 -- .../sam/ArtificialMultiSampleReadStream.java | 86 --- .../sam/ArtificialSingleSampleReadStream.java | 212 ------ ...ificialSingleSampleReadStreamAnalyzer.java | 281 -------- .../EfficiencyMonitoringThreadFactory.java | 158 ----- .../utils/threading/NamedThreadFactory.java | 26 - .../threading/ThreadEfficiencyMonitor.java | 207 ------ .../InvalidArgumentIntegrationTest.java | 41 -- .../LevelingDownsamplerUnitTest.java | 163 ----- ...mpleDownsamplingReadsIteratorUnitTest.java | 298 -------- ...ificialSingleSampleReadStreamAnalyzer.java | 126 ---- .../ReservoirDownsamplerUnitTest.java | 129 ---- .../SimplePositionalDownsamplerUnitTest.java | 330 --------- ...usIteratorByStateExperimentalUnitTest.java | 546 --------------- .../LegacyReservoirDownsamplerUnitTest.java | 166 ----- .../nanoScheduler/InputProducerUnitTest.java | 71 -- .../nanoScheduler/NanoSchedulerUnitTest.java | 182 ----- .../nanoScheduler/ReducerThreadUnitTest.java | 94 --- ...ificialSingleSampleReadStreamUnitTest.java | 161 ----- ...ciencyMonitoringThreadFactoryUnitTest.java | 184 ----- 54 files changed, 7218 deletions(-) delete mode 100644 public/java/src/org/broadinstitute/sting/gatk/datasources/providers/IntervalOverlappingRODsFromStream.java delete mode 100644 public/java/src/org/broadinstitute/sting/gatk/downsampling/DownsampleType.java delete mode 100644 public/java/src/org/broadinstitute/sting/gatk/downsampling/DownsamplingMethod.java delete mode 100644 public/java/src/org/broadinstitute/sting/gatk/downsampling/FractionalDownsamplerFactory.java delete mode 100644 public/java/src/org/broadinstitute/sting/gatk/downsampling/LevelingDownsampler.java delete mode 100644 public/java/src/org/broadinstitute/sting/gatk/downsampling/PerSampleDownsamplingReadsIterator.java delete mode 100644 public/java/src/org/broadinstitute/sting/gatk/downsampling/ReadsDownsamplerFactory.java delete mode 100644 public/java/src/org/broadinstitute/sting/gatk/downsampling/ReservoirDownsamplerFactory.java delete mode 100644 public/java/src/org/broadinstitute/sting/gatk/downsampling/SimplePositionalDownsampler.java delete mode 100644 public/java/src/org/broadinstitute/sting/gatk/downsampling/SimplePositionalDownsamplerFactory.java delete mode 100755 public/java/src/org/broadinstitute/sting/gatk/iterators/LegacyDownsampleIterator.java delete mode 100755 public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByStateExperimental.java delete mode 100644 public/java/src/org/broadinstitute/sting/gatk/iterators/ReadTransformer.java delete mode 100644 public/java/src/org/broadinstitute/sting/gatk/iterators/ReadTransformersMode.java delete mode 100644 public/java/src/org/broadinstitute/sting/gatk/samples/Trio.java delete mode 100755 public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLociBase.java delete mode 100755 public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLociLinear.java delete mode 100755 public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLociNano.java delete mode 100755 public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadsNano.java delete mode 100755 public/java/src/org/broadinstitute/sting/gatk/walkers/NanoSchedulable.java delete mode 100755 public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceMaker.java delete mode 100755 public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaReferenceMaker.java delete mode 100644 public/java/src/org/broadinstitute/sting/utils/baq/BAQReadTransformer.java delete mode 100644 public/java/src/org/broadinstitute/sting/utils/baq/ReadTransformingIterator.java delete mode 100644 public/java/src/org/broadinstitute/sting/utils/nanoScheduler/BlockingQueueValue.java delete mode 100644 public/java/src/org/broadinstitute/sting/utils/nanoScheduler/FutureValue.java delete mode 100644 public/java/src/org/broadinstitute/sting/utils/nanoScheduler/InputProducer.java delete mode 100644 public/java/src/org/broadinstitute/sting/utils/nanoScheduler/MapResult.java delete mode 100644 public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NSMapFunction.java delete mode 100644 public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NSProgressFunction.java delete mode 100644 public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NSReduceFunction.java delete mode 100644 public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NanoScheduler.java delete mode 100644 public/java/src/org/broadinstitute/sting/utils/nanoScheduler/ReducerThread.java delete mode 100644 public/java/src/org/broadinstitute/sting/utils/recalibration/BQSRMode.java delete mode 100644 public/java/src/org/broadinstitute/sting/utils/recalibration/BQSRReadTransformer.java delete mode 100644 public/java/src/org/broadinstitute/sting/utils/sam/ArtificialMultiSampleReadStream.java delete mode 100644 public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSingleSampleReadStream.java delete mode 100644 public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSingleSampleReadStreamAnalyzer.java delete mode 100644 public/java/src/org/broadinstitute/sting/utils/threading/EfficiencyMonitoringThreadFactory.java delete mode 100644 public/java/src/org/broadinstitute/sting/utils/threading/NamedThreadFactory.java delete mode 100644 public/java/src/org/broadinstitute/sting/utils/threading/ThreadEfficiencyMonitor.java delete mode 100644 public/java/test/org/broadinstitute/sting/commandline/InvalidArgumentIntegrationTest.java delete mode 100644 public/java/test/org/broadinstitute/sting/gatk/downsampling/LevelingDownsamplerUnitTest.java delete mode 100644 public/java/test/org/broadinstitute/sting/gatk/downsampling/PerSampleDownsamplingReadsIteratorUnitTest.java delete mode 100644 public/java/test/org/broadinstitute/sting/gatk/downsampling/PositionallyDownsampledArtificialSingleSampleReadStreamAnalyzer.java delete mode 100644 public/java/test/org/broadinstitute/sting/gatk/downsampling/ReservoirDownsamplerUnitTest.java delete mode 100644 public/java/test/org/broadinstitute/sting/gatk/downsampling/SimplePositionalDownsamplerUnitTest.java delete mode 100644 public/java/test/org/broadinstitute/sting/gatk/iterators/LocusIteratorByStateExperimentalUnitTest.java delete mode 100644 public/java/test/org/broadinstitute/sting/utils/LegacyReservoirDownsamplerUnitTest.java delete mode 100644 public/java/test/org/broadinstitute/sting/utils/nanoScheduler/InputProducerUnitTest.java delete mode 100644 public/java/test/org/broadinstitute/sting/utils/nanoScheduler/NanoSchedulerUnitTest.java delete mode 100644 public/java/test/org/broadinstitute/sting/utils/nanoScheduler/ReducerThreadUnitTest.java delete mode 100644 public/java/test/org/broadinstitute/sting/utils/sam/ArtificialSingleSampleReadStreamUnitTest.java delete mode 100755 public/java/test/org/broadinstitute/sting/utils/threading/EfficiencyMonitoringThreadFactoryUnitTest.java diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/IntervalOverlappingRODsFromStream.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/IntervalOverlappingRODsFromStream.java deleted file mode 100644 index 1e39d6836..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/IntervalOverlappingRODsFromStream.java +++ /dev/null @@ -1,143 +0,0 @@ -package org.broadinstitute.sting.gatk.datasources.providers; - -import com.google.java.contract.Ensures; -import com.google.java.contract.Requires; -import net.sf.picard.util.PeekableIterator; -import org.broadinstitute.sting.gatk.refdata.RODRecordListImpl; -import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; -import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; -import org.broadinstitute.sting.utils.GenomeLoc; - -import java.util.Collection; -import java.util.LinkedList; -import java.util.ListIterator; - -/** - * Key algorithmic helper for ReadBasedReferenceOrderedData - * - * Takes a single iterator of features, and provides a single capability that returns - * the list of RODs that overlap an interval. Allows sequential getOverlapping calls - * from intervals provided that these intervals always have increasing getStart() values. - * - */ -class IntervalOverlappingRODsFromStream { - /** - * Only held for QC purposes - */ - GenomeLoc lastQuery = null; - - private final String name; - private final LinkedList currentFeatures = new LinkedList(); - private final PeekableIterator futureFeatures; - - /** - * Create a new IntervalOverlappingRODsFromStream that reads elements from futureFeatures and - * returns RODRecordLists having name - * - * @param name - * @param futureFeatures - */ - IntervalOverlappingRODsFromStream(final String name, final PeekableIterator futureFeatures) { - if ( futureFeatures == null ) throw new IllegalArgumentException("futureFeatures cannot be null"); - - this.name = name; - this.futureFeatures = futureFeatures; - } - - /** - * Get the list of RODs overlapping loc from this stream of RODs. - * - * Sequential calls to this function must obey the rule that loc2.getStart >= loc1.getStart - * - * @param loc the interval to query - * @return a non-null RODRecordList containing the overlapping RODs, which may be empty - */ - @Ensures({"overlaps(loc, result)", - "! futureFeatures.hasNext() || futureFeatures.peek().getLocation().isPast(loc)", - "result != null"}) - public RODRecordList getOverlapping(final GenomeLoc loc) { - if ( lastQuery != null && loc.getStart() < lastQuery.getStart() ) - throw new IllegalArgumentException(String.format("BUG: query interval (%s) starts before the previous interval %s", loc, lastQuery)); - - trimCurrentFeaturesToLoc(loc); - readOverlappingFutureFeatures(loc); - return new RODRecordListImpl(name, subsetToOverlapping(loc, currentFeatures), loc); - } - - - /** - * For contract assurance. Checks that all bindings in loc overlap - * - * @param loc - * @param bindings - * @return - */ - @Requires({"loc != null", "bindings != null"}) - private boolean overlaps(final GenomeLoc loc, final RODRecordList bindings) { - for ( final GATKFeature feature : bindings ) - if ( ! feature.getLocation().overlapsP(loc) ) - return false; - return true; - } - - /** - * Subset the features in all to those that overlap with loc - * - * The current features list contains everything read that cannot be thrown away yet, but not - * everything in there necessarily overlaps with loc. Subset to just those that do overlap - * - * @param loc the location that features must overlap - * @param all the list of all features - * @return a subset of all that overlaps with loc - */ - @Requires({"loc != null", "all != null"}) - @Ensures("result.size() <= all.size()") - private Collection subsetToOverlapping(final GenomeLoc loc, final Collection all) { - final LinkedList overlapping = new LinkedList(); - for ( final GATKFeature feature : all ) - if ( feature.getLocation().overlapsP(loc) ) - overlapping.add(feature); - return overlapping; - } - - /** - * Update function. Remove all elements of currentFeatures that end before loc - * - * @param loc the location to use - */ - @Requires("loc != null") - @Ensures("currentFeatures.size() <= old(currentFeatures.size())") - private void trimCurrentFeaturesToLoc(final GenomeLoc loc) { - final ListIterator it = currentFeatures.listIterator(); - while ( it.hasNext() ) { - final GATKFeature feature = it.next(); - if ( feature.getLocation().isBefore(loc) ) - it.remove(); - } - } - - /** - * Update function: Read all elements from futureFeatures that overlap with loc - * - * Stops at the first element that starts before the end of loc, or the stream empties - * - * @param loc - */ - @Requires("loc != null") - @Ensures("currentFeatures.size() >= old(currentFeatures.size())") - private void readOverlappingFutureFeatures(final GenomeLoc loc) { - while ( futureFeatures.hasNext() ) { - final GenomeLoc nextLoc = futureFeatures.peek().getLocation(); - if ( nextLoc.isBefore(loc) ) { - futureFeatures.next(); // next rod element is before loc, throw it away and keep looking - } else if ( nextLoc.isPast(loc) ) { - break; // next element is past loc, stop looking but don't pop it - } else if ( nextLoc.overlapsP(loc) ) { - // add overlapping elements to our current features, removing from stream - for ( final GATKFeature feature : futureFeatures.next() ) { - currentFeatures.add(feature); - } - } - } - } -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/downsampling/DownsampleType.java b/public/java/src/org/broadinstitute/sting/gatk/downsampling/DownsampleType.java deleted file mode 100644 index c3d17436a..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/downsampling/DownsampleType.java +++ /dev/null @@ -1,14 +0,0 @@ -package org.broadinstitute.sting.gatk.downsampling; - -/** - * Type of downsampling method to invoke. - * - * @author hanna - * @version 0.1 - */ - -public enum DownsampleType { - NONE, - ALL_READS, - BY_SAMPLE -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/downsampling/DownsamplingMethod.java b/public/java/src/org/broadinstitute/sting/gatk/downsampling/DownsamplingMethod.java deleted file mode 100644 index ae1d98ce0..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/downsampling/DownsamplingMethod.java +++ /dev/null @@ -1,153 +0,0 @@ -/* - * Copyright (c) 2012, The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.downsampling; - -import org.broadinstitute.sting.gatk.walkers.ActiveRegionWalker; -import org.broadinstitute.sting.gatk.walkers.LocusWalker; -import org.broadinstitute.sting.gatk.walkers.Walker; -import org.broadinstitute.sting.utils.exceptions.UserException; - -/** - * Describes the method for downsampling reads at a given locus. - */ - -public class DownsamplingMethod { - /** - * Type of downsampling to perform. - */ - public final DownsampleType type; - - /** - * Actual downsampling target is specified as an integer number of reads. - */ - public final Integer toCoverage; - - /** - * Actual downsampling target is specified as a fraction of total available reads. - */ - public final Double toFraction; - - /** - * Use the new experimental downsampling? - */ - public final boolean useExperimentalDownsampling; - - /** - * Expresses no downsampling applied at all. - */ - public static final DownsamplingMethod NONE = new DownsamplingMethod(DownsampleType.NONE,null,null,false); - - /** - * Default type to use if no type is specified - */ - public static DownsampleType DEFAULT_DOWNSAMPLING_TYPE = DownsampleType.BY_SAMPLE; - - /** - * Default target coverage for locus-based traversals - */ - public static int DEFAULT_LOCUS_BASED_TRAVERSAL_DOWNSAMPLING_COVERAGE = 1000; - - public DownsamplingMethod( DownsampleType type, Integer toCoverage, Double toFraction, boolean useExperimentalDownsampling ) { - this.type = type != null ? type : DEFAULT_DOWNSAMPLING_TYPE; - this.toCoverage = toCoverage; - this.toFraction = toFraction; - this.useExperimentalDownsampling = useExperimentalDownsampling; - - if ( type == DownsampleType.NONE ) { - toCoverage = null; - toFraction = null; - } - - validate(); - } - - private void validate() { - // Can't leave toFraction and toCoverage null unless type is NONE - if ( type != DownsampleType.NONE && toFraction == null && toCoverage == null ) - throw new UserException.CommandLineException("Must specify either toFraction or toCoverage when downsampling."); - - // Fraction and coverage cannot both be specified. - if ( toFraction != null && toCoverage != null ) - throw new UserException.CommandLineException("Downsampling coverage and fraction are both specified. Please choose only one."); - - // toCoverage must be > 0 when specified - if ( toCoverage != null && toCoverage <= 0 ) { - throw new UserException.CommandLineException("toCoverage must be > 0 when downsampling to coverage"); - } - - // toFraction must be >= 0.0 and <= 1.0 when specified - if ( toFraction != null && (toFraction < 0.0 || toFraction > 1.0) ) { - throw new UserException.CommandLineException("toFraction must be >= 0.0 and <= 1.0 when downsampling to a fraction of reads"); - } - - // Some restrictions only exist for the old downsampling implementation: - if ( ! useExperimentalDownsampling ) { - // By sample downsampling does not work with a fraction of reads in the old downsampling implementation - if( type == DownsampleType.BY_SAMPLE && toFraction != null ) - throw new UserException.CommandLineException("Cannot downsample to fraction with the BY_SAMPLE method"); - } - - // Some restrictions only exist for the new downsampling implementation: - if ( useExperimentalDownsampling ) { - if ( type == DownsampleType.ALL_READS && toCoverage != null ) { - throw new UserException.CommandLineException("Cannot downsample to coverage with the ALL_READS method in the experimental downsampling implementation"); - } - } - } - - public String toString() { - StringBuilder builder = new StringBuilder("Downsampling Settings: "); - - if ( type == DownsampleType.NONE ) { - builder.append("No downsampling"); - } - else { - builder.append(String.format("Method: %s ", type)); - - if ( toCoverage != null ) { - builder.append(String.format("Target Coverage: %d ", toCoverage)); - } - else { - builder.append(String.format("Target Fraction: %.2f ", toFraction)); - } - - if ( useExperimentalDownsampling ) { - builder.append("Using Experimental Downsampling"); - } - } - - return builder.toString(); - } - - public static DownsamplingMethod getDefaultDownsamplingMethod( Walker walker, boolean useExperimentalDownsampling ) { - if ( walker instanceof LocusWalker || walker instanceof ActiveRegionWalker ) { - return new DownsamplingMethod(DEFAULT_DOWNSAMPLING_TYPE, DEFAULT_LOCUS_BASED_TRAVERSAL_DOWNSAMPLING_COVERAGE, - null, useExperimentalDownsampling); - } - else { - return new DownsamplingMethod(DownsampleType.NONE, null, null, useExperimentalDownsampling); - } - } -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/downsampling/FractionalDownsamplerFactory.java b/public/java/src/org/broadinstitute/sting/gatk/downsampling/FractionalDownsamplerFactory.java deleted file mode 100644 index 7a7c9e91e..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/downsampling/FractionalDownsamplerFactory.java +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright (c) 2012, The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.downsampling; - -import net.sf.samtools.SAMRecord; - -/** - * Factory for creating FractionalDownsamplers on demand - * - * @author David Roazen - */ -public class FractionalDownsamplerFactory implements ReadsDownsamplerFactory { - - private double fraction; - - public FractionalDownsamplerFactory( double fraction ) { - this.fraction = fraction; - } - - public ReadsDownsampler newInstance() { - return new FractionalDownsampler(fraction); - } -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/downsampling/LevelingDownsampler.java b/public/java/src/org/broadinstitute/sting/gatk/downsampling/LevelingDownsampler.java deleted file mode 100644 index 73d69140d..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/downsampling/LevelingDownsampler.java +++ /dev/null @@ -1,212 +0,0 @@ -/* - * Copyright (c) 2012, The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.downsampling; - -import org.broadinstitute.sting.utils.MathUtils; - -import java.util.*; - -/** - * Leveling Downsampler: Given a set of Lists of arbitrary items and a target size, removes items from - * the Lists in an even fashion until the total size of all Lists is <= the target size. Leveling - * does not occur until all Lists have been submitted and signalEndOfInput() is called. - * - * The Lists should be LinkedLists for maximum efficiency during item removal, however other - * kinds of Lists are also accepted (albeit at a slight performance penalty). - * - * Since this downsampler extends the Downsampler interface rather than the ReadsDownsampler interface, - * the Lists need not contain reads. However this downsampler may not be wrapped within one of the - * DownsamplingReadsIterators - * - * @param the List type representing the stacks to be leveled - * @param the type of the elements of each List - * - * @author David Roazen - */ -public class LevelingDownsampler, E> implements Downsampler { - - private int targetSize; - - private List groups; - - private boolean groupsAreFinalized; - - private int numDiscardedItems; - - /** - * Construct a LevelingDownsampler - * - * @param targetSize the sum of the sizes of all individual Lists this downsampler is fed may not exceed - * this value -- if it does, items are removed from Lists evenly until the total size - * is <= this value - */ - public LevelingDownsampler( int targetSize ) { - this.targetSize = targetSize; - clear(); - reset(); - } - - public void submit( T item ) { - groups.add(item); - } - - public void submit( Collection items ){ - groups.addAll(items); - } - - public boolean hasFinalizedItems() { - return groupsAreFinalized && groups.size() > 0; - } - - public List consumeFinalizedItems() { - if ( ! hasFinalizedItems() ) { - return new ArrayList(); - } - - // pass by reference rather than make a copy, for speed - List toReturn = groups; - clear(); - return toReturn; - } - - public boolean hasPendingItems() { - return ! groupsAreFinalized && groups.size() > 0; - } - - public T peekFinalized() { - return hasFinalizedItems() ? groups.get(0) : null; - } - - public T peekPending() { - return hasPendingItems() ? groups.get(0) : null; - } - - public int getNumberOfDiscardedItems() { - return numDiscardedItems; - } - - public void signalEndOfInput() { - levelGroups(); - groupsAreFinalized = true; - } - - public void clear() { - groups = new ArrayList(); - groupsAreFinalized = false; - } - - public void reset() { - numDiscardedItems = 0; - } - - private void levelGroups() { - int totalSize = 0; - int[] groupSizes = new int[groups.size()]; - int currentGroupIndex = 0; - - for ( T group : groups ) { - groupSizes[currentGroupIndex] = group.size(); - totalSize += groupSizes[currentGroupIndex]; - currentGroupIndex++; - } - - if ( totalSize <= targetSize ) { - return; // no need to eliminate any items - } - - // We will try to remove exactly this many items, however we will refuse to allow any - // one group to fall below size 1, and so might end up removing fewer items than this - int numItemsToRemove = totalSize - targetSize; - - currentGroupIndex = 0; - int numConsecutiveUmodifiableGroups = 0; - - // Continue until we've either removed all the items we wanted to, or we can't - // remove any more items without violating the constraint that all groups must - // be left with at least one item - while ( numItemsToRemove > 0 && numConsecutiveUmodifiableGroups < groupSizes.length ) { - if ( groupSizes[currentGroupIndex] > 1 ) { - groupSizes[currentGroupIndex]--; - numItemsToRemove--; - numConsecutiveUmodifiableGroups = 0; - } - else { - numConsecutiveUmodifiableGroups++; - } - - currentGroupIndex = (currentGroupIndex + 1) % groupSizes.length; - } - - // Now we actually go through and reduce each group to its new count as specified in groupSizes - currentGroupIndex = 0; - for ( T group : groups ) { - downsampleOneGroup(group, groupSizes[currentGroupIndex]); - currentGroupIndex++; - } - } - - private void downsampleOneGroup( T group, int numItemsToKeep ) { - if ( numItemsToKeep >= group.size() ) { - return; - } - - numDiscardedItems += group.size() - numItemsToKeep; - - BitSet itemsToKeep = new BitSet(group.size()); - for ( Integer selectedIndex : MathUtils.sampleIndicesWithoutReplacement(group.size(), numItemsToKeep) ) { - itemsToKeep.set(selectedIndex); - } - - int currentIndex = 0; - - // If our group is a linked list, we can remove the desired items in a single O(n) pass with an iterator - if ( group instanceof LinkedList ) { - Iterator iter = group.iterator(); - while ( iter.hasNext() ) { - iter.next(); - - if ( ! itemsToKeep.get(currentIndex) ) { - iter.remove(); - } - - currentIndex++; - } - } - // If it's not a linked list, it's more efficient to copy the desired items into a new list and back rather - // than suffer O(n^2) of item shifting - else { - List keptItems = new ArrayList(numItemsToKeep); - - for ( E item : group ) { - if ( itemsToKeep.get(currentIndex) ) { - keptItems.add(item); - } - currentIndex++; - } - group.clear(); - group.addAll(keptItems); - } - } -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/downsampling/PerSampleDownsamplingReadsIterator.java b/public/java/src/org/broadinstitute/sting/gatk/downsampling/PerSampleDownsamplingReadsIterator.java deleted file mode 100644 index 8b2034460..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/downsampling/PerSampleDownsamplingReadsIterator.java +++ /dev/null @@ -1,202 +0,0 @@ -/* - * Copyright (c) 2012, The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.downsampling; - -import net.sf.samtools.SAMRecord; -import net.sf.samtools.SAMRecordComparator; -import net.sf.samtools.SAMRecordCoordinateComparator; -import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; - -import java.util.*; - - -/** - * StingSAMIterator wrapper around our generic reads downsampler interface - * that downsamples reads for each sample independently, and then re-assembles - * the reads back into a single merged stream. - * - * @author David Roazen - */ -public class PerSampleDownsamplingReadsIterator implements StingSAMIterator { - - private StingSAMIterator nestedSAMIterator; - private ReadsDownsamplerFactory downsamplerFactory; - private Map> perSampleDownsamplers; - private PriorityQueue orderedDownsampledReadsCache; - private SAMRecord nextRead = null; - private SAMRecordComparator readComparator = new SAMRecordCoordinateComparator(); - private SAMRecord earliestPendingRead = null; - private ReadsDownsampler earliestPendingDownsampler = null; - - // Initial size of our cache of finalized reads - private static final int DOWNSAMPLED_READS_INITIAL_CACHE_SIZE = 4096; - - // The number of positional changes that can occur in the read stream before all downsamplers - // should be informed of the current position (guards against samples with relatively sparse reads - // getting stuck in a pending state): - private static final int DOWNSAMPLER_POSITIONAL_UPDATE_INTERVAL = 3; // TODO: experiment with this value - - /** - * @param iter wrapped iterator from which this iterator will pull reads - * @param downsamplerFactory factory used to create new downsamplers as needed - */ - public PerSampleDownsamplingReadsIterator( StingSAMIterator iter, ReadsDownsamplerFactory downsamplerFactory ) { - nestedSAMIterator = iter; - this.downsamplerFactory = downsamplerFactory; - perSampleDownsamplers = new HashMap>(); - orderedDownsampledReadsCache = new PriorityQueue(DOWNSAMPLED_READS_INITIAL_CACHE_SIZE, readComparator); - - advanceToNextRead(); - } - - public boolean hasNext() { - return nextRead != null; - } - - public SAMRecord next() { - if ( nextRead == null ) { - throw new NoSuchElementException("next() called when there are no more items"); - } - - SAMRecord toReturn = nextRead; - advanceToNextRead(); - - return toReturn; - } - - private void advanceToNextRead() { - if ( ! readyToReleaseReads() && ! fillDownsampledReadsCache() ) { - nextRead = null; - } - else { - nextRead = orderedDownsampledReadsCache.poll(); - } - } - - private boolean readyToReleaseReads() { - if ( orderedDownsampledReadsCache.isEmpty() ) { - return false; - } - - return earliestPendingRead == null || - readComparator.compare(orderedDownsampledReadsCache.peek(), earliestPendingRead) <= 0; - } - - private void updateEarliestPendingRead( ReadsDownsampler currentDownsampler ) { - // If there is no recorded earliest pending read and this downsampler has pending items, - // then this downsampler's first pending item becomes the new earliest pending read: - if ( earliestPendingRead == null && currentDownsampler.hasPendingItems() ) { - earliestPendingRead = currentDownsampler.peekPending(); - earliestPendingDownsampler = currentDownsampler; - } - // In all other cases, we only need to update the earliest pending read when the downsampler - // associated with it experiences a change in its pending reads, since by assuming a sorted - // read stream we're assured that each downsampler's earliest pending read will only increase - // in genomic position over time. - // - // TODO: An occasional O(samples) linear search seems like a better option than keeping the downsamplers - // TODO: sorted by earliest pending read, which would cost at least O(total_reads * (samples + log(samples))), - // TODO: but need to verify this empirically. - else if ( currentDownsampler == earliestPendingDownsampler && - (! currentDownsampler.hasPendingItems() || readComparator.compare(currentDownsampler.peekPending(), earliestPendingRead) != 0) ) { - - earliestPendingRead = null; - earliestPendingDownsampler = null; - for ( ReadsDownsampler perSampleDownsampler : perSampleDownsamplers.values() ) { - if ( perSampleDownsampler.hasPendingItems() && - (earliestPendingRead == null || readComparator.compare(perSampleDownsampler.peekPending(), earliestPendingRead) < 0) ) { - - earliestPendingRead = perSampleDownsampler.peekPending(); - earliestPendingDownsampler = perSampleDownsampler; - } - } - } - } - - private boolean fillDownsampledReadsCache() { - SAMRecord prevRead = null; - int numPositionalChanges = 0; - - // Continue submitting reads to the per-sample downsamplers until the read at the top of the priority queue - // can be released without violating global sort order - while ( nestedSAMIterator.hasNext() && ! readyToReleaseReads() ) { - SAMRecord read = nestedSAMIterator.next(); - String sampleName = read.getReadGroup() != null ? read.getReadGroup().getSample() : null; - - ReadsDownsampler thisSampleDownsampler = perSampleDownsamplers.get(sampleName); - if ( thisSampleDownsampler == null ) { - thisSampleDownsampler = downsamplerFactory.newInstance(); - perSampleDownsamplers.put(sampleName, thisSampleDownsampler); - } - - thisSampleDownsampler.submit(read); - updateEarliestPendingRead(thisSampleDownsampler); - - if ( prevRead != null && prevRead.getAlignmentStart() != read.getAlignmentStart() ) { - numPositionalChanges++; - } - - // If the number of times we've changed position exceeds a certain threshold, inform all - // downsamplers of the current position in the read stream. This is to prevent downsamplers - // for samples with sparser reads than others from getting stuck too long in a pending state. - if ( numPositionalChanges > DOWNSAMPLER_POSITIONAL_UPDATE_INTERVAL ) { - for ( ReadsDownsampler perSampleDownsampler : perSampleDownsamplers.values() ) { - perSampleDownsampler.signalNoMoreReadsBefore(read); - updateEarliestPendingRead(perSampleDownsampler); - } - } - - prevRead = read; - } - - if ( ! nestedSAMIterator.hasNext() ) { - for ( ReadsDownsampler perSampleDownsampler : perSampleDownsamplers.values() ) { - perSampleDownsampler.signalEndOfInput(); - } - earliestPendingRead = null; - earliestPendingDownsampler = null; - } - - for ( ReadsDownsampler perSampleDownsampler : perSampleDownsamplers.values() ) { - if ( perSampleDownsampler.hasFinalizedItems() ) { - orderedDownsampledReadsCache.addAll(perSampleDownsampler.consumeFinalizedItems()); - } - } - - return readyToReleaseReads(); - } - - public void remove() { - throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!"); - } - - public void close() { - nestedSAMIterator.close(); - } - - public Iterator iterator() { - return this; - } -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/downsampling/ReadsDownsamplerFactory.java b/public/java/src/org/broadinstitute/sting/gatk/downsampling/ReadsDownsamplerFactory.java deleted file mode 100644 index 2fa32497b..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/downsampling/ReadsDownsamplerFactory.java +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright (c) 2012, The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.downsampling; - -import net.sf.samtools.SAMRecord; - -/** - * A ReadsDownsamplerFactory can be used to create an arbitrary number of instances of a particular - * downsampler, all sharing the same construction parameters. - * - * @author David Roazen - */ -public interface ReadsDownsamplerFactory { - public ReadsDownsampler newInstance(); -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/downsampling/ReservoirDownsamplerFactory.java b/public/java/src/org/broadinstitute/sting/gatk/downsampling/ReservoirDownsamplerFactory.java deleted file mode 100644 index 040f0c788..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/downsampling/ReservoirDownsamplerFactory.java +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright (c) 2012, The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.downsampling; - -import net.sf.samtools.SAMRecord; - -/** - * Factory for creating ReservoirDownsamplers on demand - * - * @author David Roazen - */ -public class ReservoirDownsamplerFactory implements ReadsDownsamplerFactory { - - private int targetSampleSize; - - public ReservoirDownsamplerFactory( int targetSampleSize ) { - this.targetSampleSize = targetSampleSize; - } - - public ReadsDownsampler newInstance() { - return new ReservoirDownsampler(targetSampleSize); - } -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/downsampling/SimplePositionalDownsampler.java b/public/java/src/org/broadinstitute/sting/gatk/downsampling/SimplePositionalDownsampler.java deleted file mode 100644 index 30affc2b3..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/downsampling/SimplePositionalDownsampler.java +++ /dev/null @@ -1,169 +0,0 @@ -/* - * Copyright (c) 2012, The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.downsampling; - -import net.sf.samtools.SAMRecord; - -import java.util.*; - -/** - * Simple Positional Downsampler: Downsample each stack of reads at each alignment start to a size <= a target coverage - * using a Reservoir downsampler. Stores only O(target coverage) reads in memory at any given time. - * - * @author David Roazen - */ -public class SimplePositionalDownsampler implements ReadsDownsampler { - - private int targetCoverage; - - private ReservoirDownsampler reservoir; - - private int currentContigIndex; - - private int currentAlignmentStart; - - private boolean positionEstablished; - - private boolean unmappedReadsReached; - - private ArrayList finalizedReads; - - private int numDiscardedItems; - - /** - * Construct a SimplePositionalDownsampler - * - * @param targetCoverage Maximum number of reads that may share any given alignment start position - */ - public SimplePositionalDownsampler( int targetCoverage ) { - this.targetCoverage = targetCoverage; - reservoir = new ReservoirDownsampler(targetCoverage); - finalizedReads = new ArrayList(); - clear(); - reset(); - } - - public void submit( T newRead ) { - updatePositionalState(newRead); - - if ( unmappedReadsReached ) { // don't downsample the unmapped reads at the end of the stream - finalizedReads.add(newRead); - } - else { - int reservoirPreviouslyDiscardedItems = reservoir.getNumberOfDiscardedItems(); - reservoir.submit(newRead); - numDiscardedItems += reservoir.getNumberOfDiscardedItems() - reservoirPreviouslyDiscardedItems; - } - } - - public void submit( Collection newReads ) { - for ( T read : newReads ) { - submit(read); - } - } - - public boolean hasFinalizedItems() { - return finalizedReads.size() > 0; - } - - public List consumeFinalizedItems() { - // pass by reference rather than make a copy, for speed - List toReturn = finalizedReads; - finalizedReads = new ArrayList(); - return toReturn; - } - - public boolean hasPendingItems() { - return reservoir.hasFinalizedItems(); - } - - public T peekFinalized() { - return finalizedReads.isEmpty() ? null : finalizedReads.get(0); - } - - public T peekPending() { - return reservoir.peekFinalized(); - } - - public int getNumberOfDiscardedItems() { - return numDiscardedItems; - } - - public void signalEndOfInput() { - finalizeReservoir(); - } - - public void clear() { - reservoir.clear(); - reservoir.reset(); - finalizedReads.clear(); - positionEstablished = false; - unmappedReadsReached = false; - } - - public void reset() { - numDiscardedItems = 0; - } - - public boolean requiresCoordinateSortOrder() { - return true; - } - - public void signalNoMoreReadsBefore( T read ) { - updatePositionalState(read); - } - - private void updatePositionalState( T newRead ) { - if ( readIsPastCurrentPosition(newRead) ) { - if ( reservoir.hasFinalizedItems() ) { - finalizeReservoir(); - } - - setCurrentPosition(newRead); - - if ( newRead.getReadUnmappedFlag() ) { - unmappedReadsReached = true; - } - } - } - - private void setCurrentPosition( T read ) { - currentContigIndex = read.getReferenceIndex(); - currentAlignmentStart = read.getAlignmentStart(); - positionEstablished = true; - } - - private boolean readIsPastCurrentPosition( T read ) { - return ! positionEstablished || - read.getReferenceIndex() > currentContigIndex || - read.getAlignmentStart() > currentAlignmentStart || - (read.getReadUnmappedFlag() && ! unmappedReadsReached); - } - - private void finalizeReservoir() { - finalizedReads.addAll(reservoir.consumeFinalizedItems()); - reservoir.reset(); - } -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/downsampling/SimplePositionalDownsamplerFactory.java b/public/java/src/org/broadinstitute/sting/gatk/downsampling/SimplePositionalDownsamplerFactory.java deleted file mode 100644 index fcc18b16b..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/downsampling/SimplePositionalDownsamplerFactory.java +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright (c) 2012, The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.downsampling; - -import net.sf.samtools.SAMRecord; - -/** - * Factory for creating SimplePositionalDownsamplers on demand - * - * @author David Roazen - */ -public class SimplePositionalDownsamplerFactory implements ReadsDownsamplerFactory { - - private int targetCoverage; - - public SimplePositionalDownsamplerFactory( int targetCoverage ) { - this.targetCoverage = targetCoverage; - } - - public ReadsDownsampler newInstance() { - return new SimplePositionalDownsampler(targetCoverage); - } -} \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/iterators/LegacyDownsampleIterator.java b/public/java/src/org/broadinstitute/sting/gatk/iterators/LegacyDownsampleIterator.java deleted file mode 100755 index c0de06b49..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/iterators/LegacyDownsampleIterator.java +++ /dev/null @@ -1,52 +0,0 @@ -package org.broadinstitute.sting.gatk.iterators; - -import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; - -import java.util.Iterator; - - -public class LegacyDownsampleIterator implements StingSAMIterator { - - StingSAMIterator it; - int cutoff; - SAMRecord next; - - public LegacyDownsampleIterator(StingSAMIterator it, double fraction) { - this.it = it; - cutoff = (int)(fraction * 10000); - next = getNextRecord(); - } - - public boolean hasNext() { - return next != null; - } - - public SAMRecord next() { - SAMRecord result = next; - next = getNextRecord(); - return result; - } - - public void remove() { - throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!"); - } - - private SAMRecord getNextRecord() { - while ( true ) { - if ( !it.hasNext() ) - return null; - SAMRecord rec = it.next(); - if ( GenomeAnalysisEngine.getRandomGenerator().nextInt(10000) < cutoff ) - return rec; - } - } - - public void close() { - it.close(); - } - - public Iterator iterator() { - return this; - } -} \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByStateExperimental.java b/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByStateExperimental.java deleted file mode 100755 index 557cbd009..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByStateExperimental.java +++ /dev/null @@ -1,649 +0,0 @@ -/* - * Copyright (c) 2009 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.iterators; - -import net.sf.picard.util.PeekableIterator; -import net.sf.samtools.Cigar; -import net.sf.samtools.CigarElement; -import net.sf.samtools.CigarOperator; -import net.sf.samtools.SAMRecord; -import org.apache.log4j.Logger; -import org.broadinstitute.sting.gatk.ReadProperties; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.downsampling.DownsampleType; -import org.broadinstitute.sting.gatk.downsampling.Downsampler; -import org.broadinstitute.sting.gatk.downsampling.LevelingDownsampler; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.pileup.PileupElement; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl; -import org.broadinstitute.sting.utils.sam.GATKSAMRecord; -import org.broadinstitute.sting.utils.sam.ReadUtils; - -import java.util.*; - -/** - * Iterator that traverses a SAM File, accumulating information on a per-locus basis - */ -public class LocusIteratorByStateExperimental extends LocusIterator { - /** - * our log, which we want to capture anything from this class - */ - private static Logger logger = Logger.getLogger(LocusIteratorByState.class); - - // ----------------------------------------------------------------------------------------------------------------- - // - // member fields - // - // ----------------------------------------------------------------------------------------------------------------- - - /** - * Used to create new GenomeLocs. - */ - private final GenomeLocParser genomeLocParser; - private final ArrayList samples; - private final ReadStateManager readStates; - - protected static class SAMRecordState { - SAMRecord read; - int readOffset = -1; // how far are we offset from the start of the read bases? - int genomeOffset = -1; // how far are we offset from the alignment start on the genome? - - Cigar cigar = null; - int cigarOffset = -1; - CigarElement curElement = null; - int nCigarElements = 0; - - int cigarElementCounter = -1; // how far are we into a single cigarElement - - // The logical model for generating extended events is as follows: the "record state" implements the traversal - // along the reference; thus stepForwardOnGenome() returns on every and only on actual reference bases. This - // can be a (mis)match or a deletion (in the latter case, we still return on every individual reference base the - // deletion spans). In the extended events mode, the record state also remembers if there was an insertion, or - // if the deletion just started *right before* the current reference base the record state is pointing to upon the return from - // stepForwardOnGenome(). The next call to stepForwardOnGenome() will clear that memory (as we remember only extended - // events immediately preceding the current reference base). - - public SAMRecordState(SAMRecord read) { - this.read = read; - cigar = read.getCigar(); - nCigarElements = cigar.numCigarElements(); - - //System.out.printf("Creating a SAMRecordState: %s%n", this); - } - - public SAMRecord getRead() { - return read; - } - - /** - * What is our current offset in the read's bases that aligns us with the reference genome? - * - * @return - */ - public int getReadOffset() { - return readOffset; - } - - /** - * What is the current offset w.r.t. the alignment state that aligns us to the readOffset? - * - * @return - */ - public int getGenomeOffset() { - return genomeOffset; - } - - public int getGenomePosition() { - return read.getAlignmentStart() + getGenomeOffset(); - } - - public GenomeLoc getLocation(GenomeLocParser genomeLocParser) { - return genomeLocParser.createGenomeLoc(read.getReferenceName(), getGenomePosition()); - } - - public CigarOperator getCurrentCigarOperator() { - return curElement.getOperator(); - } - - public String toString() { - return String.format("%s ro=%d go=%d co=%d cec=%d %s", read.getReadName(), readOffset, genomeOffset, cigarOffset, cigarElementCounter, curElement); - } - - public CigarElement peekForwardOnGenome() { - return ( cigarElementCounter + 1 > curElement.getLength() && cigarOffset + 1 < nCigarElements ? cigar.getCigarElement(cigarOffset + 1) : curElement ); - } - - public CigarElement peekBackwardOnGenome() { - return ( cigarElementCounter - 1 == 0 && cigarOffset - 1 > 0 ? cigar.getCigarElement(cigarOffset - 1) : curElement ); - } - - - public CigarOperator stepForwardOnGenome() { - // we enter this method with readOffset = index of the last processed base on the read - // (-1 if we did not process a single base yet); this can be last matching base, or last base of an insertion - - - if (curElement == null || ++cigarElementCounter > curElement.getLength()) { - cigarOffset++; - if (cigarOffset < nCigarElements) { - curElement = cigar.getCigarElement(cigarOffset); - cigarElementCounter = 0; - // next line: guards against cigar elements of length 0; when new cigar element is retrieved, - // we reenter in order to re-check cigarElementCounter against curElement's length - return stepForwardOnGenome(); - } else { - if (curElement != null && curElement.getOperator() == CigarOperator.D) - throw new UserException.MalformedBAM(read, "read ends with deletion. Cigar: " + read.getCigarString() + ". Although the SAM spec technically permits such reads, this is often indicative of malformed files. If you are sure you want to use this file, re-run your analysis with the extra option: -rf BadCigar"); - - // Reads that contain indels model the genomeOffset as the following base in the reference. Because - // we fall into this else block only when indels end the read, increment genomeOffset such that the - // current offset of this read is the next ref base after the end of the indel. This position will - // model a point on the reference somewhere after the end of the read. - genomeOffset++; // extended events need that. Logically, it's legal to advance the genomic offset here: - // we do step forward on the ref, and by returning null we also indicate that we are past the read end. - - return null; - } - } - - boolean done = false; - switch (curElement.getOperator()) { - case H: // ignore hard clips - case P: // ignore pads - cigarElementCounter = curElement.getLength(); - break; - case I: // insertion w.r.t. the reference - case S: // soft clip - cigarElementCounter = curElement.getLength(); - readOffset += curElement.getLength(); - break; - case D: // deletion w.r.t. the reference - if (readOffset < 0) // we don't want reads starting with deletion, this is a malformed cigar string - throw new UserException.MalformedBAM(read, "read starts with deletion. Cigar: " + read.getCigarString() + ". Although the SAM spec technically permits such reads, this is often indicative of malformed files. If you are sure you want to use this file, re-run your analysis with the extra option: -rf BadCigar"); - // should be the same as N case - genomeOffset++; - done = true; - break; - case N: // reference skip (looks and gets processed just like a "deletion", just different logical meaning) - genomeOffset++; - done = true; - break; - case M: - case EQ: - case X: - readOffset++; - genomeOffset++; - done = true; - break; - default: - throw new IllegalStateException("Case statement didn't deal with cigar op: " + curElement.getOperator()); - } - - return done ? curElement.getOperator() : stepForwardOnGenome(); - } - } - - //final boolean DEBUG = false; - //final boolean DEBUG2 = false && DEBUG; - private ReadProperties readInfo; - private AlignmentContext nextAlignmentContext; - private boolean performLevelingDownsampling; - - // ----------------------------------------------------------------------------------------------------------------- - // - // constructors and other basic operations - // - // ----------------------------------------------------------------------------------------------------------------- - - public LocusIteratorByStateExperimental(final Iterator samIterator, ReadProperties readInformation, GenomeLocParser genomeLocParser, Collection samples) { - this.readInfo = readInformation; - this.genomeLocParser = genomeLocParser; - this.samples = new ArrayList(samples); - this.readStates = new ReadStateManager(samIterator); - - this.performLevelingDownsampling = readInfo.getDownsamplingMethod() != null && - readInfo.getDownsamplingMethod().type == DownsampleType.BY_SAMPLE && - readInfo.getDownsamplingMethod().toCoverage != null; - - // currently the GATK expects this LocusIteratorByState to accept empty sample lists, when - // there's no read data. So we need to throw this error only when samIterator.hasNext() is true - if (this.samples.isEmpty() && samIterator.hasNext()) { - throw new IllegalArgumentException("samples list must not be empty"); - } - } - - /** - * For testing only. Assumes that the incoming SAMRecords have no read groups, so creates a dummy sample list - * for the system. - */ - public final static Collection sampleListForSAMWithoutReadGroups() { - List samples = new ArrayList(); - samples.add(null); - return samples; - } - - public Iterator iterator() { - return this; - } - - public void close() { - //this.it.close(); - } - - public boolean hasNext() { - lazyLoadNextAlignmentContext(); - return (nextAlignmentContext != null); - //if ( DEBUG ) System.out.printf("hasNext() = %b%n", r); - } - - private GenomeLoc getLocation() { - return readStates.isEmpty() ? null : readStates.getFirst().getLocation(genomeLocParser); - } - - // ----------------------------------------------------------------------------------------------------------------- - // - // next() routine and associated collection operations - // - // ----------------------------------------------------------------------------------------------------------------- - public AlignmentContext next() { - lazyLoadNextAlignmentContext(); - if (!hasNext()) - throw new NoSuchElementException("LocusIteratorByState: out of elements."); - AlignmentContext currentAlignmentContext = nextAlignmentContext; - nextAlignmentContext = null; - return currentAlignmentContext; - } - - /** - * Creates the next alignment context from the given state. Note that this is implemented as a lazy load method. - * nextAlignmentContext MUST BE null in order for this method to advance to the next entry. - */ - private void lazyLoadNextAlignmentContext() { - while (nextAlignmentContext == null && readStates.hasNext()) { - readStates.collectPendingReads(); - - final GenomeLoc location = getLocation(); - final Map fullPileup = new HashMap(); - - // TODO: How can you determine here whether the current pileup has been downsampled? - boolean hasBeenSampled = false; - - for (final String sample : samples) { - final Iterator iterator = readStates.iterator(sample); - final List pile = new ArrayList(readStates.size(sample)); - - int size = 0; // number of elements in this sample's pileup - int nDeletions = 0; // number of deletions in this sample's pileup - int nMQ0Reads = 0; // number of MQ0 reads in this sample's pileup (warning: current implementation includes N bases that are MQ0) - - while (iterator.hasNext()) { - final SAMRecordState state = iterator.next(); // state object with the read/offset information - final GATKSAMRecord read = (GATKSAMRecord) state.getRead(); // the actual read - final CigarOperator op = state.getCurrentCigarOperator(); // current cigar operator - final CigarElement nextElement = state.peekForwardOnGenome(); // next cigar element - final CigarElement lastElement = state.peekBackwardOnGenome(); // last cigar element - final boolean isSingleElementCigar = nextElement == lastElement; - final CigarOperator nextOp = nextElement.getOperator(); // next cigar operator - final CigarOperator lastOp = lastElement.getOperator(); // last cigar operator - int readOffset = state.getReadOffset(); // the base offset on this read - - final boolean isBeforeDeletion = nextOp == CigarOperator.DELETION; - final boolean isAfterDeletion = lastOp == CigarOperator.DELETION; - final boolean isBeforeInsertion = nextOp == CigarOperator.INSERTION; - final boolean isAfterInsertion = lastOp == CigarOperator.INSERTION && !isSingleElementCigar; - final boolean isNextToSoftClip = nextOp == CigarOperator.S || (state.getGenomeOffset() == 0 && read.getSoftStart() != read.getAlignmentStart()); - - int nextElementLength = nextElement.getLength(); - - if (op == CigarOperator.N) // N's are never added to any pileup - continue; - - if (op == CigarOperator.D) { - // TODO -- LIBS is totally busted for deletions so that reads with Ds right before Is in their CIGAR are broken; must fix - if (readInfo.includeReadsWithDeletionAtLoci()) { // only add deletions to the pileup if we are authorized to do so - pile.add(new PileupElement(read, readOffset, true, isBeforeDeletion, isAfterDeletion, isBeforeInsertion, isAfterInsertion, isNextToSoftClip, null, nextOp == CigarOperator.D ? nextElementLength : -1)); - size++; - nDeletions++; - if (read.getMappingQuality() == 0) - nMQ0Reads++; - } - } - else { - if (!filterBaseInRead(read, location.getStart())) { - String insertedBaseString = null; - if (nextOp == CigarOperator.I) { - final int insertionOffset = isSingleElementCigar ? 0 : 1; - // TODO -- someone please implement a better fix for the single element insertion CIGAR! - if (isSingleElementCigar) - readOffset -= (nextElement.getLength() - 1); // LIBS has passed over the insertion bases! - insertedBaseString = new String(Arrays.copyOfRange(read.getReadBases(), readOffset + insertionOffset, readOffset + insertionOffset + nextElement.getLength())); - } - - pile.add(new PileupElement(read, readOffset, false, isBeforeDeletion, isAfterDeletion, isBeforeInsertion, isAfterInsertion, isNextToSoftClip, insertedBaseString, nextElementLength)); - size++; - if (read.getMappingQuality() == 0) - nMQ0Reads++; - } - } - } - - if (pile.size() != 0) // if this pileup added at least one base, add it to the full pileup - fullPileup.put(sample, new ReadBackedPileupImpl(location, pile, size, nDeletions, nMQ0Reads)); - } - - updateReadStates(); // critical - must be called after we get the current state offsets and location - if (!fullPileup.isEmpty()) // if we got reads with non-D/N over the current position, we are done - nextAlignmentContext = new AlignmentContext(location, new ReadBackedPileupImpl(location, fullPileup), hasBeenSampled); - } - } - - // fast testing of position - private boolean readIsPastCurrentPosition(SAMRecord read) { - if (readStates.isEmpty()) - return false; - else { - SAMRecordState state = readStates.getFirst(); - SAMRecord ourRead = state.getRead(); - return read.getReferenceIndex() > ourRead.getReferenceIndex() || read.getAlignmentStart() > state.getGenomePosition(); - } - } - - /** - * Generic place to put per-base filters appropriate to LocusIteratorByState - * - * @param rec - * @param pos - * @return - */ - private static boolean filterBaseInRead(GATKSAMRecord rec, long pos) { - return ReadUtils.isBaseInsideAdaptor(rec, pos); - } - - private void updateReadStates() { - for (final String sample : samples) { - Iterator it = readStates.iterator(sample); - while (it.hasNext()) { - SAMRecordState state = it.next(); - CigarOperator op = state.stepForwardOnGenome(); - if (op == null) { - // we discard the read only when we are past its end AND indel at the end of the read (if any) was - // already processed. Keeping the read state that returned null upon stepForwardOnGenome() is safe - // as the next call to stepForwardOnGenome() will return null again AND will clear hadIndel() flag. - it.remove(); // we've stepped off the end of the object - } - } - } - } - - public void remove() { - throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!"); - } - - protected class ReadStateManager { - private final PeekableIterator iterator; - private final SamplePartitioner samplePartitioner; - private final Map readStatesBySample = new HashMap(); - private int totalReadStates = 0; - - public ReadStateManager(Iterator source) { - this.iterator = new PeekableIterator(source); - - for (final String sample : samples) { - readStatesBySample.put(sample, new PerSampleReadStateManager()); - } - - samplePartitioner = new SamplePartitioner(); - } - - /** - * Returns a iterator over all the reads associated with the given sample. Note that remove() is implemented - * for this iterator; if present, total read states will be decremented. - * - * @param sample The sample. - * @return Iterator over the reads associated with that sample. - */ - public Iterator iterator(final String sample) { - return new Iterator() { - private Iterator wrappedIterator = readStatesBySample.get(sample).iterator(); - - public boolean hasNext() { - return wrappedIterator.hasNext(); - } - - public SAMRecordState next() { - return wrappedIterator.next(); - } - - public void remove() { - wrappedIterator.remove(); - } - }; - } - - public boolean isEmpty() { - return totalReadStates == 0; - } - - /** - * Retrieves the total number of reads in the manager across all samples. - * - * @return Total number of reads over all samples. - */ - public int size() { - return totalReadStates; - } - - /** - * Retrieves the total number of reads in the manager in the given sample. - * - * @param sample The sample. - * @return Total number of reads in the given sample. - */ - public int size(final String sample) { - return readStatesBySample.get(sample).size(); - } - - public SAMRecordState getFirst() { - for (final String sample : samples) { - PerSampleReadStateManager reads = readStatesBySample.get(sample); - if (!reads.isEmpty()) - return reads.peek(); - } - return null; - } - - public boolean hasNext() { - return totalReadStates > 0 || iterator.hasNext(); - } - - public void collectPendingReads() { - if (!iterator.hasNext()) - return; - - if (readStates.size() == 0) { - int firstContigIndex = iterator.peek().getReferenceIndex(); - int firstAlignmentStart = iterator.peek().getAlignmentStart(); - while (iterator.hasNext() && iterator.peek().getReferenceIndex() == firstContigIndex && iterator.peek().getAlignmentStart() == firstAlignmentStart) { - samplePartitioner.submitRead(iterator.next()); - } - } else { - // Fast fail in the case that the read is past the current position. - if (readIsPastCurrentPosition(iterator.peek())) - return; - - while (iterator.hasNext() && !readIsPastCurrentPosition(iterator.peek())) { - samplePartitioner.submitRead(iterator.next()); - } - } - - for (final String sample : samples) { - Collection newReads = samplePartitioner.getReadsForSample(sample); - PerSampleReadStateManager statesBySample = readStatesBySample.get(sample); - addReadsToSample(statesBySample, newReads); - } - - samplePartitioner.reset(); - } - - /** - * Add reads with the given sample name to the given hanger entry. - * - * @param readStates The list of read states to add this collection of reads. - * @param reads Reads to add. Selected reads will be pulled from this source. - */ - private void addReadsToSample(final PerSampleReadStateManager readStates, final Collection reads) { - if (reads.isEmpty()) - return; - - Collection newReadStates = new LinkedList(); - - for (SAMRecord read : reads) { - SAMRecordState state = new SAMRecordState(read); - state.stepForwardOnGenome(); - newReadStates.add(state); - } - - readStates.addStatesAtNextAlignmentStart(newReadStates); - } - - protected class PerSampleReadStateManager implements Iterable { - private List> readStatesByAlignmentStart = new LinkedList>(); - private int thisSampleReadStates = 0; - private Downsampler> levelingDownsampler = - performLevelingDownsampling ? - new LevelingDownsampler, SAMRecordState>(readInfo.getDownsamplingMethod().toCoverage) : - null; - - public void addStatesAtNextAlignmentStart(Collection states) { - if ( states.isEmpty() ) { - return; - } - - readStatesByAlignmentStart.add(new LinkedList(states)); - thisSampleReadStates += states.size(); - totalReadStates += states.size(); - - if ( levelingDownsampler != null ) { - levelingDownsampler.submit(readStatesByAlignmentStart); - levelingDownsampler.signalEndOfInput(); - - thisSampleReadStates -= levelingDownsampler.getNumberOfDiscardedItems(); - totalReadStates -= levelingDownsampler.getNumberOfDiscardedItems(); - - // use returned List directly rather than make a copy, for efficiency's sake - readStatesByAlignmentStart = levelingDownsampler.consumeFinalizedItems(); - levelingDownsampler.reset(); - } - } - - public boolean isEmpty() { - return readStatesByAlignmentStart.isEmpty(); - } - - public SAMRecordState peek() { - return isEmpty() ? null : readStatesByAlignmentStart.get(0).peek(); - } - - public int size() { - return thisSampleReadStates; - } - - public Iterator iterator() { - return new Iterator() { - private Iterator> alignmentStartIterator = readStatesByAlignmentStart.iterator(); - private LinkedList currentPositionReadStates = null; - private Iterator currentPositionReadStatesIterator = null; - - public boolean hasNext() { - return alignmentStartIterator.hasNext() || - (currentPositionReadStatesIterator != null && currentPositionReadStatesIterator.hasNext()); - } - - public SAMRecordState next() { - if ( currentPositionReadStatesIterator == null || ! currentPositionReadStatesIterator.hasNext() ) { - currentPositionReadStates = alignmentStartIterator.next(); - currentPositionReadStatesIterator = currentPositionReadStates.iterator(); - } - - return currentPositionReadStatesIterator.next(); - } - - public void remove() { - currentPositionReadStatesIterator.remove(); - thisSampleReadStates--; - totalReadStates--; - - if ( currentPositionReadStates.isEmpty() ) { - alignmentStartIterator.remove(); - } - } - }; - } - } - } - - /** - * Note: stores reads by sample ID string, not by sample object - */ - private class SamplePartitioner { - private Map> readsBySample; - private long readsSeen = 0; - - public SamplePartitioner() { - readsBySample = new HashMap>(); - - for ( String sample : samples ) { - readsBySample.put(sample, new ArrayList()); - } - } - - public void submitRead(SAMRecord read) { - String sampleName = read.getReadGroup() != null ? read.getReadGroup().getSample() : null; - if (readsBySample.containsKey(sampleName)) - readsBySample.get(sampleName).add(read); - readsSeen++; - } - - public long getNumReadsSeen() { - return readsSeen; - } - - public Collection getReadsForSample(String sampleName) { - if ( ! readsBySample.containsKey(sampleName) ) - throw new NoSuchElementException("Sample name not found"); - return readsBySample.get(sampleName); - } - - public void reset() { - for ( Collection perSampleReads : readsBySample.values() ) - perSampleReads.clear(); - readsSeen = 0; - } - } -} \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/iterators/ReadTransformer.java b/public/java/src/org/broadinstitute/sting/gatk/iterators/ReadTransformer.java deleted file mode 100644 index 28348ecc2..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/iterators/ReadTransformer.java +++ /dev/null @@ -1,144 +0,0 @@ -package org.broadinstitute.sting.gatk.iterators; - -import com.google.java.contract.Ensures; -import com.google.java.contract.Requires; -import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; -import org.broadinstitute.sting.gatk.walkers.Walker; -import org.broadinstitute.sting.utils.sam.GATKSAMRecord; - -/** - * Baseclass used to describe a read transformer like BAQ and BQSR - * - * Read transformers are plugable infrastructure that modify read state - * either on input, on output, or within walkers themselves. - * - * The function apply() is called on each read seen by the GATK (after passing - * all ReadFilters) and it can do as it sees fit (without modifying the alignment) - * to the read to change qualities, add tags, etc. - * - * Initialize is called once right before the GATK traversal begins providing - * the ReadTransformer with the ability to collect and initialize data from the - * engine. - * - * Note that all ReadTransformers within the classpath are created and initialized. If one - * shouldn't be run it should look at the command line options of the engine and override - * the enabled. - * - * @since 8/31/12 - * @author depristo - */ -abstract public class ReadTransformer { - /** - * When should this read transform be applied? - */ - private ApplicationTime applicationTime; - - /** - * Keep track of whether we've been initialized already, and ensure it's not called more than once. - */ - private boolean initialized = false; - - protected ReadTransformer() {} - - /** - * Master initialization routine. Called to setup a ReadTransform, using it's overloaded initialialSub routine. - * - * @param overrideTime if not null, we will run this ReadTransform at the time provided, regardless of the timing of this read transformer itself - * @param engine the engine, for initializing values - * @param walker the walker we intend to run - */ - @Requires({"initialized == false", "engine != null", "walker != null"}) - @Ensures("initialized == true") - public final void initialize(final ApplicationTime overrideTime, final GenomeAnalysisEngine engine, final Walker walker) { - if ( engine == null ) throw new IllegalArgumentException("engine cannot be null"); - if ( walker == null ) throw new IllegalArgumentException("walker cannot be null"); - - this.applicationTime = initializeSub(engine, walker); - if ( overrideTime != null ) this.applicationTime = overrideTime; - initialized = true; - } - - /** - * Subclasses must override this to initialize themeselves - * - * @param engine the engine, for initializing values - * @param walker the walker we intend to run - * @return the point of time we'd like this read transform to be run - */ - @Requires({"engine != null", "walker != null"}) - @Ensures("result != null") - protected abstract ApplicationTime initializeSub(final GenomeAnalysisEngine engine, final Walker walker); - - /** - * Should this ReadTransformer be activated? Called after initialize, which allows this - * read transformer to look at its arguments and decide if it should be active. All - * ReadTransformers must override this, as by default they are not enabled. - * - * @return true if this ReadTransformer should be used on the read stream - */ - public boolean enabled() { - return false; - } - - /** - * Has this transformer been initialized? - * - * @return true if it has - */ - public final boolean isInitialized() { - return initialized; - } - - /** - * When should we apply this read transformer? - * - * @return true if yes - */ - public final ApplicationTime getApplicationTime() { - return applicationTime; - } - - /** - * Primary interface function for a read transform to actually do some work - * - * The function apply() is called on each read seen by the GATK (after passing - * all ReadFilters) and it can do as it sees fit (without modifying the alignment) - * to the read to change qualities, add tags, etc. - * - * @param read the read to transform - * @return the transformed read - */ - @Requires("read != null") - @Ensures("result != null") - abstract public GATKSAMRecord apply(final GATKSAMRecord read); - - @Override - public String toString() { - return getClass().getSimpleName(); - } - - /** - * When should a read transformer be applied? - */ - public static enum ApplicationTime { - /** - * Walker does not tolerate this read transformer - */ - FORBIDDEN, - - /** - * apply the transformation to the incoming reads, the default - */ - ON_INPUT, - - /** - * apply the transformation to the outgoing read stream - */ - ON_OUTPUT, - - /** - * the walker will deal with the calculation itself - */ - HANDLED_IN_WALKER - } -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/iterators/ReadTransformersMode.java b/public/java/src/org/broadinstitute/sting/gatk/iterators/ReadTransformersMode.java deleted file mode 100644 index be227619f..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/iterators/ReadTransformersMode.java +++ /dev/null @@ -1,28 +0,0 @@ -package org.broadinstitute.sting.gatk.iterators; - -import java.lang.annotation.*; - -/** - * User: hanna - * Date: May 14, 2009 - * Time: 1:51:22 PM - * BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT - * Software and documentation are copyright 2005 by the Broad Institute. - * All rights are reserved. - * - * Users acknowledge that this software is supplied without any warranty or support. - * The Broad Institute is not responsible for its use, misuse, or - * functionality. - */ - -/** - * Allows the walker to indicate what type of data it wants to consume. - */ - -@Documented -@Inherited -@Retention(RetentionPolicy.RUNTIME) -@Target(ElementType.TYPE) -public @interface ReadTransformersMode { - public abstract ReadTransformer.ApplicationTime ApplicationTime() default ReadTransformer.ApplicationTime.ON_INPUT; -} \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/samples/Trio.java b/public/java/src/org/broadinstitute/sting/gatk/samples/Trio.java deleted file mode 100644 index 314baad3d..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/samples/Trio.java +++ /dev/null @@ -1,45 +0,0 @@ -package org.broadinstitute.sting.gatk.samples; - -/** - * A class for imposing a trio structure on three samples; a common paradigm - * - * todo -- there should probably be an interface or abstract class "Pedigree" that generalizes the notion of - * -- imposing structure on samples. But given how complex pedigrees can quickly become, it's not - * -- clear the best way to do this. - */ -public class Trio { - private Sample mother; - private Sample father; - private Sample child; - - public Trio(Sample mom, Sample dad, Sample spawn) { - assert mom.getID().equals(spawn.getMaternalID()) && dad.getID().equals(spawn.getPaternalID()) : "Samples passed to trio constructor do not form a trio"; - mother = mom; - father = dad; - child = spawn; - } - - public Sample getMother() { - return mother; - } - - public String getMaternalID() { - return mother.getID(); - } - - public Sample getFather() { - return father; - } - - public String getPaternalID() { - return father.getID(); - } - - public Sample getChild() { - return child; - } - - public String getChildID() { - return child.getID(); - } -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLociBase.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLociBase.java deleted file mode 100755 index efa2eca02..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLociBase.java +++ /dev/null @@ -1,103 +0,0 @@ -package org.broadinstitute.sting.gatk.traversals; - -import org.apache.log4j.Logger; -import org.broadinstitute.sting.gatk.WalkerManager; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.datasources.providers.*; -import org.broadinstitute.sting.gatk.walkers.DataSource; -import org.broadinstitute.sting.gatk.walkers.LocusWalker; -import org.broadinstitute.sting.gatk.walkers.Walker; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl; - -/** - * A simple solution to iterating over all reference positions over a series of genomic locations. - */ -public abstract class TraverseLociBase extends TraversalEngine,LocusShardDataProvider> { - /** - * our log, which we want to capture anything from this class - */ - protected static final Logger logger = Logger.getLogger(TraversalEngine.class); - - @Override - protected final String getTraversalType() { - return "sites"; - } - - protected static class TraverseResults { - final int numIterations; - final T reduceResult; - - public TraverseResults(int numIterations, T reduceResult) { - this.numIterations = numIterations; - this.reduceResult = reduceResult; - } - } - - protected abstract TraverseResults traverse( final LocusWalker walker, - final LocusView locusView, - final LocusReferenceView referenceView, - final ReferenceOrderedView referenceOrderedDataView, - final T sum); - - @Override - public T traverse( LocusWalker walker, - LocusShardDataProvider dataProvider, - T sum) { - logger.debug(String.format("TraverseLociBase.traverse: Shard is %s", dataProvider)); - - final LocusView locusView = getLocusView( walker, dataProvider ); - - if ( locusView.hasNext() ) { // trivial optimization to avoid unnecessary processing when there's nothing here at all - //ReferenceOrderedView referenceOrderedDataView = new ReferenceOrderedView( dataProvider ); - ReferenceOrderedView referenceOrderedDataView = null; - if ( WalkerManager.getWalkerDataSource(walker) != DataSource.REFERENCE_ORDERED_DATA ) - referenceOrderedDataView = new ManagingReferenceOrderedView( dataProvider ); - else - referenceOrderedDataView = (RodLocusView)locusView; - - final LocusReferenceView referenceView = new LocusReferenceView( walker, dataProvider ); - - final TraverseResults result = traverse( walker, locusView, referenceView, referenceOrderedDataView, sum ); - sum = result.reduceResult; - dataProvider.getShard().getReadMetrics().incrementNumIterations(result.numIterations); - updateCumulativeMetrics(dataProvider.getShard()); - } - - // We have a final map call to execute here to clean up the skipped based from the - // last position in the ROD to that in the interval - if ( WalkerManager.getWalkerDataSource(walker) == DataSource.REFERENCE_ORDERED_DATA && ! walker.isDone() ) { - // only do this if the walker isn't done! - final RodLocusView rodLocusView = (RodLocusView)locusView; - final long nSkipped = rodLocusView.getLastSkippedBases(); - if ( nSkipped > 0 ) { - final GenomeLoc site = rodLocusView.getLocOneBeyondShard(); - final AlignmentContext ac = new AlignmentContext(site, new ReadBackedPileupImpl(site), nSkipped); - final M x = walker.map(null, null, ac); - sum = walker.reduce(x, sum); - } - } - - return sum; - } - - /** - * Gets the best view of loci for this walker given the available data. The view will function as a 'trigger track' - * of sorts, providing a consistent interface so that TraverseLociBase doesn't need to be reimplemented for any new datatype - * that comes along. - * @param walker walker to interrogate. - * @param dataProvider Data which which to drive the locus view. - * @return A view of the locus data, where one iteration of the locus view maps to one iteration of the traversal. - */ - private LocusView getLocusView( Walker walker, LocusShardDataProvider dataProvider ) { - final DataSource dataSource = WalkerManager.getWalkerDataSource(walker); - if( dataSource == DataSource.READS ) - return new CoveredLocusView(dataProvider); - else if( dataSource == DataSource.REFERENCE ) //|| ! GenomeAnalysisEngine.instance.getArguments().enableRodWalkers ) - return new AllLocusView(dataProvider); - else if( dataSource == DataSource.REFERENCE_ORDERED_DATA ) - return new RodLocusView(dataProvider); - else - throw new UnsupportedOperationException("Unsupported traversal type: " + dataSource); - } -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLociLinear.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLociLinear.java deleted file mode 100755 index 22381092f..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLociLinear.java +++ /dev/null @@ -1,47 +0,0 @@ -package org.broadinstitute.sting.gatk.traversals; - -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.datasources.providers.LocusReferenceView; -import org.broadinstitute.sting.gatk.datasources.providers.LocusView; -import org.broadinstitute.sting.gatk.datasources.providers.ReferenceOrderedView; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.LocusWalker; -import org.broadinstitute.sting.utils.GenomeLoc; - -/** - * A simple solution to iterating over all reference positions over a series of genomic locations. - */ -public class TraverseLociLinear extends TraverseLociBase { - - @Override - protected TraverseResults traverse(LocusWalker walker, LocusView locusView, LocusReferenceView referenceView, ReferenceOrderedView referenceOrderedDataView, T sum) { - // We keep processing while the next reference location is within the interval - boolean done = false; - int numIterations = 0; - - while( locusView.hasNext() && ! done ) { - numIterations++; - final AlignmentContext locus = locusView.next(); - final GenomeLoc location = locus.getLocation(); - - // create reference context. Note that if we have a pileup of "extended events", the context will - // hold the (longest) stretch of deleted reference bases (if deletions are present in the pileup). - final ReferenceContext refContext = referenceView.getReferenceContext(location); - - // Iterate forward to get all reference ordered data covering this location - final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(locus.getLocation(), refContext); - - final boolean keepMeP = walker.filter(tracker, refContext, locus); - if (keepMeP) { - final M x = walker.map(tracker, refContext, locus); - sum = walker.reduce(x, sum); - done = walker.isDone(); - } - - printProgress(locus.getLocation()); - } - - return new TraverseResults(numIterations, sum); - } -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLociNano.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLociNano.java deleted file mode 100755 index e4e2254d0..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLociNano.java +++ /dev/null @@ -1,205 +0,0 @@ -package org.broadinstitute.sting.gatk.traversals; - -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.datasources.providers.LocusReferenceView; -import org.broadinstitute.sting.gatk.datasources.providers.LocusView; -import org.broadinstitute.sting.gatk.datasources.providers.ReferenceOrderedView; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.LocusWalker; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.nanoScheduler.NSMapFunction; -import org.broadinstitute.sting.utils.nanoScheduler.NSProgressFunction; -import org.broadinstitute.sting.utils.nanoScheduler.NSReduceFunction; -import org.broadinstitute.sting.utils.nanoScheduler.NanoScheduler; - -import java.util.Iterator; - -/** - * A simple solution to iterating over all reference positions over a series of genomic locations. - */ -public class TraverseLociNano extends TraverseLociBase { - /** our log, which we want to capture anything from this class */ - private static final boolean DEBUG = false; - private static final int BUFFER_SIZE = 1000; - - final NanoScheduler nanoScheduler; - - public TraverseLociNano(int nThreads) { - nanoScheduler = new NanoScheduler(BUFFER_SIZE, nThreads); - nanoScheduler.setProgressFunction(new TraverseLociProgress()); - } - - @Override - protected TraverseResults traverse(final LocusWalker walker, - final LocusView locusView, - final LocusReferenceView referenceView, - final ReferenceOrderedView referenceOrderedDataView, - final T sum) { - nanoScheduler.setDebug(DEBUG); - final TraverseLociMap myMap = new TraverseLociMap(walker); - final TraverseLociReduce myReduce = new TraverseLociReduce(walker); - - final MapDataIterator inputIterator = new MapDataIterator(locusView, referenceView, referenceOrderedDataView); - final T result = nanoScheduler.execute(inputIterator, myMap, sum, myReduce); - - return new TraverseResults(inputIterator.numIterations, result); - } - - /** - * Create iterator that provides inputs for all map calls into MapData, to be provided - * to NanoScheduler for Map/Reduce - */ - private class MapDataIterator implements Iterator { - final LocusView locusView; - final LocusReferenceView referenceView; - final ReferenceOrderedView referenceOrderedDataView; - int numIterations = 0; - - private MapDataIterator(LocusView locusView, LocusReferenceView referenceView, ReferenceOrderedView referenceOrderedDataView) { - this.locusView = locusView; - this.referenceView = referenceView; - this.referenceOrderedDataView = referenceOrderedDataView; - } - - @Override - public boolean hasNext() { - return locusView.hasNext(); - } - - @Override - public MapData next() { - final AlignmentContext locus = locusView.next(); - final GenomeLoc location = locus.getLocation(); - - //logger.info("Pulling data from MapDataIterator at " + location); - - // create reference context. Note that if we have a pileup of "extended events", the context will - // hold the (longest) stretch of deleted reference bases (if deletions are present in the pileup). - final ReferenceContext refContext = referenceView.getReferenceContext(location); - - // Iterate forward to get all reference ordered data covering this location - final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(location, refContext); - - numIterations++; - return new MapData(locus, refContext, tracker); - } - - @Override - public void remove() { - throw new UnsupportedOperationException("Cannot remove elements from MapDataIterator"); - } - } - - @Override - public void printOnTraversalDone() { - nanoScheduler.shutdown(); - super.printOnTraversalDone(); - } - - /** - * The input data needed for each map call. The read, the reference, and the RODs - */ - private class MapData { - final AlignmentContext alignmentContext; - final ReferenceContext refContext; - final RefMetaDataTracker tracker; - - private MapData(final AlignmentContext alignmentContext, ReferenceContext refContext, RefMetaDataTracker tracker) { - this.alignmentContext = alignmentContext; - this.refContext = refContext; - this.tracker = tracker; - } - - @Override - public String toString() { - return "MapData " + alignmentContext.getLocation(); - } - } - - /** - * Contains the results of a map call, indicating whether the call was good, filtered, or done - */ - private class MapResult { - final M value; - final boolean reduceMe; - - /** - * Create a MapResult with value that should be reduced - * - * @param value the value to reduce - */ - private MapResult(final M value) { - this.value = value; - this.reduceMe = true; - } - - /** - * Create a MapResult that shouldn't be reduced - */ - private MapResult() { - this.value = null; - this.reduceMe = false; - } - } - - /** - * A static object that tells reduce that the result of map should be skipped (filtered or done) - */ - private final MapResult SKIP_REDUCE = new MapResult(); - - /** - * MapFunction for TraverseReads meeting NanoScheduler interface requirements - * - * Applies walker.map to MapData, returning a MapResult object containing the result - */ - private class TraverseLociMap implements NSMapFunction { - final LocusWalker walker; - - private TraverseLociMap(LocusWalker walker) { - this.walker = walker; - } - - @Override - public MapResult apply(final MapData data) { - if ( ! walker.isDone() ) { - final boolean keepMeP = walker.filter(data.tracker, data.refContext, data.alignmentContext); - if (keepMeP) { - final M x = walker.map(data.tracker, data.refContext, data.alignmentContext); - return new MapResult(x); - } - } - return SKIP_REDUCE; - } - } - - /** - * NSReduceFunction for TraverseReads meeting NanoScheduler interface requirements - * - * Takes a MapResult object and applies the walkers reduce function to each map result, when applicable - */ - private class TraverseLociReduce implements NSReduceFunction { - final LocusWalker walker; - - private TraverseLociReduce(LocusWalker walker) { - this.walker = walker; - } - - @Override - public T apply(MapResult one, T sum) { - if ( one.reduceMe ) - // only run reduce on values that aren't DONE or FAILED - return walker.reduce(one.value, sum); - else - return sum; - } - } - - private class TraverseLociProgress implements NSProgressFunction { - @Override - public void progress(MapData lastProcessedMap) { - if (lastProcessedMap.alignmentContext != null) - printProgress(lastProcessedMap.alignmentContext.getLocation()); - } - } -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadsNano.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadsNano.java deleted file mode 100755 index b3a0a1390..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadsNano.java +++ /dev/null @@ -1,234 +0,0 @@ -/* - * Copyright (c) 2009 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ -package org.broadinstitute.sting.gatk.traversals; - -import net.sf.samtools.SAMRecord; -import org.apache.log4j.Logger; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.datasources.providers.ReadBasedReferenceOrderedView; -import org.broadinstitute.sting.gatk.datasources.providers.ReadReferenceView; -import org.broadinstitute.sting.gatk.datasources.providers.ReadShardDataProvider; -import org.broadinstitute.sting.gatk.datasources.providers.ReadView; -import org.broadinstitute.sting.gatk.datasources.reads.ReadShard; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.ReadWalker; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.nanoScheduler.NSMapFunction; -import org.broadinstitute.sting.utils.nanoScheduler.NSReduceFunction; -import org.broadinstitute.sting.utils.nanoScheduler.NanoScheduler; -import org.broadinstitute.sting.utils.sam.GATKSAMRecord; - -import java.util.LinkedList; -import java.util.List; - -/** - * A nano-scheduling version of TraverseReads. - * - * Implements the traversal of a walker that accepts individual reads, the reference, and - * RODs per map call. Directly supports shared memory parallelism via NanoScheduler - * - * @author depristo - * @version 1.0 - * @date 9/2/2012 - */ -public class TraverseReadsNano extends TraversalEngine,ReadShardDataProvider> { - /** our log, which we want to capture anything from this class */ - protected static final Logger logger = Logger.getLogger(TraverseReadsNano.class); - private static final boolean DEBUG = false; - final NanoScheduler nanoScheduler; - - public TraverseReadsNano(int nThreads) { - final int bufferSize = ReadShard.getReadBufferSize() + 1; // actually has 1 more than max - nanoScheduler = new NanoScheduler(bufferSize, nThreads); - } - - @Override - protected String getTraversalType() { - return "reads"; - } - - /** - * Traverse by reads, given the data and the walker - * - * @param walker the walker to traverse with - * @param dataProvider the provider of the reads data - * @param sum the value of type T, specified by the walker, to feed to the walkers reduce function - * @return the reduce variable of the read walker - */ - public T traverse(ReadWalker walker, - ReadShardDataProvider dataProvider, - T sum) { - logger.debug(String.format("TraverseReadsNano.traverse Covered dataset is %s", dataProvider)); - - if( !dataProvider.hasReads() ) - throw new IllegalArgumentException("Unable to traverse reads; no read data is available."); - - nanoScheduler.setDebug(DEBUG); - final TraverseReadsMap myMap = new TraverseReadsMap(walker); - final TraverseReadsReduce myReduce = new TraverseReadsReduce(walker); - - final List aggregatedInputs = aggregateMapData(dataProvider); - final T result = nanoScheduler.execute(aggregatedInputs.iterator(), myMap, sum, myReduce); - - final GATKSAMRecord lastRead = aggregatedInputs.get(aggregatedInputs.size() - 1).read; - final GenomeLoc locus = engine.getGenomeLocParser().createGenomeLoc(lastRead); - - updateCumulativeMetrics(dataProvider.getShard()); - printProgress(locus); - - return result; - } - - /** - * Aggregate all of the inputs for all map calls into MapData, to be provided - * to NanoScheduler for Map/Reduce - * - * @param dataProvider the source of our data - * @return a linked list of MapData objects holding the read, ref, and ROD info for every map/reduce - * should execute - */ - private List aggregateMapData(final ReadShardDataProvider dataProvider) { - final ReadView reads = new ReadView(dataProvider); - final ReadReferenceView reference = new ReadReferenceView(dataProvider); - final ReadBasedReferenceOrderedView rodView = new ReadBasedReferenceOrderedView(dataProvider); - - final List mapData = new LinkedList(); - for ( final SAMRecord read : reads ) { - final ReferenceContext refContext = ! read.getReadUnmappedFlag() - ? reference.getReferenceContext(read) - : null; - - // if the read is mapped, create a metadata tracker - final RefMetaDataTracker tracker = read.getReferenceIndex() >= 0 - ? rodView.getReferenceOrderedDataForRead(read) - : null; - - // update the number of reads we've seen - dataProvider.getShard().getReadMetrics().incrementNumIterations(); - - mapData.add(new MapData((GATKSAMRecord)read, refContext, tracker)); - } - - return mapData; - } - - @Override - public void printOnTraversalDone() { - nanoScheduler.shutdown(); - super.printOnTraversalDone(); - } - - /** - * The input data needed for each map call. The read, the reference, and the RODs - */ - private class MapData { - final GATKSAMRecord read; - final ReferenceContext refContext; - final RefMetaDataTracker tracker; - - private MapData(GATKSAMRecord read, ReferenceContext refContext, RefMetaDataTracker tracker) { - this.read = read; - this.refContext = refContext; - this.tracker = tracker; - } - } - - /** - * Contains the results of a map call, indicating whether the call was good, filtered, or done - */ - private class MapResult { - final M value; - final boolean reduceMe; - - /** - * Create a MapResult with value that should be reduced - * - * @param value the value to reduce - */ - private MapResult(final M value) { - this.value = value; - this.reduceMe = true; - } - - /** - * Create a MapResult that shouldn't be reduced - */ - private MapResult() { - this.value = null; - this.reduceMe = false; - } - } - - /** - * A static object that tells reduce that the result of map should be skipped (filtered or done) - */ - private final MapResult SKIP_REDUCE = new MapResult(); - - /** - * MapFunction for TraverseReads meeting NanoScheduler interface requirements - * - * Applies walker.map to MapData, returning a MapResult object containing the result - */ - private class TraverseReadsMap implements NSMapFunction { - final ReadWalker walker; - - private TraverseReadsMap(ReadWalker walker) { - this.walker = walker; - } - - @Override - public MapResult apply(final MapData data) { - if ( ! walker.isDone() ) { - final boolean keepMeP = walker.filter(data.refContext, data.read); - if (keepMeP) - return new MapResult(walker.map(data.refContext, data.read, data.tracker)); - } - - return SKIP_REDUCE; - } - } - - /** - * NSReduceFunction for TraverseReads meeting NanoScheduler interface requirements - * - * Takes a MapResult object and applies the walkers reduce function to each map result, when applicable - */ - private class TraverseReadsReduce implements NSReduceFunction { - final ReadWalker walker; - - private TraverseReadsReduce(ReadWalker walker) { - this.walker = walker; - } - - @Override - public T apply(MapResult one, T sum) { - if ( one.reduceMe ) - // only run reduce on values that aren't DONE or FAILED - return walker.reduce(one.value, sum); - else - return sum; - } - } -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/NanoSchedulable.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/NanoSchedulable.java deleted file mode 100755 index 731ce7e4e..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/NanoSchedulable.java +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Copyright (c) 2010. The Broad Institute - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ -package org.broadinstitute.sting.gatk.walkers; - -/** - * Root parallelism interface. Walkers that implement this - * declare that their map function is thread-safe and so multiple - * map calls can be run in parallel in the same JVM instance. - */ -public interface NanoSchedulable { -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceMaker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceMaker.java deleted file mode 100755 index 2b9744b89..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceMaker.java +++ /dev/null @@ -1,133 +0,0 @@ -/* - * Copyright (c) 2010 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR - * THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.walkers.fasta; - -import org.broadinstitute.sting.commandline.Input; -import org.broadinstitute.sting.commandline.RodBinding; -import org.broadinstitute.sting.gatk.CommandLineGATK; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.*; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.collections.Pair; -import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; - -import java.util.Collections; -import java.util.List; - - -/** - * Generates an alternative reference sequence over the specified interval. - * - *

- * Given variant tracks, it replaces the reference bases at variation sites with the bases supplied by the ROD(s). - * Additionally, allows for one or more "snpmask" VCFs to set overlapping bases to 'N'. - * Several important notes: - * 1) if there are multiple variants that start at a site, it chooses one of them randomly. - * 2) when there are overlapping indels (but with different start positions) only the first will be chosen. - * 3) this tool works only for SNPs and for simple indels (but not for things like complex substitutions). - * Reference bases for each interval will be output as a separate fasta sequence (named numerically in order). - * - *

Input

- *

- * The reference, requested intervals, and any number of variant rod files. - *

- * - *

Output

- *

- * A fasta file representing the requested intervals. - *

- * - *

Examples

- *
- * java -Xmx2g -jar GenomeAnalysisTK.jar \
- *   -R ref.fasta \
- *   -T FastaAlternateReferenceMaker \
- *   -o output.fasta \
- *   -L input.intervals \
- *   --variant input.vcf \
- *   [--snpmask mask.vcf]
- * 
- * - */ -@DocumentedGATKFeature( groupName = "Companion Utilities", extraDocs = {CommandLineGATK.class} ) -@Reference(window=@Window(start=-1,stop=50)) -@Requires(value={DataSource.REFERENCE}) -public class FastaAlternateReferenceMaker extends FastaReferenceMaker { - - /** - * Variants from these input files are used by this tool to construct an alternate reference. - */ - @Input(fullName = "variant", shortName = "V", doc="variants to model", required=false) - public List> variants = Collections.emptyList(); - - /** - * Snps from this file are used as a mask when constructing the alternate reference. - */ - @Input(fullName="snpmask", shortName = "snpmask", doc="SNP mask VCF file", required=false) - public RodBinding snpmask; - - private int deletionBasesRemaining = 0; - - public Pair map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - - if (deletionBasesRemaining > 0) { - deletionBasesRemaining--; - return new Pair(context.getLocation(), ""); - } - - String refBase = String.valueOf((char)ref.getBase()); - - // Check to see if we have a called snp - for ( VariantContext vc : tracker.getValues(variants, ref.getLocus()) ) { - if ( vc.isFiltered() ) - continue; - - if ( vc.isSimpleDeletion()) { - deletionBasesRemaining = vc.getReference().length() - 1; - // delete the next n bases, not this one - return new Pair(context.getLocation(), refBase); - } else if ( vc.isSimpleInsertion()) { - return new Pair(context.getLocation(), vc.getAlternateAllele(0).toString()); - } else if (vc.isSNP()) { - return new Pair(context.getLocation(), vc.getAlternateAllele(0).toString()); - } - } - - // if we don't have a called site, and we have a mask at this site, mask it - for ( VariantContext vc : tracker.getValues(snpmask) ) { - if ( vc.isSNP()) { - return new Pair(context.getLocation(), "N"); - } - } - - - // if we got here then we're just ref - return new Pair(context.getLocation(), refBase); - } -} \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaReferenceMaker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaReferenceMaker.java deleted file mode 100755 index 362867318..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaReferenceMaker.java +++ /dev/null @@ -1,127 +0,0 @@ -/* - * Copyright (c) 2010 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR - * THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.walkers.fasta; - -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Output; -import org.broadinstitute.sting.gatk.CommandLineGATK; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.RefWalker; -import org.broadinstitute.sting.gatk.walkers.WalkerName; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.collections.Pair; -import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; - -import java.io.PrintStream; - -/** - * Renders a new reference in FASTA format consisting of only those loci provided in the input data set. - * - *

- * The output format can be partially controlled using the provided command-line arguments. - * Specify intervals with the usual -L argument to output only the reference bases within your intervals. - * Overlapping intervals are automatically merged; reference bases for each disjoint interval will be output as a - * separate fasta sequence (named numerically in order). - * - *

Input

- *

- * The reference and requested intervals. - *

- * - *

Output

- *

- * A fasta file representing the requested intervals. - *

- * - *

Examples

- *
- * java -Xmx2g -jar GenomeAnalysisTK.jar \
- *   -R ref.fasta \
- *   -T FastaReferenceMaker \
- *   -o output.fasta \
- *   -L input.intervals
- * 
- * - */ -@DocumentedGATKFeature( groupName = "Companion Utilities", extraDocs = {CommandLineGATK.class} ) -public class FastaReferenceMaker extends RefWalker, GenomeLoc> { - - @Output PrintStream out; - - @Argument(fullName="lineWidth", shortName="lw", doc="Maximum length of sequence to write per line", required=false) - public int fastaLineWidth=60; - - /** - * Please note that when using this argument adjacent intervals will automatically be merged. - */ - @Argument(fullName="rawOnelineSeq", shortName="raw", doc="Print sequences with no FASTA header lines, one line per interval (i.e. lineWidth = infinity)", required=false) - public boolean fastaRawSeqs=false; - - protected FastaSequence fasta; - - public void initialize() { - if (fastaRawSeqs) fastaLineWidth = Integer.MAX_VALUE; - fasta = new FastaSequence(out, fastaLineWidth, fastaRawSeqs); - } - - public Pair map(RefMetaDataTracker rodData, ReferenceContext ref, AlignmentContext context) { - return new Pair(context.getLocation(), String.valueOf((char)ref.getBase())); - } - - public GenomeLoc reduceInit() { - return null; - } - - public GenomeLoc reduce(Pair value, GenomeLoc sum) { - if ( value == null ) - return sum; - - // if there is no interval to the left, then this is the first one - if ( sum == null ) { - sum = value.first; - fasta.append(value.second); - } - // if the intervals don't overlap, print out the leftmost one and start a new one - // (end of contig or new interval) - else if ( value.first.getStart() != sum.getStop() + 1 ) { - fasta.flush(); - sum = value.first; - fasta.append(value.second); - } - // otherwise, merge them - else { - sum = getToolkit().getGenomeLocParser().setStop(sum, value.first.getStop()); - fasta.append(value.second); - } - return sum; - } - - public void onTraversalDone(GenomeLoc sum) { - fasta.flush(); - } -} \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/utils/baq/BAQReadTransformer.java b/public/java/src/org/broadinstitute/sting/utils/baq/BAQReadTransformer.java deleted file mode 100644 index 4589ffb71..000000000 --- a/public/java/src/org/broadinstitute/sting/utils/baq/BAQReadTransformer.java +++ /dev/null @@ -1,49 +0,0 @@ -package org.broadinstitute.sting.utils.baq; - -import net.sf.picard.reference.IndexedFastaSequenceFile; -import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; -import org.broadinstitute.sting.gatk.WalkerManager; -import org.broadinstitute.sting.gatk.iterators.ReadTransformer; -import org.broadinstitute.sting.gatk.walkers.BAQMode; -import org.broadinstitute.sting.gatk.walkers.Walker; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.sam.GATKSAMRecord; - -/** - * Applies Heng's BAQ calculation to a stream of incoming reads - */ -public class BAQReadTransformer extends ReadTransformer { - private BAQ baqHMM; - private IndexedFastaSequenceFile refReader; - private BAQ.CalculationMode cmode; - private BAQ.QualityMode qmode; - - @Override - public ApplicationTime initializeSub(final GenomeAnalysisEngine engine, final Walker walker) { - final BAQMode mode = WalkerManager.getWalkerAnnotation(walker, BAQMode.class); - this.refReader = engine.getReferenceDataSource().getReference(); - this.cmode = engine.getArguments().BAQMode; - this.qmode = mode.QualityMode(); - baqHMM = new BAQ(engine.getArguments().BAQGOP); - - if ( qmode == BAQ.QualityMode.DONT_MODIFY ) - throw new ReviewedStingException("BUG: shouldn't create BAQ transformer with quality mode DONT_MODIFY"); - - if ( mode.ApplicationTime() == ReadTransformer.ApplicationTime.FORBIDDEN && enabled() ) - throw new UserException.BadArgumentValue("baq", "Walker cannot accept BAQ'd base qualities, and yet BAQ mode " + cmode + " was requested."); - - return mode.ApplicationTime(); - } - - @Override - public boolean enabled() { - return cmode != BAQ.CalculationMode.OFF; - } - - @Override - public GATKSAMRecord apply(final GATKSAMRecord read) { - baqHMM.baqRead(read, refReader, cmode, qmode); - return read; - } -} diff --git a/public/java/src/org/broadinstitute/sting/utils/baq/ReadTransformingIterator.java b/public/java/src/org/broadinstitute/sting/utils/baq/ReadTransformingIterator.java deleted file mode 100644 index 18ab9e01a..000000000 --- a/public/java/src/org/broadinstitute/sting/utils/baq/ReadTransformingIterator.java +++ /dev/null @@ -1,44 +0,0 @@ -package org.broadinstitute.sting.utils.baq; - -import com.google.java.contract.Ensures; -import com.google.java.contract.Requires; -import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.gatk.iterators.ReadTransformer; -import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; -import org.broadinstitute.sting.utils.sam.GATKSAMRecord; - -import java.util.Iterator; - -/** - * Iterator that applies a ReadTransformer to a stream of reads - */ -public class ReadTransformingIterator implements StingSAMIterator { - private final StingSAMIterator it; - private final ReadTransformer transformer; - - /** - * Creates a new ReadTransforming iterator - */ - @Requires({"it != null", "transformer != null", "transformer.isInitialized()"}) - public ReadTransformingIterator(final StingSAMIterator it, final ReadTransformer transformer) { - if ( ! transformer.isInitialized() ) - throw new IllegalStateException("Creating a read transformer stream for an uninitialized read transformer: " + transformer); - if ( transformer.getApplicationTime() == ReadTransformer.ApplicationTime.FORBIDDEN ) - throw new IllegalStateException("Creating a read transformer stream for a forbidden transformer " + transformer); - - this.it = it; - this.transformer = transformer; - } - - @Requires("hasNext()") - @Ensures("result != null") - public SAMRecord next() { - final GATKSAMRecord read = (GATKSAMRecord)it.next(); - return transformer.apply(read); - } - - public boolean hasNext() { return this.it.hasNext(); } - public void remove() { throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!"); } - public void close() { it.close(); } - public Iterator iterator() { return this; } -} diff --git a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/BlockingQueueValue.java b/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/BlockingQueueValue.java deleted file mode 100644 index 2daa6c9eb..000000000 --- a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/BlockingQueueValue.java +++ /dev/null @@ -1,82 +0,0 @@ -package org.broadinstitute.sting.utils.nanoScheduler; - -import com.google.java.contract.Invariant; - -/** - * Wrapper to hold data for a blocking queue, distinguishing an EOF marker from a real object - * - * The only way to tell in a consumer thread that a blocking queue has no more data ever - * coming down the pipe is to pass in a "poison" or EOF object. This class provides - * a generic capacity for that... - * - * The use case looks like this: - * - * BlockingQueue q - * producer: - * while ( x has items ) - * q.put(new BlockingQueueValue(x)) - * q.put(new BlockingQueueValue()) - * - * Consumer: - * while ( true ) - * value = q.take() - * if ( value.isLast() ) - * break - * else - * do something useful with value - * - * - * User: depristo - * Date: 9/6/12 - * Time: 3:08 PM - */ -@Invariant("! isLast || value == null") -class BlockingQueueValue { - /** - * True if this is the EOF marker object - */ - final private boolean isLast; - - /** - * Our value, if we aren't the EOF marker - */ - final private T value; - - /** - * Create a new BlockingQueueValue containing a real value, where last is false - * @param value - */ - BlockingQueueValue(final T value) { - isLast = false; - this.value = value; - } - - /** - * Create a new BlockingQueueValue that is the last item - */ - BlockingQueueValue() { - isLast = true; - this.value = null; - } - - /** - * Is this the EOF marker? - * - * @return true if so, else false - */ - public boolean isLast() { - return isLast; - } - - /** - * Get the value held by this BlockingQueueValue - * - * @return the value - * @throws IllegalStateException if this is the last item - */ - public T getValue() { - if ( isLast() ) - throw new IllegalStateException("Cannot get value for last object"); - return value; - } -} diff --git a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/FutureValue.java b/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/FutureValue.java deleted file mode 100644 index 9508a15aa..000000000 --- a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/FutureValue.java +++ /dev/null @@ -1,45 +0,0 @@ -package org.broadinstitute.sting.utils.nanoScheduler; - -import java.util.concurrent.ExecutionException; -import java.util.concurrent.Future; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.TimeoutException; - -/** - * Create a future that simply returns a given value - * - * The only standard way to create a future in java is via the ExecutorService interface. - * If you have a data structure holding futures of value T, and you want to add a - * value to it for some reason (to add a EOF marker, for instance) you can use this - * class to create a dummy Future that simply returns a value. - * - * @author depristo - * @since 09/12 - */ -class FutureValue implements Future { - final V value; - - FutureValue(final V value) { - this.value = value; - } - - @Override public boolean cancel(boolean mayInterruptIfRunning) { - return true; - } - - @Override public boolean isCancelled() { - return false; - } - - @Override public boolean isDone() { - return true; - } - - @Override public V get() throws InterruptedException, ExecutionException { - return value; - } - - @Override public V get(long timeout, TimeUnit unit) throws InterruptedException, ExecutionException, TimeoutException { - return get(); - } -} diff --git a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/InputProducer.java b/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/InputProducer.java deleted file mode 100644 index 29dddbc49..000000000 --- a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/InputProducer.java +++ /dev/null @@ -1,62 +0,0 @@ -package org.broadinstitute.sting.utils.nanoScheduler; - -import org.broadinstitute.sting.utils.SimpleTimer; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; - -import java.util.Iterator; -import java.util.concurrent.BlockingQueue; - -/** - * Producer Thread that reads input values from an inputReads and puts them into a BlockingQueue - */ -class InputProducer implements Runnable { - /** - * The iterator we are using to get data from - */ - final Iterator inputReader; - - /** - * Our timer (may be null) that we use to track our input costs - */ - final SimpleTimer inputTimer; - - /** - * Where we put our input values for consumption - */ - final BlockingQueue outputQueue; - - public InputProducer(final Iterator inputReader, - final SimpleTimer inputTimer, - final BlockingQueue outputQueue) { - if ( inputReader == null ) throw new IllegalArgumentException("inputReader cannot be null"); - if ( outputQueue == null ) throw new IllegalArgumentException("OutputQueue cannot be null"); - - this.inputReader = inputReader; - this.inputTimer = inputTimer; - this.outputQueue = outputQueue; - } - - public void run() { - try { - while ( inputReader.hasNext() ) { - if ( inputTimer != null ) inputTimer.restart(); - final InputType input = inputReader.next(); - if ( inputTimer != null ) inputTimer.stop(); - outputQueue.put(new InputValue(input)); - } - - // add the EOF object so our consumer knows we are done in all inputs - outputQueue.put(new InputValue()); - } catch (InterruptedException ex) { - throw new ReviewedStingException("got execution exception", ex); - } - } - - /** - * Helper class that contains a read value suitable for EOF marking in a BlockingQueue - */ - class InputValue extends BlockingQueueValue { - private InputValue(InputType datum) { super(datum); } - private InputValue() { } - } -} diff --git a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/MapResult.java b/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/MapResult.java deleted file mode 100644 index 3cc6fa786..000000000 --- a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/MapResult.java +++ /dev/null @@ -1,36 +0,0 @@ -package org.broadinstitute.sting.utils.nanoScheduler; - -/** - * Holds the results of a map job suitable for producer/consumer threading - * via a BlockingQueue - */ -class MapResult extends BlockingQueueValue { - final int jobID; - - /** - * Create a new MapResult with value datum and jod jobID ID - * - * @param datum the value produced by the map job - * @param jobID the id of the map job (for correctness testing) - */ - MapResult(final MapType datum, final int jobID) { - super(datum); - this.jobID = jobID; - if ( jobID < 0 ) throw new IllegalArgumentException("JobID must be >= 0"); - } - - /** - * Create the EOF marker version of MapResult - */ - MapResult() { - super(); - this.jobID = Integer.MAX_VALUE; - } - - /** - * @return the job ID of the map job that produced this MapResult - */ - public int getJobID() { - return jobID; - } -} diff --git a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NSMapFunction.java b/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NSMapFunction.java deleted file mode 100644 index cc5335051..000000000 --- a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NSMapFunction.java +++ /dev/null @@ -1,19 +0,0 @@ -package org.broadinstitute.sting.utils.nanoScheduler; - -/** - * A function that maps from InputType -> ResultType - * - * For use with the NanoScheduler - * - * User: depristo - * Date: 8/24/12 - * Time: 9:49 AM - */ -public interface NSMapFunction { - /** - * Return function on input, returning a value of ResultType - * @param input - * @return - */ - public ResultType apply(final InputType input); -} diff --git a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NSProgressFunction.java b/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NSProgressFunction.java deleted file mode 100644 index 8b12c62c4..000000000 --- a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NSProgressFunction.java +++ /dev/null @@ -1,12 +0,0 @@ -package org.broadinstitute.sting.utils.nanoScheduler; - -/** - * Created with IntelliJ IDEA. - * User: depristo - * Date: 9/4/12 - * Time: 2:10 PM - * To change this template use File | Settings | File Templates. - */ -public interface NSProgressFunction { - public void progress(final InputType lastMapInput); -} diff --git a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NSReduceFunction.java b/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NSReduceFunction.java deleted file mode 100644 index 879a33a1d..000000000 --- a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NSReduceFunction.java +++ /dev/null @@ -1,18 +0,0 @@ -package org.broadinstitute.sting.utils.nanoScheduler; - -/** - * A function that combines a value of MapType with an existing ReduceValue into a new ResultType - * - * User: depristo - * Date: 8/24/12 - * Time: 9:49 AM - */ -public interface NSReduceFunction { - /** - * Combine one with sum into a new ReduceType - * @param one the result of a map call on an input element - * @param sum the cumulative reduce result over all previous map calls - * @return - */ - public ReduceType apply(MapType one, ReduceType sum); -} diff --git a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NanoScheduler.java b/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NanoScheduler.java deleted file mode 100644 index 664fb7b9b..000000000 --- a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NanoScheduler.java +++ /dev/null @@ -1,392 +0,0 @@ -package org.broadinstitute.sting.utils.nanoScheduler; - -import com.google.java.contract.Ensures; -import com.google.java.contract.Requires; -import org.apache.log4j.Logger; -import org.broadinstitute.sting.utils.AutoFormattingTime; -import org.broadinstitute.sting.utils.SimpleTimer; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.threading.NamedThreadFactory; - -import java.util.Iterator; -import java.util.List; -import java.util.concurrent.*; - -/** - * Framework for very fine grained MapReduce parallelism - * - * The overall framework works like this - * - * nano <- new Nanoschedule(inputBufferSize, numberOfMapElementsToProcessTogether, nThreads) - * List[Input] outerData : outerDataLoop ) - * result = nano.execute(outerData.iterator(), map, reduce) - * - * inputBufferSize determines how many elements from the input stream are read in one go by the - * nanoscheduler. The scheduler may hold up to inputBufferSize in memory at one time, as well - * as up to inputBufferSize map results as well. - * - * numberOfMapElementsToProcessTogether determines how many input elements are processed - * together each thread cycle. For example, if this value is 10, then the input data - * is grouped together in units of 10 elements each, and map called on each in term. The more - * heavy-weight the map function is, in terms of CPU costs, the more it makes sense to - * have this number be small. The lighter the CPU cost per element, though, the more this - * parameter introduces overhead due to need to context switch among threads to process - * each input element. A value of -1 lets the nanoscheduler guess at a reasonable trade-off value. - * - * nThreads is a bit obvious yes? Note though that the nanoscheduler assumes that it gets 1 thread - * from its client during the execute call, as this call blocks until all work is done. The caller - * thread is put to work by execute to help with the processing of the data. So in reality the - * nanoScheduler only spawn nThreads - 1 additional workers (if this is > 1). - * - * User: depristo - * Date: 8/24/12 - * Time: 9:47 AM - */ -public class NanoScheduler { - private final static Logger logger = Logger.getLogger(NanoScheduler.class); - private final static boolean ALLOW_SINGLE_THREAD_FASTPATH = true; - private final static boolean LOG_MAP_TIMES = false; - private final static boolean TIME_CALLS = true; - - private final static int MAP_BUFFER_SIZE_SCALE_FACTOR = 100; - - final int inputBufferSize; - final int mapBufferSize; - final int nThreads; - final ExecutorService inputExecutor; - final ExecutorService reduceExecutor; - final ThreadPoolExecutor mapExecutor; - - boolean shutdown = false; - boolean debug = false; - private NSProgressFunction progressFunction = null; - - final SimpleTimer outsideSchedulerTimer = TIME_CALLS ? new SimpleTimer("outside") : null; - final SimpleTimer inputTimer = TIME_CALLS ? new SimpleTimer("input") : null; - final SimpleTimer mapTimer = TIME_CALLS ? new SimpleTimer("map") : null; - final SimpleTimer reduceTimer = TIME_CALLS ? new SimpleTimer("reduce") : null; - - /** - * Create a new nanoscheduler with the desire characteristics requested by the argument - * - * @param inputBufferSize the number of input elements to read in each scheduling cycle. - * @param nThreads the number of threads to use to get work done, in addition to the - * thread calling execute - */ - public NanoScheduler(final int inputBufferSize, final int nThreads) { - if ( inputBufferSize < 1 ) throw new IllegalArgumentException("inputBufferSize must be >= 1, got " + inputBufferSize); - if ( nThreads < 1 ) throw new IllegalArgumentException("nThreads must be >= 1, got " + nThreads); - - this.inputBufferSize = inputBufferSize; - this.mapBufferSize = inputBufferSize * MAP_BUFFER_SIZE_SCALE_FACTOR; - this.nThreads = nThreads; - - if ( nThreads == 1 ) { - this.mapExecutor = null; - this.inputExecutor = this.reduceExecutor = null; - } else { - this.mapExecutor = (ThreadPoolExecutor)Executors.newFixedThreadPool(nThreads-1, new NamedThreadFactory("NS-map-thread-%d")); - this.mapExecutor.setRejectedExecutionHandler(new ThreadPoolExecutor.CallerRunsPolicy()); - this.inputExecutor = Executors.newSingleThreadExecutor(new NamedThreadFactory("NS-input-thread-%d")); - this.reduceExecutor = Executors.newSingleThreadExecutor(new NamedThreadFactory("NS-reduce-thread-%d")); - } - - // start timing the time spent outside of the nanoScheduler - outsideSchedulerTimer.start(); - } - - /** - * The number of parallel map threads in use with this NanoScheduler - * @return - */ - @Ensures("result > 0") - public int getnThreads() { - return nThreads; - } - - /** - * The input buffer size used by this NanoScheduler - * @return - */ - @Ensures("result > 0") - public int getInputBufferSize() { - return inputBufferSize; - } - - /** - * Tells this nanoScheduler to shutdown immediately, releasing all its resources. - * - * After this call, execute cannot be invoked without throwing an error - */ - public void shutdown() { - outsideSchedulerTimer.stop(); - - if ( nThreads > 1 ) { - shutdownExecutor("inputExecutor", inputExecutor); - shutdownExecutor("mapExecutor", mapExecutor); - shutdownExecutor("reduceExecutor", reduceExecutor); - } - shutdown = true; - - if (TIME_CALLS) { - printTimerInfo("Input time", inputTimer); - printTimerInfo("Map time", mapTimer); - printTimerInfo("Reduce time", reduceTimer); - printTimerInfo("Outside time", outsideSchedulerTimer); - } - } - - /** - * Helper function to cleanly shutdown an execution service, checking that the execution - * state is clean when it's done. - * - * @param name a string name for error messages for the executorService we are shutting down - * @param executorService the executorService to shut down - */ - @Requires({"name != null", "executorService != null"}) - @Ensures("executorService.isShutdown()") - private void shutdownExecutor(final String name, final ExecutorService executorService) { - if ( executorService.isShutdown() || executorService.isTerminated() ) - throw new IllegalStateException("Executor service " + name + " is already shut down!"); - - final List remaining = executorService.shutdownNow(); - if ( ! remaining.isEmpty() ) - throw new IllegalStateException(remaining.size() + " remaining tasks found in an executor " + name + ", unexpected behavior!"); - } - - /** - * Print to logger.info timing information from timer, with name label - * - * @param label the name of the timer to display. Should be human readable - * @param timer the timer whose elapsed time we will display - */ - @Requires({"label != null", "timer != null"}) - private void printTimerInfo(final String label, final SimpleTimer timer) { - final double total = inputTimer.getElapsedTime() + mapTimer.getElapsedTime() - + reduceTimer.getElapsedTime() + outsideSchedulerTimer.getElapsedTime(); - final double myTimeInSec = timer.getElapsedTime(); - final double myTimePercent = myTimeInSec / total * 100; - logger.info(String.format("%s: %s (%5.2f%%)", label, new AutoFormattingTime(myTimeInSec), myTimePercent)); - } - - /** - * @return true if this nanoScheduler is shutdown, or false if its still open for business - */ - public boolean isShutdown() { - return shutdown; - } - - /** - * @return are we displaying verbose debugging information about the scheduling? - */ - public boolean isDebug() { - return debug; - } - - /** - * Helper function to display a String.formatted message if we are doing verbose debugging - * - * @param format the format argument suitable for String.format - * @param args the arguments for String.format - */ - @Requires("format != null") - private void debugPrint(final String format, Object ... args) { - if ( isDebug() ) - logger.info("Thread " + Thread.currentThread().getId() + ":" + String.format(format, args)); - } - - /** - * Turn on/off verbose debugging - * - * @param debug true if we want verbose debugging - */ - public void setDebug(boolean debug) { - this.debug = debug; - } - - /** - * Set the progress callback function to progressFunction - * - * The progress callback is invoked after each buffer size elements have been processed by map/reduce - * - * @param progressFunction a progress function to call, or null if you don't want any progress callback - */ - public void setProgressFunction(final NSProgressFunction progressFunction) { - this.progressFunction = progressFunction; - } - - /** - * Execute a map/reduce job with this nanoScheduler - * - * Data comes from inputReader. Will be read until hasNext() == false. - * map is called on each element provided by inputReader. No order of operations is guarenteed - * reduce is called in order of the input data provided by inputReader on the result of map() applied - * to each element. - * - * Note that the caller thread is put to work with this function call. The call doesn't return - * until all elements have been processes. - * - * It is safe to call this function repeatedly on a single nanoScheduler, at least until the - * shutdown method is called. - * - * Note that this function goes through a single threaded fast path if the number of threads - * is 1. - * - * @param inputReader an iterator providing us with the input data to nanoSchedule map/reduce over - * @param map the map function from input type -> map type, will be applied in parallel to each input - * @param reduce the reduce function from map type + reduce type -> reduce type to be applied in order to map results - * @return the last reduce value - */ - public ReduceType execute(final Iterator inputReader, - final NSMapFunction map, - final ReduceType initialValue, - final NSReduceFunction reduce) { - if ( isShutdown() ) throw new IllegalStateException("execute called on already shutdown NanoScheduler"); - if ( inputReader == null ) throw new IllegalArgumentException("inputReader cannot be null"); - if ( map == null ) throw new IllegalArgumentException("map function cannot be null"); - if ( reduce == null ) throw new IllegalArgumentException("reduce function cannot be null"); - - outsideSchedulerTimer.stop(); - - ReduceType result; - if ( ALLOW_SINGLE_THREAD_FASTPATH && getnThreads() == 1 ) { - result = executeSingleThreaded(inputReader, map, initialValue, reduce); - } else { - result = executeMultiThreaded(inputReader, map, initialValue, reduce); - } - - outsideSchedulerTimer.restart(); - return result; - } - - /** - * Simple efficient reference implementation for single threaded execution. - * - * @return the reduce result of this map/reduce job - */ - @Requires({"inputReader != null", "map != null", "reduce != null"}) - private ReduceType executeSingleThreaded(final Iterator inputReader, - final NSMapFunction map, - final ReduceType initialValue, - final NSReduceFunction reduce) { - ReduceType sum = initialValue; - int i = 0; - - // start timer to ensure that both hasNext and next are caught by the timer - if ( TIME_CALLS ) inputTimer.restart(); - while ( inputReader.hasNext() ) { - final InputType input = inputReader.next(); - if ( TIME_CALLS ) inputTimer.stop(); - - // map - if ( TIME_CALLS ) mapTimer.restart(); - final long preMapTime = LOG_MAP_TIMES ? 0 : mapTimer.currentTimeNano(); - final MapType mapValue = map.apply(input); - if ( LOG_MAP_TIMES ) logger.info("MAP TIME " + (mapTimer.currentTimeNano() - preMapTime)); - if ( TIME_CALLS ) mapTimer.stop(); - - if ( i++ % inputBufferSize == 0 && progressFunction != null ) - progressFunction.progress(input); - - // reduce - if ( TIME_CALLS ) reduceTimer.restart(); - sum = reduce.apply(mapValue, sum); - if ( TIME_CALLS ) reduceTimer.stop(); - - if ( TIME_CALLS ) inputTimer.restart(); - } - - return sum; - } - - /** - * Efficient parallel version of Map/Reduce - * - * @return the reduce result of this map/reduce job - */ - @Requires({"inputReader != null", "map != null", "reduce != null"}) - private ReduceType executeMultiThreaded(final Iterator inputReader, - final NSMapFunction map, - final ReduceType initialValue, - final NSReduceFunction reduce) { - debugPrint("Executing nanoScheduler"); - - // a blocking queue that limits the number of input datum to the requested buffer size - final BlockingQueue.InputValue> inputQueue - = new LinkedBlockingDeque.InputValue>(inputBufferSize); - - // a priority queue that stores up to mapBufferSize elements - // produced by completed map jobs. - final BlockingQueue>> mapResultQueue = - new LinkedBlockingDeque>>(mapBufferSize); - - // Start running the input reader thread - inputExecutor.submit(new InputProducer(inputReader, inputTimer, inputQueue)); - - // Start running the reducer thread - final ReducerThread reducer - = new ReducerThread(reduce, reduceTimer, initialValue, mapResultQueue); - final Future reduceResult = reduceExecutor.submit(reducer); - - try { - int numJobs = 0; - - while ( true ) { - // block on input - final InputProducer.InputValue inputEnqueueWrapped = inputQueue.take(); - - if ( ! inputEnqueueWrapped.isLast() ) { - // get the object itself - final InputType input = inputEnqueueWrapped.getValue(); - - // the next map call has jobID + 1 - numJobs++; - - // send job for map via the completion service - final CallableMap doMap = new CallableMap(map, numJobs, input); - final Future> mapJob = mapExecutor.submit(doMap); - mapResultQueue.put(mapJob); - - debugPrint(" Done with cycle of map/reduce"); - - if ( numJobs % inputBufferSize == 0 && progressFunction != null ) - progressFunction.progress(input); - } else { - mapResultQueue.put(new FutureValue>(new MapResult())); - return reduceResult.get(); // wait for our result of reduce - } - } - } catch (InterruptedException ex) { - throw new ReviewedStingException("got execution exception", ex); - } catch (ExecutionException ex) { - throw new ReviewedStingException("got execution exception", ex); - } - } - - /** - * A simple callable version of the map function for use with the executor pool - */ - private class CallableMap implements Callable> { - final int id; - final InputType input; - final NSMapFunction map; - - @Requires({"map != null"}) - private CallableMap(final NSMapFunction map, - final int id, - final InputType input) { - this.id = id; - this.input = input; - this.map = map; - } - - @Override - public MapResult call() { - if ( TIME_CALLS ) mapTimer.restart(); - if ( debug ) debugPrint("\t\tmap " + input); - final MapType result = map.apply(input); - if ( TIME_CALLS ) mapTimer.stop(); - return new MapResult(result, id); - } - } -} diff --git a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/ReducerThread.java b/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/ReducerThread.java deleted file mode 100644 index 506e45453..000000000 --- a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/ReducerThread.java +++ /dev/null @@ -1,65 +0,0 @@ -package org.broadinstitute.sting.utils.nanoScheduler; - -import org.broadinstitute.sting.utils.SimpleTimer; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; - -import java.util.concurrent.BlockingQueue; -import java.util.concurrent.Callable; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.Future; - -/** - * Thread that runs the reduce of the map/reduce. - * - * This thread reads from mapResultsQueue until the poison EOF object arrives. At each - * stage is calls reduce(value, sum). The blocking mapResultQueue ensures that the - * queue waits until the mapResultQueue has a value to take. Then, it gets and waits - * until the map result Future has a value. - */ -class ReducerThread implements Callable { - final NSReduceFunction reduce; - final SimpleTimer reduceTimer; - final BlockingQueue>> mapResultQueue; - - ReduceType sum; - int lastJobID = -1; - - public ReducerThread(final NSReduceFunction reduce, - final SimpleTimer reduceTimer, - final ReduceType sum, - final BlockingQueue>> mapResultQueue) { - if ( reduce == null ) throw new IllegalArgumentException("Reduce function cannot be null"); - if ( mapResultQueue == null ) throw new IllegalArgumentException("mapResultQueue cannot be null"); - - this.reduce = reduce; - this.reduceTimer = reduceTimer; - this.sum = sum; - this.mapResultQueue = mapResultQueue; - } - - public ReduceType call() { - try { - while ( true ) { - final MapResult result = mapResultQueue.take().get(); - if ( result.isLast() ) { - // we are done, just return sum - return sum; - } - else if ( result.getJobID() < lastJobID ) { - // make sure the map results are coming in order - throw new IllegalStateException("BUG: last jobID " + lastJobID + " > current jobID " + result.getJobID()); - } else { - lastJobID = result.getJobID(); - // apply reduce, keeping track of sum - if ( reduceTimer != null ) reduceTimer.restart(); - sum = reduce.apply(result.getValue(), sum); - if ( reduceTimer != null ) reduceTimer.stop(); - } - } - } catch (ExecutionException ex) { - throw new ReviewedStingException("got execution exception", ex); - } catch (InterruptedException ex) { - throw new ReviewedStingException("got execution exception", ex); - } - } -} diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/BQSRMode.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/BQSRMode.java deleted file mode 100644 index 431014032..000000000 --- a/public/java/src/org/broadinstitute/sting/utils/recalibration/BQSRMode.java +++ /dev/null @@ -1,30 +0,0 @@ -package org.broadinstitute.sting.utils.recalibration; - -import org.broadinstitute.sting.gatk.iterators.ReadTransformer; - -import java.lang.annotation.*; - -/** - * User: hanna - * Date: May 14, 2009 - * Time: 1:51:22 PM - * BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT - * Software and documentation are copyright 2005 by the Broad Institute. - * All rights are reserved. - * - * Users acknowledge that this software is supplied without any warranty or support. - * The Broad Institute is not responsible for its use, misuse, or - * functionality. - */ - -/** - * Allows the walker to indicate what type of data it wants to consume. - */ - -@Documented -@Inherited -@Retention(RetentionPolicy.RUNTIME) -@Target(ElementType.TYPE) -public @interface BQSRMode { - public abstract ReadTransformer.ApplicationTime ApplicationTime() default ReadTransformer.ApplicationTime.ON_INPUT; -} \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/BQSRReadTransformer.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/BQSRReadTransformer.java deleted file mode 100644 index fae0e8c09..000000000 --- a/public/java/src/org/broadinstitute/sting/utils/recalibration/BQSRReadTransformer.java +++ /dev/null @@ -1,40 +0,0 @@ -package org.broadinstitute.sting.utils.recalibration; - -import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; -import org.broadinstitute.sting.gatk.WalkerManager; -import org.broadinstitute.sting.gatk.iterators.ReadTransformer; -import org.broadinstitute.sting.gatk.walkers.Walker; -import org.broadinstitute.sting.utils.sam.GATKSAMRecord; - -/** - * A ReadTransformer that applies BQSR on the fly to reads - * - * User: rpoplin - * Date: 2/13/12 - */ -public class BQSRReadTransformer extends ReadTransformer { - private boolean enabled; - private BaseRecalibration bqsr; - - @Override - public ApplicationTime initializeSub(final GenomeAnalysisEngine engine, final Walker walker) { - this.enabled = engine.hasBaseRecalibration(); - this.bqsr = engine.getBaseRecalibration(); - final BQSRMode mode = WalkerManager.getWalkerAnnotation(walker, BQSRMode.class); - return mode.ApplicationTime(); - } - - @Override - public boolean enabled() { - return enabled; - } - - /** - * initialize a new BQSRReadTransformer that applies BQSR on the fly to incoming reads. - */ - @Override - public GATKSAMRecord apply(GATKSAMRecord read) { - bqsr.recalibrateRead(read); - return read; - } -} diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialMultiSampleReadStream.java b/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialMultiSampleReadStream.java deleted file mode 100644 index 7c2d9bfdc..000000000 --- a/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialMultiSampleReadStream.java +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (c) 2012, The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.utils.sam; - -import net.sf.picard.sam.MergingSamRecordIterator; -import net.sf.picard.sam.SamFileHeaderMerger; -import net.sf.samtools.SAMFileHeader; -import net.sf.samtools.SAMFileReader; -import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; -import org.broadinstitute.sting.gatk.iterators.StingSAMIteratorAdapter; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; - -import java.util.*; - -/** - * Simple wrapper class that multiplexes multiple ArtificialSingleSampleReadStreams into a single stream of reads - * - * @author David Roazen - */ -public class ArtificialMultiSampleReadStream implements Iterable { - - private Collection perSampleArtificialReadStreams; - private MergingSamRecordIterator mergingIterator; - - public ArtificialMultiSampleReadStream( Collection perSampleArtificialReadStreams ) { - if ( perSampleArtificialReadStreams == null || perSampleArtificialReadStreams.isEmpty() ) { - throw new ReviewedStingException("Can't create an ArtificialMultiSampleReadStream out of 0 ArtificialSingleSampleReadStreams"); - } - - this.perSampleArtificialReadStreams = perSampleArtificialReadStreams; - } - - public Iterator iterator() { - // lazy initialization to prevent reads from being created until they're needed - initialize(); - - return mergingIterator; - } - - public StingSAMIterator getStingSAMIterator() { - // lazy initialization to prevent reads from being created until they're needed - initialize(); - - return StingSAMIteratorAdapter.adapt(mergingIterator); - } - - private void initialize() { - Collection perSampleSAMReaders = new ArrayList(perSampleArtificialReadStreams.size()); - Collection headers = new ArrayList(perSampleArtificialReadStreams.size()); - - for ( ArtificialSingleSampleReadStream readStream : perSampleArtificialReadStreams ) { - Collection thisStreamReads = readStream.makeReads(); - - SAMFileReader reader = new ArtificialSAMFileReader(readStream.getHeader(), - thisStreamReads.toArray(new SAMRecord[thisStreamReads.size()])); - perSampleSAMReaders.add(reader); - headers.add(reader.getFileHeader()); - } - - SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(SAMFileHeader.SortOrder.coordinate, headers, true); - mergingIterator = new MergingSamRecordIterator(headerMerger, perSampleSAMReaders, true); - } -} diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSingleSampleReadStream.java b/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSingleSampleReadStream.java deleted file mode 100644 index a9480692b..000000000 --- a/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSingleSampleReadStream.java +++ /dev/null @@ -1,212 +0,0 @@ -/* - * Copyright (c) 2012, The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.utils.sam; - -import net.sf.samtools.SAMFileHeader; -import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; -import org.broadinstitute.sting.gatk.iterators.StingSAMIteratorAdapter; -import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; - -import java.util.ArrayList; -import java.util.Collection; -import java.util.Iterator; - -/** - * An artificial stream of reads from a single read group/sample with configurable characteristics - * such as: - * - * -the number of contigs that the reads should be distributed across - * -number of "stacks" of reads sharing the same alignment start position per contig - * -the min/max number of reads in each stack (exact values chosen randomly from this range) - * -the min/max distance between stack start positions (exact values chosen randomly from this range) - * -the min/max length of each read (exact values chosen randomly from this range) - * -the number of unmapped reads - * - * The cigar string for all reads will be *M, where * is the length of the read. - * - * @author David Roazen - */ -public class ArtificialSingleSampleReadStream implements Iterable { - private SAMFileHeader header; - private String readGroupID; - private int numContigs; - private int numStacksPerContig; - private int minReadsPerStack; - private int maxReadsPerStack; - private int minDistanceBetweenStacks; - private int maxDistanceBetweenStacks; - private int minReadLength; - private int maxReadLength; - private int numUnmappedReads; - - private static final String READ_GROUP_TAG = "RG"; - - public ArtificialSingleSampleReadStream( SAMFileHeader header, - String readGroupID, - int numContigs, - int numStacksPerContig, - int minReadsPerStack, - int maxReadsPerStack, - int minDistanceBetweenStacks, - int maxDistanceBetweenStacks, - int minReadLength, - int maxReadLength, - int numUnmappedReads ) { - this.header = header; - this.readGroupID = readGroupID; - this.numContigs = numContigs; - this.numStacksPerContig = numStacksPerContig; - this.minReadsPerStack = minReadsPerStack; - this.maxReadsPerStack = maxReadsPerStack; - this.minDistanceBetweenStacks = minDistanceBetweenStacks; - this.maxDistanceBetweenStacks = maxDistanceBetweenStacks; - this.minReadLength = minReadLength; - this.maxReadLength = maxReadLength; - this.numUnmappedReads = numUnmappedReads; - - validateStreamParameters(); - } - - private void validateStreamParameters() { - if ( header == null || readGroupID == null ) { - throw new ReviewedStingException("null SAMFileHeader or read group ID") ; - } - - if ( header.getReadGroup(readGroupID) == null ) { - throw new ReviewedStingException(String.format("Read group %s not found in SAMFileHeader", readGroupID)); - } - - if ( numContigs < 0 || numStacksPerContig < 0 || minReadsPerStack < 0 || maxReadsPerStack < 0 || - minDistanceBetweenStacks < 0 || maxDistanceBetweenStacks < 0 || minReadLength < 0 || maxReadLength < 0 || - numUnmappedReads < 0 ) { - throw new ReviewedStingException("Read stream parameters must be >= 0"); - } - - if ( (numContigs == 0 && numStacksPerContig != 0) || (numContigs != 0 && numStacksPerContig == 0) ) { - throw new ReviewedStingException("numContigs and numStacksPerContig must either both be > 0, or both be 0"); - } - - if ( minReadsPerStack > maxReadsPerStack ) { - throw new ReviewedStingException("minReadsPerStack > maxReadsPerStack"); - } - - if ( minDistanceBetweenStacks > maxDistanceBetweenStacks ) { - throw new ReviewedStingException("minDistanceBetweenStacks > maxDistanceBetweenStacks"); - } - - if ( minReadLength > maxReadLength ) { - throw new ReviewedStingException("minReadLength > maxReadLength"); - } - } - - public Iterator iterator() { - return makeReads().iterator(); - } - - public StingSAMIterator getStingSAMIterator() { - return StingSAMIteratorAdapter.adapt(iterator()); - } - - public Collection makeReads() { - Collection reads = new ArrayList(numContigs * numStacksPerContig * maxReadsPerStack); - - for ( int contig = 0; contig < numContigs; contig++ ) { - int alignmentStart = 1; - - for ( int stack = 0; stack < numStacksPerContig; stack++ ) { - reads.addAll(makeReadStack(contig, alignmentStart, MathUtils.randomIntegerInRange(minReadsPerStack, maxReadsPerStack))); - alignmentStart += MathUtils.randomIntegerInRange(minDistanceBetweenStacks, maxDistanceBetweenStacks); - } - } - - if ( numUnmappedReads > 0 ) { - reads.addAll(makeReadStack(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX, SAMRecord.NO_ALIGNMENT_START, numUnmappedReads)); - } - - return reads; - } - - private Collection makeReadStack( int contig, int alignmentStart, int stackSize ) { - Collection readStack = new ArrayList(stackSize); - - for ( int i = 0; i < stackSize; i++ ) { - SAMRecord read = ArtificialSAMUtils.createArtificialRead(header, - "foo", - contig, - alignmentStart, - MathUtils.randomIntegerInRange(minReadLength, maxReadLength)); - read.setAttribute(READ_GROUP_TAG, readGroupID); - readStack.add(read); - } - - return readStack; - } - - public SAMFileHeader getHeader() { - return header; - } - - public String getReadGroupID() { - return readGroupID; - } - - public int getNumContigs() { - return numContigs; - } - - public int getNumStacksPerContig() { - return numStacksPerContig; - } - - public int getMinReadsPerStack() { - return minReadsPerStack; - } - - public int getMaxReadsPerStack() { - return maxReadsPerStack; - } - - public int getMinDistanceBetweenStacks() { - return minDistanceBetweenStacks; - } - - public int getMaxDistanceBetweenStacks() { - return maxDistanceBetweenStacks; - } - - public int getMinReadLength() { - return minReadLength; - } - - public int getMaxReadLength() { - return maxReadLength; - } - - public int getNumUnmappedReads() { - return numUnmappedReads; - } -} diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSingleSampleReadStreamAnalyzer.java b/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSingleSampleReadStreamAnalyzer.java deleted file mode 100644 index a4d7c5146..000000000 --- a/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSingleSampleReadStreamAnalyzer.java +++ /dev/null @@ -1,281 +0,0 @@ -/* - * Copyright (c) 2012, The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.utils.sam; - -import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; - -import java.util.ArrayList; -import java.util.List; - -/** - * A class for analyzing and validating the read stream produced by an ArtificialSingleSampleReadStream. - * - * Collects various statistics about the stream of reads it's fed, and validates the stream - * by checking whether the collected statistics match the nominal properties of the stream. - * - * Subclasses are expected to override the validate() method in order to check whether an artificial - * read stream has been *transformed* in some way (eg., by downsampling or some other process), rather - * than merely checking whether the stream matches its original properties. - * - * Usage is simple: - * - * ArtificialSingleSampleReadStreamAnalyzer analyzer = new ArtificialSingleSampleReadStreamAnalyzer(originalStream); - * analyzer.analyze(originalOrTransformedStream); - * analyzer.validate(); // override this method if you want to check whether the stream has been transformed - * // in a certain way relative to the original stream - * - * @author David Roazen - */ -public class ArtificialSingleSampleReadStreamAnalyzer { - protected ArtificialSingleSampleReadStream originalStream; - protected SAMRecord lastRead; - protected int totalReads; - protected boolean allSamplesMatch; - protected int numContigs; - protected List stacksPerContig; - protected Integer minReadsPerStack; - protected Integer maxReadsPerStack; - protected Integer minDistanceBetweenStacks; - protected Integer maxDistanceBetweenStacks; - protected Integer minReadLength; - protected Integer maxReadLength; - protected int numUnmappedReads; - - protected int currentContigNumStacks; - protected int currentStackNumReads; - - /** - * Construct a new read stream analyzer, providing an ArtificialSingleSampleReadStream that will - * serve as the basis for comparison after the analysis is complete. - * - * @param originalStream the original ArtificialSingleSampleReadStream upon which the stream - * that will be fed to the analyzer is based - */ - public ArtificialSingleSampleReadStreamAnalyzer( ArtificialSingleSampleReadStream originalStream ) { - this.originalStream = originalStream; - reset(); - } - - /** - * Reset all read stream statistics collected by this analyzer to prepare for a fresh run - */ - public void reset() { - lastRead = null; - totalReads = 0; - allSamplesMatch = true; - numContigs = 0; - stacksPerContig = new ArrayList(); - minReadsPerStack = null; - maxReadsPerStack = null; - minDistanceBetweenStacks = null; - maxDistanceBetweenStacks = null; - minReadLength = null; - maxReadLength = null; - numUnmappedReads = 0; - currentContigNumStacks = 0; - currentStackNumReads = 0; - } - - /** - * Collect statistics on the stream of reads passed in - * - * @param stream the stream of reads to analyze - */ - public void analyze( Iterable stream ) { - for ( SAMRecord read : stream ) { - update(read); - } - finalizeStats(); - } - - /** - * Validate the stream by checking whether our collected statistics match the properties of the - * original stream. Throws a ReviewedStingException if the stream is invalid. - * - * Override this method if you want to check whether the stream has been transformed in some - * way relative to the original stream. - */ - public void validate() { - if ( (originalStream.getNumContigs() == 0 || originalStream.getNumStacksPerContig() == 0) && originalStream.getNumUnmappedReads() == 0 ) { - if ( totalReads != 0 ) { - throw new ReviewedStingException("got reads from the stream, but the stream was configured to have 0 reads"); - } - return; // no further validation needed for the 0-reads case - } - else if ( totalReads == 0 ) { - throw new ReviewedStingException("got no reads from the stream, but the stream was configured to have > 0 reads"); - } - - if ( ! allSamplesMatch ) { - throw new ReviewedStingException("some reads had the wrong sample"); - } - - if ( numContigs != originalStream.getNumContigs() ) { - throw new ReviewedStingException("number of contigs not correct"); - } - - if ( stacksPerContig.size() != originalStream.getNumContigs() ) { - throw new ReviewedStingException(String.format("bug in analyzer code: calculated sizes for %d contigs even though there were only %d contigs", - stacksPerContig.size(), originalStream.getNumContigs())); - } - - for ( int contigStackCount : stacksPerContig ) { - if ( contigStackCount != originalStream.getNumStacksPerContig() ) { - throw new ReviewedStingException("contig had incorrect number of stacks"); - } - } - - if ( originalStream.getNumStacksPerContig() > 0 ) { - if ( minReadsPerStack < originalStream.getMinReadsPerStack() ) { - throw new ReviewedStingException("stack had fewer than the minimum number of reads"); - } - if ( maxReadsPerStack > originalStream.getMaxReadsPerStack() ) { - throw new ReviewedStingException("stack had more than the maximum number of reads"); - } - } - else if ( minReadsPerStack != null || maxReadsPerStack != null ) { - throw new ReviewedStingException("bug in analyzer code: reads per stack was calculated even though 0 stacks per contig was specified"); - } - - if ( originalStream.getNumStacksPerContig() > 1 ) { - if ( minDistanceBetweenStacks < originalStream.getMinDistanceBetweenStacks() ) { - throw new ReviewedStingException("stacks were separated by less than the minimum distance"); - } - if ( maxDistanceBetweenStacks > originalStream.getMaxDistanceBetweenStacks() ) { - throw new ReviewedStingException("stacks were separated by more than the maximum distance"); - } - } - else if ( minDistanceBetweenStacks != null || maxDistanceBetweenStacks != null ) { - throw new ReviewedStingException("bug in analyzer code: distance between stacks was calculated even though numStacksPerContig was <= 1"); - } - - if ( minReadLength < originalStream.getMinReadLength() ) { - throw new ReviewedStingException("read was shorter than the minimum allowed length"); - } - if ( maxReadLength > originalStream.getMaxReadLength() ) { - throw new ReviewedStingException("read was longer than the maximum allowed length"); - } - - if ( numUnmappedReads != originalStream.getNumUnmappedReads() ) { - throw new ReviewedStingException(String.format("wrong number of unmapped reads: requested %d but saw %d", - originalStream.getNumUnmappedReads(), numUnmappedReads)); - } - - if ( (originalStream.getNumContigs() == 0 || originalStream.getNumStacksPerContig() == 0) && - numUnmappedReads != totalReads ) { - throw new ReviewedStingException("stream should have consisted only of unmapped reads, but saw some mapped reads"); - } - } - - public void update( SAMRecord read ) { - if ( read.getReadUnmappedFlag() ) { - numUnmappedReads++; - - if ( numUnmappedReads == 1 && lastRead != null ) { - processContigChange(); - numContigs--; - } - } - else if ( lastRead == null ) { - numContigs = 1; - currentContigNumStacks = 1; - currentStackNumReads = 1; - } - else if ( ! read.getReferenceIndex().equals(lastRead.getReferenceIndex()) ) { - processContigChange(); - } - else if ( read.getAlignmentStart() != lastRead.getAlignmentStart() ) { - processStackChangeWithinContig(read); - } - else { - currentStackNumReads++; - } - - updateReadLength(read.getReadLength()); - allSamplesMatch = allSamplesMatch && readHasCorrectSample(read); - totalReads++; - - lastRead = read; - } - - - private void processContigChange() { - numContigs++; - - stacksPerContig.add(currentContigNumStacks); - currentContigNumStacks = 1; - - updateReadsPerStack(currentStackNumReads); - currentStackNumReads = 1; - } - - private void processStackChangeWithinContig( SAMRecord read ) { - currentContigNumStacks++; - - updateReadsPerStack(currentStackNumReads); - currentStackNumReads = 1; - - updateDistanceBetweenStacks(read.getAlignmentStart() - lastRead.getAlignmentStart()); - } - - private void updateReadsPerStack( int stackReadCount ) { - if ( minReadsPerStack == null || stackReadCount < minReadsPerStack ) { - minReadsPerStack = stackReadCount; - } - if ( maxReadsPerStack == null || stackReadCount > maxReadsPerStack ) { - maxReadsPerStack = stackReadCount; - } - } - - private void updateDistanceBetweenStacks( int stackDistance ) { - if ( minDistanceBetweenStacks == null || stackDistance < minDistanceBetweenStacks ) { - minDistanceBetweenStacks = stackDistance; - } - if ( maxDistanceBetweenStacks == null || stackDistance > maxDistanceBetweenStacks ) { - maxDistanceBetweenStacks = stackDistance; - } - } - - private void updateReadLength( int readLength ) { - if ( minReadLength == null || readLength < minReadLength ) { - minReadLength = readLength; - } - if ( maxReadLength == null || readLength > maxReadLength ) { - maxReadLength = readLength; - } - } - - private boolean readHasCorrectSample( SAMRecord read ) { - return originalStream.getReadGroupID().equals(read.getAttribute("RG")); - } - - public void finalizeStats() { - if ( lastRead != null && ! lastRead.getReadUnmappedFlag() ) { - stacksPerContig.add(currentContigNumStacks); - updateReadsPerStack(currentStackNumReads); - } - } -} diff --git a/public/java/src/org/broadinstitute/sting/utils/threading/EfficiencyMonitoringThreadFactory.java b/public/java/src/org/broadinstitute/sting/utils/threading/EfficiencyMonitoringThreadFactory.java deleted file mode 100644 index b30198608..000000000 --- a/public/java/src/org/broadinstitute/sting/utils/threading/EfficiencyMonitoringThreadFactory.java +++ /dev/null @@ -1,158 +0,0 @@ -/* - * The MIT License - * - * Copyright (c) 2009 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ -package org.broadinstitute.sting.utils.threading; - -import com.google.java.contract.Ensures; -import com.google.java.contract.Invariant; -import com.google.java.contract.Requires; -import org.apache.log4j.Logger; -import org.apache.log4j.Priority; -import org.broadinstitute.sting.utils.AutoFormattingTime; - -import java.lang.management.ManagementFactory; -import java.lang.management.ThreadInfo; -import java.lang.management.ThreadMXBean; -import java.util.ArrayList; -import java.util.EnumMap; -import java.util.List; -import java.util.concurrent.CountDownLatch; -import java.util.concurrent.ThreadFactory; -import java.util.concurrent.TimeUnit; - -/** - * Creates threads that automatically monitor their efficiency via the parent ThreadEfficiencyMonitor - * - * User: depristo - * Date: 8/14/12 - * Time: 8:47 AM - */ -@Invariant({ - "activeThreads.size() <= nThreadsToCreate", - "countDownLatch.getCount() <= nThreadsToCreate", - "nThreadsCreated <= nThreadsToCreate" -}) -public class EfficiencyMonitoringThreadFactory extends ThreadEfficiencyMonitor implements ThreadFactory { - final int nThreadsToCreate; - final List activeThreads; - - int nThreadsCreated = 0; - - /** - * Counts down the number of active activeThreads whose runtime info hasn't been incorporated into - * times. Counts down from nThreadsToCreate to 0, at which point any code waiting - * on the final times is freed to run. - */ - final CountDownLatch countDownLatch; - - /** - * Create a new factory generating threads whose runtime and contention - * behavior is tracked in this factory. - * - * @param nThreadsToCreate the number of threads we will create in the factory before it's considered complete - */ - public EfficiencyMonitoringThreadFactory(final int nThreadsToCreate) { - super(); - if ( nThreadsToCreate <= 0 ) throw new IllegalArgumentException("nThreadsToCreate <= 0: " + nThreadsToCreate); - - this.nThreadsToCreate = nThreadsToCreate; - activeThreads = new ArrayList(nThreadsToCreate); - countDownLatch = new CountDownLatch(nThreadsToCreate); - } - - /** - * How many threads have been created by this factory so far? - * @return - */ - @Ensures("result >= 0") - public int getNThreadsCreated() { - return nThreadsCreated; - } - - /** - * Only useful for testing, so that we can wait for all of the threads in the factory to complete running - * - * @throws InterruptedException - */ - protected void waitForAllThreadsToComplete() throws InterruptedException { - countDownLatch.await(); - } - - @Ensures({ - "activeThreads.size() <= old(activeThreads.size())", - "! activeThreads.contains(thread)", - "countDownLatch.getCount() <= old(countDownLatch.getCount())" - }) - @Override - public synchronized void threadIsDone(final Thread thread) { - nThreadsAnalyzed++; - - if ( DEBUG ) logger.warn(" Countdown " + countDownLatch.getCount() + " in thread " + Thread.currentThread().getName()); - - super.threadIsDone(thread); - - // remove the thread from the list of active activeThreads, if it's in there, and decrement the countdown latch - if ( activeThreads.remove(thread) ) { - // one less thread is live for those blocking on all activeThreads to be complete - countDownLatch.countDown(); - if ( DEBUG ) logger.warn(" -> Countdown " + countDownLatch.getCount() + " in thread " + Thread.currentThread().getName()); - } - } - - /** - * Create a new thread from this factory - * - * @param runnable - * @return - */ - @Override - @Ensures({ - "activeThreads.size() > old(activeThreads.size())", - "activeThreads.contains(result)", - "nThreadsCreated == old(nThreadsCreated) + 1" - }) - public synchronized Thread newThread(final Runnable runnable) { - if ( activeThreads.size() >= nThreadsToCreate) - throw new IllegalStateException("Attempting to create more activeThreads than allowed by constructor argument nThreadsToCreate " + nThreadsToCreate); - - nThreadsCreated++; - final Thread myThread = new TrackingThread(runnable); - activeThreads.add(myThread); - return myThread; - } - - /** - * A wrapper around Thread that tracks the runtime of the thread and calls threadIsDone() when complete - */ - private class TrackingThread extends Thread { - private TrackingThread(Runnable runnable) { - super(runnable); - } - - @Override - public void run() { - super.run(); - threadIsDone(this); - } - } -} diff --git a/public/java/src/org/broadinstitute/sting/utils/threading/NamedThreadFactory.java b/public/java/src/org/broadinstitute/sting/utils/threading/NamedThreadFactory.java deleted file mode 100644 index b25375b87..000000000 --- a/public/java/src/org/broadinstitute/sting/utils/threading/NamedThreadFactory.java +++ /dev/null @@ -1,26 +0,0 @@ -package org.broadinstitute.sting.utils.threading; - -import java.util.concurrent.ThreadFactory; - -/** - * Thread factor that produces threads with a given name pattern - * - * User: depristo - * Date: 9/5/12 - * Time: 9:22 PM - * - */ -public class NamedThreadFactory implements ThreadFactory { - static int id = 0; - final String format; - - public NamedThreadFactory(String format) { - this.format = format; - String.format(format, id); // test the name - } - - @Override - public Thread newThread(Runnable r) { - return new Thread(r, String.format(format, id++)); - } -} diff --git a/public/java/src/org/broadinstitute/sting/utils/threading/ThreadEfficiencyMonitor.java b/public/java/src/org/broadinstitute/sting/utils/threading/ThreadEfficiencyMonitor.java deleted file mode 100644 index 9159f5657..000000000 --- a/public/java/src/org/broadinstitute/sting/utils/threading/ThreadEfficiencyMonitor.java +++ /dev/null @@ -1,207 +0,0 @@ -package org.broadinstitute.sting.utils.threading; - -import com.google.java.contract.Ensures; -import com.google.java.contract.Invariant; -import com.google.java.contract.Requires; -import org.apache.log4j.Logger; -import org.apache.log4j.Priority; -import org.broadinstitute.sting.utils.AutoFormattingTime; - -import java.lang.management.ManagementFactory; -import java.lang.management.ThreadInfo; -import java.lang.management.ThreadMXBean; -import java.util.EnumMap; -import java.util.concurrent.TimeUnit; - -/** - * Uses an MXBean to monitor thread efficiency - * - * Once the monitor is created, calls to threadIsDone() can be used to add information - * about the efficiency of the provided thread to this monitor. - * - * Provides simple print() for displaying efficiency information to a logger - * - * User: depristo - * Date: 8/22/12 - * Time: 10:48 AM - */ -@Invariant({"nThreadsAnalyzed >= 0"}) -public class ThreadEfficiencyMonitor { - protected static final boolean DEBUG = false; - protected static Logger logger = Logger.getLogger(EfficiencyMonitoringThreadFactory.class); - final EnumMap times = new EnumMap(State.class); - - /** - * The number of threads we've included in our efficiency monitoring - */ - int nThreadsAnalyzed = 0; - - /** - * The bean used to get the thread info about blocked and waiting times - */ - final ThreadMXBean bean; - - public ThreadEfficiencyMonitor() { - bean = ManagementFactory.getThreadMXBean(); - - // get the bean, and start tracking - if ( bean.isThreadContentionMonitoringSupported() ) - bean.setThreadContentionMonitoringEnabled(true); - else - logger.warn("Thread contention monitoring not supported, we cannot track GATK multi-threaded efficiency"); - //bean.setThreadCpuTimeEnabled(true); - - if ( bean.isThreadCpuTimeSupported() ) - bean.setThreadCpuTimeEnabled(true); - else - logger.warn("Thread CPU monitoring not supported, we cannot track GATK multi-threaded efficiency"); - - // initialize times to 0 - for ( final State state : State.values() ) - times.put(state, 0l); - } - - private static long nanoToMilli(final long timeInNano) { - return TimeUnit.NANOSECONDS.toMillis(timeInNano); - } - - /** - * Get the time spent in state across all threads created by this factory - * - * @param state to get information about - * @return the time in milliseconds - */ - @Ensures({"result >= 0"}) - public synchronized long getStateTime(final State state) { - return times.get(state); - } - - /** - * Get the total time spent in all states across all threads created by this factory - * - * @return the time in milliseconds - */ - @Ensures({"result >= 0"}) - public synchronized long getTotalTime() { - long total = 0; - for ( final long time : times.values() ) - total += time; - return total; - } - - /** - * Get the fraction of time spent in state across all threads created by this factory - * - * @return the percentage (0.0-100.0) of time spent in state over all state times of all threads - */ - @Ensures({"result >= 0.0", "result <= 100.0"}) - public synchronized double getStatePercent(final State state) { - return (100.0 * getStateTime(state)) / Math.max(getTotalTime(), 1); - } - - public int getnThreadsAnalyzed() { - return nThreadsAnalyzed; - } - - @Override - public synchronized String toString() { - final StringBuilder b = new StringBuilder(); - - b.append("total ").append(getTotalTime()).append(" "); - for ( final State state : State.values() ) { - b.append(state).append(" ").append(getStateTime(state)).append(" "); - } - - return b.toString(); - } - - /** - * Print usage information about threads from this factory to logger - * with the INFO priority - * - * @param logger - */ - public synchronized void printUsageInformation(final Logger logger) { - printUsageInformation(logger, Priority.INFO); - } - - /** - * Print usage information about threads from this factory to logger - * with the provided priority - * - * @param logger - */ - public synchronized void printUsageInformation(final Logger logger, final Priority priority) { - logger.debug("Number of threads monitored: " + getnThreadsAnalyzed()); - logger.debug("Total runtime " + new AutoFormattingTime(TimeUnit.MILLISECONDS.toSeconds(getTotalTime()))); - for ( final State state : State.values() ) { - logger.debug(String.format("\tPercent of time spent %s is %.2f", state.getUserFriendlyName(), getStatePercent(state))); - } - logger.log(priority, String.format("CPU efficiency : %6.2f%% of time spent %s", getStatePercent(State.USER_CPU), State.USER_CPU.getUserFriendlyName())); - logger.log(priority, String.format("Walker inefficiency : %6.2f%% of time spent %s", getStatePercent(State.BLOCKING), State.BLOCKING.getUserFriendlyName())); - logger.log(priority, String.format("I/O inefficiency : %6.2f%% of time spent %s", getStatePercent(State.WAITING_FOR_IO), State.WAITING_FOR_IO.getUserFriendlyName())); - logger.log(priority, String.format("Thread inefficiency : %6.2f%% of time spent %s", getStatePercent(State.WAITING), State.WAITING.getUserFriendlyName())); - } - - /** - * Update the information about completed thread that ran for runtime in milliseconds - * - * This method updates all of the key timing and tracking information in the factory so that - * thread can be retired. After this call the factory shouldn't have a pointer to the thread any longer - * - * @param thread the thread whose information we are updating - */ - @Ensures({ - "getTotalTime() >= old(getTotalTime())" - }) - public synchronized void threadIsDone(final Thread thread) { - nThreadsAnalyzed++; - - if ( DEBUG ) logger.warn("UpdateThreadInfo called"); - - final long threadID = thread.getId(); - final ThreadInfo info = bean.getThreadInfo(thread.getId()); - final long totalTimeNano = bean.getThreadCpuTime(threadID); - final long userTimeNano = bean.getThreadUserTime(threadID); - final long systemTimeNano = totalTimeNano - userTimeNano; - final long userTimeInMilliseconds = nanoToMilli(userTimeNano); - final long systemTimeInMilliseconds = nanoToMilli(systemTimeNano); - - if ( info != null ) { - if ( DEBUG ) logger.warn("Updating thread with user runtime " + userTimeInMilliseconds + " and system runtime " + systemTimeInMilliseconds + " of which blocked " + info.getBlockedTime() + " and waiting " + info.getWaitedTime()); - incTimes(State.BLOCKING, info.getBlockedTime()); - incTimes(State.WAITING, info.getWaitedTime()); - incTimes(State.USER_CPU, userTimeInMilliseconds); - incTimes(State.WAITING_FOR_IO, systemTimeInMilliseconds); - } - } - - /** - * Helper function that increments the times counter by by for state - * - * @param state - * @param by - */ - @Requires({"state != null", "by >= 0"}) - @Ensures("getTotalTime() == old(getTotalTime()) + by") - private synchronized void incTimes(final State state, final long by) { - times.put(state, times.get(state) + by); - } - - public enum State { - BLOCKING("blocking on synchronized data structures"), - WAITING("waiting on some other thread"), - USER_CPU("doing productive CPU work"), - WAITING_FOR_IO("waiting for I/O"); - - private final String userFriendlyName; - - private State(String userFriendlyName) { - this.userFriendlyName = userFriendlyName; - } - - public String getUserFriendlyName() { - return userFriendlyName; - } - } -} diff --git a/public/java/test/org/broadinstitute/sting/commandline/InvalidArgumentIntegrationTest.java b/public/java/test/org/broadinstitute/sting/commandline/InvalidArgumentIntegrationTest.java deleted file mode 100644 index 924c6ec5a..000000000 --- a/public/java/test/org/broadinstitute/sting/commandline/InvalidArgumentIntegrationTest.java +++ /dev/null @@ -1,41 +0,0 @@ -package org.broadinstitute.sting.commandline; - -import org.broadinstitute.sting.BaseTest; -import org.broadinstitute.sting.WalkerTest; -import org.broadinstitute.sting.utils.exceptions.UserException; - -import org.testng.annotations.Test; -import org.testng.annotations.DataProvider; - -/** - * Created by IntelliJ IDEA. - * User: chartl - * Date: 8/31/12 - * Time: 11:03 AM - * To change this template use File | Settings | File Templates. - */ -public class InvalidArgumentIntegrationTest extends WalkerTest { - private static final String callsB36 = BaseTest.validationDataLocation + "lowpass.N3.chr1.raw.vcf"; - - private WalkerTest.WalkerTestSpec baseTest(String flag, String arg, Class exeption) { - return new WalkerTest.WalkerTestSpec("-T VariantsToTable -M 10 --variant:vcf " - + callsB36 + " -F POS,CHROM -R " - + b36KGReference + " -o %s " + flag + " " + arg, - 1, exeption); - - } - - @Test - public void testUnknownReadFilter() { - executeTest("UnknownReadFilter",baseTest("-rf","TestUnknownReadFilter", UserException.MalformedReadFilterException.class)); - } - - @Test - public void testMalformedWalkerArgs() { - executeTest("MalformedWalkerArgs", - new WalkerTest.WalkerTestSpec("-T UnknownWalkerName -M 10 --variant:vcf " - + callsB36 + " -F POS,CHROM -R " - + b36KGReference + " -o %s ", - 1, UserException.MalformedWalkerArgumentsException.class)); - } -} diff --git a/public/java/test/org/broadinstitute/sting/gatk/downsampling/LevelingDownsamplerUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/downsampling/LevelingDownsamplerUnitTest.java deleted file mode 100644 index 2717d014c..000000000 --- a/public/java/test/org/broadinstitute/sting/gatk/downsampling/LevelingDownsamplerUnitTest.java +++ /dev/null @@ -1,163 +0,0 @@ -/* - * Copyright (c) 2012, The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.downsampling; - -import org.broadinstitute.sting.BaseTest; -import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; -import org.testng.annotations.Test; -import org.testng.annotations.DataProvider; -import org.testng.Assert; - -import java.util.ArrayList; -import java.util.Collection; -import java.util.LinkedList; -import java.util.List; - -public class LevelingDownsamplerUnitTest extends BaseTest { - - private static class LevelingDownsamplerUniformStacksTest extends TestDataProvider { - public enum DataStructure { LINKED_LIST, ARRAY_LIST } - - int targetSize; - int numStacks; - int stackSize; - DataStructure dataStructure; - int expectedSize; - - public LevelingDownsamplerUniformStacksTest( int targetSize, int numStacks, int stackSize, DataStructure dataStructure ) { - super(LevelingDownsamplerUniformStacksTest.class); - - this.targetSize = targetSize; - this.numStacks = numStacks; - this.stackSize = stackSize; - this.dataStructure = dataStructure; - expectedSize = calculateExpectedDownsampledStackSize(); - - setName(String.format("%s: targetSize=%d numStacks=%d stackSize=%d dataStructure=%s expectedSize=%d", - getClass().getSimpleName(), targetSize, numStacks, stackSize, dataStructure, expectedSize)); - } - - public Collection> createStacks() { - Collection> stacks = new ArrayList>(); - - for ( int i = 1; i <= numStacks; i++ ) { - List stack = dataStructure == DataStructure.LINKED_LIST ? new LinkedList() : new ArrayList(); - - for ( int j = 1; j <= stackSize; j++ ) { - stack.add(new Object()); - } - - stacks.add(stack); - } - - return stacks; - } - - private int calculateExpectedDownsampledStackSize() { - int numItemsToRemove = numStacks * stackSize - targetSize; - - if ( numStacks == 0 ) { - return 0; - } - else if ( numItemsToRemove <= 0 ) { - return stackSize; - } - - return Math.max(1, stackSize - (numItemsToRemove / numStacks)); - } - } - - @DataProvider(name = "UniformStacksDataProvider") - public Object[][] createUniformStacksTestData() { - for ( int targetSize = 1; targetSize <= 10000; targetSize *= 10 ) { - for ( int numStacks = 0; numStacks <= 10; numStacks++ ) { - for ( int stackSize = 1; stackSize <= 1000; stackSize *= 10 ) { - for ( LevelingDownsamplerUniformStacksTest.DataStructure dataStructure : LevelingDownsamplerUniformStacksTest.DataStructure.values() ) { - new LevelingDownsamplerUniformStacksTest(targetSize, numStacks, stackSize, dataStructure); - } - } - } - } - - return LevelingDownsamplerUniformStacksTest.getTests(LevelingDownsamplerUniformStacksTest.class); - } - - @Test( dataProvider = "UniformStacksDataProvider" ) - public void testLevelingDownsamplerWithUniformStacks( LevelingDownsamplerUniformStacksTest test ) { - logger.warn("Running test: " + test); - - GenomeAnalysisEngine.resetRandomGenerator(); - - Downsampler> downsampler = new LevelingDownsampler, Object>(test.targetSize); - - downsampler.submit(test.createStacks()); - - if ( test.numStacks > 0 ) { - Assert.assertFalse(downsampler.hasFinalizedItems()); - Assert.assertTrue(downsampler.peekFinalized() == null); - Assert.assertTrue(downsampler.hasPendingItems()); - Assert.assertTrue(downsampler.peekPending() != null); - } - else { - Assert.assertFalse(downsampler.hasFinalizedItems() || downsampler.hasPendingItems()); - Assert.assertTrue(downsampler.peekFinalized() == null && downsampler.peekPending() == null); - } - - downsampler.signalEndOfInput(); - - if ( test.numStacks > 0 ) { - Assert.assertTrue(downsampler.hasFinalizedItems()); - Assert.assertTrue(downsampler.peekFinalized() != null); - Assert.assertFalse(downsampler.hasPendingItems()); - Assert.assertTrue(downsampler.peekPending() == null); - } - else { - Assert.assertFalse(downsampler.hasFinalizedItems() || downsampler.hasPendingItems()); - Assert.assertTrue(downsampler.peekFinalized() == null && downsampler.peekPending() == null); - } - - List> downsampledStacks = downsampler.consumeFinalizedItems(); - Assert.assertFalse(downsampler.hasFinalizedItems() || downsampler.hasPendingItems()); - Assert.assertTrue(downsampler.peekFinalized() == null && downsampler.peekPending() == null); - - Assert.assertEquals(downsampledStacks.size(), test.numStacks); - - int totalRemainingItems = 0; - for ( List stack : downsampledStacks ) { - Assert.assertTrue(Math.abs(stack.size() - test.expectedSize) <= 1); - totalRemainingItems += stack.size(); - } - - int numItemsReportedDiscarded = downsampler.getNumberOfDiscardedItems(); - int numItemsActuallyDiscarded = test.numStacks * test.stackSize - totalRemainingItems; - - Assert.assertEquals(numItemsReportedDiscarded, numItemsActuallyDiscarded); - - downsampler.reset(); - Assert.assertEquals(downsampler.getNumberOfDiscardedItems(), 0); - - Assert.assertTrue(totalRemainingItems <= Math.max(test.targetSize, test.numStacks)); - } -} diff --git a/public/java/test/org/broadinstitute/sting/gatk/downsampling/PerSampleDownsamplingReadsIteratorUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/downsampling/PerSampleDownsamplingReadsIteratorUnitTest.java deleted file mode 100644 index b9022900b..000000000 --- a/public/java/test/org/broadinstitute/sting/gatk/downsampling/PerSampleDownsamplingReadsIteratorUnitTest.java +++ /dev/null @@ -1,298 +0,0 @@ -/* - * Copyright (c) 2012, The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.downsampling; - -import net.sf.samtools.SAMFileHeader; -import net.sf.samtools.SAMReadGroupRecord; -import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.BaseTest; -import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; -import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; -import org.broadinstitute.sting.gatk.iterators.VerifyingSamIterator; -import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.sam.ArtificialMultiSampleReadStream; -import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; -import org.broadinstitute.sting.utils.sam.ArtificialSingleSampleReadStream; -import org.broadinstitute.sting.utils.sam.ArtificialSingleSampleReadStreamAnalyzer; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; - -import java.util.*; - -public class PerSampleDownsamplingReadsIteratorUnitTest extends BaseTest { - - private static class PerSampleDownsamplingReadsIteratorTest extends TestDataProvider { - - // TODO: tests should distinguish between variance across samples and variance within a sample - - private enum StreamDensity { - SPARSE (MAX_READ_LENGTH, MAX_READ_LENGTH * 2), - DENSE (1, MIN_READ_LENGTH), - MIXED (1, MAX_READ_LENGTH * 2), - UNIFORM_DENSE (1, 1), - UNIFORM_SPARSE (MAX_READ_LENGTH * 2, MAX_READ_LENGTH * 2); - - int minDistanceBetweenStacks; - int maxDistanceBetweenStacks; - - StreamDensity( int minDistanceBetweenStacks, int maxDistanceBetweenStacks ) { - this.minDistanceBetweenStacks = minDistanceBetweenStacks; - this.maxDistanceBetweenStacks = maxDistanceBetweenStacks; - } - - public String toString() { - return String.format("StreamDensity:%d-%d", minDistanceBetweenStacks, maxDistanceBetweenStacks); - } - } - - private enum StreamStackDepth { - NON_UNIFORM_LOW (1, 5), - NON_UNIFORM_HIGH (15, 20), - NON_UNIFORM_MIXED (1, 20), - UNIFORM_SINGLE (1, 1), - UNIFORM_LOW (2, 2), - UNIFORM_HIGH (20, 20), - UNIFORM_MEDIUM (10, 10); // should set target coverage to this value for testing - - int minReadsPerStack; - int maxReadsPerStack; - - StreamStackDepth( int minReadsPerStack, int maxReadsPerStack ) { - this.minReadsPerStack = minReadsPerStack; - this.maxReadsPerStack = maxReadsPerStack; - } - - public boolean isUniform() { - return minReadsPerStack == maxReadsPerStack; - } - - public String toString() { - return String.format("StreamStackDepth:%d-%d", minReadsPerStack, maxReadsPerStack); - } - } - - private enum StreamStacksPerContig { - UNIFORM(20, 20), - NON_UNIFORM(1, 30); - - int minStacksPerContig; - int maxStacksPerContig; - - StreamStacksPerContig( int minStacksPerContig, int maxStacksPerContig ) { - this.minStacksPerContig = minStacksPerContig; - this.maxStacksPerContig = maxStacksPerContig; - } - - public boolean isUniform() { - return minStacksPerContig == maxStacksPerContig; - } - - public String toString() { - return String.format("StreamStacksPerContig:%d-%d", minStacksPerContig, maxStacksPerContig); - } - } - - // Not interested in testing multiple ranges for the read lengths, as none of our current - // downsamplers are affected by read length - private static final int MIN_READ_LENGTH = 50; - private static final int MAX_READ_LENGTH = 150; - - private ReadsDownsamplerFactory downsamplerFactory; - private int targetCoverage; - private int numSamples; - private int minContigs; - private int maxContigs; - private StreamDensity streamDensity; - private StreamStackDepth streamStackDepth; - private StreamStacksPerContig streamStacksPerContig; - private double unmappedReadsFraction; - private int unmappedReadsCount; - private boolean verifySortedness; - - private ArtificialMultiSampleReadStream mergedReadStream; - private Map perSampleArtificialReadStreams; - private Map perSampleStreamAnalyzers; - private SAMFileHeader header; - - public PerSampleDownsamplingReadsIteratorTest( ReadsDownsamplerFactory downsamplerFactory, - int targetCoverage, - int numSamples, - int minContigs, - int maxContigs, - StreamDensity streamDensity, - StreamStackDepth streamStackDepth, - StreamStacksPerContig streamStacksPerContig, - double unmappedReadsFraction, - int unmappedReadsCount, - boolean verifySortedness ) { - super(PerSampleDownsamplingReadsIteratorTest.class); - - this.downsamplerFactory = downsamplerFactory; - this.targetCoverage = targetCoverage; - this.numSamples = numSamples; - this.minContigs = minContigs; - this.maxContigs = maxContigs; - this.streamDensity = streamDensity; - this.streamStackDepth = streamStackDepth; - this.streamStacksPerContig = streamStacksPerContig; - this.unmappedReadsFraction = unmappedReadsFraction; - this.unmappedReadsCount = unmappedReadsCount; - this.verifySortedness = verifySortedness; - - header = createHeader(); - createReadStreams(); - - setName(String.format("%s: targetCoverage=%d numSamples=%d minContigs=%d maxContigs=%d %s %s %s unmappedReadsFraction=%.2f unmappedReadsCount=%d verifySortedness=%b", - getClass().getSimpleName(), targetCoverage, numSamples, minContigs, maxContigs, streamDensity, streamStackDepth, streamStacksPerContig, unmappedReadsFraction, unmappedReadsCount, verifySortedness)); - } - - private SAMFileHeader createHeader() { - SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(maxContigs, 1, (streamDensity.maxDistanceBetweenStacks + MAX_READ_LENGTH) * streamStacksPerContig.maxStacksPerContig + 100000); - List readGroups = new ArrayList(numSamples); - List sampleNames = new ArrayList(numSamples); - - for ( int i = 0; i < numSamples; i++ ) { - readGroups.add("ReadGroup" + i); - sampleNames.add("Sample" + i); - } - - return ArtificialSAMUtils.createEnumeratedReadGroups(header, readGroups, sampleNames); - } - - private void createReadStreams() { - perSampleArtificialReadStreams = new HashMap(numSamples); - perSampleStreamAnalyzers = new HashMap(numSamples); - - for (SAMReadGroupRecord readGroup : header.getReadGroups() ) { - String readGroupID = readGroup.getReadGroupId(); - String sampleName = readGroup.getSample(); - - int thisSampleNumContigs = MathUtils.randomIntegerInRange(minContigs, maxContigs); - int thisSampleStacksPerContig = MathUtils.randomIntegerInRange(streamStacksPerContig.minStacksPerContig, streamStacksPerContig.maxStacksPerContig); - - int thisSampleNumUnmappedReads = GenomeAnalysisEngine.getRandomGenerator().nextDouble() < unmappedReadsFraction ? unmappedReadsCount : 0; - - ArtificialSingleSampleReadStream thisSampleStream = new ArtificialSingleSampleReadStream(header, - readGroupID, - thisSampleNumContigs, - thisSampleStacksPerContig, - streamStackDepth.minReadsPerStack, - streamStackDepth.maxReadsPerStack, - streamDensity.minDistanceBetweenStacks, - streamDensity.maxDistanceBetweenStacks, - MIN_READ_LENGTH, - MAX_READ_LENGTH, - thisSampleNumUnmappedReads); - perSampleArtificialReadStreams.put(sampleName, thisSampleStream); - perSampleStreamAnalyzers.put(sampleName, new PositionallyDownsampledArtificialSingleSampleReadStreamAnalyzer(thisSampleStream, targetCoverage)); - } - - mergedReadStream = new ArtificialMultiSampleReadStream(perSampleArtificialReadStreams.values()); - } - - public void run() { - StingSAMIterator downsamplingIter = new PerSampleDownsamplingReadsIterator(mergedReadStream.getStingSAMIterator(), downsamplerFactory); - - if ( verifySortedness ) { - downsamplingIter = new VerifyingSamIterator(downsamplingIter); - } - - while ( downsamplingIter.hasNext() ) { - SAMRecord read = downsamplingIter.next(); - String sampleName = read.getReadGroup() != null ? read.getReadGroup().getSample() : null; - - ArtificialSingleSampleReadStreamAnalyzer analyzer = perSampleStreamAnalyzers.get(sampleName); - if ( analyzer != null ) { - analyzer.update(read); - } - else { - throw new ReviewedStingException("bug: stream analyzer for sample " + sampleName + " not found"); - } - } - - for ( Map.Entry analyzerEntry : perSampleStreamAnalyzers.entrySet() ) { - ArtificialSingleSampleReadStreamAnalyzer analyzer = analyzerEntry.getValue(); - analyzer.finalizeStats(); - - // Validate the downsampled read stream for each sample individually - analyzer.validate(); - } - - // Allow memory used by this test to be reclaimed: - mergedReadStream = null; - perSampleArtificialReadStreams = null; - perSampleStreamAnalyzers = null; - } - } - - @DataProvider(name = "PerSampleDownsamplingReadsIteratorTestDataProvider") - public Object[][] createPerSampleDownsamplingReadsIteratorTests() { - - GenomeAnalysisEngine.resetRandomGenerator(); - - // Some values don't vary across tests - int targetCoverage = PerSampleDownsamplingReadsIteratorTest.StreamStackDepth.UNIFORM_MEDIUM.minReadsPerStack; - ReadsDownsamplerFactory downsamplerFactory = new SimplePositionalDownsamplerFactory(targetCoverage); - int maxContigs = 3; - boolean verifySortedness = true; - - for ( int numSamples : Arrays.asList(1, 2, 10) ) { - for ( int minContigs = 1; minContigs <= maxContigs; minContigs++ ) { - for ( PerSampleDownsamplingReadsIteratorTest.StreamDensity streamDensity : PerSampleDownsamplingReadsIteratorTest.StreamDensity.values() ) { - for ( PerSampleDownsamplingReadsIteratorTest.StreamStackDepth streamStackDepth : PerSampleDownsamplingReadsIteratorTest.StreamStackDepth.values() ) { - for (PerSampleDownsamplingReadsIteratorTest.StreamStacksPerContig streamStacksPerContig : PerSampleDownsamplingReadsIteratorTest.StreamStacksPerContig.values() ) { - for ( double unmappedReadsFraction : Arrays.asList(0.0, 1.0, 0.5) ) { - for ( int unmappedReadsCount : Arrays.asList(1, 50) ) { - new PerSampleDownsamplingReadsIteratorTest(downsamplerFactory, - targetCoverage, - numSamples, - minContigs, - maxContigs, - streamDensity, - streamStackDepth, - streamStacksPerContig, - unmappedReadsFraction, - unmappedReadsCount, - verifySortedness); - } - } - } - } - } - } - } - - return PerSampleDownsamplingReadsIteratorTest.getTests(PerSampleDownsamplingReadsIteratorTest.class); - } - - @Test(dataProvider = "PerSampleDownsamplingReadsIteratorTestDataProvider") - public void runPerSampleDownsamplingReadsIteratorTest( PerSampleDownsamplingReadsIteratorTest test ) { - logger.warn("Running test: " + test); - - GenomeAnalysisEngine.resetRandomGenerator(); - test.run(); - } -} diff --git a/public/java/test/org/broadinstitute/sting/gatk/downsampling/PositionallyDownsampledArtificialSingleSampleReadStreamAnalyzer.java b/public/java/test/org/broadinstitute/sting/gatk/downsampling/PositionallyDownsampledArtificialSingleSampleReadStreamAnalyzer.java deleted file mode 100644 index 9cbd0db8a..000000000 --- a/public/java/test/org/broadinstitute/sting/gatk/downsampling/PositionallyDownsampledArtificialSingleSampleReadStreamAnalyzer.java +++ /dev/null @@ -1,126 +0,0 @@ -/* - * Copyright (c) 2012, The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.downsampling; - -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.sam.ArtificialSingleSampleReadStream; -import org.broadinstitute.sting.utils.sam.ArtificialSingleSampleReadStreamAnalyzer; - -/** - * Class for analyzing an artificial read stream that has been positionally downsampled, and verifying - * that the downsampling was done correctly without changing the stream in unexpected ways. - * - * @author David Roazen - */ -public class PositionallyDownsampledArtificialSingleSampleReadStreamAnalyzer extends ArtificialSingleSampleReadStreamAnalyzer { - private int targetCoverage; - - public PositionallyDownsampledArtificialSingleSampleReadStreamAnalyzer( ArtificialSingleSampleReadStream originalStream, int targetCoverage ) { - super(originalStream); - this.targetCoverage = targetCoverage; - } - - /** - * Overridden validate() method that checks for the effects of positional downsampling in addition to checking - * for whether the original properties of the stream not affected by downsampling have been preserved - */ - @Override - public void validate() { - if ( (originalStream.getNumContigs() == 0 || originalStream.getNumStacksPerContig() == 0) && originalStream.getNumUnmappedReads() == 0 ) { - if ( totalReads != 0 ) { - throw new ReviewedStingException("got reads from the stream, but the stream was configured to have 0 reads"); - } - return; // no further validation needed for the 0-reads case - } - else if ( totalReads == 0 ) { - throw new ReviewedStingException("got no reads from the stream, but the stream was configured to have > 0 reads"); - } - - if ( ! allSamplesMatch ) { - throw new ReviewedStingException("some reads had the wrong sample"); - } - - if ( numContigs != originalStream.getNumContigs() ) { - throw new ReviewedStingException("number of contigs not correct"); - } - - if ( stacksPerContig.size() != originalStream.getNumContigs() ) { - throw new ReviewedStingException(String.format("bug in analyzer code: calculated sizes for %d contigs even though there were only %d contigs", - stacksPerContig.size(), originalStream.getNumContigs())); - } - - for ( int contigStackCount : stacksPerContig ) { - if ( contigStackCount != originalStream.getNumStacksPerContig() ) { - throw new ReviewedStingException("contig had incorrect number of stacks"); - } - } - - if ( originalStream.getNumStacksPerContig() > 0 ) { - - // Check for the effects of positional downsampling: - int stackMinimumAfterDownsampling = Math.min(targetCoverage, originalStream.getMinReadsPerStack()); - int stackMaximumAfterDownsampling = targetCoverage; - - if ( minReadsPerStack < stackMinimumAfterDownsampling ) { - throw new ReviewedStingException("stack had fewer than the minimum number of reads after downsampling"); - } - if ( maxReadsPerStack > stackMaximumAfterDownsampling ) { - throw new ReviewedStingException("stack had more than the maximum number of reads after downsampling"); - } - } - else if ( minReadsPerStack != null || maxReadsPerStack != null ) { - throw new ReviewedStingException("bug in analyzer code: reads per stack was calculated even though 0 stacks per contig was specified"); - } - - if ( originalStream.getNumStacksPerContig() > 1 ) { - if ( minDistanceBetweenStacks < originalStream.getMinDistanceBetweenStacks() ) { - throw new ReviewedStingException("stacks were separated by less than the minimum distance"); - } - if ( maxDistanceBetweenStacks > originalStream.getMaxDistanceBetweenStacks() ) { - throw new ReviewedStingException("stacks were separated by more than the maximum distance"); - } - } - else if ( minDistanceBetweenStacks != null || maxDistanceBetweenStacks != null ) { - throw new ReviewedStingException("bug in analyzer code: distance between stacks was calculated even though numStacksPerContig was <= 1"); - } - - if ( minReadLength < originalStream.getMinReadLength() ) { - throw new ReviewedStingException("read was shorter than the minimum allowed length"); - } - if ( maxReadLength > originalStream.getMaxReadLength() ) { - throw new ReviewedStingException("read was longer than the maximum allowed length"); - } - - if ( numUnmappedReads != originalStream.getNumUnmappedReads() ) { - throw new ReviewedStingException(String.format("wrong number of unmapped reads: requested %d but saw %d", - originalStream.getNumUnmappedReads(), numUnmappedReads)); - } - - if ( (originalStream.getNumContigs() == 0 || originalStream.getNumStacksPerContig() == 0) && - numUnmappedReads != totalReads ) { - throw new ReviewedStingException("stream should have consisted only of unmapped reads, but saw some mapped reads"); - } - } -} diff --git a/public/java/test/org/broadinstitute/sting/gatk/downsampling/ReservoirDownsamplerUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/downsampling/ReservoirDownsamplerUnitTest.java deleted file mode 100644 index 75d0448c4..000000000 --- a/public/java/test/org/broadinstitute/sting/gatk/downsampling/ReservoirDownsamplerUnitTest.java +++ /dev/null @@ -1,129 +0,0 @@ -/* - * Copyright (c) 2012, The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.downsampling; - -import net.sf.samtools.SAMFileHeader; -import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.BaseTest; -import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; -import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; -import org.testng.Assert; - -import java.util.ArrayList; -import java.util.Collection; -import java.util.List; - -public class ReservoirDownsamplerUnitTest extends BaseTest { - - private static class ReservoirDownsamplerTest extends TestDataProvider { - int reservoirSize; - int totalReads; - int expectedNumReadsAfterDownsampling; - int expectedNumDiscardedItems; - - public ReservoirDownsamplerTest( int reservoirSize, int totalReads ) { - super(ReservoirDownsamplerTest.class); - - this.reservoirSize = reservoirSize; - this.totalReads = totalReads; - - expectedNumReadsAfterDownsampling = Math.min(reservoirSize, totalReads); - expectedNumDiscardedItems = totalReads <= reservoirSize ? 0 : totalReads - reservoirSize; - - setName(String.format("%s: reservoirSize=%d totalReads=%d expectedNumReadsAfterDownsampling=%d expectedNumDiscardedItems=%d", - getClass().getSimpleName(), reservoirSize, totalReads, expectedNumReadsAfterDownsampling, expectedNumDiscardedItems)); - } - - public Collection createReads() { - Collection reads = new ArrayList(totalReads); - - SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000000); - reads.addAll(ArtificialSAMUtils.createStackOfIdenticalArtificialReads(totalReads, header, "foo", 0, 1, 100)); - - return reads; - } - } - - @DataProvider(name = "ReservoirDownsamplerTestDataProvider") - public Object[][] createReservoirDownsamplerTestData() { - for ( int reservoirSize = 1; reservoirSize <= 10000; reservoirSize *= 10 ) { - new ReservoirDownsamplerTest(reservoirSize, 0); - for ( int totalReads = 1; totalReads <= 10000; totalReads *= 10 ) { - new ReservoirDownsamplerTest(reservoirSize, totalReads); - } - } - - return ReservoirDownsamplerTest.getTests(ReservoirDownsamplerTest.class); - } - - @Test(dataProvider = "ReservoirDownsamplerTestDataProvider") - public void testReservoirDownsampler( ReservoirDownsamplerTest test ) { - logger.warn("Running test: " + test); - - GenomeAnalysisEngine.resetRandomGenerator(); - - ReadsDownsampler downsampler = new ReservoirDownsampler(test.reservoirSize); - - downsampler.submit(test.createReads()); - - if ( test.totalReads > 0 ) { - Assert.assertTrue(downsampler.hasFinalizedItems()); - Assert.assertTrue(downsampler.peekFinalized() != null); - Assert.assertFalse(downsampler.hasPendingItems()); - Assert.assertTrue(downsampler.peekPending() == null); - } - else { - Assert.assertFalse(downsampler.hasFinalizedItems() || downsampler.hasPendingItems()); - Assert.assertTrue(downsampler.peekFinalized() == null && downsampler.peekPending() == null); - } - - downsampler.signalEndOfInput(); - - if ( test.totalReads > 0 ) { - Assert.assertTrue(downsampler.hasFinalizedItems()); - Assert.assertTrue(downsampler.peekFinalized() != null); - Assert.assertFalse(downsampler.hasPendingItems()); - Assert.assertTrue(downsampler.peekPending() == null); - } - else { - Assert.assertFalse(downsampler.hasFinalizedItems() || downsampler.hasPendingItems()); - Assert.assertTrue(downsampler.peekFinalized() == null && downsampler.peekPending() == null); - } - - List downsampledReads = downsampler.consumeFinalizedItems(); - Assert.assertFalse(downsampler.hasFinalizedItems() || downsampler.hasPendingItems()); - Assert.assertTrue(downsampler.peekFinalized() == null && downsampler.peekPending() == null); - - Assert.assertEquals(downsampledReads.size(), test.expectedNumReadsAfterDownsampling); - - Assert.assertEquals(downsampler.getNumberOfDiscardedItems(), test.expectedNumDiscardedItems); - Assert.assertEquals(test.totalReads - downsampledReads.size(), test.expectedNumDiscardedItems); - - downsampler.reset(); - Assert.assertEquals(downsampler.getNumberOfDiscardedItems(), 0); - } -} diff --git a/public/java/test/org/broadinstitute/sting/gatk/downsampling/SimplePositionalDownsamplerUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/downsampling/SimplePositionalDownsamplerUnitTest.java deleted file mode 100644 index 5dc41b4a0..000000000 --- a/public/java/test/org/broadinstitute/sting/gatk/downsampling/SimplePositionalDownsamplerUnitTest.java +++ /dev/null @@ -1,330 +0,0 @@ -/* - * Copyright (c) 2012, The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.downsampling; - -import net.sf.samtools.SAMFileHeader; -import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.BaseTest; -import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; -import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; -import org.broadinstitute.sting.utils.sam.GATKSAMRecord; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; -import org.testng.Assert; - -import java.util.*; - -public class SimplePositionalDownsamplerUnitTest extends BaseTest { - - private static class SimplePositionalDownsamplerTest extends TestDataProvider { - int targetCoverage; - int numStacks; - List stackSizes; - List expectedStackSizes; - boolean multipleContigs; - int totalInitialReads; - - public SimplePositionalDownsamplerTest( int targetCoverage, List stackSizes, boolean multipleContigs ) { - super(SimplePositionalDownsamplerTest.class); - - this.targetCoverage = targetCoverage; - this.numStacks = stackSizes.size(); - this.stackSizes = stackSizes; - this.multipleContigs = multipleContigs; - - calculateExpectedDownsampledStackSizes(); - - totalInitialReads = 0; - for ( Integer stackSize : stackSizes ) { - totalInitialReads += stackSize; - } - - setName(String.format("%s: targetCoverage=%d numStacks=%d stackSizes=%s expectedSizes=%s multipleContigs=%b", - getClass().getSimpleName(), targetCoverage, numStacks, stackSizes, expectedStackSizes, multipleContigs)); - } - - public Collection createReads() { - Collection reads = new ArrayList(); - SAMFileHeader header = multipleContigs ? - ArtificialSAMUtils.createArtificialSamHeader(2, 1, 1000000) : - ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000000); - - int refIndex = 0; - int alignmentStart = 1; - int readLength = 100; - - for ( int i = 0; i < numStacks; i++ ) { - if ( multipleContigs && refIndex == 0 && i >= numStacks / 2 ) { - refIndex++; - } - - reads.addAll(ArtificialSAMUtils.createStackOfIdenticalArtificialReads(stackSizes.get(i), header, "foo", - refIndex, alignmentStart, readLength)); - - alignmentStart += 10; - } - - return reads; - } - - private void calculateExpectedDownsampledStackSizes() { - expectedStackSizes = new ArrayList(numStacks); - - for ( Integer stackSize : stackSizes ) { - int expectedSize = targetCoverage >= stackSize ? stackSize : targetCoverage; - expectedStackSizes.add(expectedSize); - } - } - } - - @DataProvider(name = "SimplePositionalDownsamplerTestDataProvider") - public Object[][] createSimplePositionalDownsamplerTestData() { - GenomeAnalysisEngine.resetRandomGenerator(); - - for ( int targetCoverage = 1; targetCoverage <= 10000; targetCoverage *= 10 ) { - for ( int contigs = 1; contigs <= 2; contigs++ ) { - for ( int numStacks = 0; numStacks <= 10; numStacks++ ) { - List stackSizes = new ArrayList(numStacks); - for ( int stack = 1; stack <= numStacks; stack++ ) { - stackSizes.add(GenomeAnalysisEngine.getRandomGenerator().nextInt(targetCoverage * 2) + 1); - } - new SimplePositionalDownsamplerTest(targetCoverage, stackSizes, contigs > 1); - } - } - } - - return SimplePositionalDownsamplerTest.getTests(SimplePositionalDownsamplerTest.class); - } - - @Test( dataProvider = "SimplePositionalDownsamplerTestDataProvider" ) - public void testSimplePostionalDownsampler( SimplePositionalDownsamplerTest test ) { - logger.warn("Running test: " + test); - - GenomeAnalysisEngine.resetRandomGenerator(); - - ReadsDownsampler downsampler = new SimplePositionalDownsampler(test.targetCoverage); - - downsampler.submit(test.createReads()); - - if ( test.numStacks > 1 ) { - Assert.assertTrue(downsampler.hasFinalizedItems()); - Assert.assertTrue(downsampler.peekFinalized() != null); - Assert.assertTrue(downsampler.hasPendingItems()); - Assert.assertTrue(downsampler.peekPending() != null); - } - else if ( test.numStacks == 1 ) { - Assert.assertFalse(downsampler.hasFinalizedItems()); - Assert.assertTrue(downsampler.peekFinalized() == null); - Assert.assertTrue(downsampler.hasPendingItems()); - Assert.assertTrue(downsampler.peekPending() != null); - } - else { - Assert.assertFalse(downsampler.hasFinalizedItems() || downsampler.hasPendingItems()); - Assert.assertTrue(downsampler.peekFinalized() == null && downsampler.peekPending() == null); - } - - downsampler.signalEndOfInput(); - - if ( test.numStacks > 0 ) { - Assert.assertTrue(downsampler.hasFinalizedItems()); - Assert.assertTrue(downsampler.peekFinalized() != null); - Assert.assertFalse(downsampler.hasPendingItems()); - Assert.assertTrue(downsampler.peekPending() == null); - } - else { - Assert.assertFalse(downsampler.hasFinalizedItems() || downsampler.hasPendingItems()); - Assert.assertTrue(downsampler.peekFinalized() == null && downsampler.peekPending() == null); - } - - List downsampledReads = downsampler.consumeFinalizedItems(); - Assert.assertFalse(downsampler.hasFinalizedItems() || downsampler.hasPendingItems()); - Assert.assertTrue(downsampler.peekFinalized() == null && downsampler.peekPending() == null); - - if ( test.numStacks == 0 ) { - Assert.assertTrue(downsampledReads.isEmpty()); - } - else { - List downsampledStackSizes = getDownsampledStackSizesAndVerifySortedness(downsampledReads); - - Assert.assertEquals(downsampledStackSizes.size(), test.numStacks); - Assert.assertEquals(downsampledStackSizes, test.expectedStackSizes); - - int numReadsActuallyEliminated = test.totalInitialReads - downsampledReads.size(); - int numReadsReportedEliminated = downsampler.getNumberOfDiscardedItems(); - Assert.assertEquals(numReadsActuallyEliminated, numReadsReportedEliminated); - } - - downsampler.reset(); - Assert.assertEquals(downsampler.getNumberOfDiscardedItems(), 0); - } - - private List getDownsampledStackSizesAndVerifySortedness( List downsampledReads ) { - List stackSizes = new ArrayList(); - - if ( downsampledReads.isEmpty() ) { - return stackSizes; - } - - Iterator iter = downsampledReads.iterator(); - Assert.assertTrue(iter.hasNext()); - - SAMRecord previousRead = iter.next(); - int currentStackSize = 1; - - while ( iter.hasNext() ) { - SAMRecord currentRead = iter.next(); - - if ( currentRead.getReferenceIndex() > previousRead.getReferenceIndex() || currentRead.getAlignmentStart() > previousRead.getAlignmentStart() ) { - stackSizes.add(currentStackSize); - currentStackSize = 1; - } - else if ( currentRead.getReferenceIndex() < previousRead.getReferenceIndex() || currentRead.getAlignmentStart() < previousRead.getAlignmentStart() ) { - Assert.fail(String.format("Reads are out of order: %s %s", previousRead, currentRead)); - } - else { - currentStackSize++; - } - - previousRead = currentRead; - } - - stackSizes.add(currentStackSize); - return stackSizes; - } - - @Test - public void testSimplePositionalDownsamplerSignalNoMoreReadsBefore() { - ReadsDownsampler downsampler = new SimplePositionalDownsampler(1000); - - SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000000); - - Collection readStack = new ArrayList(); - readStack.addAll(ArtificialSAMUtils.createStackOfIdenticalArtificialReads(50, header, "foo", 0, 1, 100)); - downsampler.submit(readStack); - - Assert.assertFalse(downsampler.hasFinalizedItems()); - Assert.assertTrue(downsampler.peekFinalized() == null); - Assert.assertTrue(downsampler.hasPendingItems()); - Assert.assertTrue(downsampler.peekPending() != null); - - SAMRecord laterRead = ArtificialSAMUtils.createArtificialRead(header, "foo", 0, 2, 100); - downsampler.signalNoMoreReadsBefore(laterRead); - - Assert.assertTrue(downsampler.hasFinalizedItems()); - Assert.assertTrue(downsampler.peekFinalized() != null); - Assert.assertFalse(downsampler.hasPendingItems()); - Assert.assertTrue(downsampler.peekPending() == null); - - List downsampledReads = downsampler.consumeFinalizedItems(); - - Assert.assertEquals(downsampledReads.size(), readStack.size()); - } - - @Test - public void testBasicUnmappedReadsSupport() { - ReadsDownsampler downsampler = new SimplePositionalDownsampler(100); - - SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000000); - - Collection readStack = new ArrayList(); - readStack.addAll(ArtificialSAMUtils.createStackOfIdenticalArtificialReads(200, header, "foo", SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX, - SAMRecord.NO_ALIGNMENT_START, 100)); - for ( SAMRecord read : readStack ) { - Assert.assertTrue(read.getReadUnmappedFlag()); - } - - downsampler.submit(readStack); - downsampler.signalEndOfInput(); - - List downsampledReads = downsampler.consumeFinalizedItems(); - - // Unmapped reads should not get downsampled at all by the SimplePositionalDownsampler - Assert.assertEquals(downsampledReads.size(), readStack.size()); - - for ( SAMRecord read: downsampledReads ) { - Assert.assertTrue(read.getReadUnmappedFlag()); - } - } - - @Test - public void testMixedMappedAndUnmappedReadsSupport() { - ReadsDownsampler downsampler = new SimplePositionalDownsampler(100); - - SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000000); - - Collection mappedReadStack = new ArrayList(); - mappedReadStack.addAll(ArtificialSAMUtils.createStackOfIdenticalArtificialReads(200, header, "foo", 0, 1, 100)); - for ( SAMRecord read : mappedReadStack ) { - Assert.assertFalse(read.getReadUnmappedFlag()); - } - - Collection unmappedReadStack = new ArrayList(); - unmappedReadStack.addAll(ArtificialSAMUtils.createStackOfIdenticalArtificialReads(200, header, "foo", SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX, - SAMRecord.NO_ALIGNMENT_START, 100)); - for ( SAMRecord read : unmappedReadStack ) { - Assert.assertTrue(read.getReadUnmappedFlag()); - } - - downsampler.submit(mappedReadStack); - downsampler.submit(unmappedReadStack); - downsampler.signalEndOfInput(); - - List downsampledReads = downsampler.consumeFinalizedItems(); - - // Unmapped reads should not get downsampled at all by the SimplePositionalDownsampler - Assert.assertEquals(downsampledReads.size(), 300); - Assert.assertEquals(downsampler.getNumberOfDiscardedItems(), 100); - - int count = 1; - for ( SAMRecord read: downsampledReads ) { - if ( count <= 100 ) { - Assert.assertFalse(read.getReadUnmappedFlag()); - } - else { - Assert.assertTrue(read.getReadUnmappedFlag()); - } - - count++; - } - } - - @Test - public void testGATKSAMRecordSupport() { - ReadsDownsampler downsampler = new SimplePositionalDownsampler(1000); - - SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000000); - - List reads = new ArrayList(); - for ( int i = 0; i < 10; i++ ) { - reads.add(ArtificialSAMUtils.createArtificialRead(header, "foo", 0, 10, 20 * i + 10)); - } - - downsampler.submit(reads); - downsampler.signalEndOfInput(); - List downsampledReads = downsampler.consumeFinalizedItems(); - - Assert.assertEquals(downsampledReads.size(), 10); - } -} diff --git a/public/java/test/org/broadinstitute/sting/gatk/iterators/LocusIteratorByStateExperimentalUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/iterators/LocusIteratorByStateExperimentalUnitTest.java deleted file mode 100644 index c148bcf84..000000000 --- a/public/java/test/org/broadinstitute/sting/gatk/iterators/LocusIteratorByStateExperimentalUnitTest.java +++ /dev/null @@ -1,546 +0,0 @@ -package org.broadinstitute.sting.gatk.iterators; - -import net.sf.samtools.SAMFileHeader; -import net.sf.samtools.SAMFileReader; -import net.sf.samtools.SAMRecord; -import net.sf.samtools.util.CloseableIterator; -import org.broadinstitute.sting.BaseTest; -import org.broadinstitute.sting.gatk.ReadProperties; -import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID; -import org.broadinstitute.sting.gatk.downsampling.DownsamplingMethod; -import org.broadinstitute.sting.gatk.filters.ReadFilter; -import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.pileup.PileupElement; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; -import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; -import org.broadinstitute.sting.utils.sam.GATKSAMRecord; -import org.testng.Assert; -import org.testng.annotations.BeforeClass; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; - -import java.util.*; - -/** - * testing of the experimental version of LocusIteratorByState - */ -public class LocusIteratorByStateExperimentalUnitTest extends BaseTest { - private static SAMFileHeader header; - private LocusIteratorByStateExperimental li; - private GenomeLocParser genomeLocParser; - - @BeforeClass - public void beforeClass() { - header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000); - genomeLocParser = new GenomeLocParser(header.getSequenceDictionary()); - } - - private final LocusIteratorByStateExperimental makeLTBS(List reads, ReadProperties readAttributes) { - return new LocusIteratorByStateExperimental(new FakeCloseableIterator(reads.iterator()), readAttributes, genomeLocParser, LocusIteratorByStateExperimental.sampleListForSAMWithoutReadGroups()); - } - - private static ReadProperties createTestReadProperties() { - return createTestReadProperties(null); - } - - private static ReadProperties createTestReadProperties( DownsamplingMethod downsamplingMethod ) { - return new ReadProperties( - Collections.emptyList(), - new SAMFileHeader(), - false, - SAMFileReader.ValidationStringency.STRICT, - downsamplingMethod, - new ValidationExclusion(), - Collections.emptyList(), - Collections.emptyList(), - false, - (byte) -1 - ); - } - - private static class FakeCloseableIterator implements CloseableIterator { - Iterator iterator; - - public FakeCloseableIterator(Iterator it) { - iterator = it; - } - - @Override - public void close() { - return; - } - - @Override - public boolean hasNext() { - return iterator.hasNext(); - } - - @Override - public T next() { - return iterator.next(); - } - - @Override - public void remove() { - throw new UnsupportedOperationException("Don't remove!"); - } - } - - @Test - public void testXandEQOperators() { - final byte[] bases1 = new byte[] {'A','A','A','A','A','A','A','A','A','A'}; - final byte[] bases2 = new byte[] {'A','A','A','C','A','A','A','A','A','C'}; - - // create a test version of the Reads object - ReadProperties readAttributes = createTestReadProperties(); - - SAMRecord r1 = ArtificialSAMUtils.createArtificialRead(header,"r1",0,1,10); - r1.setReadBases(bases1); - r1.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20}); - r1.setCigarString("10M"); - - SAMRecord r2 = ArtificialSAMUtils.createArtificialRead(header,"r2",0,1,10); - r2.setReadBases(bases2); - r2.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20,20,20}); - r2.setCigarString("3=1X5=1X"); - - SAMRecord r3 = ArtificialSAMUtils.createArtificialRead(header,"r3",0,1,10); - r3.setReadBases(bases2); - r3.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20,20,20}); - r3.setCigarString("3=1X5M1X"); - - SAMRecord r4 = ArtificialSAMUtils.createArtificialRead(header,"r4",0,1,10); - r4.setReadBases(bases2); - r4.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20}); - r4.setCigarString("10M"); - - List reads = Arrays.asList(r1, r2, r3, r4); - - // create the iterator by state with the fake reads and fake records - li = makeLTBS(reads,readAttributes); - - while (li.hasNext()) { - AlignmentContext context = li.next(); - ReadBackedPileup pileup = context.getBasePileup(); - Assert.assertEquals(pileup.depthOfCoverage(), 4); - } - } - - @Test - public void testIndelsInRegularPileup() { - final byte[] bases = new byte[] {'A','A','A','A','A','A','A','A','A','A'}; - final byte[] indelBases = new byte[] {'A','A','A','A','C','T','A','A','A','A','A','A'}; - - // create a test version of the Reads object - ReadProperties readAttributes = createTestReadProperties(); - - SAMRecord before = ArtificialSAMUtils.createArtificialRead(header,"before",0,1,10); - before.setReadBases(bases); - before.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20}); - before.setCigarString("10M"); - - SAMRecord during = ArtificialSAMUtils.createArtificialRead(header,"during",0,2,10); - during.setReadBases(indelBases); - during.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20,20,20}); - during.setCigarString("4M2I6M"); - - SAMRecord after = ArtificialSAMUtils.createArtificialRead(header,"after",0,3,10); - after.setReadBases(bases); - after.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20}); - after.setCigarString("10M"); - - List reads = Arrays.asList(before, during, after); - - // create the iterator by state with the fake reads and fake records - li = makeLTBS(reads,readAttributes); - - boolean foundIndel = false; - while (li.hasNext()) { - AlignmentContext context = li.next(); - ReadBackedPileup pileup = context.getBasePileup().getBaseFilteredPileup(10); - for (PileupElement p : pileup) { - if (p.isBeforeInsertion()) { - foundIndel = true; - Assert.assertEquals(p.getEventLength(), 2, "Wrong event length"); - Assert.assertEquals(p.getEventBases(), "CT", "Inserted bases are incorrect"); - break; - } - } - - } - - Assert.assertTrue(foundIndel,"Indel in pileup not found"); - } - - @Test - public void testWholeIndelReadInIsolation() { - final int firstLocus = 44367789; - - // create a test version of the Reads object - ReadProperties readAttributes = createTestReadProperties(); - - SAMRecord indelOnlyRead = ArtificialSAMUtils.createArtificialRead(header, "indelOnly", 0, firstLocus, 76); - indelOnlyRead.setReadBases(Utils.dupBytes((byte)'A',76)); - indelOnlyRead.setBaseQualities(Utils.dupBytes((byte) '@', 76)); - indelOnlyRead.setCigarString("76I"); - - List reads = Arrays.asList(indelOnlyRead); - - // create the iterator by state with the fake reads and fake records - li = makeLTBS(reads, readAttributes); - - // Traditionally, reads that end with indels bleed into the pileup at the following locus. Verify that the next pileup contains this read - // and considers it to be an indel-containing read. - Assert.assertTrue(li.hasNext(),"Should have found a whole-indel read in the normal base pileup without extended events enabled"); - AlignmentContext alignmentContext = li.next(); - Assert.assertEquals(alignmentContext.getLocation().getStart(), firstLocus, "Base pileup is at incorrect location."); - ReadBackedPileup basePileup = alignmentContext.getBasePileup(); - Assert.assertEquals(basePileup.getReads().size(),1,"Pileup is of incorrect size"); - Assert.assertSame(basePileup.getReads().get(0), indelOnlyRead, "Read in pileup is incorrect"); - } - - /** - * Test to make sure that reads supporting only an indel (example cigar string: 76I) do - * not negatively influence the ordering of the pileup. - */ - @Test - public void testWholeIndelRead() { - final int firstLocus = 44367788, secondLocus = firstLocus + 1; - - SAMRecord leadingRead = ArtificialSAMUtils.createArtificialRead(header,"leading",0,firstLocus,76); - leadingRead.setReadBases(Utils.dupBytes((byte)'A',76)); - leadingRead.setBaseQualities(Utils.dupBytes((byte)'@',76)); - leadingRead.setCigarString("1M75I"); - - SAMRecord indelOnlyRead = ArtificialSAMUtils.createArtificialRead(header,"indelOnly",0,secondLocus,76); - indelOnlyRead.setReadBases(Utils.dupBytes((byte) 'A', 76)); - indelOnlyRead.setBaseQualities(Utils.dupBytes((byte)'@',76)); - indelOnlyRead.setCigarString("76I"); - - SAMRecord fullMatchAfterIndel = ArtificialSAMUtils.createArtificialRead(header,"fullMatch",0,secondLocus,76); - fullMatchAfterIndel.setReadBases(Utils.dupBytes((byte)'A',76)); - fullMatchAfterIndel.setBaseQualities(Utils.dupBytes((byte)'@',76)); - fullMatchAfterIndel.setCigarString("75I1M"); - - List reads = Arrays.asList(leadingRead, indelOnlyRead, fullMatchAfterIndel); - - // create the iterator by state with the fake reads and fake records - li = makeLTBS(reads, createTestReadProperties()); - int currentLocus = firstLocus; - int numAlignmentContextsFound = 0; - - while(li.hasNext()) { - AlignmentContext alignmentContext = li.next(); - Assert.assertEquals(alignmentContext.getLocation().getStart(),currentLocus,"Current locus returned by alignment context is incorrect"); - - if(currentLocus == firstLocus) { - List readsAtLocus = alignmentContext.getBasePileup().getReads(); - Assert.assertEquals(readsAtLocus.size(),1,"Wrong number of reads at locus " + currentLocus); - Assert.assertSame(readsAtLocus.get(0),leadingRead,"leadingRead absent from pileup at locus " + currentLocus); - } - else if(currentLocus == secondLocus) { - List readsAtLocus = alignmentContext.getBasePileup().getReads(); - Assert.assertEquals(readsAtLocus.size(),2,"Wrong number of reads at locus " + currentLocus); - Assert.assertSame(readsAtLocus.get(0),indelOnlyRead,"indelOnlyRead absent from pileup at locus " + currentLocus); - Assert.assertSame(readsAtLocus.get(1),fullMatchAfterIndel,"fullMatchAfterIndel absent from pileup at locus " + currentLocus); - } - - currentLocus++; - numAlignmentContextsFound++; - } - - Assert.assertEquals(numAlignmentContextsFound, 2, "Found incorrect number of alignment contexts"); - } - - /** - * Test to make sure that reads supporting only an indel (example cigar string: 76I) are represented properly - */ - @Test - public void testWholeIndelReadRepresentedTest() { - final int firstLocus = 44367788, secondLocus = firstLocus + 1; - - SAMRecord read1 = ArtificialSAMUtils.createArtificialRead(header,"read1",0,secondLocus,1); - read1.setReadBases(Utils.dupBytes((byte) 'A', 1)); - read1.setBaseQualities(Utils.dupBytes((byte) '@', 1)); - read1.setCigarString("1I"); - - List reads = Arrays.asList(read1); - - // create the iterator by state with the fake reads and fake records - li = makeLTBS(reads, createTestReadProperties()); - - while(li.hasNext()) { - AlignmentContext alignmentContext = li.next(); - ReadBackedPileup p = alignmentContext.getBasePileup(); - Assert.assertTrue(p.getNumberOfElements() == 1); - PileupElement pe = p.iterator().next(); - Assert.assertTrue(pe.isBeforeInsertion()); - Assert.assertFalse(pe.isAfterInsertion()); - Assert.assertEquals(pe.getEventBases(), "A"); - } - - SAMRecord read2 = ArtificialSAMUtils.createArtificialRead(header,"read2",0,secondLocus,10); - read2.setReadBases(Utils.dupBytes((byte) 'A', 10)); - read2.setBaseQualities(Utils.dupBytes((byte) '@', 10)); - read2.setCigarString("10I"); - - reads = Arrays.asList(read2); - - // create the iterator by state with the fake reads and fake records - li = makeLTBS(reads, createTestReadProperties()); - - while(li.hasNext()) { - AlignmentContext alignmentContext = li.next(); - ReadBackedPileup p = alignmentContext.getBasePileup(); - Assert.assertTrue(p.getNumberOfElements() == 1); - PileupElement pe = p.iterator().next(); - Assert.assertTrue(pe.isBeforeInsertion()); - Assert.assertFalse(pe.isAfterInsertion()); - Assert.assertEquals(pe.getEventBases(), "AAAAAAAAAA"); - } - } - - //////////////////////////////////////////// - // comprehensive LIBS/PileupElement tests // - //////////////////////////////////////////// - - private static final int IS_BEFORE_DELETED_BASE_FLAG = 1; - private static final int IS_BEFORE_DELETION_START_FLAG = 2; - private static final int IS_AFTER_DELETED_BASE_FLAG = 4; - private static final int IS_AFTER_DELETION_END_FLAG = 8; - private static final int IS_BEFORE_INSERTION_FLAG = 16; - private static final int IS_AFTER_INSERTION_FLAG = 32; - private static final int IS_NEXT_TO_SOFTCLIP_FLAG = 64; - - private static class LIBSTest { - - - final String cigar; - final int readLength; - final List offsets; - final List flags; - - private LIBSTest(final String cigar, final int readLength, final List offsets, final List flags) { - this.cigar = cigar; - this.readLength = readLength; - this.offsets = offsets; - this.flags = flags; - } - } - - @DataProvider(name = "LIBSTest") - public Object[][] createLIBSTestData() { - return new Object[][]{ - {new LIBSTest("1I", 1, Arrays.asList(0), Arrays.asList(IS_BEFORE_INSERTION_FLAG))}, - {new LIBSTest("10I", 10, Arrays.asList(0), Arrays.asList(IS_BEFORE_INSERTION_FLAG))}, - {new LIBSTest("2M2I2M", 6, Arrays.asList(0,1,4,5), Arrays.asList(0,IS_BEFORE_INSERTION_FLAG,IS_AFTER_INSERTION_FLAG,0))}, - {new LIBSTest("2M2I", 4, Arrays.asList(0,1), Arrays.asList(0,IS_BEFORE_INSERTION_FLAG))}, - //TODO -- uncomment these when LIBS is fixed - //{new LIBSTest("2I2M", 4, Arrays.asList(2,3), Arrays.asList(IS_AFTER_INSERTION_FLAG,0))}, - //{new LIBSTest("1I1M1D1M", 3, Arrays.asList(0,1), Arrays.asList(IS_AFTER_INSERTION_FLAG | IS_BEFORE_DELETION_START_FLAG | IS_BEFORE_DELETED_BASE_FLAG,IS_AFTER_DELETED_BASE_FLAG | IS_AFTER_DELETION_END_FLAG))}, - //{new LIBSTest("1S1I1M", 3, Arrays.asList(2), Arrays.asList(IS_AFTER_INSERTION_FLAG))}, - {new LIBSTest("1M2D2M", 3, Arrays.asList(0,1,2), Arrays.asList(IS_BEFORE_DELETION_START_FLAG | IS_BEFORE_DELETED_BASE_FLAG,IS_AFTER_DELETED_BASE_FLAG | IS_AFTER_DELETION_END_FLAG,0))}, - {new LIBSTest("1S1M", 2, Arrays.asList(1), Arrays.asList(IS_NEXT_TO_SOFTCLIP_FLAG))}, - {new LIBSTest("1M1S", 2, Arrays.asList(0), Arrays.asList(IS_NEXT_TO_SOFTCLIP_FLAG))}, - {new LIBSTest("1S1M1I", 3, Arrays.asList(1), Arrays.asList(IS_BEFORE_INSERTION_FLAG | IS_NEXT_TO_SOFTCLIP_FLAG))} - }; - } - - @Test(dataProvider = "LIBSTest") - public void testLIBS(LIBSTest params) { - final int locus = 44367788; - - SAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "read", 0, locus, params.readLength); - read.setReadBases(Utils.dupBytes((byte) 'A', params.readLength)); - read.setBaseQualities(Utils.dupBytes((byte) '@', params.readLength)); - read.setCigarString(params.cigar); - - // create the iterator by state with the fake reads and fake records - li = makeLTBS(Arrays.asList(read), createTestReadProperties()); - - int offset = 0; - while ( li.hasNext() ) { - AlignmentContext alignmentContext = li.next(); - ReadBackedPileup p = alignmentContext.getBasePileup(); - Assert.assertTrue(p.getNumberOfElements() == 1); - PileupElement pe = p.iterator().next(); - - final int flag = params.flags.get(offset); - Assert.assertEquals(pe.isBeforeDeletedBase(), (flag & IS_BEFORE_DELETED_BASE_FLAG) != 0); - Assert.assertEquals(pe.isBeforeDeletionStart(), (flag & IS_BEFORE_DELETION_START_FLAG) != 0); - Assert.assertEquals(pe.isAfterDeletedBase(), (flag & IS_AFTER_DELETED_BASE_FLAG) != 0); - Assert.assertEquals(pe.isAfterDeletionEnd(), (flag & IS_AFTER_DELETION_END_FLAG) != 0); - Assert.assertEquals(pe.isBeforeInsertion(), (flag & IS_BEFORE_INSERTION_FLAG) != 0); - Assert.assertEquals(pe.isAfterInsertion(), (flag & IS_AFTER_INSERTION_FLAG) != 0); - Assert.assertEquals(pe.isNextToSoftClip(), (flag & IS_NEXT_TO_SOFTCLIP_FLAG) != 0); - - Assert.assertEquals(pe.getOffset(), params.offsets.get(offset).intValue()); - - offset++; - } - } - - //////////////////////////////////////////////// - // End comprehensive LIBS/PileupElement tests // - //////////////////////////////////////////////// - - - /////////////////////////////////////// - // Read State Manager Tests // - /////////////////////////////////////// - - private class PerSampleReadStateManagerTest extends TestDataProvider { - private List readCountsPerAlignmentStart; - private List reads; - private List> recordStatesByAlignmentStart; - private int removalInterval; - - public PerSampleReadStateManagerTest( List readCountsPerAlignmentStart, int removalInterval ) { - super(PerSampleReadStateManagerTest.class); - - this.readCountsPerAlignmentStart = readCountsPerAlignmentStart; - this.removalInterval = removalInterval; - - reads = new ArrayList(); - recordStatesByAlignmentStart = new ArrayList>(); - - setName(String.format("%s: readCountsPerAlignmentStart: %s removalInterval: %d", - getClass().getSimpleName(), readCountsPerAlignmentStart, removalInterval)); - } - - public void run() { - LocusIteratorByStateExperimental libs = makeLTBS(new ArrayList(), createTestReadProperties()); - LocusIteratorByStateExperimental.ReadStateManager readStateManager = - libs.new ReadStateManager(new ArrayList().iterator()); - LocusIteratorByStateExperimental.ReadStateManager.PerSampleReadStateManager perSampleReadStateManager = - readStateManager.new PerSampleReadStateManager(); - - makeReads(); - - for ( ArrayList stackRecordStates : recordStatesByAlignmentStart ) { - perSampleReadStateManager.addStatesAtNextAlignmentStart(stackRecordStates); - } - - // read state manager should have the right number of reads - Assert.assertEquals(reads.size(), perSampleReadStateManager.size()); - - Iterator originalReadsIterator = reads.iterator(); - Iterator recordStateIterator = perSampleReadStateManager.iterator(); - int recordStateCount = 0; - int numReadStatesRemoved = 0; - - // Do a first-pass validation of the record state iteration by making sure we get back everything we - // put in, in the same order, doing any requested removals of read states along the way - while ( recordStateIterator.hasNext() ) { - LocusIteratorByStateExperimental.SAMRecordState readState = recordStateIterator.next(); - recordStateCount++; - SAMRecord readFromPerSampleReadStateManager = readState.getRead(); - - Assert.assertTrue(originalReadsIterator.hasNext()); - SAMRecord originalRead = originalReadsIterator.next(); - - // The read we get back should be literally the same read in memory as we put in - Assert.assertTrue(originalRead == readFromPerSampleReadStateManager); - - // If requested, remove a read state every removalInterval states - if ( removalInterval > 0 && recordStateCount % removalInterval == 0 ) { - recordStateIterator.remove(); - numReadStatesRemoved++; - } - } - - Assert.assertFalse(originalReadsIterator.hasNext()); - - // If we removed any read states, do a second pass through the read states to make sure the right - // states were removed - if ( numReadStatesRemoved > 0 ) { - Assert.assertEquals(perSampleReadStateManager.size(), reads.size() - numReadStatesRemoved); - - originalReadsIterator = reads.iterator(); - recordStateIterator = perSampleReadStateManager.iterator(); - int readCount = 0; - int readStateCount = 0; - - // Match record states with the reads that should remain after removal - while ( recordStateIterator.hasNext() ) { - LocusIteratorByStateExperimental.SAMRecordState readState = recordStateIterator.next(); - readStateCount++; - SAMRecord readFromPerSampleReadStateManager = readState.getRead(); - - Assert.assertTrue(originalReadsIterator.hasNext()); - - SAMRecord originalRead = originalReadsIterator.next(); - readCount++; - - if ( readCount % removalInterval == 0 ) { - originalRead = originalReadsIterator.next(); // advance to next read, since the previous one should have been discarded - readCount++; - } - - // The read we get back should be literally the same read in memory as we put in (after accounting for removals) - Assert.assertTrue(originalRead == readFromPerSampleReadStateManager); - } - - Assert.assertEquals(readStateCount, reads.size() - numReadStatesRemoved); - } - - // Allow memory used by this test to be reclaimed - readCountsPerAlignmentStart = null; - reads = null; - recordStatesByAlignmentStart = null; - } - - private void makeReads() { - int alignmentStart = 1; - - for ( int readsThisStack : readCountsPerAlignmentStart ) { - ArrayList stackReads = new ArrayList(ArtificialSAMUtils.createStackOfIdenticalArtificialReads(readsThisStack, header, "foo", 0, alignmentStart, MathUtils.randomIntegerInRange(50, 100))); - ArrayList stackRecordStates = new ArrayList(); - - for ( SAMRecord read : stackReads ) { - stackRecordStates.add(new LocusIteratorByStateExperimental.SAMRecordState(read)); - } - - reads.addAll(stackReads); - recordStatesByAlignmentStart.add(stackRecordStates); - } - } - } - - @DataProvider(name = "PerSampleReadStateManagerTestDataProvider") - public Object[][] createPerSampleReadStateManagerTests() { - for ( List thisTestReadStateCounts : Arrays.asList( Arrays.asList(1), - Arrays.asList(2), - Arrays.asList(10), - Arrays.asList(1, 1), - Arrays.asList(2, 2), - Arrays.asList(10, 10), - Arrays.asList(1, 10), - Arrays.asList(10, 1), - Arrays.asList(1, 1, 1), - Arrays.asList(2, 2, 2), - Arrays.asList(10, 10, 10), - Arrays.asList(1, 1, 1, 1, 1, 1), - Arrays.asList(10, 10, 10, 10, 10, 10), - Arrays.asList(1, 2, 10, 1, 2, 10) - ) ) { - - for ( int removalInterval : Arrays.asList(0, 2, 3) ) { - new PerSampleReadStateManagerTest(thisTestReadStateCounts, removalInterval); - } - } - - return PerSampleReadStateManagerTest.getTests(PerSampleReadStateManagerTest.class); - } - - @Test(dataProvider = "PerSampleReadStateManagerTestDataProvider") - public void runPerSampleReadStateManagerTest( PerSampleReadStateManagerTest test ) { - logger.warn("Running test: " + test); - - test.run(); - } -} diff --git a/public/java/test/org/broadinstitute/sting/utils/LegacyReservoirDownsamplerUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/LegacyReservoirDownsamplerUnitTest.java deleted file mode 100644 index 5b052454a..000000000 --- a/public/java/test/org/broadinstitute/sting/utils/LegacyReservoirDownsamplerUnitTest.java +++ /dev/null @@ -1,166 +0,0 @@ -package org.broadinstitute.sting.utils; - -import org.broadinstitute.sting.utils.sam.GATKSAMRecord; -import org.testng.Assert; -import org.testng.annotations.Test; -import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; -import net.sf.samtools.SAMRecord; -import net.sf.samtools.SAMFileHeader; - -import java.util.*; - -/** - * Basic tests to prove the integrity of the reservoir downsampler. - * At the moment, always run tests on SAM records as that's the task - * for which the downsampler was conceived. - * - * @author mhanna - * @version 0.1 - */ -public class LegacyReservoirDownsamplerUnitTest { - private static final SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1,1,200); - - - @Test - public void testEmptyIterator() { - ReservoirDownsampler downsampler = new ReservoirDownsampler(1); - Assert.assertTrue(downsampler.isEmpty(),"Downsampler is not empty but should be."); - } - - @Test - public void testOneElementWithPoolSizeOne() { - List reads = Collections.singletonList(ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,76)); - ReservoirDownsampler downsampler = new ReservoirDownsampler(1); - downsampler.addAll(reads); - - Assert.assertFalse(downsampler.isEmpty(),"Downsampler is empty but shouldn't be"); - Collection batchedReads = downsampler.getDownsampledContents(); - Assert.assertEquals(batchedReads.size(), 1, "Downsampler is returning the wrong number of reads"); - Assert.assertSame(batchedReads.iterator().next(), reads.get(0), "Downsampler is returning an incorrect read"); - } - - @Test - public void testOneElementWithPoolSizeGreaterThanOne() { - List reads = Collections.singletonList(ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,76)); - ReservoirDownsampler downsampler = new ReservoirDownsampler(5); - downsampler.addAll(reads); - - Assert.assertFalse(downsampler.isEmpty(),"Downsampler is empty but shouldn't be"); - Collection batchedReads = downsampler.getDownsampledContents(); - Assert.assertEquals(batchedReads.size(), 1, "Downsampler is returning the wrong number of reads"); - Assert.assertSame(batchedReads.iterator().next(), reads.get(0), "Downsampler is returning an incorrect read"); - - } - - @Test - public void testPoolFilledPartially() { - List reads = new ArrayList(); - reads.add(ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,76)); - reads.add(ArtificialSAMUtils.createArtificialRead(header,"read2",0,1,76)); - reads.add(ArtificialSAMUtils.createArtificialRead(header,"read3",0,1,76)); - ReservoirDownsampler downsampler = new ReservoirDownsampler(5); - downsampler.addAll(reads); - - Assert.assertFalse(downsampler.isEmpty(),"Downsampler is empty but shouldn't be"); - List batchedReads = new ArrayList(downsampler.getDownsampledContents()); - Assert.assertEquals(batchedReads.size(), 3, "Downsampler is returning the wrong number of reads"); - - Assert.assertSame(batchedReads.get(0), reads.get(0), "Downsampler read 1 is incorrect"); - Assert.assertSame(batchedReads.get(1), reads.get(1), "Downsampler read 2 is incorrect"); - Assert.assertSame(batchedReads.get(2), reads.get(2), "Downsampler read 3 is incorrect"); - } - - @Test - public void testPoolFilledExactly() { - List reads = new ArrayList(); - reads.add(ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,76)); - reads.add(ArtificialSAMUtils.createArtificialRead(header,"read2",0,1,76)); - reads.add(ArtificialSAMUtils.createArtificialRead(header,"read3",0,1,76)); - reads.add(ArtificialSAMUtils.createArtificialRead(header,"read4",0,1,76)); - reads.add(ArtificialSAMUtils.createArtificialRead(header,"read5",0,1,76)); - ReservoirDownsampler downsampler = new ReservoirDownsampler(5); - downsampler.addAll(reads); - - Assert.assertFalse(downsampler.isEmpty(),"Downsampler is empty but shouldn't be"); - List batchedReads = new ArrayList(downsampler.getDownsampledContents()); - Assert.assertEquals(batchedReads.size(), 5, "Downsampler is returning the wrong number of reads"); - Assert.assertSame(batchedReads.iterator().next(), reads.get(0), "Downsampler is returning an incorrect read"); - - Assert.assertSame(batchedReads.get(0), reads.get(0), "Downsampler read 1 is incorrect"); - Assert.assertSame(batchedReads.get(1), reads.get(1), "Downsampler read 2 is incorrect"); - Assert.assertSame(batchedReads.get(2), reads.get(2), "Downsampler read 3 is incorrect"); - Assert.assertSame(batchedReads.get(3), reads.get(3), "Downsampler read 4 is incorrect"); - Assert.assertSame(batchedReads.get(4), reads.get(4), "Downsampler read 5 is incorrect"); - } - - @Test - public void testLargerPileWithZeroElementPool() { - List reads = new ArrayList(); - reads.add(ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,76)); - reads.add(ArtificialSAMUtils.createArtificialRead(header,"read2",0,1,76)); - reads.add(ArtificialSAMUtils.createArtificialRead(header,"read3",0,1,76)); - ReservoirDownsampler downsampler = new ReservoirDownsampler(0); - downsampler.addAll(reads); - - Assert.assertTrue(downsampler.isEmpty(),"Downsampler isn't empty but should be"); - List batchedReads = new ArrayList(downsampler.getDownsampledContents()); - Assert.assertEquals(batchedReads.size(), 0, "Downsampler is returning the wrong number of reads"); - } - - @Test - public void testLargerPileWithSingleElementPool() { - List reads = new ArrayList(); - reads.add(ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,76)); - reads.add(ArtificialSAMUtils.createArtificialRead(header,"read2",0,1,76)); - reads.add(ArtificialSAMUtils.createArtificialRead(header,"read3",0,1,76)); - reads.add(ArtificialSAMUtils.createArtificialRead(header,"read4",0,1,76)); - reads.add(ArtificialSAMUtils.createArtificialRead(header,"read5",0,1,76)); - ReservoirDownsampler downsampler = new ReservoirDownsampler(1); - downsampler.addAll(reads); - - Assert.assertFalse(downsampler.isEmpty(),"Downsampler is empty but shouldn't be"); - List batchedReads = new ArrayList(downsampler.getDownsampledContents()); - Assert.assertEquals(batchedReads.size(), 1, "Downsampler is returning the wrong number of reads"); - Assert.assertTrue(reads.contains(batchedReads.get(0)),"Downsampler is returning a bad read."); - } - - @Test - public void testFillingAcrossLoci() { - List reads = new ArrayList(); - reads.add(ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,76)); - ReservoirDownsampler downsampler = new ReservoirDownsampler(5); - downsampler.addAll(reads); - - Assert.assertFalse(downsampler.isEmpty(),"Downsampler is empty but shouldn't be"); - List batchedReads = new ArrayList(downsampler.getDownsampledContents()); - Assert.assertEquals(batchedReads.size(), 1, "Downsampler is returning the wrong number of reads"); - Assert.assertEquals(batchedReads.get(0), reads.get(0), "Downsampler is returning an incorrect read."); - - reads.clear(); - reads.add(ArtificialSAMUtils.createArtificialRead(header,"read2",0,2,76)); - reads.add(ArtificialSAMUtils.createArtificialRead(header,"read3",0,2,76)); - - downsampler.clear(); - downsampler.addAll(reads); - - Assert.assertFalse(downsampler.isEmpty(),"Downsampler is empty but shouldn't be"); - batchedReads = new ArrayList(downsampler.getDownsampledContents()); - Assert.assertEquals(batchedReads.size(), 2, "Downsampler is returning the wrong number of reads"); - Assert.assertEquals(batchedReads.get(0), reads.get(0), "Downsampler is returning an incorrect read."); - Assert.assertEquals(batchedReads.get(1), reads.get(1), "Downsampler is returning an incorrect read."); - - reads.clear(); - reads.add(ArtificialSAMUtils.createArtificialRead(header,"read4",0,3,76)); - reads.add(ArtificialSAMUtils.createArtificialRead(header,"read5",0,3,76)); - - downsampler.clear(); - downsampler.addAll(reads); - - Assert.assertFalse(downsampler.isEmpty(),"Downsampler is empty but shouldn't be"); - batchedReads = new ArrayList(downsampler.getDownsampledContents()); - Assert.assertEquals(batchedReads.size(), 2, "Downsampler is returning the wrong number of reads"); - Assert.assertEquals(batchedReads.get(0), reads.get(0), "Downsampler is returning an incorrect read."); - Assert.assertEquals(batchedReads.get(1), reads.get(1), "Downsampler is returning an incorrect read."); - } - -} diff --git a/public/java/test/org/broadinstitute/sting/utils/nanoScheduler/InputProducerUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/nanoScheduler/InputProducerUnitTest.java deleted file mode 100644 index b3365c13c..000000000 --- a/public/java/test/org/broadinstitute/sting/utils/nanoScheduler/InputProducerUnitTest.java +++ /dev/null @@ -1,71 +0,0 @@ -package org.broadinstitute.sting.utils.nanoScheduler; - -import org.broadinstitute.sting.BaseTest; -import org.testng.Assert; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.LinkedBlockingDeque; - -/** - * UnitTests for the InputProducer - * - * User: depristo - * Date: 8/24/12 - * Time: 11:25 AM - * To change this template use File | Settings | File Templates. - */ -public class InputProducerUnitTest extends BaseTest { - @DataProvider(name = "InputProducerTest") - public Object[][] createInputProducerTest() { - List tests = new ArrayList(); - - for ( final int nElements : Arrays.asList(0, 1, 10, 100, 1000, 10000, 100000) ) { - for ( final int queueSize : Arrays.asList(1, 10, 100) ) { - tests.add(new Object[]{ nElements, queueSize }); - } - } - - return tests.toArray(new Object[][]{}); - } - - @Test(enabled = true, dataProvider = "InputProducerTest", timeOut = NanoSchedulerUnitTest.NANO_SCHEDULE_MAX_RUNTIME) - public void testInputProducer(final int nElements, final int queueSize) throws InterruptedException { - final List elements = new ArrayList(nElements); - for ( int i = 0; i < nElements; i++ ) elements.add(i); - - final LinkedBlockingDeque.InputValue> readQueue = - new LinkedBlockingDeque.InputValue>(queueSize); - - final InputProducer ip = new InputProducer(elements.iterator(), null, readQueue); - - final ExecutorService es = Executors.newSingleThreadExecutor(); - es.submit(ip); - - int lastValue = -1; - int nRead = 0; - while ( true ) { - final int observedQueueSize = readQueue.size(); - Assert.assertTrue(observedQueueSize <= queueSize, - "Reader is enqueuing more elements " + observedQueueSize + " than allowed " + queueSize); - - final InputProducer.InputValue value = readQueue.take(); - if ( value.isLast() ) { - Assert.assertEquals(nRead, nElements, "Number of input values " + nRead + " not all that are expected " + nElements); - Assert.assertEquals(readQueue.size(), 0, "Last queue element found but queue contains more values!"); - break; - } else { - Assert.assertTrue(lastValue < value.getValue(), "Read values coming out of order!"); - final int expected = lastValue + 1; - Assert.assertEquals((int)value.getValue(), expected, "Value observed " + value.getValue() + " not equal to the expected value " + expected); - nRead++; - lastValue = value.getValue(); - } - } - } -} diff --git a/public/java/test/org/broadinstitute/sting/utils/nanoScheduler/NanoSchedulerUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/nanoScheduler/NanoSchedulerUnitTest.java deleted file mode 100644 index 47dcc1d5e..000000000 --- a/public/java/test/org/broadinstitute/sting/utils/nanoScheduler/NanoSchedulerUnitTest.java +++ /dev/null @@ -1,182 +0,0 @@ -package org.broadinstitute.sting.utils.nanoScheduler; - -import org.apache.log4j.BasicConfigurator; -import org.broadinstitute.sting.BaseTest; -import org.testng.Assert; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Iterator; -import java.util.List; - -/** - * UnitTests for the NanoScheduler - * - * User: depristo - * Date: 8/24/12 - * Time: 11:25 AM - * To change this template use File | Settings | File Templates. - */ -public class NanoSchedulerUnitTest extends BaseTest { - public static final int NANO_SCHEDULE_MAX_RUNTIME = 60000; - - private static class Map2x implements NSMapFunction { - @Override public Integer apply(Integer input) { return input * 2; } - } - - private static class ReduceSum implements NSReduceFunction { - int prevOne = Integer.MIN_VALUE; - - @Override public Integer apply(Integer one, Integer sum) { - Assert.assertTrue(prevOne < one, "Reduce came in out of order. Prev " + prevOne + " cur " + one); - return one + sum; - } - } - - private static class ProgressCallback implements NSProgressFunction { - int callBacks = 0; - - @Override - public void progress(Integer lastMapInput) { - callBacks++; - } - } - - - private static int sum2x(final int start, final int end) { - int sum = 0; - for ( int i = start; i < end; i++ ) - sum += 2 * i; - return sum; - } - - private static class NanoSchedulerBasicTest extends TestDataProvider { - final int bufferSize, nThreads, start, end, expectedResult; - - public NanoSchedulerBasicTest(final int bufferSize, final int nThreads, final int start, final int end) { - super(NanoSchedulerBasicTest.class); - this.bufferSize = bufferSize; - this.nThreads = nThreads; - this.start = start; - this.end = end; - this.expectedResult = sum2x(start, end); - setName(String.format("%s nt=%d buf=%d start=%d end=%d sum=%d", - getClass().getSimpleName(), nThreads, bufferSize, start, end, expectedResult)); - } - - public Iterator makeReader() { - final List ints = new ArrayList(); - for ( int i = start; i < end; i++ ) - ints.add(i); - return ints.iterator(); - } - - public int nExpectedCallbacks() { - int nElements = Math.max(end - start, 0); - return nElements / bufferSize; - } - - public Map2x makeMap() { return new Map2x(); } - public Integer initReduce() { return 0; } - public ReduceSum makeReduce() { return new ReduceSum(); } - } - - static NanoSchedulerBasicTest exampleTest = null; - @DataProvider(name = "NanoSchedulerBasicTest") - public Object[][] createNanoSchedulerBasicTest() { - for ( final int bufferSize : Arrays.asList(1, 10, 1000, 1000000) ) { - for ( final int nt : Arrays.asList(1, 2, 4) ) { - for ( final int start : Arrays.asList(0) ) { - for ( final int end : Arrays.asList(0, 1, 2, 11, 10000, 100000) ) { - exampleTest = new NanoSchedulerBasicTest(bufferSize, nt, start, end); - } - } - } - } - - return NanoSchedulerBasicTest.getTests(NanoSchedulerBasicTest.class); - } - - @Test(enabled = true, dataProvider = "NanoSchedulerBasicTest", timeOut = NANO_SCHEDULE_MAX_RUNTIME) - public void testSingleThreadedNanoScheduler(final NanoSchedulerBasicTest test) throws InterruptedException { - logger.warn("Running " + test); - if ( test.nThreads == 1 ) - testNanoScheduler(test); - } - - @Test(enabled = true, dataProvider = "NanoSchedulerBasicTest", timeOut = NANO_SCHEDULE_MAX_RUNTIME, dependsOnMethods = "testSingleThreadedNanoScheduler") - public void testMultiThreadedNanoScheduler(final NanoSchedulerBasicTest test) throws InterruptedException { - logger.warn("Running " + test); - if ( test.nThreads >= 1 ) - testNanoScheduler(test); - } - - private void testNanoScheduler(final NanoSchedulerBasicTest test) throws InterruptedException { - final NanoScheduler nanoScheduler = - new NanoScheduler(test.bufferSize, test.nThreads); - - final ProgressCallback callback = new ProgressCallback(); - nanoScheduler.setProgressFunction(callback); - - Assert.assertEquals(nanoScheduler.getInputBufferSize(), test.bufferSize, "inputBufferSize argument"); - Assert.assertEquals(nanoScheduler.getnThreads(), test.nThreads, "nThreads argument"); - - final Integer sum = nanoScheduler.execute(test.makeReader(), test.makeMap(), test.initReduce(), test.makeReduce()); - Assert.assertNotNull(sum); - Assert.assertEquals((int)sum, test.expectedResult, "NanoScheduler sum not the same as calculated directly"); - - Assert.assertTrue(callback.callBacks >= test.nExpectedCallbacks(), "Not enough callbacks detected. Expected at least " + test.nExpectedCallbacks() + " but saw only " + callback.callBacks); - nanoScheduler.shutdown(); - } - - @Test(enabled = true, dataProvider = "NanoSchedulerBasicTest", dependsOnMethods = "testMultiThreadedNanoScheduler", timeOut = NANO_SCHEDULE_MAX_RUNTIME) - public void testNanoSchedulerInLoop(final NanoSchedulerBasicTest test) throws InterruptedException { - if ( test.bufferSize > 1) { - logger.warn("Running " + test); - - final NanoScheduler nanoScheduler = - new NanoScheduler(test.bufferSize, test.nThreads); - - // test reusing the scheduler - for ( int i = 0; i < 10; i++ ) { - final Integer sum = nanoScheduler.execute(test.makeReader(), test.makeMap(), test.initReduce(), test.makeReduce()); - Assert.assertNotNull(sum); - Assert.assertEquals((int)sum, test.expectedResult, "NanoScheduler sum not the same as calculated directly"); - } - - nanoScheduler.shutdown(); - } - } - - @Test(timeOut = NANO_SCHEDULE_MAX_RUNTIME) - public void testShutdown() throws InterruptedException { - final NanoScheduler nanoScheduler = new NanoScheduler(1, 2); - Assert.assertFalse(nanoScheduler.isShutdown(), "scheduler should be alive"); - nanoScheduler.shutdown(); - Assert.assertTrue(nanoScheduler.isShutdown(), "scheduler should be dead"); - } - - @Test(expectedExceptions = IllegalStateException.class, timeOut = NANO_SCHEDULE_MAX_RUNTIME) - public void testShutdownExecuteFailure() throws InterruptedException { - final NanoScheduler nanoScheduler = new NanoScheduler(1, 2); - nanoScheduler.shutdown(); - nanoScheduler.execute(exampleTest.makeReader(), exampleTest.makeMap(), exampleTest.initReduce(), exampleTest.makeReduce()); - } - - public static void main(String [ ] args) { - org.apache.log4j.Logger logger = org.apache.log4j.Logger.getRootLogger(); - BasicConfigurator.configure(); - logger.setLevel(org.apache.log4j.Level.DEBUG); - - final NanoSchedulerBasicTest test = new NanoSchedulerBasicTest(1000, Integer.valueOf(args[0]), 0, Integer.valueOf(args[1])); - final NanoScheduler nanoScheduler = - new NanoScheduler(test.bufferSize, test.nThreads); - nanoScheduler.setDebug(true); - - final Integer sum = nanoScheduler.execute(test.makeReader(), test.makeMap(), test.initReduce(), test.makeReduce()); - System.out.printf("Sum = %d, expected =%d%n", sum, test.expectedResult); - nanoScheduler.shutdown(); - } -} diff --git a/public/java/test/org/broadinstitute/sting/utils/nanoScheduler/ReducerThreadUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/nanoScheduler/ReducerThreadUnitTest.java deleted file mode 100644 index 61d1330bc..000000000 --- a/public/java/test/org/broadinstitute/sting/utils/nanoScheduler/ReducerThreadUnitTest.java +++ /dev/null @@ -1,94 +0,0 @@ -package org.broadinstitute.sting.utils.nanoScheduler; - -import org.broadinstitute.sting.BaseTest; -import org.testng.Assert; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.concurrent.*; - -/** - * UnitTests for the InputProducer - * - * User: depristo - * Date: 8/24/12 - * Time: 11:25 AM - * To change this template use File | Settings | File Templates. - */ -public class ReducerThreadUnitTest extends BaseTest { - @DataProvider(name = "ReducerThreadTest") - public Object[][] createReducerThreadTest() { - List tests = new ArrayList(); - - for ( final int nElements : Arrays.asList(0, 1, 10, 100, 1000, 10000, 100000) ) { - tests.add(new Object[]{ nElements }); - } - - return tests.toArray(new Object[][]{}); - } - - @Test(enabled = true, dataProvider = "ReducerThreadTest", timeOut = NanoSchedulerUnitTest.NANO_SCHEDULE_MAX_RUNTIME) - public void testReducerThreadTest(final int nElements) throws Exception { - List values = new ArrayList(nElements); - List jobIDs = new ArrayList(nElements); - for ( int i = 0; i < nElements; i++ ) { - values.add(i); - jobIDs.add(i); - } - - runTests(values, jobIDs); - } - - @Test(enabled = true, timeOut = NanoSchedulerUnitTest.NANO_SCHEDULE_MAX_RUNTIME, expectedExceptions = ExecutionException.class) - public void testReducerThreadTestByJobOrder() throws Exception { - runTests(Arrays.asList(0, 1, 2), Arrays.asList(1, 3, 2)); - } - - private void runTests( final List mapValues, final List jobIDs) throws Exception { - final LinkedBlockingDeque>> mapResultsQueue = - new LinkedBlockingDeque>>(mapValues.size()+1); - - for ( int i = 0; i < mapValues.size(); i++ ) { - final int value = mapValues.get(i); - final int jobID = jobIDs.get(i); - final MapResult mapResult = new MapResult(value, jobID); - mapResultsQueue.add(new FutureValue>(mapResult)); - } - mapResultsQueue.add(new FutureValue>(new MapResult())); - - final ReduceSumTest reduce = new ReduceSumTest(mapResultsQueue); - final ReducerThread thread - = new ReducerThread(reduce, null, 0, mapResultsQueue); - - final ExecutorService es = Executors.newSingleThreadExecutor(); - final Future value = es.submit(thread); - value.get(); - - Assert.assertEquals(reduce.nRead, mapValues.size()); - } - - public class ReduceSumTest implements NSReduceFunction { - final LinkedBlockingDeque>> mapResultsQueue; - int nRead = 0; - int lastValue = -1; - - public ReduceSumTest(LinkedBlockingDeque>> mapResultsQueue) { - this.mapResultsQueue = mapResultsQueue; - } - - @Override public Integer apply(Integer one, Integer sum) { - Assert.assertTrue(lastValue < one, "Reduce came in out of order. Prev " + lastValue + " cur " + one); - - Assert.assertTrue(lastValue < one, "Read values coming out of order!"); - final int expected = lastValue + 1; - Assert.assertEquals((int)one, expected, "Value observed " + one + " not equal to the expected value " + expected); - nRead++; - lastValue = expected; - - return one + sum; - } - } -} diff --git a/public/java/test/org/broadinstitute/sting/utils/sam/ArtificialSingleSampleReadStreamUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/sam/ArtificialSingleSampleReadStreamUnitTest.java deleted file mode 100644 index 74626d031..000000000 --- a/public/java/test/org/broadinstitute/sting/utils/sam/ArtificialSingleSampleReadStreamUnitTest.java +++ /dev/null @@ -1,161 +0,0 @@ -package org.broadinstitute.sting.utils.sam; - -import net.sf.samtools.SAMFileHeader; -import net.sf.samtools.SAMReadGroupRecord; -import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.testng.annotations.Test; -import org.testng.annotations.DataProvider; - -import org.broadinstitute.sting.BaseTest; - -public class ArtificialSingleSampleReadStreamUnitTest extends BaseTest { - - private static class ArtificialSingleSampleReadStreamTest extends TestDataProvider { - private ArtificialSingleSampleReadStream stream; - private ArtificialSingleSampleReadStreamAnalyzer streamAnalyzer; - - public ArtificialSingleSampleReadStreamTest( ArtificialSingleSampleReadStream stream ) { - super(ArtificialSingleSampleReadStreamTest.class); - - this.stream = stream; - - setName(String.format("%s: numContigs=%d stacksPerContig=%d readsPerStack=%d-%d distanceBetweenStacks=%d-%d readLength=%d-%d unmappedReads=%d", - getClass().getSimpleName(), - stream.getNumContigs(), - stream.getNumStacksPerContig(), - stream.getMinReadsPerStack(), - stream.getMaxReadsPerStack(), - stream.getMinDistanceBetweenStacks(), - stream.getMaxDistanceBetweenStacks(), - stream.getMinReadLength(), - stream.getMaxReadLength(), - stream.getNumUnmappedReads())); - } - - public void run() { - streamAnalyzer= new ArtificialSingleSampleReadStreamAnalyzer(stream); - - streamAnalyzer.analyze(stream); - - // Check whether the observed properties of the stream match its nominal properties - streamAnalyzer.validate(); - } - } - - @DataProvider(name = "ArtificialSingleSampleReadStreamTestDataProvider") - public Object[][] createArtificialSingleSampleReadStreamTests() { - SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(3, 1, 10000); - String readGroupID = "testReadGroup"; - SAMReadGroupRecord readGroup = new SAMReadGroupRecord(readGroupID); - readGroup.setSample("testSample"); - header.addReadGroup(readGroup); - - GenomeAnalysisEngine.resetRandomGenerator(); - - // brute force testing! - for ( int numContigs = 0; numContigs <= 2; numContigs++ ) { - for ( int stacksPerContig = 0; stacksPerContig <= 2; stacksPerContig++ ) { - for ( int minReadsPerStack = 1; minReadsPerStack <= 2; minReadsPerStack++ ) { - for ( int maxReadsPerStack = 1; maxReadsPerStack <= 3; maxReadsPerStack++ ) { - for ( int minDistanceBetweenStacks = 1; minDistanceBetweenStacks <= 2; minDistanceBetweenStacks++ ) { - for ( int maxDistanceBetweenStacks = 1; maxDistanceBetweenStacks <= 3; maxDistanceBetweenStacks++ ) { - for ( int minReadLength = 1; minReadLength <= 2; minReadLength++ ) { - for ( int maxReadLength = 1; maxReadLength <= 3; maxReadLength++ ) { - for ( int numUnmappedReads = 0; numUnmappedReads <= 2; numUnmappedReads++ ) { - // Only test sane combinations here - if ( minReadsPerStack <= maxReadsPerStack && - minDistanceBetweenStacks <= maxDistanceBetweenStacks && - minReadLength <= maxReadLength && - ((numContigs > 0 && stacksPerContig > 0) || (numContigs == 0 && stacksPerContig == 0)) ) { - - new ArtificialSingleSampleReadStreamTest(new ArtificialSingleSampleReadStream(header, - readGroupID, - numContigs, - stacksPerContig, - minReadsPerStack, - maxReadsPerStack, - minDistanceBetweenStacks, - maxDistanceBetweenStacks, - minReadLength, - maxReadLength, - numUnmappedReads)); - } - } - } - } - } - } - } - } - } - } - - return ArtificialSingleSampleReadStreamTest.getTests(ArtificialSingleSampleReadStreamTest.class); - } - - @Test(dataProvider = "ArtificialSingleSampleReadStreamTestDataProvider") - public void testArtificialSingleSampleReadStream( ArtificialSingleSampleReadStreamTest test ) { - logger.warn("Running test: " + test); - - GenomeAnalysisEngine.resetRandomGenerator(); - test.run(); - } - - @DataProvider(name = "ArtificialSingleSampleReadStreamInvalidArgumentsTestDataProvider") - public Object[][] createInvalidArgumentsTests() { - SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(3, 1, 10000); - String readGroupID = "testReadGroup"; - header.addReadGroup(new SAMReadGroupRecord(readGroupID)); - - return new Object[][] { - {"testNullHeader", null, readGroupID, 1, 1, 1, 2, 1, 2, 1, 2, 0}, - {"testNullReadGroup", header, null, 1, 1, 1, 2, 1, 2, 1, 2, 0}, - {"testInvalidReadGroup", header, "foo", 1, 1, 1, 2, 1, 2, 1, 2, 0}, - {"testInvalidNumContigs", header, readGroupID, -1, 1, 1, 2, 1, 2, 1, 2, 0}, - {"testInvalidNumStacksPerContig", header, readGroupID, 1, -1, 1, 2, 1, 2, 1, 2, 0}, - {"test0ContigsNon0StacksPerContig", header, readGroupID, 0, 1, 1, 2, 1, 2, 1, 2, 0}, - {"testNon0Contigs0StacksPerContig", header, readGroupID, 1, 0, 1, 2, 1, 2, 1, 2, 0}, - {"testInvalidMinReadsPerStack", header, readGroupID, 1, 1, -1, 2, 1, 2, 1, 2, 0}, - {"testInvalidMaxReadsPerStack", header, readGroupID, 1, 1, 1, -2, 1, 2, 1, 2, 0}, - {"testInvalidMinDistanceBetweenStacks", header, readGroupID, 1, 1, 1, 2, -1, 2, 1, 2, 0}, - {"testInvalidMaxDistanceBetweenStacks", header, readGroupID, 1, 1, 1, 2, 1, -2, 1, 2, 0}, - {"testInvalidMinReadLength", header, readGroupID, 1, 1, 1, 2, 1, 2, -1, 2, 0}, - {"testInvalidMaxReadLength", header, readGroupID, 1, 1, 1, 2, 1, 2, 1, -2, 0}, - {"testInvalidReadsPerStackRange", header, readGroupID, 1, 1, 2, 1, 1, 2, 1, 2, 0}, - {"testInvalidDistanceBetweenStacksRange", header, readGroupID, 1, 1, 1, 2, 2, 1, 1, 2, 0}, - {"testInvalidReadLengthRange", header, readGroupID, 1, 1, 1, 2, 1, 2, 2, 1, 0}, - {"testInvalidNumUnmappedReads", header, readGroupID, 1, 1, 1, 2, 1, 2, 1, 2, -1}, - }; - } - - @Test(dataProvider = "ArtificialSingleSampleReadStreamInvalidArgumentsTestDataProvider", - expectedExceptions = ReviewedStingException.class) - public void testInvalidArguments( String testName, - SAMFileHeader header, - String readGroupID, - int numContigs, - int numStacksPerContig, - int minReadsPerStack, - int maxReadsPerStack, - int minDistanceBetweenStacks, - int maxDistanceBetweenStacks, - int minReadLength, - int maxReadLength, - int numUnmappedReads ) { - - logger.warn("Running test: " + testName); - - ArtificialSingleSampleReadStream stream = new ArtificialSingleSampleReadStream(header, - readGroupID, - numContigs, - numStacksPerContig, - minReadsPerStack, - maxReadsPerStack, - minDistanceBetweenStacks, - maxDistanceBetweenStacks, - minReadLength, - maxReadLength, - numUnmappedReads); - } -} diff --git a/public/java/test/org/broadinstitute/sting/utils/threading/EfficiencyMonitoringThreadFactoryUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/threading/EfficiencyMonitoringThreadFactoryUnitTest.java deleted file mode 100755 index 7381bebc4..000000000 --- a/public/java/test/org/broadinstitute/sting/utils/threading/EfficiencyMonitoringThreadFactoryUnitTest.java +++ /dev/null @@ -1,184 +0,0 @@ -/* - * The MIT License - * - * Copyright (c) 2009 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ -package org.broadinstitute.sting.utils.threading; - -import org.apache.log4j.Priority; -import org.broadinstitute.sting.BaseTest; -import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.testng.Assert; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; - -import java.util.Arrays; -import java.util.Collections; -import java.util.List; -import java.util.concurrent.Callable; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.TimeUnit; - -/** - * Tests for the state monitoring thread factory. - */ -public class EfficiencyMonitoringThreadFactoryUnitTest extends BaseTest { - // the duration of the tests -- 100 ms is tolerable given the number of tests we are doing - private final static long THREAD_TARGET_DURATION_IN_MILLISECOND = 100000; - private final static int MAX_THREADS = 4; - final static Object GLOBAL_LOCK = new Object(); - - private class StateTest extends TestDataProvider { - private final double TOLERANCE = 0.1; // willing to tolerate a 10% error - - final List statesForThreads; - - public StateTest(final List statesForThreads) { - super(StateTest.class); - this.statesForThreads = statesForThreads; - setName("StateTest " + Utils.join(",", statesForThreads)); - } - - public List getStatesForThreads() { - return statesForThreads; - } - - public int getNStates() { return statesForThreads.size(); } - - public double maxStatePercent(final EfficiencyMonitoringThreadFactory.State state) { return 100*(fraction(state) + TOLERANCE); } - public double minStatePercent(final EfficiencyMonitoringThreadFactory.State state) { return 100*(fraction(state) - TOLERANCE); } - - private double fraction(final EfficiencyMonitoringThreadFactory.State state) { - return Collections.frequency(statesForThreads, state) / (1.0 * statesForThreads.size()); - } - } - - /** - * Test helper threading class that puts the thread into RUNNING, BLOCKED, or WAITING state as - * requested for input argument - */ - private static class StateTestThread implements Callable { - private final EfficiencyMonitoringThreadFactory.State stateToImplement; - - private StateTestThread(final EfficiencyMonitoringThreadFactory.State stateToImplement) { - this.stateToImplement = stateToImplement; - } - - @Override - public Double call() throws Exception { - switch ( stateToImplement ) { - case USER_CPU: - // do some work until we get to THREAD_TARGET_DURATION_IN_MILLISECOND - double sum = 0.0; - final long startTime = System.currentTimeMillis(); - for ( int i = 1; System.currentTimeMillis() - startTime < (THREAD_TARGET_DURATION_IN_MILLISECOND - 1); i++ ) { - sum += Math.log10(i); - } - return sum; - case WAITING: - Thread.currentThread().sleep(THREAD_TARGET_DURATION_IN_MILLISECOND); - return 0.0; - case BLOCKING: - if ( EfficiencyMonitoringThreadFactory.DEBUG ) logger.warn("Blocking..."); - synchronized (GLOBAL_LOCK) { - // the GLOBAL_LOCK must be held by the unit test itself for this to properly block - if ( EfficiencyMonitoringThreadFactory.DEBUG ) logger.warn(" ... done blocking"); - } - return 0.0; - case WAITING_FOR_IO: - // TODO -- implement me - // shouldn't ever get here, throw an exception - throw new ReviewedStingException("WAITING_FOR_IO testing currently not implemented, until we figure out how to force a system call block"); - default: - throw new ReviewedStingException("Unexpected thread test state " + stateToImplement); - } - } - } - - @DataProvider(name = "StateTest") - public Object[][] createStateTest() { - for ( final int nThreads : Arrays.asList(3) ) { - //final List allStates = Arrays.asList(EfficiencyMonitoringThreadFactory.State.WAITING_FOR_IO); - final List allStates = Arrays.asList(EfficiencyMonitoringThreadFactory.State.USER_CPU, EfficiencyMonitoringThreadFactory.State.WAITING, EfficiencyMonitoringThreadFactory.State.BLOCKING); - //final List allStates = Arrays.asList(EfficiencyMonitoringThreadFactory.State.values()); - for (final List states : Utils.makePermutations(allStates, nThreads, true) ) { - //if ( Collections.frequency(states, Thread.State.BLOCKED) > 0) - new StateTest(states); - } - } - - return StateTest.getTests(StateTest.class); - } - - @Test(enabled = true, dataProvider = "StateTest", timeOut = MAX_THREADS * THREAD_TARGET_DURATION_IN_MILLISECOND) - public void testStateTest(final StateTest test) throws InterruptedException { - // allows us to test blocking - final EfficiencyMonitoringThreadFactory factory = new EfficiencyMonitoringThreadFactory(test.getNStates()); - final ExecutorService threadPool = Executors.newFixedThreadPool(test.getNStates(), factory); - - logger.warn("Running " + test); - synchronized (GLOBAL_LOCK) { - //logger.warn(" Have lock"); - for ( final EfficiencyMonitoringThreadFactory.State threadToRunState : test.getStatesForThreads() ) - threadPool.submit(new StateTestThread(threadToRunState)); - - // lock has to be here for the whole running of the activeThreads but end before the sleep so the blocked activeThreads - // can block for their allotted time - threadPool.shutdown(); - Thread.sleep(THREAD_TARGET_DURATION_IN_MILLISECOND); - } - //logger.warn(" Releasing lock"); - threadPool.awaitTermination(10, TimeUnit.SECONDS); - //logger.warn(" done awaiting termination"); - //logger.warn(" waiting for all activeThreads to complete"); - factory.waitForAllThreadsToComplete(); - //logger.warn(" done waiting for activeThreads"); - - // make sure we counted everything properly - final long totalTime = factory.getTotalTime(); - final long minTime = (long)(THREAD_TARGET_DURATION_IN_MILLISECOND * 0.5) * test.getNStates(); - final long maxTime = (long)(THREAD_TARGET_DURATION_IN_MILLISECOND * 1.5) * test.getNStates(); - //logger.warn("Testing total time"); - Assert.assertTrue(totalTime >= minTime, "Factory results not properly accumulated: totalTime = " + totalTime + " < minTime = " + minTime); - Assert.assertTrue(totalTime <= maxTime, "Factory results not properly accumulated: totalTime = " + totalTime + " > maxTime = " + maxTime); - - for (final EfficiencyMonitoringThreadFactory.State state : EfficiencyMonitoringThreadFactory.State.values() ) { - final double min = test.minStatePercent(state); - final double max = test.maxStatePercent(state); - final double obs = factory.getStatePercent(state); -// logger.warn(" Checking " + state -// + " min " + String.format("%.2f", min) -// + " max " + String.format("%.2f", max) -// + " obs " + String.format("%.2f", obs) -// + " factor = " + factory); - Assert.assertTrue(obs >= min, "Too little time spent in state " + state + " obs " + obs + " min " + min); - Assert.assertTrue(obs <= max, "Too much time spent in state " + state + " obs " + obs + " max " + min); - } - - // we actually ran the expected number of activeThreads - Assert.assertEquals(factory.getNThreadsCreated(), test.getNStates()); - - // should be called to ensure we don't format / NPE on output - factory.printUsageInformation(logger, Priority.WARN); - } -} \ No newline at end of file