diff --git a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index bee25dc2f..1187039bb 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -570,32 +570,11 @@ public class GenomeAnalysisEngine { else if(walker instanceof ActiveRegionWalker) { if (readsDataSource.getSortOrder() != SAMFileHeader.SortOrder.coordinate) throw new UserException.MissortedBAM(SAMFileHeader.SortOrder.coordinate, "Active region walkers can only traverse coordinate-sorted data. Please resort your input BAM file(s) or set the Sort Order tag in the header appropriately."); - - switch(argCollection.activeRegionShardType) { - case LOCUSSHARD: - if(intervals == null) - return readsDataSource.createShardIteratorOverMappedReads(referenceDataSource.getReference().getSequenceDictionary(),new LocusShardBalancer()); - else - return readsDataSource.createShardIteratorOverIntervals(((ActiveRegionWalker)walker).extendIntervals(intervals, this.genomeLocParser, this.getReferenceDataSource().getReference()), new LocusShardBalancer()); - case READSHARD: - // Use the legacy ReadShardBalancer if legacy downsampling is enabled - ShardBalancer readShardBalancer = downsamplingMethod != null && downsamplingMethod.useLegacyDownsampler ? - new LegacyReadShardBalancer() : - new ReadShardBalancer(); - - if(intervals == null) - return readsDataSource.createShardIteratorOverMappedReads(referenceDataSource.getReference().getSequenceDictionary(), readShardBalancer); - else - return readsDataSource.createShardIteratorOverIntervals(((ActiveRegionWalker)walker).extendIntervals(intervals, this.genomeLocParser, this.getReferenceDataSource().getReference()), readShardBalancer); - case ACTIVEREGIONSHARD: - if(intervals == null) - return readsDataSource.createShardIteratorOverMappedReads(referenceDataSource.getReference().getSequenceDictionary(),new ActiveRegionShardBalancer()); - else - return readsDataSource.createShardIteratorOverIntervals(((ActiveRegionWalker)walker).extendIntervals(intervals, this.genomeLocParser, this.getReferenceDataSource().getReference()), new ActiveRegionShardBalancer()); - default: - throw new UserException.CommandLineException("Invalid active region shard type."); - } - } + if(intervals == null) + return readsDataSource.createShardIteratorOverMappedReads(referenceDataSource.getReference().getSequenceDictionary(),new LocusShardBalancer()); + else + return readsDataSource.createShardIteratorOverIntervals(((ActiveRegionWalker)walker).extendIntervals(intervals, this.genomeLocParser, this.getReferenceDataSource().getReference()), new LocusShardBalancer()); + } else if(walker instanceof ReadWalker || walker instanceof ReadPairWalker || walker instanceof DuplicateWalker) { // Apply special validation to read pair walkers. if(walker instanceof ReadPairWalker) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java index beaeacc85..d9c7c9008 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java @@ -37,7 +37,6 @@ import org.broadinstitute.sting.gatk.downsampling.DownsamplingMethod; import org.broadinstitute.sting.gatk.phonehome.GATKRunReport; import org.broadinstitute.sting.gatk.samples.PedigreeValidationType; import org.broadinstitute.sting.utils.QualityUtils; -import org.broadinstitute.sting.utils.activeregion.ExperimentalActiveRegionShardType; import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.interval.IntervalMergingRule; import org.broadinstitute.sting.utils.interval.IntervalSetRule; @@ -449,14 +448,5 @@ public class GATKArgumentCollection { @Hidden public boolean generateShadowBCF = false; // TODO -- remove all code tagged with TODO -- remove me when argument generateShadowBCF is removed - - // -------------------------------------------------------------------------------------------------------------- - // - // Experimental Active Region Traversal modes - // - // -------------------------------------------------------------------------------------------------------------- - - @Argument(fullName = "active_region_traversal_shard_type", shortName = "active_region_traversal_shard_type", doc = "Choose an experimental shard type for active region traversal, instead of the default LocusShard", required = false) - public ExperimentalActiveRegionShardType activeRegionShardType = ExperimentalActiveRegionShardType.LOCUSSHARD; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ActiveRegionShardDataProvider.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ActiveRegionShardDataProvider.java deleted file mode 100644 index 55e51f934..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ActiveRegionShardDataProvider.java +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Copyright (c) 2012, The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.datasources.providers; - -import net.sf.picard.reference.IndexedFastaSequenceFile; -import org.broadinstitute.sting.gatk.ReadProperties; -import org.broadinstitute.sting.gatk.datasources.reads.Shard; -import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; -import org.broadinstitute.sting.gatk.iterators.LocusIterator; -import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; - -import java.util.Collection; - -/** - * @author Joel Thibault - */ -public class ActiveRegionShardDataProvider extends ShardDataProvider { - final private ReadShardDataProvider readProvider; - final private LocusShardDataProvider locusProvider; - - public ActiveRegionShardDataProvider(Shard shard, ReadProperties sourceInfo, GenomeLocParser genomeLocParser, StingSAMIterator reads, GenomeLoc locus, LocusIterator locusIterator, IndexedFastaSequenceFile reference, Collection rods) { - super(shard, genomeLocParser, reference, rods); // TODO: necessary? - readProvider = new ReadShardDataProvider(shard, genomeLocParser, reads, reference, rods); - locusProvider = new LocusShardDataProvider(shard, sourceInfo, genomeLocParser, locus, locusIterator, reference, rods); - } - - public ReadShardDataProvider getReadShardDataProvider() { - return readProvider; - } - - public LocusShardDataProvider getLocusShardDataProvider(LocusIterator iterator) { - return locusProvider; - } -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusShardDataProvider.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusShardDataProvider.java index 1607469eb..4888b9f41 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusShardDataProvider.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusShardDataProvider.java @@ -44,22 +44,6 @@ public class LocusShardDataProvider extends ShardDataProvider { this.locusIterator = locusIterator; } - /** - * Create a data provider based on an input provider - * Used only by ExperimentalReadShardTraverseActiveRegions - * @param dataProvider - * @param sourceInfo - * @param genomeLocParser - * @param locus - * @param locusIterator - */ - public LocusShardDataProvider(ShardDataProvider dataProvider, ReadProperties sourceInfo, GenomeLocParser genomeLocParser, GenomeLoc locus, LocusIterator locusIterator) { - super(dataProvider.getShard(),genomeLocParser,dataProvider.getReference(),dataProvider.getReferenceOrderedData()); - this.sourceInfo = sourceInfo; - this.locus = locus; - this.locusIterator = locusIterator; - } - /** * Returns information about the source of the reads. * @return Info about the source of the reads. diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/ActiveRegionShard.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/ActiveRegionShard.java deleted file mode 100755 index 381b193e9..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/ActiveRegionShard.java +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright (c) 2012, The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.datasources.reads; - -import net.sf.samtools.SAMFileSpan; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; - -import java.util.List; -import java.util.Map; - -/** - * @author Joel Thibault - */ -public class ActiveRegionShard extends ReadShard { - public ActiveRegionShard(GenomeLocParser parser, SAMDataSource readsDataSource, Map fileSpans, List loci, boolean isUnmapped) { - super(parser, readsDataSource, fileSpans, loci, isUnmapped); - } -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/ActiveRegionShardBalancer.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/ActiveRegionShardBalancer.java deleted file mode 100644 index 338dd1bdf..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/ActiveRegionShardBalancer.java +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright (c) 2012, The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.datasources.reads; - -/** - * @author Joel Thibault - */ -public class ActiveRegionShardBalancer extends ReadShardBalancer { - // TODO ? -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/Shard.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/Shard.java index 314156af6..e22a7a54d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/Shard.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/Shard.java @@ -40,9 +40,7 @@ import java.util.Map; */ public abstract class Shard implements HasGenomeLocation { public enum ShardType { - READ, - LOCUS, - ACTIVEREGION // Used only by ExperimentalActiveRegionShardTraverseActiveRegions + READ, LOCUS } protected final GenomeLocParser parser; // incredibly annoying! diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java b/public/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java index 44f9978a6..f3c1ae91c 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java +++ b/public/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java @@ -2,7 +2,6 @@ package org.broadinstitute.sting.gatk.executive; import net.sf.picard.reference.IndexedFastaSequenceFile; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; -import org.broadinstitute.sting.gatk.datasources.providers.ActiveRegionShardDataProvider; import org.broadinstitute.sting.gatk.datasources.providers.LocusShardDataProvider; import org.broadinstitute.sting.gatk.datasources.providers.ReadShardDataProvider; import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider; @@ -12,8 +11,6 @@ import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.io.DirectOutputTracker; import org.broadinstitute.sting.gatk.io.OutputTracker; import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation; -import org.broadinstitute.sting.gatk.traversals.ExperimentalActiveRegionShardTraverseActiveRegions; -import org.broadinstitute.sting.gatk.traversals.ExperimentalReadShardTraverseActiveRegions; import org.broadinstitute.sting.gatk.traversals.TraversalEngine; import org.broadinstitute.sting.gatk.traversals.TraverseActiveRegions; import org.broadinstitute.sting.gatk.walkers.Walker; @@ -81,18 +78,6 @@ public class LinearMicroScheduler extends MicroScheduler { } windowMaker.close(); } - else if(shard.getShardType() == Shard.ShardType.ACTIVEREGION) { - WindowMaker windowMaker = new WindowMaker(shard, engine.getGenomeLocParser(), - getReadIterator(shard), shard.getGenomeLocs(), SampleUtils.getSAMFileSamples(engine)); - for(WindowMaker.WindowMakerIterator iterator: windowMaker) { - ShardDataProvider dataProvider = new ActiveRegionShardDataProvider(shard,iterator.getSourceInfo(),engine.getGenomeLocParser(),getReadIterator(shard),iterator.getLocus(),iterator,reference,rods); - Object result = traversalEngine.traverse(walker, dataProvider, accumulator.getReduceInit()); - accumulator.accumulate(dataProvider,result); - dataProvider.close(); - if ( walker.isDone() ) break; - } - windowMaker.close(); - } else { ShardDataProvider dataProvider = new ReadShardDataProvider(shard,engine.getGenomeLocParser(),getReadIterator(shard),reference,rods); Object result = traversalEngine.traverse(walker, dataProvider, accumulator.getReduceInit()); @@ -108,14 +93,6 @@ public class LinearMicroScheduler extends MicroScheduler { final Object result = ((TraverseActiveRegions) traversalEngine).endTraversal(walker, accumulator.getReduceInit()); accumulator.accumulate(null, result); // Assumes only used with StandardAccumulator } - else if( traversalEngine instanceof ExperimentalReadShardTraverseActiveRegions ) { - final Object result = ((ExperimentalReadShardTraverseActiveRegions) traversalEngine).endTraversal(walker, accumulator.getReduceInit()); - accumulator.accumulate(null, result); // Assumes only used with StandardAccumulator - } - else if( traversalEngine instanceof ExperimentalActiveRegionShardTraverseActiveRegions) { - final Object result = ((ExperimentalActiveRegionShardTraverseActiveRegions) traversalEngine).endTraversal(walker, accumulator.getReduceInit()); - accumulator.accumulate(null, result); // Assumes only used with StandardAccumulator - } Object result = accumulator.finishTraversal(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java b/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java index 13c11def6..f8aec1489 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java +++ b/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java @@ -41,7 +41,6 @@ import org.broadinstitute.sting.gatk.traversals.*; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.AutoFormattingTime; import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.utils.activeregion.ExperimentalActiveRegionShardType; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.progressmeter.ProgressMeter; @@ -246,12 +245,7 @@ public abstract class MicroScheduler implements MicroSchedulerMBean { } else if (walker instanceof ReadPairWalker) { return new TraverseReadPairs(); } else if (walker instanceof ActiveRegionWalker) { - switch (engine.getArguments().activeRegionShardType) { - case LOCUSSHARD: return new TraverseActiveRegions(); - case READSHARD: return new ExperimentalReadShardTraverseActiveRegions(); - case ACTIVEREGIONSHARD: return new ExperimentalActiveRegionShardTraverseActiveRegions(); - default: throw new UnsupportedOperationException("Unable to determine traversal type, the walker is an unknown type of ActiveRegionWalker."); - } + return new TraverseActiveRegions(); } else { throw new UnsupportedOperationException("Unable to determine traversal type, the walker is an unknown type."); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/ExperimentalActiveRegionShardTraverseActiveRegions.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/ExperimentalActiveRegionShardTraverseActiveRegions.java deleted file mode 100644 index 45d132678..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/ExperimentalActiveRegionShardTraverseActiveRegions.java +++ /dev/null @@ -1,309 +0,0 @@ -package org.broadinstitute.sting.gatk.traversals; - -import net.sf.samtools.SAMFileHeader; -import org.apache.log4j.Logger; -import org.broadinstitute.sting.gatk.WalkerManager; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.datasources.providers.*; -import org.broadinstitute.sting.gatk.datasources.reads.Shard; -import org.broadinstitute.sting.gatk.executive.WindowMaker; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.ActiveRegionExtension; -import org.broadinstitute.sting.gatk.walkers.ActiveRegionWalker; -import org.broadinstitute.sting.gatk.walkers.DataSource; -import org.broadinstitute.sting.gatk.walkers.Walker; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.SampleUtils; -import org.broadinstitute.sting.utils.activeregion.ActiveRegion; -import org.broadinstitute.sting.utils.activeregion.ActivityProfile; -import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult; -import org.broadinstitute.sting.utils.sam.GATKSAMRecord; - -import java.util.*; - -public class ExperimentalActiveRegionShardTraverseActiveRegions extends TraversalEngine,ActiveRegionShardDataProvider> { - /** - * our log, which we want to capture anything from this class - */ - protected final static Logger logger = Logger.getLogger(TraversalEngine.class); - - private final LinkedList workQueue = new LinkedList(); - private final LinkedList myReads = new LinkedList(); - - @Override - public String getTraversalUnits() { - return "active regions"; - } - - @Override - public T traverse( final ActiveRegionWalker walker, - final ActiveRegionShardDataProvider dataProvider, - T sum) { - logger.debug(String.format("ExperimentalActiveRegionShardTraverseActiveRegions.traverse: Shard is %s", dataProvider)); - - ReadShardDataProvider readDataProvider = dataProvider.getReadShardDataProvider(); - - final int activeRegionExtension = walker.getClass().getAnnotation(ActiveRegionExtension.class).extension(); - final int maxRegionSize = walker.getClass().getAnnotation(ActiveRegionExtension.class).maxRegion(); - - final ReadView readView = new ReadView(readDataProvider); - - final List activeRegions = new LinkedList(); - ActivityProfile profile = new ActivityProfile(engine.getGenomeLocParser(), walker.hasPresetActiveRegions()); - - Shard readShard = readDataProvider.getShard(); - SAMFileHeader header = readShard.getReadProperties().getHeader(); - WindowMaker windowMaker = new WindowMaker(readShard, engine.getGenomeLocParser(), - readView.iterator(), readShard.getGenomeLocs(), SampleUtils.getSAMFileSamples(header)); - - for(WindowMaker.WindowMakerIterator iterator: windowMaker) { - LocusShardDataProvider locusDataProvider = dataProvider.getLocusShardDataProvider(iterator); - final LocusView locusView = new AllLocusView(locusDataProvider); - final LocusReferenceView referenceView = new LocusReferenceView( walker, locusDataProvider ); - ReferenceOrderedView referenceOrderedDataView = getReferenceOrderedView(walker, locusDataProvider, locusView); - - // We keep processing while the next reference location is within the interval - GenomeLoc prevLoc = null; - while( locusView.hasNext() ) { - final AlignmentContext locus = locusView.next(); - final GenomeLoc location = locus.getLocation(); - - if ( prevLoc != null && location.getStart() != prevLoc.getStop() + 1 ) { - // we've move across some interval boundary, restart profile - profile = incorporateActiveRegions(profile, activeRegions, activeRegionExtension, maxRegionSize); - } - - readDataProvider.getShard().getReadMetrics().incrementNumIterations(); - - // create reference context. Note that if we have a pileup of "extended events", the context will - // hold the (longest) stretch of deleted reference bases (if deletions are present in the pileup). - final ReferenceContext refContext = referenceView.getReferenceContext(location); - - // Iterate forward to get all reference ordered data covering this location - final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(locus.getLocation(), refContext); - - // Call the walkers isActive function for this locus and add them to the list to be integrated later - profile.add(walkerActiveProb(walker, tracker, refContext, locus, location)); - - prevLoc = location; - - printProgress(locus.getLocation()); - } - - locusDataProvider.close(); - } - - windowMaker.close(); - - updateCumulativeMetrics(readDataProvider.getShard()); - - if ( ! profile.isEmpty() ) - incorporateActiveRegions(profile, activeRegions, activeRegionExtension, maxRegionSize); - - // add active regions to queue of regions to process - // first check if can merge active regions over shard boundaries - if( !activeRegions.isEmpty() ) { - if( !workQueue.isEmpty() ) { - final ActiveRegion last = workQueue.getLast(); - final ActiveRegion first = activeRegions.get(0); - if( last.isActive == first.isActive && last.getLocation().contiguousP(first.getLocation()) && last.getLocation().size() + first.getLocation().size() <= maxRegionSize ) { - workQueue.removeLast(); - activeRegions.remove(first); - workQueue.addLast(new ActiveRegion(last.getLocation().union(first.getLocation()), first.isActive, this.engine.getGenomeLocParser(), activeRegionExtension)); - } - } - workQueue.addAll( activeRegions ); - } - - logger.debug("Integrated " + profile.size() + " isActive calls into " + activeRegions.size() + " regions." ); - - // now process the active regions, where possible - boolean emptyQueue = false; - sum = processActiveRegions(walker, sum, emptyQueue); - - return sum; - } - - /** - * Take the individual isActive calls and integrate them into contiguous active regions and - * add these blocks of work to the work queue - * band-pass filter the list of isActive probabilities and turn into active regions - * - * @param profile - * @param activeRegions - * @param activeRegionExtension - * @param maxRegionSize - * @return - */ - private ActivityProfile incorporateActiveRegions(final ActivityProfile profile, - final List activeRegions, - final int activeRegionExtension, - final int maxRegionSize) { - if ( profile.isEmpty() ) - throw new IllegalStateException("trying to incorporate an empty active profile " + profile); - - final ActivityProfile bandPassFiltered = profile.bandPassFilter(); - activeRegions.addAll(bandPassFiltered.createActiveRegions( activeRegionExtension, maxRegionSize )); - return new ActivityProfile( engine.getGenomeLocParser(), profile.hasPresetRegions() ); - } - - - // -------------------------------------------------------------------------------- - // - // simple utility functions - // - // -------------------------------------------------------------------------------- - - private final ActivityProfileResult walkerActiveProb(final ActiveRegionWalker walker, - final RefMetaDataTracker tracker, final ReferenceContext refContext, - final AlignmentContext locus, final GenomeLoc location) { - if ( walker.hasPresetActiveRegions() ) { - return new ActivityProfileResult(location, walker.presetActiveRegions.overlaps(location) ? 1.0 : 0.0); - } else { - return walker.isActive( tracker, refContext, locus ); - } - } - - private ReferenceOrderedView getReferenceOrderedView( final ActiveRegionWalker walker, - final LocusShardDataProvider dataProvider, - final LocusView locusView) { - if ( WalkerManager.getWalkerDataSource(walker) != DataSource.REFERENCE_ORDERED_DATA ) - return new ManagingReferenceOrderedView( dataProvider ); - else - return (RodLocusView)locusView; - } - - // -------------------------------------------------------------------------------- - // - // code to handle processing active regions - // - // -------------------------------------------------------------------------------- - - private T processActiveRegions( final ActiveRegionWalker walker, T sum, boolean emptyQueue ) { - if( walker.activeRegionOutStream != null ) { - writeActiveRegionsToStream(walker); - return sum; - } else { - return callWalkerMapOnActiveRegions(walker, sum, emptyQueue); - } - } - - /** - * Write out each active region to the walker activeRegionOutStream - * - * @param walker - */ - private void writeActiveRegionsToStream( final ActiveRegionWalker walker ) { - // Just want to output the active regions to a file, not actually process them - for( final ActiveRegion activeRegion : workQueue ) { - if( activeRegion.isActive ) { - walker.activeRegionOutStream.println( activeRegion.getLocation() ); - } - } - } - - private T callWalkerMapOnActiveRegions( final ActiveRegionWalker walker, T sum, boolean emptyQueue ) { - final int lastRegionStart = workQueue.getLast().getLocation().getStart(); - final String lastRegionContig = workQueue.getLast().getLocation().getContig(); - - // If we've traversed sufficiently past the beginning of the workQueue we can unload those regions and process them - // TODO can implement parallel traversal here - while( workQueue.peekFirst() != null ) { - ActiveRegion firstRegion = workQueue.getFirst(); - final String firstRegionContig = firstRegion.getLocation().getContig(); - if (emptyQueue || firstRegionContig != lastRegionContig) { - sum = processFirstActiveRegion(sum, walker); - } - else { - final int firstRegionMaxReadStop = walker.wantsExtendedReads() ? firstRegion.getMaxReadStop() : firstRegion.getExtendedMaxReadStop(); - if (lastRegionStart > firstRegionMaxReadStop) { - sum = processFirstActiveRegion( sum, walker ); - } - else { - break; - } - } - } - - return sum; - } - - /** - * Process the first active region and all remaining reads which overlap - * - * Remove the first active region from the queue - * (NB: some reads associated with this active region may have already been processed) - * - * Remove all of these reads from the queue - * (NB: some may be associated with other active regions) - * - * @param sum - * @param walker - * @return - */ - private T processFirstActiveRegion( final T sum, final ActiveRegionWalker walker ) { - final ActiveRegion firstRegion = workQueue.removeFirst(); - - GATKSAMRecord firstRead = myReads.peekFirst(); // don't remove because it may not be placed here - GenomeLoc firstReadLoc = this.engine.getGenomeLocParser().createGenomeLoc( firstRead ); - - while ( firstRegion.getLocation().overlapsP( firstReadLoc ) || - (walker.wantsExtendedReads() && firstRegion.getExtendedLoc().overlapsP( firstReadLoc ))) { - if( firstRegion.getLocation().overlapsP( firstReadLoc ) ) { - // The region which the highest amount of overlap is chosen as the primary region for the read (tie breaking is done as right most region) - long maxOverlap = firstRegion.getLocation().sizeOfOverlap( firstReadLoc ); - ActiveRegion bestRegion = firstRegion; - for( final ActiveRegion otherRegionToTest : workQueue ) { - if( otherRegionToTest.getLocation().sizeOfOverlap(firstReadLoc) >= maxOverlap ) { - maxOverlap = otherRegionToTest.getLocation().sizeOfOverlap( firstReadLoc ); - bestRegion = otherRegionToTest; - } - } - bestRegion.add( firstRead ); - - // The read is also added to all other regions in which it overlaps but marked as non-primary - if( walker.wantsNonPrimaryReads() ) { - if( !bestRegion.equals(firstRegion) ) { - firstRegion.add(firstRead); - } - for( final ActiveRegion otherRegionToTest : workQueue ) { - if( !bestRegion.equals(otherRegionToTest) ) { - // check for non-primary vs. extended - if ( otherRegionToTest.getLocation().overlapsP( firstReadLoc ) ) { - otherRegionToTest.add( firstRead ); - } else if ( walker.wantsExtendedReads() && otherRegionToTest.getExtendedLoc().overlapsP( firstReadLoc ) ) { - otherRegionToTest.add( firstRead ); - } - } - } - } - - // check for non-primary vs. extended - } else if( firstRegion.getLocation().overlapsP( firstReadLoc ) ) { - if ( walker.wantsNonPrimaryReads() ) { - firstRegion.add( firstRead ); - } - } else if( walker.wantsExtendedReads() && firstRegion.getExtendedLoc().overlapsP( firstReadLoc )) { - firstRegion.add( firstRead ); - } - - myReads.removeFirst(); - firstRead = myReads.peekFirst(); - firstReadLoc = this.engine.getGenomeLocParser().createGenomeLoc( firstRead ); - } - - logger.debug(">> Map call with " + firstRegion.getReads().size() + " " + (firstRegion.isActive ? "active" : "inactive") + " reads @ " + firstRegion.getLocation() + " with full extent: " + firstRegion.getReferenceLoc()); - final M x = walker.map( firstRegion, null ); - return walker.reduce(x, sum); - } - - /** - * Special function called in LinearMicroScheduler to empty out the work queue. - * Ugly for now but will be cleaned up when we push this functionality more into the engine - */ - public T endTraversal( final Walker walker, T sum) { - boolean emptyQueue = true; - return processActiveRegions((ActiveRegionWalker)walker, sum, emptyQueue); - } -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/ExperimentalReadShardTraverseActiveRegions.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/ExperimentalReadShardTraverseActiveRegions.java deleted file mode 100644 index 299ee4f56..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/ExperimentalReadShardTraverseActiveRegions.java +++ /dev/null @@ -1,309 +0,0 @@ -package org.broadinstitute.sting.gatk.traversals; - -import net.sf.samtools.SAMFileHeader; -import org.apache.log4j.Logger; -import org.broadinstitute.sting.gatk.WalkerManager; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.datasources.providers.*; -import org.broadinstitute.sting.gatk.datasources.reads.Shard; -import org.broadinstitute.sting.gatk.executive.WindowMaker; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.ActiveRegionExtension; -import org.broadinstitute.sting.gatk.walkers.ActiveRegionWalker; -import org.broadinstitute.sting.gatk.walkers.DataSource; -import org.broadinstitute.sting.gatk.walkers.Walker; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.SampleUtils; -import org.broadinstitute.sting.utils.activeregion.ActiveRegion; -import org.broadinstitute.sting.utils.activeregion.ActivityProfile; -import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult; -import org.broadinstitute.sting.utils.sam.GATKSAMRecord; - -import java.util.*; - -public class ExperimentalReadShardTraverseActiveRegions extends TraversalEngine,ReadShardDataProvider> { - /** - * our log, which we want to capture anything from this class - */ - protected final static Logger logger = Logger.getLogger(TraversalEngine.class); - - private final LinkedList workQueue = new LinkedList(); - private final LinkedList myReads = new LinkedList(); - - @Override - public String getTraversalUnits() { - return "active regions"; - } - - @Override - public T traverse( final ActiveRegionWalker walker, - final ReadShardDataProvider readDataProvider, - T sum) { - logger.debug(String.format("ExperimentalReadShardTraverseActiveRegions.traverse: Read Shard is %s", readDataProvider)); - - final int activeRegionExtension = walker.getClass().getAnnotation(ActiveRegionExtension.class).extension(); - final int maxRegionSize = walker.getClass().getAnnotation(ActiveRegionExtension.class).maxRegion(); - - final ReadView readView = new ReadView(readDataProvider); - - final List activeRegions = new LinkedList(); - ActivityProfile profile = new ActivityProfile(engine.getGenomeLocParser(), walker.hasPresetActiveRegions()); - - Shard readShard = readDataProvider.getShard(); - SAMFileHeader header = readShard.getReadProperties().getHeader(); - WindowMaker windowMaker = new WindowMaker(readShard, engine.getGenomeLocParser(), - readView.iterator(), readShard.getGenomeLocs(), SampleUtils.getSAMFileSamples(header)); - - for(WindowMaker.WindowMakerIterator iterator: windowMaker) { - LocusShardDataProvider locusDataProvider = new LocusShardDataProvider(readDataProvider, - iterator.getSourceInfo(), engine.getGenomeLocParser(), iterator.getLocus(), iterator); - - final LocusView locusView = new AllLocusView(locusDataProvider); - final LocusReferenceView referenceView = new LocusReferenceView( walker, locusDataProvider ); - ReferenceOrderedView referenceOrderedDataView = getReferenceOrderedView(walker, locusDataProvider, locusView); - - // We keep processing while the next reference location is within the interval - GenomeLoc prevLoc = null; - while( locusView.hasNext() ) { - final AlignmentContext locus = locusView.next(); - final GenomeLoc location = locus.getLocation(); - - if ( prevLoc != null && location.getStart() != prevLoc.getStop() + 1 ) { - // we've move across some interval boundary, restart profile - profile = incorporateActiveRegions(profile, activeRegions, activeRegionExtension, maxRegionSize); - } - - readDataProvider.getShard().getReadMetrics().incrementNumIterations(); - - // create reference context. Note that if we have a pileup of "extended events", the context will - // hold the (longest) stretch of deleted reference bases (if deletions are present in the pileup). - final ReferenceContext refContext = referenceView.getReferenceContext(location); - - // Iterate forward to get all reference ordered data covering this location - final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(locus.getLocation(), refContext); - - // Call the walkers isActive function for this locus and add them to the list to be integrated later - profile.add(walkerActiveProb(walker, tracker, refContext, locus, location)); - - prevLoc = location; - - printProgress(locus.getLocation()); - } - - locusDataProvider.close(); - } - - windowMaker.close(); - - updateCumulativeMetrics(readDataProvider.getShard()); - - if ( ! profile.isEmpty() ) - incorporateActiveRegions(profile, activeRegions, activeRegionExtension, maxRegionSize); - - // add active regions to queue of regions to process - // first check if can merge active regions over shard boundaries - if( !activeRegions.isEmpty() ) { - if( !workQueue.isEmpty() ) { - final ActiveRegion last = workQueue.getLast(); - final ActiveRegion first = activeRegions.get(0); - if( last.isActive == first.isActive && last.getLocation().contiguousP(first.getLocation()) && last.getLocation().size() + first.getLocation().size() <= maxRegionSize ) { - workQueue.removeLast(); - activeRegions.remove(first); - workQueue.addLast(new ActiveRegion(last.getLocation().union(first.getLocation()), first.isActive, this.engine.getGenomeLocParser(), activeRegionExtension)); - } - } - workQueue.addAll( activeRegions ); - } - - logger.debug("Integrated " + profile.size() + " isActive calls into " + activeRegions.size() + " regions." ); - - // now process the active regions, where possible - boolean emptyQueue = false; - sum = processActiveRegions(walker, sum, emptyQueue); - - return sum; - } - - /** - * Take the individual isActive calls and integrate them into contiguous active regions and - * add these blocks of work to the work queue - * band-pass filter the list of isActive probabilities and turn into active regions - * - * @param profile - * @param activeRegions - * @param activeRegionExtension - * @param maxRegionSize - * @return - */ - private ActivityProfile incorporateActiveRegions(final ActivityProfile profile, - final List activeRegions, - final int activeRegionExtension, - final int maxRegionSize) { - if ( profile.isEmpty() ) - throw new IllegalStateException("trying to incorporate an empty active profile " + profile); - - final ActivityProfile bandPassFiltered = profile.bandPassFilter(); - activeRegions.addAll(bandPassFiltered.createActiveRegions( activeRegionExtension, maxRegionSize )); - return new ActivityProfile( engine.getGenomeLocParser(), profile.hasPresetRegions() ); - } - - - // -------------------------------------------------------------------------------- - // - // simple utility functions - // - // -------------------------------------------------------------------------------- - - private final ActivityProfileResult walkerActiveProb(final ActiveRegionWalker walker, - final RefMetaDataTracker tracker, final ReferenceContext refContext, - final AlignmentContext locus, final GenomeLoc location) { - if ( walker.hasPresetActiveRegions() ) { - return new ActivityProfileResult(location, walker.presetActiveRegions.overlaps(location) ? 1.0 : 0.0); - } else { - return walker.isActive( tracker, refContext, locus ); - } - } - - private ReferenceOrderedView getReferenceOrderedView( final ActiveRegionWalker walker, - final LocusShardDataProvider dataProvider, - final LocusView locusView) { - if ( WalkerManager.getWalkerDataSource(walker) != DataSource.REFERENCE_ORDERED_DATA ) - return new ManagingReferenceOrderedView( dataProvider ); - else - return (RodLocusView)locusView; - } - - // -------------------------------------------------------------------------------- - // - // code to handle processing active regions - // - // -------------------------------------------------------------------------------- - - private T processActiveRegions( final ActiveRegionWalker walker, T sum, boolean emptyQueue ) { - if( walker.activeRegionOutStream != null ) { - writeActiveRegionsToStream(walker); - return sum; - } else { - return callWalkerMapOnActiveRegions(walker, sum, emptyQueue); - } - } - - /** - * Write out each active region to the walker activeRegionOutStream - * - * @param walker - */ - private void writeActiveRegionsToStream( final ActiveRegionWalker walker ) { - // Just want to output the active regions to a file, not actually process them - for( final ActiveRegion activeRegion : workQueue ) { - if( activeRegion.isActive ) { - walker.activeRegionOutStream.println( activeRegion.getLocation() ); - } - } - } - - private T callWalkerMapOnActiveRegions( final ActiveRegionWalker walker, T sum, boolean emptyQueue ) { - final int lastRegionStart = workQueue.getLast().getLocation().getStart(); - final String lastRegionContig = workQueue.getLast().getLocation().getContig(); - - // If we've traversed sufficiently past the beginning of the workQueue we can unload those regions and process them - // TODO can implement parallel traversal here - while( workQueue.peekFirst() != null ) { - ActiveRegion firstRegion = workQueue.getFirst(); - final String firstRegionContig = firstRegion.getLocation().getContig(); - if (emptyQueue || firstRegionContig != lastRegionContig) { - sum = processFirstActiveRegion(sum, walker); - } - else { - final int firstRegionMaxReadStop = walker.wantsExtendedReads() ? firstRegion.getMaxReadStop() : firstRegion.getExtendedMaxReadStop(); - if (lastRegionStart > firstRegionMaxReadStop) { - sum = processFirstActiveRegion( sum, walker ); - } - else { - break; - } - } - } - - return sum; - } - - /** - * Process the first active region and all remaining reads which overlap - * - * Remove the first active region from the queue - * (NB: some reads associated with this active region may have already been processed) - * - * Remove all of these reads from the queue - * (NB: some may be associated with other active regions) - * - * @param sum - * @param walker - * @return - */ - private T processFirstActiveRegion( final T sum, final ActiveRegionWalker walker ) { - final ActiveRegion firstRegion = workQueue.removeFirst(); - - GATKSAMRecord firstRead = myReads.peekFirst(); // don't remove because it may not be placed here - GenomeLoc firstReadLoc = this.engine.getGenomeLocParser().createGenomeLoc( firstRead ); - - while ( firstRegion.getLocation().overlapsP( firstReadLoc ) || - (walker.wantsExtendedReads() && firstRegion.getExtendedLoc().overlapsP( firstReadLoc ))) { - if( firstRegion.getLocation().overlapsP( firstReadLoc ) ) { - // The region which the highest amount of overlap is chosen as the primary region for the read (tie breaking is done as right most region) - long maxOverlap = firstRegion.getLocation().sizeOfOverlap( firstReadLoc ); - ActiveRegion bestRegion = firstRegion; - for( final ActiveRegion otherRegionToTest : workQueue ) { - if( otherRegionToTest.getLocation().sizeOfOverlap(firstReadLoc) >= maxOverlap ) { - maxOverlap = otherRegionToTest.getLocation().sizeOfOverlap( firstReadLoc ); - bestRegion = otherRegionToTest; - } - } - bestRegion.add( firstRead ); - - // The read is also added to all other regions in which it overlaps but marked as non-primary - if( walker.wantsNonPrimaryReads() ) { - if( !bestRegion.equals(firstRegion) ) { - firstRegion.add(firstRead); - } - for( final ActiveRegion otherRegionToTest : workQueue ) { - if( !bestRegion.equals(otherRegionToTest) ) { - // check for non-primary vs. extended - if ( otherRegionToTest.getLocation().overlapsP( firstReadLoc ) ) { - otherRegionToTest.add( firstRead ); - } else if ( walker.wantsExtendedReads() && otherRegionToTest.getExtendedLoc().overlapsP( firstReadLoc ) ) { - otherRegionToTest.add( firstRead ); - } - } - } - } - - // check for non-primary vs. extended - } else if( firstRegion.getLocation().overlapsP( firstReadLoc ) ) { - if ( walker.wantsNonPrimaryReads() ) { - firstRegion.add( firstRead ); - } - } else if( walker.wantsExtendedReads() && firstRegion.getExtendedLoc().overlapsP( firstReadLoc )) { - firstRegion.add( firstRead ); - } - - myReads.removeFirst(); - firstRead = myReads.peekFirst(); - firstReadLoc = this.engine.getGenomeLocParser().createGenomeLoc( firstRead ); - } - - logger.debug(">> Map call with " + firstRegion.getReads().size() + " " + (firstRegion.isActive ? "active" : "inactive") + " reads @ " + firstRegion.getLocation() + " with full extent: " + firstRegion.getReferenceLoc()); - final M x = walker.map( firstRegion, null ); - return walker.reduce(x, sum); - } - - /** - * Special function called in LinearMicroScheduler to empty out the work queue. - * Ugly for now but will be cleaned up when we push this functionality more into the engine - */ - public T endTraversal( final Walker walker, T sum) { - boolean emptyQueue = true; - return processActiveRegions((ActiveRegionWalker)walker, sum, emptyQueue); - } -} diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java index d1199ad3d..c12dfcee9 100644 --- a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java +++ b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java @@ -26,11 +26,6 @@ public class ActiveRegion implements HasGenomeLocation { private final GenomeLocParser genomeLocParser; public final boolean isActive; - // maximum stop position of all reads with start position in this active region - // Used only by ExperimentalReadShardTraverseActiveRegions - // NB: these reads may not be associated with this active region! - private int maxReadStop; - public ActiveRegion( final GenomeLoc activeRegionLoc, final boolean isActive, final GenomeLocParser genomeLocParser, final int extension ) { this.activeRegionLoc = activeRegionLoc; this.isActive = isActive; @@ -38,7 +33,6 @@ public class ActiveRegion implements HasGenomeLocation { this.extension = extension; extendedLoc = genomeLocParser.createGenomeLocOnContig(activeRegionLoc.getContig(), activeRegionLoc.getStart() - extension, activeRegionLoc.getStop() + extension); fullExtentReferenceLoc = extendedLoc; - maxReadStop = activeRegionLoc.getStart(); } @Override @@ -99,18 +93,6 @@ public class ActiveRegion implements HasGenomeLocation { public void remove( final GATKSAMRecord read ) { reads.remove( read ); } public void removeAll( final ArrayList readsToRemove ) { reads.removeAll( readsToRemove ); } - public void setMaxReadStop(int maxReadStop) { - this.maxReadStop = maxReadStop; - } - - public int getMaxReadStop() { - return maxReadStop; - } - - public int getExtendedMaxReadStop() { - return maxReadStop + extension; - } - public boolean equalExceptReads(final ActiveRegion other) { if ( activeRegionLoc.compareTo(other.activeRegionLoc) != 0 ) return false; if ( isActive != other.isActive ) return false; diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/ExperimentalActiveRegionShardType.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/ExperimentalActiveRegionShardType.java deleted file mode 100644 index 1e9a0ee94..000000000 --- a/public/java/src/org/broadinstitute/sting/utils/activeregion/ExperimentalActiveRegionShardType.java +++ /dev/null @@ -1,14 +0,0 @@ -package org.broadinstitute.sting.utils.activeregion; - -/** - * Created with IntelliJ IDEA. - * User: thibault - * Date: 1/2/13 - * Time: 4:59 PM - * To change this template use File | Settings | File Templates. - */ -public enum ExperimentalActiveRegionShardType { - LOCUSSHARD, // default/legacy type - READSHARD, - ACTIVEREGIONSHARD -} diff --git a/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsUnitTest.java index 0ec4f57f6..645f1ffc4 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsUnitTest.java @@ -3,16 +3,10 @@ package org.broadinstitute.sting.gatk.traversals; import com.google.java.contract.PreconditionError; import net.sf.samtools.*; import org.broadinstitute.sting.commandline.Tags; -import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection; -import org.broadinstitute.sting.gatk.datasources.providers.ActiveRegionShardDataProvider; -import org.broadinstitute.sting.gatk.datasources.providers.LocusShardDataProvider; -import org.broadinstitute.sting.gatk.datasources.providers.ReadShardDataProvider; -import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider; import org.broadinstitute.sting.gatk.datasources.reads.*; import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation; import org.broadinstitute.sting.utils.GenomeLocSortedSet; import org.broadinstitute.sting.utils.activeregion.ActiveRegionReadState; -import org.broadinstitute.sting.utils.activeregion.ExperimentalActiveRegionShardType; import org.broadinstitute.sting.utils.interval.IntervalMergingRule; import org.broadinstitute.sting.utils.interval.IntervalUtils; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; @@ -21,6 +15,7 @@ import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.datasources.providers.LocusShardDataProvider; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.executive.WindowMaker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -33,7 +28,6 @@ import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; import org.broadinstitute.sting.utils.sam.ReadUtils; import org.testng.Assert; -import org.testng.TestException; import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; @@ -101,9 +95,7 @@ public class TraverseActiveRegionsUnitTest extends BaseTest { } } - private final TraverseActiveRegions traverse = new TraverseActiveRegions(); - private final ExperimentalReadShardTraverseActiveRegions readShardTraverse = new ExperimentalReadShardTraverseActiveRegions(); - private final ExperimentalActiveRegionShardTraverseActiveRegions activeRegionShardTraverse = new ExperimentalActiveRegionShardTraverseActiveRegions(); + private final TraverseActiveRegions t = new TraverseActiveRegions(); private IndexedFastaSequenceFile reference; private SAMSequenceDictionary dictionary; @@ -114,8 +106,6 @@ public class TraverseActiveRegionsUnitTest extends BaseTest { private static final String testBAM = "TraverseActiveRegionsUnitTest.bam"; private static final String testBAI = "TraverseActiveRegionsUnitTest.bai"; - private static final ExperimentalActiveRegionShardType shardType = ExperimentalActiveRegionShardType.LOCUSSHARD; - @BeforeClass private void init() throws FileNotFoundException { reference = new CachingIndexedFastaSequenceFile(new File(hg19Reference)); @@ -183,8 +173,8 @@ public class TraverseActiveRegionsUnitTest extends BaseTest { private List getIsActiveIntervals(DummyActiveRegionWalker walker, List intervals) { List activeIntervals = new ArrayList(); - for (ShardDataProvider dataProvider : createDataProviders(intervals, testBAM)) { - traverse(walker, dataProvider, 0); + for (LocusShardDataProvider dataProvider : createDataProviders(intervals, testBAM)) { + t.traverse(walker, dataProvider, 0); activeIntervals.addAll(walker.isActiveCalls); } @@ -421,10 +411,10 @@ public class TraverseActiveRegionsUnitTest extends BaseTest { } private Map getActiveRegions(DummyActiveRegionWalker walker, List intervals) { - for (ShardDataProvider dataProvider : createDataProviders(intervals, testBAM)) - traverse(walker, dataProvider, 0); + for (LocusShardDataProvider dataProvider : createDataProviders(intervals, testBAM)) + t.traverse(walker, dataProvider, 0); - endTraversal(walker, 0); + t.endTraversal(walker, 0); return walker.mappedActiveRegions; } @@ -485,12 +475,10 @@ public class TraverseActiveRegionsUnitTest extends BaseTest { return record; } - private List createDataProviders(List intervals, String bamFile) { + private List createDataProviders(List intervals, String bamFile) { GenomeAnalysisEngine engine = new GenomeAnalysisEngine(); engine.setGenomeLocParser(genomeLocParser); - GATKArgumentCollection arguments = new GATKArgumentCollection(); - arguments.activeRegionShardType = shardType; // make explicit - engine.setArguments(arguments); + t.initialize(engine); Collection samFiles = new ArrayList(); SAMReaderID readerID = new SAMReaderID(new File(bamFile), new Tags()); @@ -498,65 +486,13 @@ public class TraverseActiveRegionsUnitTest extends BaseTest { SAMDataSource dataSource = new SAMDataSource(samFiles, new ThreadAllocation(), null, genomeLocParser); - List providers = new ArrayList(); - - switch (shardType) { - case LOCUSSHARD: - traverse.initialize(engine); - for (Shard shard : dataSource.createShardIteratorOverIntervals(new GenomeLocSortedSet(genomeLocParser, intervals), new LocusShardBalancer())) { - for (WindowMaker.WindowMakerIterator window : new WindowMaker(shard, genomeLocParser, dataSource.seek(shard), shard.getGenomeLocs())) { - providers.add(new LocusShardDataProvider(shard, shard.getReadProperties(), genomeLocParser, window.getLocus(), window, reference, new ArrayList())); - } - } - break; - case READSHARD: - readShardTraverse.initialize(engine); - for (Shard shard : dataSource.createShardIteratorOverIntervals(new GenomeLocSortedSet(genomeLocParser, intervals), new ReadShardBalancer())) { - providers.add(new ReadShardDataProvider(shard, genomeLocParser, shard.iterator(), reference, new ArrayList())); - } - break; - case ACTIVEREGIONSHARD: - activeRegionShardTraverse.initialize(engine); - for (Shard shard : dataSource.createShardIteratorOverIntervals(new GenomeLocSortedSet(genomeLocParser, intervals), new ActiveRegionShardBalancer())) { - for (WindowMaker.WindowMakerIterator window : new WindowMaker(shard, genomeLocParser, dataSource.seek(shard), shard.getGenomeLocs())) { - providers.add(new ActiveRegionShardDataProvider(shard, shard.getReadProperties(), genomeLocParser, shard.iterator(), window.getLocus(), window, reference, new ArrayList())); - } - } - break; - default: throw new TestException("Invalid shard type"); + List providers = new ArrayList(); + for (Shard shard : dataSource.createShardIteratorOverIntervals(new GenomeLocSortedSet(genomeLocParser, intervals), new LocusShardBalancer())) { + for (WindowMaker.WindowMakerIterator window : new WindowMaker(shard, genomeLocParser, dataSource.seek(shard), shard.getGenomeLocs())) { + providers.add(new LocusShardDataProvider(shard, shard.getReadProperties(), genomeLocParser, window.getLocus(), window, reference, new ArrayList())); + } } return providers; } - - private void traverse(DummyActiveRegionWalker walker, ShardDataProvider dataProvider, int i) { - switch (shardType) { - case LOCUSSHARD: - traverse.traverse(walker, (LocusShardDataProvider) dataProvider, i); - break; - case READSHARD: - readShardTraverse.traverse(walker, (ReadShardDataProvider) dataProvider, i); - break; - case ACTIVEREGIONSHARD: - activeRegionShardTraverse.traverse(walker, (ActiveRegionShardDataProvider) dataProvider, i); - break; - default: throw new TestException("Invalid shard type"); - } - } - - private void endTraversal(DummyActiveRegionWalker walker, int i) { - switch (shardType) { - case LOCUSSHARD: - traverse.endTraversal(walker, i); - break; - case READSHARD: - readShardTraverse.endTraversal(walker, i); - break; - case ACTIVEREGIONSHARD: - activeRegionShardTraverse.endTraversal(walker, i); - break; - default: throw new TestException("Invalid shard type"); - } - } - }