From a7ba88e64918dc4e088f75ff4465ac2cf873f909 Mon Sep 17 00:00:00 2001 From: hanna Date: Thu, 11 Mar 2010 18:40:31 +0000 Subject: [PATCH] Rework the way the MicroScheduler handles locus shards to handle intervals that span shards with less memory consumption. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2981 348d0f76-0448-11de-a6fe-93d51630548a --- .../datasources/providers/AllLocusView.java | 2 +- .../providers/CoveredLocusView.java | 2 +- .../providers/LocusReferenceView.java | 6 +- .../providers/LocusShardDataProvider.java | 74 +++++++++++ .../gatk/datasources/providers/LocusView.java | 48 +------ .../providers/ReadShardDataProvider.java | 57 ++++++++ .../gatk/datasources/providers/ReadView.java | 2 +- .../datasources/providers/RodLocusView.java | 2 +- .../providers/ShardDataProvider.java | 50 +------ .../BlockDrivenSAMDataSource.java | 2 +- .../sting/gatk/executive/Accumulator.java | 7 +- .../executive/HierarchicalMicroScheduler.java | 8 +- .../gatk/executive/LinearMicroScheduler.java | 15 ++- .../sting/gatk/executive/ShardTraverser.java | 17 ++- .../sting/gatk/executive/WindowMaker.java | 123 +++++++++++++----- .../gatk/traversals/TraversalEngine.java | 17 +-- .../gatk/traversals/TraverseDuplicates.java | 32 ++--- .../sting/gatk/traversals/TraverseLoci.java | 41 ++---- .../gatk/traversals/TraverseLocusWindows.java | 66 +++++----- .../sting/gatk/traversals/TraverseReads.java | 29 ++--- .../sting/gatk/walkers/Walker.java | 2 - .../utils/sam/ArtificialReadsTraversal.java | 10 +- .../providers/AllLocusViewTest.java | 2 +- .../providers/CoveredLocusViewTest.java | 2 +- .../providers/LocusReferenceViewTest.java | 6 +- .../providers/LocusViewTemplate.java | 51 ++++++-- .../providers/ReadReferenceViewTest.java | 4 +- .../providers/ReferenceOrderedViewTest.java | 6 +- .../providers/ShardDataProviderTest.java | 2 +- .../gatk/traversals/TraverseReadsTest.java | 5 +- 30 files changed, 389 insertions(+), 301 deletions(-) create mode 100644 java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusShardDataProvider.java create mode 100644 java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadShardDataProvider.java diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/providers/AllLocusView.java b/java/src/org/broadinstitute/sting/gatk/datasources/providers/AllLocusView.java index ba1d7ad72..26ae8a43a 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/providers/AllLocusView.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/providers/AllLocusView.java @@ -44,7 +44,7 @@ public class AllLocusView extends LocusView { * Create a new queue of locus contexts. * @param provider */ - public AllLocusView(ShardDataProvider provider) { + public AllLocusView(LocusShardDataProvider provider) { super( provider ); // Seed the state tracking members with the first possible seek position and the first possible locus context. locusIterator = new GenomeLocusIterator(provider.getLocus()); diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/providers/CoveredLocusView.java b/java/src/org/broadinstitute/sting/gatk/datasources/providers/CoveredLocusView.java index 8491e258e..e1304c968 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/providers/CoveredLocusView.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/providers/CoveredLocusView.java @@ -27,7 +27,7 @@ public class CoveredLocusView extends LocusView { * Create a new queue of locus contexts. * @param provider */ - public CoveredLocusView(ShardDataProvider provider) { + public CoveredLocusView(LocusShardDataProvider provider) { super(provider); } diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusReferenceView.java b/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusReferenceView.java index a46cafb78..993918b0f 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusReferenceView.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusReferenceView.java @@ -69,7 +69,7 @@ public class LocusReferenceView extends ReferenceView { * the walkers, etc. * @param provider source for locus data. */ - public LocusReferenceView( ShardDataProvider provider ) { + public LocusReferenceView( LocusShardDataProvider provider ) { super(provider); initializeBounds(provider); windowStart = windowStop = 0; @@ -80,7 +80,7 @@ public class LocusReferenceView extends ReferenceView { * Create a new locus reference view. * @param provider source for locus data. */ - public LocusReferenceView( Walker walker, ShardDataProvider provider ) { + public LocusReferenceView( Walker walker, LocusShardDataProvider provider ) { super( provider ); initializeBounds(provider); @@ -137,7 +137,7 @@ public class LocusReferenceView extends ReferenceView { * Initialize the bounds of this shard, trimming the bounds so that they match the reference. * @param provider Provider covering the appropriate locus. */ - private void initializeBounds(ShardDataProvider provider) { + private void initializeBounds(LocusShardDataProvider provider) { if(provider.getLocus() != null) { long sequenceLength = reference.getSequenceDictionary().getSequence(provider.getLocus().getContig()).getSequenceLength(); bounds = GenomeLocParser.createGenomeLoc(provider.getLocus().getContig(), diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusShardDataProvider.java b/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusShardDataProvider.java new file mode 100644 index 000000000..168cd98f0 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusShardDataProvider.java @@ -0,0 +1,74 @@ +package org.broadinstitute.sting.gatk.datasources.providers; + +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile; +import org.broadinstitute.sting.gatk.datasources.shards.Shard; +import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; +import org.broadinstitute.sting.gatk.iterators.LocusIterator; +import org.broadinstitute.sting.gatk.Reads; + +import java.util.Collection; + +/** + * Presents data sharded by locus to the traversal engine. + * + * @author mhanna + * @version 0.1 + */ +public class LocusShardDataProvider extends ShardDataProvider { + /** + * Information about the source of the read data. + */ + private final Reads sourceInfo; + + /** + * The particular locus for which data is provided. Should be contained within shard.getGenomeLocs(). + */ + private final GenomeLoc locus; + + /** + * The raw collection of reads. + */ + private final LocusIterator locusIterator; + + /** + * Create a data provider for the shard given the reads and reference. + * @param shard The chunk of data over which traversals happen. + * @param reference A getter for a section of the reference. + */ + public LocusShardDataProvider(Shard shard, Reads sourceInfo, GenomeLoc locus, LocusIterator locusIterator, IndexedFastaSequenceFile reference, Collection rods) { + super(shard,reference,rods); + this.sourceInfo = sourceInfo; + this.locus = locus; + this.locusIterator = locusIterator; + } + + /** + * Returns information about the source of the reads. + * @return Info about the source of the reads. + */ + public Reads getSourceInfo() { + return sourceInfo; + } + + /** + * Gets the locus associated with this shard data provider. + * @return The locus. + */ + public GenomeLoc getLocus() { + return locus; + } + + /** + * Gets an iterator over all the reads bound by this shard. + * @return An iterator over all reads in this shard. + */ + public LocusIterator getLocusIterator() { + return locusIterator; + } + + @Override + public void close() { + super.close(); + } +} diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusView.java b/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusView.java index f4ec2958f..faa3ca318 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusView.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusView.java @@ -53,13 +53,12 @@ public abstract class LocusView extends LocusIterator implements View { */ private AlignmentContext nextLocus = null; - public LocusView(ShardDataProvider provider) { + public LocusView(LocusShardDataProvider provider) { this.locus = provider.getLocus(); - Iterator reads = new FilteringIterator(provider.getReadIterator(), new LocusStreamFilterFunc()); - this.sourceInfo = provider.getReadIterator().getSourceInfo(); + this.sourceInfo = provider.getSourceInfo(); + this.loci = provider.getLocusIterator(); - this.loci = new LocusIteratorByState(reads, sourceInfo); seedNextLocus(); provider.register(this); @@ -150,6 +149,7 @@ public abstract class LocusView extends LocusIterator implements View { nextLocus = loci.next(); // If the location of this shard is available, trim the data stream to match the shard. + // TODO: Much of this functionality is being replaced by the WindowMaker. if(locus != null) { // Iterate through any elements not contained within this shard. while( nextLocus != null && !isContainedInShard(nextLocus.getLocation()) && loci.hasNext() ) @@ -169,44 +169,4 @@ public abstract class LocusView extends LocusIterator implements View { private boolean isContainedInShard(GenomeLoc location) { return locus.containsP(location); } - - /** - * Class to filter out un-handle-able reads from the stream. We currently are skipping - * unmapped reads, non-primary reads, unaligned reads, and duplicate reads. - */ - private static class LocusStreamFilterFunc implements SamRecordFilter { - SAMRecord lastRead = null; - public boolean filterOut(SAMRecord rec) { - boolean result = false; - String why = ""; - if (rec.getReadUnmappedFlag()) { - TraversalStatistics.nUnmappedReads++; - result = true; - why = "Unmapped"; - } else if (rec.getNotPrimaryAlignmentFlag()) { - TraversalStatistics.nNotPrimary++; - result = true; - why = "Not Primary"; - } else if (rec.getAlignmentStart() == SAMRecord.NO_ALIGNMENT_START) { - TraversalStatistics.nBadAlignments++; - result = true; - why = "No alignment start"; - } else if (rec.getDuplicateReadFlag()) { - TraversalStatistics.nDuplicates++; - result = true; - why = "Duplicate reads"; - } - else { - result = false; - } - - if (result) { - TraversalStatistics.nSkippedReads++; - //System.out.printf(" [filter] %s => %b %s", rec.getReadName(), result, why); - } else { - TraversalStatistics.nReads++; - } - return result; - } - } } diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadShardDataProvider.java b/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadShardDataProvider.java new file mode 100644 index 000000000..0141be312 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadShardDataProvider.java @@ -0,0 +1,57 @@ +package org.broadinstitute.sting.gatk.datasources.providers; + +import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; +import org.broadinstitute.sting.gatk.datasources.shards.Shard; +import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile; + +import java.util.Collection; + +/** + * Present data sharded by read to a traversal engine. + * + * @author mhanna + * @version 0.1 + */ +public class ReadShardDataProvider extends ShardDataProvider { + /** + * The raw collection of reads. + */ + private final StingSAMIterator reads; + + /** + * Create a data provider for the shard given the reads and reference. + * @param shard The chunk of data over which traversals happen. + * @param reference A getter for a section of the reference. + */ + public ReadShardDataProvider(Shard shard, StingSAMIterator reads, IndexedFastaSequenceFile reference, Collection rods) { + super(shard,reference,rods); + this.reads = reads; + } + + /** + * Can this data source provide reads? + * @return True if reads are available, false otherwise. + */ + public boolean hasReads() { + return reads != null; + } + + /** + * Gets an iterator over all the reads bound by this shard. + * @return An iterator over all reads in this shard. + */ + public StingSAMIterator getReadIterator() { + return reads; + } + + @Override + public void close() { + super.close(); + + if(reads != null) + reads.close(); + } + +} diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadView.java b/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadView.java index 3b8bdf301..0c4b78a7c 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadView.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadView.java @@ -32,7 +32,7 @@ public class ReadView implements View, Iterable { * Create a new view of the reads given the current data set. * @param provider Source for the data. */ - public ReadView( ShardDataProvider provider ) { + public ReadView( ReadShardDataProvider provider ) { reads = provider.getReadIterator(); } diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/providers/RodLocusView.java b/java/src/org/broadinstitute/sting/gatk/datasources/providers/RodLocusView.java index 0205fed4f..4549b83e1 100644 --- a/java/src/org/broadinstitute/sting/gatk/datasources/providers/RodLocusView.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/providers/RodLocusView.java @@ -56,7 +56,7 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView { * * @param provider */ - public RodLocusView( ShardDataProvider provider ) { + public RodLocusView( LocusShardDataProvider provider ) { super(provider); GenomeLoc loc = provider.getLocus(); diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/providers/ShardDataProvider.java b/java/src/org/broadinstitute/sting/gatk/datasources/providers/ShardDataProvider.java index 06e627436..216f14eb7 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/providers/ShardDataProvider.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/providers/ShardDataProvider.java @@ -1,11 +1,9 @@ package org.broadinstitute.sting.gatk.datasources.providers; -import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; import org.broadinstitute.sting.gatk.datasources.shards.Shard; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile; import org.broadinstitute.sting.utils.StingException; -import org.broadinstitute.sting.utils.GenomeLoc; import java.util.ArrayList; import java.util.List; @@ -27,7 +25,7 @@ import java.util.Collection; * An umbrella class that examines the data passed to the microscheduler and * tries to assemble as much as possible with it. */ -public class ShardDataProvider { +public abstract class ShardDataProvider { /** * An ArrayList of all the views that are examining this data. */ @@ -38,16 +36,6 @@ public class ShardDataProvider { */ private final Shard shard; - /** - * The particular locus for which data is provided. Should be contained within shard.getGenomeLocs(). - */ - private final GenomeLoc locus; - - /** - * The raw collection of reads. - */ - private final StingSAMIterator reads; - /** * Provider of reference data for this particular shard. */ @@ -66,22 +54,6 @@ public class ShardDataProvider { return shard; } - /** - * Gets the locus associated with this shard data provider. - * @return The locus. - */ - public GenomeLoc getLocus() { - return locus; - } - - /** - * Can this data source provide reads? - * @return True if reads are available, false otherwise. - */ - public boolean hasReads() { - return reads != null; - } - /** * Can this data source provide reference information? * @return True if possible, false otherwise. @@ -90,13 +62,6 @@ public class ShardDataProvider { return reference != null; } - /** - * Gets an iterator over all the reads bound by this shard. - * @return An iterator over all reads in this shard. - */ - public StingSAMIterator getReadIterator() { - return reads; - } /** * Gets a pointer into the given indexed fasta sequence file. @@ -118,13 +83,10 @@ public class ShardDataProvider { /** * Create a data provider for the shard given the reads and reference. * @param shard The chunk of data over which traversals happen. - * @param reads A window into the reads for a given region. * @param reference A getter for a section of the reference. */ - public ShardDataProvider(Shard shard,GenomeLoc locus,StingSAMIterator reads,IndexedFastaSequenceFile reference,Collection rods) { + public ShardDataProvider(Shard shard,IndexedFastaSequenceFile reference,Collection rods) { this.shard = shard; - this.locus = locus; - this.reads = reads; this.reference = reference; this.referenceOrderedData = rods; } @@ -132,10 +94,9 @@ public class ShardDataProvider { /** * Skeletal, package protected constructor for unit tests which require a ShardDataProvider. * @param shard the shard - * @param reads reads iterator. */ - ShardDataProvider(Shard shard,GenomeLoc locus,StingSAMIterator reads) { - this(shard,locus,reads,null,null); + ShardDataProvider(Shard shard) { + this(shard,null,null); } /** @@ -177,9 +138,6 @@ public class ShardDataProvider { // Explicitly purge registered views to ensure that we don't end up with circular references // to views, which can in turn hold state. registeredViews.clear(); - - if(reads != null) - reads.close(); } @Override diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/BlockDrivenSAMDataSource.java b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/BlockDrivenSAMDataSource.java index 2959e6c9b..53b9061b3 100644 --- a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/BlockDrivenSAMDataSource.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/BlockDrivenSAMDataSource.java @@ -411,7 +411,7 @@ public class BlockDrivenSAMDataSource extends SAMDataSource { */ public SAMReaders(Reads sourceInfo) { for(File readsFile: sourceInfo.getReadsFiles()) { - SAMFileReader2 reader = new SAMFileReader2(readsFile,true); + SAMFileReader2 reader = new SAMFileReader2(readsFile,false); reader.setValidationStringency(sourceInfo.getValidationStringency()); // If no read group is present, hallucinate one. diff --git a/java/src/org/broadinstitute/sting/gatk/executive/Accumulator.java b/java/src/org/broadinstitute/sting/gatk/executive/Accumulator.java index c120decd0..03d7a0140 100755 --- a/java/src/org/broadinstitute/sting/gatk/executive/Accumulator.java +++ b/java/src/org/broadinstitute/sting/gatk/executive/Accumulator.java @@ -3,10 +3,12 @@ package org.broadinstitute.sting.gatk.executive; import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.gatk.datasources.shards.Shard; import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider; +import org.broadinstitute.sting.gatk.datasources.providers.LocusShardDataProvider; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.Pair; import org.broadinstitute.sting.utils.GenomeLocSortedSet; +import org.broadinstitute.sting.utils.StingException; import java.util.ArrayList; import java.util.List; @@ -170,7 +172,10 @@ public abstract class Accumulator { * Create a holder for interval results if none exists. Add the result to the holder. */ public void accumulate( ShardDataProvider provider, Object result ) { - GenomeLoc location = provider.getLocus(); + if(!(provider instanceof LocusShardDataProvider)) + throw new StingException("Unable to reduce by interval on reads traversals at this time."); + + GenomeLoc location = ((LocusShardDataProvider)provider).getLocus(); // Pull the interval iterator ahead to the interval overlapping this shard fragment. while((currentInterval == null || currentInterval.isBefore(location)) && intervalIterator.hasNext()) diff --git a/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java b/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java index 8ee821ae7..494c534d6 100755 --- a/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java +++ b/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java @@ -6,11 +6,9 @@ import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategy; import org.broadinstitute.sting.gatk.datasources.shards.Shard; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; -import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider; import org.broadinstitute.sting.gatk.io.*; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.utils.StingException; -import org.broadinstitute.sting.utils.GenomeLocSortedSet; import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile; import org.broadinstitute.sting.utils.threading.ThreadPoolMonitor; @@ -277,7 +275,7 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar * @param walker Walker to apply to the dataset. * @param reduceTree Tree of reduces to which to add this shard traverse. */ - protected Future queueNextShardTraverse( Walker walker, ReduceTree reduceTree ) { + protected void queueNextShardTraverse( Walker walker, ReduceTree reduceTree ) { if (traverseTasks.size() == 0) throw new IllegalStateException("Cannot traverse; no pending traversals exist."); @@ -286,7 +284,7 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar ShardTraverser traverser = new ShardTraverser(this, traversalEngine, walker, - new ShardDataProvider(shard,shard.getGenomeLocs().get(0),getReadIterator(shard),reference,rods), + shard, outputTracker); Future traverseResult = threadPool.submit(traverser); @@ -298,8 +296,6 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar // No more data? Let the reduce tree know so it can finish processing what it's got. if (!isShardTraversePending()) reduceTree.complete(); - - return traverseResult; } /** Pulls the next reduce from the queue and runs it. */ diff --git a/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java b/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java index d9aa0e850..807f12b9a 100644 --- a/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java +++ b/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java @@ -1,6 +1,8 @@ package org.broadinstitute.sting.gatk.executive; import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider; +import org.broadinstitute.sting.gatk.datasources.providers.LocusShardDataProvider; +import org.broadinstitute.sting.gatk.datasources.providers.ReadShardDataProvider; import org.broadinstitute.sting.gatk.datasources.shards.Shard; import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategy; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; @@ -9,10 +11,15 @@ import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.gatk.io.DirectOutputTracker; import org.broadinstitute.sting.gatk.io.OutputTracker; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; +import org.broadinstitute.sting.gatk.iterators.LocusIteratorByState; +import org.broadinstitute.sting.gatk.iterators.LocusIterator; +import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile; import java.util.Collection; +import net.sf.picard.filter.FilteringIterator; + /** A micro-scheduling manager for single-threaded execution of a traversal. */ public class LinearMicroScheduler extends MicroScheduler { @@ -49,12 +56,10 @@ public class LinearMicroScheduler extends MicroScheduler { for (Shard shard : shardStrategy) { // New experimental code for managing locus intervals. - // TODO: we'll need a similar but slightly different strategy for dealing with read intervals, so generalize this code. - if((shard.getShardType() == Shard.ShardType.LOCUS || shard.getShardType() == Shard.ShardType.LOCUS_INTERVAL) && - shard.getGenomeLocs().size() > 0) { + if(shard.getShardType() == Shard.ShardType.LOCUS || shard.getShardType() == Shard.ShardType.LOCUS_INTERVAL) { WindowMaker windowMaker = new WindowMaker(getReadIterator(shard),shard.getGenomeLocs()); for(WindowMaker.WindowMakerIterator iterator: windowMaker) { - ShardDataProvider dataProvider = new ShardDataProvider(shard,iterator.getLocus(),iterator,reference,rods); + ShardDataProvider dataProvider = new LocusShardDataProvider(shard,iterator.getSourceInfo(),iterator.getLocus(),iterator,reference,rods); Object result = traversalEngine.traverse(walker, dataProvider, accumulator.getReduceInit()); accumulator.accumulate(dataProvider,result); dataProvider.close(); @@ -62,7 +67,7 @@ public class LinearMicroScheduler extends MicroScheduler { windowMaker.close(); } else { - ShardDataProvider dataProvider = new ShardDataProvider(shard,null,getReadIterator(shard),reference,rods); + ShardDataProvider dataProvider = new ReadShardDataProvider(shard,getReadIterator(shard),reference,rods); Object result = traversalEngine.traverse(walker, dataProvider, accumulator.getReduceInit()); accumulator.accumulate(dataProvider,result); dataProvider.close(); diff --git a/java/src/org/broadinstitute/sting/gatk/executive/ShardTraverser.java b/java/src/org/broadinstitute/sting/gatk/executive/ShardTraverser.java index abaf00ff6..1330f6d3c 100755 --- a/java/src/org/broadinstitute/sting/gatk/executive/ShardTraverser.java +++ b/java/src/org/broadinstitute/sting/gatk/executive/ShardTraverser.java @@ -1,10 +1,12 @@ package org.broadinstitute.sting.gatk.executive; import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider; +import org.broadinstitute.sting.gatk.datasources.providers.LocusShardDataProvider; import org.broadinstitute.sting.gatk.datasources.shards.Shard; import org.broadinstitute.sting.gatk.traversals.TraversalEngine; import org.broadinstitute.sting.gatk.io.ThreadLocalOutputTracker; import org.broadinstitute.sting.gatk.walkers.Walker; +import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; import org.broadinstitute.sting.utils.StingException; import java.util.concurrent.Callable; @@ -26,8 +28,8 @@ import java.util.concurrent.Callable; public class ShardTraverser implements Callable { private HierarchicalMicroScheduler microScheduler; private Walker walker; + private Shard shard; private TraversalEngine traversalEngine; - private ShardDataProvider dataProvider; private ThreadLocalOutputTracker outputTracker; private OutputMergeTask outputMergeTask; @@ -39,12 +41,12 @@ public class ShardTraverser implements Callable { public ShardTraverser( HierarchicalMicroScheduler microScheduler, TraversalEngine traversalEngine, Walker walker, - ShardDataProvider dataProvider, + Shard shard, ThreadLocalOutputTracker outputTracker ) { this.microScheduler = microScheduler; this.walker = walker; this.traversalEngine = traversalEngine; - this.dataProvider = dataProvider; + this.shard = shard; this.outputTracker = outputTracker; } @@ -52,11 +54,16 @@ public class ShardTraverser implements Callable { long startTime = System.currentTimeMillis(); Object accumulator = walker.reduceInit(); + WindowMaker windowMaker = new WindowMaker(microScheduler.getReadIterator(shard),shard.getGenomeLocs()); try { - accumulator = traversalEngine.traverse( walker, dataProvider, accumulator ); + for(WindowMaker.WindowMakerIterator iterator: windowMaker) { + ShardDataProvider dataProvider = new LocusShardDataProvider(shard,iterator.getSourceInfo(),iterator.getLocus(),iterator,microScheduler.reference,microScheduler.rods); + accumulator = traversalEngine.traverse( walker, dataProvider, accumulator ); + dataProvider.close(); + } } finally { - dataProvider.close(); + windowMaker.close(); outputMergeTask = outputTracker.closeStorage(); synchronized(this) { diff --git a/java/src/org/broadinstitute/sting/gatk/executive/WindowMaker.java b/java/src/org/broadinstitute/sting/gatk/executive/WindowMaker.java index 3314aa669..a6945e2b3 100644 --- a/java/src/org/broadinstitute/sting/gatk/executive/WindowMaker.java +++ b/java/src/org/broadinstitute/sting/gatk/executive/WindowMaker.java @@ -2,12 +2,19 @@ package org.broadinstitute.sting.gatk.executive; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; +import org.broadinstitute.sting.gatk.iterators.LocusIterator; +import org.broadinstitute.sting.gatk.iterators.LocusIteratorByState; +import org.broadinstitute.sting.gatk.iterators.LocusOverflowTracker; import org.broadinstitute.sting.gatk.Reads; +import org.broadinstitute.sting.gatk.traversals.TraversalStatistics; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import java.util.*; import net.sf.samtools.SAMRecord; import net.sf.picard.util.PeekableIterator; +import net.sf.picard.filter.FilteringIterator; +import net.sf.picard.filter.SamRecordFilter; /** * Buffer shards of data which may or may not contain multiple loci into @@ -23,10 +30,20 @@ public class WindowMaker implements Iterable, I */ private final Reads sourceInfo; + /** + * Hold the read iterator so that it can be closed later. + */ + private final StingSAMIterator readIterator; + + /** + * The locus overflow tracker. + */ + private final LocusOverflowTracker locusOverflowTracker; + /** * The data source for reads. Will probably come directly from the BAM file. */ - private final PeekableIterator sourceIterator; + private final PeekableIterator sourceIterator; /** * Stores the sequence of intervals that the windowmaker should be tracking. @@ -34,9 +51,9 @@ public class WindowMaker implements Iterable, I private final PeekableIterator intervalIterator; /** - * Which reads should be saved to go into the next interval? + * In the case of monolithic sharding, this case returns whether the only shard has been generated. */ - private Queue overlappingReads = new ArrayDeque(); + private boolean shardGenerated = false; /** * Create a new window maker with the given iterator as a data source, covering @@ -46,8 +63,13 @@ public class WindowMaker implements Iterable, I */ public WindowMaker(StingSAMIterator iterator, List intervals) { this.sourceInfo = iterator.getSourceInfo(); - this.sourceIterator = new PeekableIterator(iterator); - this.intervalIterator = new PeekableIterator(intervals.iterator()); + this.readIterator = iterator; + + LocusIterator locusIterator = new LocusIteratorByState(new FilteringIterator(iterator,new LocusStreamFilterFunc()),sourceInfo); + this.locusOverflowTracker = locusIterator.getLocusOverflowTracker(); + + this.sourceIterator = new PeekableIterator(locusIterator); + this.intervalIterator = intervals.size()>0 ? new PeekableIterator(intervals.iterator()) : null; } public Iterator iterator() { @@ -55,11 +77,12 @@ public class WindowMaker implements Iterable, I } public boolean hasNext() { - return intervalIterator.hasNext(); + return (intervalIterator != null && intervalIterator.hasNext()) || !shardGenerated; } public WindowMakerIterator next() { - return new WindowMakerIterator(intervalIterator.next()); + shardGenerated = true; + return new WindowMakerIterator(intervalIterator != null ? intervalIterator.next() : null); } public void remove() { @@ -67,22 +90,18 @@ public class WindowMaker implements Iterable, I } public void close() { - this.sourceIterator.close(); + this.readIterator.close(); } - public class WindowMakerIterator implements StingSAMIterator { + public class WindowMakerIterator extends LocusIterator { /** * The locus for which this iterator is currently returning reads. */ private final GenomeLoc locus; - /** - * Which reads should be saved to go into the next interval? - */ - private final Queue pendingOverlaps = new ArrayDeque(); - public WindowMakerIterator(GenomeLoc locus) { this.locus = locus; + seedNextLocus(); } public Reads getSourceInfo() { @@ -98,32 +117,68 @@ public class WindowMaker implements Iterable, I } public boolean hasNext() { - if(overlappingReads.size() > 0) return true; - if(sourceIterator.hasNext()) { - SAMRecord nextRead = sourceIterator.peek(); - if((nextRead.getAlignmentStart() >= locus.getStart() && nextRead.getAlignmentStart() <= locus.getStop()) || - (nextRead.getAlignmentEnd() >= locus.getStart() && nextRead.getAlignmentEnd() <= locus.getStop()) || - (nextRead.getAlignmentStart() < locus.getStart() && nextRead.getAlignmentEnd() > locus.getStop())) - return true; - - } - return false; + // locus == null when doing monolithic sharding. + // TODO: Move the monolithic sharding iterator so that we don't have to special case here. + return sourceIterator.hasNext() && (locus == null || sourceIterator.peek().getLocation().overlapsP(locus)); } - public SAMRecord next() { + public AlignmentContext next() { if(!hasNext()) throw new NoSuchElementException("WindowMakerIterator is out of elements for this interval."); - SAMRecord nextRead = overlappingReads.size() > 0 ? overlappingReads.remove() : sourceIterator.next(); - if(intervalIterator.hasNext() && nextRead.getAlignmentEnd() >= intervalIterator.peek().getStart()) - pendingOverlaps.add(nextRead); - return nextRead; + return sourceIterator.next(); } - public void close() { - overlappingReads = pendingOverlaps; + public LocusOverflowTracker getLocusOverflowTracker() { + return locusOverflowTracker; } - public void remove() { - throw new UnsupportedOperationException("Unable to remove from a window maker iterator."); - } + public void seedNextLocus() { + // locus == null when doing monolithic sharding. + // TODO: Move the monolithic sharding iterator so that we don't have to special case here. + if(locus == null) return; + + while(sourceIterator.hasNext() && sourceIterator.peek().getLocation().isBefore(locus)) + sourceIterator.next(); + } } + + /** + * Class to filter out un-handle-able reads from the stream. We currently are skipping + * unmapped reads, non-primary reads, unaligned reads, and duplicate reads. + */ + private static class LocusStreamFilterFunc implements SamRecordFilter { + SAMRecord lastRead = null; + public boolean filterOut(SAMRecord rec) { + boolean result = false; + String why = ""; + if (rec.getReadUnmappedFlag()) { + TraversalStatistics.nUnmappedReads++; + result = true; + why = "Unmapped"; + } else if (rec.getNotPrimaryAlignmentFlag()) { + TraversalStatistics.nNotPrimary++; + result = true; + why = "Not Primary"; + } else if (rec.getAlignmentStart() == SAMRecord.NO_ALIGNMENT_START) { + TraversalStatistics.nBadAlignments++; + result = true; + why = "No alignment start"; + } else if (rec.getDuplicateReadFlag()) { + TraversalStatistics.nDuplicates++; + result = true; + why = "Duplicate reads"; + } + else { + result = false; + } + + if (result) { + TraversalStatistics.nSkippedReads++; + //System.out.printf(" [filter] %s => %b %s", rec.getReadName(), result, why); + } else { + TraversalStatistics.nReads++; + } + return result; + } + } + } diff --git a/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java b/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java index 68ba501fc..bc9e72738 100755 --- a/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java +++ b/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java @@ -2,11 +2,10 @@ package org.broadinstitute.sting.gatk.traversals; import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider; -import org.broadinstitute.sting.gatk.datasources.shards.Shard; import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.utils.GenomeLoc; -public abstract class TraversalEngine { +public abstract class TraversalEngine,ProviderType extends ShardDataProvider> { // Time in milliseconds since we initialized this engine private long startTime = -1; private long lastProgressPrintTime = -1; // When was the last time we printed our progress? @@ -76,18 +75,16 @@ public abstract class TraversalEngine { * A passthrough method so that subclasses can report which types of traversals they're using. * * @param sum Result of the computation. - * @param Type of the computation. */ - public abstract void printOnTraversalDone(T sum); + public abstract void printOnTraversalDone(T sum); /** * Called after a traversal to print out information about the traversal process * * @param type describing this type of traversal * @param sum The reduce result of the traversal - * @param ReduceType of the traversal */ - protected void printOnTraversalDone(final String type, T sum) { + protected void printOnTraversalDone(final String type, T sum) { printProgress(true, type, null); logger.info("Traversal reduce result is " + sum); final long curTime = System.currentTimeMillis(); @@ -115,12 +112,10 @@ public abstract class TraversalEngine { * @param walker the walker to run with * @param dataProvider the data provider that generates data given the shard * @param sum the accumulator - * @param an object of the map type - * @param an object of the reduce type * * @return an object of the reduce type */ - public abstract T traverse(Walker walker, - ShardDataProvider dataProvider, - T sum); + public abstract T traverse(WalkerType walker, + ProviderType dataProvider, + T sum); } diff --git a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseDuplicates.java b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseDuplicates.java index 2e2934d0e..1075cb30a 100755 --- a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseDuplicates.java +++ b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseDuplicates.java @@ -31,17 +31,11 @@ import net.sf.samtools.SAMRecord; import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.datasources.providers.ReadView; -import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider; -import org.broadinstitute.sting.gatk.datasources.providers.ManagingReferenceOrderedView; -import org.broadinstitute.sting.gatk.datasources.shards.ReadShard; -import org.broadinstitute.sting.gatk.datasources.shards.Shard; +import org.broadinstitute.sting.gatk.datasources.providers.ReadShardDataProvider; import org.broadinstitute.sting.gatk.iterators.PushbackIterator; import org.broadinstitute.sting.gatk.walkers.DuplicateWalker; -import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.Pair; -import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import java.util.*; @@ -54,7 +48,7 @@ import java.util.*; *

* This class handles traversing lists of duplicate reads in the new shardable style */ -public class TraverseDuplicates extends TraversalEngine { +public class TraverseDuplicates extends TraversalEngine,ReadShardDataProvider> { /** our log, which we want to capture anything from this class */ protected static Logger logger = Logger.getLogger(TraverseDuplicates.class); @@ -196,19 +190,12 @@ public class TraverseDuplicates extends TraversalEngine { * * @param walker the walker to execute over * @param sum of type T, the return from the walker - * @param the generic type - * @param the return type of the reduce function * * @return the result type T, the product of all the reduce calls */ - public T traverse(Walker walker, - ShardDataProvider dataProvider, - T sum) { - // safety first :-) - if (!(walker instanceof DuplicateWalker)) - throw new IllegalArgumentException("Walker isn't a duplicate walker!"); - DuplicateWalker dupWalker = (DuplicateWalker) walker; - + public T traverse(DuplicateWalker walker, + ReadShardDataProvider dataProvider, + T sum) { FilteringIterator filterIter = new FilteringIterator(new ReadView(dataProvider).iterator(), new duplicateStreamFilterFunc()); PushbackIterator iter = new PushbackIterator(filterIter); @@ -233,10 +220,10 @@ public class TraverseDuplicates extends TraversalEngine { TraversalStatistics.nRecords++; // actually call filter and map, accumulating sum - final boolean keepMeP = dupWalker.filter(site, locus, readSets); + final boolean keepMeP = walker.filter(site, locus, readSets); if (keepMeP) { - M x = dupWalker.map(site, locus, readSets); - sum = dupWalker.reduce(x, sum); + M x = walker.map(site, locus, readSets); + sum = walker.reduce(x, sum); } printProgress(DUPS_STRING, site); @@ -254,9 +241,8 @@ public class TraverseDuplicates extends TraversalEngine { * Temporary override of printOnTraversalDone. * * @param sum Result of the computation. - * @param Type of the result. */ - public void printOnTraversalDone(T sum) { + public void printOnTraversalDone(T sum) { printOnTraversalDone(DUPS_STRING, sum); } } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLoci.java b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLoci.java index b5a7a1aa1..f4cd660d8 100755 --- a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLoci.java +++ b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLoci.java @@ -2,26 +2,21 @@ package org.broadinstitute.sting.gatk.traversals; import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.WalkerManager; -import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.datasources.providers.*; -import org.broadinstitute.sting.gatk.datasources.shards.Shard; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.DataSource; import org.broadinstitute.sting.gatk.walkers.LocusWalker; import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; -import java.util.ArrayList; - /** * A simple solution to iterating over all reference positions over a series of genomic locations. */ -public class TraverseLoci extends TraversalEngine { +public class TraverseLoci extends TraversalEngine,LocusShardDataProvider> { final private static String LOCI_STRING = "sites"; /** @@ -29,24 +24,12 @@ public class TraverseLoci extends TraversalEngine { */ protected static Logger logger = Logger.getLogger(TraversalEngine.class); - public T traverse(Walker walker, ArrayList locations) { - if ( locations.isEmpty() ) - Utils.scareUser("Requested all locations be processed without providing locations to be processed!"); - - throw new UnsupportedOperationException("This traversal type not supported by TraverseLoci"); - } - @Override - public T traverse( Walker walker, - ShardDataProvider dataProvider, - T sum ) { + public T traverse( LocusWalker walker, + LocusShardDataProvider dataProvider, + T sum ) { logger.debug(String.format("TraverseLoci.traverse: Shard is %s", dataProvider)); - if ( !(walker instanceof LocusWalker) ) - throw new IllegalArgumentException("Walker isn't a loci walker!"); - - LocusWalker locusWalker = (LocusWalker)walker; - LocusView locusView = getLocusView( walker, dataProvider ); if ( locusView.hasNext() ) { // trivial optimization to avoid unnecessary processing when there's nothing here at all @@ -87,10 +70,10 @@ public class TraverseLoci extends TraversalEngine { // hold the (longest) stretch of deleted reference bases (if deletions are present in the pileup). ReferenceContext refContext = referenceView.getReferenceContext(location); - final boolean keepMeP = locusWalker.filter(tracker, refContext, locus); + final boolean keepMeP = walker.filter(tracker, refContext, locus); if (keepMeP) { - M x = locusWalker.map(tracker, refContext, locus); - sum = locusWalker.reduce(x, sum); + M x = walker.map(tracker, refContext, locus); + sum = walker.reduce(x, sum); } if (this.maximumIterations > 0 && TraversalStatistics.nRecords > this.maximumIterations) { @@ -110,8 +93,8 @@ public class TraverseLoci extends TraversalEngine { if ( nSkipped > 0 ) { GenomeLoc site = rodLocusView.getLocOneBeyondShard(); AlignmentContext ac = new AlignmentContext(site, new ReadBackedPileup(site), nSkipped); - M x = locusWalker.map(null, null, ac); - sum = locusWalker.reduce(x, sum); + M x = walker.map(null, null, ac); + sum = walker.reduce(x, sum); } } @@ -122,9 +105,8 @@ public class TraverseLoci extends TraversalEngine { * Temporary override of printOnTraversalDone. * * @param sum Result of the computation. - * @param Type of the result. */ - public void printOnTraversalDone( T sum ) { + public void printOnTraversalDone( T sum ) { printOnTraversalDone(LOCI_STRING, sum ); } @@ -132,8 +114,9 @@ public class TraverseLoci extends TraversalEngine { * Gets the best view of loci for this walker given the available data. * @param walker walker to interrogate. * @param dataProvider Data which which to drive the locus view. + * @return A view of the locus data, where one iteration of the locus view maps to one iteration of the traversal. */ - private LocusView getLocusView( Walker walker, ShardDataProvider dataProvider ) { + private LocusView getLocusView( Walker walker, LocusShardDataProvider dataProvider ) { DataSource dataSource = WalkerManager.getWalkerDataSource(walker); if( dataSource == DataSource.READS ) return new CoveredLocusView(dataProvider); diff --git a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLocusWindows.java b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLocusWindows.java index cacca751c..f1e65c1bd 100755 --- a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLocusWindows.java +++ b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLocusWindows.java @@ -1,20 +1,17 @@ package org.broadinstitute.sting.gatk.traversals; import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.datasources.providers.*; -import org.broadinstitute.sting.gatk.datasources.shards.Shard; import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; +import org.broadinstitute.sting.gatk.iterators.LocusIterator; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.LocusWindowWalker; -import org.broadinstitute.sting.gatk.walkers.Walker; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.Pair; -import org.broadinstitute.sting.utils.StingException; -import java.util.ArrayList; -import java.util.List; +import java.util.*; /** * Created by IntelliJ IDEA. @@ -23,26 +20,20 @@ import java.util.List; * Time: 10:26:03 AM * To change this template use File | Settings | File Templates. */ -public class TraverseLocusWindows extends TraversalEngine { +public class TraverseLocusWindows extends TraversalEngine,LocusShardDataProvider> { /** descriptor of the type */ private static final String LOCUS_WINDOW_STRING = "intervals"; - public T traverse( Walker walker, - ShardDataProvider dataProvider, - T sum ) { - - if ( !(walker instanceof LocusWindowWalker) ) - throw new IllegalArgumentException("Walker isn't a locus window walker!"); - - LocusWindowWalker locusWindowWalker = (LocusWindowWalker)walker; + public T traverse( LocusWindowWalker walker, + LocusShardDataProvider dataProvider, + T sum ) { GenomeLoc interval = dataProvider.getLocus(); - ReadView readView = new ReadView( dataProvider ); LocusReferenceView referenceView = new LocusReferenceView( walker, dataProvider ); ReferenceOrderedView referenceOrderedDataView = new ManagingReferenceOrderedView( dataProvider ); - Pair> locus = getLocusContext(readView.iterator(), interval); + Pair> locus = getLocusContext(dataProvider.getLocusIterator(), interval); // The TraverseByLocusWindow expands intervals to cover all reads in a non-standard way. // TODO: Convert this approach to the standard. @@ -58,8 +49,8 @@ public class TraverseLocusWindows extends TraversalEngine { // //final boolean keepMeP = locusWindowWalker.filter(tracker, referenceSubsequence, locus); //if (keepMeP) { - M x = locusWindowWalker.map(tracker, referenceSubsequence, locus.getFirst(), locus.getSecond()); - sum = locusWindowWalker.reduce(x, sum); + M x = walker.map(tracker, referenceSubsequence, locus.getFirst(), locus.getSecond()); + sum = walker.reduce(x, sum); //} printProgress(LOCUS_WINDOW_STRING, locus.getFirst()); @@ -67,26 +58,34 @@ public class TraverseLocusWindows extends TraversalEngine { return sum; } - private Pair> getLocusContext(StingSAMIterator readIter, GenomeLoc interval) { - ArrayList reads = new ArrayList(); + private Pair> getLocusContext(LocusIterator locusIter, GenomeLoc interval) { + List reads = new ArrayList(); boolean done = false; long leftmostIndex = interval.getStart(), rightmostIndex = interval.getStop(); - while (readIter.hasNext() && !done) { - TraversalStatistics.nRecords++; - SAMRecord read = readIter.next(); - reads.add(read); - if ( read.getAlignmentStart() < leftmostIndex ) - leftmostIndex = read.getAlignmentStart(); - if ( read.getAlignmentEnd() > rightmostIndex ) - rightmostIndex = read.getAlignmentEnd(); - if ( this.maximumIterations > 0 && TraversalStatistics.nRecords > this.maximumIterations) { - logger.warn(String.format("Maximum number of reads encountered, terminating traversal " + TraversalStatistics.nRecords)); - done = true; + while(locusIter.hasNext() && !done) { + AlignmentContext alignment = locusIter.next(); + Iterator readIter = alignment.getReads().iterator(); + + while (readIter.hasNext() && !done) { + TraversalStatistics.nRecords++; + + SAMRecord read = readIter.next(); + if(reads.contains(read)) continue; + reads.add(read); + if ( read.getAlignmentStart() < leftmostIndex ) + leftmostIndex = read.getAlignmentStart(); + if ( read.getAlignmentEnd() > rightmostIndex ) + rightmostIndex = read.getAlignmentEnd(); + if ( this.maximumIterations > 0 && TraversalStatistics.nRecords > this.maximumIterations) { + logger.warn(String.format("Maximum number of reads encountered, terminating traversal " + TraversalStatistics.nRecords)); + done = true; + } } } + GenomeLoc window = GenomeLocParser.createGenomeLoc(interval.getContig(), leftmostIndex, rightmostIndex); // AlignmentContext locus = new AlignmentContext(window, reads, null); // if ( readIter.getSourceInfo().getDownsampleToCoverage() != null ) @@ -99,9 +98,8 @@ public class TraverseLocusWindows extends TraversalEngine { * Temporary override of printOnTraversalDone. * TODO: Add some sort of TE.getName() function once all TraversalEngines are ported. * @param sum Result of the computation. - * @param Type of the result. */ - public void printOnTraversalDone( T sum ) { + public void printOnTraversalDone( T sum ) { printOnTraversalDone(LOCUS_WINDOW_STRING, sum ); } diff --git a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java index 58e1d319d..01a0c2cd8 100755 --- a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java +++ b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java @@ -3,14 +3,10 @@ package org.broadinstitute.sting.gatk.traversals; import net.sf.samtools.SAMRecord; import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.WalkerManager; -import org.broadinstitute.sting.gatk.datasources.providers.ReadBasedReferenceOrderedView; -import org.broadinstitute.sting.gatk.datasources.providers.ReadReferenceView; -import org.broadinstitute.sting.gatk.datasources.providers.ReadView; -import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider; +import org.broadinstitute.sting.gatk.datasources.providers.*; import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.DataSource; import org.broadinstitute.sting.gatk.walkers.ReadWalker; -import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.utils.GenomeLocParser; /* @@ -47,7 +43,7 @@ import org.broadinstitute.sting.utils.GenomeLocParser; *

* This class handles traversing by reads in the new shardable style */ -public class TraverseReads extends TraversalEngine { +public class TraverseReads extends TraversalEngine,ReadShardDataProvider> { /** our log, which we want to capture anything from this class */ protected static Logger logger = Logger.getLogger(TraverseReads.class); @@ -60,23 +56,17 @@ public class TraverseReads extends TraversalEngine { * @param walker the walker to traverse with * @param dataProvider the provider of the reads data * @param sum the value of type T, specified by the walker, to feed to the walkers reduce function - * @param the map type of the walker - * @param the reduce type of the walker * @return the reduce variable of the read walker */ - public T traverse(Walker walker, - ShardDataProvider dataProvider, - T sum) { + public T traverse(ReadWalker walker, + ReadShardDataProvider dataProvider, + T sum) { logger.debug(String.format("TraverseReads.traverse Covered dataset is %s", dataProvider)); - if (!(walker instanceof ReadWalker)) - throw new IllegalArgumentException("Walker isn't a read walker!"); - if( !dataProvider.hasReads() ) throw new IllegalArgumentException("Unable to traverse reads; no read data is available."); - ReadWalker readWalker = (ReadWalker) walker; boolean needsReferenceBasesP = WalkerManager.isRequired(walker, DataSource.REFERENCE_BASES); ReadView reads = new ReadView(dataProvider); @@ -101,10 +91,10 @@ public class TraverseReads extends TraversalEngine { // if the read is mapped, create a metadata tracker ReadMetaDataTracker tracker = (read.getReferenceIndex() >= 0) ? rodView.getReferenceOrderedDataForRead(read) : null; - final boolean keepMeP = readWalker.filter(refSeq, read); + final boolean keepMeP = walker.filter(refSeq, read); if (keepMeP) { - M x = readWalker.map(refSeq, read, tracker); // the tracker can be null - sum = readWalker.reduce(x, sum); + M x = walker.map(refSeq, read, tracker); // the tracker can be null + sum = walker.reduce(x, sum); } printProgress(READS_STRING, @@ -119,9 +109,8 @@ public class TraverseReads extends TraversalEngine { * Temporary override of printOnTraversalDone. * TODO: Add some sort of TE.getName() function once all TraversalEngines are ported. * @param sum Result of the computation. - * @param Type of the result. */ - public void printOnTraversalDone( T sum ) { + public void printOnTraversalDone( T sum ) { printOnTraversalDone(READS_STRING, sum ); } } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java b/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java index e33421488..bdbc0b666 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java @@ -16,8 +16,6 @@ import org.apache.log4j.Logger; * To change this template use File | Settings | File Templates. */ public abstract class Walker { - // TODO: Can a walker be templatized so that map and reduce live here? - protected static Logger logger = Logger.getLogger(Walker.class); /** diff --git a/java/src/org/broadinstitute/sting/utils/sam/ArtificialReadsTraversal.java b/java/src/org/broadinstitute/sting/utils/sam/ArtificialReadsTraversal.java index e3af57cdf..b58398a15 100644 --- a/java/src/org/broadinstitute/sting/utils/sam/ArtificialReadsTraversal.java +++ b/java/src/org/broadinstitute/sting/utils/sam/ArtificialReadsTraversal.java @@ -4,7 +4,6 @@ import org.broadinstitute.sting.gatk.traversals.TraversalEngine; import org.broadinstitute.sting.gatk.traversals.TraversalStatistics; import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.gatk.walkers.ReadWalker; -import org.broadinstitute.sting.gatk.datasources.shards.Shard; import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.apache.log4j.Logger; @@ -43,7 +42,7 @@ import net.sf.samtools.SAMFileHeader; * * this class acts as a fake reads traversal engine for testing out reads based traversals. */ -public class ArtificialReadsTraversal extends TraversalEngine { +public class ArtificialReadsTraversal extends TraversalEngine,ShardDataProvider> { public int startingChr = 1; public int endingChr = 5; @@ -77,12 +76,10 @@ public class ArtificialReadsTraversal extends TraversalEngine { * @param walker the walker to traverse with * @param dataProvider the provider of the reads data * @param sum the value of type T, specified by the walker, to feed to the walkers reduce function - * @param the map type of the walker - * @param the reduce type of the walker * * @return the reduce variable of the read walker */ - public T traverse( Walker walker, + public T traverse( Walker walker, ShardDataProvider dataProvider, T sum ) { @@ -126,9 +123,8 @@ public class ArtificialReadsTraversal extends TraversalEngine { * TODO: Add some sort of TE.getName() function once all TraversalEngines are ported. * * @param sum Result of the computation. - * @param Type of the result. */ - public void printOnTraversalDone( T sum ) { + public void printOnTraversalDone( T sum ) { printOnTraversalDone("reads", sum); } diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/providers/AllLocusViewTest.java b/java/test/org/broadinstitute/sting/gatk/datasources/providers/AllLocusViewTest.java index f320c87ba..5797dcd4a 100755 --- a/java/test/org/broadinstitute/sting/gatk/datasources/providers/AllLocusViewTest.java +++ b/java/test/org/broadinstitute/sting/gatk/datasources/providers/AllLocusViewTest.java @@ -26,7 +26,7 @@ import java.util.List; public class AllLocusViewTest extends LocusViewTemplate { @Override - protected LocusView createView(ShardDataProvider provider) { + protected LocusView createView(LocusShardDataProvider provider) { return new AllLocusView(provider); } diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/providers/CoveredLocusViewTest.java b/java/test/org/broadinstitute/sting/gatk/datasources/providers/CoveredLocusViewTest.java index 085dced20..7edb0f854 100755 --- a/java/test/org/broadinstitute/sting/gatk/datasources/providers/CoveredLocusViewTest.java +++ b/java/test/org/broadinstitute/sting/gatk/datasources/providers/CoveredLocusViewTest.java @@ -29,7 +29,7 @@ public class CoveredLocusViewTest extends LocusViewTemplate { * Retrieve a covered locus view. */ @Override - protected LocusView createView(ShardDataProvider provider) { + protected LocusView createView(LocusShardDataProvider provider) { return new CoveredLocusView(provider); } diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusReferenceViewTest.java b/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusReferenceViewTest.java index 2e7b7b646..9aef9e56a 100755 --- a/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusReferenceViewTest.java +++ b/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusReferenceViewTest.java @@ -58,7 +58,7 @@ public class LocusReferenceViewTest extends ReferenceViewTemplate { public void testOverlappingReferenceBases() { Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc(0, sequenceFile.getSequence("chrM").length() - 10, sequenceFile.getSequence("chrM").length()))); - ShardDataProvider dataProvider = new ShardDataProvider(shard, shard.getGenomeLocs().get(0), null, sequenceFile, null); + LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, null, shard.getGenomeLocs().get(0), null, sequenceFile, null); LocusReferenceView view = new LocusReferenceView(dataProvider); char[] results = view.getReferenceBases(GenomeLocParser.createGenomeLoc(0, sequenceFile.getSequence("chrM").length() - 10, sequenceFile.getSequence("chrM").length() + 9)); @@ -75,7 +75,7 @@ public class LocusReferenceViewTest extends ReferenceViewTemplate { public void testBoundsFailure() { Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc(0, 1, 50))); - ShardDataProvider dataProvider = new ShardDataProvider(shard, shard.getGenomeLocs().get(0), null, sequenceFile, null); + LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, null, shard.getGenomeLocs().get(0), null, sequenceFile, null); LocusReferenceView view = new LocusReferenceView(dataProvider); view.getReferenceContext(GenomeLocParser.createGenomeLoc(0, 51)).getBase(); @@ -91,7 +91,7 @@ public class LocusReferenceViewTest extends ReferenceViewTemplate { Shard shard = new LocusShard(Collections.singletonList(loc)); GenomeLocusIterator shardIterator = new GenomeLocusIterator(loc); - ShardDataProvider dataProvider = new ShardDataProvider(shard, loc, null, sequenceFile, null); + LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, null, loc, null, sequenceFile, null); LocusReferenceView view = new LocusReferenceView(dataProvider); while (shardIterator.hasNext()) { diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusViewTemplate.java b/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusViewTemplate.java index 8cb9c16f3..db326675b 100755 --- a/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusViewTemplate.java +++ b/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusViewTemplate.java @@ -5,6 +5,7 @@ import net.sf.picard.reference.ReferenceSequenceFile; import net.sf.samtools.*; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.gatk.Reads; +import org.broadinstitute.sting.gatk.executive.WindowMaker; import org.broadinstitute.sting.gatk.datasources.shards.LocusShard; import org.broadinstitute.sting.gatk.datasources.shards.Shard; import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; @@ -46,7 +47,9 @@ public abstract class LocusViewTemplate extends BaseTest { GenomeLoc shardBounds = GenomeLocParser.createGenomeLoc("chr1", 1, 5); Shard shard = new LocusShard(Collections.singletonList(shardBounds)); - ShardDataProvider dataProvider = new ShardDataProvider(shard, shard.getGenomeLocs().get(0), iterator); + WindowMaker windowMaker = new WindowMaker(iterator,shard.getGenomeLocs()); + WindowMaker.WindowMakerIterator window = windowMaker.next(); + LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, null, window.getLocus(), window, null, null); LocusView view = createView(dataProvider); @@ -60,7 +63,9 @@ public abstract class LocusViewTemplate extends BaseTest { GenomeLoc shardBounds = GenomeLocParser.createGenomeLoc("chr1", 1, 5); Shard shard = new LocusShard(Collections.singletonList(shardBounds)); - ShardDataProvider dataProvider = new ShardDataProvider(shard, shard.getGenomeLocs().get(0), iterator); + WindowMaker windowMaker = new WindowMaker(iterator,shard.getGenomeLocs()); + WindowMaker.WindowMakerIterator window = windowMaker.next(); + LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null); LocusView view = createView(dataProvider); @@ -73,7 +78,9 @@ public abstract class LocusViewTemplate extends BaseTest { SAMRecordIterator iterator = new SAMRecordIterator(read); Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 1, 10))); - ShardDataProvider dataProvider = new ShardDataProvider(shard, shard.getGenomeLocs().get(0), iterator); + WindowMaker windowMaker = new WindowMaker(iterator,shard.getGenomeLocs()); + WindowMaker.WindowMakerIterator window = windowMaker.next(); + LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null); LocusView view = createView(dataProvider); testReadsInContext(view, shard.getGenomeLocs(), Collections.singletonList(read)); @@ -85,7 +92,9 @@ public abstract class LocusViewTemplate extends BaseTest { SAMRecordIterator iterator = new SAMRecordIterator(read); Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 1, 10))); - ShardDataProvider dataProvider = new ShardDataProvider(shard, shard.getGenomeLocs().get(0), iterator); + WindowMaker windowMaker = new WindowMaker(iterator,shard.getGenomeLocs()); + WindowMaker.WindowMakerIterator window = windowMaker.next(); + LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null); LocusView view = createView(dataProvider); testReadsInContext(view, shard.getGenomeLocs(), Collections.singletonList(read)); @@ -97,7 +106,9 @@ public abstract class LocusViewTemplate extends BaseTest { SAMRecordIterator iterator = new SAMRecordIterator(read); Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 1, 10))); - ShardDataProvider dataProvider = new ShardDataProvider(shard, shard.getGenomeLocs().get(0), iterator); + WindowMaker windowMaker = new WindowMaker(iterator,shard.getGenomeLocs()); + WindowMaker.WindowMakerIterator window = windowMaker.next(); + LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null); LocusView view = createView(dataProvider); testReadsInContext(view, shard.getGenomeLocs(), Collections.singletonList(read)); @@ -109,7 +120,9 @@ public abstract class LocusViewTemplate extends BaseTest { SAMRecordIterator iterator = new SAMRecordIterator(read); Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 6, 15))); - ShardDataProvider dataProvider = new ShardDataProvider(shard, shard.getGenomeLocs().get(0), iterator); + WindowMaker windowMaker = new WindowMaker(iterator,shard.getGenomeLocs()); + WindowMaker.WindowMakerIterator window = windowMaker.next(); + LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null); LocusView view = createView(dataProvider); testReadsInContext(view, shard.getGenomeLocs(), Collections.singletonList(read)); @@ -121,7 +134,9 @@ public abstract class LocusViewTemplate extends BaseTest { SAMRecordIterator iterator = new SAMRecordIterator(read); Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 1, 10))); - ShardDataProvider dataProvider = new ShardDataProvider(shard, shard.getGenomeLocs().get(0), iterator); + WindowMaker windowMaker = new WindowMaker(iterator,shard.getGenomeLocs()); + WindowMaker.WindowMakerIterator window = windowMaker.next(); + LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null); LocusView view = createView(dataProvider); testReadsInContext(view, shard.getGenomeLocs(), Collections.singletonList(read)); @@ -134,7 +149,9 @@ public abstract class LocusViewTemplate extends BaseTest { SAMRecordIterator iterator = new SAMRecordIterator(read1, read2); Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 1, 10))); - ShardDataProvider dataProvider = new ShardDataProvider(shard, shard.getGenomeLocs().get(0), iterator); + WindowMaker windowMaker = new WindowMaker(iterator,shard.getGenomeLocs()); + WindowMaker.WindowMakerIterator window = windowMaker.next(); + LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null); LocusView view = createView(dataProvider); List expectedReads = new ArrayList(); @@ -151,7 +168,9 @@ public abstract class LocusViewTemplate extends BaseTest { SAMRecordIterator iterator = new SAMRecordIterator(read1, read2, read3, read4); Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 1, 10))); - ShardDataProvider dataProvider = new ShardDataProvider(shard, shard.getGenomeLocs().get(0), iterator); + WindowMaker windowMaker = new WindowMaker(iterator,shard.getGenomeLocs()); + WindowMaker.WindowMakerIterator window = windowMaker.next(); + LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null); LocusView view = createView(dataProvider); List expectedReads = new ArrayList(); @@ -168,7 +187,9 @@ public abstract class LocusViewTemplate extends BaseTest { SAMRecordIterator iterator = new SAMRecordIterator(read1, read2, read3, read4); Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 1, 10))); - ShardDataProvider dataProvider = new ShardDataProvider(shard, shard.getGenomeLocs().get(0), iterator); + WindowMaker windowMaker = new WindowMaker(iterator,shard.getGenomeLocs()); + WindowMaker.WindowMakerIterator window = windowMaker.next(); + LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null); LocusView view = createView(dataProvider); List expectedReads = new ArrayList(); @@ -187,7 +208,9 @@ public abstract class LocusViewTemplate extends BaseTest { SAMRecordIterator iterator = new SAMRecordIterator(read1, read2, read3, read4, read5, read6); Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 1, 10))); - ShardDataProvider dataProvider = new ShardDataProvider(shard, shard.getGenomeLocs().get(0), iterator); + WindowMaker windowMaker = new WindowMaker(iterator,shard.getGenomeLocs()); + WindowMaker.WindowMakerIterator window = windowMaker.next(); + LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null); LocusView view = createView(dataProvider); List expectedReads = new ArrayList(); @@ -213,7 +236,9 @@ public abstract class LocusViewTemplate extends BaseTest { read07, read08, read09, read10, read11, read12); Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 6, 15))); - ShardDataProvider dataProvider = new ShardDataProvider(shard, shard.getGenomeLocs().get(0), iterator); + WindowMaker windowMaker = new WindowMaker(iterator,shard.getGenomeLocs()); + WindowMaker.WindowMakerIterator window = windowMaker.next(); + LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null); LocusView view = createView(dataProvider); List expectedReads = new ArrayList(); @@ -227,7 +252,7 @@ public abstract class LocusViewTemplate extends BaseTest { * * @return The correct view to test. */ - protected abstract LocusView createView(ShardDataProvider provider); + protected abstract LocusView createView(LocusShardDataProvider provider); /** * Test the reads according to an independently derived context. diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReadReferenceViewTest.java b/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReadReferenceViewTest.java index f378f68cf..5ff4842e6 100755 --- a/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReadReferenceViewTest.java +++ b/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReadReferenceViewTest.java @@ -70,7 +70,7 @@ public class ReadReferenceViewTest extends ReferenceViewTemplate { final long contigStart = selectedContig.getSequenceLength() - (readLength - overlap - 1); final long contigStop = selectedContig.getSequenceLength() + overlap; - ShardDataProvider dataProvider = new ShardDataProvider(null,null,null,sequenceFile,null); + ReadShardDataProvider dataProvider = new ReadShardDataProvider(null,null,sequenceFile,null); ReadReferenceView view = new ReadReferenceView(dataProvider); SAMRecord rec = buildSAMRecord(selectedContig.getSequenceName(),(int)contigStart,(int)contigStop); @@ -97,7 +97,7 @@ public class ReadReferenceViewTest extends ReferenceViewTemplate { protected void validateLocation( GenomeLoc loc ) { SAMRecord read = buildSAMRecord( loc.getContig(), (int)loc.getStart(), (int)loc.getStop() ); - ShardDataProvider dataProvider = new ShardDataProvider(null,null,null,sequenceFile,null); + ReadShardDataProvider dataProvider = new ReadShardDataProvider(null,null,sequenceFile,null); ReadReferenceView view = new ReadReferenceView(dataProvider); ReferenceSequence expectedAsSeq = sequenceFile.getSubsequenceAt(loc.getContig(),loc.getStart(),loc.getStop()); diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedViewTest.java b/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedViewTest.java index 858cff690..81e1e7b7f 100755 --- a/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedViewTest.java +++ b/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedViewTest.java @@ -53,7 +53,7 @@ public class ReferenceOrderedViewTest extends BaseTest { @Test public void testNoBindings() { Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chrM",1,30))); - ShardDataProvider provider = new ShardDataProvider(shard, shard.getGenomeLocs().get(0), null, seq, Collections.emptyList()); + ShardDataProvider provider = new LocusShardDataProvider(shard, null, shard.getGenomeLocs().get(0), null, seq, Collections.emptyList()); ReferenceOrderedView view = new ManagingReferenceOrderedView( provider ); RefMetaDataTracker tracker = view.getReferenceOrderedDataAtLocus(GenomeLocParser.createGenomeLoc("chrM",10)); @@ -71,7 +71,7 @@ public class ReferenceOrderedViewTest extends BaseTest { Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chrM",1,30))); - ShardDataProvider provider = new ShardDataProvider(shard, shard.getGenomeLocs().get(0), null, seq, Collections.singletonList(dataSource)); + ShardDataProvider provider = new LocusShardDataProvider(shard, null, shard.getGenomeLocs().get(0), null, seq, Collections.singletonList(dataSource)); ReferenceOrderedView view = new ManagingReferenceOrderedView( provider ); RefMetaDataTracker tracker = view.getReferenceOrderedDataAtLocus(GenomeLocParser.createGenomeLoc("chrM",20)); @@ -97,7 +97,7 @@ public class ReferenceOrderedViewTest extends BaseTest { Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chrM",1,30))); - ShardDataProvider provider = new ShardDataProvider(shard, shard.getGenomeLocs().get(0), null, seq, Arrays.asList(dataSource1,dataSource2)); + ShardDataProvider provider = new LocusShardDataProvider(shard, null, shard.getGenomeLocs().get(0), null, seq, Arrays.asList(dataSource1,dataSource2)); ReferenceOrderedView view = new ManagingReferenceOrderedView( provider ); RefMetaDataTracker tracker = view.getReferenceOrderedDataAtLocus(GenomeLocParser.createGenomeLoc("chrM",20)); diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/providers/ShardDataProviderTest.java b/java/test/org/broadinstitute/sting/gatk/datasources/providers/ShardDataProviderTest.java index faf20c7c9..5ccca1961 100755 --- a/java/test/org/broadinstitute/sting/gatk/datasources/providers/ShardDataProviderTest.java +++ b/java/test/org/broadinstitute/sting/gatk/datasources/providers/ShardDataProviderTest.java @@ -34,7 +34,7 @@ public class ShardDataProviderTest extends BaseTest { @Before public void createProvider() { - provider = new ShardDataProvider( null,null,null,null,null ); + provider = new LocusShardDataProvider( null,null,null,null,null,null ); } /** diff --git a/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsTest.java b/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsTest.java index 39d12aa5d..69a6195dc 100755 --- a/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsTest.java +++ b/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsTest.java @@ -4,6 +4,7 @@ import net.sf.picard.reference.ReferenceSequenceFile; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.gatk.Reads; import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider; +import org.broadinstitute.sting.gatk.datasources.providers.ReadShardDataProvider; import org.broadinstitute.sting.gatk.datasources.shards.Shard; import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategy; import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategyFactory; @@ -132,7 +133,7 @@ public class TraverseReadsTest extends BaseTest { fail("Shard == null"); } - ShardDataProvider dataProvider = new ShardDataProvider(shard,null,dataSource.seek(shard),null,null); + ShardDataProvider dataProvider = new ReadShardDataProvider(shard,dataSource.seek(shard),null,null); accumulator = traversalEngine.traverse(countReadWalker, dataProvider, accumulator); dataProvider.close(); @@ -178,7 +179,7 @@ public class TraverseReadsTest extends BaseTest { fail("Shard == null"); } - ShardDataProvider dataProvider = new ShardDataProvider(shard,null,dataSource.seek(shard),null,null); + ShardDataProvider dataProvider = new ReadShardDataProvider(shard,dataSource.seek(shard),null,null); accumulator = traversalEngine.traverse(countReadWalker, dataProvider, accumulator); dataProvider.close(); }