From 199b43fcf2abb365a7dc5bf9907f49b749a784e5 Mon Sep 17 00:00:00 2001 From: hanna Date: Thu, 25 Feb 2010 00:16:50 +0000 Subject: [PATCH] Reduce by interval alterations to interface with new sharding system. This checkin with be followed by a simplification of some of the locus traversal code. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2886 348d0f76-0448-11de-a6fe-93d51630548a --- .../sting/gatk/GenomeAnalysisEngine.java | 22 +++- .../datasources/providers/AllLocusView.java | 3 +- .../providers/LocusReferenceView.java | 14 +-- .../gatk/datasources/providers/LocusView.java | 17 +-- .../datasources/providers/RodLocusView.java | 16 ++- .../providers/ShardDataProvider.java | 39 ++++-- .../IndexDrivenSAMDataSource.java | 8 +- .../simpleDataSources/ReadStreamPointer.java | 14 +-- .../ReferenceOrderedDataSource.java | 2 +- .../simpleDataSources/ResourcePool.java | 29 +---- .../sting/gatk/executive/Accumulator.java | 54 ++++++-- .../executive/HierarchicalMicroScheduler.java | 11 +- .../gatk/executive/LinearMicroScheduler.java | 33 +++-- .../sting/gatk/executive/MicroScheduler.java | 36 +++--- .../sting/gatk/executive/ShardTraverser.java | 5 +- .../sting/gatk/executive/WindowMaker.java | 116 ++++++++++++++++++ .../gatk/traversals/TraversalEngine.java | 2 - .../gatk/traversals/TraverseDuplicates.java | 2 - .../sting/gatk/traversals/TraverseLoci.java | 3 +- .../gatk/traversals/TraverseLocusWindows.java | 5 +- .../sting/gatk/traversals/TraverseReads.java | 7 +- .../utils/sam/ArtificialReadsTraversal.java | 2 - .../providers/LocusReferenceViewTest.java | 6 +- .../providers/LocusViewTemplate.java | 24 ++-- .../providers/ReadReferenceViewTest.java | 4 +- .../providers/ReferenceOrderedViewTest.java | 6 +- .../providers/ShardDataProviderTest.java | 2 +- .../ArtificialResourcePool.java | 2 +- .../ReferenceOrderedDataPoolTest.java | 14 +-- .../gatk/traversals/TraverseReadsTest.java | 8 +- .../gatk/walkers/PrintReadsWalkerTest.java | 2 +- 31 files changed, 323 insertions(+), 185 deletions(-) create mode 100644 java/src/org/broadinstitute/sting/gatk/executive/WindowMaker.java diff --git a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index 9b85ccb8a..aea61ba0a 100755 --- a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -88,6 +88,11 @@ public class GenomeAnalysisEngine { */ private Map inputs = new HashMap(); + /** + * Collection of intervals used by the walker. + */ + private GenomeLocSortedSet intervals = null; + /** * Collection of outputs used by the walker. */ @@ -156,14 +161,13 @@ public class GenomeAnalysisEngine { // create the output streams initializeOutputStreams(my_walker, microScheduler.getOutputTracker()); - GenomeLocSortedSet locs = null; if (argCollection.intervals != null && argCollection.intervalMerging.check()) { - locs = GenomeLocSortedSet.createSetFromList(parseIntervalRegion(argCollection.intervals)); + intervals = GenomeLocSortedSet.createSetFromList(parseIntervalRegion(argCollection.intervals)); } ShardStrategy shardStrategy = getShardStrategy(my_walker, microScheduler.getReference(), - locs, + intervals, argCollection.maximumEngineIterations, readsDataSource != null ? readsDataSource.getReadsInfo().getValidationExclusionList() : null); @@ -281,11 +285,11 @@ public class GenomeAnalysisEngine { // we need to verify different parameter based on the walker type if (my_walker instanceof LocusWalker || my_walker instanceof LocusWindowWalker) { // create the MicroScheduler - microScheduler = MicroScheduler.create(my_walker, readsDataSource, referenceDataSource, rodDataSources, argCollection.numberOfThreads); + microScheduler = MicroScheduler.create(this,my_walker,readsDataSource,referenceDataSource,rodDataSources,argCollection.numberOfThreads); } else if (my_walker instanceof ReadWalker || my_walker instanceof DuplicateWalker) { if (argCollection.referenceFile == null) Utils.scareUser(String.format("Read-based traversals require a reference file but none was given")); - microScheduler = MicroScheduler.create(my_walker, readsDataSource, referenceDataSource, rodDataSources, argCollection.numberOfThreads); + microScheduler = MicroScheduler.create(this,my_walker,readsDataSource,referenceDataSource,rodDataSources,argCollection.numberOfThreads); } else { Utils.scareUser(String.format("Unable to create the appropriate TraversalEngine for analysis type %s", walkerManager.getName(my_walker.getClass()))); } @@ -803,6 +807,14 @@ public class GenomeAnalysisEngine { return this.argCollection; } + /** + * Get the list of intervals passed to the engine. + * @return List of intervals. + */ + public GenomeLocSortedSet getIntervals() { + return this.intervals; + } + /** * Gets the list of filters employed by this walker. * @return Collection of filters (actual instances) used by this walker. diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/providers/AllLocusView.java b/java/src/org/broadinstitute/sting/gatk/datasources/providers/AllLocusView.java index 6770f8e7d..d4978373a 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/providers/AllLocusView.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/providers/AllLocusView.java @@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.datasources.providers; import java.util.NoSuchElementException; import java.util.ArrayList; +import java.util.Collections; import org.broadinstitute.sting.gatk.iterators.GenomeLocusIterator; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; @@ -46,7 +47,7 @@ public class AllLocusView extends LocusView { public AllLocusView(ShardDataProvider provider) { super( provider ); // Seed the state tracking members with the first possible seek position and the first possible locus context. - locusIterator = new GenomeLocusIterator( provider.getShard().getGenomeLocs() ); + locusIterator = new GenomeLocusIterator( Collections.singletonList(provider.getLocus()) ); if( locusIterator.hasNext() ) { nextPosition = locusIterator.next(); nextLocus = hasNextLocus() ? nextLocus() : createEmptyLocus(nextPosition); diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusReferenceView.java b/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusReferenceView.java index aa5ec272e..64b38bf6b 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusReferenceView.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusReferenceView.java @@ -134,19 +134,7 @@ public class LocusReferenceView extends ReferenceView { } private void initializeBounds(ShardDataProvider provider) { - List loci = provider.getShard().getGenomeLocs(); - - if(loci.isEmpty()) { - bounds = null; - return; - } - - GenomeLoc firstLocus = loci.get(0); - GenomeLoc lastLocus = loci.get(loci.size()-1); - if(firstLocus.getContigIndex() != lastLocus.getContigIndex()) - throw new StingException("LocusReferenceView currently only supports multiple intervals on the same contig."); - - bounds = GenomeLocParser.createGenomeLoc(firstLocus.getContig(),firstLocus.getStart(),lastLocus.getStop()); + bounds = provider.getLocus(); } /** diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusView.java b/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusView.java index 4e5e2cc1b..f4ec2958f 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusView.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusView.java @@ -5,7 +5,6 @@ import net.sf.picard.filter.SamRecordFilter; import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.gatk.Reads; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.datasources.shards.Shard; import org.broadinstitute.sting.gatk.iterators.LocusIterator; import org.broadinstitute.sting.gatk.iterators.LocusIteratorByState; import org.broadinstitute.sting.gatk.traversals.TraversalStatistics; @@ -34,9 +33,9 @@ import java.util.NoSuchElementException; public abstract class LocusView extends LocusIterator implements View { /** - * The shard bounding this view. + * The locus bounding this view. */ - protected Shard shard; + protected GenomeLoc locus; /** * Source info for this view. Informs the class about downsampling requirements. @@ -55,7 +54,7 @@ public abstract class LocusView extends LocusIterator implements View { private AlignmentContext nextLocus = null; public LocusView(ShardDataProvider provider) { - this.shard = provider.getShard(); + this.locus = provider.getLocus(); Iterator reads = new FilteringIterator(provider.getReadIterator(), new LocusStreamFilterFunc()); this.sourceInfo = provider.getReadIterator().getSourceInfo(); @@ -79,7 +78,7 @@ public abstract class LocusView extends LocusIterator implements View { */ public void close() { // Set everything to null with the hope of failing fast. - shard = null; + locus = null; sourceInfo = null; loci = null; @@ -151,7 +150,7 @@ public abstract class LocusView extends LocusIterator implements View { nextLocus = loci.next(); // If the location of this shard is available, trim the data stream to match the shard. - if(!shard.getGenomeLocs().isEmpty()) { + if(locus != null) { // Iterate through any elements not contained within this shard. while( nextLocus != null && !isContainedInShard(nextLocus.getLocation()) && loci.hasNext() ) nextLocus = loci.next(); @@ -168,11 +167,7 @@ public abstract class LocusView extends LocusIterator implements View { * @return True if the given location is contained within the shard. False otherwise. */ private boolean isContainedInShard(GenomeLoc location) { - for(GenomeLoc shardLocation: shard.getGenomeLocs()) { - if(shardLocation.containsP(location)) - return true; - } - return false; + return locus.containsP(location); } /** diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/providers/RodLocusView.java b/java/src/org/broadinstitute/sting/gatk/datasources/providers/RodLocusView.java index 17db1ab4f..990cc9346 100644 --- a/java/src/org/broadinstitute/sting/gatk/datasources/providers/RodLocusView.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/providers/RodLocusView.java @@ -57,17 +57,17 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView { public RodLocusView( ShardDataProvider provider ) { super(provider); - GenomeLoc firstLoc = provider.getShard().getGenomeLocs().get(0); + GenomeLoc loc = provider.getLocus(); List< Iterator> > iterators = new LinkedList< Iterator> >(); for( ReferenceOrderedDataSource dataSource: provider.getReferenceOrderedData() ) { - if ( DEBUG ) System.out.printf("Shard is %s%n", provider.getShard().getGenomeLocs()); + if ( DEBUG ) System.out.printf("Shard is %s%n", provider.getLocus()); // grab the ROD iterator from the data source, and compute the first location in this shard, forwarding // the iterator to immediately before it, so that it can be added to the merging iterator primed for // next() to return the first real ROD in this shard SeekableRODIterator it = (SeekableRODIterator)dataSource.seek(provider.getShard()); - it.seekForward(GenomeLocParser.createGenomeLoc(firstLoc.getContigIndex(), firstLoc.getStart()-1)); + it.seekForward(GenomeLocParser.createGenomeLoc(loc.getContigIndex(), loc.getStart()-1)); states.add(new ReferenceOrderedDataState(dataSource,it)); @@ -94,8 +94,7 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView { if ( ! rodQueue.hasNext() ) return false; else { - GenomeLoc lastLocus = shard.getGenomeLocs().get(shard.getGenomeLocs().size()-1); - return ! rodQueue.peekLocation().isPast(lastLocus); + return ! rodQueue.peekLocation().isPast(locus); } } @@ -160,12 +159,12 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView { */ private long getSkippedBases( GenomeLoc currentPos ) { // the minus - is because if lastLoc == null, you haven't yet seen anything in this interval, so it should also be counted as skipped - Long compStop = lastLoc == null ? shard.getGenomeLocs().get(0).getStart() - 1 : lastLoc.getStop(); + Long compStop = lastLoc == null ? locus.getStart() - 1 : lastLoc.getStop(); long skippedBases = currentPos.getStart() - compStop - 1; if ( skippedBases < -1 ) { // minus 1 value is ok throw new RuntimeException(String.format("BUG: skipped bases=%d is < 0: cur=%s vs. last=%s, shard=%s", - skippedBases, currentPos, lastLoc, shard.getGenomeLocs())); + skippedBases, currentPos, lastLoc, locus)); } return Math.max(skippedBases, 0); } @@ -175,8 +174,7 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView { * @return */ public GenomeLoc getLocOneBeyondShard() { - GenomeLoc lastLocus = !shard.getGenomeLocs().isEmpty() ? shard.getGenomeLocs().get(shard.getGenomeLocs().size()-1) : null; - return GenomeLocParser.createGenomeLoc(lastLocus.getContigIndex(),lastLocus.getStop()+1); + return GenomeLocParser.createGenomeLoc(locus.getContigIndex(),locus.getStop()+1); } /** diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/providers/ShardDataProvider.java b/java/src/org/broadinstitute/sting/gatk/datasources/providers/ShardDataProvider.java index 4c0ce57e5..06e627436 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/providers/ShardDataProvider.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/providers/ShardDataProvider.java @@ -1,18 +1,15 @@ package org.broadinstitute.sting.gatk.datasources.providers; import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; -import org.broadinstitute.sting.gatk.iterators.NullSAMIterator; import org.broadinstitute.sting.gatk.datasources.shards.Shard; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; -import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource; -import org.broadinstitute.sting.gatk.Reads; import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile; import org.broadinstitute.sting.utils.StingException; +import org.broadinstitute.sting.utils.GenomeLoc; import java.util.ArrayList; import java.util.List; import java.util.Collection; -import java.io.File; /** * User: hanna * Date: May 8, 2009 @@ -41,6 +38,11 @@ public class ShardDataProvider { */ private final Shard shard; + /** + * The particular locus for which data is provided. Should be contained within shard.getGenomeLocs(). + */ + private final GenomeLoc locus; + /** * The raw collection of reads. */ @@ -64,6 +66,14 @@ public class ShardDataProvider { return shard; } + /** + * Gets the locus associated with this shard data provider. + * @return The locus. + */ + public GenomeLoc getLocus() { + return locus; + } + /** * Can this data source provide reads? * @return True if reads are available, false otherwise. @@ -111,10 +121,10 @@ public class ShardDataProvider { * @param reads A window into the reads for a given region. * @param reference A getter for a section of the reference. */ - public ShardDataProvider( Shard shard, SAMDataSource reads, IndexedFastaSequenceFile reference, Collection rods) { + public ShardDataProvider(Shard shard,GenomeLoc locus,StingSAMIterator reads,IndexedFastaSequenceFile reference,Collection rods) { this.shard = shard; - // Provide basic reads information. - this.reads = (reads != null) ? reads.seek( shard ) : new NullSAMIterator(new Reads(new ArrayList())); + this.locus = locus; + this.reads = reads; this.reference = reference; this.referenceOrderedData = rods; } @@ -124,11 +134,8 @@ public class ShardDataProvider { * @param shard the shard * @param reads reads iterator. */ - ShardDataProvider( Shard shard, StingSAMIterator reads ) { - this.shard = shard; - this.reads = reads; - this.reference = null; - this.referenceOrderedData = null; + ShardDataProvider(Shard shard,GenomeLoc locus,StingSAMIterator reads) { + this(shard,locus,reads,null,null); } /** @@ -171,6 +178,12 @@ public class ShardDataProvider { // to views, which can in turn hold state. registeredViews.clear(); - reads.close(); + if(reads != null) + reads.close(); + } + + @Override + public String toString() { + return shard.toString(); } } diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/IndexDrivenSAMDataSource.java b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/IndexDrivenSAMDataSource.java index a922e8aeb..34fabdd1d 100644 --- a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/IndexDrivenSAMDataSource.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/IndexDrivenSAMDataSource.java @@ -217,7 +217,7 @@ public class IndexDrivenSAMDataSource extends SAMDataSource { throw new StingException("This SAMDataSource does not support multiple intervals within a single shard"); GenomeLoc shardGenomeLoc = shard.getGenomeLocs().get(0); - return createIterator( new MappedStreamSegment(Collections.singletonList(shardGenomeLoc)) ); + return createIterator( new MappedStreamSegment(shardGenomeLoc) ); } /** @@ -243,11 +243,11 @@ public class IndexDrivenSAMDataSource extends SAMDataSource { if (!intoUnmappedReads) { if (lastReadPos == null) { lastReadPos = GenomeLocParser.createGenomeLoc(getHeader().getSequenceDictionary().getSequence(0).getSequenceIndex(), 0, Integer.MAX_VALUE); - iter = createIterator(new MappedStreamSegment(Collections.singletonList(lastReadPos))); + iter = createIterator(new MappedStreamSegment(lastReadPos)); return InitialReadIterator(readShard.getSize(), iter); } else { lastReadPos = GenomeLocParser.setStop(lastReadPos,-1); - iter = fastMappedReadSeek(readShard.getSize(), StingSAMIteratorAdapter.adapt(reads, createIterator(new MappedStreamSegment(Collections.singletonList(lastReadPos))))); + iter = fastMappedReadSeek(readShard.getSize(), StingSAMIteratorAdapter.adapt(reads, createIterator(new MappedStreamSegment(lastReadPos)))); } if (intoUnmappedReads && !includeUnmappedReads) @@ -347,7 +347,7 @@ public class IndexDrivenSAMDataSource extends SAMDataSource { readsTaken = readCount; readsSeenAtLastPos = 0; lastReadPos = GenomeLocParser.setStop(lastReadPos,-1); - CloseableIterator ret = createIterator(new MappedStreamSegment(Collections.singletonList(lastReadPos))); + CloseableIterator ret = createIterator(new MappedStreamSegment(lastReadPos)); return new BoundedReadIterator(StingSAMIteratorAdapter.adapt(reads, ret), readCount); } } diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReadStreamPointer.java b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReadStreamPointer.java index 269d8ae84..b60bdd7f9 100644 --- a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReadStreamPointer.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReadStreamPointer.java @@ -146,10 +146,9 @@ class MappedReadStreamPointer extends ReadStreamPointer { // The getStop() + 1 is a hack to work around an old bug in the way Picard created SAM files where queries // over a given interval would occasionally not pick up the last read in that interval. - GenomeLoc bounds = mappedSegment.getBounds(); - mergingIterator.queryOverlapping( bounds.getContig(), - (int)bounds.getStart(), - (int)bounds.getStop()+ PlusOneFixIterator.PLUS_ONE_FIX_CONSTANT); + mergingIterator.queryOverlapping( mappedSegment.locus.getContig(), + (int)mappedSegment.locus.getStart(), + (int)mappedSegment.locus.getStop()+ PlusOneFixIterator.PLUS_ONE_FIX_CONSTANT); return StingSAMIteratorAdapter.adapt(sourceInfo,mergingIterator); } @@ -165,10 +164,9 @@ class MappedReadStreamPointer extends ReadStreamPointer { MergingSamRecordIterator2 mergingIterator = new MergingSamRecordIterator2( headerMerger, sourceInfo ); // NOTE: explicitly not using the queryOverlapping hack above since, according to the above criteria, // we'd only miss reads that are one base long when performing a contained query. - GenomeLoc bounds = mappedSegment.getBounds(); - mergingIterator.queryContained( bounds.getContig(), - (int)bounds.getStart(), - (int)bounds.getStop()+1); + mergingIterator.queryContained( mappedSegment.locus.getContig(), + (int)mappedSegment.locus.getStart(), + (int)mappedSegment.locus.getStop()+1); return StingSAMIteratorAdapter.adapt(sourceInfo,mergingIterator); } diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataSource.java b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataSource.java index 9204d92cc..8cab5ffc1 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataSource.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataSource.java @@ -67,7 +67,7 @@ public class ReferenceOrderedDataSource implements SimpleDataSource { * @return Iterator through the data. */ public Iterator seek( Shard shard ) { - DataStreamSegment dataStreamSegment = shard.getGenomeLocs().size() != 0 ? new MappedStreamSegment(shard.getGenomeLocs()) : new EntireStream(); + DataStreamSegment dataStreamSegment = shard.getGenomeLocs().size() != 0 ? new MappedStreamSegment(shard.getGenomeLocs().get(0)) : new EntireStream(); SeekableRODIterator iterator = iteratorPool.iterator(dataStreamSegment); return iterator; } diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ResourcePool.java b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ResourcePool.java index 7ab281f9e..91d92a7b2 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ResourcePool.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ResourcePool.java @@ -170,39 +170,18 @@ class EntireStream implements DataStreamSegment { * Models a mapped position within a stream of GATK input data. */ class MappedStreamSegment implements DataStreamSegment { - public final List loci; + public final GenomeLoc locus; /** * Retrieves the first location covered by a mapped stream segment. * @return Location of the first base in this segment. */ public GenomeLoc getFirstLocation() { - GenomeLoc firstLocus = loci.get(0); - return GenomeLocParser.createGenomeLoc(firstLocus.getContigIndex(),firstLocus.getStart()); + return GenomeLocParser.createGenomeLoc(locus.getContigIndex(),locus.getStart()); } - /** - * Get the total range of the given mapped stream segment. - * @return A GenomeLoc consisting of the first base of the first locus to the last base of the last locus, inclusive. - */ - public GenomeLoc getBounds() { - GenomeLoc firstLocus = loci.get(0); - GenomeLoc lastLocus = loci.get(loci.size()-1); - return GenomeLocParser.createGenomeLoc(getFirstLocation().getContigIndex(),firstLocus.getStart(),lastLocus.getStop()); - } - - public MappedStreamSegment( List loci ) { - // Validate that the list of loci is non-empty. - if(loci.size() == 0) - throw new StingException("Cannot map to a locus of length 0."); - - // Validate that all loci in the given list are from the same contig. - int contigIndex = loci.get(0).getContigIndex(); - for(GenomeLoc locus: loci) { - if(contigIndex != locus.getContigIndex()) - throw new StingException("All loci in a MappedStreamSegment must be on the same contig."); - } - this.loci = loci; + public MappedStreamSegment(GenomeLoc locus) { + this.locus = locus; } } diff --git a/java/src/org/broadinstitute/sting/gatk/executive/Accumulator.java b/java/src/org/broadinstitute/sting/gatk/executive/Accumulator.java index 57dbe64bf..8022cbcce 100755 --- a/java/src/org/broadinstitute/sting/gatk/executive/Accumulator.java +++ b/java/src/org/broadinstitute/sting/gatk/executive/Accumulator.java @@ -2,11 +2,15 @@ package org.broadinstitute.sting.gatk.executive; import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.gatk.datasources.shards.Shard; +import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider; +import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.Pair; +import org.broadinstitute.sting.utils.GenomeLocSortedSet; import java.util.ArrayList; import java.util.List; +import java.util.Iterator; /** * User: hanna * Date: May 18, 2009 @@ -43,9 +47,9 @@ public abstract class Accumulator { * @param walker Walker for which to build an accumulator. * @return Accumulator suitable for this walker.s */ - public static Accumulator create( Walker walker ) { + public static Accumulator create( GenomeAnalysisEngine engine, Walker walker ) { if( walker.isReduceByInterval() ) - return new IntervalAccumulator( walker ); + return new IntervalAccumulator( walker, engine.getIntervals() ); else return new StandardAccumulator( walker ); } @@ -61,7 +65,7 @@ public abstract class Accumulator { * @param result Result of the most recent accumulation. * @return the newest accumulation of the given data. */ - public abstract void accumulate( Shard shard, Object result ); + public abstract void accumulate( ShardDataProvider provider, Object result ); /** * Finishes off the traversal. Submits accumulated results to @@ -103,7 +107,7 @@ public abstract class Accumulator { * The result of the accumulator in a non-intervals walker * already takes the accumulation into account. return the result. */ - public void accumulate( Shard shard, Object result ) { this.accumulator = result; } + public void accumulate( ShardDataProvider provider, Object result ) { this.accumulator = result; } /** * The result of the traversal is the list of accumulated intervals. @@ -119,25 +123,51 @@ public abstract class Accumulator { * and aggregates those results into a single list. */ private static class IntervalAccumulator extends Accumulator { - private List> intervalAccumulator = new ArrayList>(); + /** + * An iterator through all intervals in the series. + */ + private final Iterator intervalIterator; - protected IntervalAccumulator( Walker walker ) { + /** + * For which interval is the accumulator currently accumulating? + */ + private GenomeLoc currentInterval = null; + + /** + * The actual mapping of interval to accumulator. + */ + private final List> intervalAccumulator = new ArrayList>(); + + private Object nextReduceInit = null; + + protected IntervalAccumulator(Walker walker, GenomeLocSortedSet intervals) { super(walker); + this.intervalIterator = intervals.iterator(); + if(intervalIterator.hasNext()) currentInterval = intervalIterator.next(); + nextReduceInit = walker.reduceInit(); } /** * Interval accumulator always feeds reduceInit into every new traversal. */ - public Object getReduceInit() { return walker.reduceInit(); } + public Object getReduceInit() { return nextReduceInit; } /** * Create a holder for interval results if none exists. Add the result to the holder. */ - public void accumulate( Shard shard, Object result ) { - // TODO: The following code is actually wrong we'll be doubly assigning results to locations. - // Fix before the new sharding system comes online. - for(GenomeLoc genomeLoc: shard.getGenomeLocs()) - intervalAccumulator.add( new Pair( genomeLoc, result ) ); + public void accumulate( ShardDataProvider provider, Object result ) { + GenomeLoc location = provider.getLocus(); + + // Pull the interval iterator ahead to the interval overlapping this shard fragment. + while((currentInterval == null || currentInterval.isBefore(location)) && intervalIterator.hasNext()) + currentInterval = intervalIterator.next(); + + if(currentInterval != null && currentInterval.getContig().equals(location.getContig()) && currentInterval.getStop() == location.getStop()) { + intervalAccumulator.add(new Pair(currentInterval,result)); + nextReduceInit = walker.reduceInit(); + } + else + nextReduceInit = result; } /** diff --git a/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java b/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java index bd6e2a2a9..8ee821ae7 100755 --- a/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java +++ b/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java @@ -6,8 +6,11 @@ import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategy; import org.broadinstitute.sting.gatk.datasources.shards.Shard; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; +import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider; import org.broadinstitute.sting.gatk.io.*; +import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.utils.StingException; +import org.broadinstitute.sting.utils.GenomeLocSortedSet; import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile; import org.broadinstitute.sting.utils.threading.ThreadPoolMonitor; @@ -73,12 +76,13 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar /** * Create a new hierarchical microscheduler to process the given reads and reference. * + * @param walker the walker used to process the dataset. * @param reads Reads file(s) to process. * @param reference Reference for driving the traversal. * @param nThreadsToUse maximum number of threads to use to do the work */ - protected HierarchicalMicroScheduler( Walker walker, SAMDataSource reads, IndexedFastaSequenceFile reference, Collection rods, int nThreadsToUse ) { - super(walker, reads, reference, rods); + protected HierarchicalMicroScheduler(GenomeAnalysisEngine engine, Walker walker, SAMDataSource reads, IndexedFastaSequenceFile reference, Collection rods, int nThreadsToUse ) { + super(engine, walker, reads, reference, rods); this.threadPool = Executors.newFixedThreadPool(nThreadsToUse); try { @@ -282,8 +286,7 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar ShardTraverser traverser = new ShardTraverser(this, traversalEngine, walker, - shard, - getShardDataProvider(shard), + new ShardDataProvider(shard,shard.getGenomeLocs().get(0),getReadIterator(shard),reference,rods), outputTracker); Future traverseResult = threadPool.submit(traverser); diff --git a/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java b/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java index d96d1b745..4957b914d 100644 --- a/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java +++ b/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java @@ -3,12 +3,15 @@ package org.broadinstitute.sting.gatk.executive; import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider; import org.broadinstitute.sting.gatk.datasources.shards.Shard; import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategy; +import org.broadinstitute.sting.gatk.datasources.shards.ReadShard; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource; import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.gatk.io.DirectOutputTracker; import org.broadinstitute.sting.gatk.io.OutputTracker; +import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile; +import org.broadinstitute.sting.utils.GenomeLocSortedSet; import java.util.Collection; @@ -28,8 +31,8 @@ public class LinearMicroScheduler extends MicroScheduler { * @param reference Reference for driving the traversal. * @param rods Reference-ordered data. */ - protected LinearMicroScheduler( Walker walker, SAMDataSource reads, IndexedFastaSequenceFile reference, Collection rods ) { - super(walker, reads, reference, rods); + protected LinearMicroScheduler(GenomeAnalysisEngine engine, Walker walker, SAMDataSource reads, IndexedFastaSequenceFile reference, Collection rods ) { + super(engine, walker, reads, reference, rods); } /** @@ -44,15 +47,27 @@ public class LinearMicroScheduler extends MicroScheduler { traversalEngine.setMaximumIterations(maxIterations); walker.initialize(); - Accumulator accumulator = Accumulator.create(walker); + Accumulator accumulator = Accumulator.create(engine,walker); for (Shard shard : shardStrategy) { - ShardDataProvider dataProvider = getShardDataProvider( shard ); - - Object result = traversalEngine.traverse(walker, shard, dataProvider, accumulator.getReduceInit()); - accumulator.accumulate( shard, result ); - - dataProvider.close(); + // New experimental code for managing locus intervals. + // TODO: we'll need a similar but slightly different strategy for dealing with read intervals, so generalize this code. + if(shard.getShardType() == Shard.ShardType.LOCUS_INTERVAL) { + WindowMaker windowMaker = new WindowMaker(getReadIterator(shard),shard.getGenomeLocs()); + for(WindowMaker.WindowMakerIterator iterator: windowMaker) { + ShardDataProvider dataProvider = new ShardDataProvider(shard,iterator.getLocus(),iterator,reference,rods); + Object result = traversalEngine.traverse(walker, dataProvider, accumulator.getReduceInit()); + accumulator.accumulate(dataProvider,result); + dataProvider.close(); + } + windowMaker.close(); + } + else { + ShardDataProvider dataProvider = new ShardDataProvider(shard,null,getReadIterator(shard),reference,rods); + Object result = traversalEngine.traverse(walker, dataProvider, accumulator.getReduceInit()); + accumulator.accumulate(dataProvider,result); + dataProvider.close(); + } } Object result = accumulator.finishTraversal(); diff --git a/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java b/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java index 27654640f..7e3b0d64c 100755 --- a/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java +++ b/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java @@ -26,7 +26,6 @@ package org.broadinstitute.sting.gatk.executive; import org.apache.log4j.Logger; -import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider; import org.broadinstitute.sting.gatk.datasources.shards.Shard; import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategy; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; @@ -34,9 +33,15 @@ import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource import org.broadinstitute.sting.gatk.traversals.*; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.gatk.io.OutputTracker; +import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; +import org.broadinstitute.sting.gatk.iterators.NullSAMIterator; +import org.broadinstitute.sting.gatk.Reads; +import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile; +import org.broadinstitute.sting.utils.GenomeLocSortedSet; import java.util.*; +import java.io.File; /** @@ -51,11 +56,16 @@ import java.util.*; public abstract class MicroScheduler { protected static Logger logger = Logger.getLogger(MicroScheduler.class); + /** + * The engine invoking this scheduler. + */ + protected final GenomeAnalysisEngine engine; + protected final TraversalEngine traversalEngine; protected final IndexedFastaSequenceFile reference; private final SAMDataSource reads; - private final Collection rods; + protected final Collection rods; /** * MicroScheduler factory function. Create a microscheduler appropriate for reducing the @@ -69,13 +79,13 @@ public abstract class MicroScheduler { * * @return The best-fit microscheduler. */ - public static MicroScheduler create(Walker walker, SAMDataSource reads, IndexedFastaSequenceFile reference, Collection rods, int nThreadsToUse) { + public static MicroScheduler create(GenomeAnalysisEngine engine, Walker walker, SAMDataSource reads, IndexedFastaSequenceFile reference, Collection rods, int nThreadsToUse) { if (walker instanceof TreeReducible && nThreadsToUse > 1) { logger.info("Creating hierarchical microscheduler"); - return new HierarchicalMicroScheduler(walker, reads, reference, rods, nThreadsToUse); + return new HierarchicalMicroScheduler(engine, walker, reads, reference, rods, nThreadsToUse); } else { logger.info("Creating linear microscheduler"); - return new LinearMicroScheduler(walker, reads, reference, rods); + return new LinearMicroScheduler(engine, walker, reads, reference, rods); } } @@ -87,7 +97,8 @@ public abstract class MicroScheduler { * @param reference The reference. * @param rods the rods to include in the traversal */ - protected MicroScheduler(Walker walker, SAMDataSource reads, IndexedFastaSequenceFile reference, Collection rods) { + protected MicroScheduler(GenomeAnalysisEngine engine, Walker walker, SAMDataSource reads, IndexedFastaSequenceFile reference, Collection rods) { + this.engine = engine; this.reads = reads; this.reference = reference; this.rods = rods; @@ -123,16 +134,13 @@ public abstract class MicroScheduler { */ public abstract OutputTracker getOutputTracker(); - /** - * Gets an window into all the data that can be viewed as a single shard. - * - * @param shard The section of data to view. - * - * @return An accessor for all the data in this shard. + * Gets the an iterator over the given reads, which will iterate over the reads in the given shard. + * @param shard the shard to use when querying reads. + * @return an iterator over the reads specified in the shard. */ - protected ShardDataProvider getShardDataProvider(Shard shard) { - return new ShardDataProvider(shard, reads, reference, rods); + protected StingSAMIterator getReadIterator(Shard shard) { + return (reads != null) ? reads.seek(shard) : new NullSAMIterator(new Reads(new ArrayList())); } /** diff --git a/java/src/org/broadinstitute/sting/gatk/executive/ShardTraverser.java b/java/src/org/broadinstitute/sting/gatk/executive/ShardTraverser.java index 8eada9df1..abaf00ff6 100755 --- a/java/src/org/broadinstitute/sting/gatk/executive/ShardTraverser.java +++ b/java/src/org/broadinstitute/sting/gatk/executive/ShardTraverser.java @@ -27,7 +27,6 @@ public class ShardTraverser implements Callable { private HierarchicalMicroScheduler microScheduler; private Walker walker; private TraversalEngine traversalEngine; - private Shard shard; private ShardDataProvider dataProvider; private ThreadLocalOutputTracker outputTracker; private OutputMergeTask outputMergeTask; @@ -40,13 +39,11 @@ public class ShardTraverser implements Callable { public ShardTraverser( HierarchicalMicroScheduler microScheduler, TraversalEngine traversalEngine, Walker walker, - Shard shard, ShardDataProvider dataProvider, ThreadLocalOutputTracker outputTracker ) { this.microScheduler = microScheduler; this.walker = walker; this.traversalEngine = traversalEngine; - this.shard = shard; this.dataProvider = dataProvider; this.outputTracker = outputTracker; } @@ -56,7 +53,7 @@ public class ShardTraverser implements Callable { Object accumulator = walker.reduceInit(); try { - accumulator = traversalEngine.traverse( walker, shard, dataProvider, accumulator ); + accumulator = traversalEngine.traverse( walker, dataProvider, accumulator ); } finally { dataProvider.close(); diff --git a/java/src/org/broadinstitute/sting/gatk/executive/WindowMaker.java b/java/src/org/broadinstitute/sting/gatk/executive/WindowMaker.java new file mode 100644 index 000000000..f7400d3df --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/executive/WindowMaker.java @@ -0,0 +1,116 @@ +package org.broadinstitute.sting.gatk.executive; + +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; +import org.broadinstitute.sting.gatk.Reads; + +import java.util.List; +import java.util.Queue; +import java.util.ArrayDeque; +import java.util.Iterator; + +import net.sf.samtools.SAMRecord; +import net.sf.picard.util.PeekableIterator; + +/** + * Buffer shards of data which may or may not contain multiple loci into + * iterators of all data which cover an interval. Its existence is an homage + * to Mark's stillborn WindowMaker, RIP 2009. + * + * @author mhanna + * @version 0.1 + */ +public class WindowMaker implements Iterable, Iterator { + /** + * The data source for reads. Will probably come directly from the BAM file. + */ + private final StingSAMIterator sourceIterator; + + /** + * Stores the sequence of intervals that the windowmaker should be tracking. + */ + private final PeekableIterator intervalIterator; + + /** + * Which reads should be saved to go into the next interval? + */ + private Queue overlappingReads = new ArrayDeque(); + + /** + * Create a new window maker with the given iterator as a data source, covering + * the given inteervals. + * @param iterator The data source for this window. + * @param intervals The set of intervals over which to traverse. + */ + public WindowMaker(StingSAMIterator iterator, List intervals) { + this.sourceIterator = iterator; + this.intervalIterator = new PeekableIterator(intervals.iterator()); + } + + public Iterator iterator() { + return this; + } + + public boolean hasNext() { + return intervalIterator.hasNext(); + } + + public WindowMakerIterator next() { + return new WindowMakerIterator(intervalIterator.next()); + } + + public void remove() { + throw new UnsupportedOperationException("Cannot remove from a window maker."); + } + + public void close() { + this.sourceIterator.close(); + } + + public class WindowMakerIterator implements StingSAMIterator { + /** + * The locus for which this iterator is currently returning reads. + */ + private final GenomeLoc locus; + + /** + * Which reads should be saved to go into the next interval? + */ + private final Queue pendingOverlaps = new ArrayDeque(); + + public WindowMakerIterator(GenomeLoc locus) { + this.locus = locus; + } + + public Reads getSourceInfo() { + return sourceIterator.getSourceInfo(); + } + + public GenomeLoc getLocus() { + return locus; + } + + public WindowMakerIterator iterator() { + return this; + } + + public boolean hasNext() { + return overlappingReads.size() > 0 || sourceIterator.hasNext(); + } + + public SAMRecord next() { + SAMRecord nextRead = overlappingReads.size() > 0 ? overlappingReads.remove() : sourceIterator.next(); + if(intervalIterator.hasNext() && nextRead.getAlignmentEnd() >= intervalIterator.peek().getStart()) + pendingOverlaps.add(nextRead); + return nextRead; + } + + public void close() { + overlappingReads = pendingOverlaps; + } + + public void remove() { + throw new UnsupportedOperationException("Unable to remove from a window maker iterator."); + } + } +} diff --git a/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java b/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java index ff526f267..68ba501fc 100755 --- a/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java +++ b/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java @@ -113,7 +113,6 @@ public abstract class TraversalEngine { * this method must be implemented by all traversal engines * * @param walker the walker to run with - * @param shard a shard of data * @param dataProvider the data provider that generates data given the shard * @param sum the accumulator * @param an object of the map type @@ -122,7 +121,6 @@ public abstract class TraversalEngine { * @return an object of the reduce type */ public abstract T traverse(Walker walker, - Shard shard, ShardDataProvider dataProvider, T sum); } diff --git a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseDuplicates.java b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseDuplicates.java index 37d0999f6..2e2934d0e 100755 --- a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseDuplicates.java +++ b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseDuplicates.java @@ -195,7 +195,6 @@ public class TraverseDuplicates extends TraversalEngine { * Traverse by reads, given the data and the walker * * @param walker the walker to execute over - * @param shard the shard of data to feed the walker * @param sum of type T, the return from the walker * @param the generic type * @param the return type of the reduce function @@ -203,7 +202,6 @@ public class TraverseDuplicates extends TraversalEngine { * @return the result type T, the product of all the reduce calls */ public T traverse(Walker walker, - Shard shard, ShardDataProvider dataProvider, T sum) { // safety first :-) diff --git a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLoci.java b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLoci.java index 049bb9880..b5a7a1aa1 100755 --- a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLoci.java +++ b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLoci.java @@ -38,10 +38,9 @@ public class TraverseLoci extends TraversalEngine { @Override public T traverse( Walker walker, - Shard shard, ShardDataProvider dataProvider, T sum ) { - logger.debug(String.format("TraverseLoci.traverse: Shard is %s", shard)); + logger.debug(String.format("TraverseLoci.traverse: Shard is %s", dataProvider)); if ( !(walker instanceof LocusWalker) ) throw new IllegalArgumentException("Walker isn't a loci walker!"); diff --git a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLocusWindows.java b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLocusWindows.java index 1e1230ae4..cacca751c 100755 --- a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLocusWindows.java +++ b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLocusWindows.java @@ -28,7 +28,6 @@ public class TraverseLocusWindows extends TraversalEngine { private static final String LOCUS_WINDOW_STRING = "intervals"; public T traverse( Walker walker, - Shard shard, ShardDataProvider dataProvider, T sum ) { @@ -37,9 +36,7 @@ public class TraverseLocusWindows extends TraversalEngine { LocusWindowWalker locusWindowWalker = (LocusWindowWalker)walker; - if(shard.getGenomeLocs().size() > 1) - throw new StingException("This traversal does not support multiple intervals within a single shard"); - GenomeLoc interval = shard.getGenomeLocs().get(0); + GenomeLoc interval = dataProvider.getLocus(); ReadView readView = new ReadView( dataProvider ); LocusReferenceView referenceView = new LocusReferenceView( walker, dataProvider ); diff --git a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java index fa2a4ff0a..24c80a018 100755 --- a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java +++ b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java @@ -6,9 +6,6 @@ import org.broadinstitute.sting.gatk.WalkerManager; import org.broadinstitute.sting.gatk.datasources.providers.ReadReferenceView; import org.broadinstitute.sting.gatk.datasources.providers.ReadView; import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider; -import org.broadinstitute.sting.gatk.datasources.shards.IntervalShard; -import org.broadinstitute.sting.gatk.datasources.shards.ReadShard; -import org.broadinstitute.sting.gatk.datasources.shards.Shard; import org.broadinstitute.sting.gatk.walkers.DataSource; import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.gatk.walkers.Walker; @@ -59,7 +56,6 @@ public class TraverseReads extends TraversalEngine { * Traverse by reads, given the data and the walker * * @param walker the walker to traverse with - * @param shard the shard, specifying the range of data to iterate over * @param dataProvider the provider of the reads data * @param sum the value of type T, specified by the walker, to feed to the walkers reduce function * @param the map type of the walker @@ -67,11 +63,10 @@ public class TraverseReads extends TraversalEngine { * @return the reduce variable of the read walker */ public T traverse(Walker walker, - Shard shard, ShardDataProvider dataProvider, T sum) { - logger.debug(String.format("TraverseReads.traverse Covered dataset is %s", shard)); + logger.debug(String.format("TraverseReads.traverse Covered dataset is %s", dataProvider)); if (!(walker instanceof ReadWalker)) throw new IllegalArgumentException("Walker isn't a read walker!"); diff --git a/java/src/org/broadinstitute/sting/utils/sam/ArtificialReadsTraversal.java b/java/src/org/broadinstitute/sting/utils/sam/ArtificialReadsTraversal.java index fcf44459d..3848ac5e3 100644 --- a/java/src/org/broadinstitute/sting/utils/sam/ArtificialReadsTraversal.java +++ b/java/src/org/broadinstitute/sting/utils/sam/ArtificialReadsTraversal.java @@ -75,7 +75,6 @@ public class ArtificialReadsTraversal extends TraversalEngine { * Traverse by reads, given the data and the walker * * @param walker the walker to traverse with - * @param shard the shard, specifying the range of data to iterate over * @param dataProvider the provider of the reads data * @param sum the value of type T, specified by the walker, to feed to the walkers reduce function * @param the map type of the walker @@ -84,7 +83,6 @@ public class ArtificialReadsTraversal extends TraversalEngine { * @return the reduce variable of the read walker */ public T traverse( Walker walker, - Shard shard, ShardDataProvider dataProvider, T sum ) { diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusReferenceViewTest.java b/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusReferenceViewTest.java index 569a48804..c161f444e 100755 --- a/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusReferenceViewTest.java +++ b/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusReferenceViewTest.java @@ -58,7 +58,7 @@ public class LocusReferenceViewTest extends ReferenceViewTemplate { public void testOverlappingReferenceBases() { Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc(0, sequenceFile.getSequence("chrM").length() - 10, sequenceFile.getSequence("chrM").length()))); - ShardDataProvider dataProvider = new ShardDataProvider(shard, null, sequenceFile, null); + ShardDataProvider dataProvider = new ShardDataProvider(shard, shard.getGenomeLocs().get(0), null, sequenceFile, null); LocusReferenceView view = new LocusReferenceView(dataProvider); char[] results = view.getReferenceBases(GenomeLocParser.createGenomeLoc(0, sequenceFile.getSequence("chrM").length() - 10, sequenceFile.getSequence("chrM").length() + 9)); @@ -75,7 +75,7 @@ public class LocusReferenceViewTest extends ReferenceViewTemplate { public void testBoundsFailure() { Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc(0, 1, 50))); - ShardDataProvider dataProvider = new ShardDataProvider(shard, null, sequenceFile, null); + ShardDataProvider dataProvider = new ShardDataProvider(shard, shard.getGenomeLocs().get(0), null, sequenceFile, null); LocusReferenceView view = new LocusReferenceView(dataProvider); view.getReferenceContext(GenomeLocParser.createGenomeLoc(0, 51)).getBase(); @@ -91,7 +91,7 @@ public class LocusReferenceViewTest extends ReferenceViewTemplate { Shard shard = new LocusShard(Collections.singletonList(loc)); GenomeLocusIterator shardIterator = new GenomeLocusIterator(shard.getGenomeLocs()); - ShardDataProvider dataProvider = new ShardDataProvider(shard, null, sequenceFile, null); + ShardDataProvider dataProvider = new ShardDataProvider(shard, shard.getGenomeLocs().get(0), null, sequenceFile, null); LocusReferenceView view = new LocusReferenceView(dataProvider); while (shardIterator.hasNext()) { diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusViewTemplate.java b/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusViewTemplate.java index ed2378e64..8cb9c16f3 100755 --- a/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusViewTemplate.java +++ b/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusViewTemplate.java @@ -46,7 +46,7 @@ public abstract class LocusViewTemplate extends BaseTest { GenomeLoc shardBounds = GenomeLocParser.createGenomeLoc("chr1", 1, 5); Shard shard = new LocusShard(Collections.singletonList(shardBounds)); - ShardDataProvider dataProvider = new ShardDataProvider(shard, iterator); + ShardDataProvider dataProvider = new ShardDataProvider(shard, shard.getGenomeLocs().get(0), iterator); LocusView view = createView(dataProvider); @@ -60,7 +60,7 @@ public abstract class LocusViewTemplate extends BaseTest { GenomeLoc shardBounds = GenomeLocParser.createGenomeLoc("chr1", 1, 5); Shard shard = new LocusShard(Collections.singletonList(shardBounds)); - ShardDataProvider dataProvider = new ShardDataProvider(shard, iterator); + ShardDataProvider dataProvider = new ShardDataProvider(shard, shard.getGenomeLocs().get(0), iterator); LocusView view = createView(dataProvider); @@ -73,7 +73,7 @@ public abstract class LocusViewTemplate extends BaseTest { SAMRecordIterator iterator = new SAMRecordIterator(read); Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 1, 10))); - ShardDataProvider dataProvider = new ShardDataProvider(shard, iterator); + ShardDataProvider dataProvider = new ShardDataProvider(shard, shard.getGenomeLocs().get(0), iterator); LocusView view = createView(dataProvider); testReadsInContext(view, shard.getGenomeLocs(), Collections.singletonList(read)); @@ -85,7 +85,7 @@ public abstract class LocusViewTemplate extends BaseTest { SAMRecordIterator iterator = new SAMRecordIterator(read); Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 1, 10))); - ShardDataProvider dataProvider = new ShardDataProvider(shard, iterator); + ShardDataProvider dataProvider = new ShardDataProvider(shard, shard.getGenomeLocs().get(0), iterator); LocusView view = createView(dataProvider); testReadsInContext(view, shard.getGenomeLocs(), Collections.singletonList(read)); @@ -97,7 +97,7 @@ public abstract class LocusViewTemplate extends BaseTest { SAMRecordIterator iterator = new SAMRecordIterator(read); Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 1, 10))); - ShardDataProvider dataProvider = new ShardDataProvider(shard, iterator); + ShardDataProvider dataProvider = new ShardDataProvider(shard, shard.getGenomeLocs().get(0), iterator); LocusView view = createView(dataProvider); testReadsInContext(view, shard.getGenomeLocs(), Collections.singletonList(read)); @@ -109,7 +109,7 @@ public abstract class LocusViewTemplate extends BaseTest { SAMRecordIterator iterator = new SAMRecordIterator(read); Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 6, 15))); - ShardDataProvider dataProvider = new ShardDataProvider(shard, iterator); + ShardDataProvider dataProvider = new ShardDataProvider(shard, shard.getGenomeLocs().get(0), iterator); LocusView view = createView(dataProvider); testReadsInContext(view, shard.getGenomeLocs(), Collections.singletonList(read)); @@ -121,7 +121,7 @@ public abstract class LocusViewTemplate extends BaseTest { SAMRecordIterator iterator = new SAMRecordIterator(read); Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 1, 10))); - ShardDataProvider dataProvider = new ShardDataProvider(shard, iterator); + ShardDataProvider dataProvider = new ShardDataProvider(shard, shard.getGenomeLocs().get(0), iterator); LocusView view = createView(dataProvider); testReadsInContext(view, shard.getGenomeLocs(), Collections.singletonList(read)); @@ -134,7 +134,7 @@ public abstract class LocusViewTemplate extends BaseTest { SAMRecordIterator iterator = new SAMRecordIterator(read1, read2); Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 1, 10))); - ShardDataProvider dataProvider = new ShardDataProvider(shard, iterator); + ShardDataProvider dataProvider = new ShardDataProvider(shard, shard.getGenomeLocs().get(0), iterator); LocusView view = createView(dataProvider); List expectedReads = new ArrayList(); @@ -151,7 +151,7 @@ public abstract class LocusViewTemplate extends BaseTest { SAMRecordIterator iterator = new SAMRecordIterator(read1, read2, read3, read4); Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 1, 10))); - ShardDataProvider dataProvider = new ShardDataProvider(shard, iterator); + ShardDataProvider dataProvider = new ShardDataProvider(shard, shard.getGenomeLocs().get(0), iterator); LocusView view = createView(dataProvider); List expectedReads = new ArrayList(); @@ -168,7 +168,7 @@ public abstract class LocusViewTemplate extends BaseTest { SAMRecordIterator iterator = new SAMRecordIterator(read1, read2, read3, read4); Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 1, 10))); - ShardDataProvider dataProvider = new ShardDataProvider(shard, iterator); + ShardDataProvider dataProvider = new ShardDataProvider(shard, shard.getGenomeLocs().get(0), iterator); LocusView view = createView(dataProvider); List expectedReads = new ArrayList(); @@ -187,7 +187,7 @@ public abstract class LocusViewTemplate extends BaseTest { SAMRecordIterator iterator = new SAMRecordIterator(read1, read2, read3, read4, read5, read6); Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 1, 10))); - ShardDataProvider dataProvider = new ShardDataProvider(shard, iterator); + ShardDataProvider dataProvider = new ShardDataProvider(shard, shard.getGenomeLocs().get(0), iterator); LocusView view = createView(dataProvider); List expectedReads = new ArrayList(); @@ -213,7 +213,7 @@ public abstract class LocusViewTemplate extends BaseTest { read07, read08, read09, read10, read11, read12); Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 6, 15))); - ShardDataProvider dataProvider = new ShardDataProvider(shard, iterator); + ShardDataProvider dataProvider = new ShardDataProvider(shard, shard.getGenomeLocs().get(0), iterator); LocusView view = createView(dataProvider); List expectedReads = new ArrayList(); diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReadReferenceViewTest.java b/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReadReferenceViewTest.java index e8d169429..f378f68cf 100755 --- a/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReadReferenceViewTest.java +++ b/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReadReferenceViewTest.java @@ -70,7 +70,7 @@ public class ReadReferenceViewTest extends ReferenceViewTemplate { final long contigStart = selectedContig.getSequenceLength() - (readLength - overlap - 1); final long contigStop = selectedContig.getSequenceLength() + overlap; - ShardDataProvider dataProvider = new ShardDataProvider(null,null,sequenceFile,null); + ShardDataProvider dataProvider = new ShardDataProvider(null,null,null,sequenceFile,null); ReadReferenceView view = new ReadReferenceView(dataProvider); SAMRecord rec = buildSAMRecord(selectedContig.getSequenceName(),(int)contigStart,(int)contigStop); @@ -97,7 +97,7 @@ public class ReadReferenceViewTest extends ReferenceViewTemplate { protected void validateLocation( GenomeLoc loc ) { SAMRecord read = buildSAMRecord( loc.getContig(), (int)loc.getStart(), (int)loc.getStop() ); - ShardDataProvider dataProvider = new ShardDataProvider(null,null,sequenceFile,null); + ShardDataProvider dataProvider = new ShardDataProvider(null,null,null,sequenceFile,null); ReadReferenceView view = new ReadReferenceView(dataProvider); ReferenceSequence expectedAsSeq = sequenceFile.getSubsequenceAt(loc.getContig(),loc.getStart(),loc.getStop()); diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedViewTest.java b/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedViewTest.java index 9bb0e4f20..931f23457 100755 --- a/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedViewTest.java +++ b/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedViewTest.java @@ -53,7 +53,7 @@ public class ReferenceOrderedViewTest extends BaseTest { @Test public void testNoBindings() { Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chrM",1,30))); - ShardDataProvider provider = new ShardDataProvider(shard, null, seq, Collections.emptyList()); + ShardDataProvider provider = new ShardDataProvider(shard, shard.getGenomeLocs().get(0), null, seq, Collections.emptyList()); ReferenceOrderedView view = new ManagingReferenceOrderedView( provider ); RefMetaDataTracker tracker = view.getReferenceOrderedDataAtLocus(GenomeLocParser.createGenomeLoc("chrM",10)); @@ -71,7 +71,7 @@ public class ReferenceOrderedViewTest extends BaseTest { Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chrM",1,30))); - ShardDataProvider provider = new ShardDataProvider(shard, null, seq, Collections.singletonList(dataSource)); + ShardDataProvider provider = new ShardDataProvider(shard, shard.getGenomeLocs().get(0), null, seq, Collections.singletonList(dataSource)); ReferenceOrderedView view = new ManagingReferenceOrderedView( provider ); RefMetaDataTracker tracker = view.getReferenceOrderedDataAtLocus(GenomeLocParser.createGenomeLoc("chrM",20)); @@ -97,7 +97,7 @@ public class ReferenceOrderedViewTest extends BaseTest { Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chrM",1,30))); - ShardDataProvider provider = new ShardDataProvider(shard, null, seq, Arrays.asList(dataSource1,dataSource2)); + ShardDataProvider provider = new ShardDataProvider(shard, shard.getGenomeLocs().get(0), null, seq, Arrays.asList(dataSource1,dataSource2)); ReferenceOrderedView view = new ManagingReferenceOrderedView( provider ); RefMetaDataTracker tracker = view.getReferenceOrderedDataAtLocus(GenomeLocParser.createGenomeLoc("chrM",20)); diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/providers/ShardDataProviderTest.java b/java/test/org/broadinstitute/sting/gatk/datasources/providers/ShardDataProviderTest.java index 437912d1c..faf20c7c9 100755 --- a/java/test/org/broadinstitute/sting/gatk/datasources/providers/ShardDataProviderTest.java +++ b/java/test/org/broadinstitute/sting/gatk/datasources/providers/ShardDataProviderTest.java @@ -34,7 +34,7 @@ public class ShardDataProviderTest extends BaseTest { @Before public void createProvider() { - provider = new ShardDataProvider( null,null,null,null ); + provider = new ShardDataProvider( null,null,null,null,null ); } /** diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ArtificialResourcePool.java b/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ArtificialResourcePool.java index 6cbc5b070..566c7c891 100644 --- a/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ArtificialResourcePool.java +++ b/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ArtificialResourcePool.java @@ -69,7 +69,7 @@ public class ArtificialResourcePool extends SAMResourcePool { if (segment instanceof MappedStreamSegment && iterator instanceof ArtificialSAMQueryIterator) { ArtificialSAMQueryIterator queryIterator = (ArtificialSAMQueryIterator)iterator; MappedStreamSegment mappedSegment = (MappedStreamSegment)segment; - GenomeLoc bounds = mappedSegment.getBounds(); + GenomeLoc bounds = mappedSegment.locus; if (!this.queryOverlapping) { queryIterator.queryContained(bounds.getContig(), (int)bounds.getStart(), (int)bounds.getStop()); } else { diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataPoolTest.java b/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataPoolTest.java index ab783a348..c4d51be7b 100755 --- a/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataPoolTest.java +++ b/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataPoolTest.java @@ -55,7 +55,7 @@ public class ReferenceOrderedDataPoolTest extends BaseTest { @Test public void testCreateSingleIterator() { ResourcePool iteratorPool = new ReferenceOrderedDataPool(rod); - SeekableRODIterator iterator = (SeekableRODIterator)iteratorPool.iterator( new MappedStreamSegment(Collections.singletonList(testSite1)) ); + SeekableRODIterator iterator = (SeekableRODIterator)iteratorPool.iterator( new MappedStreamSegment(testSite1) ); Assert.assertEquals("Number of iterators in the pool is incorrect", 1, iteratorPool.numIterators()); Assert.assertEquals("Number of available iterators in the pool is incorrect", 0, iteratorPool.numAvailableIterators()); @@ -76,10 +76,10 @@ public class ReferenceOrderedDataPoolTest extends BaseTest { @Test public void testCreateMultipleIterators() { ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(rod); - SeekableRODIterator iterator1 = iteratorPool.iterator( new MappedStreamSegment(Collections.singletonList(testSite1)) ); + SeekableRODIterator iterator1 = iteratorPool.iterator( new MappedStreamSegment(testSite1) ); // Create a new iterator at position 2. - SeekableRODIterator iterator2 = iteratorPool.iterator( new MappedStreamSegment(Collections.singletonList(testSite2)) ); + SeekableRODIterator iterator2 = iteratorPool.iterator( new MappedStreamSegment(testSite2) ); Assert.assertEquals("Number of iterators in the pool is incorrect", 2, iteratorPool.numIterators()); Assert.assertEquals("Number of available iterators in the pool is incorrect", 0, iteratorPool.numAvailableIterators()); @@ -126,7 +126,7 @@ public class ReferenceOrderedDataPoolTest extends BaseTest { @Test public void testIteratorConservation() { ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(rod); - SeekableRODIterator iterator = iteratorPool.iterator( new MappedStreamSegment(Collections.singletonList(testSite1)) ); + SeekableRODIterator iterator = iteratorPool.iterator( new MappedStreamSegment(testSite1) ); Assert.assertEquals("Number of iterators in the pool is incorrect", 1, iteratorPool.numIterators()); Assert.assertEquals("Number of available iterators in the pool is incorrect", 0, iteratorPool.numAvailableIterators()); @@ -140,7 +140,7 @@ public class ReferenceOrderedDataPoolTest extends BaseTest { iteratorPool.release(iterator); // Create another iterator after the current iterator. - iterator = iteratorPool.iterator( new MappedStreamSegment(Collections.singletonList(testSite3)) ); + iterator = iteratorPool.iterator( new MappedStreamSegment(testSite3) ); // Make sure that the previously acquired iterator was reused. Assert.assertEquals("Number of iterators in the pool is incorrect", 1, iteratorPool.numIterators()); @@ -161,7 +161,7 @@ public class ReferenceOrderedDataPoolTest extends BaseTest { @Test public void testIteratorCreation() { ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(rod); - SeekableRODIterator iterator = iteratorPool.iterator( new MappedStreamSegment(Collections.singletonList(testSite3)) ); + SeekableRODIterator iterator = iteratorPool.iterator( new MappedStreamSegment(testSite3) ); Assert.assertEquals("Number of iterators in the pool is incorrect", 1, iteratorPool.numIterators()); Assert.assertEquals("Number of available iterators in the pool is incorrect", 0, iteratorPool.numAvailableIterators()); @@ -175,7 +175,7 @@ public class ReferenceOrderedDataPoolTest extends BaseTest { iteratorPool.release(iterator); // Create another iterator after the current iterator. - iterator = iteratorPool.iterator(new MappedStreamSegment(Collections.singletonList(testSite1)) ); + iterator = iteratorPool.iterator(new MappedStreamSegment(testSite1) ); // Make sure that the previously acquired iterator was reused. Assert.assertEquals("Number of iterators in the pool is incorrect", 2, iteratorPool.numIterators()); diff --git a/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsTest.java b/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsTest.java index e9d5f77c7..b9affd87a 100755 --- a/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsTest.java +++ b/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsTest.java @@ -132,8 +132,8 @@ public class TraverseReadsTest extends BaseTest { fail("Shard == null"); } - ShardDataProvider dataProvider = new ShardDataProvider(shard,dataSource,null,null); - accumulator = traversalEngine.traverse(countReadWalker, shard, dataProvider, accumulator); + ShardDataProvider dataProvider = new ShardDataProvider(shard,null,dataSource.seek(shard),null,null); + accumulator = traversalEngine.traverse(countReadWalker, dataProvider, accumulator); dataProvider.close(); } @@ -178,8 +178,8 @@ public class TraverseReadsTest extends BaseTest { fail("Shard == null"); } - ShardDataProvider dataProvider = new ShardDataProvider(shard,dataSource,null,null); - accumulator = traversalEngine.traverse(countReadWalker, shard, dataProvider, accumulator); + ShardDataProvider dataProvider = new ShardDataProvider(shard,null,dataSource.seek(shard),null,null); + accumulator = traversalEngine.traverse(countReadWalker, dataProvider, accumulator); dataProvider.close(); } diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/PrintReadsWalkerTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/PrintReadsWalkerTest.java index 44c6eaac3..b74701c46 100644 --- a/java/test/org/broadinstitute/sting/gatk/walkers/PrintReadsWalkerTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/PrintReadsWalkerTest.java @@ -67,7 +67,7 @@ public class PrintReadsWalkerTest extends BaseTest { public void testReadCount() { PrintReadsWalker walker = new PrintReadsWalker(); ArtificialSAMFileWriter writer = new ArtificialSAMFileWriter(); - trav.traverse(walker, (Shard) null, (ShardDataProvider) null, writer); + trav.traverse(walker, null, writer); assertEquals(readTotal, writer.getRecords().size()); }