Reduce by interval alterations to interface with new sharding system. This checkin will be followed by a

simplification of some of the locus traversal code.


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2886 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
hanna 2010-02-25 00:16:50 +00:00
parent 2572c24935
commit 199b43fcf2
31 changed files with 323 additions and 185 deletions

View File

@ -88,6 +88,11 @@ public class GenomeAnalysisEngine {
*/
private Map<ArgumentSource, Object> inputs = new HashMap<ArgumentSource, Object>();
/**
* Collection of intervals used by the walker.
*/
private GenomeLocSortedSet intervals = null;
/**
* Collection of outputs used by the walker.
*/
@ -156,14 +161,13 @@ public class GenomeAnalysisEngine {
// create the output streams
initializeOutputStreams(my_walker, microScheduler.getOutputTracker());
GenomeLocSortedSet locs = null;
if (argCollection.intervals != null && argCollection.intervalMerging.check()) {
locs = GenomeLocSortedSet.createSetFromList(parseIntervalRegion(argCollection.intervals));
intervals = GenomeLocSortedSet.createSetFromList(parseIntervalRegion(argCollection.intervals));
}
ShardStrategy shardStrategy = getShardStrategy(my_walker,
microScheduler.getReference(),
locs,
intervals,
argCollection.maximumEngineIterations,
readsDataSource != null ? readsDataSource.getReadsInfo().getValidationExclusionList() : null);
@ -281,11 +285,11 @@ public class GenomeAnalysisEngine {
// we need to verify different parameter based on the walker type
if (my_walker instanceof LocusWalker || my_walker instanceof LocusWindowWalker) {
// create the MicroScheduler
microScheduler = MicroScheduler.create(my_walker, readsDataSource, referenceDataSource, rodDataSources, argCollection.numberOfThreads);
microScheduler = MicroScheduler.create(this,my_walker,readsDataSource,referenceDataSource,rodDataSources,argCollection.numberOfThreads);
} else if (my_walker instanceof ReadWalker || my_walker instanceof DuplicateWalker) {
if (argCollection.referenceFile == null)
Utils.scareUser(String.format("Read-based traversals require a reference file but none was given"));
microScheduler = MicroScheduler.create(my_walker, readsDataSource, referenceDataSource, rodDataSources, argCollection.numberOfThreads);
microScheduler = MicroScheduler.create(this,my_walker,readsDataSource,referenceDataSource,rodDataSources,argCollection.numberOfThreads);
} else {
Utils.scareUser(String.format("Unable to create the appropriate TraversalEngine for analysis type %s", walkerManager.getName(my_walker.getClass())));
}
@ -803,6 +807,14 @@ public class GenomeAnalysisEngine {
return this.argCollection;
}
/**
 * Accessor for the sorted set of intervals the engine was configured with.
 * @return The engine's intervals; null when no intervals were supplied on the command line.
 */
public GenomeLocSortedSet getIntervals() {
    return intervals;
}
/**
* Gets the list of filters employed by this walker.
* @return Collection of filters (actual instances) used by this walker.

View File

@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.datasources.providers;
import java.util.NoSuchElementException;
import java.util.ArrayList;
import java.util.Collections;
import org.broadinstitute.sting.gatk.iterators.GenomeLocusIterator;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
@ -46,7 +47,7 @@ public class AllLocusView extends LocusView {
public AllLocusView(ShardDataProvider provider) {
super( provider );
// Seed the state tracking members with the first possible seek position and the first possible locus context.
locusIterator = new GenomeLocusIterator( provider.getShard().getGenomeLocs() );
locusIterator = new GenomeLocusIterator( Collections.singletonList(provider.getLocus()) );
if( locusIterator.hasNext() ) {
nextPosition = locusIterator.next();
nextLocus = hasNextLocus() ? nextLocus() : createEmptyLocus(nextPosition);

View File

@ -134,19 +134,7 @@ public class LocusReferenceView extends ReferenceView {
}
private void initializeBounds(ShardDataProvider provider) {
List<GenomeLoc> loci = provider.getShard().getGenomeLocs();
if(loci.isEmpty()) {
bounds = null;
return;
}
GenomeLoc firstLocus = loci.get(0);
GenomeLoc lastLocus = loci.get(loci.size()-1);
if(firstLocus.getContigIndex() != lastLocus.getContigIndex())
throw new StingException("LocusReferenceView currently only supports multiple intervals on the same contig.");
bounds = GenomeLocParser.createGenomeLoc(firstLocus.getContig(),firstLocus.getStart(),lastLocus.getStop());
bounds = provider.getLocus();
}
/**

View File

@ -5,7 +5,6 @@ import net.sf.picard.filter.SamRecordFilter;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.Reads;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import org.broadinstitute.sting.gatk.iterators.LocusIterator;
import org.broadinstitute.sting.gatk.iterators.LocusIteratorByState;
import org.broadinstitute.sting.gatk.traversals.TraversalStatistics;
@ -34,9 +33,9 @@ import java.util.NoSuchElementException;
public abstract class LocusView extends LocusIterator implements View {
/**
* The shard bounding this view.
* The locus bounding this view.
*/
protected Shard shard;
protected GenomeLoc locus;
/**
* Source info for this view. Informs the class about downsampling requirements.
@ -55,7 +54,7 @@ public abstract class LocusView extends LocusIterator implements View {
private AlignmentContext nextLocus = null;
public LocusView(ShardDataProvider provider) {
this.shard = provider.getShard();
this.locus = provider.getLocus();
Iterator<SAMRecord> reads = new FilteringIterator(provider.getReadIterator(), new LocusStreamFilterFunc());
this.sourceInfo = provider.getReadIterator().getSourceInfo();
@ -79,7 +78,7 @@ public abstract class LocusView extends LocusIterator implements View {
*/
public void close() {
// Set everything to null with the hope of failing fast.
shard = null;
locus = null;
sourceInfo = null;
loci = null;
@ -151,7 +150,7 @@ public abstract class LocusView extends LocusIterator implements View {
nextLocus = loci.next();
// If the location of this shard is available, trim the data stream to match the shard.
if(!shard.getGenomeLocs().isEmpty()) {
if(locus != null) {
// Iterate through any elements not contained within this shard.
while( nextLocus != null && !isContainedInShard(nextLocus.getLocation()) && loci.hasNext() )
nextLocus = loci.next();
@ -168,11 +167,7 @@ public abstract class LocusView extends LocusIterator implements View {
* @return True if the given location is contained within the shard. False otherwise.
*/
private boolean isContainedInShard(GenomeLoc location) {
for(GenomeLoc shardLocation: shard.getGenomeLocs()) {
if(shardLocation.containsP(location))
return true;
}
return false;
return locus.containsP(location);
}
/**

View File

@ -57,17 +57,17 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
public RodLocusView( ShardDataProvider provider ) {
super(provider);
GenomeLoc firstLoc = provider.getShard().getGenomeLocs().get(0);
GenomeLoc loc = provider.getLocus();
List< Iterator<List<ReferenceOrderedDatum>> > iterators = new LinkedList< Iterator<List<ReferenceOrderedDatum>> >();
for( ReferenceOrderedDataSource dataSource: provider.getReferenceOrderedData() ) {
if ( DEBUG ) System.out.printf("Shard is %s%n", provider.getShard().getGenomeLocs());
if ( DEBUG ) System.out.printf("Shard is %s%n", provider.getLocus());
// grab the ROD iterator from the data source, and compute the first location in this shard, forwarding
// the iterator to immediately before it, so that it can be added to the merging iterator primed for
// next() to return the first real ROD in this shard
SeekableRODIterator it = (SeekableRODIterator)dataSource.seek(provider.getShard());
it.seekForward(GenomeLocParser.createGenomeLoc(firstLoc.getContigIndex(), firstLoc.getStart()-1));
it.seekForward(GenomeLocParser.createGenomeLoc(loc.getContigIndex(), loc.getStart()-1));
states.add(new ReferenceOrderedDataState(dataSource,it));
@ -94,8 +94,7 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
if ( ! rodQueue.hasNext() )
return false;
else {
GenomeLoc lastLocus = shard.getGenomeLocs().get(shard.getGenomeLocs().size()-1);
return ! rodQueue.peekLocation().isPast(lastLocus);
return ! rodQueue.peekLocation().isPast(locus);
}
}
@ -160,12 +159,12 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
*/
private long getSkippedBases( GenomeLoc currentPos ) {
// the minus - is because if lastLoc == null, you haven't yet seen anything in this interval, so it should also be counted as skipped
Long compStop = lastLoc == null ? shard.getGenomeLocs().get(0).getStart() - 1 : lastLoc.getStop();
Long compStop = lastLoc == null ? locus.getStart() - 1 : lastLoc.getStop();
long skippedBases = currentPos.getStart() - compStop - 1;
if ( skippedBases < -1 ) { // minus 1 value is ok
throw new RuntimeException(String.format("BUG: skipped bases=%d is < 0: cur=%s vs. last=%s, shard=%s",
skippedBases, currentPos, lastLoc, shard.getGenomeLocs()));
skippedBases, currentPos, lastLoc, locus));
}
return Math.max(skippedBases, 0);
}
@ -175,8 +174,7 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
* @return
*/
public GenomeLoc getLocOneBeyondShard() {
GenomeLoc lastLocus = !shard.getGenomeLocs().isEmpty() ? shard.getGenomeLocs().get(shard.getGenomeLocs().size()-1) : null;
return GenomeLocParser.createGenomeLoc(lastLocus.getContigIndex(),lastLocus.getStop()+1);
return GenomeLocParser.createGenomeLoc(locus.getContigIndex(),locus.getStop()+1);
}
/**

View File

@ -1,18 +1,15 @@
package org.broadinstitute.sting.gatk.datasources.providers;
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
import org.broadinstitute.sting.gatk.iterators.NullSAMIterator;
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
import org.broadinstitute.sting.gatk.Reads;
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.GenomeLoc;
import java.util.ArrayList;
import java.util.List;
import java.util.Collection;
import java.io.File;
/**
* User: hanna
* Date: May 8, 2009
@ -41,6 +38,11 @@ public class ShardDataProvider {
*/
private final Shard shard;
/**
* The particular locus for which data is provided. Should be contained within shard.getGenomeLocs().
*/
private final GenomeLoc locus;
/**
* The raw collection of reads.
*/
@ -64,6 +66,14 @@ public class ShardDataProvider {
return shard;
}
/**
 * Retrieve the single locus for which this provider serves data.
 * @return The locus backing this provider; may be null (e.g. for non-locus shards).
 */
public GenomeLoc getLocus() {
    return this.locus;
}
/**
* Can this data source provide reads?
* @return True if reads are available, false otherwise.
@ -111,10 +121,10 @@ public class ShardDataProvider {
* @param reads A window into the reads for a given region.
* @param reference A getter for a section of the reference.
*/
public ShardDataProvider( Shard shard, SAMDataSource reads, IndexedFastaSequenceFile reference, Collection<ReferenceOrderedDataSource> rods) {
public ShardDataProvider(Shard shard,GenomeLoc locus,StingSAMIterator reads,IndexedFastaSequenceFile reference,Collection<ReferenceOrderedDataSource> rods) {
this.shard = shard;
// Provide basic reads information.
this.reads = (reads != null) ? reads.seek( shard ) : new NullSAMIterator(new Reads(new ArrayList<File>()));
this.locus = locus;
this.reads = reads;
this.reference = reference;
this.referenceOrderedData = rods;
}
@ -124,11 +134,8 @@ public class ShardDataProvider {
* @param shard the shard
* @param reads reads iterator.
*/
ShardDataProvider( Shard shard, StingSAMIterator reads ) {
this.shard = shard;
this.reads = reads;
this.reference = null;
this.referenceOrderedData = null;
// Package-private convenience constructor: delegates to the primary constructor
// with no reference sequence and no reference-ordered data sources.
ShardDataProvider(Shard shard,GenomeLoc locus,StingSAMIterator reads) {
this(shard,locus,reads,null,null);
}
/**
@ -171,6 +178,12 @@ public class ShardDataProvider {
// to views, which can in turn hold state.
registeredViews.clear();
reads.close();
if(reads != null)
reads.close();
}
/** @return the string form of the underlying shard. */
@Override
public String toString() {
return shard.toString();
}
}

View File

@ -217,7 +217,7 @@ public class IndexDrivenSAMDataSource extends SAMDataSource {
throw new StingException("This SAMDataSource does not support multiple intervals within a single shard");
GenomeLoc shardGenomeLoc = shard.getGenomeLocs().get(0);
return createIterator( new MappedStreamSegment(Collections.singletonList(shardGenomeLoc)) );
return createIterator( new MappedStreamSegment(shardGenomeLoc) );
}
/**
@ -243,11 +243,11 @@ public class IndexDrivenSAMDataSource extends SAMDataSource {
if (!intoUnmappedReads) {
if (lastReadPos == null) {
lastReadPos = GenomeLocParser.createGenomeLoc(getHeader().getSequenceDictionary().getSequence(0).getSequenceIndex(), 0, Integer.MAX_VALUE);
iter = createIterator(new MappedStreamSegment(Collections.singletonList(lastReadPos)));
iter = createIterator(new MappedStreamSegment(lastReadPos));
return InitialReadIterator(readShard.getSize(), iter);
} else {
lastReadPos = GenomeLocParser.setStop(lastReadPos,-1);
iter = fastMappedReadSeek(readShard.getSize(), StingSAMIteratorAdapter.adapt(reads, createIterator(new MappedStreamSegment(Collections.singletonList(lastReadPos)))));
iter = fastMappedReadSeek(readShard.getSize(), StingSAMIteratorAdapter.adapt(reads, createIterator(new MappedStreamSegment(lastReadPos))));
}
if (intoUnmappedReads && !includeUnmappedReads)
@ -347,7 +347,7 @@ public class IndexDrivenSAMDataSource extends SAMDataSource {
readsTaken = readCount;
readsSeenAtLastPos = 0;
lastReadPos = GenomeLocParser.setStop(lastReadPos,-1);
CloseableIterator<SAMRecord> ret = createIterator(new MappedStreamSegment(Collections.singletonList(lastReadPos)));
CloseableIterator<SAMRecord> ret = createIterator(new MappedStreamSegment(lastReadPos));
return new BoundedReadIterator(StingSAMIteratorAdapter.adapt(reads, ret), readCount);
}
}

View File

@ -146,10 +146,9 @@ class MappedReadStreamPointer extends ReadStreamPointer {
// The getStop() + 1 is a hack to work around an old bug in the way Picard created SAM files where queries
// over a given interval would occasionally not pick up the last read in that interval.
GenomeLoc bounds = mappedSegment.getBounds();
mergingIterator.queryOverlapping( bounds.getContig(),
(int)bounds.getStart(),
(int)bounds.getStop()+ PlusOneFixIterator.PLUS_ONE_FIX_CONSTANT);
mergingIterator.queryOverlapping( mappedSegment.locus.getContig(),
(int)mappedSegment.locus.getStart(),
(int)mappedSegment.locus.getStop()+ PlusOneFixIterator.PLUS_ONE_FIX_CONSTANT);
return StingSAMIteratorAdapter.adapt(sourceInfo,mergingIterator);
}
@ -165,10 +164,9 @@ class MappedReadStreamPointer extends ReadStreamPointer {
MergingSamRecordIterator2 mergingIterator = new MergingSamRecordIterator2( headerMerger, sourceInfo );
// NOTE: explicitly not using the queryOverlapping hack above since, according to the above criteria,
// we'd only miss reads that are one base long when performing a contained query.
GenomeLoc bounds = mappedSegment.getBounds();
mergingIterator.queryContained( bounds.getContig(),
(int)bounds.getStart(),
(int)bounds.getStop()+1);
mergingIterator.queryContained( mappedSegment.locus.getContig(),
(int)mappedSegment.locus.getStart(),
(int)mappedSegment.locus.getStop()+1);
return StingSAMIteratorAdapter.adapt(sourceInfo,mergingIterator);
}

View File

@ -67,7 +67,7 @@ public class ReferenceOrderedDataSource implements SimpleDataSource {
* @return Iterator through the data.
*/
public Iterator seek( Shard shard ) {
DataStreamSegment dataStreamSegment = shard.getGenomeLocs().size() != 0 ? new MappedStreamSegment(shard.getGenomeLocs()) : new EntireStream();
DataStreamSegment dataStreamSegment = shard.getGenomeLocs().size() != 0 ? new MappedStreamSegment(shard.getGenomeLocs().get(0)) : new EntireStream();
SeekableRODIterator iterator = iteratorPool.iterator(dataStreamSegment);
return iterator;
}

View File

@ -170,39 +170,18 @@ class EntireStream implements DataStreamSegment {
* Models a mapped position within a stream of GATK input data.
*/
class MappedStreamSegment implements DataStreamSegment {
public final List<GenomeLoc> loci;
public final GenomeLoc locus;
/**
* Retrieves the first location covered by a mapped stream segment.
* @return Location of the first base in this segment.
*/
public GenomeLoc getFirstLocation() {
GenomeLoc firstLocus = loci.get(0);
return GenomeLocParser.createGenomeLoc(firstLocus.getContigIndex(),firstLocus.getStart());
return GenomeLocParser.createGenomeLoc(locus.getContigIndex(),locus.getStart());
}
/**
* Get the total range of the given mapped stream segment.
* @return A GenomeLoc consisting of the first base of the first locus to the last base of the last locus, inclusive.
*/
public GenomeLoc getBounds() {
GenomeLoc firstLocus = loci.get(0);
GenomeLoc lastLocus = loci.get(loci.size()-1);
return GenomeLocParser.createGenomeLoc(getFirstLocation().getContigIndex(),firstLocus.getStart(),lastLocus.getStop());
}
public MappedStreamSegment( List<GenomeLoc> loci ) {
// Validate that the list of loci is non-empty.
if(loci.size() == 0)
throw new StingException("Cannot map to a locus of length 0.");
// Validate that all loci in the given list are from the same contig.
int contigIndex = loci.get(0).getContigIndex();
for(GenomeLoc locus: loci) {
if(contigIndex != locus.getContigIndex())
throw new StingException("All loci in a MappedStreamSegment must be on the same contig.");
}
this.loci = loci;
/**
 * Create a stream segment pinned to a single locus.
 * @param locus The locus this segment covers.
 */
public MappedStreamSegment(GenomeLoc locus) {
this.locus = locus;
}
}

View File

@ -2,11 +2,15 @@ package org.broadinstitute.sting.gatk.executive;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.Pair;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import java.util.ArrayList;
import java.util.List;
import java.util.Iterator;
/**
* User: hanna
* Date: May 18, 2009
@ -43,9 +47,9 @@ public abstract class Accumulator {
* @param walker Walker for which to build an accumulator.
* @return Accumulator suitable for this walker.
*/
public static Accumulator create( Walker walker ) {
public static Accumulator create( GenomeAnalysisEngine engine, Walker walker ) {
if( walker.isReduceByInterval() )
return new IntervalAccumulator( walker );
return new IntervalAccumulator( walker, engine.getIntervals() );
else
return new StandardAccumulator( walker );
}
@ -61,7 +65,7 @@ public abstract class Accumulator {
* @param result Result of the most recent accumulation.
* @return the newest accumulation of the given data.
*/
public abstract void accumulate( Shard shard, Object result );
public abstract void accumulate( ShardDataProvider provider, Object result );
/**
* Finishes off the traversal. Submits accumulated results to
@ -103,7 +107,7 @@ public abstract class Accumulator {
* The result of the accumulator in a non-intervals walker
* already takes the accumulation into account. return the result.
*/
public void accumulate( Shard shard, Object result ) { this.accumulator = result; }
public void accumulate( ShardDataProvider provider, Object result ) { this.accumulator = result; }
/**
* The result of the traversal is the list of accumulated intervals.
@ -119,25 +123,51 @@ public abstract class Accumulator {
* and aggregates those results into a single list.
*/
private static class IntervalAccumulator extends Accumulator {
private List<Pair<GenomeLoc,Object>> intervalAccumulator = new ArrayList<Pair<GenomeLoc,Object>>();
/**
* An iterator through all intervals in the series.
*/
private final Iterator<GenomeLoc> intervalIterator;
protected IntervalAccumulator( Walker walker ) {
/**
* For which interval is the accumulator currently accumulating?
*/
private GenomeLoc currentInterval = null;
/**
* The actual mapping of interval to accumulator.
*/
private final List<Pair<GenomeLoc,Object>> intervalAccumulator = new ArrayList<Pair<GenomeLoc,Object>>();
private Object nextReduceInit = null;
protected IntervalAccumulator(Walker walker, GenomeLocSortedSet intervals) {
super(walker);
this.intervalIterator = intervals.iterator();
if(intervalIterator.hasNext()) currentInterval = intervalIterator.next();
nextReduceInit = walker.reduceInit();
}
/**
* Interval accumulator always feeds reduceInit into every new traversal.
*/
public Object getReduceInit() { return walker.reduceInit(); }
public Object getReduceInit() { return nextReduceInit; }
/**
* Create a holder for interval results if none exists. Add the result to the holder.
*/
public void accumulate( Shard shard, Object result ) {
// TODO: The following code is actually wrong we'll be doubly assigning results to locations.
// Fix before the new sharding system comes online.
for(GenomeLoc genomeLoc: shard.getGenomeLocs())
intervalAccumulator.add( new Pair<GenomeLoc,Object>( genomeLoc, result ) );
public void accumulate( ShardDataProvider provider, Object result ) {
GenomeLoc location = provider.getLocus();
// Pull the interval iterator ahead to the interval overlapping this shard fragment.
while((currentInterval == null || currentInterval.isBefore(location)) && intervalIterator.hasNext())
currentInterval = intervalIterator.next();
if(currentInterval != null && currentInterval.getContig().equals(location.getContig()) && currentInterval.getStop() == location.getStop()) {
intervalAccumulator.add(new Pair<GenomeLoc,Object>(currentInterval,result));
nextReduceInit = walker.reduceInit();
}
else
nextReduceInit = result;
}
/**

View File

@ -6,8 +6,11 @@ import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategy;
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider;
import org.broadinstitute.sting.gatk.io.*;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.threading.ThreadPoolMonitor;
@ -73,12 +76,13 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
/**
* Create a new hierarchical microscheduler to process the given reads and reference.
*
* @param walker the walker used to process the dataset.
* @param reads Reads file(s) to process.
* @param reference Reference for driving the traversal.
* @param nThreadsToUse maximum number of threads to use to do the work
*/
protected HierarchicalMicroScheduler( Walker walker, SAMDataSource reads, IndexedFastaSequenceFile reference, Collection<ReferenceOrderedDataSource> rods, int nThreadsToUse ) {
super(walker, reads, reference, rods);
protected HierarchicalMicroScheduler(GenomeAnalysisEngine engine, Walker walker, SAMDataSource reads, IndexedFastaSequenceFile reference, Collection<ReferenceOrderedDataSource> rods, int nThreadsToUse ) {
super(engine, walker, reads, reference, rods);
this.threadPool = Executors.newFixedThreadPool(nThreadsToUse);
try {
@ -282,8 +286,7 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
ShardTraverser traverser = new ShardTraverser(this,
traversalEngine,
walker,
shard,
getShardDataProvider(shard),
new ShardDataProvider(shard,shard.getGenomeLocs().get(0),getReadIterator(shard),reference,rods),
outputTracker);
Future traverseResult = threadPool.submit(traverser);

View File

@ -3,12 +3,15 @@ package org.broadinstitute.sting.gatk.executive;
import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider;
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategy;
import org.broadinstitute.sting.gatk.datasources.shards.ReadShard;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.gatk.io.DirectOutputTracker;
import org.broadinstitute.sting.gatk.io.OutputTracker;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import java.util.Collection;
@ -28,8 +31,8 @@ public class LinearMicroScheduler extends MicroScheduler {
* @param reference Reference for driving the traversal.
* @param rods Reference-ordered data.
*/
protected LinearMicroScheduler( Walker walker, SAMDataSource reads, IndexedFastaSequenceFile reference, Collection<ReferenceOrderedDataSource> rods ) {
super(walker, reads, reference, rods);
protected LinearMicroScheduler(GenomeAnalysisEngine engine, Walker walker, SAMDataSource reads, IndexedFastaSequenceFile reference, Collection<ReferenceOrderedDataSource> rods ) {
super(engine, walker, reads, reference, rods);
}
/**
@ -44,15 +47,27 @@ public class LinearMicroScheduler extends MicroScheduler {
traversalEngine.setMaximumIterations(maxIterations);
walker.initialize();
Accumulator accumulator = Accumulator.create(walker);
Accumulator accumulator = Accumulator.create(engine,walker);
for (Shard shard : shardStrategy) {
ShardDataProvider dataProvider = getShardDataProvider( shard );
Object result = traversalEngine.traverse(walker, shard, dataProvider, accumulator.getReduceInit());
accumulator.accumulate( shard, result );
dataProvider.close();
// New experimental code for managing locus intervals.
// TODO: we'll need a similar but slightly different strategy for dealing with read intervals, so generalize this code.
if(shard.getShardType() == Shard.ShardType.LOCUS_INTERVAL) {
WindowMaker windowMaker = new WindowMaker(getReadIterator(shard),shard.getGenomeLocs());
for(WindowMaker.WindowMakerIterator iterator: windowMaker) {
ShardDataProvider dataProvider = new ShardDataProvider(shard,iterator.getLocus(),iterator,reference,rods);
Object result = traversalEngine.traverse(walker, dataProvider, accumulator.getReduceInit());
accumulator.accumulate(dataProvider,result);
dataProvider.close();
}
windowMaker.close();
}
else {
ShardDataProvider dataProvider = new ShardDataProvider(shard,null,getReadIterator(shard),reference,rods);
Object result = traversalEngine.traverse(walker, dataProvider, accumulator.getReduceInit());
accumulator.accumulate(dataProvider,result);
dataProvider.close();
}
}
Object result = accumulator.finishTraversal();

View File

@ -26,7 +26,6 @@
package org.broadinstitute.sting.gatk.executive;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider;
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategy;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
@ -34,9 +33,15 @@ import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource
import org.broadinstitute.sting.gatk.traversals.*;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.gatk.io.OutputTracker;
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
import org.broadinstitute.sting.gatk.iterators.NullSAMIterator;
import org.broadinstitute.sting.gatk.Reads;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import java.util.*;
import java.io.File;
/**
@ -51,11 +56,16 @@ import java.util.*;
public abstract class MicroScheduler {
protected static Logger logger = Logger.getLogger(MicroScheduler.class);
/**
* The engine invoking this scheduler.
*/
protected final GenomeAnalysisEngine engine;
protected final TraversalEngine traversalEngine;
protected final IndexedFastaSequenceFile reference;
private final SAMDataSource reads;
private final Collection<ReferenceOrderedDataSource> rods;
protected final Collection<ReferenceOrderedDataSource> rods;
/**
* MicroScheduler factory function. Create a microscheduler appropriate for reducing the
@ -69,13 +79,13 @@ public abstract class MicroScheduler {
*
* @return The best-fit microscheduler.
*/
public static MicroScheduler create(Walker walker, SAMDataSource reads, IndexedFastaSequenceFile reference, Collection<ReferenceOrderedDataSource> rods, int nThreadsToUse) {
public static MicroScheduler create(GenomeAnalysisEngine engine, Walker walker, SAMDataSource reads, IndexedFastaSequenceFile reference, Collection<ReferenceOrderedDataSource> rods, int nThreadsToUse) {
if (walker instanceof TreeReducible && nThreadsToUse > 1) {
logger.info("Creating hierarchical microscheduler");
return new HierarchicalMicroScheduler(walker, reads, reference, rods, nThreadsToUse);
return new HierarchicalMicroScheduler(engine, walker, reads, reference, rods, nThreadsToUse);
} else {
logger.info("Creating linear microscheduler");
return new LinearMicroScheduler(walker, reads, reference, rods);
return new LinearMicroScheduler(engine, walker, reads, reference, rods);
}
}
@ -87,7 +97,8 @@ public abstract class MicroScheduler {
* @param reference The reference.
* @param rods the rods to include in the traversal
*/
protected MicroScheduler(Walker walker, SAMDataSource reads, IndexedFastaSequenceFile reference, Collection<ReferenceOrderedDataSource> rods) {
protected MicroScheduler(GenomeAnalysisEngine engine, Walker walker, SAMDataSource reads, IndexedFastaSequenceFile reference, Collection<ReferenceOrderedDataSource> rods) {
this.engine = engine;
this.reads = reads;
this.reference = reference;
this.rods = rods;
@ -123,16 +134,13 @@ public abstract class MicroScheduler {
*/
public abstract OutputTracker getOutputTracker();
/**
* Gets an window into all the data that can be viewed as a single shard.
*
* @param shard The section of data to view.
*
* @return An accessor for all the data in this shard.
* Gets an iterator over the given reads, which will iterate over the reads in the given shard.
* @param shard the shard to use when querying reads.
* @return an iterator over the reads specified in the shard.
*/
protected ShardDataProvider getShardDataProvider(Shard shard) {
return new ShardDataProvider(shard, reads, reference, rods);
protected StingSAMIterator getReadIterator(Shard shard) {
return (reads != null) ? reads.seek(shard) : new NullSAMIterator(new Reads(new ArrayList<File>()));
}
/**

View File

@ -27,7 +27,6 @@ public class ShardTraverser implements Callable {
private HierarchicalMicroScheduler microScheduler;
private Walker walker;
private TraversalEngine traversalEngine;
private Shard shard;
private ShardDataProvider dataProvider;
private ThreadLocalOutputTracker outputTracker;
private OutputMergeTask outputMergeTask;
@ -40,13 +39,11 @@ public class ShardTraverser implements Callable {
public ShardTraverser( HierarchicalMicroScheduler microScheduler,
TraversalEngine traversalEngine,
Walker walker,
Shard shard,
ShardDataProvider dataProvider,
ThreadLocalOutputTracker outputTracker ) {
this.microScheduler = microScheduler;
this.walker = walker;
this.traversalEngine = traversalEngine;
this.shard = shard;
this.dataProvider = dataProvider;
this.outputTracker = outputTracker;
}
@ -56,7 +53,7 @@ public class ShardTraverser implements Callable {
Object accumulator = walker.reduceInit();
try {
accumulator = traversalEngine.traverse( walker, shard, dataProvider, accumulator );
accumulator = traversalEngine.traverse( walker, dataProvider, accumulator );
}
finally {
dataProvider.close();

View File

@ -0,0 +1,116 @@
package org.broadinstitute.sting.gatk.executive;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
import org.broadinstitute.sting.gatk.Reads;
import java.util.List;
import java.util.Queue;
import java.util.ArrayDeque;
import java.util.Iterator;
import net.sf.samtools.SAMRecord;
import net.sf.picard.util.PeekableIterator;
/**
 * Buffer shards of data which may or may not contain multiple loci into
 * iterators of all data which cover an interval. Its existence is an homage
 * to Mark's stillborn WindowMaker, RIP 2009.
 *
 * NOTE(review): this class is both Iterable and Iterator over itself, so it is
 * strictly single-pass: each call to next() produces the window for the next
 * interval, and windows must be consumed (and closed) in order because they all
 * share the one underlying read iterator.
 *
 * @author mhanna
 * @version 0.1
 */
public class WindowMaker implements Iterable<WindowMaker.WindowMakerIterator>, Iterator<WindowMaker.WindowMakerIterator> {
    /**
     * The data source for reads. Will probably come directly from the BAM file.
     * Shared by every WindowMakerIterator handed out by this object.
     */
    private final StingSAMIterator sourceIterator;
    /**
     * Stores the sequence of intervals that the windowmaker should be tracking.
     * Peekable so that next() can look ahead at the upcoming interval's start
     * when deciding which reads to carry over.
     */
    private final PeekableIterator<GenomeLoc> intervalIterator;
    /**
     * Which reads should be saved to go into the next interval?
     * Refilled when a window's close() promotes its pendingOverlaps queue.
     */
    private Queue<SAMRecord> overlappingReads = new ArrayDeque<SAMRecord>();
    /**
     * Create a new window maker with the given iterator as a data source, covering
     * the given intervals.
     * @param iterator The data source for this window.
     * @param intervals The set of intervals over which to traverse.
     */
    public WindowMaker(StingSAMIterator iterator, List<GenomeLoc> intervals) {
        this.sourceIterator = iterator;
        this.intervalIterator = new PeekableIterator<GenomeLoc>(intervals.iterator());
    }
    public Iterator<WindowMakerIterator> iterator() {
        // Single-pass: the WindowMaker is its own (one-shot) iterator.
        return this;
    }
    public boolean hasNext() {
        // There is one window per remaining interval, regardless of read supply.
        return intervalIterator.hasNext();
    }
    public WindowMakerIterator next() {
        return new WindowMakerIterator(intervalIterator.next());
    }
    public void remove() {
        throw new UnsupportedOperationException("Cannot remove from a window maker.");
    }
    public void close() {
        // Closes only the shared read source; interval iteration needs no cleanup.
        this.sourceIterator.close();
    }
    /**
     * A window over a single interval's reads. Draws from the shared source
     * iterator plus any reads carried over from the previous window.
     */
    public class WindowMakerIterator implements StingSAMIterator {
        /**
         * The locus for which this iterator is currently returning reads.
         */
        private final GenomeLoc locus;
        /**
         * Which reads should be saved to go into the next interval?
         * Populated during next() and handed to the outer queue by close().
         */
        private final Queue<SAMRecord> pendingOverlaps = new ArrayDeque<SAMRecord>();
        public WindowMakerIterator(GenomeLoc locus) {
            this.locus = locus;
        }
        public Reads getSourceInfo() {
            return sourceIterator.getSourceInfo();
        }
        public GenomeLoc getLocus() {
            return locus;
        }
        public WindowMakerIterator iterator() {
            return this;
        }
        public boolean hasNext() {
            // NOTE(review): this does not bound the iteration by this window's
            // locus -- it reports true whenever the shared source has reads left.
            // Presumably callers stop consuming at the interval boundary; confirm.
            return overlappingReads.size() > 0 || sourceIterator.hasNext();
        }
        public SAMRecord next() {
            // Prefer reads carried over from the previous window, then fresh reads.
            SAMRecord nextRead = overlappingReads.size() > 0 ? overlappingReads.remove() : sourceIterator.next();
            // A read extending into the next interval is remembered so the next
            // window can replay it.
            if(intervalIterator.hasNext() && nextRead.getAlignmentEnd() >= intervalIterator.peek().getStart())
                pendingOverlaps.add(nextRead);
            return nextRead;
        }
        public void close() {
            // NOTE(review): close() is repurposed as the handoff point -- it
            // replaces the outer carry-over queue with this window's pending
            // overlaps. It does NOT close the shared source iterator; callers
            // must close() each window before opening the next, and close the
            // enclosing WindowMaker when done.
            overlappingReads = pendingOverlaps;
        }
        public void remove() {
            throw new UnsupportedOperationException("Unable to remove from a window maker iterator.");
        }
    }
}

View File

@ -113,7 +113,6 @@ public abstract class TraversalEngine {
* this method must be implemented by all traversal engines
*
* @param walker the walker to run with
* @param shard a shard of data
* @param dataProvider the data provider that generates data given the shard
* @param sum the accumulator
* @param <M> an object of the map type
@ -122,7 +121,6 @@ public abstract class TraversalEngine {
* @return an object of the reduce type
*/
public abstract <M, T> T traverse(Walker<M, T> walker,
Shard shard,
ShardDataProvider dataProvider,
T sum);
}

View File

@ -195,7 +195,6 @@ public class TraverseDuplicates extends TraversalEngine {
* Traverse by reads, given the data and the walker
*
* @param walker the walker to execute over
* @param shard the shard of data to feed the walker
* @param sum of type T, the return from the walker
* @param <M> the generic type
* @param <T> the return type of the reduce function
@ -203,7 +202,6 @@ public class TraverseDuplicates extends TraversalEngine {
* @return the result type T, the product of all the reduce calls
*/
public <M, T> T traverse(Walker<M, T> walker,
Shard shard,
ShardDataProvider dataProvider,
T sum) {
// safety first :-)

View File

@ -38,10 +38,9 @@ public class TraverseLoci extends TraversalEngine {
@Override
public <M,T> T traverse( Walker<M,T> walker,
Shard shard,
ShardDataProvider dataProvider,
T sum ) {
logger.debug(String.format("TraverseLoci.traverse: Shard is %s", shard));
logger.debug(String.format("TraverseLoci.traverse: Shard is %s", dataProvider));
if ( !(walker instanceof LocusWalker) )
throw new IllegalArgumentException("Walker isn't a loci walker!");

View File

@ -28,7 +28,6 @@ public class TraverseLocusWindows extends TraversalEngine {
private static final String LOCUS_WINDOW_STRING = "intervals";
public <M,T> T traverse( Walker<M,T> walker,
Shard shard,
ShardDataProvider dataProvider,
T sum ) {
@ -37,9 +36,7 @@ public class TraverseLocusWindows extends TraversalEngine {
LocusWindowWalker<M, T> locusWindowWalker = (LocusWindowWalker<M, T>)walker;
if(shard.getGenomeLocs().size() > 1)
throw new StingException("This traversal does not support multiple intervals within a single shard");
GenomeLoc interval = shard.getGenomeLocs().get(0);
GenomeLoc interval = dataProvider.getLocus();
ReadView readView = new ReadView( dataProvider );
LocusReferenceView referenceView = new LocusReferenceView( walker, dataProvider );

View File

@ -6,9 +6,6 @@ import org.broadinstitute.sting.gatk.WalkerManager;
import org.broadinstitute.sting.gatk.datasources.providers.ReadReferenceView;
import org.broadinstitute.sting.gatk.datasources.providers.ReadView;
import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider;
import org.broadinstitute.sting.gatk.datasources.shards.IntervalShard;
import org.broadinstitute.sting.gatk.datasources.shards.ReadShard;
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import org.broadinstitute.sting.gatk.walkers.DataSource;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.walkers.Walker;
@ -59,7 +56,6 @@ public class TraverseReads extends TraversalEngine {
* Traverse by reads, given the data and the walker
*
* @param walker the walker to traverse with
* @param shard the shard, specifying the range of data to iterate over
* @param dataProvider the provider of the reads data
* @param sum the value of type T, specified by the walker, to feed to the walkers reduce function
* @param <M> the map type of the walker
@ -67,11 +63,10 @@ public class TraverseReads extends TraversalEngine {
* @return the reduce variable of the read walker
*/
public <M, T> T traverse(Walker<M, T> walker,
Shard shard,
ShardDataProvider dataProvider,
T sum) {
logger.debug(String.format("TraverseReads.traverse Covered dataset is %s", shard));
logger.debug(String.format("TraverseReads.traverse Covered dataset is %s", dataProvider));
if (!(walker instanceof ReadWalker))
throw new IllegalArgumentException("Walker isn't a read walker!");

View File

@ -75,7 +75,6 @@ public class ArtificialReadsTraversal extends TraversalEngine {
* Traverse by reads, given the data and the walker
*
* @param walker the walker to traverse with
* @param shard the shard, specifying the range of data to iterate over
* @param dataProvider the provider of the reads data
* @param sum the value of type T, specified by the walker, to feed to the walkers reduce function
* @param <M> the map type of the walker
@ -84,7 +83,6 @@ public class ArtificialReadsTraversal extends TraversalEngine {
* @return the reduce variable of the read walker
*/
public <M, T> T traverse( Walker<M, T> walker,
Shard shard,
ShardDataProvider dataProvider,
T sum ) {

View File

@ -58,7 +58,7 @@ public class LocusReferenceViewTest extends ReferenceViewTemplate {
public void testOverlappingReferenceBases() {
Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc(0, sequenceFile.getSequence("chrM").length() - 10, sequenceFile.getSequence("chrM").length())));
ShardDataProvider dataProvider = new ShardDataProvider(shard, null, sequenceFile, null);
ShardDataProvider dataProvider = new ShardDataProvider(shard, shard.getGenomeLocs().get(0), null, sequenceFile, null);
LocusReferenceView view = new LocusReferenceView(dataProvider);
char[] results = view.getReferenceBases(GenomeLocParser.createGenomeLoc(0, sequenceFile.getSequence("chrM").length() - 10, sequenceFile.getSequence("chrM").length() + 9));
@ -75,7 +75,7 @@ public class LocusReferenceViewTest extends ReferenceViewTemplate {
public void testBoundsFailure() {
Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc(0, 1, 50)));
ShardDataProvider dataProvider = new ShardDataProvider(shard, null, sequenceFile, null);
ShardDataProvider dataProvider = new ShardDataProvider(shard, shard.getGenomeLocs().get(0), null, sequenceFile, null);
LocusReferenceView view = new LocusReferenceView(dataProvider);
view.getReferenceContext(GenomeLocParser.createGenomeLoc(0, 51)).getBase();
@ -91,7 +91,7 @@ public class LocusReferenceViewTest extends ReferenceViewTemplate {
Shard shard = new LocusShard(Collections.singletonList(loc));
GenomeLocusIterator shardIterator = new GenomeLocusIterator(shard.getGenomeLocs());
ShardDataProvider dataProvider = new ShardDataProvider(shard, null, sequenceFile, null);
ShardDataProvider dataProvider = new ShardDataProvider(shard, shard.getGenomeLocs().get(0), null, sequenceFile, null);
LocusReferenceView view = new LocusReferenceView(dataProvider);
while (shardIterator.hasNext()) {

View File

@ -46,7 +46,7 @@ public abstract class LocusViewTemplate extends BaseTest {
GenomeLoc shardBounds = GenomeLocParser.createGenomeLoc("chr1", 1, 5);
Shard shard = new LocusShard(Collections.singletonList(shardBounds));
ShardDataProvider dataProvider = new ShardDataProvider(shard, iterator);
ShardDataProvider dataProvider = new ShardDataProvider(shard, shard.getGenomeLocs().get(0), iterator);
LocusView view = createView(dataProvider);
@ -60,7 +60,7 @@ public abstract class LocusViewTemplate extends BaseTest {
GenomeLoc shardBounds = GenomeLocParser.createGenomeLoc("chr1", 1, 5);
Shard shard = new LocusShard(Collections.singletonList(shardBounds));
ShardDataProvider dataProvider = new ShardDataProvider(shard, iterator);
ShardDataProvider dataProvider = new ShardDataProvider(shard, shard.getGenomeLocs().get(0), iterator);
LocusView view = createView(dataProvider);
@ -73,7 +73,7 @@ public abstract class LocusViewTemplate extends BaseTest {
SAMRecordIterator iterator = new SAMRecordIterator(read);
Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 1, 10)));
ShardDataProvider dataProvider = new ShardDataProvider(shard, iterator);
ShardDataProvider dataProvider = new ShardDataProvider(shard, shard.getGenomeLocs().get(0), iterator);
LocusView view = createView(dataProvider);
testReadsInContext(view, shard.getGenomeLocs(), Collections.singletonList(read));
@ -85,7 +85,7 @@ public abstract class LocusViewTemplate extends BaseTest {
SAMRecordIterator iterator = new SAMRecordIterator(read);
Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 1, 10)));
ShardDataProvider dataProvider = new ShardDataProvider(shard, iterator);
ShardDataProvider dataProvider = new ShardDataProvider(shard, shard.getGenomeLocs().get(0), iterator);
LocusView view = createView(dataProvider);
testReadsInContext(view, shard.getGenomeLocs(), Collections.singletonList(read));
@ -97,7 +97,7 @@ public abstract class LocusViewTemplate extends BaseTest {
SAMRecordIterator iterator = new SAMRecordIterator(read);
Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 1, 10)));
ShardDataProvider dataProvider = new ShardDataProvider(shard, iterator);
ShardDataProvider dataProvider = new ShardDataProvider(shard, shard.getGenomeLocs().get(0), iterator);
LocusView view = createView(dataProvider);
testReadsInContext(view, shard.getGenomeLocs(), Collections.singletonList(read));
@ -109,7 +109,7 @@ public abstract class LocusViewTemplate extends BaseTest {
SAMRecordIterator iterator = new SAMRecordIterator(read);
Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 6, 15)));
ShardDataProvider dataProvider = new ShardDataProvider(shard, iterator);
ShardDataProvider dataProvider = new ShardDataProvider(shard, shard.getGenomeLocs().get(0), iterator);
LocusView view = createView(dataProvider);
testReadsInContext(view, shard.getGenomeLocs(), Collections.singletonList(read));
@ -121,7 +121,7 @@ public abstract class LocusViewTemplate extends BaseTest {
SAMRecordIterator iterator = new SAMRecordIterator(read);
Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 1, 10)));
ShardDataProvider dataProvider = new ShardDataProvider(shard, iterator);
ShardDataProvider dataProvider = new ShardDataProvider(shard, shard.getGenomeLocs().get(0), iterator);
LocusView view = createView(dataProvider);
testReadsInContext(view, shard.getGenomeLocs(), Collections.singletonList(read));
@ -134,7 +134,7 @@ public abstract class LocusViewTemplate extends BaseTest {
SAMRecordIterator iterator = new SAMRecordIterator(read1, read2);
Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 1, 10)));
ShardDataProvider dataProvider = new ShardDataProvider(shard, iterator);
ShardDataProvider dataProvider = new ShardDataProvider(shard, shard.getGenomeLocs().get(0), iterator);
LocusView view = createView(dataProvider);
List<SAMRecord> expectedReads = new ArrayList<SAMRecord>();
@ -151,7 +151,7 @@ public abstract class LocusViewTemplate extends BaseTest {
SAMRecordIterator iterator = new SAMRecordIterator(read1, read2, read3, read4);
Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 1, 10)));
ShardDataProvider dataProvider = new ShardDataProvider(shard, iterator);
ShardDataProvider dataProvider = new ShardDataProvider(shard, shard.getGenomeLocs().get(0), iterator);
LocusView view = createView(dataProvider);
List<SAMRecord> expectedReads = new ArrayList<SAMRecord>();
@ -168,7 +168,7 @@ public abstract class LocusViewTemplate extends BaseTest {
SAMRecordIterator iterator = new SAMRecordIterator(read1, read2, read3, read4);
Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 1, 10)));
ShardDataProvider dataProvider = new ShardDataProvider(shard, iterator);
ShardDataProvider dataProvider = new ShardDataProvider(shard, shard.getGenomeLocs().get(0), iterator);
LocusView view = createView(dataProvider);
List<SAMRecord> expectedReads = new ArrayList<SAMRecord>();
@ -187,7 +187,7 @@ public abstract class LocusViewTemplate extends BaseTest {
SAMRecordIterator iterator = new SAMRecordIterator(read1, read2, read3, read4, read5, read6);
Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 1, 10)));
ShardDataProvider dataProvider = new ShardDataProvider(shard, iterator);
ShardDataProvider dataProvider = new ShardDataProvider(shard, shard.getGenomeLocs().get(0), iterator);
LocusView view = createView(dataProvider);
List<SAMRecord> expectedReads = new ArrayList<SAMRecord>();
@ -213,7 +213,7 @@ public abstract class LocusViewTemplate extends BaseTest {
read07, read08, read09, read10, read11, read12);
Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 6, 15)));
ShardDataProvider dataProvider = new ShardDataProvider(shard, iterator);
ShardDataProvider dataProvider = new ShardDataProvider(shard, shard.getGenomeLocs().get(0), iterator);
LocusView view = createView(dataProvider);
List<SAMRecord> expectedReads = new ArrayList<SAMRecord>();

View File

@ -70,7 +70,7 @@ public class ReadReferenceViewTest extends ReferenceViewTemplate {
final long contigStart = selectedContig.getSequenceLength() - (readLength - overlap - 1);
final long contigStop = selectedContig.getSequenceLength() + overlap;
ShardDataProvider dataProvider = new ShardDataProvider(null,null,sequenceFile,null);
ShardDataProvider dataProvider = new ShardDataProvider(null,null,null,sequenceFile,null);
ReadReferenceView view = new ReadReferenceView(dataProvider);
SAMRecord rec = buildSAMRecord(selectedContig.getSequenceName(),(int)contigStart,(int)contigStop);
@ -97,7 +97,7 @@ public class ReadReferenceViewTest extends ReferenceViewTemplate {
protected void validateLocation( GenomeLoc loc ) {
SAMRecord read = buildSAMRecord( loc.getContig(), (int)loc.getStart(), (int)loc.getStop() );
ShardDataProvider dataProvider = new ShardDataProvider(null,null,sequenceFile,null);
ShardDataProvider dataProvider = new ShardDataProvider(null,null,null,sequenceFile,null);
ReadReferenceView view = new ReadReferenceView(dataProvider);
ReferenceSequence expectedAsSeq = sequenceFile.getSubsequenceAt(loc.getContig(),loc.getStart(),loc.getStop());

View File

@ -53,7 +53,7 @@ public class ReferenceOrderedViewTest extends BaseTest {
@Test
public void testNoBindings() {
Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chrM",1,30)));
ShardDataProvider provider = new ShardDataProvider(shard, null, seq, Collections.<ReferenceOrderedDataSource>emptyList());
ShardDataProvider provider = new ShardDataProvider(shard, shard.getGenomeLocs().get(0), null, seq, Collections.<ReferenceOrderedDataSource>emptyList());
ReferenceOrderedView view = new ManagingReferenceOrderedView( provider );
RefMetaDataTracker tracker = view.getReferenceOrderedDataAtLocus(GenomeLocParser.createGenomeLoc("chrM",10));
@ -71,7 +71,7 @@ public class ReferenceOrderedViewTest extends BaseTest {
Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chrM",1,30)));
ShardDataProvider provider = new ShardDataProvider(shard, null, seq, Collections.singletonList(dataSource));
ShardDataProvider provider = new ShardDataProvider(shard, shard.getGenomeLocs().get(0), null, seq, Collections.singletonList(dataSource));
ReferenceOrderedView view = new ManagingReferenceOrderedView( provider );
RefMetaDataTracker tracker = view.getReferenceOrderedDataAtLocus(GenomeLocParser.createGenomeLoc("chrM",20));
@ -97,7 +97,7 @@ public class ReferenceOrderedViewTest extends BaseTest {
Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chrM",1,30)));
ShardDataProvider provider = new ShardDataProvider(shard, null, seq, Arrays.asList(dataSource1,dataSource2));
ShardDataProvider provider = new ShardDataProvider(shard, shard.getGenomeLocs().get(0), null, seq, Arrays.asList(dataSource1,dataSource2));
ReferenceOrderedView view = new ManagingReferenceOrderedView( provider );
RefMetaDataTracker tracker = view.getReferenceOrderedDataAtLocus(GenomeLocParser.createGenomeLoc("chrM",20));

View File

@ -34,7 +34,7 @@ public class ShardDataProviderTest extends BaseTest {
@Before
public void createProvider() {
provider = new ShardDataProvider( null,null,null,null );
provider = new ShardDataProvider( null,null,null,null,null );
}
/**

View File

@ -69,7 +69,7 @@ public class ArtificialResourcePool extends SAMResourcePool {
if (segment instanceof MappedStreamSegment && iterator instanceof ArtificialSAMQueryIterator) {
ArtificialSAMQueryIterator queryIterator = (ArtificialSAMQueryIterator)iterator;
MappedStreamSegment mappedSegment = (MappedStreamSegment)segment;
GenomeLoc bounds = mappedSegment.getBounds();
GenomeLoc bounds = mappedSegment.locus;
if (!this.queryOverlapping) {
queryIterator.queryContained(bounds.getContig(), (int)bounds.getStart(), (int)bounds.getStop());
} else {

View File

@ -55,7 +55,7 @@ public class ReferenceOrderedDataPoolTest extends BaseTest {
@Test
public void testCreateSingleIterator() {
ResourcePool iteratorPool = new ReferenceOrderedDataPool(rod);
SeekableRODIterator iterator = (SeekableRODIterator)iteratorPool.iterator( new MappedStreamSegment(Collections.singletonList(testSite1)) );
SeekableRODIterator iterator = (SeekableRODIterator)iteratorPool.iterator( new MappedStreamSegment(testSite1) );
Assert.assertEquals("Number of iterators in the pool is incorrect", 1, iteratorPool.numIterators());
Assert.assertEquals("Number of available iterators in the pool is incorrect", 0, iteratorPool.numAvailableIterators());
@ -76,10 +76,10 @@ public class ReferenceOrderedDataPoolTest extends BaseTest {
@Test
public void testCreateMultipleIterators() {
ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(rod);
SeekableRODIterator iterator1 = iteratorPool.iterator( new MappedStreamSegment(Collections.singletonList(testSite1)) );
SeekableRODIterator iterator1 = iteratorPool.iterator( new MappedStreamSegment(testSite1) );
// Create a new iterator at position 2.
SeekableRODIterator iterator2 = iteratorPool.iterator( new MappedStreamSegment(Collections.singletonList(testSite2)) );
SeekableRODIterator iterator2 = iteratorPool.iterator( new MappedStreamSegment(testSite2) );
Assert.assertEquals("Number of iterators in the pool is incorrect", 2, iteratorPool.numIterators());
Assert.assertEquals("Number of available iterators in the pool is incorrect", 0, iteratorPool.numAvailableIterators());
@ -126,7 +126,7 @@ public class ReferenceOrderedDataPoolTest extends BaseTest {
@Test
public void testIteratorConservation() {
ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(rod);
SeekableRODIterator iterator = iteratorPool.iterator( new MappedStreamSegment(Collections.singletonList(testSite1)) );
SeekableRODIterator iterator = iteratorPool.iterator( new MappedStreamSegment(testSite1) );
Assert.assertEquals("Number of iterators in the pool is incorrect", 1, iteratorPool.numIterators());
Assert.assertEquals("Number of available iterators in the pool is incorrect", 0, iteratorPool.numAvailableIterators());
@ -140,7 +140,7 @@ public class ReferenceOrderedDataPoolTest extends BaseTest {
iteratorPool.release(iterator);
// Create another iterator after the current iterator.
iterator = iteratorPool.iterator( new MappedStreamSegment(Collections.singletonList(testSite3)) );
iterator = iteratorPool.iterator( new MappedStreamSegment(testSite3) );
// Make sure that the previously acquired iterator was reused.
Assert.assertEquals("Number of iterators in the pool is incorrect", 1, iteratorPool.numIterators());
@ -161,7 +161,7 @@ public class ReferenceOrderedDataPoolTest extends BaseTest {
@Test
public void testIteratorCreation() {
ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(rod);
SeekableRODIterator iterator = iteratorPool.iterator( new MappedStreamSegment(Collections.singletonList(testSite3)) );
SeekableRODIterator iterator = iteratorPool.iterator( new MappedStreamSegment(testSite3) );
Assert.assertEquals("Number of iterators in the pool is incorrect", 1, iteratorPool.numIterators());
Assert.assertEquals("Number of available iterators in the pool is incorrect", 0, iteratorPool.numAvailableIterators());
@ -175,7 +175,7 @@ public class ReferenceOrderedDataPoolTest extends BaseTest {
iteratorPool.release(iterator);
// Create another iterator after the current iterator.
iterator = iteratorPool.iterator(new MappedStreamSegment(Collections.singletonList(testSite1)) );
iterator = iteratorPool.iterator(new MappedStreamSegment(testSite1) );
// Make sure that the previously acquired iterator was reused.
Assert.assertEquals("Number of iterators in the pool is incorrect", 2, iteratorPool.numIterators());

View File

@ -132,8 +132,8 @@ public class TraverseReadsTest extends BaseTest {
fail("Shard == null");
}
ShardDataProvider dataProvider = new ShardDataProvider(shard,dataSource,null,null);
accumulator = traversalEngine.traverse(countReadWalker, shard, dataProvider, accumulator);
ShardDataProvider dataProvider = new ShardDataProvider(shard,null,dataSource.seek(shard),null,null);
accumulator = traversalEngine.traverse(countReadWalker, dataProvider, accumulator);
dataProvider.close();
}
@ -178,8 +178,8 @@ public class TraverseReadsTest extends BaseTest {
fail("Shard == null");
}
ShardDataProvider dataProvider = new ShardDataProvider(shard,dataSource,null,null);
accumulator = traversalEngine.traverse(countReadWalker, shard, dataProvider, accumulator);
ShardDataProvider dataProvider = new ShardDataProvider(shard,null,dataSource.seek(shard),null,null);
accumulator = traversalEngine.traverse(countReadWalker, dataProvider, accumulator);
dataProvider.close();
}

View File

@ -67,7 +67,7 @@ public class PrintReadsWalkerTest extends BaseTest {
public void testReadCount() {
PrintReadsWalker walker = new PrintReadsWalker();
ArtificialSAMFileWriter writer = new ArtificialSAMFileWriter();
trav.traverse(walker, (Shard) null, (ShardDataProvider) null, writer);
trav.traverse(walker, null, writer);
assertEquals(readTotal, writer.getRecords().size());
}