Rework the way the MicroScheduler handles locus shards so that intervals spanning multiple shards are handled with less memory consumption.


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2981 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
hanna 2010-03-11 18:40:31 +00:00
parent 4a05757a2a
commit a7ba88e649
30 changed files with 389 additions and 301 deletions

View File

@ -44,7 +44,7 @@ public class AllLocusView extends LocusView {
* Create a new queue of locus contexts.
* @param provider
*/
public AllLocusView(ShardDataProvider provider) {
public AllLocusView(LocusShardDataProvider provider) {
super( provider );
// Seed the state tracking members with the first possible seek position and the first possible locus context.
locusIterator = new GenomeLocusIterator(provider.getLocus());

View File

@ -27,7 +27,7 @@ public class CoveredLocusView extends LocusView {
* Create a new queue of locus contexts.
* @param provider
*/
public CoveredLocusView(ShardDataProvider provider) {
public CoveredLocusView(LocusShardDataProvider provider) {
super(provider);
}

View File

@ -69,7 +69,7 @@ public class LocusReferenceView extends ReferenceView {
* the walkers, etc.
* @param provider source for locus data.
*/
public LocusReferenceView( ShardDataProvider provider ) {
public LocusReferenceView( LocusShardDataProvider provider ) {
super(provider);
initializeBounds(provider);
windowStart = windowStop = 0;
@ -80,7 +80,7 @@ public class LocusReferenceView extends ReferenceView {
* Create a new locus reference view.
* @param provider source for locus data.
*/
public LocusReferenceView( Walker walker, ShardDataProvider provider ) {
public LocusReferenceView( Walker walker, LocusShardDataProvider provider ) {
super( provider );
initializeBounds(provider);
@ -137,7 +137,7 @@ public class LocusReferenceView extends ReferenceView {
* Initialize the bounds of this shard, trimming the bounds so that they match the reference.
* @param provider Provider covering the appropriate locus.
*/
private void initializeBounds(ShardDataProvider provider) {
private void initializeBounds(LocusShardDataProvider provider) {
if(provider.getLocus() != null) {
long sequenceLength = reference.getSequenceDictionary().getSequence(provider.getLocus().getContig()).getSequenceLength();
bounds = GenomeLocParser.createGenomeLoc(provider.getLocus().getContig(),

View File

@ -0,0 +1,74 @@
package org.broadinstitute.sting.gatk.datasources.providers;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.iterators.LocusIterator;
import org.broadinstitute.sting.gatk.Reads;
import java.util.Collection;
/**
 * Presents data sharded by locus to the traversal engine.
 *
 * @author mhanna
 * @version 0.1
 */
public class LocusShardDataProvider extends ShardDataProvider {
    /**
     * Information about the source of the read data.
     */
    private final Reads sourceInfo;

    /**
     * The particular locus for which data is provided. Should be contained within shard.getGenomeLocs().
     */
    private final GenomeLoc locus;

    /**
     * Iterator over the loci (alignment contexts) covered by this provider.
     */
    private final LocusIterator locusIterator;

    /**
     * Create a data provider for the shard given the reads and reference.
     * @param shard The chunk of data over which traversals happen.
     * @param sourceInfo Information about the source of the read data.
     * @param locus The single locus within the shard served by this provider.
     * @param locusIterator Iterator over the loci covered by this provider.
     * @param reference A getter for a section of the reference.
     * @param rods Sources of reference-ordered data to expose alongside the loci.
     */
    public LocusShardDataProvider(Shard shard, Reads sourceInfo, GenomeLoc locus, LocusIterator locusIterator, IndexedFastaSequenceFile reference, Collection<ReferenceOrderedDataSource> rods) {
        super(shard,reference,rods);
        this.sourceInfo = sourceInfo;
        this.locus = locus;
        this.locusIterator = locusIterator;
    }

    /**
     * Returns information about the source of the reads.
     * @return Info about the source of the reads.
     */
    public Reads getSourceInfo() {
        return sourceInfo;
    }

    /**
     * Gets the locus associated with this shard data provider.
     * @return The locus.
     */
    public GenomeLoc getLocus() {
        return locus;
    }

    /**
     * Gets an iterator over the loci covered by this provider.
     * @return An iterator over the loci in this shard fragment.
     */
    public LocusIterator getLocusIterator() {
        return locusIterator;
    }

    /**
     * Releases resources held by the base provider; the locus iterator itself
     * is owned by the caller (the WindowMaker) and is not closed here.
     */
    @Override
    public void close() {
        super.close();
    }
}

View File

@ -53,13 +53,12 @@ public abstract class LocusView extends LocusIterator implements View {
*/
private AlignmentContext nextLocus = null;
public LocusView(ShardDataProvider provider) {
public LocusView(LocusShardDataProvider provider) {
this.locus = provider.getLocus();
Iterator<SAMRecord> reads = new FilteringIterator(provider.getReadIterator(), new LocusStreamFilterFunc());
this.sourceInfo = provider.getReadIterator().getSourceInfo();
this.sourceInfo = provider.getSourceInfo();
this.loci = provider.getLocusIterator();
this.loci = new LocusIteratorByState(reads, sourceInfo);
seedNextLocus();
provider.register(this);
@ -150,6 +149,7 @@ public abstract class LocusView extends LocusIterator implements View {
nextLocus = loci.next();
// If the location of this shard is available, trim the data stream to match the shard.
// TODO: Much of this functionality is being replaced by the WindowMaker.
if(locus != null) {
// Iterate through any elements not contained within this shard.
while( nextLocus != null && !isContainedInShard(nextLocus.getLocation()) && loci.hasNext() )
@ -169,44 +169,4 @@ public abstract class LocusView extends LocusIterator implements View {
private boolean isContainedInShard(GenomeLoc location) {
return locus.containsP(location);
}
/**
* Class to filter out un-handle-able reads from the stream. We currently are skipping
* unmapped reads, non-primary reads, unaligned reads, and duplicate reads.
*/
private static class LocusStreamFilterFunc implements SamRecordFilter {
SAMRecord lastRead = null;
public boolean filterOut(SAMRecord rec) {
boolean result = false;
String why = "";
if (rec.getReadUnmappedFlag()) {
TraversalStatistics.nUnmappedReads++;
result = true;
why = "Unmapped";
} else if (rec.getNotPrimaryAlignmentFlag()) {
TraversalStatistics.nNotPrimary++;
result = true;
why = "Not Primary";
} else if (rec.getAlignmentStart() == SAMRecord.NO_ALIGNMENT_START) {
TraversalStatistics.nBadAlignments++;
result = true;
why = "No alignment start";
} else if (rec.getDuplicateReadFlag()) {
TraversalStatistics.nDuplicates++;
result = true;
why = "Duplicate reads";
}
else {
result = false;
}
if (result) {
TraversalStatistics.nSkippedReads++;
//System.out.printf(" [filter] %s => %b %s", rec.getReadName(), result, why);
} else {
TraversalStatistics.nReads++;
}
return result;
}
}
}

View File

@ -0,0 +1,57 @@
package org.broadinstitute.sting.gatk.datasources.providers;
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
import java.util.Collection;
/**
 * Present data sharded by read to a traversal engine.
 *
 * @author mhanna
 * @version 0.1
 */
public class ReadShardDataProvider extends ShardDataProvider {
    /**
     * The raw collection of reads.  May be null if this provider carries no read data.
     */
    private final StingSAMIterator reads;

    /**
     * Create a data provider for the shard given the reads and reference.
     * @param shard The chunk of data over which traversals happen.
     * @param reads A window into the reads for this shard; may be null if no reads are available.
     * @param reference A getter for a section of the reference.
     * @param rods Sources of reference-ordered data to expose alongside the reads.
     */
    public ReadShardDataProvider(Shard shard, StingSAMIterator reads, IndexedFastaSequenceFile reference, Collection<ReferenceOrderedDataSource> rods) {
        super(shard,reference,rods);
        this.reads = reads;
    }

    /**
     * Can this data source provide reads?
     * @return True if reads are available, false otherwise.
     */
    public boolean hasReads() {
        return reads != null;
    }

    /**
     * Gets an iterator over all the reads bound by this shard.
     * @return An iterator over all reads in this shard.
     */
    public StingSAMIterator getReadIterator() {
        return reads;
    }

    /**
     * Closes the base provider's resources and, unlike the base class,
     * also closes the read iterator owned by this provider.
     */
    @Override
    public void close() {
        super.close();

        if(reads != null)
            reads.close();
    }
}

View File

@ -32,7 +32,7 @@ public class ReadView implements View, Iterable<SAMRecord> {
* Create a new view of the reads given the current data set.
* @param provider Source for the data.
*/
public ReadView( ShardDataProvider provider ) {
public ReadView( ReadShardDataProvider provider ) {
reads = provider.getReadIterator();
}

View File

@ -56,7 +56,7 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
*
* @param provider
*/
public RodLocusView( ShardDataProvider provider ) {
public RodLocusView( LocusShardDataProvider provider ) {
super(provider);
GenomeLoc loc = provider.getLocus();

View File

@ -1,11 +1,9 @@
package org.broadinstitute.sting.gatk.datasources.providers;
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.GenomeLoc;
import java.util.ArrayList;
import java.util.List;
@ -27,7 +25,7 @@ import java.util.Collection;
* An umbrella class that examines the data passed to the microscheduler and
* tries to assemble as much as possible with it.
*/
public class ShardDataProvider {
public abstract class ShardDataProvider {
/**
* An ArrayList of all the views that are examining this data.
*/
@ -38,16 +36,6 @@ public class ShardDataProvider {
*/
private final Shard shard;
/**
* The particular locus for which data is provided. Should be contained within shard.getGenomeLocs().
*/
private final GenomeLoc locus;
/**
* The raw collection of reads.
*/
private final StingSAMIterator reads;
/**
* Provider of reference data for this particular shard.
*/
@ -66,22 +54,6 @@ public class ShardDataProvider {
return shard;
}
/**
* Gets the locus associated with this shard data provider.
* @return The locus.
*/
public GenomeLoc getLocus() {
return locus;
}
/**
* Can this data source provide reads?
* @return True if reads are available, false otherwise.
*/
public boolean hasReads() {
return reads != null;
}
/**
* Can this data source provide reference information?
* @return True if possible, false otherwise.
@ -90,13 +62,6 @@ public class ShardDataProvider {
return reference != null;
}
/**
* Gets an iterator over all the reads bound by this shard.
* @return An iterator over all reads in this shard.
*/
public StingSAMIterator getReadIterator() {
return reads;
}
/**
* Gets a pointer into the given indexed fasta sequence file.
@ -118,13 +83,10 @@ public class ShardDataProvider {
/**
* Create a data provider for the shard given the reads and reference.
* @param shard The chunk of data over which traversals happen.
* @param reads A window into the reads for a given region.
* @param reference A getter for a section of the reference.
*/
public ShardDataProvider(Shard shard,GenomeLoc locus,StingSAMIterator reads,IndexedFastaSequenceFile reference,Collection<ReferenceOrderedDataSource> rods) {
public ShardDataProvider(Shard shard,IndexedFastaSequenceFile reference,Collection<ReferenceOrderedDataSource> rods) {
this.shard = shard;
this.locus = locus;
this.reads = reads;
this.reference = reference;
this.referenceOrderedData = rods;
}
@ -132,10 +94,9 @@ public class ShardDataProvider {
/**
* Skeletal, package protected constructor for unit tests which require a ShardDataProvider.
* @param shard the shard
* @param reads reads iterator.
*/
ShardDataProvider(Shard shard,GenomeLoc locus,StingSAMIterator reads) {
this(shard,locus,reads,null,null);
ShardDataProvider(Shard shard) {
this(shard,null,null);
}
/**
@ -177,9 +138,6 @@ public class ShardDataProvider {
// Explicitly purge registered views to ensure that we don't end up with circular references
// to views, which can in turn hold state.
registeredViews.clear();
if(reads != null)
reads.close();
}
@Override

View File

@ -411,7 +411,7 @@ public class BlockDrivenSAMDataSource extends SAMDataSource {
*/
public SAMReaders(Reads sourceInfo) {
for(File readsFile: sourceInfo.getReadsFiles()) {
SAMFileReader2 reader = new SAMFileReader2(readsFile,true);
SAMFileReader2 reader = new SAMFileReader2(readsFile,false);
reader.setValidationStringency(sourceInfo.getValidationStringency());
// If no read group is present, hallucinate one.

View File

@ -3,10 +3,12 @@ package org.broadinstitute.sting.gatk.executive;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider;
import org.broadinstitute.sting.gatk.datasources.providers.LocusShardDataProvider;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.Pair;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.StingException;
import java.util.ArrayList;
import java.util.List;
@ -170,7 +172,10 @@ public abstract class Accumulator {
* Create a holder for interval results if none exists. Add the result to the holder.
*/
public void accumulate( ShardDataProvider provider, Object result ) {
GenomeLoc location = provider.getLocus();
if(!(provider instanceof LocusShardDataProvider))
throw new StingException("Unable to reduce by interval on reads traversals at this time.");
GenomeLoc location = ((LocusShardDataProvider)provider).getLocus();
// Pull the interval iterator ahead to the interval overlapping this shard fragment.
while((currentInterval == null || currentInterval.isBefore(location)) && intervalIterator.hasNext())

View File

@ -6,11 +6,9 @@ import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategy;
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider;
import org.broadinstitute.sting.gatk.io.*;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.threading.ThreadPoolMonitor;
@ -277,7 +275,7 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
* @param walker Walker to apply to the dataset.
* @param reduceTree Tree of reduces to which to add this shard traverse.
*/
protected Future queueNextShardTraverse( Walker walker, ReduceTree reduceTree ) {
protected void queueNextShardTraverse( Walker walker, ReduceTree reduceTree ) {
if (traverseTasks.size() == 0)
throw new IllegalStateException("Cannot traverse; no pending traversals exist.");
@ -286,7 +284,7 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
ShardTraverser traverser = new ShardTraverser(this,
traversalEngine,
walker,
new ShardDataProvider(shard,shard.getGenomeLocs().get(0),getReadIterator(shard),reference,rods),
shard,
outputTracker);
Future traverseResult = threadPool.submit(traverser);
@ -298,8 +296,6 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
// No more data? Let the reduce tree know so it can finish processing what it's got.
if (!isShardTraversePending())
reduceTree.complete();
return traverseResult;
}
/** Pulls the next reduce from the queue and runs it. */

View File

@ -1,6 +1,8 @@
package org.broadinstitute.sting.gatk.executive;
import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider;
import org.broadinstitute.sting.gatk.datasources.providers.LocusShardDataProvider;
import org.broadinstitute.sting.gatk.datasources.providers.ReadShardDataProvider;
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategy;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
@ -9,10 +11,15 @@ import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.gatk.io.DirectOutputTracker;
import org.broadinstitute.sting.gatk.io.OutputTracker;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.iterators.LocusIteratorByState;
import org.broadinstitute.sting.gatk.iterators.LocusIterator;
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
import java.util.Collection;
import net.sf.picard.filter.FilteringIterator;
/** A micro-scheduling manager for single-threaded execution of a traversal. */
public class LinearMicroScheduler extends MicroScheduler {
@ -49,12 +56,10 @@ public class LinearMicroScheduler extends MicroScheduler {
for (Shard shard : shardStrategy) {
// New experimental code for managing locus intervals.
// TODO: we'll need a similar but slightly different strategy for dealing with read intervals, so generalize this code.
if((shard.getShardType() == Shard.ShardType.LOCUS || shard.getShardType() == Shard.ShardType.LOCUS_INTERVAL) &&
shard.getGenomeLocs().size() > 0) {
if(shard.getShardType() == Shard.ShardType.LOCUS || shard.getShardType() == Shard.ShardType.LOCUS_INTERVAL) {
WindowMaker windowMaker = new WindowMaker(getReadIterator(shard),shard.getGenomeLocs());
for(WindowMaker.WindowMakerIterator iterator: windowMaker) {
ShardDataProvider dataProvider = new ShardDataProvider(shard,iterator.getLocus(),iterator,reference,rods);
ShardDataProvider dataProvider = new LocusShardDataProvider(shard,iterator.getSourceInfo(),iterator.getLocus(),iterator,reference,rods);
Object result = traversalEngine.traverse(walker, dataProvider, accumulator.getReduceInit());
accumulator.accumulate(dataProvider,result);
dataProvider.close();
@ -62,7 +67,7 @@ public class LinearMicroScheduler extends MicroScheduler {
windowMaker.close();
}
else {
ShardDataProvider dataProvider = new ShardDataProvider(shard,null,getReadIterator(shard),reference,rods);
ShardDataProvider dataProvider = new ReadShardDataProvider(shard,getReadIterator(shard),reference,rods);
Object result = traversalEngine.traverse(walker, dataProvider, accumulator.getReduceInit());
accumulator.accumulate(dataProvider,result);
dataProvider.close();

View File

@ -1,10 +1,12 @@
package org.broadinstitute.sting.gatk.executive;
import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider;
import org.broadinstitute.sting.gatk.datasources.providers.LocusShardDataProvider;
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import org.broadinstitute.sting.gatk.traversals.TraversalEngine;
import org.broadinstitute.sting.gatk.io.ThreadLocalOutputTracker;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
import org.broadinstitute.sting.utils.StingException;
import java.util.concurrent.Callable;
@ -26,8 +28,8 @@ import java.util.concurrent.Callable;
public class ShardTraverser implements Callable {
private HierarchicalMicroScheduler microScheduler;
private Walker walker;
private Shard shard;
private TraversalEngine traversalEngine;
private ShardDataProvider dataProvider;
private ThreadLocalOutputTracker outputTracker;
private OutputMergeTask outputMergeTask;
@ -39,12 +41,12 @@ public class ShardTraverser implements Callable {
public ShardTraverser( HierarchicalMicroScheduler microScheduler,
TraversalEngine traversalEngine,
Walker walker,
ShardDataProvider dataProvider,
Shard shard,
ThreadLocalOutputTracker outputTracker ) {
this.microScheduler = microScheduler;
this.walker = walker;
this.traversalEngine = traversalEngine;
this.dataProvider = dataProvider;
this.shard = shard;
this.outputTracker = outputTracker;
}
@ -52,11 +54,16 @@ public class ShardTraverser implements Callable {
long startTime = System.currentTimeMillis();
Object accumulator = walker.reduceInit();
WindowMaker windowMaker = new WindowMaker(microScheduler.getReadIterator(shard),shard.getGenomeLocs());
try {
accumulator = traversalEngine.traverse( walker, dataProvider, accumulator );
for(WindowMaker.WindowMakerIterator iterator: windowMaker) {
ShardDataProvider dataProvider = new LocusShardDataProvider(shard,iterator.getSourceInfo(),iterator.getLocus(),iterator,microScheduler.reference,microScheduler.rods);
accumulator = traversalEngine.traverse( walker, dataProvider, accumulator );
dataProvider.close();
}
}
finally {
dataProvider.close();
windowMaker.close();
outputMergeTask = outputTracker.closeStorage();
synchronized(this) {

View File

@ -2,12 +2,19 @@ package org.broadinstitute.sting.gatk.executive;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
import org.broadinstitute.sting.gatk.iterators.LocusIterator;
import org.broadinstitute.sting.gatk.iterators.LocusIteratorByState;
import org.broadinstitute.sting.gatk.iterators.LocusOverflowTracker;
import org.broadinstitute.sting.gatk.Reads;
import org.broadinstitute.sting.gatk.traversals.TraversalStatistics;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import java.util.*;
import net.sf.samtools.SAMRecord;
import net.sf.picard.util.PeekableIterator;
import net.sf.picard.filter.FilteringIterator;
import net.sf.picard.filter.SamRecordFilter;
/**
* Buffer shards of data which may or may not contain multiple loci into
@ -23,10 +30,20 @@ public class WindowMaker implements Iterable<WindowMaker.WindowMakerIterator>, I
*/
private final Reads sourceInfo;
/**
* Hold the read iterator so that it can be closed later.
*/
private final StingSAMIterator readIterator;
/**
* The locus overflow tracker.
*/
private final LocusOverflowTracker locusOverflowTracker;
/**
* The data source for reads. Will probably come directly from the BAM file.
*/
private final PeekableIterator<SAMRecord> sourceIterator;
private final PeekableIterator<AlignmentContext> sourceIterator;
/**
* Stores the sequence of intervals that the windowmaker should be tracking.
@ -34,9 +51,9 @@ public class WindowMaker implements Iterable<WindowMaker.WindowMakerIterator>, I
private final PeekableIterator<GenomeLoc> intervalIterator;
/**
* Which reads should be saved to go into the next interval?
* In the case of monolithic sharding, this case returns whether the only shard has been generated.
*/
private Queue<SAMRecord> overlappingReads = new ArrayDeque<SAMRecord>();
private boolean shardGenerated = false;
/**
* Create a new window maker with the given iterator as a data source, covering
@ -46,8 +63,13 @@ public class WindowMaker implements Iterable<WindowMaker.WindowMakerIterator>, I
*/
public WindowMaker(StingSAMIterator iterator, List<GenomeLoc> intervals) {
this.sourceInfo = iterator.getSourceInfo();
this.sourceIterator = new PeekableIterator<SAMRecord>(iterator);
this.intervalIterator = new PeekableIterator<GenomeLoc>(intervals.iterator());
this.readIterator = iterator;
LocusIterator locusIterator = new LocusIteratorByState(new FilteringIterator(iterator,new LocusStreamFilterFunc()),sourceInfo);
this.locusOverflowTracker = locusIterator.getLocusOverflowTracker();
this.sourceIterator = new PeekableIterator<AlignmentContext>(locusIterator);
this.intervalIterator = intervals.size()>0 ? new PeekableIterator<GenomeLoc>(intervals.iterator()) : null;
}
public Iterator<WindowMakerIterator> iterator() {
@ -55,11 +77,12 @@ public class WindowMaker implements Iterable<WindowMaker.WindowMakerIterator>, I
}
public boolean hasNext() {
return intervalIterator.hasNext();
return (intervalIterator != null && intervalIterator.hasNext()) || !shardGenerated;
}
public WindowMakerIterator next() {
return new WindowMakerIterator(intervalIterator.next());
shardGenerated = true;
return new WindowMakerIterator(intervalIterator != null ? intervalIterator.next() : null);
}
public void remove() {
@ -67,22 +90,18 @@ public class WindowMaker implements Iterable<WindowMaker.WindowMakerIterator>, I
}
public void close() {
this.sourceIterator.close();
this.readIterator.close();
}
public class WindowMakerIterator implements StingSAMIterator {
public class WindowMakerIterator extends LocusIterator {
/**
* The locus for which this iterator is currently returning reads.
*/
private final GenomeLoc locus;
/**
* Which reads should be saved to go into the next interval?
*/
private final Queue<SAMRecord> pendingOverlaps = new ArrayDeque<SAMRecord>();
public WindowMakerIterator(GenomeLoc locus) {
this.locus = locus;
seedNextLocus();
}
public Reads getSourceInfo() {
@ -98,32 +117,68 @@ public class WindowMaker implements Iterable<WindowMaker.WindowMakerIterator>, I
}
public boolean hasNext() {
if(overlappingReads.size() > 0) return true;
if(sourceIterator.hasNext()) {
SAMRecord nextRead = sourceIterator.peek();
if((nextRead.getAlignmentStart() >= locus.getStart() && nextRead.getAlignmentStart() <= locus.getStop()) ||
(nextRead.getAlignmentEnd() >= locus.getStart() && nextRead.getAlignmentEnd() <= locus.getStop()) ||
(nextRead.getAlignmentStart() < locus.getStart() && nextRead.getAlignmentEnd() > locus.getStop()))
return true;
}
return false;
// locus == null when doing monolithic sharding.
// TODO: Move the monolithic sharding iterator so that we don't have to special case here.
return sourceIterator.hasNext() && (locus == null || sourceIterator.peek().getLocation().overlapsP(locus));
}
public SAMRecord next() {
public AlignmentContext next() {
if(!hasNext()) throw new NoSuchElementException("WindowMakerIterator is out of elements for this interval.");
SAMRecord nextRead = overlappingReads.size() > 0 ? overlappingReads.remove() : sourceIterator.next();
if(intervalIterator.hasNext() && nextRead.getAlignmentEnd() >= intervalIterator.peek().getStart())
pendingOverlaps.add(nextRead);
return nextRead;
return sourceIterator.next();
}
public void close() {
overlappingReads = pendingOverlaps;
public LocusOverflowTracker getLocusOverflowTracker() {
return locusOverflowTracker;
}
public void remove() {
throw new UnsupportedOperationException("Unable to remove from a window maker iterator.");
}
public void seedNextLocus() {
// locus == null when doing monolithic sharding.
// TODO: Move the monolithic sharding iterator so that we don't have to special case here.
if(locus == null) return;
while(sourceIterator.hasNext() && sourceIterator.peek().getLocation().isBefore(locus))
sourceIterator.next();
}
}
/**
 * Class to filter out un-handle-able reads from the stream. We currently are skipping
 * unmapped reads, non-primary reads, unaligned reads, and duplicate reads.
 * As a side effect, updates the global TraversalStatistics counters for each
 * read inspected (skipped or kept).
 */
private static class LocusStreamFilterFunc implements SamRecordFilter {
    /**
     * Decide whether a read should be removed from the locus stream.
     * @param rec the read under consideration.
     * @return true if the read should be filtered out, false if it should be kept.
     */
    public boolean filterOut(SAMRecord rec) {
        // Each rejection reason bumps its own statistic; exactly one applies per read.
        boolean filtered = false;
        if (rec.getReadUnmappedFlag()) {
            TraversalStatistics.nUnmappedReads++;
            filtered = true;
        } else if (rec.getNotPrimaryAlignmentFlag()) {
            TraversalStatistics.nNotPrimary++;
            filtered = true;
        } else if (rec.getAlignmentStart() == SAMRecord.NO_ALIGNMENT_START) {
            TraversalStatistics.nBadAlignments++;
            filtered = true;
        } else if (rec.getDuplicateReadFlag()) {
            TraversalStatistics.nDuplicates++;
            filtered = true;
        }

        // Maintain the aggregate kept/skipped tallies.
        if (filtered)
            TraversalStatistics.nSkippedReads++;
        else
            TraversalStatistics.nReads++;

        return filtered;
    }
}
}

View File

@ -2,11 +2,10 @@ package org.broadinstitute.sting.gatk.traversals;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider;
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.utils.GenomeLoc;
public abstract class TraversalEngine {
public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,ProviderType extends ShardDataProvider> {
// Time in milliseconds since we initialized this engine
private long startTime = -1;
private long lastProgressPrintTime = -1; // When was the last time we printed our progress?
@ -76,18 +75,16 @@ public abstract class TraversalEngine {
* A passthrough method so that subclasses can report which types of traversals they're using.
*
* @param sum Result of the computation.
* @param <T> Type of the computation.
*/
public abstract <T> void printOnTraversalDone(T sum);
public abstract void printOnTraversalDone(T sum);
/**
* Called after a traversal to print out information about the traversal process
*
* @param type describing this type of traversal
* @param sum The reduce result of the traversal
* @param <T> ReduceType of the traversal
*/
protected <T> void printOnTraversalDone(final String type, T sum) {
protected void printOnTraversalDone(final String type, T sum) {
printProgress(true, type, null);
logger.info("Traversal reduce result is " + sum);
final long curTime = System.currentTimeMillis();
@ -115,12 +112,10 @@ public abstract class TraversalEngine {
* @param walker the walker to run with
* @param dataProvider the data provider that generates data given the shard
* @param sum the accumulator
* @param <M> an object of the map type
* @param <T> an object of the reduce type
*
* @return an object of the reduce type
*/
public abstract <M, T> T traverse(Walker<M, T> walker,
ShardDataProvider dataProvider,
T sum);
public abstract T traverse(WalkerType walker,
ProviderType dataProvider,
T sum);
}

View File

@ -31,17 +31,11 @@ import net.sf.samtools.SAMRecord;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.datasources.providers.ReadView;
import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider;
import org.broadinstitute.sting.gatk.datasources.providers.ManagingReferenceOrderedView;
import org.broadinstitute.sting.gatk.datasources.shards.ReadShard;
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import org.broadinstitute.sting.gatk.datasources.providers.ReadShardDataProvider;
import org.broadinstitute.sting.gatk.iterators.PushbackIterator;
import org.broadinstitute.sting.gatk.walkers.DuplicateWalker;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.Pair;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import java.util.*;
@ -54,7 +48,7 @@ import java.util.*;
* <p/>
* This class handles traversing lists of duplicate reads in the new shardable style
*/
public class TraverseDuplicates extends TraversalEngine {
public class TraverseDuplicates<M,T> extends TraversalEngine<M,T,DuplicateWalker<M,T>,ReadShardDataProvider> {
/** our log, which we want to capture anything from this class */
protected static Logger logger = Logger.getLogger(TraverseDuplicates.class);
@ -196,19 +190,12 @@ public class TraverseDuplicates extends TraversalEngine {
*
* @param walker the walker to execute over
* @param sum of type T, the return from the walker
* @param <M> the generic type
* @param <T> the return type of the reduce function
*
* @return the result type T, the product of all the reduce calls
*/
public <M, T> T traverse(Walker<M, T> walker,
ShardDataProvider dataProvider,
T sum) {
// safety first :-)
if (!(walker instanceof DuplicateWalker))
throw new IllegalArgumentException("Walker isn't a duplicate walker!");
DuplicateWalker<M, T> dupWalker = (DuplicateWalker<M, T>) walker;
public T traverse(DuplicateWalker<M, T> walker,
ReadShardDataProvider dataProvider,
T sum) {
FilteringIterator filterIter = new FilteringIterator(new ReadView(dataProvider).iterator(), new duplicateStreamFilterFunc());
PushbackIterator<SAMRecord> iter = new PushbackIterator<SAMRecord>(filterIter);
@ -233,10 +220,10 @@ public class TraverseDuplicates extends TraversalEngine {
TraversalStatistics.nRecords++;
// actually call filter and map, accumulating sum
final boolean keepMeP = dupWalker.filter(site, locus, readSets);
final boolean keepMeP = walker.filter(site, locus, readSets);
if (keepMeP) {
M x = dupWalker.map(site, locus, readSets);
sum = dupWalker.reduce(x, sum);
M x = walker.map(site, locus, readSets);
sum = walker.reduce(x, sum);
}
printProgress(DUPS_STRING, site);
@ -254,9 +241,8 @@ public class TraverseDuplicates extends TraversalEngine {
* Temporary override of printOnTraversalDone.
*
* @param sum Result of the computation.
* @param <T> Type of the result.
*/
public <T> void printOnTraversalDone(T sum) {
public void printOnTraversalDone(T sum) {
printOnTraversalDone(DUPS_STRING, sum);
}
}

View File

@ -2,26 +2,21 @@ package org.broadinstitute.sting.gatk.traversals;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.WalkerManager;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.datasources.providers.*;
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.DataSource;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import java.util.ArrayList;
/**
* A simple solution to iterating over all reference positions over a series of genomic locations.
*/
public class TraverseLoci extends TraversalEngine {
public class TraverseLoci<M,T> extends TraversalEngine<M,T,LocusWalker<M,T>,LocusShardDataProvider> {
final private static String LOCI_STRING = "sites";
/**
@ -29,24 +24,12 @@ public class TraverseLoci extends TraversalEngine {
*/
protected static Logger logger = Logger.getLogger(TraversalEngine.class);
public <M,T> T traverse(Walker<M,T> walker, ArrayList<GenomeLoc> locations) {
if ( locations.isEmpty() )
Utils.scareUser("Requested all locations be processed without providing locations to be processed!");
throw new UnsupportedOperationException("This traversal type not supported by TraverseLoci");
}
@Override
public <M,T> T traverse( Walker<M,T> walker,
ShardDataProvider dataProvider,
T sum ) {
public T traverse( LocusWalker<M,T> walker,
LocusShardDataProvider dataProvider,
T sum ) {
logger.debug(String.format("TraverseLoci.traverse: Shard is %s", dataProvider));
if ( !(walker instanceof LocusWalker) )
throw new IllegalArgumentException("Walker isn't a loci walker!");
LocusWalker<M, T> locusWalker = (LocusWalker<M, T>)walker;
LocusView locusView = getLocusView( walker, dataProvider );
if ( locusView.hasNext() ) { // trivial optimization to avoid unnecessary processing when there's nothing here at all
@ -87,10 +70,10 @@ public class TraverseLoci extends TraversalEngine {
// hold the (longest) stretch of deleted reference bases (if deletions are present in the pileup).
ReferenceContext refContext = referenceView.getReferenceContext(location);
final boolean keepMeP = locusWalker.filter(tracker, refContext, locus);
final boolean keepMeP = walker.filter(tracker, refContext, locus);
if (keepMeP) {
M x = locusWalker.map(tracker, refContext, locus);
sum = locusWalker.reduce(x, sum);
M x = walker.map(tracker, refContext, locus);
sum = walker.reduce(x, sum);
}
if (this.maximumIterations > 0 && TraversalStatistics.nRecords > this.maximumIterations) {
@ -110,8 +93,8 @@ public class TraverseLoci extends TraversalEngine {
if ( nSkipped > 0 ) {
GenomeLoc site = rodLocusView.getLocOneBeyondShard();
AlignmentContext ac = new AlignmentContext(site, new ReadBackedPileup(site), nSkipped);
M x = locusWalker.map(null, null, ac);
sum = locusWalker.reduce(x, sum);
M x = walker.map(null, null, ac);
sum = walker.reduce(x, sum);
}
}
@ -122,9 +105,8 @@ public class TraverseLoci extends TraversalEngine {
* Temporary override of printOnTraversalDone.
*
* @param sum Result of the computation.
* @param <T> Type of the result.
*/
public <T> void printOnTraversalDone( T sum ) {
public void printOnTraversalDone( T sum ) {
printOnTraversalDone(LOCI_STRING, sum );
}
@ -132,8 +114,9 @@ public class TraverseLoci extends TraversalEngine {
* Gets the best view of loci for this walker given the available data.
* @param walker walker to interrogate.
* @param dataProvider Data which which to drive the locus view.
* @return A view of the locus data, where one iteration of the locus view maps to one iteration of the traversal.
*/
private LocusView getLocusView( Walker walker, ShardDataProvider dataProvider ) {
private LocusView getLocusView( Walker<M,T> walker, LocusShardDataProvider dataProvider ) {
DataSource dataSource = WalkerManager.getWalkerDataSource(walker);
if( dataSource == DataSource.READS )
return new CoveredLocusView(dataProvider);

View File

@ -1,20 +1,17 @@
package org.broadinstitute.sting.gatk.traversals;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.datasources.providers.*;
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
import org.broadinstitute.sting.gatk.iterators.LocusIterator;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.LocusWindowWalker;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.Pair;
import org.broadinstitute.sting.utils.StingException;
import java.util.ArrayList;
import java.util.List;
import java.util.*;
/**
* Created by IntelliJ IDEA.
@ -23,26 +20,20 @@ import java.util.List;
* Time: 10:26:03 AM
* To change this template use File | Settings | File Templates.
*/
public class TraverseLocusWindows extends TraversalEngine {
public class TraverseLocusWindows<M,T> extends TraversalEngine<M,T,LocusWindowWalker<M,T>,LocusShardDataProvider> {
/** descriptor of the type */
private static final String LOCUS_WINDOW_STRING = "intervals";
public <M,T> T traverse( Walker<M,T> walker,
ShardDataProvider dataProvider,
T sum ) {
if ( !(walker instanceof LocusWindowWalker) )
throw new IllegalArgumentException("Walker isn't a locus window walker!");
LocusWindowWalker<M, T> locusWindowWalker = (LocusWindowWalker<M, T>)walker;
public T traverse( LocusWindowWalker<M,T> walker,
LocusShardDataProvider dataProvider,
T sum ) {
GenomeLoc interval = dataProvider.getLocus();
ReadView readView = new ReadView( dataProvider );
LocusReferenceView referenceView = new LocusReferenceView( walker, dataProvider );
ReferenceOrderedView referenceOrderedDataView = new ManagingReferenceOrderedView( dataProvider );
Pair<GenomeLoc, List<SAMRecord>> locus = getLocusContext(readView.iterator(), interval);
Pair<GenomeLoc, List<SAMRecord>> locus = getLocusContext(dataProvider.getLocusIterator(), interval);
// The TraverseByLocusWindow expands intervals to cover all reads in a non-standard way.
// TODO: Convert this approach to the standard.
@ -58,8 +49,8 @@ public class TraverseLocusWindows extends TraversalEngine {
//
//final boolean keepMeP = locusWindowWalker.filter(tracker, referenceSubsequence, locus);
//if (keepMeP) {
M x = locusWindowWalker.map(tracker, referenceSubsequence, locus.getFirst(), locus.getSecond());
sum = locusWindowWalker.reduce(x, sum);
M x = walker.map(tracker, referenceSubsequence, locus.getFirst(), locus.getSecond());
sum = walker.reduce(x, sum);
//}
printProgress(LOCUS_WINDOW_STRING, locus.getFirst());
@ -67,26 +58,34 @@ public class TraverseLocusWindows extends TraversalEngine {
return sum;
}
private Pair<GenomeLoc, List<SAMRecord>> getLocusContext(StingSAMIterator readIter, GenomeLoc interval) {
ArrayList<SAMRecord> reads = new ArrayList<SAMRecord>();
private Pair<GenomeLoc, List<SAMRecord>> getLocusContext(LocusIterator locusIter, GenomeLoc interval) {
List<SAMRecord> reads = new ArrayList<SAMRecord>();
boolean done = false;
long leftmostIndex = interval.getStart(),
rightmostIndex = interval.getStop();
while (readIter.hasNext() && !done) {
TraversalStatistics.nRecords++;
SAMRecord read = readIter.next();
reads.add(read);
if ( read.getAlignmentStart() < leftmostIndex )
leftmostIndex = read.getAlignmentStart();
if ( read.getAlignmentEnd() > rightmostIndex )
rightmostIndex = read.getAlignmentEnd();
if ( this.maximumIterations > 0 && TraversalStatistics.nRecords > this.maximumIterations) {
logger.warn(String.format("Maximum number of reads encountered, terminating traversal " + TraversalStatistics.nRecords));
done = true;
while(locusIter.hasNext() && !done) {
AlignmentContext alignment = locusIter.next();
Iterator<SAMRecord> readIter = alignment.getReads().iterator();
while (readIter.hasNext() && !done) {
TraversalStatistics.nRecords++;
SAMRecord read = readIter.next();
if(reads.contains(read)) continue;
reads.add(read);
if ( read.getAlignmentStart() < leftmostIndex )
leftmostIndex = read.getAlignmentStart();
if ( read.getAlignmentEnd() > rightmostIndex )
rightmostIndex = read.getAlignmentEnd();
if ( this.maximumIterations > 0 && TraversalStatistics.nRecords > this.maximumIterations) {
logger.warn(String.format("Maximum number of reads encountered, terminating traversal " + TraversalStatistics.nRecords));
done = true;
}
}
}
GenomeLoc window = GenomeLocParser.createGenomeLoc(interval.getContig(), leftmostIndex, rightmostIndex);
// AlignmentContext locus = new AlignmentContext(window, reads, null);
// if ( readIter.getSourceInfo().getDownsampleToCoverage() != null )
@ -99,9 +98,8 @@ public class TraverseLocusWindows extends TraversalEngine {
* Temporary override of printOnTraversalDone.
* TODO: Add some sort of TE.getName() function once all TraversalEngines are ported.
* @param sum Result of the computation.
* @param <T> Type of the result.
*/
public <T> void printOnTraversalDone( T sum ) {
public void printOnTraversalDone( T sum ) {
printOnTraversalDone(LOCUS_WINDOW_STRING, sum );
}

View File

@ -3,14 +3,10 @@ package org.broadinstitute.sting.gatk.traversals;
import net.sf.samtools.SAMRecord;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.WalkerManager;
import org.broadinstitute.sting.gatk.datasources.providers.ReadBasedReferenceOrderedView;
import org.broadinstitute.sting.gatk.datasources.providers.ReadReferenceView;
import org.broadinstitute.sting.gatk.datasources.providers.ReadView;
import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider;
import org.broadinstitute.sting.gatk.datasources.providers.*;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.DataSource;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.utils.GenomeLocParser;
/*
@ -47,7 +43,7 @@ import org.broadinstitute.sting.utils.GenomeLocParser;
* <p/>
* This class handles traversing by reads in the new shardable style
*/
public class TraverseReads extends TraversalEngine {
public class TraverseReads<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,ReadShardDataProvider> {
/** our log, which we want to capture anything from this class */
protected static Logger logger = Logger.getLogger(TraverseReads.class);
@ -60,23 +56,17 @@ public class TraverseReads extends TraversalEngine {
* @param walker the walker to traverse with
* @param dataProvider the provider of the reads data
* @param sum the value of type T, specified by the walker, to feed to the walkers reduce function
* @param <M> the map type of the walker
* @param <T> the reduce type of the walker
* @return the reduce variable of the read walker
*/
public <M, T> T traverse(Walker<M, T> walker,
ShardDataProvider dataProvider,
T sum) {
public T traverse(ReadWalker<M,T> walker,
ReadShardDataProvider dataProvider,
T sum) {
logger.debug(String.format("TraverseReads.traverse Covered dataset is %s", dataProvider));
if (!(walker instanceof ReadWalker))
throw new IllegalArgumentException("Walker isn't a read walker!");
if( !dataProvider.hasReads() )
throw new IllegalArgumentException("Unable to traverse reads; no read data is available.");
ReadWalker<M, T> readWalker = (ReadWalker<M, T>) walker;
boolean needsReferenceBasesP = WalkerManager.isRequired(walker, DataSource.REFERENCE_BASES);
ReadView reads = new ReadView(dataProvider);
@ -101,10 +91,10 @@ public class TraverseReads extends TraversalEngine {
// if the read is mapped, create a metadata tracker
ReadMetaDataTracker tracker = (read.getReferenceIndex() >= 0) ? rodView.getReferenceOrderedDataForRead(read) : null;
final boolean keepMeP = readWalker.filter(refSeq, read);
final boolean keepMeP = walker.filter(refSeq, read);
if (keepMeP) {
M x = readWalker.map(refSeq, read, tracker); // the tracker can be null
sum = readWalker.reduce(x, sum);
M x = walker.map(refSeq, read, tracker); // the tracker can be null
sum = walker.reduce(x, sum);
}
printProgress(READS_STRING,
@ -119,9 +109,8 @@ public class TraverseReads extends TraversalEngine {
* Temporary override of printOnTraversalDone.
* TODO: Add some sort of TE.getName() function once all TraversalEngines are ported.
* @param sum Result of the computation.
* @param <T> Type of the result.
*/
public <T> void printOnTraversalDone( T sum ) {
public void printOnTraversalDone( T sum ) {
printOnTraversalDone(READS_STRING, sum );
}
}

View File

@ -16,8 +16,6 @@ import org.apache.log4j.Logger;
* To change this template use File | Settings | File Templates.
*/
public abstract class Walker<MapType, ReduceType> {
// TODO: Can a walker be templatized so that map and reduce live here?
protected static Logger logger = Logger.getLogger(Walker.class);
/**

View File

@ -4,7 +4,6 @@ import org.broadinstitute.sting.gatk.traversals.TraversalEngine;
import org.broadinstitute.sting.gatk.traversals.TraversalStatistics;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.apache.log4j.Logger;
@ -43,7 +42,7 @@ import net.sf.samtools.SAMFileHeader;
*
* this class acts as a fake reads traversal engine for testing out reads based traversals.
*/
public class ArtificialReadsTraversal extends TraversalEngine {
public class ArtificialReadsTraversal<M,T> extends TraversalEngine<M,T,Walker<M,T>,ShardDataProvider> {
public int startingChr = 1;
public int endingChr = 5;
@ -77,12 +76,10 @@ public class ArtificialReadsTraversal extends TraversalEngine {
* @param walker the walker to traverse with
* @param dataProvider the provider of the reads data
* @param sum the value of type T, specified by the walker, to feed to the walkers reduce function
* @param <M> the map type of the walker
* @param <T> the reduce type of the walker
*
* @return the reduce variable of the read walker
*/
public <M, T> T traverse( Walker<M, T> walker,
public T traverse( Walker<M, T> walker,
ShardDataProvider dataProvider,
T sum ) {
@ -126,9 +123,8 @@ public class ArtificialReadsTraversal extends TraversalEngine {
* TODO: Add some sort of TE.getName() function once all TraversalEngines are ported.
*
* @param sum Result of the computation.
* @param <T> Type of the result.
*/
public <T> void printOnTraversalDone( T sum ) {
public void printOnTraversalDone( T sum ) {
printOnTraversalDone("reads", sum);
}

View File

@ -26,7 +26,7 @@ import java.util.List;
public class AllLocusViewTest extends LocusViewTemplate {
@Override
protected LocusView createView(ShardDataProvider provider) {
protected LocusView createView(LocusShardDataProvider provider) {
return new AllLocusView(provider);
}

View File

@ -29,7 +29,7 @@ public class CoveredLocusViewTest extends LocusViewTemplate {
* Retrieve a covered locus view.
*/
@Override
protected LocusView createView(ShardDataProvider provider) {
protected LocusView createView(LocusShardDataProvider provider) {
return new CoveredLocusView(provider);
}

View File

@ -58,7 +58,7 @@ public class LocusReferenceViewTest extends ReferenceViewTemplate {
public void testOverlappingReferenceBases() {
Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc(0, sequenceFile.getSequence("chrM").length() - 10, sequenceFile.getSequence("chrM").length())));
ShardDataProvider dataProvider = new ShardDataProvider(shard, shard.getGenomeLocs().get(0), null, sequenceFile, null);
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, null, shard.getGenomeLocs().get(0), null, sequenceFile, null);
LocusReferenceView view = new LocusReferenceView(dataProvider);
char[] results = view.getReferenceBases(GenomeLocParser.createGenomeLoc(0, sequenceFile.getSequence("chrM").length() - 10, sequenceFile.getSequence("chrM").length() + 9));
@ -75,7 +75,7 @@ public class LocusReferenceViewTest extends ReferenceViewTemplate {
public void testBoundsFailure() {
Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc(0, 1, 50)));
ShardDataProvider dataProvider = new ShardDataProvider(shard, shard.getGenomeLocs().get(0), null, sequenceFile, null);
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, null, shard.getGenomeLocs().get(0), null, sequenceFile, null);
LocusReferenceView view = new LocusReferenceView(dataProvider);
view.getReferenceContext(GenomeLocParser.createGenomeLoc(0, 51)).getBase();
@ -91,7 +91,7 @@ public class LocusReferenceViewTest extends ReferenceViewTemplate {
Shard shard = new LocusShard(Collections.singletonList(loc));
GenomeLocusIterator shardIterator = new GenomeLocusIterator(loc);
ShardDataProvider dataProvider = new ShardDataProvider(shard, loc, null, sequenceFile, null);
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, null, loc, null, sequenceFile, null);
LocusReferenceView view = new LocusReferenceView(dataProvider);
while (shardIterator.hasNext()) {

View File

@ -5,6 +5,7 @@ import net.sf.picard.reference.ReferenceSequenceFile;
import net.sf.samtools.*;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.Reads;
import org.broadinstitute.sting.gatk.executive.WindowMaker;
import org.broadinstitute.sting.gatk.datasources.shards.LocusShard;
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
@ -46,7 +47,9 @@ public abstract class LocusViewTemplate extends BaseTest {
GenomeLoc shardBounds = GenomeLocParser.createGenomeLoc("chr1", 1, 5);
Shard shard = new LocusShard(Collections.singletonList(shardBounds));
ShardDataProvider dataProvider = new ShardDataProvider(shard, shard.getGenomeLocs().get(0), iterator);
WindowMaker windowMaker = new WindowMaker(iterator,shard.getGenomeLocs());
WindowMaker.WindowMakerIterator window = windowMaker.next();
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, null, window.getLocus(), window, null, null);
LocusView view = createView(dataProvider);
@ -60,7 +63,9 @@ public abstract class LocusViewTemplate extends BaseTest {
GenomeLoc shardBounds = GenomeLocParser.createGenomeLoc("chr1", 1, 5);
Shard shard = new LocusShard(Collections.singletonList(shardBounds));
ShardDataProvider dataProvider = new ShardDataProvider(shard, shard.getGenomeLocs().get(0), iterator);
WindowMaker windowMaker = new WindowMaker(iterator,shard.getGenomeLocs());
WindowMaker.WindowMakerIterator window = windowMaker.next();
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null);
LocusView view = createView(dataProvider);
@ -73,7 +78,9 @@ public abstract class LocusViewTemplate extends BaseTest {
SAMRecordIterator iterator = new SAMRecordIterator(read);
Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 1, 10)));
ShardDataProvider dataProvider = new ShardDataProvider(shard, shard.getGenomeLocs().get(0), iterator);
WindowMaker windowMaker = new WindowMaker(iterator,shard.getGenomeLocs());
WindowMaker.WindowMakerIterator window = windowMaker.next();
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null);
LocusView view = createView(dataProvider);
testReadsInContext(view, shard.getGenomeLocs(), Collections.singletonList(read));
@ -85,7 +92,9 @@ public abstract class LocusViewTemplate extends BaseTest {
SAMRecordIterator iterator = new SAMRecordIterator(read);
Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 1, 10)));
ShardDataProvider dataProvider = new ShardDataProvider(shard, shard.getGenomeLocs().get(0), iterator);
WindowMaker windowMaker = new WindowMaker(iterator,shard.getGenomeLocs());
WindowMaker.WindowMakerIterator window = windowMaker.next();
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null);
LocusView view = createView(dataProvider);
testReadsInContext(view, shard.getGenomeLocs(), Collections.singletonList(read));
@ -97,7 +106,9 @@ public abstract class LocusViewTemplate extends BaseTest {
SAMRecordIterator iterator = new SAMRecordIterator(read);
Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 1, 10)));
ShardDataProvider dataProvider = new ShardDataProvider(shard, shard.getGenomeLocs().get(0), iterator);
WindowMaker windowMaker = new WindowMaker(iterator,shard.getGenomeLocs());
WindowMaker.WindowMakerIterator window = windowMaker.next();
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null);
LocusView view = createView(dataProvider);
testReadsInContext(view, shard.getGenomeLocs(), Collections.singletonList(read));
@ -109,7 +120,9 @@ public abstract class LocusViewTemplate extends BaseTest {
SAMRecordIterator iterator = new SAMRecordIterator(read);
Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 6, 15)));
ShardDataProvider dataProvider = new ShardDataProvider(shard, shard.getGenomeLocs().get(0), iterator);
WindowMaker windowMaker = new WindowMaker(iterator,shard.getGenomeLocs());
WindowMaker.WindowMakerIterator window = windowMaker.next();
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null);
LocusView view = createView(dataProvider);
testReadsInContext(view, shard.getGenomeLocs(), Collections.singletonList(read));
@ -121,7 +134,9 @@ public abstract class LocusViewTemplate extends BaseTest {
SAMRecordIterator iterator = new SAMRecordIterator(read);
Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 1, 10)));
ShardDataProvider dataProvider = new ShardDataProvider(shard, shard.getGenomeLocs().get(0), iterator);
WindowMaker windowMaker = new WindowMaker(iterator,shard.getGenomeLocs());
WindowMaker.WindowMakerIterator window = windowMaker.next();
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null);
LocusView view = createView(dataProvider);
testReadsInContext(view, shard.getGenomeLocs(), Collections.singletonList(read));
@ -134,7 +149,9 @@ public abstract class LocusViewTemplate extends BaseTest {
SAMRecordIterator iterator = new SAMRecordIterator(read1, read2);
Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 1, 10)));
ShardDataProvider dataProvider = new ShardDataProvider(shard, shard.getGenomeLocs().get(0), iterator);
WindowMaker windowMaker = new WindowMaker(iterator,shard.getGenomeLocs());
WindowMaker.WindowMakerIterator window = windowMaker.next();
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null);
LocusView view = createView(dataProvider);
List<SAMRecord> expectedReads = new ArrayList<SAMRecord>();
@ -151,7 +168,9 @@ public abstract class LocusViewTemplate extends BaseTest {
SAMRecordIterator iterator = new SAMRecordIterator(read1, read2, read3, read4);
Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 1, 10)));
ShardDataProvider dataProvider = new ShardDataProvider(shard, shard.getGenomeLocs().get(0), iterator);
WindowMaker windowMaker = new WindowMaker(iterator,shard.getGenomeLocs());
WindowMaker.WindowMakerIterator window = windowMaker.next();
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null);
LocusView view = createView(dataProvider);
List<SAMRecord> expectedReads = new ArrayList<SAMRecord>();
@ -168,7 +187,9 @@ public abstract class LocusViewTemplate extends BaseTest {
SAMRecordIterator iterator = new SAMRecordIterator(read1, read2, read3, read4);
Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 1, 10)));
ShardDataProvider dataProvider = new ShardDataProvider(shard, shard.getGenomeLocs().get(0), iterator);
WindowMaker windowMaker = new WindowMaker(iterator,shard.getGenomeLocs());
WindowMaker.WindowMakerIterator window = windowMaker.next();
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null);
LocusView view = createView(dataProvider);
List<SAMRecord> expectedReads = new ArrayList<SAMRecord>();
@ -187,7 +208,9 @@ public abstract class LocusViewTemplate extends BaseTest {
SAMRecordIterator iterator = new SAMRecordIterator(read1, read2, read3, read4, read5, read6);
Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 1, 10)));
ShardDataProvider dataProvider = new ShardDataProvider(shard, shard.getGenomeLocs().get(0), iterator);
WindowMaker windowMaker = new WindowMaker(iterator,shard.getGenomeLocs());
WindowMaker.WindowMakerIterator window = windowMaker.next();
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null);
LocusView view = createView(dataProvider);
List<SAMRecord> expectedReads = new ArrayList<SAMRecord>();
@ -213,7 +236,9 @@ public abstract class LocusViewTemplate extends BaseTest {
read07, read08, read09, read10, read11, read12);
Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 6, 15)));
ShardDataProvider dataProvider = new ShardDataProvider(shard, shard.getGenomeLocs().get(0), iterator);
WindowMaker windowMaker = new WindowMaker(iterator,shard.getGenomeLocs());
WindowMaker.WindowMakerIterator window = windowMaker.next();
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null);
LocusView view = createView(dataProvider);
List<SAMRecord> expectedReads = new ArrayList<SAMRecord>();
@ -227,7 +252,7 @@ public abstract class LocusViewTemplate extends BaseTest {
*
* @return The correct view to test.
*/
protected abstract LocusView createView(ShardDataProvider provider);
protected abstract LocusView createView(LocusShardDataProvider provider);
/**
* Test the reads according to an independently derived context.

View File

@ -70,7 +70,7 @@ public class ReadReferenceViewTest extends ReferenceViewTemplate {
final long contigStart = selectedContig.getSequenceLength() - (readLength - overlap - 1);
final long contigStop = selectedContig.getSequenceLength() + overlap;
ShardDataProvider dataProvider = new ShardDataProvider(null,null,null,sequenceFile,null);
ReadShardDataProvider dataProvider = new ReadShardDataProvider(null,null,sequenceFile,null);
ReadReferenceView view = new ReadReferenceView(dataProvider);
SAMRecord rec = buildSAMRecord(selectedContig.getSequenceName(),(int)contigStart,(int)contigStop);
@ -97,7 +97,7 @@ public class ReadReferenceViewTest extends ReferenceViewTemplate {
protected void validateLocation( GenomeLoc loc ) {
SAMRecord read = buildSAMRecord( loc.getContig(), (int)loc.getStart(), (int)loc.getStop() );
ShardDataProvider dataProvider = new ShardDataProvider(null,null,null,sequenceFile,null);
ReadShardDataProvider dataProvider = new ReadShardDataProvider(null,null,sequenceFile,null);
ReadReferenceView view = new ReadReferenceView(dataProvider);
ReferenceSequence expectedAsSeq = sequenceFile.getSubsequenceAt(loc.getContig(),loc.getStart(),loc.getStop());

View File

@ -53,7 +53,7 @@ public class ReferenceOrderedViewTest extends BaseTest {
@Test
public void testNoBindings() {
Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chrM",1,30)));
ShardDataProvider provider = new ShardDataProvider(shard, shard.getGenomeLocs().get(0), null, seq, Collections.<ReferenceOrderedDataSource>emptyList());
ShardDataProvider provider = new LocusShardDataProvider(shard, null, shard.getGenomeLocs().get(0), null, seq, Collections.<ReferenceOrderedDataSource>emptyList());
ReferenceOrderedView view = new ManagingReferenceOrderedView( provider );
RefMetaDataTracker tracker = view.getReferenceOrderedDataAtLocus(GenomeLocParser.createGenomeLoc("chrM",10));
@ -71,7 +71,7 @@ public class ReferenceOrderedViewTest extends BaseTest {
Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chrM",1,30)));
ShardDataProvider provider = new ShardDataProvider(shard, shard.getGenomeLocs().get(0), null, seq, Collections.singletonList(dataSource));
ShardDataProvider provider = new LocusShardDataProvider(shard, null, shard.getGenomeLocs().get(0), null, seq, Collections.singletonList(dataSource));
ReferenceOrderedView view = new ManagingReferenceOrderedView( provider );
RefMetaDataTracker tracker = view.getReferenceOrderedDataAtLocus(GenomeLocParser.createGenomeLoc("chrM",20));
@ -97,7 +97,7 @@ public class ReferenceOrderedViewTest extends BaseTest {
Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chrM",1,30)));
ShardDataProvider provider = new ShardDataProvider(shard, shard.getGenomeLocs().get(0), null, seq, Arrays.asList(dataSource1,dataSource2));
ShardDataProvider provider = new LocusShardDataProvider(shard, null, shard.getGenomeLocs().get(0), null, seq, Arrays.asList(dataSource1,dataSource2));
ReferenceOrderedView view = new ManagingReferenceOrderedView( provider );
RefMetaDataTracker tracker = view.getReferenceOrderedDataAtLocus(GenomeLocParser.createGenomeLoc("chrM",20));

View File

@ -34,7 +34,7 @@ public class ShardDataProviderTest extends BaseTest {
@Before
public void createProvider() {
provider = new ShardDataProvider( null,null,null,null,null );
provider = new LocusShardDataProvider( null,null,null,null,null,null );
}
/**

View File

@ -4,6 +4,7 @@ import net.sf.picard.reference.ReferenceSequenceFile;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.Reads;
import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider;
import org.broadinstitute.sting.gatk.datasources.providers.ReadShardDataProvider;
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategy;
import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategyFactory;
@ -132,7 +133,7 @@ public class TraverseReadsTest extends BaseTest {
fail("Shard == null");
}
ShardDataProvider dataProvider = new ShardDataProvider(shard,null,dataSource.seek(shard),null,null);
ShardDataProvider dataProvider = new ReadShardDataProvider(shard,dataSource.seek(shard),null,null);
accumulator = traversalEngine.traverse(countReadWalker, dataProvider, accumulator);
dataProvider.close();
@ -178,7 +179,7 @@ public class TraverseReadsTest extends BaseTest {
fail("Shard == null");
}
ShardDataProvider dataProvider = new ShardDataProvider(shard,null,dataSource.seek(shard),null,null);
ShardDataProvider dataProvider = new ReadShardDataProvider(shard,dataSource.seek(shard),null,null);
accumulator = traversalEngine.traverse(countReadWalker, dataProvider, accumulator);
dataProvider.close();
}