Rework the way the MicroScheduler handles locus shards to handle intervals that span shards
with less memory consumption. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2981 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
4a05757a2a
commit
a7ba88e649
|
|
@ -44,7 +44,7 @@ public class AllLocusView extends LocusView {
|
|||
* Create a new queue of locus contexts.
|
||||
* @param provider
|
||||
*/
|
||||
public AllLocusView(ShardDataProvider provider) {
|
||||
public AllLocusView(LocusShardDataProvider provider) {
|
||||
super( provider );
|
||||
// Seed the state tracking members with the first possible seek position and the first possible locus context.
|
||||
locusIterator = new GenomeLocusIterator(provider.getLocus());
|
||||
|
|
|
|||
|
|
@ -27,7 +27,7 @@ public class CoveredLocusView extends LocusView {
|
|||
* Create a new queue of locus contexts.
|
||||
* @param provider
|
||||
*/
|
||||
public CoveredLocusView(ShardDataProvider provider) {
|
||||
public CoveredLocusView(LocusShardDataProvider provider) {
|
||||
super(provider);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -69,7 +69,7 @@ public class LocusReferenceView extends ReferenceView {
|
|||
* the walkers, etc.
|
||||
* @param provider source for locus data.
|
||||
*/
|
||||
public LocusReferenceView( ShardDataProvider provider ) {
|
||||
public LocusReferenceView( LocusShardDataProvider provider ) {
|
||||
super(provider);
|
||||
initializeBounds(provider);
|
||||
windowStart = windowStop = 0;
|
||||
|
|
@ -80,7 +80,7 @@ public class LocusReferenceView extends ReferenceView {
|
|||
* Create a new locus reference view.
|
||||
* @param provider source for locus data.
|
||||
*/
|
||||
public LocusReferenceView( Walker walker, ShardDataProvider provider ) {
|
||||
public LocusReferenceView( Walker walker, LocusShardDataProvider provider ) {
|
||||
super( provider );
|
||||
initializeBounds(provider);
|
||||
|
||||
|
|
@ -137,7 +137,7 @@ public class LocusReferenceView extends ReferenceView {
|
|||
* Initialize the bounds of this shard, trimming the bounds so that they match the reference.
|
||||
* @param provider Provider covering the appropriate locus.
|
||||
*/
|
||||
private void initializeBounds(ShardDataProvider provider) {
|
||||
private void initializeBounds(LocusShardDataProvider provider) {
|
||||
if(provider.getLocus() != null) {
|
||||
long sequenceLength = reference.getSequenceDictionary().getSequence(provider.getLocus().getContig()).getSequenceLength();
|
||||
bounds = GenomeLocParser.createGenomeLoc(provider.getLocus().getContig(),
|
||||
|
|
|
|||
|
|
@ -0,0 +1,74 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.providers;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.iterators.LocusIterator;
|
||||
import org.broadinstitute.sting.gatk.Reads;
|
||||
|
||||
import java.util.Collection;
|
||||
|
||||
/**
|
||||
* Presents data sharded by locus to the traversal engine.
|
||||
*
|
||||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public class LocusShardDataProvider extends ShardDataProvider {
|
||||
/**
|
||||
* Information about the source of the read data.
|
||||
*/
|
||||
private final Reads sourceInfo;
|
||||
|
||||
/**
|
||||
* The particular locus for which data is provided. Should be contained within shard.getGenomeLocs().
|
||||
*/
|
||||
private final GenomeLoc locus;
|
||||
|
||||
/**
|
||||
* The raw collection of reads.
|
||||
*/
|
||||
private final LocusIterator locusIterator;
|
||||
|
||||
/**
|
||||
* Create a data provider for the shard given the reads and reference.
|
||||
* @param shard The chunk of data over which traversals happen.
|
||||
* @param reference A getter for a section of the reference.
|
||||
*/
|
||||
public LocusShardDataProvider(Shard shard, Reads sourceInfo, GenomeLoc locus, LocusIterator locusIterator, IndexedFastaSequenceFile reference, Collection<ReferenceOrderedDataSource> rods) {
|
||||
super(shard,reference,rods);
|
||||
this.sourceInfo = sourceInfo;
|
||||
this.locus = locus;
|
||||
this.locusIterator = locusIterator;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns information about the source of the reads.
|
||||
* @return Info about the source of the reads.
|
||||
*/
|
||||
public Reads getSourceInfo() {
|
||||
return sourceInfo;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the locus associated with this shard data provider.
|
||||
* @return The locus.
|
||||
*/
|
||||
public GenomeLoc getLocus() {
|
||||
return locus;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets an iterator over all the reads bound by this shard.
|
||||
* @return An iterator over all reads in this shard.
|
||||
*/
|
||||
public LocusIterator getLocusIterator() {
|
||||
return locusIterator;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
super.close();
|
||||
}
|
||||
}
|
||||
|
|
@ -53,13 +53,12 @@ public abstract class LocusView extends LocusIterator implements View {
|
|||
*/
|
||||
private AlignmentContext nextLocus = null;
|
||||
|
||||
public LocusView(ShardDataProvider provider) {
|
||||
public LocusView(LocusShardDataProvider provider) {
|
||||
this.locus = provider.getLocus();
|
||||
|
||||
Iterator<SAMRecord> reads = new FilteringIterator(provider.getReadIterator(), new LocusStreamFilterFunc());
|
||||
this.sourceInfo = provider.getReadIterator().getSourceInfo();
|
||||
this.sourceInfo = provider.getSourceInfo();
|
||||
this.loci = provider.getLocusIterator();
|
||||
|
||||
this.loci = new LocusIteratorByState(reads, sourceInfo);
|
||||
seedNextLocus();
|
||||
|
||||
provider.register(this);
|
||||
|
|
@ -150,6 +149,7 @@ public abstract class LocusView extends LocusIterator implements View {
|
|||
nextLocus = loci.next();
|
||||
|
||||
// If the location of this shard is available, trim the data stream to match the shard.
|
||||
// TODO: Much of this functionality is being replaced by the WindowMaker.
|
||||
if(locus != null) {
|
||||
// Iterate through any elements not contained within this shard.
|
||||
while( nextLocus != null && !isContainedInShard(nextLocus.getLocation()) && loci.hasNext() )
|
||||
|
|
@ -169,44 +169,4 @@ public abstract class LocusView extends LocusIterator implements View {
|
|||
private boolean isContainedInShard(GenomeLoc location) {
|
||||
return locus.containsP(location);
|
||||
}
|
||||
|
||||
/**
|
||||
* Class to filter out un-handle-able reads from the stream. We currently are skipping
|
||||
* unmapped reads, non-primary reads, unaligned reads, and duplicate reads.
|
||||
*/
|
||||
private static class LocusStreamFilterFunc implements SamRecordFilter {
|
||||
SAMRecord lastRead = null;
|
||||
public boolean filterOut(SAMRecord rec) {
|
||||
boolean result = false;
|
||||
String why = "";
|
||||
if (rec.getReadUnmappedFlag()) {
|
||||
TraversalStatistics.nUnmappedReads++;
|
||||
result = true;
|
||||
why = "Unmapped";
|
||||
} else if (rec.getNotPrimaryAlignmentFlag()) {
|
||||
TraversalStatistics.nNotPrimary++;
|
||||
result = true;
|
||||
why = "Not Primary";
|
||||
} else if (rec.getAlignmentStart() == SAMRecord.NO_ALIGNMENT_START) {
|
||||
TraversalStatistics.nBadAlignments++;
|
||||
result = true;
|
||||
why = "No alignment start";
|
||||
} else if (rec.getDuplicateReadFlag()) {
|
||||
TraversalStatistics.nDuplicates++;
|
||||
result = true;
|
||||
why = "Duplicate reads";
|
||||
}
|
||||
else {
|
||||
result = false;
|
||||
}
|
||||
|
||||
if (result) {
|
||||
TraversalStatistics.nSkippedReads++;
|
||||
//System.out.printf(" [filter] %s => %b %s", rec.getReadName(), result, why);
|
||||
} else {
|
||||
TraversalStatistics.nReads++;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,57 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.providers;
|
||||
|
||||
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
||||
|
||||
import java.util.Collection;
|
||||
|
||||
/**
|
||||
* Present data sharded by read to a traversal engine.
|
||||
*
|
||||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public class ReadShardDataProvider extends ShardDataProvider {
|
||||
/**
|
||||
* The raw collection of reads.
|
||||
*/
|
||||
private final StingSAMIterator reads;
|
||||
|
||||
/**
|
||||
* Create a data provider for the shard given the reads and reference.
|
||||
* @param shard The chunk of data over which traversals happen.
|
||||
* @param reference A getter for a section of the reference.
|
||||
*/
|
||||
public ReadShardDataProvider(Shard shard, StingSAMIterator reads, IndexedFastaSequenceFile reference, Collection<ReferenceOrderedDataSource> rods) {
|
||||
super(shard,reference,rods);
|
||||
this.reads = reads;
|
||||
}
|
||||
|
||||
/**
|
||||
* Can this data source provide reads?
|
||||
* @return True if reads are available, false otherwise.
|
||||
*/
|
||||
public boolean hasReads() {
|
||||
return reads != null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets an iterator over all the reads bound by this shard.
|
||||
* @return An iterator over all reads in this shard.
|
||||
*/
|
||||
public StingSAMIterator getReadIterator() {
|
||||
return reads;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
super.close();
|
||||
|
||||
if(reads != null)
|
||||
reads.close();
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -32,7 +32,7 @@ public class ReadView implements View, Iterable<SAMRecord> {
|
|||
* Create a new view of the reads given the current data set.
|
||||
* @param provider Source for the data.
|
||||
*/
|
||||
public ReadView( ShardDataProvider provider ) {
|
||||
public ReadView( ReadShardDataProvider provider ) {
|
||||
reads = provider.getReadIterator();
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -56,7 +56,7 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
|
|||
*
|
||||
* @param provider
|
||||
*/
|
||||
public RodLocusView( ShardDataProvider provider ) {
|
||||
public RodLocusView( LocusShardDataProvider provider ) {
|
||||
super(provider);
|
||||
|
||||
GenomeLoc loc = provider.getLocus();
|
||||
|
|
|
|||
|
|
@ -1,11 +1,9 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.providers;
|
||||
|
||||
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
|
@ -27,7 +25,7 @@ import java.util.Collection;
|
|||
* An umbrella class that examines the data passed to the microscheduler and
|
||||
* tries to assemble as much as possible with it.
|
||||
*/
|
||||
public class ShardDataProvider {
|
||||
public abstract class ShardDataProvider {
|
||||
/**
|
||||
* An ArrayList of all the views that are examining this data.
|
||||
*/
|
||||
|
|
@ -38,16 +36,6 @@ public class ShardDataProvider {
|
|||
*/
|
||||
private final Shard shard;
|
||||
|
||||
/**
|
||||
* The particular locus for which data is provided. Should be contained within shard.getGenomeLocs().
|
||||
*/
|
||||
private final GenomeLoc locus;
|
||||
|
||||
/**
|
||||
* The raw collection of reads.
|
||||
*/
|
||||
private final StingSAMIterator reads;
|
||||
|
||||
/**
|
||||
* Provider of reference data for this particular shard.
|
||||
*/
|
||||
|
|
@ -66,22 +54,6 @@ public class ShardDataProvider {
|
|||
return shard;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the locus associated with this shard data provider.
|
||||
* @return The locus.
|
||||
*/
|
||||
public GenomeLoc getLocus() {
|
||||
return locus;
|
||||
}
|
||||
|
||||
/**
|
||||
* Can this data source provide reads?
|
||||
* @return True if reads are available, false otherwise.
|
||||
*/
|
||||
public boolean hasReads() {
|
||||
return reads != null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Can this data source provide reference information?
|
||||
* @return True if possible, false otherwise.
|
||||
|
|
@ -90,13 +62,6 @@ public class ShardDataProvider {
|
|||
return reference != null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets an iterator over all the reads bound by this shard.
|
||||
* @return An iterator over all reads in this shard.
|
||||
*/
|
||||
public StingSAMIterator getReadIterator() {
|
||||
return reads;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a pointer into the given indexed fasta sequence file.
|
||||
|
|
@ -118,13 +83,10 @@ public class ShardDataProvider {
|
|||
/**
|
||||
* Create a data provider for the shard given the reads and reference.
|
||||
* @param shard The chunk of data over which traversals happen.
|
||||
* @param reads A window into the reads for a given region.
|
||||
* @param reference A getter for a section of the reference.
|
||||
*/
|
||||
public ShardDataProvider(Shard shard,GenomeLoc locus,StingSAMIterator reads,IndexedFastaSequenceFile reference,Collection<ReferenceOrderedDataSource> rods) {
|
||||
public ShardDataProvider(Shard shard,IndexedFastaSequenceFile reference,Collection<ReferenceOrderedDataSource> rods) {
|
||||
this.shard = shard;
|
||||
this.locus = locus;
|
||||
this.reads = reads;
|
||||
this.reference = reference;
|
||||
this.referenceOrderedData = rods;
|
||||
}
|
||||
|
|
@ -132,10 +94,9 @@ public class ShardDataProvider {
|
|||
/**
|
||||
* Skeletal, package protected constructor for unit tests which require a ShardDataProvider.
|
||||
* @param shard the shard
|
||||
* @param reads reads iterator.
|
||||
*/
|
||||
ShardDataProvider(Shard shard,GenomeLoc locus,StingSAMIterator reads) {
|
||||
this(shard,locus,reads,null,null);
|
||||
ShardDataProvider(Shard shard) {
|
||||
this(shard,null,null);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -177,9 +138,6 @@ public class ShardDataProvider {
|
|||
// Explicitly purge registered views to ensure that we don't end up with circular references
|
||||
// to views, which can in turn hold state.
|
||||
registeredViews.clear();
|
||||
|
||||
if(reads != null)
|
||||
reads.close();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
|||
|
|
@ -411,7 +411,7 @@ public class BlockDrivenSAMDataSource extends SAMDataSource {
|
|||
*/
|
||||
public SAMReaders(Reads sourceInfo) {
|
||||
for(File readsFile: sourceInfo.getReadsFiles()) {
|
||||
SAMFileReader2 reader = new SAMFileReader2(readsFile,true);
|
||||
SAMFileReader2 reader = new SAMFileReader2(readsFile,false);
|
||||
reader.setValidationStringency(sourceInfo.getValidationStringency());
|
||||
|
||||
// If no read group is present, hallucinate one.
|
||||
|
|
|
|||
|
|
@ -3,10 +3,12 @@ package org.broadinstitute.sting.gatk.executive;
|
|||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.LocusShardDataProvider;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
|
@ -170,7 +172,10 @@ public abstract class Accumulator {
|
|||
* Create a holder for interval results if none exists. Add the result to the holder.
|
||||
*/
|
||||
public void accumulate( ShardDataProvider provider, Object result ) {
|
||||
GenomeLoc location = provider.getLocus();
|
||||
if(!(provider instanceof LocusShardDataProvider))
|
||||
throw new StingException("Unable to reduce by interval on reads traversals at this time.");
|
||||
|
||||
GenomeLoc location = ((LocusShardDataProvider)provider).getLocus();
|
||||
|
||||
// Pull the interval iterator ahead to the interval overlapping this shard fragment.
|
||||
while((currentInterval == null || currentInterval.isBefore(location)) && intervalIterator.hasNext())
|
||||
|
|
|
|||
|
|
@ -6,11 +6,9 @@ import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategy;
|
|||
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider;
|
||||
import org.broadinstitute.sting.gatk.io.*;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.utils.threading.ThreadPoolMonitor;
|
||||
|
||||
|
|
@ -277,7 +275,7 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
|
|||
* @param walker Walker to apply to the dataset.
|
||||
* @param reduceTree Tree of reduces to which to add this shard traverse.
|
||||
*/
|
||||
protected Future queueNextShardTraverse( Walker walker, ReduceTree reduceTree ) {
|
||||
protected void queueNextShardTraverse( Walker walker, ReduceTree reduceTree ) {
|
||||
if (traverseTasks.size() == 0)
|
||||
throw new IllegalStateException("Cannot traverse; no pending traversals exist.");
|
||||
|
||||
|
|
@ -286,7 +284,7 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
|
|||
ShardTraverser traverser = new ShardTraverser(this,
|
||||
traversalEngine,
|
||||
walker,
|
||||
new ShardDataProvider(shard,shard.getGenomeLocs().get(0),getReadIterator(shard),reference,rods),
|
||||
shard,
|
||||
outputTracker);
|
||||
|
||||
Future traverseResult = threadPool.submit(traverser);
|
||||
|
|
@ -298,8 +296,6 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
|
|||
// No more data? Let the reduce tree know so it can finish processing what it's got.
|
||||
if (!isShardTraversePending())
|
||||
reduceTree.complete();
|
||||
|
||||
return traverseResult;
|
||||
}
|
||||
|
||||
/** Pulls the next reduce from the queue and runs it. */
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
package org.broadinstitute.sting.gatk.executive;
|
||||
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.LocusShardDataProvider;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.ReadShardDataProvider;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategy;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||
|
|
@ -9,10 +11,15 @@ import org.broadinstitute.sting.gatk.walkers.Walker;
|
|||
import org.broadinstitute.sting.gatk.io.DirectOutputTracker;
|
||||
import org.broadinstitute.sting.gatk.io.OutputTracker;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.iterators.LocusIteratorByState;
|
||||
import org.broadinstitute.sting.gatk.iterators.LocusIterator;
|
||||
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
||||
|
||||
import java.util.Collection;
|
||||
|
||||
import net.sf.picard.filter.FilteringIterator;
|
||||
|
||||
/** A micro-scheduling manager for single-threaded execution of a traversal. */
|
||||
public class LinearMicroScheduler extends MicroScheduler {
|
||||
|
||||
|
|
@ -49,12 +56,10 @@ public class LinearMicroScheduler extends MicroScheduler {
|
|||
|
||||
for (Shard shard : shardStrategy) {
|
||||
// New experimental code for managing locus intervals.
|
||||
// TODO: we'll need a similar but slightly different strategy for dealing with read intervals, so generalize this code.
|
||||
if((shard.getShardType() == Shard.ShardType.LOCUS || shard.getShardType() == Shard.ShardType.LOCUS_INTERVAL) &&
|
||||
shard.getGenomeLocs().size() > 0) {
|
||||
if(shard.getShardType() == Shard.ShardType.LOCUS || shard.getShardType() == Shard.ShardType.LOCUS_INTERVAL) {
|
||||
WindowMaker windowMaker = new WindowMaker(getReadIterator(shard),shard.getGenomeLocs());
|
||||
for(WindowMaker.WindowMakerIterator iterator: windowMaker) {
|
||||
ShardDataProvider dataProvider = new ShardDataProvider(shard,iterator.getLocus(),iterator,reference,rods);
|
||||
ShardDataProvider dataProvider = new LocusShardDataProvider(shard,iterator.getSourceInfo(),iterator.getLocus(),iterator,reference,rods);
|
||||
Object result = traversalEngine.traverse(walker, dataProvider, accumulator.getReduceInit());
|
||||
accumulator.accumulate(dataProvider,result);
|
||||
dataProvider.close();
|
||||
|
|
@ -62,7 +67,7 @@ public class LinearMicroScheduler extends MicroScheduler {
|
|||
windowMaker.close();
|
||||
}
|
||||
else {
|
||||
ShardDataProvider dataProvider = new ShardDataProvider(shard,null,getReadIterator(shard),reference,rods);
|
||||
ShardDataProvider dataProvider = new ReadShardDataProvider(shard,getReadIterator(shard),reference,rods);
|
||||
Object result = traversalEngine.traverse(walker, dataProvider, accumulator.getReduceInit());
|
||||
accumulator.accumulate(dataProvider,result);
|
||||
dataProvider.close();
|
||||
|
|
|
|||
|
|
@ -1,10 +1,12 @@
|
|||
package org.broadinstitute.sting.gatk.executive;
|
||||
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.LocusShardDataProvider;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
|
||||
import org.broadinstitute.sting.gatk.traversals.TraversalEngine;
|
||||
import org.broadinstitute.sting.gatk.io.ThreadLocalOutputTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
|
||||
import java.util.concurrent.Callable;
|
||||
|
|
@ -26,8 +28,8 @@ import java.util.concurrent.Callable;
|
|||
public class ShardTraverser implements Callable {
|
||||
private HierarchicalMicroScheduler microScheduler;
|
||||
private Walker walker;
|
||||
private Shard shard;
|
||||
private TraversalEngine traversalEngine;
|
||||
private ShardDataProvider dataProvider;
|
||||
private ThreadLocalOutputTracker outputTracker;
|
||||
private OutputMergeTask outputMergeTask;
|
||||
|
||||
|
|
@ -39,12 +41,12 @@ public class ShardTraverser implements Callable {
|
|||
public ShardTraverser( HierarchicalMicroScheduler microScheduler,
|
||||
TraversalEngine traversalEngine,
|
||||
Walker walker,
|
||||
ShardDataProvider dataProvider,
|
||||
Shard shard,
|
||||
ThreadLocalOutputTracker outputTracker ) {
|
||||
this.microScheduler = microScheduler;
|
||||
this.walker = walker;
|
||||
this.traversalEngine = traversalEngine;
|
||||
this.dataProvider = dataProvider;
|
||||
this.shard = shard;
|
||||
this.outputTracker = outputTracker;
|
||||
}
|
||||
|
||||
|
|
@ -52,11 +54,16 @@ public class ShardTraverser implements Callable {
|
|||
long startTime = System.currentTimeMillis();
|
||||
|
||||
Object accumulator = walker.reduceInit();
|
||||
WindowMaker windowMaker = new WindowMaker(microScheduler.getReadIterator(shard),shard.getGenomeLocs());
|
||||
try {
|
||||
accumulator = traversalEngine.traverse( walker, dataProvider, accumulator );
|
||||
for(WindowMaker.WindowMakerIterator iterator: windowMaker) {
|
||||
ShardDataProvider dataProvider = new LocusShardDataProvider(shard,iterator.getSourceInfo(),iterator.getLocus(),iterator,microScheduler.reference,microScheduler.rods);
|
||||
accumulator = traversalEngine.traverse( walker, dataProvider, accumulator );
|
||||
dataProvider.close();
|
||||
}
|
||||
}
|
||||
finally {
|
||||
dataProvider.close();
|
||||
windowMaker.close();
|
||||
outputMergeTask = outputTracker.closeStorage();
|
||||
|
||||
synchronized(this) {
|
||||
|
|
|
|||
|
|
@ -2,12 +2,19 @@ package org.broadinstitute.sting.gatk.executive;
|
|||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||
import org.broadinstitute.sting.gatk.iterators.LocusIterator;
|
||||
import org.broadinstitute.sting.gatk.iterators.LocusIteratorByState;
|
||||
import org.broadinstitute.sting.gatk.iterators.LocusOverflowTracker;
|
||||
import org.broadinstitute.sting.gatk.Reads;
|
||||
import org.broadinstitute.sting.gatk.traversals.TraversalStatistics;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.picard.util.PeekableIterator;
|
||||
import net.sf.picard.filter.FilteringIterator;
|
||||
import net.sf.picard.filter.SamRecordFilter;
|
||||
|
||||
/**
|
||||
* Buffer shards of data which may or may not contain multiple loci into
|
||||
|
|
@ -23,10 +30,20 @@ public class WindowMaker implements Iterable<WindowMaker.WindowMakerIterator>, I
|
|||
*/
|
||||
private final Reads sourceInfo;
|
||||
|
||||
/**
|
||||
* Hold the read iterator so that it can be closed later.
|
||||
*/
|
||||
private final StingSAMIterator readIterator;
|
||||
|
||||
/**
|
||||
* The locus overflow tracker.
|
||||
*/
|
||||
private final LocusOverflowTracker locusOverflowTracker;
|
||||
|
||||
/**
|
||||
* The data source for reads. Will probably come directly from the BAM file.
|
||||
*/
|
||||
private final PeekableIterator<SAMRecord> sourceIterator;
|
||||
private final PeekableIterator<AlignmentContext> sourceIterator;
|
||||
|
||||
/**
|
||||
* Stores the sequence of intervals that the windowmaker should be tracking.
|
||||
|
|
@ -34,9 +51,9 @@ public class WindowMaker implements Iterable<WindowMaker.WindowMakerIterator>, I
|
|||
private final PeekableIterator<GenomeLoc> intervalIterator;
|
||||
|
||||
/**
|
||||
* Which reads should be saved to go into the next interval?
|
||||
* In the case of monolithic sharding, this case returns whether the only shard has been generated.
|
||||
*/
|
||||
private Queue<SAMRecord> overlappingReads = new ArrayDeque<SAMRecord>();
|
||||
private boolean shardGenerated = false;
|
||||
|
||||
/**
|
||||
* Create a new window maker with the given iterator as a data source, covering
|
||||
|
|
@ -46,8 +63,13 @@ public class WindowMaker implements Iterable<WindowMaker.WindowMakerIterator>, I
|
|||
*/
|
||||
public WindowMaker(StingSAMIterator iterator, List<GenomeLoc> intervals) {
|
||||
this.sourceInfo = iterator.getSourceInfo();
|
||||
this.sourceIterator = new PeekableIterator<SAMRecord>(iterator);
|
||||
this.intervalIterator = new PeekableIterator<GenomeLoc>(intervals.iterator());
|
||||
this.readIterator = iterator;
|
||||
|
||||
LocusIterator locusIterator = new LocusIteratorByState(new FilteringIterator(iterator,new LocusStreamFilterFunc()),sourceInfo);
|
||||
this.locusOverflowTracker = locusIterator.getLocusOverflowTracker();
|
||||
|
||||
this.sourceIterator = new PeekableIterator<AlignmentContext>(locusIterator);
|
||||
this.intervalIterator = intervals.size()>0 ? new PeekableIterator<GenomeLoc>(intervals.iterator()) : null;
|
||||
}
|
||||
|
||||
public Iterator<WindowMakerIterator> iterator() {
|
||||
|
|
@ -55,11 +77,12 @@ public class WindowMaker implements Iterable<WindowMaker.WindowMakerIterator>, I
|
|||
}
|
||||
|
||||
public boolean hasNext() {
|
||||
return intervalIterator.hasNext();
|
||||
return (intervalIterator != null && intervalIterator.hasNext()) || !shardGenerated;
|
||||
}
|
||||
|
||||
public WindowMakerIterator next() {
|
||||
return new WindowMakerIterator(intervalIterator.next());
|
||||
shardGenerated = true;
|
||||
return new WindowMakerIterator(intervalIterator != null ? intervalIterator.next() : null);
|
||||
}
|
||||
|
||||
public void remove() {
|
||||
|
|
@ -67,22 +90,18 @@ public class WindowMaker implements Iterable<WindowMaker.WindowMakerIterator>, I
|
|||
}
|
||||
|
||||
public void close() {
|
||||
this.sourceIterator.close();
|
||||
this.readIterator.close();
|
||||
}
|
||||
|
||||
public class WindowMakerIterator implements StingSAMIterator {
|
||||
public class WindowMakerIterator extends LocusIterator {
|
||||
/**
|
||||
* The locus for which this iterator is currently returning reads.
|
||||
*/
|
||||
private final GenomeLoc locus;
|
||||
|
||||
/**
|
||||
* Which reads should be saved to go into the next interval?
|
||||
*/
|
||||
private final Queue<SAMRecord> pendingOverlaps = new ArrayDeque<SAMRecord>();
|
||||
|
||||
public WindowMakerIterator(GenomeLoc locus) {
|
||||
this.locus = locus;
|
||||
seedNextLocus();
|
||||
}
|
||||
|
||||
public Reads getSourceInfo() {
|
||||
|
|
@ -98,32 +117,68 @@ public class WindowMaker implements Iterable<WindowMaker.WindowMakerIterator>, I
|
|||
}
|
||||
|
||||
public boolean hasNext() {
|
||||
if(overlappingReads.size() > 0) return true;
|
||||
if(sourceIterator.hasNext()) {
|
||||
SAMRecord nextRead = sourceIterator.peek();
|
||||
if((nextRead.getAlignmentStart() >= locus.getStart() && nextRead.getAlignmentStart() <= locus.getStop()) ||
|
||||
(nextRead.getAlignmentEnd() >= locus.getStart() && nextRead.getAlignmentEnd() <= locus.getStop()) ||
|
||||
(nextRead.getAlignmentStart() < locus.getStart() && nextRead.getAlignmentEnd() > locus.getStop()))
|
||||
return true;
|
||||
|
||||
}
|
||||
return false;
|
||||
// locus == null when doing monolithic sharding.
|
||||
// TODO: Move the monolithic sharding iterator so that we don't have to special case here.
|
||||
return sourceIterator.hasNext() && (locus == null || sourceIterator.peek().getLocation().overlapsP(locus));
|
||||
}
|
||||
|
||||
public SAMRecord next() {
|
||||
public AlignmentContext next() {
|
||||
if(!hasNext()) throw new NoSuchElementException("WindowMakerIterator is out of elements for this interval.");
|
||||
SAMRecord nextRead = overlappingReads.size() > 0 ? overlappingReads.remove() : sourceIterator.next();
|
||||
if(intervalIterator.hasNext() && nextRead.getAlignmentEnd() >= intervalIterator.peek().getStart())
|
||||
pendingOverlaps.add(nextRead);
|
||||
return nextRead;
|
||||
return sourceIterator.next();
|
||||
}
|
||||
|
||||
public void close() {
|
||||
overlappingReads = pendingOverlaps;
|
||||
public LocusOverflowTracker getLocusOverflowTracker() {
|
||||
return locusOverflowTracker;
|
||||
}
|
||||
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException("Unable to remove from a window maker iterator.");
|
||||
}
|
||||
public void seedNextLocus() {
|
||||
// locus == null when doing monolithic sharding.
|
||||
// TODO: Move the monolithic sharding iterator so that we don't have to special case here.
|
||||
if(locus == null) return;
|
||||
|
||||
while(sourceIterator.hasNext() && sourceIterator.peek().getLocation().isBefore(locus))
|
||||
sourceIterator.next();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Class to filter out un-handle-able reads from the stream. We currently are skipping
|
||||
* unmapped reads, non-primary reads, unaligned reads, and duplicate reads.
|
||||
*/
|
||||
private static class LocusStreamFilterFunc implements SamRecordFilter {
|
||||
SAMRecord lastRead = null;
|
||||
public boolean filterOut(SAMRecord rec) {
|
||||
boolean result = false;
|
||||
String why = "";
|
||||
if (rec.getReadUnmappedFlag()) {
|
||||
TraversalStatistics.nUnmappedReads++;
|
||||
result = true;
|
||||
why = "Unmapped";
|
||||
} else if (rec.getNotPrimaryAlignmentFlag()) {
|
||||
TraversalStatistics.nNotPrimary++;
|
||||
result = true;
|
||||
why = "Not Primary";
|
||||
} else if (rec.getAlignmentStart() == SAMRecord.NO_ALIGNMENT_START) {
|
||||
TraversalStatistics.nBadAlignments++;
|
||||
result = true;
|
||||
why = "No alignment start";
|
||||
} else if (rec.getDuplicateReadFlag()) {
|
||||
TraversalStatistics.nDuplicates++;
|
||||
result = true;
|
||||
why = "Duplicate reads";
|
||||
}
|
||||
else {
|
||||
result = false;
|
||||
}
|
||||
|
||||
if (result) {
|
||||
TraversalStatistics.nSkippedReads++;
|
||||
//System.out.printf(" [filter] %s => %b %s", rec.getReadName(), result, why);
|
||||
} else {
|
||||
TraversalStatistics.nReads++;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,11 +2,10 @@ package org.broadinstitute.sting.gatk.traversals;
|
|||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
||||
public abstract class TraversalEngine {
|
||||
public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,ProviderType extends ShardDataProvider> {
|
||||
// Time in milliseconds since we initialized this engine
|
||||
private long startTime = -1;
|
||||
private long lastProgressPrintTime = -1; // When was the last time we printed our progress?
|
||||
|
|
@ -76,18 +75,16 @@ public abstract class TraversalEngine {
|
|||
* A passthrough method so that subclasses can report which types of traversals they're using.
|
||||
*
|
||||
* @param sum Result of the computation.
|
||||
* @param <T> Type of the computation.
|
||||
*/
|
||||
public abstract <T> void printOnTraversalDone(T sum);
|
||||
public abstract void printOnTraversalDone(T sum);
|
||||
|
||||
/**
|
||||
* Called after a traversal to print out information about the traversal process
|
||||
*
|
||||
* @param type describing this type of traversal
|
||||
* @param sum The reduce result of the traversal
|
||||
* @param <T> ReduceType of the traversal
|
||||
*/
|
||||
protected <T> void printOnTraversalDone(final String type, T sum) {
|
||||
protected void printOnTraversalDone(final String type, T sum) {
|
||||
printProgress(true, type, null);
|
||||
logger.info("Traversal reduce result is " + sum);
|
||||
final long curTime = System.currentTimeMillis();
|
||||
|
|
@ -115,12 +112,10 @@ public abstract class TraversalEngine {
|
|||
* @param walker the walker to run with
|
||||
* @param dataProvider the data provider that generates data given the shard
|
||||
* @param sum the accumulator
|
||||
* @param <M> an object of the map type
|
||||
* @param <T> an object of the reduce type
|
||||
*
|
||||
* @return an object of the reduce type
|
||||
*/
|
||||
public abstract <M, T> T traverse(Walker<M, T> walker,
|
||||
ShardDataProvider dataProvider,
|
||||
T sum);
|
||||
public abstract T traverse(WalkerType walker,
|
||||
ProviderType dataProvider,
|
||||
T sum);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -31,17 +31,11 @@ import net.sf.samtools.SAMRecord;
|
|||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.ReadView;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.ManagingReferenceOrderedView;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.ReadShard;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.ReadShardDataProvider;
|
||||
import org.broadinstitute.sting.gatk.iterators.PushbackIterator;
|
||||
import org.broadinstitute.sting.gatk.walkers.DuplicateWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
|
||||
import java.util.*;
|
||||
|
|
@ -54,7 +48,7 @@ import java.util.*;
|
|||
* <p/>
|
||||
* This class handles traversing lists of duplicate reads in the new shardable style
|
||||
*/
|
||||
public class TraverseDuplicates extends TraversalEngine {
|
||||
public class TraverseDuplicates<M,T> extends TraversalEngine<M,T,DuplicateWalker<M,T>,ReadShardDataProvider> {
|
||||
/** our log, which we want to capture anything from this class */
|
||||
protected static Logger logger = Logger.getLogger(TraverseDuplicates.class);
|
||||
|
||||
|
|
@ -196,19 +190,12 @@ public class TraverseDuplicates extends TraversalEngine {
|
|||
*
|
||||
* @param walker the walker to execute over
|
||||
* @param sum of type T, the return from the walker
|
||||
* @param <M> the generic type
|
||||
* @param <T> the return type of the reduce function
|
||||
*
|
||||
* @return the result type T, the product of all the reduce calls
|
||||
*/
|
||||
public <M, T> T traverse(Walker<M, T> walker,
|
||||
ShardDataProvider dataProvider,
|
||||
T sum) {
|
||||
// safety first :-)
|
||||
if (!(walker instanceof DuplicateWalker))
|
||||
throw new IllegalArgumentException("Walker isn't a duplicate walker!");
|
||||
DuplicateWalker<M, T> dupWalker = (DuplicateWalker<M, T>) walker;
|
||||
|
||||
public T traverse(DuplicateWalker<M, T> walker,
|
||||
ReadShardDataProvider dataProvider,
|
||||
T sum) {
|
||||
FilteringIterator filterIter = new FilteringIterator(new ReadView(dataProvider).iterator(), new duplicateStreamFilterFunc());
|
||||
PushbackIterator<SAMRecord> iter = new PushbackIterator<SAMRecord>(filterIter);
|
||||
|
||||
|
|
@ -233,10 +220,10 @@ public class TraverseDuplicates extends TraversalEngine {
|
|||
TraversalStatistics.nRecords++;
|
||||
|
||||
// actually call filter and map, accumulating sum
|
||||
final boolean keepMeP = dupWalker.filter(site, locus, readSets);
|
||||
final boolean keepMeP = walker.filter(site, locus, readSets);
|
||||
if (keepMeP) {
|
||||
M x = dupWalker.map(site, locus, readSets);
|
||||
sum = dupWalker.reduce(x, sum);
|
||||
M x = walker.map(site, locus, readSets);
|
||||
sum = walker.reduce(x, sum);
|
||||
}
|
||||
|
||||
printProgress(DUPS_STRING, site);
|
||||
|
|
@ -254,9 +241,8 @@ public class TraverseDuplicates extends TraversalEngine {
|
|||
* Temporary override of printOnTraversalDone.
|
||||
*
|
||||
* @param sum Result of the computation.
|
||||
* @param <T> Type of the result.
|
||||
*/
|
||||
public <T> void printOnTraversalDone(T sum) {
|
||||
public void printOnTraversalDone(T sum) {
|
||||
printOnTraversalDone(DUPS_STRING, sum);
|
||||
}
|
||||
}
|
||||
|
|
@ -2,26 +2,21 @@ package org.broadinstitute.sting.gatk.traversals;
|
|||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.gatk.WalkerManager;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.*;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
|
||||
import java.util.ArrayList;
|
||||
|
||||
/**
|
||||
* A simple solution to iterating over all reference positions over a series of genomic locations.
|
||||
*/
|
||||
public class TraverseLoci extends TraversalEngine {
|
||||
public class TraverseLoci<M,T> extends TraversalEngine<M,T,LocusWalker<M,T>,LocusShardDataProvider> {
|
||||
final private static String LOCI_STRING = "sites";
|
||||
|
||||
/**
|
||||
|
|
@ -29,24 +24,12 @@ public class TraverseLoci extends TraversalEngine {
|
|||
*/
|
||||
protected static Logger logger = Logger.getLogger(TraversalEngine.class);
|
||||
|
||||
public <M,T> T traverse(Walker<M,T> walker, ArrayList<GenomeLoc> locations) {
|
||||
if ( locations.isEmpty() )
|
||||
Utils.scareUser("Requested all locations be processed without providing locations to be processed!");
|
||||
|
||||
throw new UnsupportedOperationException("This traversal type not supported by TraverseLoci");
|
||||
}
|
||||
|
||||
@Override
|
||||
public <M,T> T traverse( Walker<M,T> walker,
|
||||
ShardDataProvider dataProvider,
|
||||
T sum ) {
|
||||
public T traverse( LocusWalker<M,T> walker,
|
||||
LocusShardDataProvider dataProvider,
|
||||
T sum ) {
|
||||
logger.debug(String.format("TraverseLoci.traverse: Shard is %s", dataProvider));
|
||||
|
||||
if ( !(walker instanceof LocusWalker) )
|
||||
throw new IllegalArgumentException("Walker isn't a loci walker!");
|
||||
|
||||
LocusWalker<M, T> locusWalker = (LocusWalker<M, T>)walker;
|
||||
|
||||
LocusView locusView = getLocusView( walker, dataProvider );
|
||||
|
||||
if ( locusView.hasNext() ) { // trivial optimization to avoid unnecessary processing when there's nothing here at all
|
||||
|
|
@ -87,10 +70,10 @@ public class TraverseLoci extends TraversalEngine {
|
|||
// hold the (longest) stretch of deleted reference bases (if deletions are present in the pileup).
|
||||
ReferenceContext refContext = referenceView.getReferenceContext(location);
|
||||
|
||||
final boolean keepMeP = locusWalker.filter(tracker, refContext, locus);
|
||||
final boolean keepMeP = walker.filter(tracker, refContext, locus);
|
||||
if (keepMeP) {
|
||||
M x = locusWalker.map(tracker, refContext, locus);
|
||||
sum = locusWalker.reduce(x, sum);
|
||||
M x = walker.map(tracker, refContext, locus);
|
||||
sum = walker.reduce(x, sum);
|
||||
}
|
||||
|
||||
if (this.maximumIterations > 0 && TraversalStatistics.nRecords > this.maximumIterations) {
|
||||
|
|
@ -110,8 +93,8 @@ public class TraverseLoci extends TraversalEngine {
|
|||
if ( nSkipped > 0 ) {
|
||||
GenomeLoc site = rodLocusView.getLocOneBeyondShard();
|
||||
AlignmentContext ac = new AlignmentContext(site, new ReadBackedPileup(site), nSkipped);
|
||||
M x = locusWalker.map(null, null, ac);
|
||||
sum = locusWalker.reduce(x, sum);
|
||||
M x = walker.map(null, null, ac);
|
||||
sum = walker.reduce(x, sum);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -122,9 +105,8 @@ public class TraverseLoci extends TraversalEngine {
|
|||
* Temporary override of printOnTraversalDone.
|
||||
*
|
||||
* @param sum Result of the computation.
|
||||
* @param <T> Type of the result.
|
||||
*/
|
||||
public <T> void printOnTraversalDone( T sum ) {
|
||||
public void printOnTraversalDone( T sum ) {
|
||||
printOnTraversalDone(LOCI_STRING, sum );
|
||||
}
|
||||
|
||||
|
|
@ -132,8 +114,9 @@ public class TraverseLoci extends TraversalEngine {
|
|||
* Gets the best view of loci for this walker given the available data.
|
||||
* @param walker walker to interrogate.
|
||||
* @param dataProvider Data which which to drive the locus view.
|
||||
* @return A view of the locus data, where one iteration of the locus view maps to one iteration of the traversal.
|
||||
*/
|
||||
private LocusView getLocusView( Walker walker, ShardDataProvider dataProvider ) {
|
||||
private LocusView getLocusView( Walker<M,T> walker, LocusShardDataProvider dataProvider ) {
|
||||
DataSource dataSource = WalkerManager.getWalkerDataSource(walker);
|
||||
if( dataSource == DataSource.READS )
|
||||
return new CoveredLocusView(dataProvider);
|
||||
|
|
|
|||
|
|
@ -1,20 +1,17 @@
|
|||
package org.broadinstitute.sting.gatk.traversals;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.*;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
|
||||
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||
import org.broadinstitute.sting.gatk.iterators.LocusIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWindowWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
|
|
@ -23,26 +20,20 @@ import java.util.List;
|
|||
* Time: 10:26:03 AM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class TraverseLocusWindows extends TraversalEngine {
|
||||
public class TraverseLocusWindows<M,T> extends TraversalEngine<M,T,LocusWindowWalker<M,T>,LocusShardDataProvider> {
|
||||
/** descriptor of the type */
|
||||
private static final String LOCUS_WINDOW_STRING = "intervals";
|
||||
|
||||
public <M,T> T traverse( Walker<M,T> walker,
|
||||
ShardDataProvider dataProvider,
|
||||
T sum ) {
|
||||
|
||||
if ( !(walker instanceof LocusWindowWalker) )
|
||||
throw new IllegalArgumentException("Walker isn't a locus window walker!");
|
||||
|
||||
LocusWindowWalker<M, T> locusWindowWalker = (LocusWindowWalker<M, T>)walker;
|
||||
public T traverse( LocusWindowWalker<M,T> walker,
|
||||
LocusShardDataProvider dataProvider,
|
||||
T sum ) {
|
||||
|
||||
GenomeLoc interval = dataProvider.getLocus();
|
||||
|
||||
ReadView readView = new ReadView( dataProvider );
|
||||
LocusReferenceView referenceView = new LocusReferenceView( walker, dataProvider );
|
||||
ReferenceOrderedView referenceOrderedDataView = new ManagingReferenceOrderedView( dataProvider );
|
||||
|
||||
Pair<GenomeLoc, List<SAMRecord>> locus = getLocusContext(readView.iterator(), interval);
|
||||
Pair<GenomeLoc, List<SAMRecord>> locus = getLocusContext(dataProvider.getLocusIterator(), interval);
|
||||
|
||||
// The TraverseByLocusWindow expands intervals to cover all reads in a non-standard way.
|
||||
// TODO: Convert this approach to the standard.
|
||||
|
|
@ -58,8 +49,8 @@ public class TraverseLocusWindows extends TraversalEngine {
|
|||
//
|
||||
//final boolean keepMeP = locusWindowWalker.filter(tracker, referenceSubsequence, locus);
|
||||
//if (keepMeP) {
|
||||
M x = locusWindowWalker.map(tracker, referenceSubsequence, locus.getFirst(), locus.getSecond());
|
||||
sum = locusWindowWalker.reduce(x, sum);
|
||||
M x = walker.map(tracker, referenceSubsequence, locus.getFirst(), locus.getSecond());
|
||||
sum = walker.reduce(x, sum);
|
||||
//}
|
||||
|
||||
printProgress(LOCUS_WINDOW_STRING, locus.getFirst());
|
||||
|
|
@ -67,26 +58,34 @@ public class TraverseLocusWindows extends TraversalEngine {
|
|||
return sum;
|
||||
}
|
||||
|
||||
private Pair<GenomeLoc, List<SAMRecord>> getLocusContext(StingSAMIterator readIter, GenomeLoc interval) {
|
||||
ArrayList<SAMRecord> reads = new ArrayList<SAMRecord>();
|
||||
private Pair<GenomeLoc, List<SAMRecord>> getLocusContext(LocusIterator locusIter, GenomeLoc interval) {
|
||||
List<SAMRecord> reads = new ArrayList<SAMRecord>();
|
||||
boolean done = false;
|
||||
long leftmostIndex = interval.getStart(),
|
||||
rightmostIndex = interval.getStop();
|
||||
while (readIter.hasNext() && !done) {
|
||||
TraversalStatistics.nRecords++;
|
||||
|
||||
SAMRecord read = readIter.next();
|
||||
reads.add(read);
|
||||
if ( read.getAlignmentStart() < leftmostIndex )
|
||||
leftmostIndex = read.getAlignmentStart();
|
||||
if ( read.getAlignmentEnd() > rightmostIndex )
|
||||
rightmostIndex = read.getAlignmentEnd();
|
||||
if ( this.maximumIterations > 0 && TraversalStatistics.nRecords > this.maximumIterations) {
|
||||
logger.warn(String.format("Maximum number of reads encountered, terminating traversal " + TraversalStatistics.nRecords));
|
||||
done = true;
|
||||
while(locusIter.hasNext() && !done) {
|
||||
AlignmentContext alignment = locusIter.next();
|
||||
Iterator<SAMRecord> readIter = alignment.getReads().iterator();
|
||||
|
||||
while (readIter.hasNext() && !done) {
|
||||
TraversalStatistics.nRecords++;
|
||||
|
||||
SAMRecord read = readIter.next();
|
||||
if(reads.contains(read)) continue;
|
||||
reads.add(read);
|
||||
if ( read.getAlignmentStart() < leftmostIndex )
|
||||
leftmostIndex = read.getAlignmentStart();
|
||||
if ( read.getAlignmentEnd() > rightmostIndex )
|
||||
rightmostIndex = read.getAlignmentEnd();
|
||||
if ( this.maximumIterations > 0 && TraversalStatistics.nRecords > this.maximumIterations) {
|
||||
logger.warn(String.format("Maximum number of reads encountered, terminating traversal " + TraversalStatistics.nRecords));
|
||||
done = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
GenomeLoc window = GenomeLocParser.createGenomeLoc(interval.getContig(), leftmostIndex, rightmostIndex);
|
||||
// AlignmentContext locus = new AlignmentContext(window, reads, null);
|
||||
// if ( readIter.getSourceInfo().getDownsampleToCoverage() != null )
|
||||
|
|
@ -99,9 +98,8 @@ public class TraverseLocusWindows extends TraversalEngine {
|
|||
* Temporary override of printOnTraversalDone.
|
||||
* TODO: Add some sort of TE.getName() function once all TraversalEngines are ported.
|
||||
* @param sum Result of the computation.
|
||||
* @param <T> Type of the result.
|
||||
*/
|
||||
public <T> void printOnTraversalDone( T sum ) {
|
||||
public void printOnTraversalDone( T sum ) {
|
||||
printOnTraversalDone(LOCUS_WINDOW_STRING, sum );
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -3,14 +3,10 @@ package org.broadinstitute.sting.gatk.traversals;
|
|||
import net.sf.samtools.SAMRecord;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.gatk.WalkerManager;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.ReadBasedReferenceOrderedView;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.ReadReferenceView;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.ReadView;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.*;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
|
||||
/*
|
||||
|
|
@ -47,7 +43,7 @@ import org.broadinstitute.sting.utils.GenomeLocParser;
|
|||
* <p/>
|
||||
* This class handles traversing by reads in the new shardable style
|
||||
*/
|
||||
public class TraverseReads extends TraversalEngine {
|
||||
public class TraverseReads<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,ReadShardDataProvider> {
|
||||
/** our log, which we want to capture anything from this class */
|
||||
protected static Logger logger = Logger.getLogger(TraverseReads.class);
|
||||
|
||||
|
|
@ -60,23 +56,17 @@ public class TraverseReads extends TraversalEngine {
|
|||
* @param walker the walker to traverse with
|
||||
* @param dataProvider the provider of the reads data
|
||||
* @param sum the value of type T, specified by the walker, to feed to the walkers reduce function
|
||||
* @param <M> the map type of the walker
|
||||
* @param <T> the reduce type of the walker
|
||||
* @return the reduce variable of the read walker
|
||||
*/
|
||||
public <M, T> T traverse(Walker<M, T> walker,
|
||||
ShardDataProvider dataProvider,
|
||||
T sum) {
|
||||
public T traverse(ReadWalker<M,T> walker,
|
||||
ReadShardDataProvider dataProvider,
|
||||
T sum) {
|
||||
|
||||
logger.debug(String.format("TraverseReads.traverse Covered dataset is %s", dataProvider));
|
||||
|
||||
if (!(walker instanceof ReadWalker))
|
||||
throw new IllegalArgumentException("Walker isn't a read walker!");
|
||||
|
||||
if( !dataProvider.hasReads() )
|
||||
throw new IllegalArgumentException("Unable to traverse reads; no read data is available.");
|
||||
|
||||
ReadWalker<M, T> readWalker = (ReadWalker<M, T>) walker;
|
||||
boolean needsReferenceBasesP = WalkerManager.isRequired(walker, DataSource.REFERENCE_BASES);
|
||||
|
||||
ReadView reads = new ReadView(dataProvider);
|
||||
|
|
@ -101,10 +91,10 @@ public class TraverseReads extends TraversalEngine {
|
|||
// if the read is mapped, create a metadata tracker
|
||||
ReadMetaDataTracker tracker = (read.getReferenceIndex() >= 0) ? rodView.getReferenceOrderedDataForRead(read) : null;
|
||||
|
||||
final boolean keepMeP = readWalker.filter(refSeq, read);
|
||||
final boolean keepMeP = walker.filter(refSeq, read);
|
||||
if (keepMeP) {
|
||||
M x = readWalker.map(refSeq, read, tracker); // the tracker can be null
|
||||
sum = readWalker.reduce(x, sum);
|
||||
M x = walker.map(refSeq, read, tracker); // the tracker can be null
|
||||
sum = walker.reduce(x, sum);
|
||||
}
|
||||
|
||||
printProgress(READS_STRING,
|
||||
|
|
@ -119,9 +109,8 @@ public class TraverseReads extends TraversalEngine {
|
|||
* Temporary override of printOnTraversalDone.
|
||||
* TODO: Add some sort of TE.getName() function once all TraversalEngines are ported.
|
||||
* @param sum Result of the computation.
|
||||
* @param <T> Type of the result.
|
||||
*/
|
||||
public <T> void printOnTraversalDone( T sum ) {
|
||||
public void printOnTraversalDone( T sum ) {
|
||||
printOnTraversalDone(READS_STRING, sum );
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -16,8 +16,6 @@ import org.apache.log4j.Logger;
|
|||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public abstract class Walker<MapType, ReduceType> {
|
||||
// TODO: Can a walker be templatized so that map and reduce live here?
|
||||
|
||||
protected static Logger logger = Logger.getLogger(Walker.class);
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -4,7 +4,6 @@ import org.broadinstitute.sting.gatk.traversals.TraversalEngine;
|
|||
import org.broadinstitute.sting.gatk.traversals.TraversalStatistics;
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.apache.log4j.Logger;
|
||||
|
|
@ -43,7 +42,7 @@ import net.sf.samtools.SAMFileHeader;
|
|||
*
|
||||
* this class acts as a fake reads traversal engine for testing out reads based traversals.
|
||||
*/
|
||||
public class ArtificialReadsTraversal extends TraversalEngine {
|
||||
public class ArtificialReadsTraversal<M,T> extends TraversalEngine<M,T,Walker<M,T>,ShardDataProvider> {
|
||||
|
||||
public int startingChr = 1;
|
||||
public int endingChr = 5;
|
||||
|
|
@ -77,12 +76,10 @@ public class ArtificialReadsTraversal extends TraversalEngine {
|
|||
* @param walker the walker to traverse with
|
||||
* @param dataProvider the provider of the reads data
|
||||
* @param sum the value of type T, specified by the walker, to feed to the walkers reduce function
|
||||
* @param <M> the map type of the walker
|
||||
* @param <T> the reduce type of the walker
|
||||
*
|
||||
* @return the reduce variable of the read walker
|
||||
*/
|
||||
public <M, T> T traverse( Walker<M, T> walker,
|
||||
public T traverse( Walker<M, T> walker,
|
||||
ShardDataProvider dataProvider,
|
||||
T sum ) {
|
||||
|
||||
|
|
@ -126,9 +123,8 @@ public class ArtificialReadsTraversal extends TraversalEngine {
|
|||
* TODO: Add some sort of TE.getName() function once all TraversalEngines are ported.
|
||||
*
|
||||
* @param sum Result of the computation.
|
||||
* @param <T> Type of the result.
|
||||
*/
|
||||
public <T> void printOnTraversalDone( T sum ) {
|
||||
public void printOnTraversalDone( T sum ) {
|
||||
printOnTraversalDone("reads", sum);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -26,7 +26,7 @@ import java.util.List;
|
|||
public class AllLocusViewTest extends LocusViewTemplate {
|
||||
|
||||
@Override
|
||||
protected LocusView createView(ShardDataProvider provider) {
|
||||
protected LocusView createView(LocusShardDataProvider provider) {
|
||||
return new AllLocusView(provider);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -29,7 +29,7 @@ public class CoveredLocusViewTest extends LocusViewTemplate {
|
|||
* Retrieve a covered locus view.
|
||||
*/
|
||||
@Override
|
||||
protected LocusView createView(ShardDataProvider provider) {
|
||||
protected LocusView createView(LocusShardDataProvider provider) {
|
||||
return new CoveredLocusView(provider);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -58,7 +58,7 @@ public class LocusReferenceViewTest extends ReferenceViewTemplate {
|
|||
public void testOverlappingReferenceBases() {
|
||||
Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc(0, sequenceFile.getSequence("chrM").length() - 10, sequenceFile.getSequence("chrM").length())));
|
||||
|
||||
ShardDataProvider dataProvider = new ShardDataProvider(shard, shard.getGenomeLocs().get(0), null, sequenceFile, null);
|
||||
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, null, shard.getGenomeLocs().get(0), null, sequenceFile, null);
|
||||
LocusReferenceView view = new LocusReferenceView(dataProvider);
|
||||
|
||||
char[] results = view.getReferenceBases(GenomeLocParser.createGenomeLoc(0, sequenceFile.getSequence("chrM").length() - 10, sequenceFile.getSequence("chrM").length() + 9));
|
||||
|
|
@ -75,7 +75,7 @@ public class LocusReferenceViewTest extends ReferenceViewTemplate {
|
|||
public void testBoundsFailure() {
|
||||
Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc(0, 1, 50)));
|
||||
|
||||
ShardDataProvider dataProvider = new ShardDataProvider(shard, shard.getGenomeLocs().get(0), null, sequenceFile, null);
|
||||
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, null, shard.getGenomeLocs().get(0), null, sequenceFile, null);
|
||||
LocusReferenceView view = new LocusReferenceView(dataProvider);
|
||||
|
||||
view.getReferenceContext(GenomeLocParser.createGenomeLoc(0, 51)).getBase();
|
||||
|
|
@ -91,7 +91,7 @@ public class LocusReferenceViewTest extends ReferenceViewTemplate {
|
|||
Shard shard = new LocusShard(Collections.singletonList(loc));
|
||||
GenomeLocusIterator shardIterator = new GenomeLocusIterator(loc);
|
||||
|
||||
ShardDataProvider dataProvider = new ShardDataProvider(shard, loc, null, sequenceFile, null);
|
||||
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, null, loc, null, sequenceFile, null);
|
||||
LocusReferenceView view = new LocusReferenceView(dataProvider);
|
||||
|
||||
while (shardIterator.hasNext()) {
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@ import net.sf.picard.reference.ReferenceSequenceFile;
|
|||
import net.sf.samtools.*;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.gatk.Reads;
|
||||
import org.broadinstitute.sting.gatk.executive.WindowMaker;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.LocusShard;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
|
||||
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||
|
|
@ -46,7 +47,9 @@ public abstract class LocusViewTemplate extends BaseTest {
|
|||
|
||||
GenomeLoc shardBounds = GenomeLocParser.createGenomeLoc("chr1", 1, 5);
|
||||
Shard shard = new LocusShard(Collections.singletonList(shardBounds));
|
||||
ShardDataProvider dataProvider = new ShardDataProvider(shard, shard.getGenomeLocs().get(0), iterator);
|
||||
WindowMaker windowMaker = new WindowMaker(iterator,shard.getGenomeLocs());
|
||||
WindowMaker.WindowMakerIterator window = windowMaker.next();
|
||||
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, null, window.getLocus(), window, null, null);
|
||||
|
||||
LocusView view = createView(dataProvider);
|
||||
|
||||
|
|
@ -60,7 +63,9 @@ public abstract class LocusViewTemplate extends BaseTest {
|
|||
|
||||
GenomeLoc shardBounds = GenomeLocParser.createGenomeLoc("chr1", 1, 5);
|
||||
Shard shard = new LocusShard(Collections.singletonList(shardBounds));
|
||||
ShardDataProvider dataProvider = new ShardDataProvider(shard, shard.getGenomeLocs().get(0), iterator);
|
||||
WindowMaker windowMaker = new WindowMaker(iterator,shard.getGenomeLocs());
|
||||
WindowMaker.WindowMakerIterator window = windowMaker.next();
|
||||
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null);
|
||||
|
||||
LocusView view = createView(dataProvider);
|
||||
|
||||
|
|
@ -73,7 +78,9 @@ public abstract class LocusViewTemplate extends BaseTest {
|
|||
SAMRecordIterator iterator = new SAMRecordIterator(read);
|
||||
|
||||
Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 1, 10)));
|
||||
ShardDataProvider dataProvider = new ShardDataProvider(shard, shard.getGenomeLocs().get(0), iterator);
|
||||
WindowMaker windowMaker = new WindowMaker(iterator,shard.getGenomeLocs());
|
||||
WindowMaker.WindowMakerIterator window = windowMaker.next();
|
||||
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null);
|
||||
LocusView view = createView(dataProvider);
|
||||
|
||||
testReadsInContext(view, shard.getGenomeLocs(), Collections.singletonList(read));
|
||||
|
|
@ -85,7 +92,9 @@ public abstract class LocusViewTemplate extends BaseTest {
|
|||
SAMRecordIterator iterator = new SAMRecordIterator(read);
|
||||
|
||||
Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 1, 10)));
|
||||
ShardDataProvider dataProvider = new ShardDataProvider(shard, shard.getGenomeLocs().get(0), iterator);
|
||||
WindowMaker windowMaker = new WindowMaker(iterator,shard.getGenomeLocs());
|
||||
WindowMaker.WindowMakerIterator window = windowMaker.next();
|
||||
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null);
|
||||
LocusView view = createView(dataProvider);
|
||||
|
||||
testReadsInContext(view, shard.getGenomeLocs(), Collections.singletonList(read));
|
||||
|
|
@ -97,7 +106,9 @@ public abstract class LocusViewTemplate extends BaseTest {
|
|||
SAMRecordIterator iterator = new SAMRecordIterator(read);
|
||||
|
||||
Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 1, 10)));
|
||||
ShardDataProvider dataProvider = new ShardDataProvider(shard, shard.getGenomeLocs().get(0), iterator);
|
||||
WindowMaker windowMaker = new WindowMaker(iterator,shard.getGenomeLocs());
|
||||
WindowMaker.WindowMakerIterator window = windowMaker.next();
|
||||
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null);
|
||||
LocusView view = createView(dataProvider);
|
||||
|
||||
testReadsInContext(view, shard.getGenomeLocs(), Collections.singletonList(read));
|
||||
|
|
@ -109,7 +120,9 @@ public abstract class LocusViewTemplate extends BaseTest {
|
|||
SAMRecordIterator iterator = new SAMRecordIterator(read);
|
||||
|
||||
Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 6, 15)));
|
||||
ShardDataProvider dataProvider = new ShardDataProvider(shard, shard.getGenomeLocs().get(0), iterator);
|
||||
WindowMaker windowMaker = new WindowMaker(iterator,shard.getGenomeLocs());
|
||||
WindowMaker.WindowMakerIterator window = windowMaker.next();
|
||||
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null);
|
||||
LocusView view = createView(dataProvider);
|
||||
|
||||
testReadsInContext(view, shard.getGenomeLocs(), Collections.singletonList(read));
|
||||
|
|
@ -121,7 +134,9 @@ public abstract class LocusViewTemplate extends BaseTest {
|
|||
SAMRecordIterator iterator = new SAMRecordIterator(read);
|
||||
|
||||
Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 1, 10)));
|
||||
ShardDataProvider dataProvider = new ShardDataProvider(shard, shard.getGenomeLocs().get(0), iterator);
|
||||
WindowMaker windowMaker = new WindowMaker(iterator,shard.getGenomeLocs());
|
||||
WindowMaker.WindowMakerIterator window = windowMaker.next();
|
||||
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null);
|
||||
LocusView view = createView(dataProvider);
|
||||
|
||||
testReadsInContext(view, shard.getGenomeLocs(), Collections.singletonList(read));
|
||||
|
|
@ -134,7 +149,9 @@ public abstract class LocusViewTemplate extends BaseTest {
|
|||
SAMRecordIterator iterator = new SAMRecordIterator(read1, read2);
|
||||
|
||||
Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 1, 10)));
|
||||
ShardDataProvider dataProvider = new ShardDataProvider(shard, shard.getGenomeLocs().get(0), iterator);
|
||||
WindowMaker windowMaker = new WindowMaker(iterator,shard.getGenomeLocs());
|
||||
WindowMaker.WindowMakerIterator window = windowMaker.next();
|
||||
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null);
|
||||
LocusView view = createView(dataProvider);
|
||||
|
||||
List<SAMRecord> expectedReads = new ArrayList<SAMRecord>();
|
||||
|
|
@ -151,7 +168,9 @@ public abstract class LocusViewTemplate extends BaseTest {
|
|||
SAMRecordIterator iterator = new SAMRecordIterator(read1, read2, read3, read4);
|
||||
|
||||
Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 1, 10)));
|
||||
ShardDataProvider dataProvider = new ShardDataProvider(shard, shard.getGenomeLocs().get(0), iterator);
|
||||
WindowMaker windowMaker = new WindowMaker(iterator,shard.getGenomeLocs());
|
||||
WindowMaker.WindowMakerIterator window = windowMaker.next();
|
||||
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null);
|
||||
LocusView view = createView(dataProvider);
|
||||
|
||||
List<SAMRecord> expectedReads = new ArrayList<SAMRecord>();
|
||||
|
|
@ -168,7 +187,9 @@ public abstract class LocusViewTemplate extends BaseTest {
|
|||
SAMRecordIterator iterator = new SAMRecordIterator(read1, read2, read3, read4);
|
||||
|
||||
Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 1, 10)));
|
||||
ShardDataProvider dataProvider = new ShardDataProvider(shard, shard.getGenomeLocs().get(0), iterator);
|
||||
WindowMaker windowMaker = new WindowMaker(iterator,shard.getGenomeLocs());
|
||||
WindowMaker.WindowMakerIterator window = windowMaker.next();
|
||||
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null);
|
||||
LocusView view = createView(dataProvider);
|
||||
|
||||
List<SAMRecord> expectedReads = new ArrayList<SAMRecord>();
|
||||
|
|
@ -187,7 +208,9 @@ public abstract class LocusViewTemplate extends BaseTest {
|
|||
SAMRecordIterator iterator = new SAMRecordIterator(read1, read2, read3, read4, read5, read6);
|
||||
|
||||
Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 1, 10)));
|
||||
ShardDataProvider dataProvider = new ShardDataProvider(shard, shard.getGenomeLocs().get(0), iterator);
|
||||
WindowMaker windowMaker = new WindowMaker(iterator,shard.getGenomeLocs());
|
||||
WindowMaker.WindowMakerIterator window = windowMaker.next();
|
||||
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null);
|
||||
LocusView view = createView(dataProvider);
|
||||
|
||||
List<SAMRecord> expectedReads = new ArrayList<SAMRecord>();
|
||||
|
|
@ -213,7 +236,9 @@ public abstract class LocusViewTemplate extends BaseTest {
|
|||
read07, read08, read09, read10, read11, read12);
|
||||
|
||||
Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 6, 15)));
|
||||
ShardDataProvider dataProvider = new ShardDataProvider(shard, shard.getGenomeLocs().get(0), iterator);
|
||||
WindowMaker windowMaker = new WindowMaker(iterator,shard.getGenomeLocs());
|
||||
WindowMaker.WindowMakerIterator window = windowMaker.next();
|
||||
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null);
|
||||
LocusView view = createView(dataProvider);
|
||||
|
||||
List<SAMRecord> expectedReads = new ArrayList<SAMRecord>();
|
||||
|
|
@ -227,7 +252,7 @@ public abstract class LocusViewTemplate extends BaseTest {
|
|||
*
|
||||
* @return The correct view to test.
|
||||
*/
|
||||
protected abstract LocusView createView(ShardDataProvider provider);
|
||||
protected abstract LocusView createView(LocusShardDataProvider provider);
|
||||
|
||||
/**
|
||||
* Test the reads according to an independently derived context.
|
||||
|
|
|
|||
|
|
@ -70,7 +70,7 @@ public class ReadReferenceViewTest extends ReferenceViewTemplate {
|
|||
final long contigStart = selectedContig.getSequenceLength() - (readLength - overlap - 1);
|
||||
final long contigStop = selectedContig.getSequenceLength() + overlap;
|
||||
|
||||
ShardDataProvider dataProvider = new ShardDataProvider(null,null,null,sequenceFile,null);
|
||||
ReadShardDataProvider dataProvider = new ReadShardDataProvider(null,null,sequenceFile,null);
|
||||
ReadReferenceView view = new ReadReferenceView(dataProvider);
|
||||
|
||||
SAMRecord rec = buildSAMRecord(selectedContig.getSequenceName(),(int)contigStart,(int)contigStop);
|
||||
|
|
@ -97,7 +97,7 @@ public class ReadReferenceViewTest extends ReferenceViewTemplate {
|
|||
protected void validateLocation( GenomeLoc loc ) {
|
||||
SAMRecord read = buildSAMRecord( loc.getContig(), (int)loc.getStart(), (int)loc.getStop() );
|
||||
|
||||
ShardDataProvider dataProvider = new ShardDataProvider(null,null,null,sequenceFile,null);
|
||||
ReadShardDataProvider dataProvider = new ReadShardDataProvider(null,null,sequenceFile,null);
|
||||
ReadReferenceView view = new ReadReferenceView(dataProvider);
|
||||
|
||||
ReferenceSequence expectedAsSeq = sequenceFile.getSubsequenceAt(loc.getContig(),loc.getStart(),loc.getStop());
|
||||
|
|
|
|||
|
|
@ -53,7 +53,7 @@ public class ReferenceOrderedViewTest extends BaseTest {
|
|||
@Test
|
||||
public void testNoBindings() {
|
||||
Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chrM",1,30)));
|
||||
ShardDataProvider provider = new ShardDataProvider(shard, shard.getGenomeLocs().get(0), null, seq, Collections.<ReferenceOrderedDataSource>emptyList());
|
||||
ShardDataProvider provider = new LocusShardDataProvider(shard, null, shard.getGenomeLocs().get(0), null, seq, Collections.<ReferenceOrderedDataSource>emptyList());
|
||||
ReferenceOrderedView view = new ManagingReferenceOrderedView( provider );
|
||||
|
||||
RefMetaDataTracker tracker = view.getReferenceOrderedDataAtLocus(GenomeLocParser.createGenomeLoc("chrM",10));
|
||||
|
|
@ -71,7 +71,7 @@ public class ReferenceOrderedViewTest extends BaseTest {
|
|||
|
||||
Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chrM",1,30)));
|
||||
|
||||
ShardDataProvider provider = new ShardDataProvider(shard, shard.getGenomeLocs().get(0), null, seq, Collections.singletonList(dataSource));
|
||||
ShardDataProvider provider = new LocusShardDataProvider(shard, null, shard.getGenomeLocs().get(0), null, seq, Collections.singletonList(dataSource));
|
||||
ReferenceOrderedView view = new ManagingReferenceOrderedView( provider );
|
||||
|
||||
RefMetaDataTracker tracker = view.getReferenceOrderedDataAtLocus(GenomeLocParser.createGenomeLoc("chrM",20));
|
||||
|
|
@ -97,7 +97,7 @@ public class ReferenceOrderedViewTest extends BaseTest {
|
|||
|
||||
Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chrM",1,30)));
|
||||
|
||||
ShardDataProvider provider = new ShardDataProvider(shard, shard.getGenomeLocs().get(0), null, seq, Arrays.asList(dataSource1,dataSource2));
|
||||
ShardDataProvider provider = new LocusShardDataProvider(shard, null, shard.getGenomeLocs().get(0), null, seq, Arrays.asList(dataSource1,dataSource2));
|
||||
ReferenceOrderedView view = new ManagingReferenceOrderedView( provider );
|
||||
|
||||
RefMetaDataTracker tracker = view.getReferenceOrderedDataAtLocus(GenomeLocParser.createGenomeLoc("chrM",20));
|
||||
|
|
|
|||
|
|
@ -34,7 +34,7 @@ public class ShardDataProviderTest extends BaseTest {
|
|||
|
||||
@Before
|
||||
public void createProvider() {
|
||||
provider = new ShardDataProvider( null,null,null,null,null );
|
||||
provider = new LocusShardDataProvider( null,null,null,null,null,null );
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ import net.sf.picard.reference.ReferenceSequenceFile;
|
|||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.gatk.Reads;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.ReadShardDataProvider;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategy;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategyFactory;
|
||||
|
|
@ -132,7 +133,7 @@ public class TraverseReadsTest extends BaseTest {
|
|||
fail("Shard == null");
|
||||
}
|
||||
|
||||
ShardDataProvider dataProvider = new ShardDataProvider(shard,null,dataSource.seek(shard),null,null);
|
||||
ShardDataProvider dataProvider = new ReadShardDataProvider(shard,dataSource.seek(shard),null,null);
|
||||
accumulator = traversalEngine.traverse(countReadWalker, dataProvider, accumulator);
|
||||
dataProvider.close();
|
||||
|
||||
|
|
@ -178,7 +179,7 @@ public class TraverseReadsTest extends BaseTest {
|
|||
fail("Shard == null");
|
||||
}
|
||||
|
||||
ShardDataProvider dataProvider = new ShardDataProvider(shard,null,dataSource.seek(shard),null,null);
|
||||
ShardDataProvider dataProvider = new ReadShardDataProvider(shard,dataSource.seek(shard),null,null);
|
||||
accumulator = traversalEngine.traverse(countReadWalker, dataProvider, accumulator);
|
||||
dataProvider.close();
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue