2009-04-10 04:28:17 +08:00
|
|
|
package org.broadinstitute.sting.gatk.traversals;
|
|
|
|
|
|
|
|
|
|
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
|
|
|
|
import org.broadinstitute.sting.gatk.walkers.Walker;
|
2009-05-16 05:02:12 +08:00
|
|
|
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
2009-04-10 04:28:17 +08:00
|
|
|
import org.broadinstitute.sting.gatk.LocusContext;
|
2009-05-16 05:02:12 +08:00
|
|
|
import org.broadinstitute.sting.gatk.WalkerManager;
|
2009-04-10 04:28:17 +08:00
|
|
|
import org.broadinstitute.sting.gatk.dataSources.shards.Shard;
|
2009-05-14 02:51:16 +08:00
|
|
|
import org.broadinstitute.sting.gatk.dataSources.providers.ReferenceLocusIterator;
|
2009-05-09 05:27:54 +08:00
|
|
|
import org.broadinstitute.sting.gatk.dataSources.providers.ShardDataProvider;
|
2009-05-14 05:32:05 +08:00
|
|
|
import org.broadinstitute.sting.gatk.dataSources.providers.SeekableLocusContextQueue;
|
2009-05-16 05:02:12 +08:00
|
|
|
import org.broadinstitute.sting.gatk.dataSources.providers.LocusContextQueue;
|
|
|
|
|
import org.broadinstitute.sting.gatk.dataSources.providers.IterableLocusContextQueue;
|
2009-05-22 04:09:32 +08:00
|
|
|
import org.broadinstitute.sting.gatk.dataSources.providers.ReferenceOrderedView;
|
2009-04-10 04:28:17 +08:00
|
|
|
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
|
|
|
|
|
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
|
|
|
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
2009-05-14 02:51:16 +08:00
|
|
|
import org.broadinstitute.sting.gatk.iterators.LocusIterator;
|
2009-04-10 04:28:17 +08:00
|
|
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
|
|
|
|
import org.broadinstitute.sting.utils.Utils;
|
|
|
|
|
import org.apache.log4j.Logger;
|
|
|
|
|
|
|
|
|
|
import java.util.List;
|
|
|
|
|
import java.util.ArrayList;
|
|
|
|
|
import java.io.File;
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* A simple, short-term solution to iterating over all reference positions over a series of
|
|
|
|
|
* genomic locations. Simply overloads the superclass traverse function to go over the entire
|
|
|
|
|
* interval's reference positions.
|
|
|
|
|
*/
|
2009-05-18 09:31:57 +08:00
|
|
|
public class TraverseLoci extends TraversalEngine {
|
2009-04-10 04:28:17 +08:00
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* our log, which we want to capture anything from this class
|
|
|
|
|
*/
|
|
|
|
|
protected static Logger logger = Logger.getLogger(TraversalEngine.class);
|
|
|
|
|
|
2009-05-18 09:31:57 +08:00
|
|
|
public TraverseLoci(List<File> reads, File ref, List<ReferenceOrderedData<? extends ReferenceOrderedDatum>> rods) {
|
2009-04-10 04:28:17 +08:00
|
|
|
super( reads, ref, rods );
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public <M,T> T traverse(Walker<M,T> walker, ArrayList<GenomeLoc> locations) {
|
|
|
|
|
if ( locations.isEmpty() )
|
|
|
|
|
Utils.scareUser("Requested all locations be processed without providing locations to be processed!");
|
|
|
|
|
|
2009-05-18 09:31:57 +08:00
|
|
|
throw new UnsupportedOperationException("This traversal type not supported by TraverseLoci");
|
2009-04-10 04:28:17 +08:00
|
|
|
}
|
|
|
|
|
|
2009-05-09 05:27:54 +08:00
|
|
|
@Override
|
2009-04-10 04:28:17 +08:00
|
|
|
public <M,T> T traverse( Walker<M,T> walker,
|
|
|
|
|
Shard shard,
|
2009-05-09 05:27:54 +08:00
|
|
|
ShardDataProvider dataProvider,
|
2009-04-10 04:28:17 +08:00
|
|
|
T sum ) {
|
2009-05-18 09:31:57 +08:00
|
|
|
logger.debug(String.format("TraverseLoci.traverse Genomic interval is %s", shard.getGenomeLoc()));
|
2009-04-10 04:28:17 +08:00
|
|
|
|
|
|
|
|
if ( !(walker instanceof LocusWalker) )
|
|
|
|
|
throw new IllegalArgumentException("Walker isn't a loci walker!");
|
|
|
|
|
|
|
|
|
|
LocusWalker<M, T> locusWalker = (LocusWalker<M, T>)walker;
|
2009-05-14 02:51:16 +08:00
|
|
|
|
2009-05-16 05:02:12 +08:00
|
|
|
LocusIterator locusIterator = null;
|
|
|
|
|
LocusContextQueue locusContextQueue = null;
|
2009-05-22 04:09:32 +08:00
|
|
|
ReferenceOrderedView referenceOrderedDataView = new ReferenceOrderedView( dataProvider );
|
2009-05-16 05:02:12 +08:00
|
|
|
|
|
|
|
|
DataSource dataSource = WalkerManager.getWalkerDataSource(walker);
|
|
|
|
|
switch( dataSource ) {
|
|
|
|
|
case REFERENCE:
|
|
|
|
|
locusIterator = new ReferenceLocusIterator( dataProvider );
|
|
|
|
|
locusContextQueue = new SeekableLocusContextQueue( dataProvider );
|
|
|
|
|
break;
|
|
|
|
|
case READS:
|
|
|
|
|
IterableLocusContextQueue iterableQueue = new IterableLocusContextQueue( dataProvider );
|
|
|
|
|
locusIterator = iterableQueue;
|
|
|
|
|
locusContextQueue = iterableQueue;
|
|
|
|
|
break;
|
|
|
|
|
default:
|
|
|
|
|
throw new UnsupportedOperationException("Unsupported traversal type: " + dataSource);
|
|
|
|
|
}
|
2009-04-10 04:28:17 +08:00
|
|
|
|
|
|
|
|
// We keep processing while the next reference location is within the interval
|
2009-05-14 02:51:16 +08:00
|
|
|
while( locusIterator.hasNext() ) {
|
|
|
|
|
GenomeLoc site = locusIterator.next();
|
2009-04-10 04:28:17 +08:00
|
|
|
|
|
|
|
|
TraversalStatistics.nRecords++;
|
|
|
|
|
|
|
|
|
|
// Iterate forward to get all reference ordered data covering this locus
|
2009-05-22 04:09:32 +08:00
|
|
|
final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(site);
|
2009-04-10 04:28:17 +08:00
|
|
|
|
2009-05-14 02:51:16 +08:00
|
|
|
LocusContext locus = locusContextQueue.seek( site ).peek();
|
2009-05-09 05:27:54 +08:00
|
|
|
char refBase = dataProvider.getReferenceBase( site );
|
2009-04-10 04:28:17 +08:00
|
|
|
|
|
|
|
|
final boolean keepMeP = locusWalker.filter(tracker, refBase, locus);
|
|
|
|
|
if (keepMeP) {
|
|
|
|
|
M x = locusWalker.map(tracker, refBase, locus);
|
|
|
|
|
sum = locusWalker.reduce(x, sum);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (this.maxReads > 0 && TraversalStatistics.nRecords > this.maxReads) {
|
|
|
|
|
logger.warn(String.format("Maximum number of reads encountered, terminating traversal " + TraversalStatistics.nRecords));
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
printProgress("loci", locus.getLocation());
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return sum;
|
|
|
|
|
}
|
2009-05-08 22:12:45 +08:00
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Temporary override of printOnTraversalDone.
|
|
|
|
|
* TODO: Add some sort of TE.getName() function once all TraversalEngines are ported.
|
|
|
|
|
* @param sum Result of the computation.
|
|
|
|
|
* @param <T> Type of the result.
|
|
|
|
|
*/
|
|
|
|
|
public <T> void printOnTraversalDone( T sum ) {
|
|
|
|
|
printOnTraversalDone( "loci", sum );
|
|
|
|
|
}
|
2009-04-25 03:40:21 +08:00
|
|
|
}
|