2009-03-27 23:40:45 +08:00
|
|
|
package org.broadinstitute.sting.gatk.executive;
|
|
|
|
|
|
2009-04-16 02:29:38 +08:00
|
|
|
import org.apache.log4j.Logger;
|
|
|
|
|
import org.broadinstitute.sting.gatk.dataSources.providers.LocusContextProvider;
|
|
|
|
|
import org.broadinstitute.sting.gatk.dataSources.providers.ReferenceProvider;
|
|
|
|
|
import org.broadinstitute.sting.gatk.dataSources.shards.Shard;
|
2009-04-10 04:28:17 +08:00
|
|
|
import org.broadinstitute.sting.gatk.dataSources.shards.ShardStrategy;
|
|
|
|
|
import org.broadinstitute.sting.gatk.dataSources.shards.ShardStrategyFactory;
|
2009-04-15 21:52:56 +08:00
|
|
|
import org.broadinstitute.sting.gatk.dataSources.simpleDataSources.SAMDataSource;
|
2009-04-10 04:28:17 +08:00
|
|
|
import org.broadinstitute.sting.gatk.dataSources.simpleDataSources.SimpleDataSourceLoadException;
|
2009-04-16 02:29:38 +08:00
|
|
|
import org.broadinstitute.sting.gatk.iterators.MergingSamRecordIterator2;
|
2009-04-10 04:28:17 +08:00
|
|
|
import org.broadinstitute.sting.gatk.iterators.ReferenceIterator;
|
|
|
|
|
import org.broadinstitute.sting.gatk.traversals.TraversalEngine;
|
2009-04-16 02:29:38 +08:00
|
|
|
import org.broadinstitute.sting.gatk.traversals.TraverseLociByReference;
|
|
|
|
|
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
|
|
|
|
import org.broadinstitute.sting.gatk.walkers.Walker;
|
|
|
|
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
|
|
|
|
import org.broadinstitute.sting.utils.fasta.FastaSequenceFile2;
|
2009-04-10 04:28:17 +08:00
|
|
|
|
2009-04-16 02:29:38 +08:00
|
|
|
import java.io.BufferedReader;
|
2009-04-10 04:28:17 +08:00
|
|
|
import java.io.File;
|
2009-04-16 02:29:38 +08:00
|
|
|
import java.io.FileReader;
|
2009-04-10 04:28:17 +08:00
|
|
|
import java.io.IOException;
|
2009-04-16 02:29:38 +08:00
|
|
|
import java.util.ArrayList;
|
|
|
|
|
import java.util.List;
|
2009-03-27 23:40:45 +08:00
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* A micro-scheduling manager for N-way threaded execution of a traversal
|
|
|
|
|
*
|
|
|
|
|
*/
|
|
|
|
|
public class MicroManager {
|
2009-04-23 02:29:44 +08:00
|
|
|
private static long SHARD_SIZE = 100000L;
|
2009-04-10 04:28:17 +08:00
|
|
|
|
|
|
|
|
private File reads;
|
|
|
|
|
private FastaSequenceFile2 ref;
|
|
|
|
|
|
|
|
|
|
private TraverseLociByReference traversalEngine = null;
|
2009-03-27 23:40:45 +08:00
|
|
|
|
2009-04-10 04:28:17 +08:00
|
|
|
protected static Logger logger = Logger.getLogger(MicroManager.class);
|
|
|
|
|
|
2009-04-16 02:29:38 +08:00
|
|
|
protected List<GenomeLoc> intervalList = null;
|
|
|
|
|
|
2009-04-10 04:28:17 +08:00
|
|
|
public TraversalEngine getTraversalEngine() {
|
|
|
|
|
return traversalEngine;
|
2009-03-27 23:40:45 +08:00
|
|
|
}
|
|
|
|
|
|
2009-04-10 04:28:17 +08:00
|
|
|
public MicroManager( File reads, // the reads file
|
|
|
|
|
File refFile, // the reference file driving the traversal
|
|
|
|
|
int nThreadsToUse ) { // maximum number of threads to use to do the work
|
|
|
|
|
|
|
|
|
|
this.reads = reads;
|
|
|
|
|
ref = new FastaSequenceFile2(refFile);
|
|
|
|
|
GenomeLoc.setupRefContigOrdering(ref);
|
2009-03-27 23:40:45 +08:00
|
|
|
|
2009-04-10 04:28:17 +08:00
|
|
|
traversalEngine = new TraverseLociByReference( reads, refFile, new java.util.ArrayList() );
|
2009-03-27 23:40:45 +08:00
|
|
|
}
|
2009-04-10 04:28:17 +08:00
|
|
|
|
2009-04-16 02:29:38 +08:00
|
|
|
public void setIntervalList(List<GenomeLoc> intervalList) {
|
|
|
|
|
this.intervalList = intervalList;
|
|
|
|
|
}
|
2009-04-10 04:28:17 +08:00
|
|
|
|
|
|
|
|
public void execute( Walker walker, // the analysis technique to use.
|
|
|
|
|
List<GenomeLoc> locations ) { // list of work to do
|
2009-04-11 06:09:01 +08:00
|
|
|
ShardStrategy shardStrategy = null;
|
|
|
|
|
if( locations != null )
|
|
|
|
|
shardStrategy = ShardStrategyFactory.shatter( ShardStrategyFactory.SHATTER_STRATEGY.LINEAR,
|
|
|
|
|
ref.getSequenceDictionary(),
|
|
|
|
|
SHARD_SIZE,
|
|
|
|
|
locations );
|
|
|
|
|
else
|
|
|
|
|
shardStrategy = ShardStrategyFactory.shatter( ShardStrategyFactory.SHATTER_STRATEGY.LINEAR,
|
|
|
|
|
ref.getSequenceDictionary(),
|
|
|
|
|
SHARD_SIZE );
|
|
|
|
|
|
2009-04-11 04:50:28 +08:00
|
|
|
ReferenceIterator refIter = new ReferenceIterator(ref);
|
2009-04-15 21:52:56 +08:00
|
|
|
SAMDataSource dataSource = null;
|
2009-04-11 06:09:01 +08:00
|
|
|
|
|
|
|
|
try {
|
2009-04-16 02:29:38 +08:00
|
|
|
// todo: remove this code when we acutally handle command line args of multiple bam files
|
|
|
|
|
ArrayList<File> fl = new ArrayList<File>();
|
|
|
|
|
if (reads.getName().endsWith(".list")) {
|
|
|
|
|
BufferedReader bis = new BufferedReader(new FileReader(reads));
|
|
|
|
|
String line = null;
|
|
|
|
|
while ((line = bis.readLine()) != null) {
|
|
|
|
|
if (!line.equals("")){
|
|
|
|
|
fl.add(new File(line));
|
|
|
|
|
}
|
|
|
|
|
}
|
2009-04-22 08:28:08 +08:00
|
|
|
} else if (reads.getCanonicalPath().indexOf(",") > 0) {
|
|
|
|
|
for (String bamFile : reads.getCanonicalPath().split(",")) {
|
|
|
|
|
fl.add(new File(bamFile));
|
|
|
|
|
}
|
2009-04-16 02:29:38 +08:00
|
|
|
|
|
|
|
|
} else {
|
|
|
|
|
fl.add(reads);
|
|
|
|
|
}
|
|
|
|
|
dataSource = new SAMDataSource( fl );
|
2009-04-11 06:09:01 +08:00
|
|
|
}
|
|
|
|
|
catch( SimpleDataSourceLoadException ex ) {
|
|
|
|
|
throw new RuntimeException( ex );
|
2009-04-16 02:29:38 +08:00
|
|
|
}
|
2009-04-11 06:09:01 +08:00
|
|
|
catch( IOException ex ) {
|
|
|
|
|
throw new RuntimeException( ex );
|
|
|
|
|
}
|
2009-04-11 04:50:28 +08:00
|
|
|
|
2009-04-24 05:07:11 +08:00
|
|
|
boolean walkerInitialized = false;
|
|
|
|
|
Object accumulator = null;
|
2009-04-10 04:28:17 +08:00
|
|
|
|
|
|
|
|
for(Shard shard: shardStrategy) {
|
2009-04-16 02:29:38 +08:00
|
|
|
// CloseableIterator<SAMRecord> readShard = null;
|
|
|
|
|
MergingSamRecordIterator2 readShard = null;
|
2009-04-10 04:28:17 +08:00
|
|
|
try {
|
|
|
|
|
readShard = dataSource.seek( shard.getGenomeLoc() );
|
|
|
|
|
}
|
|
|
|
|
catch( SimpleDataSourceLoadException ex ) {
|
|
|
|
|
throw new RuntimeException( ex );
|
|
|
|
|
}
|
2009-04-11 06:09:01 +08:00
|
|
|
|
2009-04-11 04:50:28 +08:00
|
|
|
ReferenceProvider referenceProvider = new ReferenceProvider( refIter );
|
2009-04-10 04:28:17 +08:00
|
|
|
LocusContextProvider locusProvider = new LocusContextProvider( readShard );
|
|
|
|
|
|
2009-04-24 04:34:52 +08:00
|
|
|
// set the sam header of the traversal engine
|
|
|
|
|
traversalEngine.setSAMHeader(readShard.getMergedHeader());
|
2009-04-24 05:07:11 +08:00
|
|
|
|
|
|
|
|
if (!walkerInitialized) {
|
|
|
|
|
walker.initialize();
|
|
|
|
|
accumulator = ((LocusWalker<?,?>)walker).reduceInit();
|
|
|
|
|
walkerInitialized = true;
|
|
|
|
|
}
|
|
|
|
|
|
2009-04-10 04:28:17 +08:00
|
|
|
accumulator = traversalEngine.traverse( walker, shard, referenceProvider, locusProvider, accumulator );
|
2009-04-16 02:29:38 +08:00
|
|
|
readShard.close();
|
2009-04-10 04:28:17 +08:00
|
|
|
}
|
|
|
|
|
|
2009-04-16 02:29:38 +08:00
|
|
|
traversalEngine.printOnTraversalDone("loci", accumulator);
|
|
|
|
|
walker.onTraversalDone(accumulator);
|
2009-04-10 04:28:17 +08:00
|
|
|
}
|
|
|
|
|
|
2009-04-16 02:29:38 +08:00
|
|
|
|
2009-03-27 23:40:45 +08:00
|
|
|
}
|