Cleanup LinearMicroScheduler in preparation for TraverseByLoci inclusion.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@634 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
21536df308
commit
4c269b8496
|
|
@ -38,15 +38,17 @@ public class SAMDataSource implements SimpleDataSource {
|
||||||
/** our log, which we want to capture anything from this class */
|
/** our log, which we want to capture anything from this class */
|
||||||
protected static Logger logger = Logger.getLogger(SAMDataSource.class);
|
protected static Logger logger = Logger.getLogger(SAMDataSource.class);
|
||||||
|
|
||||||
// are we set to locus mode or read mode for dividing
|
|
||||||
private boolean locusMode = true;
|
|
||||||
|
|
||||||
// How strict should we be with SAM/BAM parsing?
|
// How strict should we be with SAM/BAM parsing?
|
||||||
protected SAMFileReader.ValidationStringency strictness = SAMFileReader.ValidationStringency.SILENT;
|
protected SAMFileReader.ValidationStringency strictness = SAMFileReader.ValidationStringency.SILENT;
|
||||||
|
|
||||||
// our list of readers
|
// our list of readers
|
||||||
private final List<File> samFileList = new ArrayList<File>();
|
private final List<File> samFileList = new ArrayList<File>();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* SAM header file.
|
||||||
|
*/
|
||||||
|
private final SAMFileHeader header;
|
||||||
|
|
||||||
// used for the reads case, the last count of reads retrieved
|
// used for the reads case, the last count of reads retrieved
|
||||||
private long readsTaken = 0;
|
private long readsTaken = 0;
|
||||||
|
|
||||||
|
|
@ -87,6 +89,7 @@ public class SAMDataSource implements SimpleDataSource {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
header = createHeaderMerger().getMergedHeader();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -120,18 +123,13 @@ public class SAMDataSource implements SimpleDataSource {
|
||||||
public MergingSamRecordIterator2 seekLocus(GenomeLoc location) throws SimpleDataSourceLoadException {
|
public MergingSamRecordIterator2 seekLocus(GenomeLoc location) throws SimpleDataSourceLoadException {
|
||||||
|
|
||||||
// right now this is pretty damn heavy, it copies the file list into a reader list every time
|
// right now this is pretty damn heavy, it copies the file list into a reader list every time
|
||||||
SamFileHeaderMerger headerMerger = CreateHeader();
|
SamFileHeaderMerger headerMerger = createHeaderMerger();
|
||||||
|
|
||||||
// make a merging iterator for this record
|
// make a merging iterator for this record
|
||||||
MergingSamRecordIterator2 iter = new MergingSamRecordIterator2(headerMerger);
|
MergingSamRecordIterator2 iter = new MergingSamRecordIterator2(headerMerger);
|
||||||
|
|
||||||
|
|
||||||
// we do different things for locus and read modes
|
// we do different things for locus and read modes
|
||||||
if (locusMode) {
|
iter.queryOverlapping(location.getContig(), (int) location.getStart(), (int) location.getStop() + 1);
|
||||||
iter.queryOverlapping(location.getContig(), (int) location.getStart(), (int) location.getStop() + 1);
|
|
||||||
} else {
|
|
||||||
iter.queryContained(location.getContig(), (int) location.getStart(), (int) location.getStop() + 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
// return the iterator
|
// return the iterator
|
||||||
return iter;
|
return iter;
|
||||||
|
|
@ -168,6 +166,13 @@ public class SAMDataSource implements SimpleDataSource {
|
||||||
includeUnmappedReads = seeUnMappedReads;
|
includeUnmappedReads = seeUnMappedReads;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the (potentially merged) SAM file header.
|
||||||
|
* @return SAM file header.
|
||||||
|
*/
|
||||||
|
public SAMFileHeader getHeader() {
|
||||||
|
return header;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* <p>
|
* <p>
|
||||||
|
|
@ -180,7 +185,7 @@ public class SAMDataSource implements SimpleDataSource {
|
||||||
private BoundedReadIterator seekRead(ReadShard shard) throws SimpleDataSourceLoadException {
|
private BoundedReadIterator seekRead(ReadShard shard) throws SimpleDataSourceLoadException {
|
||||||
|
|
||||||
BoundedReadIterator bound = null;
|
BoundedReadIterator bound = null;
|
||||||
SamFileHeaderMerger headerMerger = CreateHeader();
|
SamFileHeaderMerger headerMerger = createHeaderMerger();
|
||||||
MergingSamRecordIterator2 iter = null;
|
MergingSamRecordIterator2 iter = null;
|
||||||
|
|
||||||
if (!intoUnmappedReads) {
|
if (!intoUnmappedReads) {
|
||||||
|
|
@ -193,7 +198,7 @@ public class SAMDataSource implements SimpleDataSource {
|
||||||
if (iter != null) {
|
if (iter != null) {
|
||||||
iter.close();
|
iter.close();
|
||||||
}
|
}
|
||||||
iter = new MergingSamRecordIterator2(CreateHeader());
|
iter = new MergingSamRecordIterator2(createHeaderMerger());
|
||||||
bound = toUnmappedReads(shard.getSize(), iter);
|
bound = toUnmappedReads(shard.getSize(), iter);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -204,7 +209,7 @@ public class SAMDataSource implements SimpleDataSource {
|
||||||
return bound;
|
return bound;
|
||||||
}
|
}
|
||||||
|
|
||||||
private SamFileHeaderMerger CreateHeader() {
|
private SamFileHeaderMerger createHeaderMerger() {
|
||||||
// TODO: make extremely less horrible
|
// TODO: make extremely less horrible
|
||||||
List<SAMFileReader> lst = GetReaderList();
|
List<SAMFileReader> lst = GetReaderList();
|
||||||
|
|
||||||
|
|
@ -314,7 +319,7 @@ public class SAMDataSource implements SimpleDataSource {
|
||||||
iter.close();
|
iter.close();
|
||||||
// now merge the headers
|
// now merge the headers
|
||||||
// right now this is pretty damn heavy, it copies the file list into a reader list every time
|
// right now this is pretty damn heavy, it copies the file list into a reader list every time
|
||||||
SamFileHeaderMerger mg = CreateHeader();
|
SamFileHeaderMerger mg = createHeaderMerger();
|
||||||
iter = new MergingSamRecordIterator2(mg);
|
iter = new MergingSamRecordIterator2(mg);
|
||||||
iter.queryContained(lastReadPos.getContig(), 1, Integer.MAX_VALUE);
|
iter.queryContained(lastReadPos.getContig(), 1, Integer.MAX_VALUE);
|
||||||
return new BoundedReadIterator(iter,readCount);
|
return new BoundedReadIterator(iter,readCount);
|
||||||
|
|
|
||||||
|
|
@ -7,7 +7,6 @@ import org.broadinstitute.sting.gatk.dataSources.shards.ShardStrategy;
|
||||||
import org.broadinstitute.sting.gatk.dataSources.simpleDataSources.SAMDataSource;
|
import org.broadinstitute.sting.gatk.dataSources.simpleDataSources.SAMDataSource;
|
||||||
import org.broadinstitute.sting.gatk.dataSources.simpleDataSources.SimpleDataSourceLoadException;
|
import org.broadinstitute.sting.gatk.dataSources.simpleDataSources.SimpleDataSourceLoadException;
|
||||||
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||||
import org.broadinstitute.sting.gatk.traversals.TraversalEngine;
|
|
||||||
import org.broadinstitute.sting.gatk.traversals.TraverseByReads;
|
import org.broadinstitute.sting.gatk.traversals.TraverseByReads;
|
||||||
import org.broadinstitute.sting.gatk.traversals.TraverseLociByReference;
|
import org.broadinstitute.sting.gatk.traversals.TraverseLociByReference;
|
||||||
import org.broadinstitute.sting.gatk.traversals.TraverseReads;
|
import org.broadinstitute.sting.gatk.traversals.TraverseReads;
|
||||||
|
|
@ -24,15 +23,8 @@ import java.util.List;
|
||||||
/** A micro-scheduling manager for single-threaded execution of a traversal. */
|
/** A micro-scheduling manager for single-threaded execution of a traversal. */
|
||||||
public class LinearMicroScheduler extends MicroScheduler {
|
public class LinearMicroScheduler extends MicroScheduler {
|
||||||
|
|
||||||
private TraversalEngine traversalEngine = null;
|
|
||||||
|
|
||||||
private boolean isAReadWalker = false;
|
private boolean isAReadWalker = false;
|
||||||
|
|
||||||
/** get the traversal engine */
|
|
||||||
public TraversalEngine getTraversalEngine() {
|
|
||||||
return traversalEngine;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a new linear microscheduler to process the given reads and reference.
|
* Create a new linear microscheduler to process the given reads and reference.
|
||||||
*
|
*
|
||||||
|
|
@ -62,31 +54,16 @@ public class LinearMicroScheduler extends MicroScheduler {
|
||||||
ShardStrategy shardStrategy = getShardStrategy(reference, locations);
|
ShardStrategy shardStrategy = getShardStrategy(reference, locations);
|
||||||
SAMDataSource dataSource = getReadsDataSource();
|
SAMDataSource dataSource = getReadsDataSource();
|
||||||
|
|
||||||
boolean walkerInitialized = false;
|
walker.initialize();
|
||||||
Object accumulator = null;
|
Object accumulator = ((LocusWalker<?, ?>) walker).reduceInit();
|
||||||
|
|
||||||
for (Shard shard : shardStrategy) {
|
for (Shard shard : shardStrategy) {
|
||||||
|
|
||||||
StingSAMIterator readShard = null;
|
StingSAMIterator readShard = dataSource.seek(shard);
|
||||||
try {
|
|
||||||
readShard = dataSource.seek(shard);
|
|
||||||
}
|
|
||||||
catch (SimpleDataSourceLoadException ex) {
|
|
||||||
throw new RuntimeException(ex);
|
|
||||||
}
|
|
||||||
|
|
||||||
ReferenceProvider referenceProvider = new ReferenceProvider(reference, shard.getGenomeLoc());
|
ReferenceProvider referenceProvider = new ReferenceProvider(reference, shard.getGenomeLoc());
|
||||||
LocusContextProvider locusProvider = new LocusContextProvider(readShard);
|
LocusContextProvider locusProvider = new LocusContextProvider(readShard);
|
||||||
|
|
||||||
// set the sam header of the traversal engine
|
|
||||||
traversalEngine.setSAMHeader(readShard.getHeader());
|
|
||||||
|
|
||||||
if (!walkerInitialized) {
|
|
||||||
walker.initialize();
|
|
||||||
accumulator = ((LocusWalker<?, ?>) walker).reduceInit();
|
|
||||||
walkerInitialized = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!isAReadWalker) {
|
if (!isAReadWalker) {
|
||||||
accumulator = ((TraverseLociByReference) traversalEngine).traverse(walker, shard, referenceProvider, locusProvider, accumulator);
|
accumulator = ((TraverseLociByReference) traversalEngine).traverse(walker, shard, referenceProvider, locusProvider, accumulator);
|
||||||
} else {
|
} else {
|
||||||
|
|
|
||||||
|
|
@ -37,6 +37,9 @@ public abstract class MicroScheduler {
|
||||||
|
|
||||||
protected IndexedFastaSequenceFile reference;
|
protected IndexedFastaSequenceFile reference;
|
||||||
|
|
||||||
|
protected TraversalEngine traversalEngine = null;
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* MicroScheduler factory function. Create a microscheduler appropriate for reducing the
|
* MicroScheduler factory function. Create a microscheduler appropriate for reducing the
|
||||||
* selected walker.
|
* selected walker.
|
||||||
|
|
@ -70,7 +73,9 @@ public abstract class MicroScheduler {
|
||||||
* of the microscheduler shouldn't need to know anything about the traversal engine.
|
* of the microscheduler shouldn't need to know anything about the traversal engine.
|
||||||
* @return The traversal engine.
|
* @return The traversal engine.
|
||||||
*/
|
*/
|
||||||
public abstract TraversalEngine getTraversalEngine();
|
public TraversalEngine getTraversalEngine() {
|
||||||
|
return traversalEngine;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Walks a walker over the given list of intervals.
|
* Walks a walker over the given list of intervals.
|
||||||
|
|
@ -115,6 +120,12 @@ public abstract class MicroScheduler {
|
||||||
catch( FileNotFoundException ex ) {
|
catch( FileNotFoundException ex ) {
|
||||||
throw new RuntimeException( ex );
|
throw new RuntimeException( ex );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Side effect: initialize the traversal engine with reads data.
|
||||||
|
// TODO: Give users a dedicated way of getting the header so that the MicroScheduler
|
||||||
|
// doesn't have to bend over backward providing legacy getters and setters.
|
||||||
|
traversalEngine.setSAMHeader(dataSource.getHeader());
|
||||||
|
|
||||||
return dataSource;
|
return dataSource;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -29,10 +29,4 @@ import net.sf.samtools.util.CloseableIterator;
|
||||||
* A descriptions should go here. Blame aaron if it's missing.
|
* A descriptions should go here. Blame aaron if it's missing.
|
||||||
*/
|
*/
|
||||||
public interface StingSAMIterator extends CloseableIterator<SAMRecord>, Iterable<SAMRecord> {
|
public interface StingSAMIterator extends CloseableIterator<SAMRecord>, Iterable<SAMRecord> {
|
||||||
|
|
||||||
/**
|
|
||||||
* gets the header from the iterator
|
|
||||||
* @return the samfileheader for the iterator, null if one is not available
|
|
||||||
*/
|
|
||||||
public SAMFileHeader getHeader();
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue