Cleanup LinearMicroScheduler in preparation for TraverseByLoci inclusion.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@634 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
hanna 2009-05-08 00:58:37 +00:00
parent 21536df308
commit 4c269b8496
4 changed files with 34 additions and 47 deletions

View File

@ -38,15 +38,17 @@ public class SAMDataSource implements SimpleDataSource {
/** our log, which we want to capture anything from this class */
protected static Logger logger = Logger.getLogger(SAMDataSource.class);
// are we set to locus mode or read mode for dividing
private boolean locusMode = true;
// How strict should we be with SAM/BAM parsing?
protected SAMFileReader.ValidationStringency strictness = SAMFileReader.ValidationStringency.SILENT;
// our list of readers
private final List<File> samFileList = new ArrayList<File>();
/**
* SAM header file.
*/
private final SAMFileHeader header;
// used for the reads case, the last count of reads retrieved
private long readsTaken = 0;
@ -87,6 +89,7 @@ public class SAMDataSource implements SimpleDataSource {
}
header = createHeaderMerger().getMergedHeader();
}
/**
@ -120,18 +123,13 @@ public class SAMDataSource implements SimpleDataSource {
public MergingSamRecordIterator2 seekLocus(GenomeLoc location) throws SimpleDataSourceLoadException {
// right now this is pretty damn heavy, it copies the file list into a reader list every time
SamFileHeaderMerger headerMerger = CreateHeader();
SamFileHeaderMerger headerMerger = createHeaderMerger();
// make a merging iterator for this record
MergingSamRecordIterator2 iter = new MergingSamRecordIterator2(headerMerger);
// we do different things for locus and read modes
if (locusMode) {
iter.queryOverlapping(location.getContig(), (int) location.getStart(), (int) location.getStop() + 1);
} else {
iter.queryContained(location.getContig(), (int) location.getStart(), (int) location.getStop() + 1);
}
iter.queryOverlapping(location.getContig(), (int) location.getStart(), (int) location.getStop() + 1);
// return the iterator
return iter;
@ -168,6 +166,13 @@ public class SAMDataSource implements SimpleDataSource {
includeUnmappedReads = seeUnMappedReads;
}
/**
 * Gets the (potentially merged) SAM file header.
 * The header is built once at construction time via
 * createHeaderMerger().getMergedHeader(), so repeated calls return the
 * same cached, immutable reference rather than re-merging per call.
 * @return SAM file header, merged across all input SAM/BAM files.
 */
public SAMFileHeader getHeader() {
return header;
}
/**
* <p>
@ -180,7 +185,7 @@ public class SAMDataSource implements SimpleDataSource {
private BoundedReadIterator seekRead(ReadShard shard) throws SimpleDataSourceLoadException {
BoundedReadIterator bound = null;
SamFileHeaderMerger headerMerger = CreateHeader();
SamFileHeaderMerger headerMerger = createHeaderMerger();
MergingSamRecordIterator2 iter = null;
if (!intoUnmappedReads) {
@ -193,7 +198,7 @@ public class SAMDataSource implements SimpleDataSource {
if (iter != null) {
iter.close();
}
iter = new MergingSamRecordIterator2(CreateHeader());
iter = new MergingSamRecordIterator2(createHeaderMerger());
bound = toUnmappedReads(shard.getSize(), iter);
}
@ -204,7 +209,7 @@ public class SAMDataSource implements SimpleDataSource {
return bound;
}
private SamFileHeaderMerger CreateHeader() {
private SamFileHeaderMerger createHeaderMerger() {
// TODO: make extremely less horrible
List<SAMFileReader> lst = GetReaderList();
@ -314,7 +319,7 @@ public class SAMDataSource implements SimpleDataSource {
iter.close();
// now merge the headers
// right now this is pretty damn heavy, it copies the file list into a reader list every time
SamFileHeaderMerger mg = CreateHeader();
SamFileHeaderMerger mg = createHeaderMerger();
iter = new MergingSamRecordIterator2(mg);
iter.queryContained(lastReadPos.getContig(), 1, Integer.MAX_VALUE);
return new BoundedReadIterator(iter,readCount);

View File

@ -7,7 +7,6 @@ import org.broadinstitute.sting.gatk.dataSources.shards.ShardStrategy;
import org.broadinstitute.sting.gatk.dataSources.simpleDataSources.SAMDataSource;
import org.broadinstitute.sting.gatk.dataSources.simpleDataSources.SimpleDataSourceLoadException;
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
import org.broadinstitute.sting.gatk.traversals.TraversalEngine;
import org.broadinstitute.sting.gatk.traversals.TraverseByReads;
import org.broadinstitute.sting.gatk.traversals.TraverseLociByReference;
import org.broadinstitute.sting.gatk.traversals.TraverseReads;
@ -24,15 +23,8 @@ import java.util.List;
/** A micro-scheduling manager for single-threaded execution of a traversal. */
public class LinearMicroScheduler extends MicroScheduler {
private TraversalEngine traversalEngine = null;
private boolean isAReadWalker = false;
/**
 * Gets the traversal engine backing this scheduler.
 * @return the traversal engine, or null if one has not yet been assigned.
 */
public TraversalEngine getTraversalEngine() {
return traversalEngine;
}
/**
* Create a new linear microscheduler to process the given reads and reference.
*
@ -62,31 +54,16 @@ public class LinearMicroScheduler extends MicroScheduler {
ShardStrategy shardStrategy = getShardStrategy(reference, locations);
SAMDataSource dataSource = getReadsDataSource();
boolean walkerInitialized = false;
Object accumulator = null;
walker.initialize();
Object accumulator = ((LocusWalker<?, ?>) walker).reduceInit();
for (Shard shard : shardStrategy) {
StingSAMIterator readShard = null;
try {
readShard = dataSource.seek(shard);
}
catch (SimpleDataSourceLoadException ex) {
throw new RuntimeException(ex);
}
StingSAMIterator readShard = dataSource.seek(shard);
ReferenceProvider referenceProvider = new ReferenceProvider(reference, shard.getGenomeLoc());
LocusContextProvider locusProvider = new LocusContextProvider(readShard);
// set the sam header of the traversal engine
traversalEngine.setSAMHeader(readShard.getHeader());
if (!walkerInitialized) {
walker.initialize();
accumulator = ((LocusWalker<?, ?>) walker).reduceInit();
walkerInitialized = true;
}
if (!isAReadWalker) {
accumulator = ((TraverseLociByReference) traversalEngine).traverse(walker, shard, referenceProvider, locusProvider, accumulator);
} else {

View File

@ -37,6 +37,9 @@ public abstract class MicroScheduler {
protected IndexedFastaSequenceFile reference;
protected TraversalEngine traversalEngine = null;
/**
* MicroScheduler factory function. Create a microscheduler appropriate for reducing the
* selected walker.
@ -70,7 +73,9 @@ public abstract class MicroScheduler {
* of the microscheduler shouldn't need to know anything about the traversal engine.
* @return The traversal engine.
*/
public abstract TraversalEngine getTraversalEngine();
// Concrete accessor for the protected traversalEngine field declared on this class;
// returns null until the engine is created by a subclass/factory.
public TraversalEngine getTraversalEngine() {
return traversalEngine;
}
/**
* Walks a walker over the given list of intervals.
@ -115,6 +120,12 @@ public abstract class MicroScheduler {
catch( FileNotFoundException ex ) {
throw new RuntimeException( ex );
}
// Side effect: initialize the traversal engine with reads data.
// TODO: Give users a dedicated way of getting the header so that the MicroScheduler
// doesn't have to bend over backward providing legacy getters and setters.
traversalEngine.setSAMHeader(dataSource.getHeader());
return dataSource;
}

View File

@ -29,10 +29,4 @@ import net.sf.samtools.util.CloseableIterator;
* A description should go here. Blame aaron if it's missing.
*/
public interface StingSAMIterator extends CloseableIterator<SAMRecord>, Iterable<SAMRecord> {
/**
 * Gets the SAM file header associated with this iterator.
 * @return the SAMFileHeader for this iterator, or null if one is not available.
 */
public SAMFileHeader getHeader();
}