From 4c269b8496fda11d8bbc5ac92308335bb7b5b265 Mon Sep 17 00:00:00 2001 From: hanna Date: Fri, 8 May 2009 00:58:37 +0000 Subject: [PATCH] Cleanup LinearMicroScheduler in preparation for TraverseByLoci inclusion. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@634 348d0f76-0448-11de-a6fe-93d51630548a --- .../simpleDataSources/SAMDataSource.java | 33 +++++++++++-------- .../gatk/executive/LinearMicroScheduler.java | 29 ++-------------- .../sting/gatk/executive/MicroScheduler.java | 13 +++++++- .../gatk/iterators/StingSAMIterator.java | 6 ---- 4 files changed, 34 insertions(+), 47 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/dataSources/simpleDataSources/SAMDataSource.java b/java/src/org/broadinstitute/sting/gatk/dataSources/simpleDataSources/SAMDataSource.java index 3f572a69c..bf7f2622e 100755 --- a/java/src/org/broadinstitute/sting/gatk/dataSources/simpleDataSources/SAMDataSource.java +++ b/java/src/org/broadinstitute/sting/gatk/dataSources/simpleDataSources/SAMDataSource.java @@ -38,15 +38,17 @@ public class SAMDataSource implements SimpleDataSource { /** our log, which we want to capture anything from this class */ protected static Logger logger = Logger.getLogger(SAMDataSource.class); - // are we set to locus mode or read mode for dividing - private boolean locusMode = true; - // How strict should we be with SAM/BAM parsing? protected SAMFileReader.ValidationStringency strictness = SAMFileReader.ValidationStringency.SILENT; // our list of readers private final List samFileList = new ArrayList(); + /** + * SAM header file. + */ + private final SAMFileHeader header; + // used for the reads case, the last count of reads retrieved private long readsTaken = 0; @@ -87,6 +89,7 @@ public class SAMDataSource implements SimpleDataSource { } + header = createHeaderMerger().getMergedHeader(); } /** @@ -120,18 +123,13 @@ public class SAMDataSource implements SimpleDataSource { public MergingSamRecordIterator2 seekLocus(GenomeLoc location) throws SimpleDataSourceLoadException { // right now this is pretty damn heavy, it copies the file list into a reader list every time - SamFileHeaderMerger headerMerger = CreateHeader(); + SamFileHeaderMerger headerMerger = createHeaderMerger(); // make a merging iterator for this record MergingSamRecordIterator2 iter = new MergingSamRecordIterator2(headerMerger); - // we do different things for locus and read modes - if (locusMode) { - iter.queryOverlapping(location.getContig(), (int) location.getStart(), (int) location.getStop() + 1); - } else { - iter.queryContained(location.getContig(), (int) location.getStart(), (int) location.getStop() + 1); - } + iter.queryOverlapping(location.getContig(), (int) location.getStart(), (int) location.getStop() + 1); // return the iterator return iter; @@ -168,6 +166,13 @@ public class SAMDataSource implements SimpleDataSource { includeUnmappedReads = seeUnMappedReads; } + /** + * Gets the (potentially merged) SAM file header. + * @return SAM file header. + */ + public SAMFileHeader getHeader() { + return header; + } /** *

@@ -180,7 +185,7 @@ public class SAMDataSource implements SimpleDataSource { private BoundedReadIterator seekRead(ReadShard shard) throws SimpleDataSourceLoadException { BoundedReadIterator bound = null; - SamFileHeaderMerger headerMerger = CreateHeader(); + SamFileHeaderMerger headerMerger = createHeaderMerger(); MergingSamRecordIterator2 iter = null; if (!intoUnmappedReads) { @@ -193,7 +198,7 @@ public class SAMDataSource implements SimpleDataSource { if (iter != null) { iter.close(); } - iter = new MergingSamRecordIterator2(CreateHeader()); + iter = new MergingSamRecordIterator2(createHeaderMerger()); bound = toUnmappedReads(shard.getSize(), iter); } @@ -204,7 +209,7 @@ public class SAMDataSource implements SimpleDataSource { return bound; } - private SamFileHeaderMerger CreateHeader() { + private SamFileHeaderMerger createHeaderMerger() { // TODO: make extremely less horrible List lst = GetReaderList(); @@ -314,7 +319,7 @@ public class SAMDataSource implements SimpleDataSource { iter.close(); // now merge the headers // right now this is pretty damn heavy, it copies the file list into a reader list every time - SamFileHeaderMerger mg = CreateHeader(); + SamFileHeaderMerger mg = createHeaderMerger(); iter = new MergingSamRecordIterator2(mg); iter.queryContained(lastReadPos.getContig(), 1, Integer.MAX_VALUE); return new BoundedReadIterator(iter,readCount); diff --git a/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java b/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java index 64ee04e60..8744221af 100644 --- a/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java +++ b/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java @@ -7,7 +7,6 @@ import org.broadinstitute.sting.gatk.dataSources.shards.ShardStrategy; import org.broadinstitute.sting.gatk.dataSources.simpleDataSources.SAMDataSource; import org.broadinstitute.sting.gatk.dataSources.simpleDataSources.SimpleDataSourceLoadException; import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; -import org.broadinstitute.sting.gatk.traversals.TraversalEngine; import org.broadinstitute.sting.gatk.traversals.TraverseByReads; import org.broadinstitute.sting.gatk.traversals.TraverseLociByReference; import org.broadinstitute.sting.gatk.traversals.TraverseReads; @@ -24,15 +23,8 @@ import java.util.List; /** A micro-scheduling manager for single-threaded execution of a traversal. */ public class LinearMicroScheduler extends MicroScheduler { - private TraversalEngine traversalEngine = null; - private boolean isAReadWalker = false; - /** get the traversal engine */ - public TraversalEngine getTraversalEngine() { - return traversalEngine; - } - /** * Create a new linear microscheduler to process the given reads and reference. * @@ -62,31 +54,16 @@ public class LinearMicroScheduler extends MicroScheduler { ShardStrategy shardStrategy = getShardStrategy(reference, locations); SAMDataSource dataSource = getReadsDataSource(); - boolean walkerInitialized = false; - Object accumulator = null; + walker.initialize(); + Object accumulator = ((LocusWalker) walker).reduceInit(); for (Shard shard : shardStrategy) { - StingSAMIterator readShard = null; - try { - readShard = dataSource.seek(shard); - } - catch (SimpleDataSourceLoadException ex) { - throw new RuntimeException(ex); - } + StingSAMIterator readShard = dataSource.seek(shard); ReferenceProvider referenceProvider = new ReferenceProvider(reference, shard.getGenomeLoc()); LocusContextProvider locusProvider = new LocusContextProvider(readShard); - // set the sam header of the traversal engine - traversalEngine.setSAMHeader(readShard.getHeader()); - - if (!walkerInitialized) { - walker.initialize(); - accumulator = ((LocusWalker) walker).reduceInit(); - walkerInitialized = true; - } - if (!isAReadWalker) { accumulator = ((TraverseLociByReference) traversalEngine).traverse(walker, shard, referenceProvider, locusProvider, accumulator); } else { diff --git a/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java b/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java index 72554dd63..7e2dca21b 100755 --- a/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java +++ b/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java @@ -37,6 +37,9 @@ public abstract class MicroScheduler { protected IndexedFastaSequenceFile reference; + protected TraversalEngine traversalEngine = null; + + /** * MicroScheduler factory function. Create a microscheduler appropriate for reducing the * selected walker. @@ -70,7 +73,9 @@ public abstract class MicroScheduler { * of the microscheduler shouldn't need to know anything about the traversal engine. * @return The traversal engine. */ - public abstract TraversalEngine getTraversalEngine(); + public TraversalEngine getTraversalEngine() { + return traversalEngine; + } /** * Walks a walker over the given list of intervals. @@ -115,6 +120,12 @@ public abstract class MicroScheduler { catch( FileNotFoundException ex ) { throw new RuntimeException( ex ); } + + // Side effect: initialize the traversal engine with reads data. + // TODO: Give users a dedicated way of getting the header so that the MicroScheduler + // doesn't have to bend over backward providing legacy getters and setters. + traversalEngine.setSAMHeader(dataSource.getHeader()); + return dataSource; } diff --git a/java/src/org/broadinstitute/sting/gatk/iterators/StingSAMIterator.java b/java/src/org/broadinstitute/sting/gatk/iterators/StingSAMIterator.java index ddbdffbc9..f79584ee7 100755 --- a/java/src/org/broadinstitute/sting/gatk/iterators/StingSAMIterator.java +++ b/java/src/org/broadinstitute/sting/gatk/iterators/StingSAMIterator.java @@ -29,10 +29,4 @@ import net.sf.samtools.util.CloseableIterator; * A descriptions should go here. Blame aaron if it's missing. */ public interface StingSAMIterator extends CloseableIterator, Iterable { - - /** - * gets the header from the iterator - * @return the samfileheader for the iterator, null if one is not available - */ - public SAMFileHeader getHeader(); }