From 0aba688e6f68cdf9f9f229587343cdffb5bae40c Mon Sep 17 00:00:00 2001 From: aaron Date: Wed, 6 May 2009 21:40:41 +0000 Subject: [PATCH] Added a interface that all our SAMRecord iterators should try to code to. This is in the effort to keep our code generic git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@609 348d0f76-0448-11de-a6fe-93d51630548a --- .../simpleDataSources/SAMDataSource.java | 4 +- .../gatk/executive/LinearMicroScheduler.java | 13 ++-- .../gatk/iterators/BoundedReadIterator.java | 6 +- .../iterators/MergingSamRecordIterator2.java | 6 +- .../gatk/iterators/StingSAMIterator.java | 38 ++++++++++++ .../gatk/traversals/TraverseReadsTest.java | 62 +++++++++---------- 6 files changed, 86 insertions(+), 43 deletions(-) create mode 100755 java/src/org/broadinstitute/sting/gatk/iterators/StingSAMIterator.java diff --git a/java/src/org/broadinstitute/sting/gatk/dataSources/simpleDataSources/SAMDataSource.java b/java/src/org/broadinstitute/sting/gatk/dataSources/simpleDataSources/SAMDataSource.java index 12f0bee5b..b2965ac14 100755 --- a/java/src/org/broadinstitute/sting/gatk/dataSources/simpleDataSources/SAMDataSource.java +++ b/java/src/org/broadinstitute/sting/gatk/dataSources/simpleDataSources/SAMDataSource.java @@ -269,7 +269,7 @@ public class SAMDataSource implements SimpleDataSource { private BoundedReadIterator fastMappedReadSeek(long readCount, MergingSamRecordIterator2 iter) throws SimpleDataSourceLoadException { BoundedReadIterator bound;// is this the first time we're doing this? 
if (lastReadPos == null) { - lastReadPos = new GenomeLoc(iter.getMergedHeader().getSequenceDictionary().getSequence(0).getSequenceIndex(), 0, 0); + lastReadPos = new GenomeLoc(iter.getHeader().getSequenceDictionary().getSequence(0).getSequenceIndex(), 0, 0); iter.queryContained(lastReadPos.getContig(), 1, -1); bound = new BoundedReadIterator(iter, readCount); this.readsTaken = readCount; @@ -339,7 +339,7 @@ public class SAMDataSource implements SimpleDataSource { } // in case we're run out of reads, get out else { - throw new StingException("Danger"); + throw new StingException("Danger: weve run out reads in fastMappedReadSeek"); //return null; } bound = new BoundedReadIterator(iter, readCount); diff --git a/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java b/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java index 1ff7725d5..b56d1a81d 100644 --- a/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java +++ b/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java @@ -7,9 +7,11 @@ import org.broadinstitute.sting.gatk.dataSources.shards.ShardStrategy; import org.broadinstitute.sting.gatk.dataSources.simpleDataSources.SAMDataSource; import org.broadinstitute.sting.gatk.dataSources.simpleDataSources.SimpleDataSourceLoadException; import org.broadinstitute.sting.gatk.iterators.MergingSamRecordIterator2; +import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; import org.broadinstitute.sting.gatk.traversals.TraversalEngine; import org.broadinstitute.sting.gatk.traversals.TraverseLociByReference; import org.broadinstitute.sting.gatk.walkers.LocusWalker; +import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.utils.GenomeLoc; @@ -21,7 +23,7 @@ import java.util.List; */ public class LinearMicroScheduler extends MicroScheduler { - private TraverseLociByReference traversalEngine = null; + 
private TraversalEngine traversalEngine = null; public TraversalEngine getTraversalEngine() { return traversalEngine; @@ -46,12 +48,15 @@ public class LinearMicroScheduler extends MicroScheduler { ShardStrategy shardStrategy = getShardStrategy( reference, locations ); SAMDataSource dataSource = getReadsDataSource(); + // determine if we're a read walker: they get a slightly different, but not in any way worse execute methodology + boolean readwalker = (walker instanceof ReadWalker) ? true : false; + boolean walkerInitialized = false; Object accumulator = null; for(Shard shard: shardStrategy) { - MergingSamRecordIterator2 readShard = null; + StingSAMIterator readShard = null; try { readShard = (MergingSamRecordIterator2)dataSource.seek( shard ); } @@ -63,7 +68,7 @@ public class LinearMicroScheduler extends MicroScheduler { LocusContextProvider locusProvider = new LocusContextProvider( readShard ); // set the sam header of the traversal engine - traversalEngine.setSAMHeader(readShard.getMergedHeader()); + traversalEngine.setSAMHeader(readShard.getHeader()); if (!walkerInitialized) { walker.initialize(); @@ -71,7 +76,7 @@ public class LinearMicroScheduler extends MicroScheduler { walkerInitialized = true; } - accumulator = traversalEngine.traverse( walker, shard, referenceProvider, locusProvider, accumulator ); + accumulator = ((TraverseLociByReference)traversalEngine).traverse( walker, shard, referenceProvider, locusProvider, accumulator ); readShard.close(); } diff --git a/java/src/org/broadinstitute/sting/gatk/iterators/BoundedReadIterator.java b/java/src/org/broadinstitute/sting/gatk/iterators/BoundedReadIterator.java index 29ac75e32..59e920d15 100755 --- a/java/src/org/broadinstitute/sting/gatk/iterators/BoundedReadIterator.java +++ b/java/src/org/broadinstitute/sting/gatk/iterators/BoundedReadIterator.java @@ -33,7 +33,7 @@ import java.util.Iterator; * This class implements a read iterator that is bounded by the number of reads * it will produce over the 
iteration. */ -public class BoundedReadIterator implements CloseableIterator, Iterable { +public class BoundedReadIterator implements StingSAMIterator { // the genome loc we're bounding final private long readCount; @@ -70,11 +70,11 @@ public class BoundedReadIterator implements CloseableIterator, Iterab } - public SAMFileHeader getMergedHeader() { + public SAMFileHeader getHeader() { // todo: this is bad, we need an iterface out there for samrecords that supports getting the header, // regardless of the merging if (iterator instanceof MergingSamRecordIterator2) - return ((MergingSamRecordIterator2)iterator).getMergedHeader(); + return ((MergingSamRecordIterator2)iterator).getHeader(); else return null; } diff --git a/java/src/org/broadinstitute/sting/gatk/iterators/MergingSamRecordIterator2.java b/java/src/org/broadinstitute/sting/gatk/iterators/MergingSamRecordIterator2.java index a99fd8e79..870ce1772 100644 --- a/java/src/org/broadinstitute/sting/gatk/iterators/MergingSamRecordIterator2.java +++ b/java/src/org/broadinstitute/sting/gatk/iterators/MergingSamRecordIterator2.java @@ -15,7 +15,6 @@ import edu.mit.broad.picard.sam.ReservedTagConstants; import edu.mit.broad.picard.sam.SamFileHeaderMerger; import edu.mit.broad.picard.util.PeekableIterator; import net.sf.samtools.*; -import net.sf.samtools.util.CloseableIterator; import org.apache.log4j.Logger; import java.lang.reflect.Constructor; @@ -29,7 +28,7 @@ import java.util.PriorityQueue; * iterable stream. The underlying iterators/files must all have the same sort order unless * the requested output format is unsorted, in which case any combination is valid. 
*/ -public class MergingSamRecordIterator2 implements CloseableIterator, Iterable { +public class MergingSamRecordIterator2 implements StingSAMIterator { protected PriorityQueue pq = null; protected final SamFileHeaderMerger samHeaderMerger; protected final SAMFileHeader.SortOrder sortOrder; @@ -152,6 +151,7 @@ public class MergingSamRecordIterator2 implements CloseableIterator, } final ComparableSamRecordIterator iterator = this.pq.poll(); + if (iterator == null) { return null; } @@ -244,7 +244,7 @@ public class MergingSamRecordIterator2 implements CloseableIterator, } /** Returns the merged header that the merging iterator is working from. */ - public SAMFileHeader getMergedHeader() { + public SAMFileHeader getHeader() { return this.samHeaderMerger.getMergedHeader(); } diff --git a/java/src/org/broadinstitute/sting/gatk/iterators/StingSAMIterator.java b/java/src/org/broadinstitute/sting/gatk/iterators/StingSAMIterator.java new file mode 100755 index 000000000..ddbdffbc9 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/iterators/StingSAMIterator.java @@ -0,0 +1,38 @@ +package org.broadinstitute.sting.gatk.iterators; + +import net.sf.samtools.SAMRecord; +import net.sf.samtools.SAMFileHeader; +import net.sf.samtools.util.CloseableIterator; +/** + * + * User: aaron + * Date: May 6, 2009 + * Time: 5:30:41 PM + * + * The Broad Institute + * SOFTWARE COPYRIGHT NOTICE AGREEMENT + * This software and its documentation are copyright 2009 by the + * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. + * + * This software is supplied without any warranty or guaranteed support whatsoever. Neither + * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. + * + */ + +/** + * @author aaron + * @version 1.0 + * @date May 6, 2009 + *

+ * Interface StingSAMIterator

+ * A descriptions should go here. Blame aaron if it's missing. + */ +public interface StingSAMIterator extends CloseableIterator, Iterable { + + /** + * gets the header from the iterator + * @return the samfileheader for the iterator, null if one is not available + */ + public SAMFileHeader getHeader(); +} diff --git a/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsTest.java b/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsTest.java index 6bcdfca6c..095ca50a3 100755 --- a/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsTest.java +++ b/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsTest.java @@ -148,31 +148,31 @@ public class TraverseReadsTest extends BaseTest { Shard shard = shardStrategy.next(); BoundedReadIterator readIter = null; try { - readIter = (BoundedReadIterator)dataSource.seek(shard); + readIter = (BoundedReadIterator) dataSource.seek(shard); } catch (SimpleDataSourceLoadException ex) { throw new RuntimeException(ex); } - //LocusContextProvider locusProvider = new LocusContextProvider( readIter ); + //LocusContextProvider locusProvider = new LocusContextProvider( readIter ); - // set the sam header of the traversal engine - traversalEngine.setSAMHeader(readIter.getMergedHeader()); + // set the sam header of the traversal engine + traversalEngine.setSAMHeader(readIter.getHeader()); - if (!walkerInitialized) { - countReadWalker.initialize(); - accumulator = ((ReadWalker) countReadWalker).reduceInit(); - walkerInitialized = true; + if (!walkerInitialized) { + countReadWalker.initialize(); + accumulator = ((ReadWalker) countReadWalker).reduceInit(); + walkerInitialized = true; - } - if (shard == null) { - fail("Shard == null"); - } + } + if (shard == null) { + fail("Shard == null"); + } - accumulator = traversalEngine.traverse(countReadWalker, shard, readIter, accumulator); - readIter.close(); - + accumulator = traversalEngine.traverse(countReadWalker, shard, readIter, accumulator); + 
readIter.close(); + } traversalEngine.printOnTraversalDone("loci", accumulator); @@ -181,7 +181,7 @@ public class TraverseReadsTest extends BaseTest { if (!(accumulator instanceof Integer)) { fail("Count read walker should return an interger."); } - if (((Integer)accumulator) != 9721) { + if (((Integer) accumulator) != 9721) { fail("there should be 9721 mapped reads in the index file"); } } @@ -229,30 +229,30 @@ public class TraverseReadsTest extends BaseTest { Shard shard = shardStrategy.next(); BoundedReadIterator readIter = null; try { - readIter = (BoundedReadIterator)dataSource.seek(shard); + readIter = (BoundedReadIterator) dataSource.seek(shard); } catch (SimpleDataSourceLoadException ex) { throw new RuntimeException(ex); } - //LocusContextProvider locusProvider = new LocusContextProvider( readIter ); + //LocusContextProvider locusProvider = new LocusContextProvider( readIter ); - // set the sam header of the traversal engine - traversalEngine.setSAMHeader(readIter.getMergedHeader()); + // set the sam header of the traversal engine + traversalEngine.setSAMHeader(readIter.getHeader()); - if (!walkerInitialized) { - countReadWalker.initialize(); - accumulator = ((ReadWalker) countReadWalker).reduceInit(); - walkerInitialized = true; + if (!walkerInitialized) { + countReadWalker.initialize(); + accumulator = ((ReadWalker) countReadWalker).reduceInit(); + walkerInitialized = true; - } - if (shard == null) { - fail("Shard == null"); - } + } + if (shard == null) { + fail("Shard == null"); + } - accumulator = traversalEngine.traverse(countReadWalker, shard, readIter, accumulator); - readIter.close(); + accumulator = traversalEngine.traverse(countReadWalker, shard, readIter, accumulator); + readIter.close(); } @@ -262,7 +262,7 @@ public class TraverseReadsTest extends BaseTest { if (!(accumulator instanceof Integer)) { fail("Count read walker should return an interger."); } - if (((Integer)accumulator) != 10000) { + if (((Integer) accumulator) != 10000) { 
fail("there should be 9721 mapped reads in the index file"); } }