Added a interface that all our SAMRecord iterators should try to code to. This is in the effort to keep our code generic

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@609 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2009-05-06 21:40:41 +00:00
parent 62e7e46754
commit 0aba688e6f
6 changed files with 86 additions and 43 deletions

View File

@ -269,7 +269,7 @@ public class SAMDataSource implements SimpleDataSource {
private BoundedReadIterator fastMappedReadSeek(long readCount, MergingSamRecordIterator2 iter) throws SimpleDataSourceLoadException { private BoundedReadIterator fastMappedReadSeek(long readCount, MergingSamRecordIterator2 iter) throws SimpleDataSourceLoadException {
BoundedReadIterator bound;// is this the first time we're doing this? BoundedReadIterator bound;// is this the first time we're doing this?
if (lastReadPos == null) { if (lastReadPos == null) {
lastReadPos = new GenomeLoc(iter.getMergedHeader().getSequenceDictionary().getSequence(0).getSequenceIndex(), 0, 0); lastReadPos = new GenomeLoc(iter.getHeader().getSequenceDictionary().getSequence(0).getSequenceIndex(), 0, 0);
iter.queryContained(lastReadPos.getContig(), 1, -1); iter.queryContained(lastReadPos.getContig(), 1, -1);
bound = new BoundedReadIterator(iter, readCount); bound = new BoundedReadIterator(iter, readCount);
this.readsTaken = readCount; this.readsTaken = readCount;
@ -339,7 +339,7 @@ public class SAMDataSource implements SimpleDataSource {
} }
// in case we're run out of reads, get out // in case we're run out of reads, get out
else { else {
throw new StingException("Danger"); throw new StingException("Danger: weve run out reads in fastMappedReadSeek");
//return null; //return null;
} }
bound = new BoundedReadIterator(iter, readCount); bound = new BoundedReadIterator(iter, readCount);

View File

@ -7,9 +7,11 @@ import org.broadinstitute.sting.gatk.dataSources.shards.ShardStrategy;
import org.broadinstitute.sting.gatk.dataSources.simpleDataSources.SAMDataSource; import org.broadinstitute.sting.gatk.dataSources.simpleDataSources.SAMDataSource;
import org.broadinstitute.sting.gatk.dataSources.simpleDataSources.SimpleDataSourceLoadException; import org.broadinstitute.sting.gatk.dataSources.simpleDataSources.SimpleDataSourceLoadException;
import org.broadinstitute.sting.gatk.iterators.MergingSamRecordIterator2; import org.broadinstitute.sting.gatk.iterators.MergingSamRecordIterator2;
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
import org.broadinstitute.sting.gatk.traversals.TraversalEngine; import org.broadinstitute.sting.gatk.traversals.TraversalEngine;
import org.broadinstitute.sting.gatk.traversals.TraverseLociByReference; import org.broadinstitute.sting.gatk.traversals.TraverseLociByReference;
import org.broadinstitute.sting.gatk.walkers.LocusWalker; import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
@ -21,7 +23,7 @@ import java.util.List;
*/ */
public class LinearMicroScheduler extends MicroScheduler { public class LinearMicroScheduler extends MicroScheduler {
private TraverseLociByReference traversalEngine = null; private TraversalEngine traversalEngine = null;
public TraversalEngine getTraversalEngine() { public TraversalEngine getTraversalEngine() {
return traversalEngine; return traversalEngine;
@ -46,12 +48,15 @@ public class LinearMicroScheduler extends MicroScheduler {
ShardStrategy shardStrategy = getShardStrategy( reference, locations ); ShardStrategy shardStrategy = getShardStrategy( reference, locations );
SAMDataSource dataSource = getReadsDataSource(); SAMDataSource dataSource = getReadsDataSource();
// determine if we're a read walker: they get a slightly different, but not in any way worse execute methodology
boolean readwalker = (walker instanceof ReadWalker) ? true : false;
boolean walkerInitialized = false; boolean walkerInitialized = false;
Object accumulator = null; Object accumulator = null;
for(Shard shard: shardStrategy) { for(Shard shard: shardStrategy) {
MergingSamRecordIterator2 readShard = null; StingSAMIterator readShard = null;
try { try {
readShard = (MergingSamRecordIterator2)dataSource.seek( shard ); readShard = (MergingSamRecordIterator2)dataSource.seek( shard );
} }
@ -63,7 +68,7 @@ public class LinearMicroScheduler extends MicroScheduler {
LocusContextProvider locusProvider = new LocusContextProvider( readShard ); LocusContextProvider locusProvider = new LocusContextProvider( readShard );
// set the sam header of the traversal engine // set the sam header of the traversal engine
traversalEngine.setSAMHeader(readShard.getMergedHeader()); traversalEngine.setSAMHeader(readShard.getHeader());
if (!walkerInitialized) { if (!walkerInitialized) {
walker.initialize(); walker.initialize();
@ -71,7 +76,7 @@ public class LinearMicroScheduler extends MicroScheduler {
walkerInitialized = true; walkerInitialized = true;
} }
accumulator = traversalEngine.traverse( walker, shard, referenceProvider, locusProvider, accumulator ); accumulator = ((TraverseLociByReference)traversalEngine).traverse( walker, shard, referenceProvider, locusProvider, accumulator );
readShard.close(); readShard.close();
} }

View File

@ -33,7 +33,7 @@ import java.util.Iterator;
* This class implements a read iterator that is bounded by the number of reads * This class implements a read iterator that is bounded by the number of reads
* it will produce over the iteration. * it will produce over the iteration.
*/ */
public class BoundedReadIterator implements CloseableIterator<SAMRecord>, Iterable<SAMRecord> { public class BoundedReadIterator implements StingSAMIterator {
// the genome loc we're bounding // the genome loc we're bounding
final private long readCount; final private long readCount;
@ -70,11 +70,11 @@ public class BoundedReadIterator implements CloseableIterator<SAMRecord>, Iterab
} }
public SAMFileHeader getMergedHeader() { public SAMFileHeader getHeader() {
// todo: this is bad, we need an iterface out there for samrecords that supports getting the header, // todo: this is bad, we need an iterface out there for samrecords that supports getting the header,
// regardless of the merging // regardless of the merging
if (iterator instanceof MergingSamRecordIterator2) if (iterator instanceof MergingSamRecordIterator2)
return ((MergingSamRecordIterator2)iterator).getMergedHeader(); return ((MergingSamRecordIterator2)iterator).getHeader();
else else
return null; return null;
} }

View File

@ -15,7 +15,6 @@ import edu.mit.broad.picard.sam.ReservedTagConstants;
import edu.mit.broad.picard.sam.SamFileHeaderMerger; import edu.mit.broad.picard.sam.SamFileHeaderMerger;
import edu.mit.broad.picard.util.PeekableIterator; import edu.mit.broad.picard.util.PeekableIterator;
import net.sf.samtools.*; import net.sf.samtools.*;
import net.sf.samtools.util.CloseableIterator;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
import java.lang.reflect.Constructor; import java.lang.reflect.Constructor;
@ -29,7 +28,7 @@ import java.util.PriorityQueue;
* iterable stream. The underlying iterators/files must all have the same sort order unless * iterable stream. The underlying iterators/files must all have the same sort order unless
* the requested output format is unsorted, in which case any combination is valid. * the requested output format is unsorted, in which case any combination is valid.
*/ */
public class MergingSamRecordIterator2 implements CloseableIterator<SAMRecord>, Iterable<SAMRecord> { public class MergingSamRecordIterator2 implements StingSAMIterator {
protected PriorityQueue<ComparableSamRecordIterator> pq = null; protected PriorityQueue<ComparableSamRecordIterator> pq = null;
protected final SamFileHeaderMerger samHeaderMerger; protected final SamFileHeaderMerger samHeaderMerger;
protected final SAMFileHeader.SortOrder sortOrder; protected final SAMFileHeader.SortOrder sortOrder;
@ -152,6 +151,7 @@ public class MergingSamRecordIterator2 implements CloseableIterator<SAMRecord>,
} }
final ComparableSamRecordIterator iterator = this.pq.poll(); final ComparableSamRecordIterator iterator = this.pq.poll();
if (iterator == null) { if (iterator == null) {
return null; return null;
} }
@ -244,7 +244,7 @@ public class MergingSamRecordIterator2 implements CloseableIterator<SAMRecord>,
} }
/** Returns the merged header that the merging iterator is working from. */ /** Returns the merged header that the merging iterator is working from. */
public SAMFileHeader getMergedHeader() { public SAMFileHeader getHeader() {
return this.samHeaderMerger.getMergedHeader(); return this.samHeaderMerger.getMergedHeader();
} }

View File

@ -0,0 +1,38 @@
package org.broadinstitute.sting.gatk.iterators;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.util.CloseableIterator;
/**
*
* User: aaron
* Date: May 6, 2009
* Time: 5:30:41 PM
*
* The Broad Institute
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
* This software and its documentation are copyright 2009 by the
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
*
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
*
*/
/**
* @author aaron
* @version 1.0
* @date May 6, 2009
* <p/>
* Interface ClosableGetHeaderIterator
* <p/>
* A descriptions should go here. Blame aaron if it's missing.
*/
public interface StingSAMIterator extends CloseableIterator<SAMRecord>, Iterable<SAMRecord> {
/**
* gets the header from the iterator
* @return the samfileheader for the iterator, null if one is not available
*/
public SAMFileHeader getHeader();
}

View File

@ -148,30 +148,30 @@ public class TraverseReadsTest extends BaseTest {
Shard shard = shardStrategy.next(); Shard shard = shardStrategy.next();
BoundedReadIterator readIter = null; BoundedReadIterator readIter = null;
try { try {
readIter = (BoundedReadIterator)dataSource.seek(shard); readIter = (BoundedReadIterator) dataSource.seek(shard);
} }
catch (SimpleDataSourceLoadException ex) { catch (SimpleDataSourceLoadException ex) {
throw new RuntimeException(ex); throw new RuntimeException(ex);
} }
//LocusContextProvider locusProvider = new LocusContextProvider( readIter ); //LocusContextProvider locusProvider = new LocusContextProvider( readIter );
// set the sam header of the traversal engine // set the sam header of the traversal engine
traversalEngine.setSAMHeader(readIter.getMergedHeader()); traversalEngine.setSAMHeader(readIter.getHeader());
if (!walkerInitialized) { if (!walkerInitialized) {
countReadWalker.initialize(); countReadWalker.initialize();
accumulator = ((ReadWalker<?, ?>) countReadWalker).reduceInit(); accumulator = ((ReadWalker<?, ?>) countReadWalker).reduceInit();
walkerInitialized = true; walkerInitialized = true;
} }
if (shard == null) { if (shard == null) {
fail("Shard == null"); fail("Shard == null");
} }
accumulator = traversalEngine.traverse(countReadWalker, shard, readIter, accumulator); accumulator = traversalEngine.traverse(countReadWalker, shard, readIter, accumulator);
readIter.close(); readIter.close();
} }
@ -181,7 +181,7 @@ public class TraverseReadsTest extends BaseTest {
if (!(accumulator instanceof Integer)) { if (!(accumulator instanceof Integer)) {
fail("Count read walker should return an interger."); fail("Count read walker should return an interger.");
} }
if (((Integer)accumulator) != 9721) { if (((Integer) accumulator) != 9721) {
fail("there should be 9721 mapped reads in the index file"); fail("there should be 9721 mapped reads in the index file");
} }
} }
@ -229,30 +229,30 @@ public class TraverseReadsTest extends BaseTest {
Shard shard = shardStrategy.next(); Shard shard = shardStrategy.next();
BoundedReadIterator readIter = null; BoundedReadIterator readIter = null;
try { try {
readIter = (BoundedReadIterator)dataSource.seek(shard); readIter = (BoundedReadIterator) dataSource.seek(shard);
} }
catch (SimpleDataSourceLoadException ex) { catch (SimpleDataSourceLoadException ex) {
throw new RuntimeException(ex); throw new RuntimeException(ex);
} }
//LocusContextProvider locusProvider = new LocusContextProvider( readIter ); //LocusContextProvider locusProvider = new LocusContextProvider( readIter );
// set the sam header of the traversal engine // set the sam header of the traversal engine
traversalEngine.setSAMHeader(readIter.getMergedHeader()); traversalEngine.setSAMHeader(readIter.getHeader());
if (!walkerInitialized) { if (!walkerInitialized) {
countReadWalker.initialize(); countReadWalker.initialize();
accumulator = ((ReadWalker<?, ?>) countReadWalker).reduceInit(); accumulator = ((ReadWalker<?, ?>) countReadWalker).reduceInit();
walkerInitialized = true; walkerInitialized = true;
} }
if (shard == null) { if (shard == null) {
fail("Shard == null"); fail("Shard == null");
} }
accumulator = traversalEngine.traverse(countReadWalker, shard, readIter, accumulator); accumulator = traversalEngine.traverse(countReadWalker, shard, readIter, accumulator);
readIter.close(); readIter.close();
} }
@ -262,7 +262,7 @@ public class TraverseReadsTest extends BaseTest {
if (!(accumulator instanceof Integer)) { if (!(accumulator instanceof Integer)) {
fail("Count read walker should return an interger."); fail("Count read walker should return an interger.");
} }
if (((Integer)accumulator) != 10000) { if (((Integer) accumulator) != 10000) {
fail("there should be 9721 mapped reads in the index file"); fail("there should be 9721 mapped reads in the index file");
} }
} }