diff --git a/java/src/org/broadinstitute/sting/gatk/dataSources/shards/IntervalShardStrategy.java b/java/src/org/broadinstitute/sting/gatk/dataSources/shards/IntervalShardStrategy.java
index acbe78565..42d24b83b 100755
--- a/java/src/org/broadinstitute/sting/gatk/dataSources/shards/IntervalShardStrategy.java
+++ b/java/src/org/broadinstitute/sting/gatk/dataSources/shards/IntervalShardStrategy.java
@@ -56,16 +56,17 @@ public class IntervalShardStrategy implements ShardStrategy {
*
* @param size the next recommended shard size.
*/
- public void adjustNextShardSize(long size) {
+ public void adjustNextShardSize( long size ) {
this.size = size;
}
/**
* construct the shard strategy from a seq dictionary, a shard size, and and genomeLocs
+ *
* @param size
* @param locations
*/
- IntervalShardStrategy(long size, GenomeLocSortedSet locations) {
+ IntervalShardStrategy( long size, GenomeLocSortedSet locations ) {
if (locations == null || locations.isEmpty()) {
throw new StingException("IntervalShardStrategy: genomic regions list is empty.");
}
@@ -73,12 +74,12 @@ public class IntervalShardStrategy implements ShardStrategy {
this.size = size;
}
- /**
+ /**
* the default constructor
*
- * @param dict the sequence dictionary to use
+ * @param dict the sequence dictionary to use
*/
- IntervalShardStrategy(SAMSequenceDictionary dict, GenomeLocSortedSet locations) {
+ IntervalShardStrategy( SAMSequenceDictionary dict, GenomeLocSortedSet locations ) {
if (locations == null || locations.isEmpty()) {
throw new StingException("IntervalShardStrategy: genomic regions list is empty.");
}
@@ -88,44 +89,39 @@ public class IntervalShardStrategy implements ShardStrategy {
/**
* returns true if there are additional shards
+ *
* @return false if we're done processing shards
*/
public boolean hasNext() {
- return (!regions.isEmpty());
+ return ( !regions.isEmpty() );
}
/**
* gets the next Shard
+ *
* @return the next shard
*/
public Shard next() {
- if ((this.regions == null) || (regions.isEmpty())) {
+ if (( this.regions == null ) || ( regions.isEmpty() )) {
throw new StingException("IntervalShardStrategy: genomic regions list is empty in next() function.");
}
// get the first region in the list
GenomeLoc loc = regions.iterator().next();
- if (loc.getStop() - loc.getStart() <= this.size) {
- regions.removeRegion(loc);
- return new IntervalShard(loc);
- } else {
- GenomeLoc subLoc = new GenomeLoc(loc.getContigIndex(),loc.getStart(),loc.getStart()+size-1);
- regions.removeRegion(subLoc);
- return new IntervalShard(subLoc);
- }
-
+ regions.removeRegion(loc);
+ return new IntervalShard(loc);
+
}
- /**
- * we don't support the remove command
- */
+ /** we don't support the remove command */
public void remove() {
throw new UnsupportedOperationException("ShardStrategies don't support remove()");
}
/**
* makes the ReadIntervalShard iterable, i.e. usable in a for loop.
+ *
* @return
*/
public Iterator iterator() {
diff --git a/java/src/org/broadinstitute/sting/gatk/dataSources/simpleDataSources/SAMDataSource.java b/java/src/org/broadinstitute/sting/gatk/dataSources/simpleDataSources/SAMDataSource.java
index 9372c815e..34316167d 100755
--- a/java/src/org/broadinstitute/sting/gatk/dataSources/simpleDataSources/SAMDataSource.java
+++ b/java/src/org/broadinstitute/sting/gatk/dataSources/simpleDataSources/SAMDataSource.java
@@ -5,13 +5,11 @@ import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMFileReader;
import net.sf.samtools.SAMReadGroupRecord;
import net.sf.samtools.SAMRecord;
+import net.sf.samtools.util.CloseableIterator;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.dataSources.shards.ReadShard;
import org.broadinstitute.sting.gatk.dataSources.shards.Shard;
-import org.broadinstitute.sting.gatk.iterators.BoundedReadIterator;
-import org.broadinstitute.sting.gatk.iterators.MergingSamRecordIterator2;
-import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
-import org.broadinstitute.sting.gatk.iterators.StingSAMIteratorAdapter;
+import org.broadinstitute.sting.gatk.iterators.*;
import org.broadinstitute.sting.gatk.Reads;
import org.broadinstitute.sting.gatk.traversals.TraversalEngine;
import org.broadinstitute.sting.utils.GenomeLoc;
@@ -35,43 +33,53 @@ import java.util.List;
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
*/
public class SAMDataSource implements SimpleDataSource {
+
+
/** Backing support for reads. */
private Reads reads = null;
- /** our SAM data files */
- private final SAMFileHeader.SortOrder SORT_ORDER = SAMFileHeader.SortOrder.coordinate;
-
/** our log, which we want to capture anything from this class */
protected static Logger logger = Logger.getLogger(SAMDataSource.class);
- // How strict should we be with SAM/BAM parsing?
- protected SAMFileReader.ValidationStringency strictness = SAMFileReader.ValidationStringency.SILENT;
-
- // our list of readers
- private final List samFileList = new ArrayList();
-
- /** SAM header file. */
- private final SAMFileHeader header;
-
// used for the reads case, the last count of reads retrieved
- private long readsTaken = 0;
+ long readsTaken = 0;
// our last genome loc position
- private GenomeLoc lastReadPos = null;
+ GenomeLoc lastReadPos = null;
// do we take unmapped reads
private boolean includeUnmappedReads = true;
// reads based traversal variables
private boolean intoUnmappedReads = false;
- private int readsAtLastPos = 0;
+ private int readsSeenAtLastPos = 0;
+
+
+ /**
+ * package protected getter and setter for the iterator generator
+ *
+ * @return
+ */
+ IteratorGenerator getIterGen() {
+ return iterGen;
+ }
+
+ void setIterGen( IteratorGenerator iterGen ) {
+ this.iterGen = iterGen;
+ }
+
+ // where we get out iterators from
+ private IteratorGenerator iterGen;
/**
* constructor, given sam files
*
* @param reads the list of sam files
+ * @param byReads are we a by reads traversal, or a loci traversal. We could delete this field
+ * if we passed in iterGen, which would be a better (although more complicated for the
+ * consumers of SAMDataSources).
*/
- public SAMDataSource(Reads reads) throws SimpleDataSourceLoadException {
+ public SAMDataSource( Reads reads, boolean byReads ) throws SimpleDataSourceLoadException {
this.reads = reads;
// check the length
@@ -82,32 +90,11 @@ public class SAMDataSource implements SimpleDataSource {
if (!smFile.canRead()) {
throw new SimpleDataSourceLoadException("SAMDataSource: Unable to load file: " + smFile.getName());
}
- samFileList.add(smFile);
-
}
+ iterGen = new MSR2IteratorGenerator(reads, byReads);
- header = createHeaderMerger().getMergedHeader();
}
- /**
- * Load a SAM/BAM, given an input file.
- *
- * @param samFile the file name
- * @return a SAMFileReader for the file, null if we're attempting to read a list
- */
- private SAMFileReader initializeSAMFile(final File samFile) {
- if (samFile.toString().endsWith(".list")) {
- return null;
- } else {
- SAMFileReader samReader = new SAMFileReader(samFile, true);
- samReader.setValidationStringency(strictness);
-
- final SAMFileHeader header = samReader.getFileHeader();
- logger.debug(String.format("Sort order is: " + header.getSortOrder()));
-
- return samReader;
- }
- }
/**
*
@@ -115,18 +102,12 @@ public class SAMDataSource implements SimpleDataSource {
*
*
* @param location the genome location to extract data for
+ *
* @return an iterator for that region
*/
- public StingSAMIterator seekLocus(GenomeLoc location) throws SimpleDataSourceLoadException {
-
- // right now this is very heavy, it copies the file list into a reader list every time
- SamFileHeaderMerger headerMerger = createHeaderMerger();
-
- // make a merging iterator for this record
- MergingSamRecordIterator2 iter = new MergingSamRecordIterator2(headerMerger);
-
- iter.queryOverlapping(location.getContig(), (int) location.getStart(), (int) location.getStop() + 1);
-
+ public StingSAMIterator seekLocus( GenomeLoc location ) throws SimpleDataSourceLoadException {
+ // make a merging iterator for this record
+ CloseableIterator iter = iterGen.seek(location);
// return the iterator
return StingSAMIteratorAdapter.adapt(reads, iter);
}
@@ -137,9 +118,10 @@ public class SAMDataSource implements SimpleDataSource {
*
*
* @param shard the shard to get data for
+ *
* @return an iterator for that region
*/
- public StingSAMIterator seek(Shard shard) throws SimpleDataSourceLoadException {
+ public StingSAMIterator seek( Shard shard ) throws SimpleDataSourceLoadException {
StingSAMIterator iterator = null;
if (shard.getShardType() == Shard.ShardType.READ) {
iterator = seekRead((ReadShard) shard);
@@ -149,8 +131,7 @@ public class SAMDataSource implements SimpleDataSource {
reads.getMaxOnTheFlySorts(),
reads.getFilterZeroMappingQualityReads(),
reads.getSafetyChecking());
- } else if (shard.getShardType() == Shard.ShardType.LOCUS ||
- shard.getShardType() == Shard.ShardType.INTERVAL) {
+ } else if (shard.getShardType() == Shard.ShardType.LOCUS || shard.getShardType() == Shard.ShardType.INTERVAL) {
iterator = seekLocus(shard.getGenomeLoc());
iterator = TraversalEngine.applyDecoratingIterators(false,
iterator,
@@ -158,8 +139,7 @@ public class SAMDataSource implements SimpleDataSource {
reads.getMaxOnTheFlySorts(),
reads.getFilterZeroMappingQualityReads(),
reads.getSafetyChecking());
- }
- else {
+ } else {
throw new StingException("seek: Unknown shard type");
}
@@ -173,17 +153,7 @@ public class SAMDataSource implements SimpleDataSource {
* @return SAM file header.
*/
public SAMFileHeader getHeader() {
- return header;
- }
-
- /**
- * create the merging header.
- *
- * @return a SamFileHeaderMerger that includes the set of SAM files we were created with
- */
- private SamFileHeaderMerger createHeaderMerger() {
- List lst = GetReaderList();
- return new SamFileHeaderMerger(lst, SORT_ORDER, true);
+ return this.iterGen.getHeader();
}
@@ -193,26 +163,33 @@ public class SAMDataSource implements SimpleDataSource {
*
*
* @param shard the read shard to extract from
+ *
* @return an iterator for that region
*/
- private BoundedReadIterator seekRead(ReadShard shard) throws SimpleDataSourceLoadException {
+ private BoundedReadIterator seekRead( ReadShard shard ) throws SimpleDataSourceLoadException {
BoundedReadIterator bound = null;
- SamFileHeaderMerger headerMerger = createHeaderMerger();
- MergingSamRecordIterator2 iter = null;
+ CloseableIterator iter = null;
if (!intoUnmappedReads) {
- iter = new MergingSamRecordIterator2(headerMerger);
- bound = fastMappedReadSeek(shard.getSize(), iter);
+ if (lastReadPos == null) {
+ lastReadPos = new GenomeLoc(iterGen.getHeader().getSequenceDictionary().getSequence(0).getSequenceIndex(), 0, Integer.MAX_VALUE);
+ iter = iterGen.seek(lastReadPos);
+ return InitialReadIterator(shard.getSize(), iter);
+ } else {
+ lastReadPos.setStop(-1);
+ iter = iterGen.seek(lastReadPos);
+ bound = fastMappedReadSeek(shard.getSize(), StingSAMIteratorAdapter.adapt(reads, iter));
+ }
}
- if ((bound == null || intoUnmappedReads) && includeUnmappedReads) {
+
+ if (( bound == null || intoUnmappedReads ) && includeUnmappedReads) {
if (iter != null) {
iter.close();
}
- iter = new MergingSamRecordIterator2(createHeaderMerger());
- bound = toUnmappedReads(shard.getSize(), iter);
+ iter = iterGen.seek(null);
+ bound = toUnmappedReads(shard.getSize(), (PeekingStingIterator) iter);
}
-
if (bound == null) {
shard.signalDone();
bound = new BoundedReadIterator(StingSAMIteratorAdapter.adapt(reads, iter), 0);
@@ -228,7 +205,7 @@ public class SAMDataSource implements SimpleDataSource {
*
* @param seeUnMappedReads true to see unmapped reads, false otherwise
*/
- public void viewUnmappedReads(boolean seeUnMappedReads) {
+ public void viewUnmappedReads( boolean seeUnMappedReads ) {
includeUnmappedReads = seeUnMappedReads;
}
@@ -238,19 +215,21 @@ public class SAMDataSource implements SimpleDataSource {
*
* @param readCount how many reads to retrieve
* @param iter the iterator to use
+ *
* @return the bounded iterator that you can use to get the intervaled reads from
* @throws SimpleDataSourceLoadException
*/
- private BoundedReadIterator toUnmappedReads(long readCount, MergingSamRecordIterator2 iter) throws SimpleDataSourceLoadException {
+ BoundedReadIterator toUnmappedReads( long readCount, U iter ) throws SimpleDataSourceLoadException {
int count = 0;
+ int cnt = 0;
SAMRecord d = null;
while (iter.hasNext()) {
d = iter.peek();
int x = d.getReferenceIndex();
- if (x < 0 || x >= d.getHeader().getSequenceDictionary().getSequences().size()) {
+ if (x < 0)
// we have the magic read that starts the unmapped read segment!
break;
- }
+ cnt++;
iter.next();
}
@@ -262,6 +241,7 @@ public class SAMDataSource implements SimpleDataSource {
// now walk until we've taken the unmapped read count
while (iter.hasNext() && count < this.readsTaken) {
iter.next();
+ count++;
}
// check to see what happened, did we run out of reads?
@@ -280,108 +260,164 @@ public class SAMDataSource implements SimpleDataSource {
* A seek function for unmapped reads.
*
* @param readCount how many reads to retrieve
- * @param iter the iterator to use
+ * @param iter the iterator to use, seeked to the correct start location
+ *
* @return the bounded iterator that you can use to get the intervaled reads from
* @throws SimpleDataSourceLoadException
*/
- private BoundedReadIterator fastMappedReadSeek(long readCount, MergingSamRecordIterator2 iter) throws SimpleDataSourceLoadException {
- if (lastReadPos == null) {
+ BoundedReadIterator fastMappedReadSeek( long readCount, StingSAMIterator iter ) throws SimpleDataSourceLoadException {
+ BoundedReadIterator bound;
+ correctForReadPileupSeek(iter);
+ if (readsTaken == 0) {
return InitialReadIterator(readCount, iter);
- } else {
- BoundedReadIterator bound;
- iter.queryContained(lastReadPos.getContig(), (int) lastReadPos.getStop(), -1);
+ }
+ int x = 0;
+ SAMRecord rec = null;
+ int lastPos = 0;
- // move the number of reads we read from the last pos
- boolean atLeastOneReadSeen = false; // we have a problem where some chomesomes don't have a single read (i.e. the chrN_random chrom.)
- while (iter.hasNext() && this.readsAtLastPos > 0) {
- iter.next();
- --readsAtLastPos;
- atLeastOneReadSeen = true;
- }
- if (readsAtLastPos != 0 && atLeastOneReadSeen) {
- throw new SimpleDataSourceLoadException("Seek problem: reads at last position count != 0");
- }
-
- int x = 0;
- SAMRecord rec = null;
- int lastPos = 0;
-
- while (x < readsTaken) {
- if (iter.hasNext()) {
- rec = iter.next();
- if (lastPos == rec.getAlignmentStart()) {
- ++this.readsAtLastPos;
- } else {
- this.readsAtLastPos = 1;
- }
- lastPos = rec.getAlignmentStart();
- ++x;
+ while (x < readsTaken) {
+ if (iter.hasNext()) {
+ rec = iter.next();
+ if (lastPos == rec.getAlignmentStart()) ++this.readsSeenAtLastPos;
+ else this.readsSeenAtLastPos = 1;
+ lastPos = rec.getAlignmentStart();
+ ++x;
+ } else {
+ // jump contigs
+ if (lastReadPos.toNextContig() == false) {
+ // check to see if we're using unmapped reads, if not return, we're done
+ readsTaken = 0;
+ intoUnmappedReads = true;
+ return null;
} else {
- // jump contigs
- if (lastReadPos.toNextContig() == false) {
- // check to see if we're using unmapped reads, if not return, we're done
- readsTaken = 0;
- intoUnmappedReads = true;
- return null;
-
- } else {
- iter.close();
- // now merge the headers
- // right now this is pretty damn heavy, it copies the file list into a reader list every time
- SamFileHeaderMerger mg = createHeaderMerger();
- iter = new MergingSamRecordIterator2(mg);
- iter.queryContained(lastReadPos.getContig(), 1, Integer.MAX_VALUE);
- return new BoundedReadIterator(StingSAMIteratorAdapter.adapt(reads, iter), readCount);
- }
+ readsTaken = readCount;
+ readsSeenAtLastPos = 0;
+ lastReadPos.setStop(-1);
+ CloseableIterator ret = iterGen.seek(lastReadPos);
+ return new BoundedReadIterator(StingSAMIteratorAdapter.adapt(reads, ret), readCount);
}
}
-
-
- // if we're off the end of the last contig (into unmapped territory)
- if (rec != null && rec.getAlignmentStart() == 0) {
- readsTaken += readCount;
- intoUnmappedReads = true;
- }
- // else we're not off the end, store our location
- else if (rec != null) {
- int stopPos = rec.getAlignmentStart();
- if (stopPos < lastReadPos.getStart()) {
- lastReadPos = new GenomeLoc(lastReadPos.getContigIndex() + 1, stopPos, stopPos);
- } else {
- lastReadPos.setStop(rec.getAlignmentStart());
- }
- }
- // in case we're run out of reads, get out
- else {
- throw new StingException("Danger: weve run out reads in fastMappedReadSeek");
- //return null;
- }
- bound = new BoundedReadIterator(StingSAMIteratorAdapter.adapt(reads, iter), readCount);
-
- // return the iterator
- return bound;
}
+ // if we're off the end of the last contig (into unmapped territory)
+ if (rec != null && rec.getAlignmentStart() == 0) {
+ readsTaken += readCount;
+ intoUnmappedReads = true;
+ }
+ // else we're not off the end, store our location
+ else if (rec != null) {
+ int stopPos = rec.getAlignmentStart();
+ if (stopPos < lastReadPos.getStart()) {
+ lastReadPos = new GenomeLoc(lastReadPos.getContigIndex() + 1, stopPos, stopPos);
+ } else {
+ lastReadPos.setStart(rec.getAlignmentStart());
+ }
+ }
+ // in case we're run out of reads, get out
+ else {
+ throw new StingException("Danger: weve run out reads in fastMappedReadSeek");
+ //return null;
+ }
+ bound = new BoundedReadIterator(StingSAMIteratorAdapter.adapt(reads, iter), readCount);
+
+ // return the iterator
+ return bound;
}
+ /**
+ * Even though the iterator has seeked to the correct location, there may be multiple reads at that location,
+ * and we may have given some of them out already. Move the iterator to the correct location using the readsAtLastPos variable
+ *
+ * @param iter the iterator
+ */
+ private void correctForReadPileupSeek( StingSAMIterator iter ) {
+ // move the number of reads we read from the last pos
+ boolean atLeastOneReadSeen = false; // we have a problem where some chomesomes don't have a single read (i.e. the chrN_random chrom.)
+ while (iter.hasNext() && this.readsSeenAtLastPos > 0) {
+ iter.next();
+ --readsSeenAtLastPos;
+ atLeastOneReadSeen = true;
+ }
+ if (readsSeenAtLastPos != 0 && atLeastOneReadSeen) {
+ throw new SimpleDataSourceLoadException("Seek problem: reads at last position count != 0");
+ }
+ }
+
+
/**
* set the initial iterator
*
* @param readCount the number of reads
* @param iter the merging iterator
+ *
* @return a bounded read iterator at the first read position in the file.
*/
- private BoundedReadIterator InitialReadIterator(long readCount, MergingSamRecordIterator2 iter) {
+ private BoundedReadIterator InitialReadIterator( long readCount, CloseableIterator iter ) {
BoundedReadIterator bound;
- lastReadPos = new GenomeLoc(iter.getHeader().getSequenceDictionary().getSequence(0).getSequenceIndex(), 0, 0);
- iter.queryContained(lastReadPos.getContig(), 1, -1);
bound = new BoundedReadIterator(StingSAMIteratorAdapter.adapt(reads, iter), readCount);
this.readsTaken = readCount;
return bound;
}
+}
+
+
+/**
+ * iterator generator
+ *
+ * This class generates iterators for the SAMDataSource. This class was introduced for testing purposes,
+ * since it became increasingly hard to test the SAM data source code. The class defines two abstraact
+ * methods:
+ *
+ * -seek( GenomeLoc ) which returns an iterator seeked to the genome loc, and if null is passed to the default
+ * location (which is implementation specific).
+ *
+ * -getHeader(), which returns a SAMFileHeader for the specified IteratorGenerator. I hope we can phase this
+ * method out, since it doesn't seem necessary, and it would be much cleaner with out it.
+ */
+abstract class IteratorGenerator {
+ /** our log, which we want to capture anything from this class */
+ protected static Logger logger = Logger.getLogger(SAMDataSource.class);
+
+ /**
+ * seek to a location
+ *
+ * @param seekTo the genome loc to seek to
+ *
+ * @return StingSAMIterator
+ */
+ public abstract CloseableIterator seek( GenomeLoc seekTo );
+
+ /**
+ * get the merged header
+ *
+ * @return the merged header
+ */
+ public abstract SAMFileHeader getHeader();
+
+ /**
+ * Load a SAM/BAM, given an input file.
+ *
+ * @param samFile the file name
+ *
+ * @return a SAMFileReader for the file, null if we're attempting to read a list
+ */
+ protected static SAMFileReader initializeSAMFile( final File samFile, SAMFileReader.ValidationStringency strictness ) {
+ if (samFile.toString().endsWith(".list")) {
+ return null;
+ } else {
+ SAMFileReader samReader = new SAMFileReader(samFile, true);
+ samReader.setValidationStringency(strictness);
+
+ final SAMFileHeader header = samReader.getFileHeader();
+ logger.debug(String.format("Sort order is: " + header.getSortOrder()));
+
+ return samReader;
+ }
+ }
+
/**
* A private function that, given the internal file list, generates a SamFileReader
* list of validated files.
@@ -389,11 +425,11 @@ public class SAMDataSource implements SimpleDataSource {
* @return a list of SAMFileReaders that represent the stored file names
* @throws SimpleDataSourceLoadException if there's a problem loading the files
*/
- private List GetReaderList() throws SimpleDataSourceLoadException {
+ protected static List GetReaderList( Reads reads, SAMFileReader.ValidationStringency strictness ) throws SimpleDataSourceLoadException {
// right now this is pretty damn heavy, it copies the file list into a reader list every time
List lst = new ArrayList();
- for (File f : this.samFileList) {
- SAMFileReader reader = initializeSAMFile(f);
+ for (File f : reads.getReadsFiles()) {
+ SAMFileReader reader = initializeSAMFile(f, strictness);
if (reader.getFileHeader().getReadGroups().size() < 1) {
//logger.warn("Setting header in reader " + f.getName());
@@ -412,4 +448,65 @@ public class SAMDataSource implements SimpleDataSource {
return lst;
}
+ /**
+ * create the merging header.
+ *
+ * @return a SamFileHeaderMerger that includes the set of SAM files we were created with
+ */
+ protected static SamFileHeaderMerger createHeaderMerger( Reads reads, SAMFileReader.ValidationStringency strictness, SAMFileHeader.SortOrder SORT_ORDER ) {
+ List lst = GetReaderList(reads, strictness);
+ return new SamFileHeaderMerger(lst, SORT_ORDER, true);
+ }
}
+
+
+/**
+ * MSR2IteratorGenerator
+ *
+ * generates a MerginsSAMIterator2, given a genomic location. The constructor takes the reads structure,
+ * and a flag indicating if we're dealing with reads or loci (to determine the correct query function).
+ */
+class MSR2IteratorGenerator extends IteratorGenerator {
+ /** our read pile */
+ private Reads reads;
+
+ private SamFileHeaderMerger header;
+
+ // How strict should we be with SAM/BAM parsing?
+ protected SAMFileReader.ValidationStringency strictness = SAMFileReader.ValidationStringency.SILENT;
+
+ // are we by reads or by loci
+ protected boolean byReads = true;
+
+ /** our SAM data files */
+ private final SAMFileHeader.SortOrder sortOrder = SAMFileHeader.SortOrder.coordinate;
+
+ public MSR2IteratorGenerator( Reads reads, boolean byReads ) {
+ this.reads = reads;
+ this.header = IteratorGenerator.createHeaderMerger(reads, strictness, sortOrder);
+ this.byReads = byReads;
+ }
+
+ public CloseableIterator seek( GenomeLoc seekTo ) {
+ SamFileHeaderMerger mg = createHeaderMerger(reads, strictness, sortOrder);
+ MergingSamRecordIterator2 iter = new MergingSamRecordIterator2(mg, reads);
+ if (seekTo != null) {
+ if (byReads)
+ iter.queryContained(seekTo.getContig(), (int) seekTo.getStart(), (int) seekTo.getStop());
+ else
+ iter.queryOverlapping(seekTo.getContig(), (int) seekTo.getStart(), (int) seekTo.getStop());
+ }
+ return iter;
+ }
+
+ /**
+ * get the merged header
+ *
+ * @return the merged header
+ */
+ public SAMFileHeader getHeader() {
+ return header.getMergedHeader();
+ }
+}
+
+
diff --git a/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java b/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java
index 8336007e9..eb6674105 100755
--- a/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java
+++ b/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java
@@ -75,11 +75,13 @@ public abstract class MicroScheduler {
protected MicroScheduler(Walker walker, Reads reads, File refFile, List> rods) {
if (walker instanceof ReadWalker) {
traversalEngine = new TraverseReads(reads.getReadsFiles(), refFile, rods);
+ this.reads = getReadsDataSource(reads,true);
} else {
traversalEngine = new TraverseLoci(reads.getReadsFiles(), refFile, rods);
+ this.reads = getReadsDataSource(reads,false);
}
- this.reads = getReadsDataSource(reads);
+
this.reference = openReferenceSequenceFile(refFile);
this.rods = getReferenceOrderedDataSources(rods);
}
@@ -159,12 +161,12 @@ public abstract class MicroScheduler {
* Gets a data source for the given set of reads.
* @return A data source for the given set of reads.
*/
- private SAMDataSource getReadsDataSource(Reads reads) {
+ private SAMDataSource getReadsDataSource(Reads reads, boolean byReads) {
// By reference traversals are happy with no reads. Make sure that case is handled.
if (reads.getReadsFiles().size() == 0)
return null;
- SAMDataSource dataSource = new SAMDataSource(reads);
+ SAMDataSource dataSource = new SAMDataSource(reads, byReads);
// Side effect: initialize the traversal engine with reads data.
// TODO: Give users a dedicated way of getting the header so that the MicroScheduler
diff --git a/java/src/org/broadinstitute/sting/gatk/iterators/BoundedReadIterator.java b/java/src/org/broadinstitute/sting/gatk/iterators/BoundedReadIterator.java
index 9f8d9c8fb..eb5fb0a48 100755
--- a/java/src/org/broadinstitute/sting/gatk/iterators/BoundedReadIterator.java
+++ b/java/src/org/broadinstitute/sting/gatk/iterators/BoundedReadIterator.java
@@ -7,20 +7,29 @@ import java.util.Iterator;
import org.broadinstitute.sting.gatk.Reads;
-/**
+/*
+ * Copyright (c) 2009 The Broad Institute
*
- * User: aaron
- * Date: Apr 14, 2009
- * Time: 5:27:31 PM
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
*
- * The Broad Institute
- * SOFTWARE COPYRIGHT NOTICE AGREEMENT
- * This software and its documentation are copyright 2009 by the
- * Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
- *
- * This software is supplied without any warranty or guaranteed support whatsoever. Neither
- * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
*
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
*/
diff --git a/java/src/org/broadinstitute/sting/gatk/iterators/MergingSamRecordIterator2.java b/java/src/org/broadinstitute/sting/gatk/iterators/MergingSamRecordIterator2.java
index 87f22ee96..12b022d44 100644
--- a/java/src/org/broadinstitute/sting/gatk/iterators/MergingSamRecordIterator2.java
+++ b/java/src/org/broadinstitute/sting/gatk/iterators/MergingSamRecordIterator2.java
@@ -31,7 +31,7 @@ import java.util.PriorityQueue;
* iterable stream. The underlying iterators/files must all have the same sort order unless
* the requested output format is unsorted, in which case any combination is valid.
*/
-public class MergingSamRecordIterator2 implements CloseableIterator, Iterable {
+public class MergingSamRecordIterator2 implements CloseableIterator, Iterable, PeekingStingIterator {
protected PriorityQueue pq = null;
protected final SamFileHeaderMerger samHeaderMerger;
protected final SAMFileHeader.SortOrder sortOrder;
@@ -43,7 +43,7 @@ public class MergingSamRecordIterator2 implements CloseableIterator,
* Constructs a new merging iterator with the same set of readers and sort order as
* provided by the header merger parameter.
*/
- public MergingSamRecordIterator2(final SamFileHeaderMerger headerMerger) {
+ public MergingSamRecordIterator2(final SamFileHeaderMerger headerMerger, Reads reads) {
this.samHeaderMerger = headerMerger;
this.sortOrder = headerMerger.getMergedHeader().getSortOrder();
this.pq = new PriorityQueue(samHeaderMerger.getReaders().size());
@@ -275,6 +275,16 @@ public class MergingSamRecordIterator2 implements CloseableIterator,
public Iterator iterator() {
return this;
}
+
+ /**
+ * Gets source information for the reads. Contains information about the original reads
+ * files, plus information about downsampling, etc.
+ *
+ * @return
+ */
+ public Reads getSourceInfo() {
+ return null; //To change body of implemented methods use File | Settings | File Templates.
+ }
}
// Should replace picard class with the same name
diff --git a/java/src/org/broadinstitute/sting/gatk/iterators/PeekingStingIterator.java b/java/src/org/broadinstitute/sting/gatk/iterators/PeekingStingIterator.java
new file mode 100644
index 000000000..8abf05409
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/gatk/iterators/PeekingStingIterator.java
@@ -0,0 +1,43 @@
+package org.broadinstitute.sting.gatk.iterators;
+
+import net.sf.samtools.SAMRecord;
+
+/*
+ * Copyright (c) 2009 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * @author aaron
+ *
+ * Interface Peekable
+ *
+ * This interface indicates that
+ */
+public interface PeekingStingIterator extends StingSAMIterator {
+ /**
+ * peek, given the specified type
+ * @return
+ */
+ SAMRecord peek();
+}
diff --git a/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java b/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java
index f427117f6..7f029eeeb 100755
--- a/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java
+++ b/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java
@@ -337,34 +337,7 @@ public abstract class TraversalEngine {
return true;
}
- protected Iterator initializeReads() {
- if( readsFiles.size() == 1 )
- samReader = initializeSAMFile(readsFiles.get(0));
- else
- samReader = null;
- return wrapReadsIterator(getReadsIterator(samReader), true);
- }
-
- protected Iterator getReadsIterator(final SAMFileReader samReader) {
- // If the file has an index, querying functions are available. Use them if possible...
- if ( samReader == null && readsFiles.size() > 0 ) {
- SAMFileHeader.SortOrder SORT_ORDER = SAMFileHeader.SortOrder.coordinate;
-
- List readers = new ArrayList();
-
- for ( File readsFile: readsFiles ) {
- SAMFileReader reader = initializeSAMFile(readsFile);
- readers.add(reader);
- }
-
- SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(readers, SORT_ORDER);
- return new MergingSamRecordIterator2(headerMerger);
- }
- else {
- return samReader.iterator();
- }
- }
-
+
@Deprecated
protected StingSAMIterator wrapReadsIterator( final Iterator rawIterator, final boolean enableVerification ) {
// Reads sourceInfo is gone by this point in the traversal engine. Stub in a null and rely on the iterator to
diff --git a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseDuplicates.java b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseDuplicates.java
index 25c033806..50266bb09 100755
--- a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseDuplicates.java
+++ b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseDuplicates.java
@@ -246,39 +246,7 @@ public class TraverseDuplicates extends TraversalEngine {
return sum;
}
- // --------------------------------------------------------------------------------------------------------------
- //
- // old style interface to the system
- //
- // --------------------------------------------------------------------------------------------------------------
- @Override
- public T traverse(Walker walker, List locations) {
- if ( walker instanceof DuplicateWalker) {
- Walker x = walker;
- DuplicateWalker, ?> dupWalker = (DuplicateWalker, ?>)x;
- return (T)this.traverseByRead(dupWalker, locations);
- } else {
- throw new IllegalArgumentException("Walker isn't a duplicate walker!");
- }
- }
- /**
- * Should we deleted at the soonist possible opportunity
- */
- public Object traverseByRead(DuplicateWalker walker, List locations) {
- samReadIter = initializeReads();
-
- // Initialize the walker
- walker.initialize();
-
- // Initialize the sum
- FilteringIterator filterIter = new FilteringIterator(samReadIter, new duplicateStreamFilterFunc());
- T sum = actuallyTraverse(walker, filterIter, walker.reduceInit());
-
- //printOnTraversalDone("reads", sum);
- walker.onTraversalDone(sum);
- return sum;
- }
// --------------------------------------------------------------------------------------------------------------
//
diff --git a/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMIterator.java b/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMIterator.java
new file mode 100644
index 000000000..e371749f9
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMIterator.java
@@ -0,0 +1,165 @@
+package org.broadinstitute.sting.utils.sam;
+
+import org.broadinstitute.sting.gatk.iterators.PeekingStingIterator;
+import org.broadinstitute.sting.gatk.Reads;
+import net.sf.samtools.SAMRecord;
+import net.sf.samtools.SAMFileHeader;
+
+import java.util.Iterator;
+
+
+/*
+ * Copyright (c) 2009 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/** this fake iterator allows us to look at how specific piles of reads are handled */
+public class ArtificialSAMIterator implements PeekingStingIterator {
+
+
+ protected int currentChromo = 0;
+ protected int currentRead = 0;
+ protected int totalReadCount = 0;
+ protected boolean done = false;
+ // the next record
+ protected SAMRecord next = null;
+ protected SAMFileHeader header = null;
+
+ // the passed in parameters
+ protected final int sChr;
+ protected final int eChromosomeCount;
+ protected final int rCount;
+ protected int uMappedReadCount;
+
+ // let us know to make a read, we need this to help out the fake sam query iterator
+ private boolean initialized = false;
+
+ /**
+ * create the fake iterator, given the mapping of chromosomes and read counts
+ *
+ * @param startingChr the starting chromosome
+ * @param endingChr the ending chromosome
+ * @param readCount the number of reads in each chromosome
+ * @param header the associated header
+ */
+ ArtificialSAMIterator( int startingChr, int endingChr, int readCount, SAMFileHeader header ) {
+ sChr = startingChr;
+ eChromosomeCount = (endingChr - startingChr) + 1;
+ rCount = readCount;
+ this.header = header;
+ this.currentChromo = 0;
+ uMappedReadCount = 0;
+ }
+
+ /**
+ * create the fake iterator, given the mapping of chromosomes and read counts
+ *
+ * @param startingChr the starting chromosome
+ * @param endingChr the ending chromosome
+ * @param readCount the number of reads in each chromosome
+ * @param header the associated header
+ */
+ ArtificialSAMIterator( int startingChr, int endingChr, int readCount, int unmappedReadCount, SAMFileHeader header ) {
+ sChr = startingChr;
+ eChromosomeCount = (endingChr - startingChr) + 1;
+ rCount = readCount;
+ this.header = header;
+ this.currentChromo = 0;
+ this.uMappedReadCount = unmappedReadCount;
+ }
+
+
+ public Reads getSourceInfo() {
+ throw new UnsupportedOperationException("We don't support this");
+ }
+
+ public void close() {
+ // done
+ currentChromo = Integer.MAX_VALUE;
+ }
+
+ public boolean hasNext() {
+ if (!initialized){
+ initialized = true;
+ createNextRead();
+ }
+ if (this.next != null) {
+ return true;
+ }
+ return false;
+ }
+
+ private boolean createNextRead() {
+ if (currentRead >= rCount) {
+ currentChromo++;
+ currentRead = 0;
+ }
+ // check for end condition, have we finished the chromosome listing, and have no unmapped reads
+ if (currentChromo >= eChromosomeCount) {
+ if (uMappedReadCount < 1) {
+ this.next = null;
+ return false;
+ } else {
+ ++totalReadCount;
+ this.next = ArtificialSamUtils.createArtificialRead(this.header, String.valueOf(totalReadCount), -1, -1, 50);
+ --uMappedReadCount;
+ return true;
+ }
+ }
+ ++totalReadCount;
+ this.next = ArtificialSamUtils.createArtificialRead(this.header, String.valueOf(totalReadCount), currentChromo, currentRead, 50);
+ ++currentRead;
+ return true;
+ }
+
+
+ public SAMRecord next() {
+ SAMRecord ret = next;
+ createNextRead();
+ return ret;
+ }
+
+ public void remove() {
+ throw new UnsupportedOperationException("You've tried to remove on a StingSAMIterator (unsupported), not to mention that this is a fake iterator.");
+ }
+
+ public Iterator iterator() {
+ return this;
+ }
+
+ /**
+ * some instrumentation methods
+ */
+ public int readsTaken() {
+ return totalReadCount;
+ }
+
+ /**
+ * peek at the next sam record
+ *
+ * @return
+ */
+ public SAMRecord peek() {
+ return this.next;
+ }
+}
diff --git a/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMQueryIterator.java b/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMQueryIterator.java
new file mode 100644
index 000000000..c6f7793be
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMQueryIterator.java
@@ -0,0 +1,181 @@
+package org.broadinstitute.sting.utils.sam;
+
+import net.sf.samtools.SAMFileHeader;
+import net.sf.samtools.SAMSequenceRecord;
+import net.sf.samtools.SAMRecord;
+
+import java.util.List;
+
+import org.broadinstitute.sting.utils.StingException;
+
+
+/*
+ * Copyright (c) 2009 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * @author aaron
+ *
+ * Class ArtificialSAMQueryIterator
+ *
+ * A descriptions should go here. Blame aaron if it's missing.
+ */
+public class ArtificialSAMQueryIterator extends ArtificialSAMIterator {
+
+ // get the next positon
+ protected int finalPos = 0;
+ protected int startPos = 0;
+ protected int contigIndex = -1;
+ protected boolean overlapping = false;
+ protected int startingChr = 0;
+ protected boolean seeked = false;
+
+ /**
+ * create the fake iterator, given the mapping of chromosomes and read counts
+ *
+ * @param startingChr the starting chromosome
+ * @param endingChr the ending chromosome
+ * @param readCount the number of reads in each chromosome
+ * @param header the associated header
+ */
+ ArtificialSAMQueryIterator( int startingChr, int endingChr, int readCount, int unmappedReadCount, SAMFileHeader header ) {
+ super(startingChr, endingChr, readCount, unmappedReadCount, header);
+ this.startingChr = startingChr;
+ }
+
+ /**
+ * query containing - get reads contained by the specified interval
+ *
+ * @param contig the contig index string
+ * @param start the start position
+ * @param stop the stop position
+ */
+ public void queryContained( String contig, int start, int stop ) {
+ this.overlapping = false;
+ initialize(contig, start, stop);
+ }
+
+ /**
+ * query containing - get reads contained by the specified interval
+ *
+ * @param contig the contig index string
+ * @param start the start position
+ * @param stop the stop position
+ */
+ public void queryOverlapping( String contig, int start, int stop ) {
+ this.overlapping = true;
+ initialize(contig, start, stop);
+ }
+
+
+ /**
+ * initialize the query iterator
+ *
+ * @param contig the contig
+ * @param start the start position
+ * @param stop the stop postition
+ */
+ private void initialize( String contig, int start, int stop ) {
+ ensureUntouched();
+ finalPos = stop;
+ startPos = start;
+ if (finalPos < 0) {
+ finalPos = Integer.MAX_VALUE;
+ }
+ // sanity check that we have the contig
+ contigIndex = -1;
+ List list = header.getSequenceDictionary().getSequences();
+ for (SAMSequenceRecord rec : list) {
+ if (rec.getSequenceName().equals(contig)) {
+ contigIndex = rec.getSequenceIndex();
+ }
+ }
+ if (contigIndex < 0) { throw new IllegalArgumentException("Contig" + contig + " doesn't exist"); }
+ while (super.hasNext() && this.peek().getReferenceIndex() < contigIndex) {
+ super.next();
+ }
+ if (!super.hasNext()) {
+ throw new StingException("Unable to find the target chromosome");
+ }
+ while (super.hasNext() && this.peek().getAlignmentStart() < start) {
+ super.next();
+ }
+ // sanity check that we have an actual matching read next
+ SAMRecord rec = this.peek();
+ if (!matches(rec)) {
+ throw new StingException("The next read doesn't match");
+ }
+ // set the seeked variable to true
+ seeked = true;
+ }
+
+ /**
+ * given a read and the query type, check if it matches our regions
+ *
+ * @param rec the read
+ *
+ * @return true if it belongs in our region
+ */
+ public boolean matches( SAMRecord rec ) {
+ if (rec.getReferenceIndex() != this.contigIndex) {
+ return false;
+ }
+ if (!overlapping) {
+ // if the start or the end are somewhere within our range
+ if (( rec.getAlignmentStart() >= startPos && rec.getAlignmentEnd() <= finalPos )) {
+ return true;
+ }
+ } else {
+ if (( rec.getAlignmentStart() <= finalPos && rec.getAlignmentStart() >= startPos ) ||
+ ( rec.getAlignmentEnd() <= finalPos && rec.getAlignmentEnd() >= startPos )) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+
+ /**
+ * override the hasNext, to incorportate our limiting factor
+ *
+ * @return
+ */
+ public boolean hasNext() {
+ boolean res = super.hasNext();
+ if (!seeked) {
+ return res;
+ }
+ if (res && matches(this.next)) {
+ return true;
+ }
+ return false;
+ }
+
+ /** make sure we haven't been used as an iterator yet; this is to miror the MergingSamIterator2 action. */
+ public void ensureUntouched() {
+ if (this.currentChromo != 0 || this.currentRead > 0) {
+ throw new UnsupportedOperationException("We've already been used as an iterator; you can't query after that");
+ }
+ }
+}
diff --git a/java/src/org/broadinstitute/sting/utils/sam/ArtificialSamUtils.java b/java/src/org/broadinstitute/sting/utils/sam/ArtificialSamUtils.java
index 30c6897e1..953862ab9 100755
--- a/java/src/org/broadinstitute/sting/utils/sam/ArtificialSamUtils.java
+++ b/java/src/org/broadinstitute/sting/utils/sam/ArtificialSamUtils.java
@@ -5,7 +5,7 @@ import net.sf.samtools.*;
import java.io.File;
import java.util.*;
-import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
+import org.broadinstitute.sting.gatk.iterators.PeekingStingIterator;
import org.broadinstitute.sting.gatk.Reads;
/**
@@ -28,11 +28,6 @@ import org.broadinstitute.sting.gatk.Reads;
/**
* @author aaron
* @version 1.0
- * @date May 21, 2009
- *
- * Class samUtils
- *
- * A collection of utilities for making sam and bam files. Mostly these are for creating sam and bam files for testing purposes.
*/
public class ArtificialSamUtils {
public static final int DEFAULT_READ_LENGTH = 50;
@@ -46,7 +41,7 @@ public class ArtificialSamUtils {
* @param chromosomeSize how large each chromosome is
* @param readsPerChomosome how many reads to make in each chromosome. They'll be aligned from position 1 to x (which is the number of reads)
*/
- public static void createArtificialBamFile(String filename, int numberOfChromosomes, int startingChromosome, int chromosomeSize, int readsPerChomosome) {
+ public static void createArtificialBamFile( String filename, int numberOfChromosomes, int startingChromosome, int chromosomeSize, int readsPerChomosome ) {
SAMFileHeader header = createArtificialSamHeader(numberOfChromosomes, startingChromosome, chromosomeSize);
File outFile = new File(filename);
@@ -70,7 +65,7 @@ public class ArtificialSamUtils {
* @param chromosomeSize how large each chromosome is
* @param readsPerChomosome how many reads to make in each chromosome. They'll be aligned from position 1 to x (which is the number of reads)
*/
- public static void createArtificialSamFile(String filename, int numberOfChromosomes, int startingChromosome, int chromosomeSize, int readsPerChomosome) {
+ public static void createArtificialSamFile( String filename, int numberOfChromosomes, int startingChromosome, int chromosomeSize, int readsPerChomosome ) {
SAMFileHeader header = createArtificialSamHeader(numberOfChromosomes, startingChromosome, chromosomeSize);
File outFile = new File(filename);
@@ -91,14 +86,15 @@ public class ArtificialSamUtils {
* @param numberOfChromosomes the number of chromosomes to create
* @param startingChromosome the starting number for the chromosome (most likely set to 1)
* @param chromosomeSize the length of each chromosome
+ *
* @return
*/
- public static SAMFileHeader createArtificialSamHeader(int numberOfChromosomes, int startingChromosome, int chromosomeSize) {
+ public static SAMFileHeader createArtificialSamHeader( int numberOfChromosomes, int startingChromosome, int chromosomeSize ) {
SAMFileHeader header = new SAMFileHeader();
SAMSequenceDictionary dict = new SAMSequenceDictionary();
// make up some sequence records
for (int x = startingChromosome; x < startingChromosome + numberOfChromosomes; x++) {
- SAMSequenceRecord rec = new SAMSequenceRecord("chr" + (x), chromosomeSize /* size */);
+ SAMSequenceRecord rec = new SAMSequenceRecord("chr" + ( x ), chromosomeSize /* size */);
rec.setSequenceLength(chromosomeSize);
dict.addSequence(rec);
}
@@ -114,9 +110,10 @@ public class ArtificialSamUtils {
* @param refIndex the reference index, i.e. what chromosome to associate it with
* @param alignmentStart where to start the alignment
* @param length the length of the read
+ *
* @return the artificial read
*/
- public static SAMRecord createArtificialRead(SAMFileHeader header, String name, int refIndex, int alignmentStart, int length) {
+ public static SAMRecord createArtificialRead( SAMFileHeader header, String name, int refIndex, int alignmentStart, int length ) {
SAMRecord record = new SAMRecord(header);
record.setReadName(name);
record.setReferenceIndex(refIndex);
@@ -131,95 +128,64 @@ public class ArtificialSamUtils {
/**
* create an iterator containing the specified read piles
+ *
* @param startingChr the chromosome (reference ID) to start from
- * @param endingChr the id to end with
- * @param readCount the number of reads per chromosome
+ * @param endingChr the id to end with
+ * @param readCount the number of reads per chromosome
+ *
* @return StingSAMIterator representing the specified amount of fake data
*/
- public static StingSAMIterator unmappedReadIterator(int startingChr, int endingChr, int readCount ) {
- SAMFileHeader header = createArtificialSamHeader((endingChr - startingChr) + 1, startingChr, readCount + DEFAULT_READ_LENGTH);
- Map map = new HashMap();
- for (int x = startingChr; x < endingChr; x++) {
- map.put(x,readCount);
- }
- return new ArtificialSAMIterator(startingChr, endingChr, readCount,header);
+ public static PeekingStingIterator unmappedReadIterator( int startingChr, int endingChr, int readCount ) {
+ SAMFileHeader header = createArtificialSamHeader(( endingChr - startingChr ) + 1, startingChr, readCount + DEFAULT_READ_LENGTH);
+
+ return new ArtificialSAMIterator(startingChr, endingChr, readCount, header);
}
-
-}
-
-/**
- * this fake iterator allows us to look at how specific piles of reads are handled
- */
-class ArtificialSAMIterator implements StingSAMIterator {
-
-
- private int currentChromo = 0;
- private int currentRead = 0;
- private int readCount = 0;
- private boolean done = false;
- // the next record
- private SAMRecord next = null;
- SAMFileHeader header = null;
-
- // the passed in parameters
- final int sChr;
- final int eChr;
- final int rCount;
-
/**
- * create the fake iterator, given the mapping of chromosomes and read counts
- * @param startingChr the starting chromosome
- * @param endingChr the ending chromosome
- * @param readCount the number of reads in each chromosome
- * @param header the associated header
+ * create an iterator containing the specified read piles
+ *
+ * @param startingChr the chromosome (reference ID) to start from
+ * @param endingChr the id to end with
+ * @param readCount the number of reads per chromosome
+ * @param unmappedReadCount the count of unmapped reads to place at the end of the iterator, like in a sorted bam file
+ *
+ * @return StingSAMIterator representing the specified amount of fake data
*/
- ArtificialSAMIterator(int startingChr, int endingChr, int readCount, SAMFileHeader header) {
- sChr = startingChr;
- eChr = endingChr;
- rCount = readCount;
- this.header = header;
- this.currentChromo = startingChr;
+ public static PeekingStingIterator unmappedReadIterator( int startingChr, int endingChr, int readCount, int unmappedReadCount ) {
+ SAMFileHeader header = createArtificialSamHeader(( endingChr - startingChr ) + 1, startingChr, readCount + DEFAULT_READ_LENGTH);
+
+ return new ArtificialSAMIterator(startingChr, endingChr, readCount, unmappedReadCount, header);
}
- public Reads getSourceInfo() {
- throw new UnsupportedOperationException("We don't support this");
+ /**
+ * create an ArtificialSAMQueryIterator containing the specified read piles
+ *
+ * @param startingChr the chromosome (reference ID) to start from
+ * @param endingChr the id to end with
+ * @param readCount the number of reads per chromosome
+ *
+ * @return StingSAMIterator representing the specified amount of fake data
+ */
+ public static ArtificialSAMQueryIterator queryReadIterator( int startingChr, int endingChr, int readCount ) {
+ SAMFileHeader header = createArtificialSamHeader(( endingChr - startingChr ) + 1, startingChr, readCount + DEFAULT_READ_LENGTH);
+
+ return new ArtificialSAMQueryIterator(startingChr, endingChr, readCount, 0, header);
}
- public void close() {
- // done
- currentChromo = Integer.MAX_VALUE;
- }
+ /**
+ * create an ArtificialSAMQueryIterator containing the specified read piles
+ *
+ * @param startingChr the chromosome (reference ID) to start from
+ * @param endingChr the id to end with
+ * @param readCount the number of reads per chromosome
+ * @param unmappedReadCount the count of unmapped reads to place at the end of the iterator, like in a sorted bam file
+ *
+ * @return StingSAMIterator representing the specified amount of fake data
+ */
+ public static ArtificialSAMQueryIterator queryReadIterator( int startingChr, int endingChr, int readCount, int unmappedReadCount ) {
+ SAMFileHeader header = createArtificialSamHeader(( endingChr - startingChr ) + 1, startingChr, readCount + DEFAULT_READ_LENGTH);
- public boolean hasNext() {
- if (currentRead >= rCount) {
- currentChromo++;
- currentRead = 0;
- }
- if (currentChromo >= eChr) {
- return false;
- }
- int alignment = 0;
- if (currentChromo >= 0) {
- alignment = currentRead;
- } else {
- alignment = 0;
- }
-
- this.next = ArtificialSamUtils.createArtificialRead(this.header,String.valueOf(readCount), currentChromo, alignment, 50);
- currentRead++;
- return true;
- }
-
- public SAMRecord next() {
- return next;
- }
-
- public void remove() {
- throw new UnsupportedOperationException("You've tried to remove on a StingSAMIterator (unsupported), not to mention that this is a fake iterator.");
- }
-
- public Iterator iterator() {
- return this;
+ return new ArtificialSAMQueryIterator(startingChr, endingChr, readCount, unmappedReadCount, header);
}
}
+
diff --git a/java/test/org/broadinstitute/sting/gatk/dataSources/shards/IntervalShardStrategyTest.java b/java/test/org/broadinstitute/sting/gatk/dataSources/shards/IntervalShardStrategyTest.java
index 4330ea46a..2b6a49ad4 100755
--- a/java/test/org/broadinstitute/sting/gatk/dataSources/shards/IntervalShardStrategyTest.java
+++ b/java/test/org/broadinstitute/sting/gatk/dataSources/shards/IntervalShardStrategyTest.java
@@ -43,6 +43,12 @@ import net.sf.samtools.SAMFileHeader;
* Class ReadIntervalShardStrategyTest
*
* Tests the ReadIntervalShardStrategy class
+ *
+ *
+ * TODO: this test has been changed, since we now force interval shards to not subdivide if they're large,
+ * so you should always get one shard back, reguardless of the specified length. If this behavior changes
+ * back in the future, the expected number of shards should change from 1 to > 1.
+ *
*/
public class IntervalShardStrategyTest extends BaseTest {
@@ -75,7 +81,7 @@ public class IntervalShardStrategyTest extends BaseTest {
counter++;
}
assertTrue(d instanceof IntervalShard);
- assertEquals(10, counter);
+ assertEquals(1, counter);
}
@Test
@@ -92,7 +98,7 @@ public class IntervalShardStrategyTest extends BaseTest {
counter++;
}
assertTrue(d instanceof IntervalShard);
- assertEquals(50, counter);
+ assertEquals(5, counter);
}
@Test
@@ -105,16 +111,11 @@ public class IntervalShardStrategyTest extends BaseTest {
int counter = 0;
while (strat.hasNext()) {
Shard d = strat.next();
- if (counter % 2 == 0) {
assertEquals(1, d.getGenomeLoc().getStart());
- assertEquals(789, d.getGenomeLoc().getStop());
- } else {
- assertEquals(790, d.getGenomeLoc().getStart());
assertEquals(1000, d.getGenomeLoc().getStop());
- }
counter++;
}
- assertEquals(10, counter);
+ assertEquals(5, counter);
}
diff --git a/java/test/org/broadinstitute/sting/gatk/dataSources/simpleDataSources/SAMBAMDataSourceTest.java b/java/test/org/broadinstitute/sting/gatk/dataSources/simpleDataSources/SAMBAMDataSourceTest.java
index 8b192feda..aff387c0b 100755
--- a/java/test/org/broadinstitute/sting/gatk/dataSources/simpleDataSources/SAMBAMDataSourceTest.java
+++ b/java/test/org/broadinstitute/sting/gatk/dataSources/simpleDataSources/SAMBAMDataSourceTest.java
@@ -89,7 +89,7 @@ public class SAMBAMDataSourceTest extends BaseTest {
Reads reads = new Reads(fl);
try {
- SAMDataSource data = new SAMDataSource(reads);
+ SAMDataSource data = new SAMDataSource(reads,false);
for (Shard sh : strat) {
int readCount = 0;
count++;
@@ -139,7 +139,7 @@ public class SAMBAMDataSourceTest extends BaseTest {
int count = 0;
try {
- SAMDataSource data = new SAMDataSource(reads);
+ SAMDataSource data = new SAMDataSource(reads,false);
for (Shard sh : strat) {
int readCount = 0;
count++;
@@ -176,7 +176,7 @@ public class SAMBAMDataSourceTest extends BaseTest {
logger.debug("Pile two:");
try {
- SAMDataSource data = new SAMDataSource(reads);
+ SAMDataSource data = new SAMDataSource(reads,false);
for (Shard sh : strat) {
int readCount = 0;
count++;
diff --git a/java/test/org/broadinstitute/sting/gatk/dataSources/simpleDataSources/SAMByReadsTest.java b/java/test/org/broadinstitute/sting/gatk/dataSources/simpleDataSources/SAMByReadsTest.java
index 5edcd0394..2c6e456e3 100755
--- a/java/test/org/broadinstitute/sting/gatk/dataSources/simpleDataSources/SAMByReadsTest.java
+++ b/java/test/org/broadinstitute/sting/gatk/dataSources/simpleDataSources/SAMByReadsTest.java
@@ -2,15 +2,20 @@ package org.broadinstitute.sting.gatk.dataSources.simpleDataSources;
import static junit.framework.Assert.fail;
import net.sf.samtools.SAMRecord;
+import net.sf.samtools.SAMFileReader;
+import net.sf.samtools.SAMFileHeader;
+import net.sf.samtools.util.CloseableIterator;
import org.broadinstitute.sting.BaseTest;
-import org.broadinstitute.sting.gatk.dataSources.shards.Shard;
import org.broadinstitute.sting.gatk.dataSources.shards.ShardStrategy;
import org.broadinstitute.sting.gatk.dataSources.shards.ShardStrategyFactory;
import org.broadinstitute.sting.gatk.iterators.BoundedReadIterator;
+import org.broadinstitute.sting.gatk.iterators.*;
import org.broadinstitute.sting.gatk.Reads;
import org.broadinstitute.sting.utils.GenomeLoc;
-import org.broadinstitute.sting.utils.fasta.FastaSequenceFile2;
+import org.broadinstitute.sting.utils.sam.ArtificialSamUtils;
+import org.broadinstitute.sting.utils.sam.ArtificialSAMQueryIterator;
import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
import org.junit.Before;
import org.junit.Test;
@@ -38,16 +43,14 @@ import java.util.List;
/**
* @author aaron
* @version 1.0
- * @date Apr 15, 2009
- *
- * Class SAMByReadsTest
- *
- * Test sam traversal by reads FIX ME
*/
public class SAMByReadsTest extends BaseTest {
- private FastaSequenceFile2 seq;
+
private List fl;
+ ShardStrategy shardStrategy;
+ Reads reads;
+ private int targetReadCount = 14;
/**
* This function does the setup of our parser, before each method call.
@@ -59,56 +62,42 @@ public class SAMByReadsTest extends BaseTest {
fl = new ArrayList();
// sequence
- seq = new FastaSequenceFile2(new File(seqLocation + "/references/Homo_sapiens_assembly17/v0/Homo_sapiens_assembly17.fasta"));
- GenomeLoc.setupRefContigOrdering(seq.getSequenceDictionary());
+ //seq = new FastaSequenceFile2(new File(seqLocation + "/references/Homo_sapiens_assembly17/v0/Homo_sapiens_assembly17.fasta"));
+ //GenomeLoc.setupRefContigOrdering(seq.getSequenceDictionary());
+
+ // setup the test files
+ fl.add(new File("/humgen/gsa-scr1/GATK_Data/Validation_Data/index_test.bam"));
+ reads = new Reads(fl);
+
+
}
/** Test out that we can shard the file and iterate over every read */
@Test
- public void testTotalReadCount() {
- logger.warn("Executing testTotalReadCount");
-
- // setup the test files
- fl.add(new File("/humgen/gsa-scr1/GATK_Data/Validation_Data/index_test.bam"));
- Reads reads = new Reads(fl);
-
- final int targetReadCount = 5000;
-
- ShardStrategy shardStrategy = ShardStrategyFactory.shatter(ShardStrategyFactory.SHATTER_STRATEGY.READS,seq.getSequenceDictionary(),targetReadCount);
-
+ public void testToUnmappedReads() {
+ ArtificialIteratorGenerator gen = new ArtificialIteratorGenerator(1,10,100,1000);
+ GenomeLoc.setupRefContigOrdering(gen.getHeader().getSequenceDictionary());
try {
- SAMDataSource data = new SAMDataSource(reads);
-
- // check the total read count
- final int totalReads = 10000;
-
- int readsSeen = 0;
- BoundedReadIterator iter;
-
-
- for (Shard sd : shardStrategy) {
- int readcnt = 0;
-
- iter = (BoundedReadIterator)data.seek(sd);
-
- for (SAMRecord r : iter) {
-
- readcnt++;
-
+ int unmappedReadsSeen = 0;
+ int iterations = 0;
+ SAMDataSource data = new SAMDataSource(reads,true);
+ for (int x = 0; x < 10; x++) {
+ ++iterations;
+ PeekingStingIterator iter = ArtificialSamUtils.unmappedReadIterator(1, 100, 10, 1000);
+ BoundedReadIterator ret = data.toUnmappedReads(100, iter);
+ // count the reads we've gotten back
+ if (ret == null) {
+ fail("On iteration " + iterations + " we were returned a null pointer, after seeing " + unmappedReadsSeen + " reads out of a 1000");
+ }
+ while (ret.hasNext()) {
+ ret.next();
+ unmappedReadsSeen++;
}
-
-
- readsSeen += readcnt;
- //logger.warn("Seen " + readsSeen + " reads.");
-
}
- // make sure we've seen all the reads
- assertEquals(totalReads,readsSeen);
- logger.warn("Success " + readsSeen + " equals target count of " + totalReads);
-
+ assertEquals(1000,unmappedReadsSeen);
}
-
+
catch (SimpleDataSourceLoadException e) {
e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates.
fail("testLinearBreakIterateAll: We Should not get a SimpleDataSourceLoadException");
@@ -116,4 +105,145 @@ public class SAMByReadsTest extends BaseTest {
}
+
+ /** Test out that we can shard the file and iterate over every read */
+ @Test
+ public void testShardingOfReadsSize14() {
+ ArtificialIteratorGenerator gen = new ArtificialIteratorGenerator(1,10,100,1000);
+ GenomeLoc.setupRefContigOrdering(gen.getHeader().getSequenceDictionary());
+ targetReadCount = 14;
+ try {
+ int iterations = 0;
+ int readCount = 0;
+ SAMDataSource data = new SAMDataSource(reads,true);
+
+
+ data.setIterGen(gen);
+ shardStrategy = ShardStrategyFactory.shatter(ShardStrategyFactory.SHATTER_STRATEGY.READS, gen.getHeader().getSequenceDictionary(), targetReadCount);
+ while (shardStrategy.hasNext()) {
+
+
+ BoundedReadIterator ret = (BoundedReadIterator)data.seek(shardStrategy.next());
+ assertTrue(ret != null);
+ while (ret.hasNext()) {
+ ret.next();
+ readCount++;
+ }
+ ret.close();
+ iterations++;
+ }
+
+ // assert that we saw 2000 reads
+ assertEquals(2000,readCount);
+
+ /**
+ * this next assertion is based on the following logic:
+ * 14 reads per shard = 8 shards per each 100 read chromosome
+ * 10 chromosomes = 8 * 10 = 80
+ * 1000 unmapped reads / 14 = 72
+ * 1 iteration at the end to know we're done
+ * 80 + 72 + 1 = 153
+ */
+ assertEquals(153,iterations);
+
+ }
+
+ catch (SimpleDataSourceLoadException e) {
+ e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates.
+ fail("testLinearBreakIterateAll: We Should not get a SimpleDataSourceLoadException");
+ }
+
+
+ }
+
+ /** Test out that we can shard the file and iterate over every read */
+ @Test
+ public void testShardingOfReadsSize25() {
+ ArtificialIteratorGenerator gen = new ArtificialIteratorGenerator(1,10,100,1000);
+ GenomeLoc.setupRefContigOrdering(gen.getHeader().getSequenceDictionary());
+ targetReadCount = 25;
+ try {
+ int iterations = 0;
+ int readCount = 0;
+ SAMDataSource data = new SAMDataSource(reads,true);
+
+
+ data.setIterGen(gen);
+ shardStrategy = ShardStrategyFactory.shatter(ShardStrategyFactory.SHATTER_STRATEGY.READS, gen.getHeader().getSequenceDictionary(), targetReadCount);
+ while (shardStrategy.hasNext()) {
+
+
+ BoundedReadIterator ret = (BoundedReadIterator)data.seek(shardStrategy.next());
+ assertTrue(ret != null);
+ while (ret.hasNext()) {
+ ret.next();
+ readCount++;
+ }
+ ret.close();
+ iterations++;
+ }
+
+ // assert that we saw 2000 reads
+ assertEquals(2000,readCount);
+
+ /**
+ * this next assertion is based on the following logic:
+ * 25 reads per shard = 5 shards (1 on the end to realize we're done)
+ * 10 chromosomes = 5 * 10 = 50
+ * 1000 unmapped reads / 25 = 40
+ * 1 iteration at the end to know we're done
+ * 50 + 40 + 1 = 91
+ */
+ assertEquals(91,iterations);
+
+ }
+
+ catch (SimpleDataSourceLoadException e) {
+ e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates.
+ fail("testLinearBreakIterateAll: We Should not get a SimpleDataSourceLoadException");
+ }
+
+
+ }
+
+
}
+
+/**
+ * use this to inject into SAMDataSource for testing
+ */
+class ArtificialIteratorGenerator extends IteratorGenerator {
+ // How strict should we be with SAM/BAM parsing?
+ protected SAMFileReader.ValidationStringency strictness = SAMFileReader.ValidationStringency.SILENT;
+
+ // the header
+ private SAMFileHeader header;
+ private int endingChr;
+ private int startingChr;
+ private int readCount;
+ private int readSize;
+ /** our SAM data files */
+ private final SAMFileHeader.SortOrder sortOrder = SAMFileHeader.SortOrder.coordinate;
+
+ public ArtificialIteratorGenerator( int startingChr, int endingChr, int readCount, int readSize) {
+ header = ArtificialSamUtils.createArtificialSamHeader(( endingChr - startingChr ) + 1, startingChr, readCount + readSize);
+
+ }
+
+ public CloseableIterator seek( GenomeLoc seekTo ) {
+ ArtificialSAMQueryIterator iter = ArtificialSamUtils.queryReadIterator(1, 10, 100, 1000);
+ if (seekTo != null) {
+ iter.queryContained(seekTo.getContig(), (int)seekTo.getStart(), (int)seekTo.getStop());
+ }
+ return iter;
+ }
+
+ /**
+ * get the merged header
+ *
+ * @return the merged header
+ */
+ public SAMFileHeader getHeader() {
+ return this.header;
+ }
+}
\ No newline at end of file
diff --git a/java/test/org/broadinstitute/sting/gatk/iterators/BoundedReadIteratorTest.java b/java/test/org/broadinstitute/sting/gatk/iterators/BoundedReadIteratorTest.java
index 40db61131..3ecc8df76 100755
--- a/java/test/org/broadinstitute/sting/gatk/iterators/BoundedReadIteratorTest.java
+++ b/java/test/org/broadinstitute/sting/gatk/iterators/BoundedReadIteratorTest.java
@@ -85,7 +85,7 @@ public class BoundedReadIteratorTest extends BaseTest {
long shardReadCount = 0;
try {
- SAMDataSource data = new SAMDataSource(reads);
+ SAMDataSource data = new SAMDataSource(reads,true);
// make sure we have a shard
if (!strat.hasNext()) {
diff --git a/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsTest.java b/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsTest.java
index d1023c8cd..4e9707e98 100755
--- a/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsTest.java
+++ b/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsTest.java
@@ -122,7 +122,7 @@ public class TraverseReadsTest extends BaseTest {
ref.getSequenceDictionary(),
readSize);
- SAMDataSource dataSource = new SAMDataSource(new Reads(bamList));
+ SAMDataSource dataSource = new SAMDataSource(new Reads(bamList),true);
dataSource.viewUnmappedReads(false);
countReadWalker.initialize();
@@ -170,7 +170,7 @@ public class TraverseReadsTest extends BaseTest {
ref.getSequenceDictionary(),
readSize);
- SAMDataSource dataSource = new SAMDataSource(new Reads(bamList));
+ SAMDataSource dataSource = new SAMDataSource(new Reads(bamList),true);
dataSource.viewUnmappedReads(true);
countReadWalker.initialize();
diff --git a/java/test/org/broadinstitute/sting/utils/sam/ArtificialSAMQueryIteratorTest.java b/java/test/org/broadinstitute/sting/utils/sam/ArtificialSAMQueryIteratorTest.java
new file mode 100644
index 000000000..8ad9c15ba
--- /dev/null
+++ b/java/test/org/broadinstitute/sting/utils/sam/ArtificialSAMQueryIteratorTest.java
@@ -0,0 +1,115 @@
+package org.broadinstitute.sting.utils.sam;
+
+import org.broadinstitute.sting.BaseTest;
+import org.broadinstitute.sting.gatk.iterators.PeekingStingIterator;
+import org.junit.Test;
+import static org.junit.Assert.assertEquals;
+import net.sf.samtools.SAMRecord;
+import static junit.framework.Assert.assertTrue;
+
+
+/*
+ * Copyright (c) 2009 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * @author aaron
+ *
+ * Class ArtificialSAMQueryIteratorTest
+ *
+ * a test for the ArtificialSAMQueryIterator class.
+ */
+public class ArtificialSAMQueryIteratorTest extends BaseTest {
+
+ @Test
+ public void testWholeChromosomeQuery() {
+ ArtificialSAMQueryIterator iter = ArtificialSamUtils.queryReadIterator(1, 2, 100);
+ iter.queryContained("chr1", 1, -1);
+ int count = 0;
+ while (iter.hasNext()) {
+ SAMRecord rec = iter.next();
+ count++;
+ }
+ assertEquals(100, count);
+
+ }
+
+ @Test
+ public void testContainedQueryStart() {
+ ArtificialSAMQueryIterator iter = ArtificialSamUtils.queryReadIterator(1, 2, 100);
+ iter.queryContained("chr1", 1, 50);
+ int count = 0;
+ while (iter.hasNext()) {
+ SAMRecord rec = iter.next();
+ count++;
+ }
+ assertEquals(1, count);
+
+ }
+
+ @Test
+ public void testOverlappingQueryStart() {
+ ArtificialSAMQueryIterator iter = ArtificialSamUtils.queryReadIterator(1, 2, 100);
+ iter.queryOverlapping("chr1", 1, 50);
+ int count = 0;
+ while (iter.hasNext()) {
+ SAMRecord rec = iter.next();
+ count++;
+ }
+ assertEquals(50, count);
+
+ }
+
+ @Test
+ public void testContainedQueryMiddle() {
+ ArtificialSAMQueryIterator iter = ArtificialSamUtils.queryReadIterator(1, 2, 100);
+ iter.queryContained("chr1", 25, 74);
+ int count = 0;
+ while (iter.hasNext()) {
+ SAMRecord rec = iter.next();
+ count++;
+ }
+ assertEquals(1, count);
+
+ }
+
+ @Test
+ public void testOverlappingQueryMiddle() {
+ ArtificialSAMQueryIterator iter = ArtificialSamUtils.queryReadIterator(1, 2, 100);
+ iter.queryOverlapping("chr1", 25, 74);
+ int count = 0;
+ while (iter.hasNext()) {
+ SAMRecord rec = iter.next();
+ count++;
+ }
+ assertEquals(50, count);
+
+ }
+
+ @Test(expected = IllegalArgumentException.class)
+ public void testUnknownChromosome() {
+ ArtificialSAMQueryIterator iter = ArtificialSamUtils.queryReadIterator(1, 2, 100);
+ iter.queryOverlapping("chr621", 25, 74);
+ }
+}
diff --git a/java/test/org/broadinstitute/sting/utils/sam/ArtificialSamUtilsTest.java b/java/test/org/broadinstitute/sting/utils/sam/ArtificialSamUtilsTest.java
new file mode 100644
index 000000000..558c47b74
--- /dev/null
+++ b/java/test/org/broadinstitute/sting/utils/sam/ArtificialSamUtilsTest.java
@@ -0,0 +1,109 @@
+package org.broadinstitute.sting.utils.sam;
+
+import org.broadinstitute.sting.BaseTest;
+import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
+import org.broadinstitute.sting.gatk.iterators.PeekingStingIterator;
+import org.junit.Test;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.fail;
+import static junit.framework.Assert.assertTrue;
+import net.sf.samtools.SAMRecord;
+
+/**
+ * Created by IntelliJ IDEA.
+ * User: aaronmckenna
+ * Date: Jun 3, 2009
+ * Time: 3:09:34 AM
+ * To change this template use File | Settings | File Templates.
+ */
+public class ArtificialSamUtilsTest extends BaseTest {
+
+
+ @Test
+ public void basicReadIteratorTest() {
+ StingSAMIterator iter = ArtificialSamUtils.unmappedReadIterator(1, 100, 100);
+ int count = 0;
+ while (iter.hasNext()) {
+ SAMRecord rec = iter.next();
+ count++;
+ }
+ assertEquals(100 * 100, count);
+ }
+
+ @Test
+ public void tenPerChromosome() {
+ StingSAMIterator iter = ArtificialSamUtils.unmappedReadIterator(1, 100, 10);
+ int count = 0;
+ while (iter.hasNext()) {
+ SAMRecord rec = iter.next();
+
+ assertEquals(Integer.valueOf(Math.round(count / 10)), rec.getReferenceIndex());
+ count++;
+ }
+ assertEquals(100 * 10, count);
+ }
+
+ @Test
+ public void onePerChromosome() {
+ StingSAMIterator iter = ArtificialSamUtils.unmappedReadIterator(1, 100, 1);
+ int count = 0;
+ while (iter.hasNext()) {
+ SAMRecord rec = iter.next();
+
+ assertEquals(Integer.valueOf(count), rec.getReferenceIndex());
+ count++;
+ }
+ assertEquals(100 * 1, count);
+ }
+
+ @Test
+ public void basicUnmappedIteratorTest() {
+ StingSAMIterator iter = ArtificialSamUtils.unmappedReadIterator(1, 100, 100, 1000);
+ int count = 0;
+ for (int x = 0; x < (100* 100); x++ ) {
+ if (!iter.hasNext()) {
+ fail ("we didn't get the expected number of reads");
+ }
+ SAMRecord rec = iter.next();
+ assertTrue(rec.getReferenceIndex() >= 0);
+ count++;
+ }
+ assertEquals(count, 100 * 100);
+
+ // now we should have 1000 unmapped reads
+ count = 0;
+ while (iter.hasNext()) {
+ SAMRecord rec = iter.next();
+ assertTrue(rec.getReferenceIndex() < 0);
+ count++;
+ }
+ assertEquals(1000, count);
+ }
+
+ @Test
+ public void testPeeking() {
+ PeekingStingIterator iter = ArtificialSamUtils.unmappedReadIterator(1, 100, 100);
+ int count = 0;
+ while (iter.hasNext()) {
+ int readCnt = ((ArtificialSAMIterator)(iter)).readsTaken();
+
+ // peek the record
+ SAMRecord rec = iter.peek();
+ assertTrue(rec.getReferenceIndex() >= 0);
+
+ // next the record
+ SAMRecord rec2 = iter.next();
+ assertTrue(rec2.getReadName() == rec.getReadName());
+ assertTrue(rec2.getAlignmentStart() == rec.getAlignmentStart());
+
+ // find out how many reads we've taken now
+ int readCnt2 = ((ArtificialSAMIterator)(iter)).readsTaken();
+
+ count++;
+ if (count < 100*100) assertEquals(readCnt + 1, readCnt2);
+ else assertEquals(readCnt, readCnt2);
+ }
+ assertEquals(100 * 100, count );
+
+ }
+}