diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMDataSource.java b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMDataSource.java
index b3085a523..bb278ba54 100755
--- a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMDataSource.java
+++ b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMDataSource.java
@@ -1,7 +1,6 @@
package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
import net.sf.picard.sam.SamFileHeaderMerger;
-import net.sf.picard.util.PeekableIterator;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMFileReader;
import net.sf.samtools.SAMReadGroupRecord;
@@ -21,24 +20,43 @@ import java.util.ArrayList;
import java.util.List;
import java.util.Iterator;
+/*
+ * Copyright (c) 2009 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
/**
* User: aaron
* Date: Mar 26, 2009
* Time: 2:36:16 PM
- *
- * The Broad Institute
- * SOFTWARE COPYRIGHT NOTICE AGREEMENT
- * This software and its documentation are copyright 2009 by the
- * Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
- *
- * This software is supplied without any warranty or guaranteed support whatsoever. Neither
- * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
+ *
+ * Converts shards to SAM iterators over the specified region
*/
public class SAMDataSource implements SimpleDataSource {
/** Backing support for reads. */
- private Reads reads = null;
+ private final Reads reads;
/** our log, which we want to capture anything from this class */
protected static Logger logger = Logger.getLogger(SAMDataSource.class);
@@ -62,7 +80,7 @@ public class SAMDataSource implements SimpleDataSource {
/**
* constructor, given sam files
*
- * @param reads the list of sam files
+ * @param reads the list of sam files
* @param byReads are we a by reads traversal, or a loci traversal. We could delete this field
* if we passed in iterGen, which would be a better (although more complicated for the
* consumers of SAMDataSources).
@@ -79,11 +97,12 @@ public class SAMDataSource implements SimpleDataSource {
throw new SimpleDataSourceLoadException("SAMDataSource: Unable to load file: " + smFile.getName());
}
}
- iteratorPool = new SAMIteratorPool(reads,byReads);
+ iteratorPool = new SAMIteratorPool(reads, byReads);
}
/**
* For unit testing, add a custom iterator pool.
+ *
* @param iteratorPool Custom mock iterator pool.
*/
void setResourcePool( SAMIteratorPool iteratorPool ) {
@@ -100,7 +119,7 @@ public class SAMDataSource implements SimpleDataSource {
*
* @return an iterator for that region
*/
- public StingSAMIterator seekLocus(GenomeLoc location) throws SimpleDataSourceLoadException {
+ public StingSAMIterator seekLocus( GenomeLoc location ) throws SimpleDataSourceLoadException {
return iteratorPool.iterator(location);
}
@@ -180,7 +199,7 @@ public class SAMDataSource implements SimpleDataSource {
iter.close();
}
iter = iteratorPool.iterator(null);
- bound = toUnmappedReads(shard.getSize(), iter);
+ bound = toUnmappedReads(shard.getSize(), (QueryIterator) iter);
}
if (bound == null) {
shard.signalDone();
@@ -211,41 +230,24 @@ public class SAMDataSource implements SimpleDataSource {
* @return the bounded iterator that you can use to get the intervaled reads from
* @throws SimpleDataSourceLoadException
*/
- BoundedReadIterator toUnmappedReads( long readCount, StingSAMIterator iter ) throws SimpleDataSourceLoadException {
- PeekableIterator peekable = new PeekableIterator(iter);
+ BoundedReadIterator toUnmappedReads( long readCount, QueryIterator iter ) throws SimpleDataSourceLoadException {
+ iter.queryUnmappedReads();
int count = 0;
- int cnt = 0;
- SAMRecord d = null;
- while (peekable.hasNext()) {
- d = peekable.peek();
- int x = d.getReferenceIndex();
- if (x < 0)
- // we have the magic read that starts the unmapped read segment!
- break;
- cnt++;
- peekable.next();
- }
-
- // check to see what happened, did we run out of reads?
- if (!peekable.hasNext()) {
- return null;
- }
-
// now walk until we've taken the unmapped read count
- while (peekable.hasNext() && count < this.readsTaken) {
- peekable.next();
+ while (iter.hasNext() && count < this.readsTaken) {
+ iter.next();
count++;
}
// check to see what happened, did we run out of reads?
- if (!peekable.hasNext()) {
+ if (!iter.hasNext()) {
return null;
}
// we're not out of unmapped reads, so increment our read cout
this.readsTaken += readCount;
- return new BoundedReadIterator(StingSAMIteratorAdapter.adapt(reads, peekable), readCount);
+ return new BoundedReadIterator(StingSAMIteratorAdapter.adapt(reads, iter), readCount);
}
@@ -356,77 +358,104 @@ public class SAMDataSource implements SimpleDataSource {
}
-class SAMIteratorPool extends ResourcePool {
- /**
- * Source information about the reads.
- */
+class SAMIteratorPool extends ResourcePool {
+ /** Source information about the reads. */
protected Reads reads;
- /**
- * Is this a by-reads traversal or a by-locus?
- */
+ /** Is this a by-reads traversal or a by-locus? */
protected boolean byReads;
- /**
- * File header for the combined file.
- */
+ /** File header for the combined file. */
protected SAMFileHeader header;
/** our log, which we want to capture anything from this class */
- protected static Logger logger = Logger.getLogger(SAMIteratorPool.class);
+ protected static Logger logger = Logger.getLogger(SAMIteratorPool.class);
public SAMIteratorPool( Reads reads, boolean byReads ) {
this.reads = reads;
this.byReads = byReads;
- SamFileHeaderMerger merger = createNewResource( null );
+ SamFileHeaderMerger merger = createNewResource(null);
this.header = merger.getMergedHeader();
// Add this resource to the pool.
- this.addNewResource( merger );
+ this.addNewResource(merger);
}
- /**
- * Get the combined header for all files in the iterator pool.
- */
+ /** Get the combined header for all files in the iterator pool. */
public SAMFileHeader getHeader() {
return header;
}
- protected SamFileHeaderMerger selectBestExistingResource( GenomeLoc position, List mergers) {
- if( mergers.size() == 0 )
+ protected SamFileHeaderMerger selectBestExistingResource( GenomeLoc position, List mergers ) {
+ if (mergers.size() == 0)
return null;
return mergers.get(0);
}
protected SamFileHeaderMerger createNewResource( GenomeLoc position ) {
- return createHeaderMerger( reads, SAMFileHeader.SortOrder.coordinate );
+ return createHeaderMerger(reads, SAMFileHeader.SortOrder.coordinate);
}
- protected StingSAMIterator createIteratorFromResource( GenomeLoc loc, SamFileHeaderMerger headerMerger ) {
+ protected QueryIterator createIteratorFromResource( GenomeLoc loc, SamFileHeaderMerger headerMerger ) {
final MergingSamRecordIterator2 iterator = new MergingSamRecordIterator2(headerMerger, reads);
- if( loc != null ) {
+ if (loc != null) {
if (byReads)
iterator.queryContained(loc.getContig(), (int) loc.getStart(), (int) loc.getStop());
else
iterator.queryOverlapping(loc.getContig(), (int) loc.getStart(), (int) loc.getStop());
}
- return new StingSAMIterator() {
- public Reads getSourceInfo() { return reads; }
+ return new QueryIterator() {
+ public Reads getSourceInfo() {
+ return reads;
+ }
+
public void close() {
iterator.close();
release(this);
}
- public Iterator iterator() { return this; }
- public boolean hasNext() { return iterator.hasNext(); }
- public SAMRecord next() { return iterator.next(); }
- public void remove() { throw new UnsupportedOperationException("Can't remove from a StingSAMIterator"); }
+
+ public Iterator iterator() {
+ return this;
+ }
+
+ public boolean hasNext() {
+ return iterator.hasNext();
+ }
+
+ public SAMRecord next() {
+ return iterator.next();
+ }
+
+ public void remove() {
+ throw new UnsupportedOperationException("Can't remove from a StingSAMIterator");
+ }
+
+ public SAMRecord peek() {
+ return iterator.peek();
+ }
+
+ public void queryOverlapping( String contig, int start, int stop ) {
+ iterator.queryOverlapping(contig, start, stop);
+ }
+
+ public void query( String contig, int start, int stop, boolean contained ) {
+ iterator.query(contig, start, stop, contained);
+ }
+
+ public void queryUnmappedReads() {
+ iterator.queryUnmappedReads();
+ }
+
+ public void queryContained( String contig, int start, int stop ) {
+ iterator.queryContained(contig, start, stop);
+ }
};
}
protected void closeResource( SamFileHeaderMerger resource ) {
- for( SAMFileReader reader: resource.getReaders() )
+ for (SAMFileReader reader : resource.getReaders())
reader.close();
}
diff --git a/java/src/org/broadinstitute/sting/gatk/iterators/MergingSamRecordIterator2.java b/java/src/org/broadinstitute/sting/gatk/iterators/MergingSamRecordIterator2.java
index 3c55d71be..c3bdc5737 100644
--- a/java/src/org/broadinstitute/sting/gatk/iterators/MergingSamRecordIterator2.java
+++ b/java/src/org/broadinstitute/sting/gatk/iterators/MergingSamRecordIterator2.java
@@ -34,6 +34,7 @@ import net.sf.samtools.util.CloseableIterator;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.Reads;
import org.broadinstitute.sting.utils.StingException;
+import org.broadinstitute.sting.utils.Utils;
import java.lang.reflect.Constructor;
import java.util.Comparator;
@@ -46,7 +47,7 @@ import java.util.PriorityQueue;
* iterable stream. The underlying iterators/files must all have the same sort order unless
* the requested output format is unsorted, in which case any combination is valid.
*/
-public class MergingSamRecordIterator2 implements CloseableIterator, Iterable, PeekingStingIterator {
+public class MergingSamRecordIterator2 implements CloseableIterator, Iterable, QueryIterator {
protected PriorityQueue pq = null;
protected final SamFileHeaderMerger samHeaderMerger;
protected final SAMFileHeader.SortOrder sortOrder;
@@ -54,6 +55,7 @@ public class MergingSamRecordIterator2 implements CloseableIterator,
private SAMRecord mNextRecord;
protected boolean initialized = false;
protected final Reads reads;
+ protected boolean warnedUserAboutSortOrder = false; // so we only warn the user once
/**
* Constructs a new merging iterator with the same set of readers and sort order as
@@ -62,7 +64,7 @@ public class MergingSamRecordIterator2 implements CloseableIterator,
* @param headerMerger the header to merge
* @param reads the reads pile
*/
- public MergingSamRecordIterator2(final SamFileHeaderMerger headerMerger, Reads reads) {
+ public MergingSamRecordIterator2( final SamFileHeaderMerger headerMerger, Reads reads ) {
this.samHeaderMerger = headerMerger;
this.reads = reads;
this.sortOrder = headerMerger.getMergedHeader().getSortOrder();
@@ -90,11 +92,24 @@ public class MergingSamRecordIterator2 implements CloseableIterator,
}
- private void checkSortOrder(SAMFileReader reader) {
+ /**
+ * verify the sort order
+ *
+ * @param reader the reader to check
+ */
+ private void checkSortOrder( SAMFileReader reader ) {
if (this.sortOrder != SAMFileHeader.SortOrder.unsorted && reader.getFileHeader().getSortOrder() != this.sortOrder) {
- throw new PicardException("Files are not compatible with sort order: " + reader.getFileHeader().getSortOrder() +
- " vrs " + this.sortOrder + ". Make sure that the SO flag in your bam file is set (The reader attribute for sort order equals "
- + reader.getFileHeader().getAttribute("SO") + " in this case).");
+ if (reads.getSafetyChecking()) {
+ throw new PicardException("Files are not compatible with sort order: " + reader.getFileHeader().getSortOrder() +
+ " vrs " + this.sortOrder + ". Make sure that the SO flag in your bam file is set (The reader attribute for sort order equals "
+ + reader.getFileHeader().getAttribute("SO") + " in this case).");
+ } else if(!warnedUserAboutSortOrder) {
+ warnedUserAboutSortOrder = true;
+ Utils.warnUser("Files are not compatible with sort order: " + reader.getFileHeader().getSortOrder() +
+ " vrs " + this.sortOrder + ". Make sure that the SO flag in your bam file is set (The reader attribute for sort order equals "
+ + reader.getFileHeader().getAttribute("SO") + " in this case).");
+ }
+
}
}
@@ -102,7 +117,7 @@ public class MergingSamRecordIterator2 implements CloseableIterator,
return true;
}
- public void queryOverlapping(final String contig, final int start, final int stop) {
+ public void queryOverlapping( final String contig, final int start, final int stop ) {
if (initialized) {
throw new IllegalStateException("You cannot double initialize a MergingSamRecordIterator2");
}
@@ -118,7 +133,7 @@ public class MergingSamRecordIterator2 implements CloseableIterator,
}
- public void query(final String contig, final int start, final int stop, final boolean contained) {
+ public void query( final String contig, final int start, final int stop, final boolean contained ) {
if (initialized) {
throw new IllegalStateException("You cannot double initialize a MergingSamRecordIterator2");
}
@@ -133,7 +148,22 @@ public class MergingSamRecordIterator2 implements CloseableIterator,
}
- public void queryContained(final String contig, final int start, final int stop) {
+ public void queryUnmappedReads() {
+ if (initialized) {
+ throw new IllegalStateException("You cannot double initialize a MergingSamRecordIterator2");
+ }
+ final SAMRecordComparator comparator = getComparator();
+ for (final SAMFileReader reader : samHeaderMerger.getReaders()) {
+ Iterator recordIter = reader.queryUnmapped();
+ final ComparableSamRecordIterator iterator = new ComparableSamRecordIterator(reader, recordIter, comparator);
+ addIfNotEmpty(iterator);
+ }
+ setInitialized();
+
+ }
+
+
+ public void queryContained( final String contig, final int start, final int stop ) {
if (initialized) {
throw new IllegalStateException("You cannot double initialize a MergingSamRecordIterator2");
}
@@ -237,7 +267,7 @@ public class MergingSamRecordIterator2 implements CloseableIterator,
*
* @param iterator the iterator to add
*/
- protected void addIfNotEmpty(final ComparableSamRecordIterator iterator) {
+ protected void addIfNotEmpty( final ComparableSamRecordIterator iterator ) {
//System.out.printf("Adding %s %s %d%n", iterator.peek().getReadName(), iterator.peek().getReferenceName(), iterator.peek().getAlignmentStart());
if (iterator.hasNext()) {
pq.offer(iterator);
@@ -262,11 +292,11 @@ public class MergingSamRecordIterator2 implements CloseableIterator,
// For unsorted build a fake comparator that compares based on object ID
if (this.sortOrder == SAMFileHeader.SortOrder.unsorted) {
return new SAMRecordComparator() {
- public int fileOrderCompare(final SAMRecord lhs, final SAMRecord rhs) {
+ public int fileOrderCompare( final SAMRecord lhs, final SAMRecord rhs ) {
return System.identityHashCode(lhs) - System.identityHashCode(rhs);
}
- public int compare(final SAMRecord lhs, final SAMRecord rhs) {
+ public int compare( final SAMRecord lhs, final SAMRecord rhs ) {
return fileOrderCompare(lhs, rhs);
}
};
@@ -345,13 +375,13 @@ class ComparableSamRecordIterator extends PeekableIterator implements
* @param sam the SAM file to read records from
* @param comparator the Comparator to use to provide ordering fo SAMRecords
*/
- public ComparableSamRecordIterator(final SAMFileReader sam, final Comparator comparator) {
+ public ComparableSamRecordIterator( final SAMFileReader sam, final Comparator comparator ) {
super(sam.iterator());
this.reader = sam;
this.comparator = comparator;
}
- public ComparableSamRecordIterator(final SAMFileReader sam, Iterator iterator, final Comparator comparator) {
+ public ComparableSamRecordIterator( final SAMFileReader sam, Iterator iterator, final Comparator comparator ) {
super(iterator); // use the provided iterator
this.reader = sam;
this.comparator = comparator;
@@ -381,7 +411,7 @@ class ComparableSamRecordIterator extends PeekableIterator implements
*
* @return a negative, 0 or positive number as described in the Comparator interface
*/
- public int compareTo(final ComparableSamRecordIterator that) {
+ public int compareTo( final ComparableSamRecordIterator that ) {
if (this.comparator.getClass() != that.comparator.getClass()) {
throw new IllegalStateException("Attempt to compare two ComparableSAMRecordIterators that " +
"have different orderings internally");
diff --git a/java/src/org/broadinstitute/sting/gatk/iterators/PeekingStingIterator.java b/java/src/org/broadinstitute/sting/gatk/iterators/PeekingIterator.java
similarity index 82%
rename from java/src/org/broadinstitute/sting/gatk/iterators/PeekingStingIterator.java
rename to java/src/org/broadinstitute/sting/gatk/iterators/PeekingIterator.java
index 8abf05409..60c62aedb 100644
--- a/java/src/org/broadinstitute/sting/gatk/iterators/PeekingStingIterator.java
+++ b/java/src/org/broadinstitute/sting/gatk/iterators/PeekingIterator.java
@@ -1,6 +1,5 @@
package org.broadinstitute.sting.gatk.iterators;
-import net.sf.samtools.SAMRecord;
/*
* Copyright (c) 2009 The Broad Institute
@@ -30,14 +29,12 @@ import net.sf.samtools.SAMRecord;
/**
* @author aaron
*
- * Interface Peekable
+ * Class PeekingIterator
*
- * This interface indicates that
+ * a peekable interface, that requires a peek() method
*/
-public interface PeekingStingIterator extends StingSAMIterator {
- /**
- * peek, given the specified type
- * @return
- */
- SAMRecord peek();
+public interface PeekingIterator {
+
+ /** @return returns a peeked value */
+ public T peek();
}
diff --git a/java/src/org/broadinstitute/sting/gatk/iterators/QueryIterator.java b/java/src/org/broadinstitute/sting/gatk/iterators/QueryIterator.java
new file mode 100644
index 000000000..a4375b68e
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/gatk/iterators/QueryIterator.java
@@ -0,0 +1,48 @@
+package org.broadinstitute.sting.gatk.iterators;
+
+import net.sf.samtools.SAMRecord;
+import net.sf.samtools.SAMRecordComparator;
+import net.sf.samtools.SAMFileReader;
+
+import java.util.Iterator;
+
+/*
+ * Copyright (c) 2009 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * @author aaron
+ *
+ * This interface indicates that the iterator is query able
+ */
+public interface QueryIterator extends StingSAMIterator, PeekingIterator {
+
+ /**
+ * The required methods to query able
+ **/
+ public void queryOverlapping(final String contig, final int start, final int stop);
+ public void query(final String contig, final int start, final int stop, final boolean contained);
+ public void queryUnmappedReads();
+ public void queryContained(final String contig, final int start, final int stop);
+}
diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/ReadValidationWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/ReadValidationWalker.java
index 533b69d93..0de487922 100644
--- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/ReadValidationWalker.java
+++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/ReadValidationWalker.java
@@ -50,13 +50,13 @@ import java.math.BigInteger;
*
* A descriptions should go here. Blame aaron if it's missing.
*/
-public class ReadValidationWalker extends ReadWalker {
+public class ReadValidationWalker extends ReadWalker {
// our MD5 sum
private MessageDigest m;
// private list of md5sums
- private final List list = new ArrayList();
+ private final List list = new ArrayList();
/**
* The initialize function.
@@ -94,8 +94,8 @@ public class ReadValidationWalker extends ReadWalker {
* bam file, if it was specified on the command line
* @return SAMFileWriter, set to the BAM output file if the command line option was set, null otherwise
*/
- public Integer reduceInit() {
- return new Integer(0);
+ public SAMRecord reduceInit() {
+ return null;
}
/**
@@ -104,31 +104,16 @@ public class ReadValidationWalker extends ReadWalker {
* @param output the output source
* @return the SAMFileWriter, so that the next reduce can emit to the same source
*/
- public Integer reduce( SAMRecord read, Integer output ) {
- byte[] ray = new byte[read.getReadName().length() + 5];
- int x = 0;
- for (char c: read.getReadName().toCharArray()) {
- ray[x] = (byte)c;
- //System.err.println("adding " + c + " to pos " + x);
- x++;
+ public SAMRecord reduce( SAMRecord read, SAMRecord output ) {
+ if (output == null)
+ return read;
+ if ((read.getReferenceIndex() == output.getReferenceIndex()) && (read.getAlignmentStart() < output.getAlignmentStart())) {
+ System.err.println("saw the read " + read.getReadName() + " duplicated, old alignment = " + output.getAlignmentStart());
}
- //System.err.println(read.getReadName() + " name, alignment = " + read.getAlignmentStart());
- int y = 0;
- for (;y < 4; y++) {
- ray[x+y] = (byte)((read.getAlignmentStart() >> y * 8) & 0x000f);
+ else if (read.getReferenceIndex() != output.getReferenceIndex()){
+ System.err.println("Switching Chromo");
}
- ray[x+y] = read.getSecondOfPairFlag() ? (byte)1 : (byte)0;
- BigInteger bigInt = new BigInteger(m.digest(ray));
- //System.err.println(bigInt.toString());
- m.reset();
- if (this.list.contains(bigInt)) {
- throw new StingException("Seen Read: " + bigInt + "-> " + read.getReadName() + " before (list size = " + list.size() + ")");
- }
- if (read.getAlignmentStart() < output) {
- throw new StingException("Seen Read " + read.getReadName() + " has alignment of " + read.getAlignmentStart() + " before (list size = " + output + ")");
- }
- list.add(bigInt);
- return read.getAlignmentStart();
+ return read;
}
diff --git a/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMIterator.java b/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMIterator.java
index 41d3cd62a..4f9d12bbf 100644
--- a/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMIterator.java
+++ b/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMIterator.java
@@ -1,9 +1,12 @@
package org.broadinstitute.sting.utils.sam;
-import org.broadinstitute.sting.gatk.iterators.PeekingStingIterator;
+import org.broadinstitute.sting.gatk.iterators.QueryIterator;
+import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
import org.broadinstitute.sting.gatk.Reads;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMFileHeader;
+import net.sf.samtools.util.CloseableIterator;
+import net.sf.picard.util.PeekableIterator;
import java.util.Iterator;
@@ -34,7 +37,7 @@ import java.util.Iterator;
*/
/** this fake iterator allows us to look at how specific piles of reads are handled */
-public class ArtificialSAMIterator implements PeekingStingIterator {
+public class ArtificialSAMIterator implements StingSAMIterator {
protected int currentChromo = 0;
diff --git a/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMQueryIterator.java b/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMQueryIterator.java
index fdf7771a3..a9ff96a62 100644
--- a/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMQueryIterator.java
+++ b/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMQueryIterator.java
@@ -7,6 +7,7 @@ import net.sf.samtools.SAMRecord;
import java.util.List;
import org.broadinstitute.sting.utils.StingException;
+import org.broadinstitute.sting.gatk.iterators.QueryIterator;
/*
@@ -41,7 +42,7 @@ import org.broadinstitute.sting.utils.StingException;
* to test out classes that use specific itervals. The reads returned will
* all lie in order in the specified interval.
*/
-public class ArtificialSAMQueryIterator extends ArtificialSAMIterator {
+public class ArtificialSAMQueryIterator extends ArtificialSAMIterator implements QueryIterator {
// get the next positon
protected int finalPos = 0;
@@ -88,6 +89,39 @@ public class ArtificialSAMQueryIterator extends ArtificialSAMIterator {
initialize(contig, start, stop);
}
+ @Override
+ public void query( String contig, int start, int stop, boolean contained ) {
+ if (contained)
+ queryContained(contig, start, stop);
+ else
+ queryOverlapping(contig, start, stop);
+ }
+
+ @Override
+ public void queryUnmappedReads() {
+ initializeUnmapped();
+ }
+
+
+ /**
+ * initialize the iterator to an unmapped read position
+ */
+ public void initializeUnmapped() {
+ ensureUntouched();
+ while (super.hasNext() && this.peek().getReferenceIndex() >= 0) {
+ super.next();
+ }
+ // sanity check that we have an actual matching read next
+ SAMRecord rec = this.peek();
+ if (rec == null) {
+ throw new StingException("The next read doesn't match");
+ }
+ // set the seeked variable to true
+ seeked = true;
+ }
+
+
+
/**
* initialize the query iterator
@@ -124,7 +158,7 @@ public class ArtificialSAMQueryIterator extends ArtificialSAMIterator {
// sanity check that we have an actual matching read next
SAMRecord rec = this.peek();
if (!matches(rec)) {
- throw new StingException("The next read doesn't match");
+ throw new StingException("The next read doesn't match");
}
// set the seeked variable to true
seeked = true;
@@ -141,6 +175,11 @@ public class ArtificialSAMQueryIterator extends ArtificialSAMIterator {
if (rec.getReferenceIndex() != this.contigIndex) {
return false;
}
+ // if we have an unmapped read, matching the contig is good enough for us
+ if (rec.getReferenceIndex() < 0) {
+ return true;
+ }
+
if (!overlapping) {
// if the start or the end are somewhere within our range
if (( rec.getAlignmentStart() >= startPos && rec.getAlignmentEnd() <= finalPos )) {
diff --git a/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMUtils.java b/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMUtils.java
index 38704d138..6c9032909 100755
--- a/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMUtils.java
+++ b/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMUtils.java
@@ -5,8 +5,7 @@ import net.sf.samtools.*;
import java.io.File;
import java.util.*;
-import org.broadinstitute.sting.gatk.iterators.PeekingStingIterator;
-import org.broadinstitute.sting.gatk.Reads;
+import org.broadinstitute.sting.gatk.iterators.QueryIterator;
import org.broadinstitute.sting.utils.StingException;
/**
@@ -103,6 +102,51 @@ public class ArtificialSAMUtils {
return header;
}
+ /**
+ * setup a default read group for a SAMFileHeader
+ *
+ * @param header the header to set
+ * @param readGroupID the read group ID tag
+ * @param sampleName the sample name
+ *
+ * @return the adjusted SAMFileHeader
+ */
+ public static SAMFileHeader createDefaultReadGroup( SAMFileHeader header, String readGroupID, String sampleName ) {
+ SAMReadGroupRecord rec = new SAMReadGroupRecord(readGroupID);
+ rec.setSample(sampleName);
+ List readGroups = new ArrayList();
+ readGroups.add(rec);
+ header.setReadGroups(readGroups);
+ return header;
+ }
+
+ /**
+ * setup read groups for the specified read groups and sample names
+ *
+ * @param header the header to set
+ * @param readGroupIDs the read group ID tags
+ * @param sampleNames the sample names
+ *
+ * @return the adjusted SAMFileHeader
+ */
+ public static SAMFileHeader createEnumeratedReadGroups( SAMFileHeader header, List readGroupIDs, List sampleNames ) {
+ if (readGroupIDs.size() != sampleNames.size()) {
+ throw new StingException("read group count and sample name count must be the same");
+ }
+
+ List readGroups = new ArrayList();
+
+ int x = 0;
+ for (; x < readGroupIDs.size(); x++) {
+ SAMReadGroupRecord rec = new SAMReadGroupRecord(readGroupIDs.get(x));
+ rec.setSample(sampleNames.get(x));
+ readGroups.add(rec);
+ }
+ header.setReadGroups(readGroups);
+ return header;
+ }
+
+
/**
* Create an artificial read based on the parameters. The cigar string will be *M, where * is the length of the read
*
@@ -115,7 +159,7 @@ public class ArtificialSAMUtils {
* @return the artificial read
*/
public static SAMRecord createArtificialRead( SAMFileHeader header, String name, int refIndex, int alignmentStart, int length ) {
- if( alignmentStart == 0 )
+ if (alignmentStart == 0)
throw new StingException("Invalid alignment start for artificial read");
SAMRecord record = new SAMRecord(header);
record.setReadName(name);
@@ -128,20 +172,26 @@ public class ArtificialSAMUtils {
return record;
}
-
/**
- * create an iterator containing the specified read piles
+ * Create an artificial read based on the parameters. The cigar string will be *M, where * is the length of the read
*
- * @param startingChr the chromosome (reference ID) to start from
- * @param endingChr the id to end with
- * @param readCount the number of reads per chromosome
+ * @param header the SAM header to associate the read with
+ * @param name the name of the read
+ * @param refIndex the reference index, i.e. what chromosome to associate it with
+ * @param alignmentStart where to start the alignment
+ * @param bases the sequence of the read
+ * @param qual the qualities of the read
*
- * @return StingSAMIterator representing the specified amount of fake data
+ * @return the artificial read
*/
- public static PeekingStingIterator unmappedReadIterator( int startingChr, int endingChr, int readCount ) {
- SAMFileHeader header = createArtificialSamHeader(( endingChr - startingChr ) + 1, startingChr, readCount + DEFAULT_READ_LENGTH);
-
- return new ArtificialSAMIterator(startingChr, endingChr, readCount, header);
+ public static SAMRecord createArtificialRead( SAMFileHeader header, String name, int refIndex, int alignmentStart, byte[] bases, byte[] qual ) {
+ if (bases.length != qual.length) {
+ throw new StingException("Passed in read string is different length then the quality array");
+ }
+ SAMRecord rec = createArtificialRead(header, name, refIndex, alignmentStart, bases.length);
+ rec.setReadBases(bases);
+ rec.setBaseQualities(bases);
+ return rec;
}
/**
@@ -150,14 +200,29 @@ public class ArtificialSAMUtils {
* @param startingChr the chromosome (reference ID) to start from
* @param endingChr the id to end with
* @param readCount the number of reads per chromosome
+ *
+ * @return StingSAMIterator representing the specified amount of fake data
+ */
+ public static QueryIterator unmappedReadIterator( int startingChr, int endingChr, int readCount ) {
+ SAMFileHeader header = createArtificialSamHeader(( endingChr - startingChr ) + 1, startingChr, readCount + DEFAULT_READ_LENGTH);
+
+ return new ArtificialSAMQueryIterator(startingChr, endingChr, readCount, 0, header);
+ }
+
+ /**
+ * create an iterator containing the specified read piles
+ *
+ * @param startingChr the chromosome (reference ID) to start from
+ * @param endingChr the id to end with
+ * @param readCount the number of reads per chromosome
* @param unmappedReadCount the count of unmapped reads to place at the end of the iterator, like in a sorted bam file
*
* @return StingSAMIterator representing the specified amount of fake data
*/
- public static PeekingStingIterator unmappedReadIterator( int startingChr, int endingChr, int readCount, int unmappedReadCount ) {
+ public static QueryIterator unmappedReadIterator( int startingChr, int endingChr, int readCount, int unmappedReadCount ) {
SAMFileHeader header = createArtificialSamHeader(( endingChr - startingChr ) + 1, startingChr, readCount + DEFAULT_READ_LENGTH);
- return new ArtificialSAMIterator(startingChr, endingChr, readCount, unmappedReadCount, header);
+ return new ArtificialSAMQueryIterator(startingChr, endingChr, readCount, unmappedReadCount, header);
}
/**
@@ -171,16 +236,16 @@ public class ArtificialSAMUtils {
*/
public static ArtificialSAMQueryIterator queryReadIterator( int startingChr, int endingChr, int readCount ) {
SAMFileHeader header = createArtificialSamHeader(( endingChr - startingChr ) + 1, startingChr, readCount + DEFAULT_READ_LENGTH);
-
+
return new ArtificialSAMQueryIterator(startingChr, endingChr, readCount, 0, header);
}
/**
* create an ArtificialSAMQueryIterator containing the specified read piles
*
- * @param startingChr the chromosome (reference ID) to start from
- * @param endingChr the id to end with
- * @param readCount the number of reads per chromosome
+ * @param startingChr the chromosome (reference ID) to start from
+ * @param endingChr the id to end with
+ * @param readCount the number of reads per chromosome
* @param unmappedReadCount the count of unmapped reads to place at the end of the iterator, like in a sorted bam file
*
* @return StingSAMIterator representing the specified amount of fake data
diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMByReadsTest.java b/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMByReadsTest.java
index 07d5b58d6..c45d5bb1e 100755
--- a/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMByReadsTest.java
+++ b/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMByReadsTest.java
@@ -3,7 +3,6 @@ package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
import static junit.framework.Assert.fail;
import net.sf.samtools.SAMFileReader;
import net.sf.samtools.SAMFileHeader;
-import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategy;
import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategyFactory;
@@ -84,7 +83,7 @@ public class SAMByReadsTest extends BaseTest {
SAMDataSource data = new SAMDataSource(reads,true);
for (int x = 0; x < 10; x++) {
++iterations;
- PeekingStingIterator iter = ArtificialSAMUtils.unmappedReadIterator(1, 100, 10, 1000);
+ QueryIterator iter = ArtificialSAMUtils.unmappedReadIterator(1, 100, 10, 1000);
BoundedReadIterator ret = data.toUnmappedReads(100, iter);
// count the reads we've gotten back
if (ret == null) {
@@ -173,7 +172,7 @@ public class SAMByReadsTest extends BaseTest {
while (shardStrategy.hasNext()) {
- BoundedReadIterator ret = (BoundedReadIterator)data.seek(shardStrategy.next());
+ StingSAMIterator ret = data.seek(shardStrategy.next());
assertTrue(ret != null);
while (ret.hasNext()) {
ret.next();
@@ -227,7 +226,7 @@ class ArtificialResourcePool extends SAMIteratorPool {
}
@Override
- public StingSAMIterator iterator( GenomeLoc loc ) {
+ public QueryIterator iterator( GenomeLoc loc ) {
ArtificialSAMQueryIterator iter = ArtificialSAMUtils.queryReadIterator(1, 10, 100, 1000);
if (loc != null) {
iter.queryContained(loc.getContig(), (int)loc.getStart(), (int)loc.getStop());
diff --git a/java/test/org/broadinstitute/sting/utils/sam/ArtificialSAMUtilsTest.java b/java/test/org/broadinstitute/sting/utils/sam/ArtificialSAMUtilsTest.java
index 9768d9258..2275084a7 100644
--- a/java/test/org/broadinstitute/sting/utils/sam/ArtificialSAMUtilsTest.java
+++ b/java/test/org/broadinstitute/sting/utils/sam/ArtificialSAMUtilsTest.java
@@ -2,7 +2,7 @@ package org.broadinstitute.sting.utils.sam;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
-import org.broadinstitute.sting.gatk.iterators.PeekingStingIterator;
+import org.broadinstitute.sting.gatk.iterators.QueryIterator;
import org.junit.Test;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.fail;
@@ -80,30 +80,5 @@ public class ArtificialSAMUtilsTest extends BaseTest {
assertEquals(1000, count);
}
- @Test
- public void testPeeking() {
- PeekingStingIterator iter = ArtificialSAMUtils.unmappedReadIterator(1, 100, 100);
- int count = 0;
- while (iter.hasNext()) {
- int readCnt = ((ArtificialSAMIterator)(iter)).readsTaken();
-
- // peek the record
- SAMRecord rec = iter.peek();
- assertTrue(rec.getReferenceIndex() >= 0);
-
- // next the record
- SAMRecord rec2 = iter.next();
- assertTrue(rec2.getReadName() == rec.getReadName());
- assertTrue(rec2.getAlignmentStart() == rec.getAlignmentStart());
-
- // find out how many reads we've taken now
- int readCnt2 = ((ArtificialSAMIterator)(iter)).readsTaken();
-
- count++;
- if (count < 100*100) assertEquals(readCnt + 1, readCnt2);
- else assertEquals(readCnt, readCnt2);
- }
- assertEquals(100 * 100, count );
-
- }
+
}