diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReadStreamPointer.java b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReadStreamPointer.java
index 5df542a09..c69b74b6f 100644
--- a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReadStreamPointer.java
+++ b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReadStreamPointer.java
@@ -143,7 +143,7 @@ class MappedReadStreamPointer extends ReadStreamPointer {
// over a given interval would occasionally not pick up the last read in that interval.
mergingIterator.queryOverlapping( segment.locus.getContig(),
(int)segment.locus.getStart(),
- (int)segment.locus.getStop()+1);
+ (int)segment.locus.getStop()+ PlusOneFixIterator.PLUS_ONE_FIX_CONSTANT);
return StingSAMIteratorAdapter.adapt(sourceInfo,mergingIterator);
}
diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMDataSource.java b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMDataSource.java
index 89d905a3e..c7c6beb06 100755
--- a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMDataSource.java
+++ b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMDataSource.java
@@ -172,7 +172,8 @@ public class SAMDataSource implements SimpleDataSource {
reads.getSupplementalFilters());
// add the new overlapping detection iterator, if we have a last interval and we're a read based shard
- if (mLastInterval != null && shard.getShardType() == Shard.ShardType.READ_INTERVAL ) iterator = new IntervalOverlapIterator(iterator,mLastInterval,false);
+ if (mLastInterval != null && shard.getShardType() == Shard.ShardType.READ_INTERVAL )
+ iterator = new PlusOneFixIterator(shard.getGenomeLoc(),new IntervalOverlapIterator(iterator,mLastInterval,false));
mLastInterval = shard.getGenomeLoc();
} else {
diff --git a/java/src/org/broadinstitute/sting/gatk/iterators/PlusOneFixIterator.java b/java/src/org/broadinstitute/sting/gatk/iterators/PlusOneFixIterator.java
new file mode 100644
index 000000000..267ea99d1
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/gatk/iterators/PlusOneFixIterator.java
@@ -0,0 +1,114 @@
+package org.broadinstitute.sting.gatk.iterators;
+
+import net.sf.samtools.SAMRecord;
+
+import java.util.Iterator;
+
+import org.broadinstitute.sting.utils.GenomeLoc;
+import org.broadinstitute.sting.utils.StingException;
+import org.broadinstitute.sting.gatk.Reads;
+
+/**
+ * Created by IntelliJ IDEA.
+ * User: aaronmckenna
+ * Date: Nov 4, 2009
+ * Time: 10:41:02 PM
+ */
+public class PlusOneFixIterator implements StingSAMIterator {
+
+ /**
+ * this holds the value that we use to correct our query overlapping calls with,
+ * but more importantly it ties the ReadStreamPointer code to this so we don't loose
+ * track of this adjustment across the code.
+ */
+ public static final Integer PLUS_ONE_FIX_CONSTANT = 1;
+
+ /**
+ * our interval region
+ */
+ private final GenomeLoc mInterval;
+
+ /**
+ * our iterator
+ */
+ private final StingSAMIterator mIterator;
+
+ /**
+ * our next record
+ */
+ private SAMRecord mNextRecord;
+
+ /**
+ * This iterator filters all reads that have their start < PLUS_ONE_FIX_CONSTANT from the end of the
+ * interval:
+ *
+ *
+ * (this won't make sense in a non-fixed width font)
+ * --------------
+ * interval |
+ * ------------------------
+ * | good read|
+ * --------------------------------------------
+ * | overlap < PLUS_ONE_FIX_CONSTANT, we drop|
+ * -------------------------------------------
+ *
+ * The problem is that we had to put a +1 on our queryOverlapping() stop position value to SAMTools
+ * due to an indexing problem. Other iterators don't know about this adjustment though, so calculations
+ * like overlap fail. This iterator eliminates them before the read is seen by other iterators.
+ *
+ * create a plus one fix iterator, given:
+ *
+ * @param inteveral the interval we're checking
+ * @param iterator the iterator to draw reads from
+ */
+ public PlusOneFixIterator(GenomeLoc inteveral, StingSAMIterator iterator) {
+ if (inteveral == null || iterator == null)
+ throw new StingException("Both parameters to PlusOneFixIterator have to != null");
+
+ // save off the iterator and the interval
+ mInterval = inteveral;
+ mIterator = iterator;
+
+ // pop off the first read
+ if (mIterator.hasNext()) {
+ next();
+ }
+ }
+
+
+ @Override
+ public boolean hasNext() {
+ return (mNextRecord != null);
+ }
+
+ @Override
+ public SAMRecord next() {
+ SAMRecord ret = mNextRecord;
+ while (mIterator.hasNext()) {
+ mNextRecord = mIterator.next();
+ if (!(mNextRecord.getAlignmentStart() > (mInterval.getStop() - PLUS_ONE_FIX_CONSTANT))) return ret;
+ }
+ mNextRecord = null;
+ return ret;
+ }
+
+ @Override
+ public void remove() {
+ throw new UnsupportedOperationException("REMOVE on PlusOneFixIterator is not supported");
+ }
+
+ @Override
+ public Reads getSourceInfo() {
+ return this.mIterator.getSourceInfo();
+ }
+
+ @Override
+ public void close() {
+ this.mIterator.close();
+ }
+
+ @Override
+ public Iterator iterator() {
+ return this;
+ }
+}
diff --git a/java/test/org/broadinstitute/sting/gatk/iterators/PlusOneFixIteratorTest.java b/java/test/org/broadinstitute/sting/gatk/iterators/PlusOneFixIteratorTest.java
new file mode 100644
index 000000000..c32d67885
--- /dev/null
+++ b/java/test/org/broadinstitute/sting/gatk/iterators/PlusOneFixIteratorTest.java
@@ -0,0 +1,120 @@
+package org.broadinstitute.sting.gatk.iterators;
+
+import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
+import org.broadinstitute.sting.utils.GenomeLocParser;
+import org.broadinstitute.sting.utils.GenomeLoc;
+import org.broadinstitute.sting.BaseTest;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.junit.Assert;
+import static org.junit.Assert.fail;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.util.Iterator;
+
+import net.sf.samtools.SAMFileReader;
+import net.sf.samtools.SAMRecord;
+
+/**
+ * Created by IntelliJ IDEA.
+ * User: aaronmckenna
+ * Date: Nov 4, 2009
+ * Time: 11:02:24 PM
+ */
+public class PlusOneFixIteratorTest extends BaseTest {
+ private final File bam = new File("/humgen/gsa-scr1/GATK_Data/Validation_Data/index_test.bam");
+ private static IndexedFastaSequenceFile seq;
+ private int chromosomeOneReadCount = 885;
+
+ //GenomeLoc.setupRefContigOrdering(seq.getSequenceDictionary());
+ /**
+ * This function does the setup of our parser, before each method call.
+ *
+ * Called before every test case method.
+ */
+ @BeforeClass
+ public static void beforeAll() {
+ try {
+ seq = new IndexedFastaSequenceFile(new File(seqLocation + "/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta"));
+ } catch (FileNotFoundException e) {
+ fail("Unexpected Exception" + e.getLocalizedMessage());
+ }
+ GenomeLocParser.setupRefContigOrdering(seq);
+ }
+
+ /**
+ * there's a read starting at chr1:1108664, make our interval go from chr1 to 1108664, apply the iterator,
+ * and we shouldn't see the read.
+ */
+ @Test
+ public void testReadAtEndOfInterval() {
+ int countOfReads = 0;
+ SAMFileReader reader = new SAMFileReader(bam, true);
+ final int size = 1108664;
+ reader.setValidationStringency(SAMFileReader.ValidationStringency.LENIENT);
+
+ GenomeLoc last = GenomeLocParser.createGenomeLoc("chr1", 1, size);
+ Iterator i = new PlusOneFixIterator(last, StingSAMIteratorAdapter.adapt(null, reader.queryOverlapping("chr1", 1, size)));
+ //Iterator i = reader.queryOverlapping("chr1", 1, size);
+ while (i.hasNext()) {
+ SAMRecord rec = i.next();
+ countOfReads++;
+ }
+ Assert.assertEquals(2, countOfReads);
+ }
+
+ /**
+ * there are 4296 reads in chr1, make sure we see them all.
+ *
+ * chr1 length = 247249719 (and no reads start at the last position
+ */
+ @Test
+ public void testAllReads() {
+ int countOfReads = 0;
+ SAMFileReader reader = new SAMFileReader(bam, true);
+ reader.setValidationStringency(SAMFileReader.ValidationStringency.LENIENT);
+ final int size = 247249719;
+ GenomeLoc last = GenomeLocParser.createGenomeLoc("chr1", 1, size);
+ Iterator i = new PlusOneFixIterator(last, StingSAMIteratorAdapter.adapt(null, reader.queryOverlapping("chr1", 1, size)));
+ //Iterator i = reader.queryOverlapping("chr1", 1, size);
+ while (i.hasNext()) {
+ SAMRecord rec = i.next();
+ countOfReads++;
+ }
+ Assert.assertEquals(885, countOfReads);
+ }
+
+ /**
+ * there are 4296 reads in chr1, make sure we see them all.
+ *
+ * chr1 length = 247249719 (and no reads start at the last position
+ */
+ @Test
+ public void testAllReadsSharded() {
+ int countOfReads = 0;
+
+ final int size = 247249719;
+ int currentStop = 0;
+ int incr = 100000;
+ GenomeLoc last = GenomeLocParser.createGenomeLoc("chr1", 1, size);
+ while (currentStop < size) {
+ SAMFileReader reader = new SAMFileReader(bam, true);
+ reader.setValidationStringency(SAMFileReader.ValidationStringency.LENIENT);
+ int lastStop = (currentStop > 1) ? currentStop: 1;
+ if (currentStop + incr < size)
+ currentStop += incr;
+ else
+ currentStop = size;
+ Iterator i = new PlusOneFixIterator(last, StingSAMIteratorAdapter.adapt(null, reader.queryOverlapping("chr1", lastStop, currentStop)));
+
+ while (i.hasNext()) {
+ SAMRecord rec = i.next();
+ countOfReads++;
+ }
+ reader.close();
+ }
+ Assert.assertEquals(885, countOfReads);
+ }
+}
+
diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/fasta/BamToFastqIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/fasta/BamToFastqIntegrationTest.java
index c46e76951..1d4ef2fad 100755
--- a/java/test/org/broadinstitute/sting/gatk/walkers/fasta/BamToFastqIntegrationTest.java
+++ b/java/test/org/broadinstitute/sting/gatk/walkers/fasta/BamToFastqIntegrationTest.java
@@ -12,13 +12,13 @@ public class BamToFastqIntegrationTest extends WalkerTest {
WalkerTestSpec spec1 = new WalkerTestSpec(
"-T BamToFastq -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,100-10,000,500;1:10,100,000-10,101,000;1:10,900,000-10,900,001 -o %s",
1,
- Arrays.asList("f742731e17fba105c7daae0e4f80ca1d"));
+ Arrays.asList("2ec2ff5ac405099bf48186d2c7e5fabd"));
executeTest("testBamToFasta", spec1);
WalkerTestSpec spec2 = new WalkerTestSpec(
"-T BamToFastq -reverse -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,100-10,000,500;1:10,100,000-10,101,000;1:10,900,000-10,900,001 -o %s",
1,
- Arrays.asList("19a5418fdf7b53dac8badb67bb1e1b88"));
+ Arrays.asList("5a79484da43925b0e0461be28fdad07c"));
executeTest("testBamToFastaReverse", spec2);
}
}