diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataSource.java b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataSource.java index e1c010622..5c4842182 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataSource.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataSource.java @@ -142,10 +142,10 @@ class ReferenceOrderedDataPool extends ResourcePool list = new LinkedList(); - private int MAX_QUEUE = 5000; - private boolean usingQueue = false; + private LinkedList pastQueue = new LinkedList(); + private LinkedList aheadQueue = new LinkedList(); + private int MAX_QUEUE = 200; + /** + * create a flashback iterator + * @param iterator given a LocationAwareSeekableRODIterator + */ public FlashBackIterator(LocationAwareSeekableRODIterator iterator) { this.iterator = iterator; } + /** + * peek at the next location + * @return + */ @Override public GenomeLoc peekNextLocation() { - return iterator.peekNextLocation(); + return (aheadQueue.size() > 0) ? aheadQueue.getFirst().getLocation() : iterator.peekNextLocation(); } + /** + * get the position of this iterator + * @return + */ @Override public GenomeLoc position() { - return (usingQueue) ? list.getFirst().getLocation() : iterator.position(); + return (aheadQueue.size() > 0) ? aheadQueue.getFirst().getLocation() : iterator.position(); } + /** + * seek forward on the iterator + * @param interval the interval to seek to + * @return a RODRecordList at that location, null otherwise + */ @Override public RODRecordList seekForward(GenomeLoc interval) { + RODRecordList lt = iterator.seekForward(interval); - if (lt != null) list.addLast(new ComparableList(lt)); + createPastRecord(lt); return lt; } + /** + * do we have a next record + * @return true if we have another record + */ @Override public boolean hasNext() { - if (usingQueue) return (list.size() > 0 || iterator.hasNext()); - return iterator.hasNext(); + return (aheadQueue.size() > 0 || iterator.hasNext()); } + /** + * get the next record + * @return a RODRecordList + */ @Override public RODRecordList next() { - RODRecordList ret; - if (!usingQueue || list.size() < 1) { - usingQueue = false; - ret = iterator.next(); - list.addLast(new ComparableList(ret)); - if (list.size() > MAX_QUEUE) list.removeFirst(); - } else { - ret = list.getFirst().getList(); - list.removeFirst(); - } - return ret; + return getNext(); } + /** + * we don't support remove + */ @Override public void remove() { throw new UnsupportedOperationException("We don't support remove"); } + /** + * get the next record, either from the queue or from the iterator + * @return a RODRecordList + */ + private RODRecordList getNext() { + if (aheadQueue.size() > 0) { + RODRecordList ret = aheadQueue.getFirst().getList(); + aheadQueue.removeFirst(); + return ret; + } else { + RODRecordList ret = iterator.next(); + createPastRecord(ret); + return ret; + } + } + + private void createPastRecord(RODRecordList ret) { + ComparableList rec = new ComparableList(ret); + if (rec.getLocation() != null) pastQueue.addLast(new ComparableList(ret)); + if (pastQueue.size() > this.MAX_QUEUE) pastQueue.removeFirst(); + } + /** * can we flash back to the specified location? * @@ -82,8 +120,7 @@ public class FlashBackIterator implements LocationAwareSeekableRODIterator { * @return true if we can, false otherwise */ public boolean canFlashBackTo(GenomeLoc location) { - GenomeLoc farthestBack = (list.size() > 0) ? list.getFirst().getLocation() : iterator.peekNextLocation(); - System.err.println("farthestBack = " + farthestBack + " loc = " + location); + GenomeLoc farthestBack = (pastQueue.size() > 0) ? pastQueue.getFirst().getLocation() : iterator.peekNextLocation(); return (!farthestBack.isPast(location)); } @@ -94,18 +131,29 @@ public class FlashBackIterator implements LocationAwareSeekableRODIterator { */ public void flashBackTo(GenomeLoc location) { if (!canFlashBackTo(location)) throw new UnsupportedOperationException("we can't flash back to " + location); - if (list.size() > 0 && !list.getLast().getLocation().isBefore(location)) - usingQueue = true; + if (pastQueue.size()==0) return; // the iterator can do it alone + while (pastQueue.size() > 0 && !pastQueue.getLast().getLocation().isBefore(location)) { + aheadQueue.addFirst(pastQueue.getLast()); + pastQueue.removeLast(); + } + } + + public void close() { + this.aheadQueue.clear(); + this.pastQueue.clear(); } } +/** + * a list that buffers the location for this rod + */ class ComparableList implements Comparator { private RODRecordList list; private GenomeLoc location = null; public ComparableList(RODRecordList list) { this.list = list; - if (list != null && list.size() != 0) location = list.get(0).getLocation(); - else throw new IllegalStateException("Bad voodoo!"); + if (list != null && list.size() != 0) + location = list.getLocation(); } @Override diff --git a/java/test/org/broadinstitute/sting/gatk/refdata/utils/FlashBackIteratorTest.java b/java/test/org/broadinstitute/sting/gatk/refdata/utils/FlashBackIteratorTest.java index db704f8f8..dc0da5f97 100644 --- a/java/test/org/broadinstitute/sting/gatk/refdata/utils/FlashBackIteratorTest.java +++ b/java/test/org/broadinstitute/sting/gatk/refdata/utils/FlashBackIteratorTest.java @@ -5,7 +5,6 @@ import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.GenomeLocSortedSet; import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; import org.junit.Assert; import org.junit.Before; @@ -39,7 +38,7 @@ public class FlashBackIteratorTest extends BaseTest { @Test public void testBasicIteration() { - GenomeLoc loc = GenomeLocParser.createGenomeLoc(0,0,0); + GenomeLoc loc = GenomeLocParser.createGenomeLoc(0, 0, 0); FlashBackIterator iter = new FlashBackIterator(new FakeSeekableRODIterator(loc)); GenomeLoc lastLocation = null; for (int x = 0; x < 10; x++) { @@ -54,7 +53,7 @@ public class FlashBackIteratorTest extends BaseTest { @Test public void testBasicIterationThenFlashBack() { - GenomeLoc loc = GenomeLocParser.createGenomeLoc(0,0,0); + GenomeLoc loc = GenomeLocParser.createGenomeLoc(0, 0, 0); FlashBackIterator iter = new FlashBackIterator(new FakeSeekableRODIterator(loc)); GenomeLoc lastLocation = null; for (int x = 0; x < 10; x++) { @@ -65,12 +64,12 @@ public class FlashBackIteratorTest extends BaseTest { } lastLocation = cur; } - iter.flashBackTo(GenomeLocParser.createGenomeLoc(0,2)); + iter.flashBackTo(GenomeLocParser.createGenomeLoc(0, 2)); } @Test public void testBasicIterationThenFlashBackThenIterate() { - GenomeLoc loc = GenomeLocParser.createGenomeLoc(0,0,0); + GenomeLoc loc = GenomeLocParser.createGenomeLoc(0, 0, 0); FlashBackIterator iter = new FlashBackIterator(new FakeSeekableRODIterator(loc)); GenomeLoc lastLocation = null; for (int x = 0; x < 10; x++) { @@ -81,15 +80,61 @@ public class FlashBackIteratorTest extends BaseTest { } lastLocation = cur; } - iter.flashBackTo(GenomeLocParser.createGenomeLoc(0,1)); + iter.flashBackTo(GenomeLocParser.createGenomeLoc(0, 1)); int count = 0; while (iter.hasNext()) { count++; iter.next(); } - Assert.assertEquals(10,count); + Assert.assertEquals(10, count); } + + @Test + public void testFlashBackTruth() { + GenomeLoc loc = GenomeLocParser.createGenomeLoc(0, 0, 0); + LocationAwareSeekableRODIterator backIter = new FakeSeekableRODIterator(loc); + // remove the first three records + backIter.next(); + backIter.next(); + backIter.next(); + FlashBackIterator iter = new FlashBackIterator(backIter); + GenomeLoc lastLocation = null; + for (int x = 0; x < 10; x++) { + iter.next(); + GenomeLoc cur = iter.position(); + if (lastLocation != null) { + Assert.assertTrue(lastLocation.isBefore(cur)); + } + lastLocation = cur; + } + Assert.assertTrue(iter.canFlashBackTo(GenomeLocParser.createGenomeLoc(0, 5))); + Assert.assertTrue(iter.canFlashBackTo(GenomeLocParser.createGenomeLoc(0, 15))); + Assert.assertTrue(!iter.canFlashBackTo(GenomeLocParser.createGenomeLoc(0, 2))); + Assert.assertTrue(!iter.canFlashBackTo(GenomeLocParser.createGenomeLoc(0, 1))); + } + + @Test + public void testBasicIterationThenFlashBackHalfWayThenIterate() { + GenomeLoc loc = GenomeLocParser.createGenomeLoc(0, 0, 0); + FlashBackIterator iter = new FlashBackIterator(new FakeSeekableRODIterator(loc)); + GenomeLoc lastLocation = null; + for (int x = 0; x < 10; x++) { + iter.next(); + GenomeLoc cur = iter.position(); + if (lastLocation != null) { + Assert.assertTrue(lastLocation.isBefore(cur)); + } + lastLocation = cur; + } + iter.flashBackTo(GenomeLocParser.createGenomeLoc(0, 5)); + int count = 0; + while (iter.hasNext()) { + count++; + iter.next(); + } + Assert.assertEquals(6, count); // chr1:5, 6, 7, 8, 9, and 10 + } } @@ -99,8 +144,10 @@ class FakeSeekableRODIterator implements LocationAwareSeekableRODIterator { private GenomeLoc location; private FakeRODatum curROD; private int recordCount = 10; + public FakeSeekableRODIterator(GenomeLoc startingLoc) { - this.location = GenomeLocParser.createGenomeLoc(startingLoc.getContigIndex(),startingLoc.getStart()+1,startingLoc.getStop()+1);; + this.location = GenomeLocParser.createGenomeLoc(startingLoc.getContigIndex(), startingLoc.getStart() + 1, startingLoc.getStop() + 1); + ; } @Override @@ -129,7 +176,7 @@ class FakeSeekableRODIterator implements LocationAwareSeekableRODIterator { public RODRecordList next() { RODRecordList list = new FakeRODRecordList(); curROD = new FakeRODatum(location); - location = GenomeLocParser.createGenomeLoc(location.getContigIndex(),location.getStart()+1,location.getStop()+1); + location = GenomeLocParser.createGenomeLoc(location.getContigIndex(), location.getStart() + 1, location.getStop() + 1); list.add(curROD); recordCount--; return list;