docs, cleanup, and some improvements to the iterators.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2901 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2010-02-26 22:36:04 +00:00
parent b69c2d0f70
commit d8fedd59be
3 changed files with 133 additions and 38 deletions

View File

@ -142,10 +142,10 @@ class ReferenceOrderedDataPool extends ResourcePool<FlashBackIterator, FlashBack
}
/**
 * Release an iterator by calling its close() method, which clears the
 * iterator's record buffers. The underlying file handles are not closed
 * here; they are left to expire naturally for the moment.
 *
 * @param resource the iterator to close
 */
public void closeResource( FlashBackIterator resource ) {
resource.close();
}
}

View File

@ -1,11 +1,9 @@
package org.broadinstitute.sting.gatk.refdata.utils;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.utils.GenomeLoc;
import java.util.Comparator;
import java.util.LinkedList;
import java.util.List;
/**
@ -23,57 +21,97 @@ import java.util.List;
*/
public class FlashBackIterator implements LocationAwareSeekableRODIterator {
private LocationAwareSeekableRODIterator iterator;
private LinkedList<ComparableList> list = new LinkedList<ComparableList>();
private int MAX_QUEUE = 5000;
private boolean usingQueue = false;
private LinkedList<ComparableList> pastQueue = new LinkedList<ComparableList>();
private LinkedList<ComparableList> aheadQueue = new LinkedList<ComparableList>();
private int MAX_QUEUE = 200;
/**
 * Create a flashback iterator that wraps the given iterator. Both record
 * queues start out empty.
 *
 * @param iterator the LocationAwareSeekableRODIterator to wrap
 */
public FlashBackIterator(LocationAwareSeekableRODIterator iterator) {
this.iterator = iterator;
}
/**
 * Peek at the location of the record that the next call to next() would return.
 * Records waiting in aheadQueue are served before the wrapped iterator, so the
 * head of that queue takes precedence.
 *
 * @return the location of the first queued record if any are queued,
 *         otherwise whatever the wrapped iterator reports as its next location
 */
@Override
public GenomeLoc peekNextLocation() {
    if (!aheadQueue.isEmpty()) {
        return aheadQueue.getFirst().getLocation();
    }
    return iterator.peekNextLocation();
}
/**
 * Get the position of this iterator.
 *
 * @return the location of the first record in aheadQueue when that queue is
 *         not empty, otherwise the position reported by the wrapped iterator
 */
@Override
public GenomeLoc position() {
    if (!aheadQueue.isEmpty()) {
        return aheadQueue.getFirst().getLocation();
    }
    return iterator.position();
}
/**
 * Seek forward on the wrapped iterator and remember the result so that it
 * can be replayed by a later flash back.
 *
 * @param interval the interval to seek to
 * @return a RODRecordList at that location, null otherwise
 */
@Override
public RODRecordList seekForward(GenomeLoc interval) {
    RODRecordList lt = iterator.seekForward(interval);
    // a failed seek yields null; there is nothing to buffer in that case
    if (lt != null) {
        createPastRecord(lt);
    }
    return lt;
}
/**
 * Do we have a next record?
 *
 * @return true if a record is waiting in aheadQueue or the wrapped iterator
 *         has another record
 */
@Override
public boolean hasNext() {
    return !aheadQueue.isEmpty() || iterator.hasNext();
}
/**
 * Get the next record. All of the work is delegated to getNext(), which
 * serves queued records before reading from the wrapped iterator.
 *
 * @return a RODRecordList
 */
@Override
public RODRecordList next() {
    return getNext();
}
/**
 * Removal is not supported by this iterator.
 *
 * @throws UnsupportedOperationException always
 */
@Override
public void remove() {
throw new UnsupportedOperationException("We don't support remove");
}
/**
 * Fetch the next record. Records waiting at the head of aheadQueue are
 * handed out first; only when that queue is empty is a fresh record pulled
 * from the wrapped iterator and passed to createPastRecord().
 *
 * @return a RODRecordList
 */
private RODRecordList getNext() {
    if (aheadQueue.isEmpty()) {
        RODRecordList fresh = iterator.next();
        createPastRecord(fresh);
        return fresh;
    }
    ComparableList head = aheadQueue.removeFirst();
    return head.getList();
}
/**
 * Remember a record so that it can be replayed by a later flash back.
 * A record whose ComparableList has no location is not stored. The buffer
 * holds at most MAX_QUEUE records; the oldest one is evicted once that
 * bound is exceeded.
 *
 * @param ret the record list to remember
 */
private void createPastRecord(RODRecordList ret) {
    ComparableList rec = new ComparableList(ret);
    if (rec.getLocation() != null) {
        // reuse the wrapper built above instead of allocating a second, identical one
        pastQueue.addLast(rec);
    }
    if (pastQueue.size() > this.MAX_QUEUE) {
        pastQueue.removeFirst();
    }
}
/**
* can we flash back to the specified location?
*
@ -82,8 +120,7 @@ public class FlashBackIterator implements LocationAwareSeekableRODIterator {
* @return true if we can, false otherwise
*/
public boolean canFlashBackTo(GenomeLoc location) {
    // The oldest buffered record bounds how far back we can go; with nothing
    // buffered, fall back to the wrapped iterator's next location.
    GenomeLoc farthestBack = pastQueue.isEmpty()
            ? iterator.peekNextLocation()
            : pastQueue.getFirst().getLocation();
    return !farthestBack.isPast(location);
}
@ -94,18 +131,29 @@ public class FlashBackIterator implements LocationAwareSeekableRODIterator {
*/
public void flashBackTo(GenomeLoc location) {
    if (!canFlashBackTo(location)) throw new UnsupportedOperationException("we can't flash back to " + location);
    if (pastQueue.isEmpty()) return; // the iterator can do it alone
    // Move records, newest first, from the past buffer to the front of the
    // ahead queue until the newest remaining past record lies before location.
    while (!pastQueue.isEmpty() && !pastQueue.getLast().getLocation().isBefore(location)) {
        aheadQueue.addFirst(pastQueue.removeLast());
    }
}
/**
 * Discard every buffered record by emptying both the ahead queue and the
 * past queue. The wrapped iterator is not touched by this method.
 */
public void close() {
this.aheadQueue.clear();
this.pastQueue.clear();
}
}
/**
* a list that buffers the location for this rod
*/
class ComparableList implements Comparator<ComparableList> {
private RODRecordList list;
private GenomeLoc location = null;
/**
 * Wrap a record list and cache its location.
 *
 * @param list the record list to wrap; when it is null or empty the cached
 *             location stays null, which callers use to detect records that
 *             should not be buffered
 */
public ComparableList(RODRecordList list) {
    this.list = list;
    if (list != null && list.size() != 0) {
        location = list.getLocation();
    }
}
@Override

View File

@ -5,7 +5,6 @@ import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.junit.Assert;
import org.junit.Before;
@ -39,7 +38,7 @@ public class FlashBackIteratorTest extends BaseTest {
@Test
public void testBasicIteration() {
GenomeLoc loc = GenomeLocParser.createGenomeLoc(0,0,0);
GenomeLoc loc = GenomeLocParser.createGenomeLoc(0, 0, 0);
FlashBackIterator iter = new FlashBackIterator(new FakeSeekableRODIterator(loc));
GenomeLoc lastLocation = null;
for (int x = 0; x < 10; x++) {
@ -54,7 +53,7 @@ public class FlashBackIteratorTest extends BaseTest {
@Test
public void testBasicIterationThenFlashBack() {
GenomeLoc loc = GenomeLocParser.createGenomeLoc(0,0,0);
GenomeLoc loc = GenomeLocParser.createGenomeLoc(0, 0, 0);
FlashBackIterator iter = new FlashBackIterator(new FakeSeekableRODIterator(loc));
GenomeLoc lastLocation = null;
for (int x = 0; x < 10; x++) {
@ -65,12 +64,12 @@ public class FlashBackIteratorTest extends BaseTest {
}
lastLocation = cur;
}
iter.flashBackTo(GenomeLocParser.createGenomeLoc(0,2));
iter.flashBackTo(GenomeLocParser.createGenomeLoc(0, 2));
}
@Test
public void testBasicIterationThenFlashBackThenIterate() {
GenomeLoc loc = GenomeLocParser.createGenomeLoc(0,0,0);
GenomeLoc loc = GenomeLocParser.createGenomeLoc(0, 0, 0);
FlashBackIterator iter = new FlashBackIterator(new FakeSeekableRODIterator(loc));
GenomeLoc lastLocation = null;
for (int x = 0; x < 10; x++) {
@ -81,15 +80,61 @@ public class FlashBackIteratorTest extends BaseTest {
}
lastLocation = cur;
}
iter.flashBackTo(GenomeLocParser.createGenomeLoc(0,1));
iter.flashBackTo(GenomeLocParser.createGenomeLoc(0, 1));
int count = 0;
while (iter.hasNext()) {
count++;
iter.next();
}
Assert.assertEquals(10,count);
Assert.assertEquals(10, count);
}
@Test
public void testFlashBackTruth() {
    GenomeLoc start = GenomeLocParser.createGenomeLoc(0, 0, 0);
    LocationAwareSeekableRODIterator wrapped = new FakeSeekableRODIterator(start);
    // consume the first three records before wrapping, so the flashback
    // iterator never gets to buffer them
    for (int skipped = 0; skipped < 3; skipped++) {
        wrapped.next();
    }
    FlashBackIterator iter = new FlashBackIterator(wrapped);

    // walk ten records and check that positions strictly advance
    GenomeLoc previous = null;
    for (int i = 0; i < 10; i++) {
        iter.next();
        GenomeLoc current = iter.position();
        Assert.assertTrue(previous == null || previous.isBefore(current));
        previous = current;
    }

    // locations read through the flashback iterator are reachable ...
    boolean canReachFive = iter.canFlashBackTo(GenomeLocParser.createGenomeLoc(0, 5));
    Assert.assertTrue(canReachFive);
    boolean canReachFifteen = iter.canFlashBackTo(GenomeLocParser.createGenomeLoc(0, 15));
    Assert.assertTrue(canReachFifteen);
    // ... while the ones consumed before wrapping are not
    boolean canReachTwo = iter.canFlashBackTo(GenomeLocParser.createGenomeLoc(0, 2));
    Assert.assertTrue(!canReachTwo);
    boolean canReachOne = iter.canFlashBackTo(GenomeLocParser.createGenomeLoc(0, 1));
    Assert.assertTrue(!canReachOne);
}
@Test
public void testBasicIterationThenFlashBackHalfWayThenIterate() {
    GenomeLoc start = GenomeLocParser.createGenomeLoc(0, 0, 0);
    FlashBackIterator iter = new FlashBackIterator(new FakeSeekableRODIterator(start));

    // walk ten records and check that positions strictly advance
    GenomeLoc previous = null;
    for (int i = 0; i < 10; i++) {
        iter.next();
        GenomeLoc current = iter.position();
        Assert.assertTrue(previous == null || previous.isBefore(current));
        previous = current;
    }

    // rewind half way, then count how many records can still be read
    iter.flashBackTo(GenomeLocParser.createGenomeLoc(0, 5));
    int replayed = 0;
    for (; iter.hasNext(); iter.next()) {
        replayed++;
    }
    Assert.assertEquals(6, replayed); // chr1:5, 6, 7, 8, 9, and 10
}
}
@ -99,8 +144,10 @@ class FakeSeekableRODIterator implements LocationAwareSeekableRODIterator {
private GenomeLoc location;
private FakeRODatum curROD;
private int recordCount = 10;
/**
 * Create a fake iterator whose first record sits one position past the
 * supplied starting location (start and stop are both shifted by one).
 *
 * @param startingLoc the location just before the first fake record
 */
public FakeSeekableRODIterator(GenomeLoc startingLoc) {
    this.location = GenomeLocParser.createGenomeLoc(startingLoc.getContigIndex(), startingLoc.getStart() + 1, startingLoc.getStop() + 1);
}
@Override
@ -129,7 +176,7 @@ class FakeSeekableRODIterator implements LocationAwareSeekableRODIterator {
public RODRecordList next() {
RODRecordList list = new FakeRODRecordList();
curROD = new FakeRODatum(location);
location = GenomeLocParser.createGenomeLoc(location.getContigIndex(),location.getStart()+1,location.getStop()+1);
location = GenomeLocParser.createGenomeLoc(location.getContigIndex(), location.getStart() + 1, location.getStop() + 1);
list.add(curROD);
recordCount--;
return list;