From d85461c463ce4b33d724b2bf9fde37854b787410 Mon Sep 17 00:00:00 2001 From: asivache Date: Thu, 14 Jan 2010 17:43:36 +0000 Subject: [PATCH] MergingIterator completely re-done. Now it is not a generic class (sorry guys), but rather it is tailored for merging ROD tracks. This implementation peeks the locations of next ROD annotations in each track, but does not actually read these RODs from underlying streams until the location is reached and it is time to actually return the object. Now underlying ROD track iterators (registered in the resource pool!) are not advanced prematurely past the current position and all the way to the next ROD record wherever it is, so that the sharding system can reuse them. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2582 348d0f76-0448-11de-a6fe-93d51630548a --- .../datasources/providers/RodLocusView.java | 7 +- .../sting/utils/MergingIterator.java | 83 ++++++++++++------- 2 files changed, 57 insertions(+), 33 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/providers/RodLocusView.java b/java/src/org/broadinstitute/sting/gatk/datasources/providers/RodLocusView.java index 44d126387..0c18dd1f0 100644 --- a/java/src/org/broadinstitute/sting/gatk/datasources/providers/RodLocusView.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/providers/RodLocusView.java @@ -32,7 +32,7 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView { /** * The data sources along with their current states. 
*/ - private MergingIterator> rodQueue = null; + private MergingIterator rodQueue = null; RefMetaDataTracker tracker = null; GenomeLoc lastLoc = null; @@ -86,7 +86,7 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView { } } - rodQueue = new MergingIterator>(iterators); + rodQueue = new MergingIterator(iterators); //throw new StingException("RodLocusView currently disabled"); } @@ -99,8 +99,7 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView { if ( ! rodQueue.hasNext() ) return false; else { - RODRecordList peeked = rodQueue.peek(); - return ! peeked.getLocation().isPast(shard.getGenomeLoc()); + return ! rodQueue.peekLocation().isPast(shard.getGenomeLoc()); } } diff --git a/java/src/org/broadinstitute/sting/utils/MergingIterator.java b/java/src/org/broadinstitute/sting/utils/MergingIterator.java index 45db41558..2ccbc66a7 100644 --- a/java/src/org/broadinstitute/sting/utils/MergingIterator.java +++ b/java/src/org/broadinstitute/sting/utils/MergingIterator.java @@ -1,38 +1,55 @@ package org.broadinstitute.sting.utils; import org.broadinstitute.sting.gatk.iterators.PeekingIterator; +import org.broadinstitute.sting.gatk.iterators.PushbackIterator; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; +import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator; +import org.broadinstitute.sting.gatk.refdata.RODRecordList; import java.util.*; -public class MergingIterator> implements Iterator, PeekingIterator, Iterable { +public class MergingIterator implements Iterator>, Iterable> { PriorityQueue queue = new PriorityQueue(); private class Element implements Comparable { - public Iterator it = null; - public E value = null; + public SeekableRODIterator it = null; + //public E value = null; + public GenomeLoc nextLoc = null; - public Element(Iterator it) { - this.it = it; - update(); + public Element(Iterator> it) { + if ( it instanceof SeekableRODIterator ) { + this.it = (SeekableRODIterator)it; + 
if ( ! it.hasNext() ) throw new StingException("Iterator is empty"); + update(); + } else { + throw new StingException("Iterator passed to MergingIterator is not SeekableRODIterator"); + } } public Element update() { - if ( ! it.hasNext() ) - throw new RuntimeException("it is empty"); - - E prev = value; - value = it.next(); - //System.out.printf("Updating %s to prev=%s, next=%s%n", this, prev, value); + // E prev = value; + nextLoc = it.peekNextLocation(); // will return null if there is no next location return this; } public int compareTo(Element other) { - return value.compareTo(other.value); + if ( nextLoc == null ) { + if ( other.nextLoc != null ) return 1; // null means no more data available, so its after any non-null position + return 0; + } + if ( other.nextLoc == null ) return -1; // we can get to this point only if this.nextLoc != null + + return nextLoc.compareTo(other.nextLoc); + } + + public RODRecordList next() { + RODRecordList value = it.next(); + update(); + return value; } } - public Iterator iterator() { + public Iterator> iterator() { return this; } @@ -40,17 +57,21 @@ public class MergingIterator> implements Iterator, Pe ; } - public MergingIterator(Iterator it) { + public MergingIterator(Iterator> it) { add(it); } - public MergingIterator(Collection> its) { - for ( Iterator it : its ) { + public MergingIterator(Collection>> its) { + for ( Iterator> it : its ) { add(it); } } - public void add(Iterator it) { + /** If the iterator is non-empty (hasNext() is true), put it into the queue. The next location the iterator + * will be after a call to next() is peeked into and cached as queue's priority value. + * @param it + */ + public void add(Iterator> it) { if ( it.hasNext() ) queue.add(new Element(it)); } @@ -59,35 +80,39 @@ public class MergingIterator> implements Iterator, Pe return ! 
queue.isEmpty(); } - public E next() { + public RODRecordList next() { Element e = queue.poll(); - E value = e.value; + RODRecordList value = e.next(); // next() will also update next location cached by the Element - if ( e.it != null && e.it.hasNext() ) - queue.add(new Element(e.it)); + if ( e.nextLoc != null ) // we have more data in the track + queue.add(e); // add the element back to queue (note: its next location, on which priority is based, was updated) //System.out.printf("Element is %s%n", e.value); return value; } - public E peek() { - return queue.peek().value; + /** Peeks into the genomic location of the record this iterator will return next. + * + * @return the location cached for the element at the head of the queue + */ + public GenomeLoc peekLocation() { + return queue.peek().nextLoc; } - public Collection allElementsLTE(E elt) { + public Collection> allElementsLTE(RODRecordList elt) { return allElementsLTE(elt, true); } - public Collection allElementsLTE(E elt, boolean includeElt) { - LinkedList all = new LinkedList(); + public Collection> allElementsLTE(RODRecordList elt, boolean includeElt) { + LinkedList> all = new LinkedList>(); if ( includeElt ) all.add(elt); while ( hasNext() ) { - E x = peek(); + Element x = queue.peek(); //System.out.printf("elt.compareTo(x) == %d%n", elt.compareTo(x)); //System.out.printf("In allElementLTE%n"); - int cmp = elt.compareTo(x); + int cmp = elt.getLocation().compareTo(x.nextLoc); //System.out.printf("x=%s%n elt=%s%n => elt.compareTo(x) == %d%n", x, elt, cmp); if ( cmp >= 0 ) { //System.out.printf(" Adding element x=%s, size = %d%n", x, all.size());