From c0891d512f7befefcf4498e53923b8b3b8fbd5aa Mon Sep 17 00:00:00 2001 From: asivache Date: Thu, 14 Jan 2010 17:38:19 +0000 Subject: [PATCH] added: peekNextLocation(); it's quite hard (and probably unnecessary, ever) to make seekable iterator a peekable one, but it is quite easy and useful to be able to peek just the next location the iterator will jump to after next call to next() git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2581 348d0f76-0448-11de-a6fe-93d51630548a --- .../gatk/refdata/SeekableRODIterator.java | 42 +++++++++++++++++-- 1 file changed, 39 insertions(+), 3 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/SeekableRODIterator.java b/java/src/org/broadinstitute/sting/gatk/refdata/SeekableRODIterator.java index 232ef04c4..6d15a0ba1 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/SeekableRODIterator.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/SeekableRODIterator.java @@ -1,6 +1,7 @@ package org.broadinstitute.sting.gatk.refdata; import org.broadinstitute.sting.gatk.iterators.PushbackIterator; +import org.broadinstitute.sting.gatk.iterators.PeekingIterator; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.StingException; @@ -78,6 +79,11 @@ public class SeekableRODIterator implements I public SeekableRODIterator(Iterator it) { this.it = new PushbackIterator(it); records = new LinkedList(); + // the following is a trick: we would like the iterator to know the actual name assigned to + // the ROD implementing object we are working with. But the only way to do that is to + // get an instance of that ROD and query it for its name. Now, the only generic way we have at this point to instantiate + // the ROD is to make the underlying stream iterator to do it for us. So we are reading (or rather peeking into) + // the first line of the track data file just to get the ROD object created. ROD r = null; if (this.it.hasNext()) r = this.it.element(); name = (r==null?null:r.getName()); @@ -98,6 +104,21 @@ public class SeekableRODIterator implements I return it.hasNext(); } + // Returns point location (i.e. genome loc of length 1) on the reference, to which this iterator will advance + // upon next call to next(). + public GenomeLoc peekNextLocation() { + if ( curr_position + 1 <= max_position ) return GenomeLocParser.createGenomeLoc(curr_contig,curr_position+1); + + // sorry, next reference position is not covered by the RODs we are currently holding. In this case, + // the location we will jump to upon next call to next() is the start of the next ROD record that we did + // not read yet: + if ( it.hasNext() ) { + ROD r = it.element(); // peek, do not load! + return GenomeLocParser.createGenomeLoc(r.getLocation().getContigIndex(),r.getLocation().getStart()); + } + return null; // underlying iterator has no more records, there is no next location! + } + /** Advances iterator to the next genomic position that has ROD record(s) associated with it, * and returns all the records overlapping with that position as a RODList. The location of the whole * RODList object will be set to the smallest interval subsuming genomic intervals of all returned records. @@ -110,7 +131,7 @@ public class SeekableRODIterator implements I curr_position++; // curr_query_end = -1; - + if ( curr_position <= max_position ) { // we still have bases covered by at least one currently loaded record; @@ -161,7 +182,7 @@ public class SeekableRODIterator implements I // 'records' and current position are fully updated. Last, we need to set the location of the whole track // (collection of ROD records) to the genomic site we are currently looking at, and return the list - return new RODRecordList(name,records,GenomeLocParser.createGenomeLoc(curr_contig,curr_position)); + return new RODRecordList(name,records, GenomeLocParser.createGenomeLoc(curr_contig,curr_position)); } /** @@ -182,6 +203,20 @@ public class SeekableRODIterator implements I throw new UnsupportedOperationException("SeekableRODIterator does not implement remove() operation"); } + /** + * + */ + public GenomeLoc lastQueryLocation() { + if ( curr_contig < 0 ) return null; + if ( curr_query_end > curr_position ) { + return GenomeLocParser.createGenomeLoc(curr_contig,curr_position,curr_query_end); + } + else { + return GenomeLocParser.createGenomeLoc(curr_contig,curr_position); + } + + } + /** * Returns the current "position" (not location!! ;) ) of this iterator. This method is used by the sharding * system when it searches for available iterators in the pool that can be reused to resume traversal. @@ -275,7 +310,7 @@ public class SeekableRODIterator implements I if ( r.getLocation().getStop() < curr_position ) continue; // did not reach the requested interval yet - if ( r.getLocation().getStart() > interval.getStop() ) { + if ( r.getLocation().getStart() > curr_query_end ) { // past the query interval it.pushback(r); break; @@ -309,4 +344,5 @@ public class SeekableRODIterator implements I } } + }