diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/RODRecordIterator.java b/java/src/org/broadinstitute/sting/gatk/refdata/RODRecordIterator.java new file mode 100644 index 000000000..a770a950a --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/refdata/RODRecordIterator.java @@ -0,0 +1,219 @@ +package org.broadinstitute.sting.gatk.refdata; + +import org.broadinstitute.sting.utils.xReadLines; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.MalformedGenomeLocException; +import org.broadinstitute.sting.utils.StingException; +import org.broadinstitute.sting.gatk.iterators.PushbackIterator; + +import java.util.Iterator; +import java.util.regex.Pattern; +import java.io.FileNotFoundException; +import java.io.File; +import java.io.IOException; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Constructor; + +/** + * This is a low-level iterator designed to provide system-wide generic support for reading record-oriented data + * files. The only assumption made is that every line in the file provides a complete and separate data record. The records + * can be associated with coordinates or coordinate intervals, there can be one or more records associated with a given + * position/interval, or intervals can overlap. The records must be comprised of delimited fields, but the format is + * otherwise free. For any specific line-based data format, an appropriate implementation of ReferenceOrderedDatum must be + * provided that is capable of parsing itself from a single line of data. This implementation will be used, + * through reflection mechanism, as a callback to do all the work. + * + * The model is, hence, as follows: + * + * String dataRecord <---> RodImplementation ( ::parseLine(dataRecord.split(delimiter)) is aware of the format and fills + * an instance of RodImplementation with data values from dataRecord line). + * + * + * instantiation of RODRecordIterator(dataFile, trackName, RodImplementation.class) will immediately provide an iterator + * that walks along the dataFile line by line, and on each call to next() returns a new RodImplementation object + * representing a single line (record) of data. The returned object will be initialized with "track name" trackName - + * track names (as returned by ROD.getName()) are often used in other parts of the code to distinguish between + * multiple streams of (possibly heterogeneous) annotation data bound to an application. + * + * This generic iterator skips and ignores a) empty lines, b) lines starting with '#' (comments): they are never sent back + * to the ROD implementation class for processing. + * + * This iterator does not actually check if the ROD records (lines) in the file are indeed ordedered by coordinate, + * and it does not depend on such an order as it still implements a low-level line-based traversal of the data. Higher-level + * iterators/wrappers will perform all the necessary checks. + * + * Note: some data formats/ROD implementations may require a header line in the file. In this case the current (ugly) + * mechanism is as follows: + * 1) rod implementation's ::initialize(file) method should be able to open the file, find and read the header line + * and return the header object (to be kept by the iterator) + * 2) rod implementation's ::parseLine(header,line) method should be capable of making use of that saved header object now served to it + * and + * 3) ::parseLine(header,line) should be able to recognize the original header line in the file and skip it (after ROD's initialize() + * method is called, the iterator will re-open the file and start reading it from the very beginning; there is no + * other way, except for "smart" ::parseLine(), to avoid reading in the header line as "data"). + * + * Created by IntelliJ IDEA. + * User: asivache + * Date: Sep 10, 2009 + * Time: 1:22:23 PM + * To change this template use File | Settings | File Templates. + */ +public class RODRecordIterator implements Iterator { + + private PushbackIterator reader; + + // stores name of the track this iterator reads (will be also returned by getName() of ROD objects + // generated by this iterator) + private String name; + + // we keep the file object, only to use file name in error reports + private File file; + + // rod type; this is what we will instantiate for RODs at runtime + private Class type; + + private Object header = null; // Some RODs may use header + + // field delimiter in the file. Should it be the job of the iterator to split the lines though? RODs can do that! + private String fieldDelimiter; + + // constructor for the ROD objects we are going to return. Constructor that takes the track name as its single arg is required. + private Constructor named_constructor; + + // keep track of the lines we are reading. used for error messages only. + private long linenum = 0; + + private boolean allow_empty = true; + private boolean allow_comments = true; + public static Pattern EMPTYLINE_PATTERN = Pattern.compile("^\\s*$"); + + public RODRecordIterator(File file, String name, Class type) { + try { + reader = new PushbackIterator(new xReadLines(file)); + } catch (FileNotFoundException e) { + Utils.scareUser("Couldn't open file: " + file); + } + this.file = file; + this.name = name; + this.type = type; + try { + named_constructor = type.getConstructor(String.class); + } + catch (java.lang.NoSuchMethodException e) { + throw new StingException("ROD class "+type.getName()+" does not have constructor that accepts a single String argument (track name)"); + } + ROD rod = instantiateROD(name); + fieldDelimiter = rod.delimiterRegex(); // get delimiter from the ROD itself + try { + header = rod.initialize(file); + } catch (FileNotFoundException e) { + throw new StingException("ROD "+type.getName() + " failed to initialize properly from file "+file); + } + + } + + + /** + * Returns true if the iteration has more elements. (In other + * words, returns true if next would return an element + * rather than throwing an exception.) + * + * @return true if the iterator has more elements. + */ + public boolean hasNext() { + if ( allow_empty || allow_comments ) { + while ( reader.hasNext() ) { + String line = reader.next(); + if ( allow_empty && EMPTYLINE_PATTERN.matcher(line).matches() ) continue; // skip empty line + if ( allow_comments && line.charAt(0) == '#' ) continue; // skip comment lines + // the line is not empty and not a comment line, so we have next after all + reader.pushback(line); + return true; + } + return false; // oops, we end up here if there's nothing left + } else { + return reader.hasNext(); + } + } + + /** + * Returns the next valid ROD record in the file, skipping empty and comment lines. + * + * @return the next element in the iteration. + * @throws java.util.NoSuchElementException + * iteration has no more elements. + */ + public ROD next() { + ROD n = null; + boolean parsed_ok = false; + String line ; + + while ( ! parsed_ok && reader.hasNext() ) { + line = reader.next(); + linenum++; + while ( allow_empty && EMPTYLINE_PATTERN.matcher(line).matches() || + allow_comments && line.charAt(0) == '#' ) { + if ( reader.hasNext() ) { + line = reader.next(); + linenum++; + } else { + line = null; + break; + } + } + + if ( line == null ) break; // if we ran out of lines while skipping empty lines/comments, then we are done + + String parts[] = line.split(fieldDelimiter); + + try { + n = instantiateROD(name); + parsed_ok = n.parseLine(header,parts) ; + } + catch ( Exception e ) { + throw new StingException("Failed to parse ROD data ("+type.getName()+") from file "+ file + " at line #"+linenum+ + "\nOffending line: "+line+ + "\nReason ("+e.getClass().getName()+"): "+e.getMessage()); + } + } + + + return n; + } + + /** + * Removes from the underlying collection the last element returned by the + * iterator (optional operation). This method can be called only once per + * call to next. The behavior of an iterator is unspecified if + * the underlying collection is modified while the iteration is in + * progress in any way other than by calling this method. + * + * @throws UnsupportedOperationException if the remove + * operation is not supported by this Iterator. + * @throws IllegalStateException if the next method has not + * yet been called, or the remove method has already + * been called after the last call to the next + * method. + */ + public void remove() { + throw new UnsupportedOperationException("remove() operation is not supported by RODRecordIterator"); + } + + /** Instantiates appropriate implementation of the ROD used by this iteratot. The 'name' argument is the name + * of the ROD track. + * @param name + * @return + */ + private ROD instantiateROD(final String name) { + try { + return (ROD) named_constructor.newInstance(name); + } catch (java.lang.InstantiationException e) { + throw new StingException("Failed to instantiate ROD object of class "+type.getName()); + } catch (java.lang.IllegalAccessException e) { + throw new StingException("Access violation attempt while instantiating ROD object of class "+type.getName()); + } catch (InvocationTargetException e) { + throw new StingException("InvocationTargetException: Failed to instantiate ROD object of class "+type.getName()); + } + } + +} diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/RODRecordList.java b/java/src/org/broadinstitute/sting/gatk/refdata/RODRecordList.java new file mode 100644 index 000000000..74e3a7a10 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/refdata/RODRecordList.java @@ -0,0 +1,118 @@ +package org.broadinstitute.sting.gatk.refdata; + +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.StingException; +import org.broadinstitute.sting.utils.GenomeLocParser; + +import java.util.*; + +/** + * Created by IntelliJ IDEA. + * User: asivache + * Date: Sep 10, 2009 + * Time: 6:10:48 PM + * To change this template use File | Settings | File Templates. + */ +public class RODRecordList implements Iterable, Comparable>, Cloneable { + private List records; + private GenomeLoc location = null; + private String name = null; + + private RODRecordList() {} // dummy constructor for internal use; does not initialize/allocate anything + + public RODRecordList(String name) { + records = new ArrayList(); + this.name = name; + } + + /** + * Fully qualified constructor: instantiates a new RODRecordList object with specified ROD track name, location on the + * reference, and list of associated RODs. This is a knee-deep COPY constructor: passed name, loc, and data element + * objects will be referenced from the created RODRecordList (so that changing them from outside will affect data + * in this object), however, the data elements will be copied into a newly + * allocated list, so that the 'data' collection argument can be modified afterwards without affecting the state + * of this record list. WARNING: this constructor is (semi-)validating: passed name and location + * are allowed to be nulls (although it maybe unsafe, use caution), but if they are not nulls, then passed non-null ROD data + * elements must have same track name, and their locations must overlap with the passed 'location' argument. Null + * data elements or null 'data' collection argument are allowed as well. + * @param name + * @param data + * @param loc + */ + public RODRecordList(String name, Collection data, GenomeLoc loc) { + this.records = new ArrayList(data==null?0:data.size()); + this.name = name; + this.location = loc; + if ( data == null || data.size() == 0 ) return; // empty dataset, nothing to do + for ( ROD r : data ) { + records.add(r); + if ( r == null ) continue; + if ( ! this.name.equals(r.getName() ) ) { + throw new StingException("Attempt to add ROD with non-matching name "+r.getName()+" to the track "+name); + } + if ( location != null && ! location.overlapsP(r.getLocation()) ) { + throw new StingException("Attempt to add ROD that lies outside of specified interval "+location+"; offending ROD:\n"+r.toString()); + } + } + } + + + public GenomeLoc getLocation() { return location; } + public void setLocation(GenomeLoc location) { this.location = location; } + public String getName() { return name; } + public List getRecords() { return records; } + public Iterator iterator() { return records.iterator() ; } + public void clear() { records.clear(); } + public void add(ROD record) { + if ( record != null ) { + if ( ! name.equals(record.getName() ) ) + throw new StingException("Attempt to add ROD with non-matching name "+record.getName()+" to the track "+name); + } + records.add(record); + } + public int size() { return records.size() ; } + + /** + * Compares this object with the specified object for order. Returns a + * negative integer, zero, or a positive integer as this object is less + * than, equal to, or greater than the specified object. + *

+ *

The implementor must ensure sgn(x.compareTo(y)) == + * -sgn(y.compareTo(x)) for all x and y. (This + * implies that x.compareTo(y) must throw an exception iff + * y.compareTo(x) throws an exception.) + *

+ *

The implementor must also ensure that the relation is transitive: + * (x.compareTo(y)>0 && y.compareTo(z)>0) implies + * x.compareTo(z)>0. + *

+ *

Finally, the implementor must ensure that x.compareTo(y)==0 + * implies that sgn(x.compareTo(z)) == sgn(y.compareTo(z)), for + * all z. + *

+ *

It is strongly recommended, but not strictly required that + * (x.compareTo(y)==0) == (x.equals(y)). Generally speaking, any + * class that implements the Comparable interface and violates + * this condition should clearly indicate this fact. The recommended + * language is "Note: this class has a natural ordering that is + * inconsistent with equals." + *

+ *

In the foregoing description, the notation + * sgn(expression) designates the mathematical + * signum function, which is defined to return one of -1, + * 0, or 1 according to whether the value of + * expression is negative, zero or positive. + * + * @param o the object to be compared. + * @return a negative integer, zero, or a positive integer as this object + * is less than, equal to, or greater than the specified object. + * @throws ClassCastException if the specified object's type prevents it + * from being compared to this object. + */ + public int compareTo(RODRecordList that) { +// if ( this.getLocation() == null ) { + // if ( that.getLocation() == null ) + // } + return getLocation().compareTo(that.getLocation()); //To change body of implemented methods use File | Settings | File Templates. + } +} diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/SeekableRODIterator.java b/java/src/org/broadinstitute/sting/gatk/refdata/SeekableRODIterator.java new file mode 100644 index 000000000..8289acc21 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/refdata/SeekableRODIterator.java @@ -0,0 +1,311 @@ +package org.broadinstitute.sting.gatk.refdata; + +import org.broadinstitute.sting.gatk.iterators.PushbackIterator; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.StingException; + +import java.util.Iterator; +import java.util.List; +import java.util.LinkedList; + +/** + * Wrapper class for iterators over ROD objects. It is assumed that the underlying iterator can only + * perform standard next() operation, which advances it to the next ROD in the stream (i.e. reads the data file + * line by line). This iterator 1) shifts the focus from record-based traversal to position-based traversal, + * and 2) adds querying seekForward() method. + * + * Namely, this iterator's next() method advances not to the next ROD in the underlying stream, but to the next + * genomic position covered by (at least one) ROD, and returns all RODs overlapping with that position as a RODRecordList + * collection-like object. Similarly, when seekForward(interval) is called, this iterator skips all the RODs from the + * underlying stream, until it reaches specified genomic interval, and returns the list of all RODs overlapping with that interval. + * + * NOTE: this iterator has a STATE: next() operation is not allowed after a seekForward() to a non-point (extended) interval + * of length > 1. Such a call would leave the iterator in an inconsistent state. seekForward() can always be called after + * either seekForward() or next() (as long as usual ordering criteria are satisfied: the query interval location can neither + * start before the current position, nor end before the previous query end). seekForward to an interval of length 1 + * reenables next() operation. + * + * Created by IntelliJ IDEA. + * User: asivache + * Date: Sep 10, 2009 + * Time: 6:20:46 PM + * To change this template use File | Settings | File Templates. + */ +public class SeekableRODIterator implements Iterator > { + private PushbackIterator it; + List records = null; // here we will keep a pile of records overlaping with current position; when we iterate + // and step out of record's scope, we purge it from the list + String name = null; // name of the ROD track wrapped by this iterator. Will be pulled from underlying iterator. + + long curr_position = 0; // where the iterator is currently positioned on the genome + long max_position = 0; // the rightmost stop position of currently loaded records + int curr_contig = -1; // what contig the iterator is currently on + boolean next_is_allowed = true; // see discussion below. next() is illegal after seek-forward queries of length > 1 + + // the stop position of the last query. We can query only in forward direction ("seek forward"); + // it is not only the start position of every successive query that can not be before the start + // of the previous one (curr_start), but it is also illegal for a query interval to *end* before + // the end of previous query, otherwise we can end up in an inconsistent state + long curr_query_end = -1; + + // EXAMPLE of inconsistency curr_query_end guards against: + // record 1 record 2 + // ---------- ----------- + // -------------------------------------------------- REF + // ------------------------- query 1 (interval 1) + // ---------- query 2 (interval 2) + // --------------- query 3 + // + // If we query first for interval 1, both record 1 and record 2 will be loaded. + // Query for interval 2, on the other hand, should return only record 1, but after + // query 1 was performed, record 2 is already loaded from the file. If, on the other hand, + // we try to un-load it from memory, we won't be able to read it again. Hence query 2 is not + // allowed after query 1. Note also, that curr_query_end is not equivalent to max_position: + // the latter only tracks where currently loaded records end (and hence helps to re-load records); + // after query 1 is performed, max_position will be the end of record 2, but query 3 is still + // perfectly legal after query 1. + // + // IMPORTANT NOTE: it follows from the above discussion and example that next() is illegal after ANY + // seek-forward query EXCEPT those that are performed with length-1 intervals (queryInterval.start=queryinteval.stop). + // Indeed, in the example above, after, e.g., query 1 is performed, the iterator is "located" at the start + // of interval 1, but record1 and record 2 are already loaded. On the other hand, a subsequent call to next() would + // need to shift iterator's position by 1 base and return only record 1. + // + // This implementation tracks the query history and makes next() illegal after a seekforward query of length > 1, + // but re-enables next() again after a length-1 query. + + public SeekableRODIterator(Iterator it) { + this.it = new PushbackIterator(it); + records = new LinkedList(); + ROD r = this.it.peek(); + name = (r==null?null:r.getName()); + } + + /** + * Returns true if the data we iterate over has records associated with (any, not necessarily adjacent) + * genomic position farther along the reference. + * @return + */ + public boolean hasNext() { + + // if we did not walk to the very end of the interval(s) covered by currently loaded + // annotations (records), then we definitely have data for next genomic location + if ( curr_position < max_position ) return true; + + // we are past currently loaded stuff; we have next if there are more lines to load: + return it.hasNext(); + } + + /** Advances iterator to the next genomic position that has ROD record(s) associated with it, + * and returns all the records overlapping with that position as a RODList. The location of the whole + * RODList object will be set to the smallest interval subsuming genomic intervals of all returned records. + * Note that next() is disabled (will throw an exception) after seekForward() operation with query length > 1. + * @return list of all RODs overlapping with the next "covered" genomic position + */ + public RODRecordList next() { + if ( ! next_is_allowed ) + throw new StingException("Illegal use of iterator: Can not advance iterator with next() after seek-forward query of length > 1"); + + curr_position++; + // curr_query_end = -1; + + if ( curr_position <= max_position ) { + + // we still have bases covered by at least one currently loaded record; + // we have to purge only subset of records, on which we moved past the end + purgeOutOfScopeRecords(); + } else { + // ooops, we are past the end of all loaded records - kill them all at once, + // load next record and reinitialize by fastforwarding current position to the start of next record + records.clear(); + ROD r = it.next(); // if hasNext() previously returned true, we are guaranteed that this call to reader.next() is safe + records.add( r ); + curr_contig = r.getLocation().getContigIndex(); + curr_position = r.getLocation().getStart(); + max_position = r.getLocation().getStop(); + } + + // current position is ste and at this point 'records' only keeps those annotations, on which we did not reach the end yet + // (we might have reloaded records completely if it was necessary); but we are not guaranteed yet that we + // hold ALL the records overlapping with the current position. Time to check if we just walked into the interval(s) + // covered by new records, so we need to load them too: + + while ( it.hasNext() ) { + ROD r = it.peek(); + if ( r == null ) { + it.next(); + continue; + } + int that_contig = r.getLocation().getContigIndex(); + if ( curr_contig > that_contig ) + throw new StingException("SeekableRODIterator: contig " +r.getLocation().getContig() + + " occurs out of order in track " + r.getName() ); + if ( curr_contig < that_contig ) break; // next record is on a higher contig, we do not need it yet... + + if ( r.getLocation().getStart() < curr_position ) + throw new StingException("SeekableRODIterator: track "+r.getName() + + " is out of coordinate order on contig "+r.getLocation().getContig()); + + if ( r.getLocation().getStart() > curr_position ) break; // next record starts after the current position; we do not need it yet + + r = it.next(); // we got here only if we do need next record, time to load it for real + + long stop = r.getLocation().getStop(); + if ( stop < curr_position ) throw new StingException("DEBUG: encountered contig that should have been loaded earlier"); // this should never happen + if ( stop > max_position ) max_position = stop; // max_position keeps the rightmost stop position across all loaded records + records.add(r); + } + + // 'records' and current position are fully updated. Last, we need to set the location of the whole track + // (collection of ROD records) to the genomic site we are currently looking at, and return the list + + return new RODRecordList(name,records,GenomeLocParser.createGenomeLoc(curr_contig,curr_position)); + } + + /** + * Removes from the underlying collection the last element returned by the + * iterator (optional operation). This method can be called only once per + * call to next. The behavior of an iterator is unspecified if + * the underlying collection is modified while the iteration is in + * progress in any way other than by calling this method. + * + * @throws UnsupportedOperationException if the remove + * operation is not supported by this Iterator. + * @throws IllegalStateException if the next method has not + * yet been called, or the remove method has already + * been called after the last call to the next + * method. + */ + public void remove() { + throw new UnsupportedOperationException("SeekableRODIterator does not implement remove() operation"); + } + + /** + * Returns the current "position" (not location!! ;) ) of this iterator. This method is used by the sharding + * system when it searches for available iterators in the pool that can be reused to resume traversal. + * When iterator is advanced using next(), current position + * is the same as 'location'. However, after a seekForward() query with extended interval, returned position + * will be set to the last position of the query interval, to disable (illegal) attempts to roll the iterator + * back and re-start traversal from current location. + * @return Current ending position of the iterator, or null if no position exists. + */ + public GenomeLoc position() { + if ( curr_contig < 0 ) return null; + if ( curr_query_end > curr_position ) { + // do not attempt to reuse this iterator if the position we need it for lies before the end of last query performed + return GenomeLocParser.createGenomeLoc(curr_contig,curr_query_end,curr_query_end); + } + else { + return GenomeLocParser.createGenomeLoc(curr_contig,curr_position); + } + } + + /** + * Seeks forward through the file until the specified interval is reached. + * The location object interval can be either a single point or an extended interval. All + * ROD records overlapping with the whole interval will be returned, or null if no such records exist. + * + * Query interval must start at or after the iterator's current location, or exception will be thrown. + * + * Query interval must end at or after the stop position of the previous query, if any, or an exception will + * be thrown: subsequent queries that end before the stop of previous ones are illegal. + * + * If seekForward() is performed to an extended (length > 1 i.e. start != stop) interval, next() operation becomes + * illegal (the iterator changes state). Only seekForward() calls are allowed thereafter, until a seekForward() call + * to a length-1 interval is performed, which re-enables next(). seekForward() queries with length-1 intervals can + * always be safely intermixed with next() (as long as ordering is respected and query intervals are at or after the + * current position). + * + * Note that in contrast to + * next() (which always advances current position of the iterator on the reference), this method scrolls + * forward ONLY if the specified interval is ahead of the current location of + * the iterator. However, if called again with the same 'interval' argument as before, seekForward will NOT + * advance, but will simply return the same ROD list as before. + * + * + * @param interval point-like genomic location to fastforward to. + * @return ROD object at (or overlapping with) the specified position, or null if no such ROD exists. + */ + public RODRecordList seekForward(GenomeLoc interval) { + + if ( interval.getContigIndex() < curr_contig ) + throw new StingException("Out of order query: query contig "+interval.getContig()+" is located before "+ + "the iterator's current contig"); + if ( interval.getContigIndex() == curr_contig ) { + if ( interval.getStart() < curr_position ) + throw new StingException("Out of order query: query position "+interval.getStart()+" is located before "+ + "the iterator's current position "+curr_position); + if ( interval.getStop() < curr_query_end ) + throw new StingException("Unsupported querying sequence: current query interval " + + interval+" ends before the end of previous query interval ("+curr_query_end+")"); + } + + curr_position = interval.getStart(); + curr_query_end = interval.getStop(); + + next_is_allowed = ( curr_position == curr_query_end ); // we can call next() later only if interval length is 1 + + if ( interval.getContigIndex() == curr_contig && curr_position <= max_position ) { + // some of the intervals we are currently keeping do overlap with the query interval + + purgeOutOfScopeRecords(); + } else { + // clean up and get ready for fast-forwarding towards the requested position + records.clear(); + max_position = -1; + curr_contig = interval.getContigIndex(); + } + + // curr_contig and curr_position are set to where we asked to scroll to + + while ( it.hasNext() ) { + ROD r = it.next(); + if ( r == null ) continue; + int that_contig = r.getLocation().getContigIndex(); + + if ( curr_contig > that_contig ) continue; // did not reach requested contig yet + if ( curr_contig < that_contig ) { + it.pushback(r); // next record is on the higher contig, we do not need it yet... + break; + } + + // we get here if we are on the requested contig: + + if ( r.getLocation().getStop() < curr_position ) continue; // did not reach the requested interval yet + + if ( r.getLocation().getStart() > interval.getStop() ) { + // past the query interval + it.pushback(r); + break; + } + + // we get here only if interval of the record r overlaps with query interval, so the record should be loaded + if ( r.getLocation().getStop() > max_position ) max_position = r.getLocation().getStop(); + records.add(r); + } + + if ( records.size() > 0 ) { + return new RODRecordList(name,records,interval.clone()); + } else { + return null; + } + + } + + /** + * Removes records that end before the curr_position from the list of currently kept records. This is a + * convenience (private) shortcut that does not perform extensive checking. In particular, it assumes that + * curr_position <= max_position, as well as that we are still on the same contig. + */ + private void purgeOutOfScopeRecords() { + Iterator i = records.iterator(); + while ( i.hasNext() ) { + ROD r = i.next(); + if ( r.getLocation().getStop() < curr_position ) { + i.remove(); // we moved past the end of interval the record r is associated with, purge the record forever + } + } + + } +}