diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/providers/ManagingReferenceOrderedView.java b/java/src/org/broadinstitute/sting/gatk/datasources/providers/ManagingReferenceOrderedView.java index 2da895d01..d17dc06ce 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/providers/ManagingReferenceOrderedView.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/providers/ManagingReferenceOrderedView.java @@ -1,7 +1,7 @@ package org.broadinstitute.sting.gatk.datasources.providers; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.RODIterator; +import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; import org.broadinstitute.sting.utils.GenomeLoc; @@ -43,7 +43,7 @@ public class ManagingReferenceOrderedView implements ReferenceOrderedView { public ManagingReferenceOrderedView( ShardDataProvider provider ) { //this.provider = provider; for( ReferenceOrderedDataSource dataSource: provider.getReferenceOrderedData() ) - states.add( new ReferenceOrderedDataState( dataSource, (RODIterator)dataSource.seek(provider.getShard()) ) ); + states.add( new ReferenceOrderedDataState( dataSource, (SeekableRODIterator)dataSource.seek(provider.getShard()) ) ); provider.register(this); } @@ -78,9 +78,9 @@ public class ManagingReferenceOrderedView implements ReferenceOrderedView { */ private class ReferenceOrderedDataState { public final ReferenceOrderedDataSource dataSource; - public final RODIterator iterator; + public final SeekableRODIterator iterator; - public ReferenceOrderedDataState( ReferenceOrderedDataSource dataSource, RODIterator iterator ) { + public ReferenceOrderedDataState( ReferenceOrderedDataSource dataSource, SeekableRODIterator iterator ) { this.dataSource = dataSource; this.iterator = iterator; } diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/providers/RodLocusView.java b/java/src/org/broadinstitute/sting/gatk/datasources/providers/RodLocusView.java index d9483d2bc..14ead32fb 100644 --- a/java/src/org/broadinstitute/sting/gatk/datasources/providers/RodLocusView.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/providers/RodLocusView.java @@ -1,8 +1,6 @@ package org.broadinstitute.sting.gatk.datasources.providers; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.RODIterator; -import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; +import org.broadinstitute.sting.gatk.refdata.*; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.utils.GenomeLoc; @@ -37,11 +35,11 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView { /** * The data sources along with their current states. */ - private MergingIterator rodQueue = null; + private MergingIterator> rodQueue = null; RefMetaDataTracker tracker = null; GenomeLoc lastLoc = null; - ReferenceOrderedDatum interval = null; + RODRecordList interval = null; // broken support for multi-locus rods //List multiLocusRODs = new LinkedList(); @@ -63,11 +61,11 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView { GenomeLoc loc = provider.getShard().getGenomeLoc(); - List> iterators = new LinkedList>(); + List< Iterator> > iterators = new LinkedList< Iterator> >(); for( ReferenceOrderedDataSource dataSource: provider.getReferenceOrderedData() ) { if ( DEBUG ) System.out.printf("Shard is %s%n", loc); - RODIterator it = (RODIterator)dataSource.seek(provider.getShard()); - ReferenceOrderedDatum x = it.seekForward(loc); + SeekableRODIterator it = (SeekableRODIterator)dataSource.seek(provider.getShard()); + RODRecordList x = it.seekForward(loc); // we need to special case the interval so we don't always think there's a rod at the first location if ( dataSource.getName().equals(INTERVAL_ROD_NAME) ) { @@ -75,11 +73,11 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView { throw new RuntimeException("BUG: interval local variable already assigned " + interval); interval = x; } else { - iterators.add( (Iterator)it ); + iterators.add( it ); } } - rodQueue = new MergingIterator(iterators); + rodQueue = new MergingIterator>(iterators); } public RefMetaDataTracker getReferenceOrderedDataAtLocus( GenomeLoc loc ) { @@ -90,7 +88,7 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView { if ( ! rodQueue.hasNext() ) return false; else { - ReferenceOrderedDatum peeked = rodQueue.peek(); + RODRecordList peeked = rodQueue.peek(); return ! peeked.getLocation().isPast(shard.getGenomeLoc()); } } @@ -102,14 +100,14 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView { */ public AlignmentContext next() { if ( DEBUG ) System.out.printf("In RodLocusView.next()...%n"); - ReferenceOrderedDatum datum = rodQueue.next(); + RODRecordList datum = rodQueue.next(); if ( DEBUG ) System.out.printf("In RodLocusView.next(); datum = %s...%n", datum.getLocation()); if ( DEBUG ) System.out.printf("In RodLocusView.next(): creating tracker...%n"); // Update the tracker here for use - Collection allRODsHere = getSpanningRods(datum); - tracker = createTracker(allRODsHere); + Collection> allTracksHere = getSpanningTracks(datum); + tracker = createTracker(allTracksHere); GenomeLoc rodSite = datum.getLocation(); GenomeLoc site = GenomeLocParser.createGenomeLoc( rodSite.getContigIndex(), rodSite.getStart(), rodSite.getStart()); @@ -122,11 +120,11 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView { return new AlignmentContext(site, new ArrayList(), new ArrayList(), skippedBases); } - private RefMetaDataTracker createTracker( Collection allRodsHere ) { + private RefMetaDataTracker createTracker( Collection> allTracksHere ) { RefMetaDataTracker t = new RefMetaDataTracker(); - for ( ReferenceOrderedDatum element : allRodsHere ) { - if ( ! t.hasROD(element.getName()) ) - t.bind(element.getName(), element); + for ( RODRecordList track : allTracksHere ) { + if ( ! t.hasROD(track.getName()) ) + t.bind(track.getName(), track); } // special case the interval again -- add it into the ROD @@ -135,7 +133,12 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView { return t; } - private Collection getSpanningRods(ReferenceOrderedDatum marker) { + private Collection> getSpanningTracks(ReferenceOrderedDatum marker) { + RODRecordList wrapper = new RODRecordList(marker.getName(),Collections.singletonList(marker)); + return rodQueue.allElementsLTE(wrapper); + } + + private Collection> getSpanningTracks(RODRecordList marker) { return rodQueue.allElementsLTE(marker); } diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataSource.java b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataSource.java index a4ce5039e..e73458fbc 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataSource.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataSource.java @@ -2,7 +2,7 @@ package org.broadinstitute.sting.gatk.datasources.simpleDataSources; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData; -import org.broadinstitute.sting.gatk.refdata.RODIterator; +import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator; import org.broadinstitute.sting.gatk.datasources.shards.Shard; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.StingException; @@ -59,7 +59,7 @@ public class ReferenceOrderedDataSource implements SimpleDataSource { * @return Iterator through the data. */ public Iterator seek( Shard shard ) { - RODIterator iterator = iteratorPool.iterator( new MappedStreamSegment(shard.getGenomeLoc()) ); + SeekableRODIterator iterator = iteratorPool.iterator( new MappedStreamSegment(shard.getGenomeLoc()) ); return iterator; } @@ -67,7 +67,7 @@ public class ReferenceOrderedDataSource implements SimpleDataSource { * Close the specified iterator, returning it to the pool. * @param iterator Iterator to close. */ - public void close( RODIterator iterator ) { + public void close( SeekableRODIterator iterator ) { this.iteratorPool.release(iterator); } @@ -76,7 +76,7 @@ public class ReferenceOrderedDataSource implements SimpleDataSource { /** * A pool of reference-ordered data iterators. */ -class ReferenceOrderedDataPool extends ResourcePool { +class ReferenceOrderedDataPool extends ResourcePool { private final ReferenceOrderedData rod; public ReferenceOrderedDataPool( ReferenceOrderedData rod ) { @@ -88,7 +88,7 @@ class ReferenceOrderedDataPool extends ResourcePool { * to be completely independent of any other iterator. * @return The newly created resource. */ - public RODIterator createNewResource() { + public SeekableRODIterator createNewResource() { return rod.iterator(); } @@ -99,13 +99,13 @@ class ReferenceOrderedDataPool extends ResourcePool { * @param resources @{inheritedDoc} * @return @{inheritedDoc} */ - public RODIterator selectBestExistingResource( DataStreamSegment segment, List resources ) { + public SeekableRODIterator selectBestExistingResource( DataStreamSegment segment, List resources ) { if( !(segment instanceof MappedStreamSegment) ) throw new StingException("Reference-ordered data cannot utilitize unmapped segments."); GenomeLoc position = ((MappedStreamSegment)segment).locus; - for( RODIterator iterator: resources ) { + for( SeekableRODIterator iterator: resources ) { if( (iterator.position() == null && iterator.hasNext()) || (iterator.position() != null && iterator.position().isBefore(position)) ) return iterator; @@ -116,14 +116,14 @@ class ReferenceOrderedDataPool extends ResourcePool { /** * In this case, the iterator is the resource. Pass it through. */ - public RODIterator createIteratorFromResource( DataStreamSegment segment, RODIterator resource ) { + public SeekableRODIterator createIteratorFromResource( DataStreamSegment segment, SeekableRODIterator resource ) { return resource; } /** * Don't worry about closing the resource; let the file handles expire naturally for the moment. */ - public void closeResource( RODIterator resource ) { + public void closeResource( SeekableRODIterator resource ) { } } diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/RODIterator.java b/java/src/org/broadinstitute/sting/gatk/refdata/RODIterator.java deleted file mode 100755 index f689076af..000000000 --- a/java/src/org/broadinstitute/sting/gatk/refdata/RODIterator.java +++ /dev/null @@ -1,149 +0,0 @@ -package org.broadinstitute.sting.gatk.refdata; - -import org.broadinstitute.sting.gatk.iterators.PushbackIterator; -import org.broadinstitute.sting.utils.GenomeLoc; - -import java.util.Iterator; - -/** - * Adapter (decorator) class for rod iterators. The "raw" rod iterator wrapped into this class - * should be capable of reading the underlying ROD data file and iterating over successive - * genomic locations. The purpose of this adapter is to provide additional seekForward() method: - * upon a call to this method, the decorated iterator will fastforward to the specified position. - * NOTE 1: if a particular ROD data file is allowed to have multiple records (lines) - * associated with the same location, the "raw" iterator must be capable of dealing with this situation - * by loading all such records at once on a call to next(). - * NOTE 2: the object represented by this class is still a unidirectional iterator: after a call to seekForward(), - * subsequent calls to seekForward() or next() will work from the position the iterator was fastforwarded to. - * @author asivache - * - * @param - */ -public class RODIterator implements Iterator { - private PushbackIterator it; - private ROD current = null; - private GenomeLoc position = null; - - public RODIterator(Iterator it) { - this.it = new PushbackIterator(it); - } - - @Override - public boolean hasNext() { return it.hasNext(); } - - @Override - public ROD next() { - ROD next = it.next(); - if( next != null ) { - position = next.getLocation().clone(); - current = next; - } - return next; - } - -// @Override -// public boolean hasNext() { return current != null || it.hasNext(); } -// -// @Override -// public ROD next() { -// if ( current != null ) { -// ROD prev = current; -// current = null; -// return prev; -// } else { -// ROD next = it.next(); -// if( next != null ) { -// position = next.getLocation().clone(); -// //current = next; -// } -// -// return next; -// } -// } - - /** - * Returns the current position of this iterator. - * @return Current position of the iterator, or null if no position exists. - */ - public GenomeLoc position() { - return position; - } - - /** - * Seeks forward in the file until we reach (or cross) a record at contig / pos - * If we don't find anything and cross beyond contig / pos, we return null; - * subsequent call to next() will return the first record located after the specified - * position in this case. Otherwise, the first ROD record at or overlapping with - * the specified position is returned; the subsequent call to next() will return the - * next ROD record. - * - * NOTE 1: the location object loc should be a single point (not an interval); - * ROD locations, however, can be extended intervals, in which case first ROD that overlaps the specified - * position will be returned. - * - * NOTE 2: seekForward() is not exactly like next(): if we are strictly past a record, seekForward will not - * see it, but it will be returning the "current" record (i.e. the record returned by last call to next() or - * seekForward()) over and over again and will NOT advance the iterator for as long as the current record's location - * overlaps with the query position. - * - * @param loc point-like genomic location to fastforward to. - * @return ROD object at (or overlapping with) the specified position, or null if no such ROD exists. - */ - public ROD seekForward(final GenomeLoc loc) { - final boolean DEBUG = false; - - ROD result = null; - - //if (current != null && current.getName().equals("interval")) { - // boolean contains = current.getLocation().containsP(loc); - // System.out.printf(" %s : current is %s, seeking to %s, contains %b%n", current.getName(), current.getLocation(), loc, contains); - //} - - if ( current != null && current.getLocation().containsP(loc) ) - return current; - - if ( DEBUG ) System.out.printf(" *** starting seek to %s %d (contig %d) from current location %s %d%n", loc.getContig(), loc.getStart(), - loc.getContigIndex(),current==null?"null":current.getLocation().getContig(), current==null?-1:current.getLocation().getStart()); - while ( hasNext() ) { - ROD proposed = next(); - if( proposed == null ) - continue; - //System.out.printf(" -> Seeking to %s %d AT %s %d%n", contigName, pos, current.getContig(), current.getStart()); - if ( DEBUG ) System.out.println(" proposed at "+proposed.getLocation()+"; contig index="+proposed.getLocation().getContigIndex()); - boolean containedP = proposed.getLocation().containsP(loc); - //System.out.printf(" %s -> Seeking to %s, at %s => contains = %b%n", current.getName(), loc, current.getLocation(), containedP); - int cmp = proposed.getLocation().compareTo(loc); - if ( cmp < 0 ) { - if ( DEBUG ) System.out.println(" we are before..."); - // current occurs before loc, continue searching - continue; - } - else if ( cmp == 0 || containedP ) { - if ( DEBUG ) System.out.println(" we found overlap..."); - result = proposed; - break; - } else { - if ( DEBUG ) System.out.println(" we are after..."); - // current is after loc - it.pushback(proposed); - break; - } - } - - if ( DEBUG ) { - if ( result != null ) - System.out.printf(" ### Found %s%n", result.getLocation()); - } - - // make a note that the iterator last seeked to the specified position - current = result; - position = loc.clone(); - - // we ran out of elements or found something - return result; - } - - public void remove() { - throw new UnsupportedOperationException(); - } -} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java b/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java index 79661b899..b5f314c96 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java @@ -2,9 +2,7 @@ package org.broadinstitute.sting.gatk.refdata; import org.apache.log4j.Logger; -import java.util.Collection; -import java.util.HashMap; -import java.util.LinkedList; +import java.util.*; /** * This class represents the Reference Metadata available at a particular site in the genome. It can be @@ -23,31 +21,68 @@ import java.util.LinkedList; * Time: 3:05:23 PM */ public class RefMetaDataTracker { - final HashMap map = new HashMap(); + final HashMap> map = new HashMap>(); protected static Logger logger = Logger.getLogger(RefMetaDataTracker.class); /** - * Finds the reference meta data named name, if it exists, otherwise returns the defaultValue - * + * Finds the reference meta data named name, if it exists, otherwise returns the defaultValue. + * This is a legacy method that works with "singleton" tracks, in which a single ROD record can be associated + * with any given site. If track provides multiple records associated with a site, this method will return + * the first one. * @param name * @param defaultValue * @return */ + @Deprecated public ReferenceOrderedDatum lookup(final String name, ReferenceOrderedDatum defaultValue) { + //logger.debug(String.format("Lookup %s%n", name)); + final String luName = canonicalName(name); + if ( map.containsKey(luName) ) { + RODRecordList value = map.get(luName) ; + if ( value != null ) { + List l = value.getRecords(); + if ( l != null & l.size() > 0 ) return value.getRecords().get(0); + } + } + return defaultValue; + } + + /** + * Finds the reference metadata track named 'name' and returns all ROD records from that track associated + * with the current site as a RODRecordList collection object. If no data track with specified name is available, + * returns defaultValue wrapped as RODRecordList object. NOTE: if defaultValue is null, it will be wrapped up + * with track name set to 'name' and location set to null; otherwise the wrapper object will have name and + * location set to defaultValue.getName() and defaultValue.getLocation(), respectively (use caution, + * defaultValue.getLocation() may be not equal to what RODRecordList's location would be expected to be otherwise: + * for instance, on locus traversal, location is usually expected to be a single base we are currently looking at, + * regardless of the presence of "extended" RODs overlapping with that location). + * @param name + * @param defaultValue + * @return + */ + public RODRecordList getTrackData(final String name, ReferenceOrderedDatum defaultValue) { //logger.debug(String.format("Lookup %s%n", name)); final String luName = canonicalName(name); if ( map.containsKey(luName) ) return map.get(luName); - else - return defaultValue; - } + else { + if ( defaultValue == null ) { + return new RODRecordList(luName, Collections.singletonList(defaultValue), null); + } else { + return new RODRecordList(defaultValue.getName(), + Collections.singletonList(defaultValue), + defaultValue.getLocation()); + } + } + } /** * @see this.lookup * @param name * @param defaultValue * @return */ + @Deprecated public Object lookup(final String name, Object defaultValue) { final String luName = canonicalName(name); if ( map.containsKey(luName) ) @@ -68,7 +103,7 @@ public class RefMetaDataTracker { } /** - * Is there a binding at this site to a ROD with name? + * Is there a binding at this site to a ROD/track with the specified name? * * @param name the name of the rod * @return true if it has the rod @@ -78,39 +113,64 @@ public class RefMetaDataTracker { } /** - * Get all of the RODs at the current site + * Get all of the RODs at the current site. The collection is "flattened": for any track that has multiple records + * at the current site, they all will be added to the list as separate elements. * * @return */ public Collection getAllRods() { - return map.values(); + List l = new ArrayList(); + for ( RODRecordList rl : map.values() ) { + if ( rl == null ) continue; // how do we get null value stored for a track? shouldn't the track be missing from the map alltogether? + l.addAll(rl.getRecords()); + } + return l; + } /** - * Get all of the RODs at the current site + * Get all of the ROD tracks at the current site. Each track is returned as a single compound + * object (RODRecordList) that may contain multiple ROD records associated with the current site. * * @return */ - public Collection getBoundRods() { - LinkedList bound = new LinkedList(); + public Collection> getBoundRodTracks() { + LinkedList> bound = new LinkedList>(); - for ( ReferenceOrderedDatum value : map.values() ) { - if ( value != null ) - bound.add(value); + for ( RODRecordList value : map.values() ) { + if ( value != null && value.size() != 0 ) bound.add(value); } return bound; } + public Collection getBoundRodRecords() { + LinkedList bound = new LinkedList(); + + for ( RODRecordList valueList : map.values() ) { + for ( ReferenceOrderedDatum value : valueList ) { + if ( value != null ) + bound.add(value); + } + } + + return bound; + } /** - * Binds the reference ordered datum ROD to name at this site. Should be used only but the traversal + * Binds the list of reference ordered data records (RODs) to track name at this site. Should be used only by the traversal * system to provide access to RODs in a structured way to the walkers. * * @param name * @param rod */ + public void bind(final String name, RODRecordList rod) { + //logger.debug(String.format("Binding %s to %s", name, rod)); + map.put(canonicalName(name), rod); + } +/* public void bind(final String name, ReferenceOrderedDatum rod) { //logger.debug(String.format("Binding %s to %s", name, rod)); map.put(canonicalName(name), rod); } + */ } diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/ReferenceOrderedData.java b/java/src/org/broadinstitute/sting/gatk/refdata/ReferenceOrderedData.java index 2371fd02e..9bec6e50f 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/ReferenceOrderedData.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/ReferenceOrderedData.java @@ -1,14 +1,10 @@ package org.broadinstitute.sting.gatk.refdata; import org.apache.log4j.Logger; -import org.broadinstitute.sting.utils.MalformedGenomeLocException; import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.xReadLines; import java.io.*; -import java.lang.reflect.Constructor; -import java.lang.reflect.InvocationTargetException; import java.lang.reflect.Method; import java.util.*; @@ -20,13 +16,13 @@ import java.util.*; * Time: 10:47:14 AM * To change this template use File | Settings | File Templates. */ -public class ReferenceOrderedData implements Iterable { +public class ReferenceOrderedData implements Iterable> { private String name; private File file = null; - private String fieldDelimiter; +// private String fieldDelimiter; /** Header object returned from the datum */ - private Object header = null; +// private Object header = null; private Class type = null; // runtime type information for object construction @@ -180,8 +176,8 @@ public class ReferenceOrderedData implements this.file = file; this.type = type; this.name = name; - this.header = initializeROD(name, file, type); - this.fieldDelimiter = newROD(name, type).delimiterRegex(); +// this.header = initializeROD(name, file, type); +// this.fieldDelimiter = newROD(name, type).delimiterRegex(); } public String getName() { return name; } @@ -200,13 +196,13 @@ public class ReferenceOrderedData implements return this.name.equals(name) && type.isAssignableFrom(this.type); } - public RODIterator iterator() { + public SeekableRODIterator iterator() { Iterator it; try { Method m = type.getDeclaredMethod("createIterator", String.class, java.io.File.class); it = (Iterator) m.invoke(null, name, file); } catch (java.lang.NoSuchMethodException e) { - it = new SimpleRODIterator(); + it = new RODRecordIterator(file,name,type); } catch (java.lang.NullPointerException e) { throw new RuntimeException(e); } catch (java.lang.SecurityException e) { @@ -218,7 +214,8 @@ public class ReferenceOrderedData implements } catch (java.lang.reflect.InvocationTargetException e) { throw new RuntimeException(e); } - return new RODIterator(it); + // return new RODIterator(it); + return new SeekableRODIterator(it); } // ---------------------------------------------------------------------- @@ -227,10 +224,10 @@ public class ReferenceOrderedData implements // // ---------------------------------------------------------------------- public void testMe() { - for (ReferenceOrderedDatum rec : this) { - System.out.println(rec.toString()); + for (RODRecordList rec : this) { + System.out.println(rec.getRecords().get(0).toString()); - RodGenotypeChipAsGFF gff = (RodGenotypeChipAsGFF) rec; + RodGenotypeChipAsGFF gff = (RodGenotypeChipAsGFF) rec.getRecords().get(0); String[] keys = {"LENGTH", "ALT", "FOBARBAR"}; for (String key : keys) { System.out.printf(" -> %s is (%s)%n", key, gff.containsAttribute(key) ? gff.getAttribute(key) : "none"); @@ -246,8 +243,10 @@ public class ReferenceOrderedData implements // ---------------------------------------------------------------------- public ArrayList readAll() { ArrayList elts = new ArrayList(); - for (ReferenceOrderedDatum rec : this) { - elts.add(rec); + for ( RODRecordList l : this ) { + for (ReferenceOrderedDatum rec : l) { + elts.add(rec); + } } elts.trimToSize(); return elts; @@ -269,12 +268,14 @@ public class ReferenceOrderedData implements public boolean validateFile() throws Exception { ReferenceOrderedDatum last = null; - for (ReferenceOrderedDatum rec : this) { - if (last != null && last.compareTo(rec) == 1) { - // It's out of order - throw new Exception("Out of order elements at \n" + last.toString() + "\n" + rec.toString()); + for ( RODRecordList l : this ) { + for (ReferenceOrderedDatum rec : l) { + if (last != null && last.compareTo(rec) > 1) { + // It's out of order + throw new Exception("Out of order elements at \n" + last.toString() + "\n" + rec.toString()); + } + last = rec; } - last = rec; } return true; } @@ -288,103 +289,103 @@ public class ReferenceOrderedData implements // Iteration // // ---------------------------------------------------------------------- - private class SimpleRODIterator implements Iterator { - private xReadLines parser = null; - - public SimpleRODIterator() { - try { - parser = new xReadLines(file); - } catch (FileNotFoundException e) { - Utils.scareUser("Couldn't open file: " + file); - } - } - - public boolean hasNext() { - //System.out.printf("Parser has next: %b%n", parser.hasNext()); - return parser.hasNext(); - } - - public ROD next() { - ROD n = null; - boolean success = false; - boolean firstFailure = true; - - do { - final String line = parser.next(); - //System.out.printf("Line is '%s'%n", line); - String parts[] = line.split(fieldDelimiter); - - try { - n = parseLine(parts); - // Two failure conditions: - // 1) parseLine throws an exception. - // 2) parseLine returns null. - // 3) parseLine throws a RuntimeException. - // TODO: Clean this up so that all errors are handled in one spot. - success = (n != null); - } - catch (MalformedGenomeLocException ex) { - if (firstFailure) { - Utils.warnUser("Failed to parse contig on line '" + line + "'. The reason given was: " + ex.getMessage() + " Skipping ahead to the next recognized GenomeLoc. "); - firstFailure = false; - } - if (!parser.hasNext()) - Utils.warnUser("Unable to find more valid reference-ordered data. Giving up."); - } - - } while (!success && parser.hasNext()); - - return n; - } - - public void remove() { - throw new UnsupportedOperationException(); - } - } +// private class SimpleRODIterator implements Iterator { +// private xReadLines parser = null; +// +// public SimpleRODIterator() { +// try { +// parser = new xReadLines(file); +// } catch (FileNotFoundException e) { +// Utils.scareUser("Couldn't open file: " + file); +// } +// } +// +// public boolean hasNext() { +// //System.out.printf("Parser has next: %b%n", parser.hasNext()); +// return parser.hasNext(); +// } +// +// public ROD next() { +// ROD n = null; +// boolean success = false; +// boolean firstFailure = true; +// +// do { +// final String line = parser.next(); +// //System.out.printf("Line is '%s'%n", line); +// String parts[] = line.split(fieldDelimiter); +// +// try { +// n = parseLine(parts); +// // Two failure conditions: +// // 1) parseLine throws an exception. +// // 2) parseLine returns null. +// // 3) parseLine throws a RuntimeException. +// // TODO: Clean this up so that all errors are handled in one spot. +// success = (n != null); +// } +// catch (MalformedGenomeLocException ex) { +// if (firstFailure) { +// Utils.warnUser("Failed to parse contig on line '" + line + "'. The reason given was: " + ex.getMessage() + " Skipping ahead to the next recognized GenomeLoc. "); +// firstFailure = false; +// } +// if (!parser.hasNext()) +// Utils.warnUser("Unable to find more valid reference-ordered data. Giving up."); +// } +// +// } while (!success && parser.hasNext()); +// +// return n; +// } +// +// public void remove() { +// throw new UnsupportedOperationException(); +// } +// } // ---------------------------------------------------------------------- // // Parsing // // ---------------------------------------------------------------------- - private Constructor parsing_constructor; +// private Constructor parsing_constructor; - private ROD newROD(final String name, final Class type) { - try { - return (ROD) parsing_constructor.newInstance(name); - } catch (java.lang.InstantiationException e) { - throw new RuntimeException(e); - } catch (java.lang.IllegalAccessException e) { - throw new RuntimeException(e); - } catch (InvocationTargetException e) { - throw new RuntimeException(e); - } - } +// private ROD newROD(final String name, final Class type) { +// try { +// return (ROD) parsing_constructor.newInstance(name); +// } catch (java.lang.InstantiationException e) { +// throw new RuntimeException(e); +// } catch (java.lang.IllegalAccessException e) { +// throw new RuntimeException(e); +// } catch (InvocationTargetException e) { +// throw new RuntimeException(e); +// } +// } - private Object initializeROD(final String name, final File file, final Class type) { - try { - parsing_constructor = type.getConstructor(String.class); - } - catch (java.lang.NoSuchMethodException e) { - throw new RuntimeException(e); - } - ROD rod = newROD(name, type); - try { - return rod.initialize(file); - } catch (FileNotFoundException e) { - throw new RuntimeException(e); - } - } +// private Object initializeROD(final String name, final File file, final Class type) { +// try { +// parsing_constructor = type.getConstructor(String.class); +// } +// catch (java.lang.NoSuchMethodException e) { +// throw new RuntimeException(e); +// } +// ROD rod = newROD(name, type); +// try { +// return rod.initialize(file); +// } catch (FileNotFoundException e) { +// throw new RuntimeException(e); +// } +// } - private ROD parseLine(final String[] parts) { - //System.out.printf("Parsing GFFLine %s%n", Utils.join(" ", parts)); - ROD obj = newROD(name, type); - try { - if (!obj.parseLine(header, parts)) - obj = null; - } catch (IOException e) { - throw new RuntimeException("Badly formed ROD: " + e); - } - return obj; - } +// private ROD parseLine(final String[] parts) { +// //System.out.printf("Parsing GFFLine %s%n", Utils.join(" ", parts)); +// ROD obj = newROD(name, type); +// try { +// if (!obj.parseLine(header, parts)) +// obj = null; +// } catch (IOException e) { +// throw new RuntimeException("Badly formed ROD: " + e); +// } +// return obj; +// } } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/CountRodWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/CountRodWalker.java index fe778235c..829bfbdb9 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/CountRodWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/CountRodWalker.java @@ -4,6 +4,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; +import org.broadinstitute.sting.gatk.refdata.RODRecordList; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.cmdLine.Argument; @@ -55,8 +56,8 @@ public class CountRodWalker extends RodWalker rods = new LinkedList(); - for ( ReferenceOrderedDatum rod : tracker.getBoundRods() ) { + Collection> rods = new LinkedList>(); + for ( RODRecordList rod : tracker.getBoundRodTracks() ) { //System.out.printf("Considering rod %s%n", rod); if ( rod.getLocation().getStart() == context.getLocation().getStart() && ! rod.getName().equals("interval") ) { // only consider the first element @@ -70,7 +71,7 @@ public class CountRodWalker extends RodWalker 0 ) { if ( verbose ) { List names = new ArrayList(); - for ( ReferenceOrderedDatum rod : rods ) { + for ( RODRecordList rod : rods ) { names.add(rod.getName()); } diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataPoolTest.java b/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataPoolTest.java index 17e3e4d53..8ab81c41d 100755 --- a/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataPoolTest.java +++ b/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataPoolTest.java @@ -9,10 +9,7 @@ import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData; -import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; -import org.broadinstitute.sting.gatk.refdata.TabularROD; -import org.broadinstitute.sting.gatk.refdata.RODIterator; +import org.broadinstitute.sting.gatk.refdata.*; import java.io.File; import java.io.FileNotFoundException; @@ -58,12 +55,12 @@ public class ReferenceOrderedDataPoolTest extends BaseTest { @Test public void testCreateSingleIterator() { ResourcePool iteratorPool = new ReferenceOrderedDataPool(rod); - RODIterator iterator = (RODIterator)iteratorPool.iterator( new MappedStreamSegment(testSite1) ); + SeekableRODIterator iterator = (SeekableRODIterator)iteratorPool.iterator( new MappedStreamSegment(testSite1) ); Assert.assertEquals("Number of iterators in the pool is incorrect", 1, iteratorPool.numIterators()); Assert.assertEquals("Number of available iterators in the pool is incorrect", 0, iteratorPool.numAvailableIterators()); - TabularROD datum = (TabularROD)iterator.next(); + TabularROD datum = (TabularROD)iterator.next().getRecords().get(0); assertTrue(datum.getLocation().equals(testSite1)); assertTrue(datum.get("COL1").equals("A")); @@ -79,36 +76,36 @@ public class ReferenceOrderedDataPoolTest extends BaseTest { @Test public void testCreateMultipleIterators() { ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(rod); - RODIterator iterator1 = iteratorPool.iterator( new MappedStreamSegment(testSite1) ); + SeekableRODIterator iterator1 = iteratorPool.iterator( new MappedStreamSegment(testSite1) ); // Create a new iterator at position 2. - RODIterator iterator2 = iteratorPool.iterator( new MappedStreamSegment(testSite2) ); + SeekableRODIterator iterator2 = iteratorPool.iterator( new MappedStreamSegment(testSite2) ); Assert.assertEquals("Number of iterators in the pool is incorrect", 2, iteratorPool.numIterators()); Assert.assertEquals("Number of available iterators in the pool is incorrect", 0, iteratorPool.numAvailableIterators()); // Test out-of-order access: first iterator2, then iterator1. // Ugh...first call to a region needs to be a seek. - TabularROD datum = (TabularROD)iterator2.seekForward(testSite2); + TabularROD datum = (TabularROD)iterator2.seekForward(testSite2).getRecords().get(0); assertTrue(datum.getLocation().equals(testSite2)); assertTrue(datum.get("COL1").equals("C")); assertTrue(datum.get("COL2").equals("D")); assertTrue(datum.get("COL3").equals("E")); - datum = (TabularROD)iterator1.next(); + datum = (TabularROD)iterator1.next().getRecords().get(0); assertTrue(datum.getLocation().equals(testSite1)); assertTrue(datum.get("COL1").equals("A")); assertTrue(datum.get("COL2").equals("B")); assertTrue(datum.get("COL3").equals("C")); // Advance iterator2, and make sure both iterator's contents are still correct. - datum = (TabularROD)iterator2.next(); + datum = (TabularROD)iterator2.next().getRecords().get(0); assertTrue(datum.getLocation().equals(testSite3)); assertTrue(datum.get("COL1").equals("F")); assertTrue(datum.get("COL2").equals("G")); assertTrue(datum.get("COL3").equals("H")); - datum = (TabularROD)iterator1.next(); + datum = (TabularROD)iterator1.next().getRecords().get(0); assertTrue(datum.getLocation().equals(testSite2)); assertTrue(datum.get("COL1").equals("C")); assertTrue(datum.get("COL2").equals("D")); @@ -129,12 +126,12 @@ public class ReferenceOrderedDataPoolTest extends BaseTest { @Test public void testIteratorConservation() { ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(rod); - RODIterator iterator = (RODIterator)iteratorPool.iterator( new MappedStreamSegment(testSite1) ); + SeekableRODIterator iterator = iteratorPool.iterator( new MappedStreamSegment(testSite1) ); Assert.assertEquals("Number of iterators in the pool is incorrect", 1, iteratorPool.numIterators()); Assert.assertEquals("Number of available iterators in the pool is incorrect", 0, iteratorPool.numAvailableIterators()); - TabularROD datum = (TabularROD)iterator.next(); + TabularROD datum = (TabularROD)iterator.next().getRecords().get(0); assertTrue(datum.getLocation().equals(testSite1)); assertTrue(datum.get("COL1").equals("A")); assertTrue(datum.get("COL2").equals("B")); @@ -149,7 +146,7 @@ public class ReferenceOrderedDataPoolTest extends BaseTest { Assert.assertEquals("Number of iterators in the pool is incorrect", 1, iteratorPool.numIterators()); Assert.assertEquals("Number of available iterators in the pool is incorrect", 0, iteratorPool.numAvailableIterators()); - datum = (TabularROD)iterator.seekForward(testSite3); + datum = (TabularROD)iterator.seekForward(testSite3).getRecords().get(0); assertTrue(datum.getLocation().equals(testSite3)); assertTrue(datum.get("COL1").equals("F")); assertTrue(datum.get("COL2").equals("G")); @@ -164,12 +161,12 @@ public class ReferenceOrderedDataPoolTest extends BaseTest { @Test public void testIteratorCreation() { ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(rod); - RODIterator iterator = (RODIterator)iteratorPool.iterator( new MappedStreamSegment(testSite3) ); + SeekableRODIterator iterator = iteratorPool.iterator( new MappedStreamSegment(testSite3) ); Assert.assertEquals("Number of iterators in the pool is incorrect", 1, iteratorPool.numIterators()); Assert.assertEquals("Number of available iterators in the pool is incorrect", 0, iteratorPool.numAvailableIterators()); - TabularROD datum = (TabularROD)iterator.seekForward(testSite3); + TabularROD datum = (TabularROD)iterator.seekForward(testSite3).getRecords().get(0); assertTrue(datum.getLocation().equals(testSite3)); assertTrue(datum.get("COL1").equals("F")); assertTrue(datum.get("COL2").equals("G")); @@ -184,7 +181,7 @@ public class ReferenceOrderedDataPoolTest extends BaseTest { Assert.assertEquals("Number of iterators in the pool is incorrect", 2, iteratorPool.numIterators()); Assert.assertEquals("Number of available iterators in the pool is incorrect", 1, iteratorPool.numAvailableIterators()); - datum = (TabularROD)iterator.next(); + datum = (TabularROD)iterator.next().getRecords().get(0); assertTrue(datum.getLocation().equals(testSite1)); assertTrue(datum.get("COL1").equals("A")); assertTrue(datum.get("COL2").equals("B")); diff --git a/java/test/org/broadinstitute/sting/gatk/refdata/TabularRODTest.java b/java/test/org/broadinstitute/sting/gatk/refdata/TabularRODTest.java index e1f866ed2..bd41876a4 100755 --- a/java/test/org/broadinstitute/sting/gatk/refdata/TabularRODTest.java +++ b/java/test/org/broadinstitute/sting/gatk/refdata/TabularRODTest.java @@ -26,7 +26,7 @@ import net.sf.picard.reference.ReferenceSequenceFile; public class TabularRODTest extends BaseTest { private static ReferenceSequenceFile seq; private ReferenceOrderedData ROD; - private RODIterator iter; + private SeekableRODIterator iter; @BeforeClass @@ -48,7 +48,8 @@ public class TabularRODTest extends BaseTest { @Test public void test1() { logger.warn("Executing test1"); - TabularROD one = (TabularROD)iter.next(); + RODRecordList oneList = iter.next(); + TabularROD one = oneList.getRecords().get(0); assertTrue(one.size() == 4); assertTrue(one.getLocation().equals(GenomeLocParser.createGenomeLoc("chrM", 10))); assertTrue(one.get("COL1").equals("A")); @@ -59,8 +60,10 @@ public class TabularRODTest extends BaseTest { @Test public void test2() { logger.warn("Executing test2"); - TabularROD one = (TabularROD)iter.next(); - TabularROD two = (TabularROD)iter.next(); + RODRecordList oneList = iter.next(); + RODRecordList twoList = iter.next(); + TabularROD one = oneList.getRecords().get(0); + TabularROD two = twoList.getRecords().get(0); assertTrue(two.size() == 4); assertTrue(two.getLocation().equals(GenomeLocParser.createGenomeLoc("chrM", 20))); assertTrue(two.get("COL1").equals("C")); @@ -71,9 +74,12 @@ public class TabularRODTest extends BaseTest { @Test public void test3() { logger.warn("Executing test3"); - TabularROD one = (TabularROD)iter.next(); - TabularROD two = (TabularROD)iter.next(); - TabularROD three = (TabularROD)iter.next(); + RODRecordList oneList = iter.next(); + RODRecordList twoList = iter.next(); + RODRecordList threeList = iter.next(); + TabularROD one = oneList.getRecords().get(0); + TabularROD two = twoList.getRecords().get(0); + TabularROD three = threeList.getRecords().get(0); assertTrue(three.size() == 4); assertTrue(three.getLocation().equals(GenomeLocParser.createGenomeLoc("chrM", 30))); assertTrue(three.get("COL1").equals("F")); @@ -84,16 +90,20 @@ public class TabularRODTest extends BaseTest { @Test public void testDone() { logger.warn("Executing testDone"); - TabularROD one = (TabularROD)iter.next(); - TabularROD two = (TabularROD)iter.next(); - TabularROD three = (TabularROD)iter.next(); + RODRecordList oneList = iter.next(); + RODRecordList twoList = iter.next(); + RODRecordList threeList = iter.next(); + TabularROD one = oneList.getRecords().get(0); + TabularROD two = twoList.getRecords().get(0); + TabularROD three = threeList.getRecords().get(0); assertTrue(!iter.hasNext()); } @Test public void testSeek() { logger.warn("Executing testSeek"); - TabularROD two = (TabularROD)iter.seekForward(GenomeLocParser.createGenomeLoc("chrM", 20)); + RODRecordList twoList = iter.seekForward(GenomeLocParser.createGenomeLoc("chrM", 20)); + TabularROD two = twoList.getRecords().get(0); assertTrue(two.size() == 4); assertTrue(two.getLocation().equals(GenomeLocParser.createGenomeLoc("chrM", 20))); assertTrue(two.get("COL1").equals("C")); @@ -104,7 +114,8 @@ public class TabularRODTest extends BaseTest { @Test public void testToString() { logger.warn("Executing testToString"); - TabularROD one = (TabularROD)iter.next(); + RODRecordList oneList = iter.next(); + TabularROD one = oneList.getRecords().get(0); assertTrue(one.toString().equals("chrM:10\tA\tB\tC")); } @@ -113,10 +124,11 @@ public class TabularRODTest extends BaseTest { public void testDelim1() { File file2 = new File(testDir + "TabularDataTest2.dat"); ReferenceOrderedData ROD_commas = new ReferenceOrderedData("tableTest", file2, TabularROD.class); - RODIterator iter_commas = ROD_commas.iterator(); + SeekableRODIterator iter_commas = ROD_commas.iterator(); logger.warn("Executing testDelim1"); - TabularROD one2 = (TabularROD)iter_commas.next(); + RODRecordList one2List = iter_commas.next(); + TabularROD one2 = one2List.getRecords().get(0); assertTrue(one2.size() == 5); assertTrue(one2.getLocation().equals(GenomeLocParser.createGenomeLoc("chrM", 10))); assertTrue(one2.get("COL1").equals("A")); @@ -130,10 +142,11 @@ public class TabularRODTest extends BaseTest { TabularROD.setDelimiter(",",","); File file2 = new File(testDir + "TabularDataTest2.dat"); ReferenceOrderedData ROD_commas = new ReferenceOrderedData("tableTest", file2, TabularROD.class); - RODIterator iter_commas = ROD_commas.iterator(); + SeekableRODIterator iter_commas = ROD_commas.iterator(); logger.warn("Executing testDelim1"); - TabularROD one2 = (TabularROD)iter_commas.next(); + RODRecordList one2List = iter_commas.next(); + TabularROD one2 = one2List.getRecords().get(0); assertTrue(one2.size() == 5); assertTrue(one2.getLocation().equals(GenomeLocParser.createGenomeLoc("chrM", 10))); assertTrue(one2.get("COL1").equals("A")); @@ -174,16 +187,18 @@ public class TabularRODTest extends BaseTest { out.println(row.toString()); ReferenceOrderedData ROD_commas = new ReferenceOrderedData("tableTest", outputFile, TabularROD.class); - RODIterator iter_commas = ROD_commas.iterator(); + SeekableRODIterator iter_commas = ROD_commas.iterator(); - TabularROD one = (TabularROD)iter_commas.next(); + RODRecordList oneList = iter_commas.next(); + TabularROD one = oneList.getRecords().get(0); assertTrue(one.size() == 4); assertTrue(one.getLocation().equals(GenomeLocParser.createGenomeLoc("chrM", 1))); assertTrue(one.get("col1").equals("1")); assertTrue(one.get("col2").equals("2")); assertTrue(one.get("col3").equals("3")); - TabularROD two = (TabularROD)iter_commas.next(); + RODRecordList twoList = iter_commas.next(); + TabularROD two = twoList.getRecords().get(0); assertTrue(two.size() == 4); assertTrue(two.getLocation().equals(GenomeLocParser.createGenomeLoc("chrM", 2))); assertTrue(two.get("col1").equals("3"));