Changing the carpet under your feet!! A new incremental update to the ROD system has arrived.

All the updated classes now make use of the new SeekableRODIterator instead of RODIterator. The RODIterator class has been deleted. This batch makes only trivial updates to tests, as dictated by the change in the ROD system interface. A few less trivial updates are to follow. This is a partial commit; a few walkers also still need to be updated, hold on...

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1667 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
asivache 2009-09-21 16:55:22 +00:00
parent 4c67a49ccb
commit a6bd509593
9 changed files with 280 additions and 352 deletions

View File

@ -1,7 +1,7 @@
package org.broadinstitute.sting.gatk.datasources.providers;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.RODIterator;
import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.utils.GenomeLoc;
@ -43,7 +43,7 @@ public class ManagingReferenceOrderedView implements ReferenceOrderedView {
public ManagingReferenceOrderedView( ShardDataProvider provider ) {
//this.provider = provider;
for( ReferenceOrderedDataSource dataSource: provider.getReferenceOrderedData() )
states.add( new ReferenceOrderedDataState( dataSource, (RODIterator)dataSource.seek(provider.getShard()) ) );
states.add( new ReferenceOrderedDataState( dataSource, (SeekableRODIterator)dataSource.seek(provider.getShard()) ) );
provider.register(this);
}
@ -78,9 +78,9 @@ public class ManagingReferenceOrderedView implements ReferenceOrderedView {
*/
private class ReferenceOrderedDataState {
public final ReferenceOrderedDataSource dataSource;
public final RODIterator iterator;
public final SeekableRODIterator iterator;
public ReferenceOrderedDataState( ReferenceOrderedDataSource dataSource, RODIterator iterator ) {
public ReferenceOrderedDataState( ReferenceOrderedDataSource dataSource, SeekableRODIterator iterator ) {
this.dataSource = dataSource;
this.iterator = iterator;
}

View File

@ -1,8 +1,6 @@
package org.broadinstitute.sting.gatk.datasources.providers;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.RODIterator;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.gatk.refdata.*;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.utils.GenomeLoc;
@ -37,11 +35,11 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
/**
* The data sources along with their current states.
*/
private MergingIterator<ReferenceOrderedDatum> rodQueue = null;
private MergingIterator<RODRecordList<ReferenceOrderedDatum>> rodQueue = null;
RefMetaDataTracker tracker = null;
GenomeLoc lastLoc = null;
ReferenceOrderedDatum interval = null;
RODRecordList<ReferenceOrderedDatum> interval = null;
// broken support for multi-locus rods
//List<ReferenceOrderedDatum> multiLocusRODs = new LinkedList<ReferenceOrderedDatum>();
@ -63,11 +61,11 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
GenomeLoc loc = provider.getShard().getGenomeLoc();
List<Iterator<ReferenceOrderedDatum>> iterators = new LinkedList<Iterator<ReferenceOrderedDatum>>();
List< Iterator<RODRecordList<ReferenceOrderedDatum>> > iterators = new LinkedList< Iterator<RODRecordList<ReferenceOrderedDatum>> >();
for( ReferenceOrderedDataSource dataSource: provider.getReferenceOrderedData() ) {
if ( DEBUG ) System.out.printf("Shard is %s%n", loc);
RODIterator it = (RODIterator)dataSource.seek(provider.getShard());
ReferenceOrderedDatum x = it.seekForward(loc);
SeekableRODIterator it = (SeekableRODIterator)dataSource.seek(provider.getShard());
RODRecordList<ReferenceOrderedDatum> x = it.seekForward(loc);
// we need to special case the interval so we don't always think there's a rod at the first location
if ( dataSource.getName().equals(INTERVAL_ROD_NAME) ) {
@ -75,11 +73,11 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
throw new RuntimeException("BUG: interval local variable already assigned " + interval);
interval = x;
} else {
iterators.add( (Iterator<ReferenceOrderedDatum>)it );
iterators.add( it );
}
}
rodQueue = new MergingIterator<ReferenceOrderedDatum>(iterators);
rodQueue = new MergingIterator<RODRecordList<ReferenceOrderedDatum>>(iterators);
}
public RefMetaDataTracker getReferenceOrderedDataAtLocus( GenomeLoc loc ) {
@ -90,7 +88,7 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
if ( ! rodQueue.hasNext() )
return false;
else {
ReferenceOrderedDatum peeked = rodQueue.peek();
RODRecordList<ReferenceOrderedDatum> peeked = rodQueue.peek();
return ! peeked.getLocation().isPast(shard.getGenomeLoc());
}
}
@ -102,14 +100,14 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
*/
public AlignmentContext next() {
if ( DEBUG ) System.out.printf("In RodLocusView.next()...%n");
ReferenceOrderedDatum datum = rodQueue.next();
RODRecordList<ReferenceOrderedDatum> datum = rodQueue.next();
if ( DEBUG ) System.out.printf("In RodLocusView.next(); datum = %s...%n", datum.getLocation());
if ( DEBUG ) System.out.printf("In RodLocusView.next(): creating tracker...%n");
// Update the tracker here for use
Collection<ReferenceOrderedDatum> allRODsHere = getSpanningRods(datum);
tracker = createTracker(allRODsHere);
Collection<RODRecordList<ReferenceOrderedDatum>> allTracksHere = getSpanningTracks(datum);
tracker = createTracker(allTracksHere);
GenomeLoc rodSite = datum.getLocation();
GenomeLoc site = GenomeLocParser.createGenomeLoc( rodSite.getContigIndex(), rodSite.getStart(), rodSite.getStart());
@ -122,11 +120,11 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
return new AlignmentContext(site, new ArrayList<SAMRecord>(), new ArrayList<Integer>(), skippedBases);
}
private RefMetaDataTracker createTracker( Collection<ReferenceOrderedDatum> allRodsHere ) {
private RefMetaDataTracker createTracker( Collection<RODRecordList<ReferenceOrderedDatum>> allTracksHere ) {
RefMetaDataTracker t = new RefMetaDataTracker();
for ( ReferenceOrderedDatum element : allRodsHere ) {
if ( ! t.hasROD(element.getName()) )
t.bind(element.getName(), element);
for ( RODRecordList<ReferenceOrderedDatum> track : allTracksHere ) {
if ( ! t.hasROD(track.getName()) )
t.bind(track.getName(), track);
}
// special case the interval again -- add it into the ROD
@ -135,7 +133,12 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
return t;
}
private Collection<ReferenceOrderedDatum> getSpanningRods(ReferenceOrderedDatum marker) {
/**
 * Single-record variant: wraps the given ROD record into a one-element track that
 * carries the record's own name, then hands that track to rodQueue.allElementsLTE().
 *
 * @param marker the ROD record to wrap and use as the query element
 * @return whatever rodQueue.allElementsLTE() returns for the wrapped record
 */
private Collection<RODRecordList<ReferenceOrderedDatum>> getSpanningTracks(ReferenceOrderedDatum marker) {
    // Build the single-element record list first, then the track wrapper around it.
    final List<ReferenceOrderedDatum> single = Collections.singletonList(marker);
    final RODRecordList<ReferenceOrderedDatum> markerTrack =
            new RODRecordList<ReferenceOrderedDatum>(marker.getName(), single);
    return rodQueue.allElementsLTE(markerTrack);
}
/**
 * Track variant: passes the given track straight to rodQueue.allElementsLTE().
 *
 * @param marker the track (record list) used as the query element
 * @return whatever rodQueue.allElementsLTE() returns for that track
 */
private Collection<RODRecordList<ReferenceOrderedDatum>> getSpanningTracks(RODRecordList<ReferenceOrderedDatum> marker) {
    final Collection<RODRecordList<ReferenceOrderedDatum>> spanning = rodQueue.allElementsLTE(marker);
    return spanning;
}

View File

@ -2,7 +2,7 @@ package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
import org.broadinstitute.sting.gatk.refdata.RODIterator;
import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator;
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.StingException;
@ -59,7 +59,7 @@ public class ReferenceOrderedDataSource implements SimpleDataSource {
* @return Iterator through the data.
*/
public Iterator seek( Shard shard ) {
RODIterator iterator = iteratorPool.iterator( new MappedStreamSegment(shard.getGenomeLoc()) );
SeekableRODIterator iterator = iteratorPool.iterator( new MappedStreamSegment(shard.getGenomeLoc()) );
return iterator;
}
@ -67,7 +67,7 @@ public class ReferenceOrderedDataSource implements SimpleDataSource {
* Close the specified iterator, returning it to the pool.
* @param iterator Iterator to close.
*/
public void close( RODIterator iterator ) {
public void close( SeekableRODIterator iterator ) {
this.iteratorPool.release(iterator);
}
@ -76,7 +76,7 @@ public class ReferenceOrderedDataSource implements SimpleDataSource {
/**
* A pool of reference-ordered data iterators.
*/
class ReferenceOrderedDataPool extends ResourcePool<RODIterator,RODIterator> {
class ReferenceOrderedDataPool extends ResourcePool<SeekableRODIterator,SeekableRODIterator> {
private final ReferenceOrderedData<? extends ReferenceOrderedDatum> rod;
public ReferenceOrderedDataPool( ReferenceOrderedData<? extends ReferenceOrderedDatum> rod ) {
@ -88,7 +88,7 @@ class ReferenceOrderedDataPool extends ResourcePool<RODIterator,RODIterator> {
* to be completely independent of any other iterator.
* @return The newly created resource.
*/
public RODIterator createNewResource() {
public SeekableRODIterator createNewResource() {
return rod.iterator();
}
@ -99,13 +99,13 @@ class ReferenceOrderedDataPool extends ResourcePool<RODIterator,RODIterator> {
* @param resources @{inheritedDoc}
* @return @{inheritedDoc}
*/
public RODIterator selectBestExistingResource( DataStreamSegment segment, List<RODIterator> resources ) {
public SeekableRODIterator selectBestExistingResource( DataStreamSegment segment, List<SeekableRODIterator> resources ) {
if( !(segment instanceof MappedStreamSegment) )
throw new StingException("Reference-ordered data cannot utilitize unmapped segments.");
GenomeLoc position = ((MappedStreamSegment)segment).locus;
for( RODIterator iterator: resources ) {
for( SeekableRODIterator iterator: resources ) {
if( (iterator.position() == null && iterator.hasNext()) ||
(iterator.position() != null && iterator.position().isBefore(position)) )
return iterator;
@ -116,14 +116,14 @@ class ReferenceOrderedDataPool extends ResourcePool<RODIterator,RODIterator> {
/**
* In this case, the iterator is the resource. Pass it through.
*/
public RODIterator createIteratorFromResource( DataStreamSegment segment, RODIterator resource ) {
public SeekableRODIterator createIteratorFromResource( DataStreamSegment segment, SeekableRODIterator resource ) {
return resource;
}
/**
* Don't worry about closing the resource; let the file handles expire naturally for the moment.
*/
public void closeResource( RODIterator resource ) {
public void closeResource( SeekableRODIterator resource ) {
}
}

View File

@ -1,149 +0,0 @@
package org.broadinstitute.sting.gatk.refdata;
import org.broadinstitute.sting.gatk.iterators.PushbackIterator;
import org.broadinstitute.sting.utils.GenomeLoc;
import java.util.Iterator;
/**
* Adapter (decorator) class for rod iterators. The "raw" rod iterator wrapped into this class
* should be capable of reading the underlying ROD data file and iterating over successive
* genomic locations. The purpose of this adapter is to provide additional seekForward() method:
* upon a call to this method, the decorated iterator will fastforward to the specified position.
* NOTE 1: if a particular ROD data file is allowed to have multiple records (lines)
* associated with the same location, the "raw" iterator must be capable of dealing with this situation
* by loading all such records at once on a call to next().
* NOTE 2: the object represented by this class is still a unidirectional iterator: after a call to seekForward(),
* subsequent calls to seekForward() or next() will work from the position the iterator was fastforwarded to.
* @author asivache
*
* @param <ROD>
*/
public class RODIterator<ROD extends ReferenceOrderedDatum> implements Iterator<ROD> {
// The wrapped "raw" iterator; the pushback wrapper lets seekForward() hand back a record it read too far ahead.
private PushbackIterator<ROD> it;
// Last non-null record returned by next(); overwritten by seekForward() with its result (null if the seek found nothing).
private ROD current = null;
// Clone of the location of the last non-null record returned by next(), or of the last seekForward() target.
private GenomeLoc position = null;
/**
 * Decorates the given raw ROD iterator.
 * @param it raw iterator over ROD records; it gets wrapped into a PushbackIterator
 */
public RODIterator(Iterator<ROD> it) {
this.it = new PushbackIterator<ROD>(it);
}
/**
 * @return true if the underlying (pushback) iterator reports more records
 */
@Override
public boolean hasNext() { return it.hasNext(); }
/**
 * Returns the next record from the underlying iterator. When that record is non-null,
 * it is remembered as the current record and a clone of its location becomes the
 * iterator's position; a null record is passed through without touching that state.
 * @return the next record as delivered by the underlying iterator (may be null)
 */
@Override
public ROD next() {
ROD next = it.next();
if( next != null ) {
position = next.getLocation().clone();
current = next;
}
return next;
}
// @Override
// public boolean hasNext() { return current != null || it.hasNext(); }
//
// @Override
// public ROD next() {
// if ( current != null ) {
// ROD prev = current;
// current = null;
// return prev;
// } else {
// ROD next = it.next();
// if( next != null ) {
// position = next.getLocation().clone();
// //current = next;
// }
//
// return next;
// }
// }
/**
* Returns the current position of this iterator.
* @return Current position of the iterator, or null if no position exists.
*/
public GenomeLoc position() {
return position;
}
/**
* Seeks forward in the file until we reach (or cross) a record at contig / pos
* If we don't find anything and cross beyond contig / pos, we return null;
* subsequent call to next() will return the first record located after the specified
* position in this case. Otherwise, the first ROD record at or overlapping with
* the specified position is returned; the subsequent call to next() will return the
* next ROD record.
*
* NOTE 1: the location object <code>loc</code> should be a single point (not an interval);
* ROD locations, however, can be extended intervals, in which case first ROD that overlaps the specified
* position will be returned.
*
* NOTE 2: seekForward() is not exactly like next(): if we are strictly past a record, seekForward will not
* see it, but it will be returning the "current" record (i.e. the record returned by last call to next() or
* seekForward()) over and over again and will NOT advance the iterator for as long as the current record's location
* overlaps with the query position.
*
* @param loc point-like genomic location to fastforward to.
* @return ROD object at (or overlapping with) the specified position, or null if no such ROD exists.
*/
public ROD seekForward(final GenomeLoc loc) {
final boolean DEBUG = false;
ROD result = null;
//if (current != null && current.getName().equals("interval")) {
// boolean contains = current.getLocation().containsP(loc);
// System.out.printf(" %s : current is %s, seeking to %s, contains %b%n", current.getName(), current.getLocation(), loc, contains);
//}
// Fast path: the current record still contains loc, so return it without advancing;
// note that 'position' is left unchanged on this path.
if ( current != null && current.getLocation().containsP(loc) )
return current;
if ( DEBUG ) System.out.printf(" *** starting seek to %s %d (contig %d) from current location %s %d%n", loc.getContig(), loc.getStart(),
loc.getContigIndex(),current==null?"null":current.getLocation().getContig(), current==null?-1:current.getLocation().getStart());
while ( hasNext() ) {
// next() also updates 'current' and 'position'; both are overwritten once the loop is done
ROD proposed = next();
if( proposed == null )
continue;
//System.out.printf(" -> Seeking to %s %d AT %s %d%n", contigName, pos, current.getContig(), current.getStart());
if ( DEBUG ) System.out.println(" proposed at "+proposed.getLocation()+"; contig index="+proposed.getLocation().getContigIndex());
boolean containedP = proposed.getLocation().containsP(loc);
//System.out.printf(" %s -> Seeking to %s, at %s => contains = %b%n", current.getName(), loc, current.getLocation(), containedP);
int cmp = proposed.getLocation().compareTo(loc);
// NOTE(review): this branch is tested before containedP is consulted, so a record that compares
// before loc is skipped even when containedP is true -- confirm how GenomeLoc.compareTo orders an
// extended interval that starts before loc but overlaps it.
if ( cmp < 0 ) {
if ( DEBUG ) System.out.println(" we are before...");
// current occurs before loc, continue searching
continue;
}
else if ( cmp == 0 || containedP ) {
if ( DEBUG ) System.out.println(" we found overlap...");
result = proposed;
break;
} else {
if ( DEBUG ) System.out.println(" we are after...");
// current is after loc
// hand the record back to the pushback iterator (presumably so the next call to next() yields it again)
it.pushback(proposed);
break;
}
}
if ( DEBUG ) {
if ( result != null )
System.out.printf(" ### Found %s%n", result.getLocation());
}
// make a note that the iterator last seeked to the specified position
current = result;
position = loc.clone();
// we ran out of elements or found something
return result;
}
/**
 * Removal is not supported by this iterator.
 * @throws UnsupportedOperationException always
 */
public void remove() {
throw new UnsupportedOperationException();
}
}

View File

@ -2,9 +2,7 @@ package org.broadinstitute.sting.gatk.refdata;
import org.apache.log4j.Logger;
import java.util.Collection;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.*;
/**
* This class represents the Reference Metadata available at a particular site in the genome. It can be
@ -23,31 +21,68 @@ import java.util.LinkedList;
* Time: 3:05:23 PM
*/
public class RefMetaDataTracker {
final HashMap<String, ReferenceOrderedDatum> map = new HashMap<String, ReferenceOrderedDatum>();
final HashMap<String, RODRecordList<ReferenceOrderedDatum>> map = new HashMap<String, RODRecordList<ReferenceOrderedDatum>>();
protected static Logger logger = Logger.getLogger(RefMetaDataTracker.class);
/**
* Finds the reference meta data named name, if it exists, otherwise returns the defaultValue
*
* Finds the reference meta data named name, if it exists, otherwise returns the defaultValue.
* This is a legacy method that works with "singleton" tracks, in which a single ROD record can be associated
* with any given site. If track provides multiple records associated with a site, this method will return
* the first one.
* @param name
* @param defaultValue
* @return
*/
/**
 * Legacy single-record lookup. Returns the first ROD record of the track bound to
 * {@code name} at this site, or {@code defaultValue} when the track is not bound, is
 * bound to null, or has a null or empty record list.
 *
 * @param name         track name; it is canonicalized before the map lookup
 * @param defaultValue value to return when no record is available for the track
 * @return the first record of the track, or {@code defaultValue}
 */
@Deprecated
public ReferenceOrderedDatum lookup(final String name, ReferenceOrderedDatum defaultValue) {
    //logger.debug(String.format("Lookup %s%n", name));
    final String luName = canonicalName(name);
    // A missing key and a key bound to null both yield null here; either way the default is returned.
    final RODRecordList<ReferenceOrderedDatum> value = map.get(luName);
    if ( value != null ) {
        final List<ReferenceOrderedDatum> records = value.getRecords();
        // Short-circuit && (not &): with a null record list, size() must not be evaluated.
        if ( records != null && records.size() > 0 ) return records.get(0);
    }
    return defaultValue;
}
/**
* Finds the reference metadata track named 'name' and returns all ROD records from that track associated
* with the current site as a RODRecordList collection object. If no data track with specified name is available,
* returns defaultValue wrapped as RODRecordList object. NOTE: if defaultValue is null, it will be wrapped up
* with track name set to 'name' and location set to null; otherwise the wrapper object will have name and
* location set to defaultValue.getName() and defaultValue.getLocation(), respectively (use caution,
* defaultValue.getLocation() may be not equal to what RODRecordList's location would be expected to be otherwise:
* for instance, on locus traversal, location is usually expected to be a single base we are currently looking at,
* regardless of the presence of "extended" RODs overlapping with that location).
* @param name
* @param defaultValue
* @return
*/
public RODRecordList<ReferenceOrderedDatum> getTrackData(final String name, ReferenceOrderedDatum defaultValue) {
//logger.debug(String.format("Lookup %s%n", name));
final String luName = canonicalName(name);
if ( map.containsKey(luName) )
return map.get(luName);
else
return defaultValue;
}
else {
if ( defaultValue == null ) {
return new RODRecordList<ReferenceOrderedDatum>(luName, Collections.singletonList(defaultValue), null);
} else {
return new RODRecordList<ReferenceOrderedDatum>(defaultValue.getName(),
Collections.singletonList(defaultValue),
defaultValue.getLocation());
}
}
}
/**
* @see this.lookup
* @param name
* @param defaultValue
* @return
*/
@Deprecated
public Object lookup(final String name, Object defaultValue) {
final String luName = canonicalName(name);
if ( map.containsKey(luName) )
@ -68,7 +103,7 @@ public class RefMetaDataTracker {
}
/**
* Is there a binding at this site to a ROD with name?
* Is there a binding at this site to a ROD/track with the specified name?
*
* @param name the name of the rod
* @return true if it has the rod
@ -78,39 +113,64 @@ public class RefMetaDataTracker {
}
/**
* Get all of the RODs at the current site
* Get all of the RODs at the current site. The collection is "flattened": for any track that has multiple records
* at the current site, they all will be added to the list as separate elements.
*
* @return
*/
public Collection<ReferenceOrderedDatum> getAllRods() {
return map.values();
List<ReferenceOrderedDatum> l = new ArrayList<ReferenceOrderedDatum>();
for ( RODRecordList<ReferenceOrderedDatum> rl : map.values() ) {
if ( rl == null ) continue; // how do we get null value stored for a track? shouldn't the track be missing from the map alltogether?
l.addAll(rl.getRecords());
}
return l;
}
/**
* Get all of the RODs at the current site
* Get all of the ROD tracks at the current site. Each track is returned as a single compound
* object (RODRecordList) that may contain multiple ROD records associated with the current site.
*
* @return
*/
public Collection<ReferenceOrderedDatum> getBoundRods() {
LinkedList<ReferenceOrderedDatum> bound = new LinkedList<ReferenceOrderedDatum>();
public Collection<RODRecordList<ReferenceOrderedDatum>> getBoundRodTracks() {
LinkedList<RODRecordList<ReferenceOrderedDatum>> bound = new LinkedList<RODRecordList<ReferenceOrderedDatum>>();
for ( ReferenceOrderedDatum value : map.values() ) {
if ( value != null )
bound.add(value);
for ( RODRecordList<ReferenceOrderedDatum> value : map.values() ) {
if ( value != null && value.size() != 0 ) bound.add(value);
}
return bound;
}
/**
 * Get all individual ROD records bound at the current site, flattened across tracks:
 * every non-null record of every non-null track in the map is added to the result.
 *
 * @return a new list holding the non-null records of all bound tracks (possibly empty)
 */
public Collection<ReferenceOrderedDatum> getBoundRodRecords() {
    LinkedList<ReferenceOrderedDatum> bound = new LinkedList<ReferenceOrderedDatum>();
    for ( RODRecordList<ReferenceOrderedDatum> valueList : map.values() ) {
        // bind() stores whatever it is given, so a track may be mapped to null;
        // skip it instead of failing with a NullPointerException in the inner loop.
        if ( valueList == null ) continue;
        for ( ReferenceOrderedDatum value : valueList ) {
            if ( value != null )
                bound.add(value);
        }
    }
    return bound;
}
/**
* Binds the reference ordered datum ROD to name at this site. Should be used only but the traversal
* Binds the list of reference ordered data records (RODs) to track name at this site. Should be used only by the traversal
* system to provide access to RODs in a structured way to the walkers.
*
* @param name
* @param rod
*/
public void bind(final String name, RODRecordList<ReferenceOrderedDatum> rod) {
    //logger.debug(String.format("Binding %s to %s", name, rod));
    // Tracks are keyed by the canonical form of their name.
    final String key = canonicalName(name);
    map.put(key, rod);
}
/*
public void bind(final String name, ReferenceOrderedDatum rod) {
//logger.debug(String.format("Binding %s to %s", name, rod));
map.put(canonicalName(name), rod);
}
*/
}

View File

@ -1,14 +1,10 @@
package org.broadinstitute.sting.gatk.refdata;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.MalformedGenomeLocException;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.xReadLines;
import java.io.*;
import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.util.*;
@ -20,13 +16,13 @@ import java.util.*;
* Time: 10:47:14 AM
* To change this template use File | Settings | File Templates.
*/
public class ReferenceOrderedData<ROD extends ReferenceOrderedDatum> implements Iterable<ROD> {
public class ReferenceOrderedData<ROD extends ReferenceOrderedDatum> implements Iterable<RODRecordList<ROD>> {
private String name;
private File file = null;
private String fieldDelimiter;
// private String fieldDelimiter;
/** Header object returned from the datum */
private Object header = null;
// private Object header = null;
private Class<ROD> type = null; // runtime type information for object construction
@ -180,8 +176,8 @@ public class ReferenceOrderedData<ROD extends ReferenceOrderedDatum> implements
this.file = file;
this.type = type;
this.name = name;
this.header = initializeROD(name, file, type);
this.fieldDelimiter = newROD(name, type).delimiterRegex();
// this.header = initializeROD(name, file, type);
// this.fieldDelimiter = newROD(name, type).delimiterRegex();
}
public String getName() { return name; }
@ -200,13 +196,13 @@ public class ReferenceOrderedData<ROD extends ReferenceOrderedDatum> implements
return this.name.equals(name) && type.isAssignableFrom(this.type);
}
public RODIterator<ROD> iterator() {
public SeekableRODIterator<ROD> iterator() {
Iterator<ROD> it;
try {
Method m = type.getDeclaredMethod("createIterator", String.class, java.io.File.class);
it = (Iterator<ROD>) m.invoke(null, name, file);
} catch (java.lang.NoSuchMethodException e) {
it = new SimpleRODIterator();
it = new RODRecordIterator(file,name,type);
} catch (java.lang.NullPointerException e) {
throw new RuntimeException(e);
} catch (java.lang.SecurityException e) {
@ -218,7 +214,8 @@ public class ReferenceOrderedData<ROD extends ReferenceOrderedDatum> implements
} catch (java.lang.reflect.InvocationTargetException e) {
throw new RuntimeException(e);
}
return new RODIterator<ROD>(it);
// return new RODIterator<ROD>(it);
return new SeekableRODIterator(it);
}
// ----------------------------------------------------------------------
@ -227,10 +224,10 @@ public class ReferenceOrderedData<ROD extends ReferenceOrderedDatum> implements
//
// ----------------------------------------------------------------------
public void testMe() {
for (ReferenceOrderedDatum rec : this) {
System.out.println(rec.toString());
for (RODRecordList<ROD> rec : this) {
System.out.println(rec.getRecords().get(0).toString());
RodGenotypeChipAsGFF gff = (RodGenotypeChipAsGFF) rec;
RodGenotypeChipAsGFF gff = (RodGenotypeChipAsGFF) rec.getRecords().get(0);
String[] keys = {"LENGTH", "ALT", "FOBARBAR"};
for (String key : keys) {
System.out.printf(" -> %s is (%s)%n", key, gff.containsAttribute(key) ? gff.getAttribute(key) : "none");
@ -246,8 +243,10 @@ public class ReferenceOrderedData<ROD extends ReferenceOrderedDatum> implements
// ----------------------------------------------------------------------
public ArrayList<ReferenceOrderedDatum> readAll() {
ArrayList<ReferenceOrderedDatum> elts = new ArrayList<ReferenceOrderedDatum>();
for (ReferenceOrderedDatum rec : this) {
elts.add(rec);
for ( RODRecordList<ROD> l : this ) {
for (ReferenceOrderedDatum rec : l) {
elts.add(rec);
}
}
elts.trimToSize();
return elts;
@ -269,12 +268,14 @@ public class ReferenceOrderedData<ROD extends ReferenceOrderedDatum> implements
public boolean validateFile() throws Exception {
ReferenceOrderedDatum last = null;
for (ReferenceOrderedDatum rec : this) {
if (last != null && last.compareTo(rec) == 1) {
// It's out of order
throw new Exception("Out of order elements at \n" + last.toString() + "\n" + rec.toString());
for ( RODRecordList<ROD> l : this ) {
for (ReferenceOrderedDatum rec : l) {
if (last != null && last.compareTo(rec) > 1) {
// It's out of order
throw new Exception("Out of order elements at \n" + last.toString() + "\n" + rec.toString());
}
last = rec;
}
last = rec;
}
return true;
}
@ -288,103 +289,103 @@ public class ReferenceOrderedData<ROD extends ReferenceOrderedDatum> implements
// Iteration
//
// ----------------------------------------------------------------------
private class SimpleRODIterator implements Iterator<ROD> {
private xReadLines parser = null;
public SimpleRODIterator() {
try {
parser = new xReadLines(file);
} catch (FileNotFoundException e) {
Utils.scareUser("Couldn't open file: " + file);
}
}
public boolean hasNext() {
//System.out.printf("Parser has next: %b%n", parser.hasNext());
return parser.hasNext();
}
public ROD next() {
ROD n = null;
boolean success = false;
boolean firstFailure = true;
do {
final String line = parser.next();
//System.out.printf("Line is '%s'%n", line);
String parts[] = line.split(fieldDelimiter);
try {
n = parseLine(parts);
// Two failure conditions:
// 1) parseLine throws an exception.
// 2) parseLine returns null.
// 3) parseLine throws a RuntimeException.
// TODO: Clean this up so that all errors are handled in one spot.
success = (n != null);
}
catch (MalformedGenomeLocException ex) {
if (firstFailure) {
Utils.warnUser("Failed to parse contig on line '" + line + "'. The reason given was: " + ex.getMessage() + " Skipping ahead to the next recognized GenomeLoc. ");
firstFailure = false;
}
if (!parser.hasNext())
Utils.warnUser("Unable to find more valid reference-ordered data. Giving up.");
}
} while (!success && parser.hasNext());
return n;
}
public void remove() {
throw new UnsupportedOperationException();
}
}
// private class SimpleRODIterator implements Iterator<ROD> {
// private xReadLines parser = null;
//
// public SimpleRODIterator() {
// try {
// parser = new xReadLines(file);
// } catch (FileNotFoundException e) {
// Utils.scareUser("Couldn't open file: " + file);
// }
// }
//
// public boolean hasNext() {
// //System.out.printf("Parser has next: %b%n", parser.hasNext());
// return parser.hasNext();
// }
//
// public ROD next() {
// ROD n = null;
// boolean success = false;
// boolean firstFailure = true;
//
// do {
// final String line = parser.next();
// //System.out.printf("Line is '%s'%n", line);
// String parts[] = line.split(fieldDelimiter);
//
// try {
// n = parseLine(parts);
// // Two failure conditions:
// // 1) parseLine throws an exception.
// // 2) parseLine returns null.
// // 3) parseLine throws a RuntimeException.
// // TODO: Clean this up so that all errors are handled in one spot.
// success = (n != null);
// }
// catch (MalformedGenomeLocException ex) {
// if (firstFailure) {
// Utils.warnUser("Failed to parse contig on line '" + line + "'. The reason given was: " + ex.getMessage() + " Skipping ahead to the next recognized GenomeLoc. ");
// firstFailure = false;
// }
// if (!parser.hasNext())
// Utils.warnUser("Unable to find more valid reference-ordered data. Giving up.");
// }
//
// } while (!success && parser.hasNext());
//
// return n;
// }
//
// public void remove() {
// throw new UnsupportedOperationException();
// }
// }
// ----------------------------------------------------------------------
//
// Parsing
//
// ----------------------------------------------------------------------
private Constructor<ROD> parsing_constructor;
// private Constructor<ROD> parsing_constructor;
private ROD newROD(final String name, final Class<ROD> type) {
try {
return (ROD) parsing_constructor.newInstance(name);
} catch (java.lang.InstantiationException e) {
throw new RuntimeException(e);
} catch (java.lang.IllegalAccessException e) {
throw new RuntimeException(e);
} catch (InvocationTargetException e) {
throw new RuntimeException(e);
}
}
// private ROD newROD(final String name, final Class<ROD> type) {
// try {
// return (ROD) parsing_constructor.newInstance(name);
// } catch (java.lang.InstantiationException e) {
// throw new RuntimeException(e);
// } catch (java.lang.IllegalAccessException e) {
// throw new RuntimeException(e);
// } catch (InvocationTargetException e) {
// throw new RuntimeException(e);
// }
// }
private Object initializeROD(final String name, final File file, final Class<ROD> type) {
try {
parsing_constructor = type.getConstructor(String.class);
}
catch (java.lang.NoSuchMethodException e) {
throw new RuntimeException(e);
}
ROD rod = newROD(name, type);
try {
return rod.initialize(file);
} catch (FileNotFoundException e) {
throw new RuntimeException(e);
}
}
// private Object initializeROD(final String name, final File file, final Class<ROD> type) {
// try {
// parsing_constructor = type.getConstructor(String.class);
// }
// catch (java.lang.NoSuchMethodException e) {
// throw new RuntimeException(e);
// }
// ROD rod = newROD(name, type);
// try {
// return rod.initialize(file);
// } catch (FileNotFoundException e) {
// throw new RuntimeException(e);
// }
// }
private ROD parseLine(final String[] parts) {
//System.out.printf("Parsing GFFLine %s%n", Utils.join(" ", parts));
ROD obj = newROD(name, type);
try {
if (!obj.parseLine(header, parts))
obj = null;
} catch (IOException e) {
throw new RuntimeException("Badly formed ROD: " + e);
}
return obj;
}
// private ROD parseLine(final String[] parts) {
// //System.out.printf("Parsing GFFLine %s%n", Utils.join(" ", parts));
// ROD obj = newROD(name, type);
// try {
// if (!obj.parseLine(header, parts))
// obj = null;
// } catch (IOException e) {
// throw new RuntimeException("Badly formed ROD: " + e);
// }
// return obj;
// }
}

View File

@ -4,6 +4,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.gatk.refdata.RODRecordList;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.cmdLine.Argument;
@ -55,8 +56,8 @@ public class CountRodWalker extends RodWalker<CountRodWalker.Datum, Pair<Expandi
nRodsHere = -1; // don't update this
nTotalBases = context.getSkippedBases();
} else {
Collection<ReferenceOrderedDatum> rods = new LinkedList<ReferenceOrderedDatum>();
for ( ReferenceOrderedDatum rod : tracker.getBoundRods() ) {
Collection<RODRecordList<ReferenceOrderedDatum>> rods = new LinkedList<RODRecordList<ReferenceOrderedDatum>>();
for ( RODRecordList<ReferenceOrderedDatum> rod : tracker.getBoundRodTracks() ) {
//System.out.printf("Considering rod %s%n", rod);
if ( rod.getLocation().getStart() == context.getLocation().getStart() && ! rod.getName().equals("interval") ) {
// only consider the first element
@ -70,7 +71,7 @@ public class CountRodWalker extends RodWalker<CountRodWalker.Datum, Pair<Expandi
if ( nRodsHere > 0 ) {
if ( verbose ) {
List<String> names = new ArrayList<String>();
for ( ReferenceOrderedDatum rod : rods ) {
for ( RODRecordList<ReferenceOrderedDatum> rod : rods ) {
names.add(rod.getName());
}

View File

@ -9,10 +9,7 @@ import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.gatk.refdata.TabularROD;
import org.broadinstitute.sting.gatk.refdata.RODIterator;
import org.broadinstitute.sting.gatk.refdata.*;
import java.io.File;
import java.io.FileNotFoundException;
@ -58,12 +55,12 @@ public class ReferenceOrderedDataPoolTest extends BaseTest {
@Test
public void testCreateSingleIterator() {
ResourcePool iteratorPool = new ReferenceOrderedDataPool(rod);
RODIterator iterator = (RODIterator)iteratorPool.iterator( new MappedStreamSegment(testSite1) );
SeekableRODIterator iterator = (SeekableRODIterator)iteratorPool.iterator( new MappedStreamSegment(testSite1) );
Assert.assertEquals("Number of iterators in the pool is incorrect", 1, iteratorPool.numIterators());
Assert.assertEquals("Number of available iterators in the pool is incorrect", 0, iteratorPool.numAvailableIterators());
TabularROD datum = (TabularROD)iterator.next();
TabularROD datum = (TabularROD)iterator.next().getRecords().get(0);
assertTrue(datum.getLocation().equals(testSite1));
assertTrue(datum.get("COL1").equals("A"));
@ -79,36 +76,36 @@ public class ReferenceOrderedDataPoolTest extends BaseTest {
@Test
public void testCreateMultipleIterators() {
ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(rod);
RODIterator iterator1 = iteratorPool.iterator( new MappedStreamSegment(testSite1) );
SeekableRODIterator iterator1 = iteratorPool.iterator( new MappedStreamSegment(testSite1) );
// Create a new iterator at position 2.
RODIterator iterator2 = iteratorPool.iterator( new MappedStreamSegment(testSite2) );
SeekableRODIterator iterator2 = iteratorPool.iterator( new MappedStreamSegment(testSite2) );
Assert.assertEquals("Number of iterators in the pool is incorrect", 2, iteratorPool.numIterators());
Assert.assertEquals("Number of available iterators in the pool is incorrect", 0, iteratorPool.numAvailableIterators());
// Test out-of-order access: first iterator2, then iterator1.
// Ugh...first call to a region needs to be a seek.
TabularROD datum = (TabularROD)iterator2.seekForward(testSite2);
TabularROD datum = (TabularROD)iterator2.seekForward(testSite2).getRecords().get(0);
assertTrue(datum.getLocation().equals(testSite2));
assertTrue(datum.get("COL1").equals("C"));
assertTrue(datum.get("COL2").equals("D"));
assertTrue(datum.get("COL3").equals("E"));
datum = (TabularROD)iterator1.next();
datum = (TabularROD)iterator1.next().getRecords().get(0);
assertTrue(datum.getLocation().equals(testSite1));
assertTrue(datum.get("COL1").equals("A"));
assertTrue(datum.get("COL2").equals("B"));
assertTrue(datum.get("COL3").equals("C"));
// Advance iterator2, and make sure both iterator's contents are still correct.
datum = (TabularROD)iterator2.next();
datum = (TabularROD)iterator2.next().getRecords().get(0);
assertTrue(datum.getLocation().equals(testSite3));
assertTrue(datum.get("COL1").equals("F"));
assertTrue(datum.get("COL2").equals("G"));
assertTrue(datum.get("COL3").equals("H"));
datum = (TabularROD)iterator1.next();
datum = (TabularROD)iterator1.next().getRecords().get(0);
assertTrue(datum.getLocation().equals(testSite2));
assertTrue(datum.get("COL1").equals("C"));
assertTrue(datum.get("COL2").equals("D"));
@ -129,12 +126,12 @@ public class ReferenceOrderedDataPoolTest extends BaseTest {
@Test
public void testIteratorConservation() {
ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(rod);
RODIterator iterator = (RODIterator)iteratorPool.iterator( new MappedStreamSegment(testSite1) );
SeekableRODIterator iterator = iteratorPool.iterator( new MappedStreamSegment(testSite1) );
Assert.assertEquals("Number of iterators in the pool is incorrect", 1, iteratorPool.numIterators());
Assert.assertEquals("Number of available iterators in the pool is incorrect", 0, iteratorPool.numAvailableIterators());
TabularROD datum = (TabularROD)iterator.next();
TabularROD datum = (TabularROD)iterator.next().getRecords().get(0);
assertTrue(datum.getLocation().equals(testSite1));
assertTrue(datum.get("COL1").equals("A"));
assertTrue(datum.get("COL2").equals("B"));
@ -149,7 +146,7 @@ public class ReferenceOrderedDataPoolTest extends BaseTest {
Assert.assertEquals("Number of iterators in the pool is incorrect", 1, iteratorPool.numIterators());
Assert.assertEquals("Number of available iterators in the pool is incorrect", 0, iteratorPool.numAvailableIterators());
datum = (TabularROD)iterator.seekForward(testSite3);
datum = (TabularROD)iterator.seekForward(testSite3).getRecords().get(0);
assertTrue(datum.getLocation().equals(testSite3));
assertTrue(datum.get("COL1").equals("F"));
assertTrue(datum.get("COL2").equals("G"));
@ -164,12 +161,12 @@ public class ReferenceOrderedDataPoolTest extends BaseTest {
@Test
public void testIteratorCreation() {
ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(rod);
RODIterator iterator = (RODIterator)iteratorPool.iterator( new MappedStreamSegment(testSite3) );
SeekableRODIterator iterator = iteratorPool.iterator( new MappedStreamSegment(testSite3) );
Assert.assertEquals("Number of iterators in the pool is incorrect", 1, iteratorPool.numIterators());
Assert.assertEquals("Number of available iterators in the pool is incorrect", 0, iteratorPool.numAvailableIterators());
TabularROD datum = (TabularROD)iterator.seekForward(testSite3);
TabularROD datum = (TabularROD)iterator.seekForward(testSite3).getRecords().get(0);
assertTrue(datum.getLocation().equals(testSite3));
assertTrue(datum.get("COL1").equals("F"));
assertTrue(datum.get("COL2").equals("G"));
@ -184,7 +181,7 @@ public class ReferenceOrderedDataPoolTest extends BaseTest {
Assert.assertEquals("Number of iterators in the pool is incorrect", 2, iteratorPool.numIterators());
Assert.assertEquals("Number of available iterators in the pool is incorrect", 1, iteratorPool.numAvailableIterators());
datum = (TabularROD)iterator.next();
datum = (TabularROD)iterator.next().getRecords().get(0);
assertTrue(datum.getLocation().equals(testSite1));
assertTrue(datum.get("COL1").equals("A"));
assertTrue(datum.get("COL2").equals("B"));

View File

@ -26,7 +26,7 @@ import net.sf.picard.reference.ReferenceSequenceFile;
public class TabularRODTest extends BaseTest {
private static ReferenceSequenceFile seq;
private ReferenceOrderedData ROD;
private RODIterator iter;
private SeekableRODIterator<TabularROD> iter;
@BeforeClass
@ -48,7 +48,8 @@ public class TabularRODTest extends BaseTest {
@Test
public void test1() {
logger.warn("Executing test1");
TabularROD one = (TabularROD)iter.next();
RODRecordList<TabularROD> oneList = iter.next();
TabularROD one = oneList.getRecords().get(0);
assertTrue(one.size() == 4);
assertTrue(one.getLocation().equals(GenomeLocParser.createGenomeLoc("chrM", 10)));
assertTrue(one.get("COL1").equals("A"));
@ -59,8 +60,10 @@ public class TabularRODTest extends BaseTest {
@Test
public void test2() {
logger.warn("Executing test2");
TabularROD one = (TabularROD)iter.next();
TabularROD two = (TabularROD)iter.next();
RODRecordList<TabularROD> oneList = iter.next();
RODRecordList<TabularROD> twoList = iter.next();
TabularROD one = oneList.getRecords().get(0);
TabularROD two = twoList.getRecords().get(0);
assertTrue(two.size() == 4);
assertTrue(two.getLocation().equals(GenomeLocParser.createGenomeLoc("chrM", 20)));
assertTrue(two.get("COL1").equals("C"));
@ -71,9 +74,12 @@ public class TabularRODTest extends BaseTest {
@Test
public void test3() {
logger.warn("Executing test3");
TabularROD one = (TabularROD)iter.next();
TabularROD two = (TabularROD)iter.next();
TabularROD three = (TabularROD)iter.next();
RODRecordList<TabularROD> oneList = iter.next();
RODRecordList<TabularROD> twoList = iter.next();
RODRecordList<TabularROD> threeList = iter.next();
TabularROD one = oneList.getRecords().get(0);
TabularROD two = twoList.getRecords().get(0);
TabularROD three = threeList.getRecords().get(0);
assertTrue(three.size() == 4);
assertTrue(three.getLocation().equals(GenomeLocParser.createGenomeLoc("chrM", 30)));
assertTrue(three.get("COL1").equals("F"));
@ -84,16 +90,20 @@ public class TabularRODTest extends BaseTest {
@Test
public void testDone() {
logger.warn("Executing testDone");
TabularROD one = (TabularROD)iter.next();
TabularROD two = (TabularROD)iter.next();
TabularROD three = (TabularROD)iter.next();
RODRecordList<TabularROD> oneList = iter.next();
RODRecordList<TabularROD> twoList = iter.next();
RODRecordList<TabularROD> threeList = iter.next();
TabularROD one = oneList.getRecords().get(0);
TabularROD two = twoList.getRecords().get(0);
TabularROD three = threeList.getRecords().get(0);
assertTrue(!iter.hasNext());
}
@Test
public void testSeek() {
logger.warn("Executing testSeek");
TabularROD two = (TabularROD)iter.seekForward(GenomeLocParser.createGenomeLoc("chrM", 20));
RODRecordList<TabularROD> twoList = iter.seekForward(GenomeLocParser.createGenomeLoc("chrM", 20));
TabularROD two = twoList.getRecords().get(0);
assertTrue(two.size() == 4);
assertTrue(two.getLocation().equals(GenomeLocParser.createGenomeLoc("chrM", 20)));
assertTrue(two.get("COL1").equals("C"));
@ -104,7 +114,8 @@ public class TabularRODTest extends BaseTest {
@Test
public void testToString() {
logger.warn("Executing testToString");
TabularROD one = (TabularROD)iter.next();
RODRecordList<TabularROD> oneList = iter.next();
TabularROD one = oneList.getRecords().get(0);
assertTrue(one.toString().equals("chrM:10\tA\tB\tC"));
}
@ -113,10 +124,11 @@ public class TabularRODTest extends BaseTest {
public void testDelim1() {
File file2 = new File(testDir + "TabularDataTest2.dat");
ReferenceOrderedData ROD_commas = new ReferenceOrderedData("tableTest", file2, TabularROD.class);
RODIterator iter_commas = ROD_commas.iterator();
SeekableRODIterator<TabularROD> iter_commas = ROD_commas.iterator();
logger.warn("Executing testDelim1");
TabularROD one2 = (TabularROD)iter_commas.next();
RODRecordList<TabularROD> one2List = iter_commas.next();
TabularROD one2 = one2List.getRecords().get(0);
assertTrue(one2.size() == 5);
assertTrue(one2.getLocation().equals(GenomeLocParser.createGenomeLoc("chrM", 10)));
assertTrue(one2.get("COL1").equals("A"));
@ -130,10 +142,11 @@ public class TabularRODTest extends BaseTest {
TabularROD.setDelimiter(",",",");
File file2 = new File(testDir + "TabularDataTest2.dat");
ReferenceOrderedData ROD_commas = new ReferenceOrderedData("tableTest", file2, TabularROD.class);
RODIterator iter_commas = ROD_commas.iterator();
SeekableRODIterator<TabularROD> iter_commas = ROD_commas.iterator();
logger.warn("Executing testDelim1");
TabularROD one2 = (TabularROD)iter_commas.next();
RODRecordList<TabularROD> one2List = iter_commas.next();
TabularROD one2 = one2List.getRecords().get(0);
assertTrue(one2.size() == 5);
assertTrue(one2.getLocation().equals(GenomeLocParser.createGenomeLoc("chrM", 10)));
assertTrue(one2.get("COL1").equals("A"));
@ -174,16 +187,18 @@ public class TabularRODTest extends BaseTest {
out.println(row.toString());
ReferenceOrderedData ROD_commas = new ReferenceOrderedData("tableTest", outputFile, TabularROD.class);
RODIterator iter_commas = ROD_commas.iterator();
SeekableRODIterator<TabularROD> iter_commas = ROD_commas.iterator();
TabularROD one = (TabularROD)iter_commas.next();
RODRecordList<TabularROD> oneList = iter_commas.next();
TabularROD one = oneList.getRecords().get(0);
assertTrue(one.size() == 4);
assertTrue(one.getLocation().equals(GenomeLocParser.createGenomeLoc("chrM", 1)));
assertTrue(one.get("col1").equals("1"));
assertTrue(one.get("col2").equals("2"));
assertTrue(one.get("col3").equals("3"));
TabularROD two = (TabularROD)iter_commas.next();
RODRecordList<TabularROD> twoList = iter_commas.next();
TabularROD two = twoList.getRecords().get(0);
assertTrue(two.size() == 4);
assertTrue(two.getLocation().equals(GenomeLocParser.createGenomeLoc("chrM", 2)));
assertTrue(two.get("col1").equals("3"));