diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/providers/ManagingReferenceOrderedView.java b/java/src/org/broadinstitute/sting/gatk/datasources/providers/ManagingReferenceOrderedView.java index 653b90e52..fa226f152 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/providers/ManagingReferenceOrderedView.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/providers/ManagingReferenceOrderedView.java @@ -1,8 +1,9 @@ package org.broadinstitute.sting.gatk.datasources.providers; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; +import org.broadinstitute.sting.gatk.refdata.utils.FlashBackIterator; +import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; import org.broadinstitute.sting.utils.GenomeLoc; import java.util.List; @@ -37,7 +38,7 @@ public class ManagingReferenceOrderedView implements ReferenceOrderedView { */ public ManagingReferenceOrderedView( ShardDataProvider provider ) { for( ReferenceOrderedDataSource dataSource: provider.getReferenceOrderedData() ) - states.add( new ReferenceOrderedDataState( dataSource, (SeekableRODIterator)dataSource.seek(provider.getShard()) ) ); + states.add( new ReferenceOrderedDataState( dataSource, ((FlashBackIterator)dataSource.seek(provider.getShard()) )) ); provider.register(this); } @@ -73,9 +74,9 @@ public class ManagingReferenceOrderedView implements ReferenceOrderedView { */ class ReferenceOrderedDataState { public final ReferenceOrderedDataSource dataSource; - public final SeekableRODIterator iterator; + public final FlashBackIterator iterator; - public ReferenceOrderedDataState( ReferenceOrderedDataSource dataSource, SeekableRODIterator iterator ) { + public ReferenceOrderedDataState( ReferenceOrderedDataSource dataSource, FlashBackIterator iterator ) { this.dataSource = dataSource; this.iterator = 
iterator; } diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/providers/RodLocusView.java b/java/src/org/broadinstitute/sting/gatk/datasources/providers/RodLocusView.java index 990cc9346..cf129d074 100644 --- a/java/src/org/broadinstitute/sting/gatk/datasources/providers/RodLocusView.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/providers/RodLocusView.java @@ -3,6 +3,8 @@ package org.broadinstitute.sting.gatk.datasources.providers; import org.broadinstitute.sting.gatk.refdata.*; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.refdata.utils.FlashBackIterator; +import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.MergingIterator; import org.broadinstitute.sting.utils.GenomeLocParser; @@ -59,14 +61,14 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView { GenomeLoc loc = provider.getLocus(); - List< Iterator> > iterators = new LinkedList< Iterator> >(); + List< Iterator > iterators = new LinkedList< Iterator >(); for( ReferenceOrderedDataSource dataSource: provider.getReferenceOrderedData() ) { if ( DEBUG ) System.out.printf("Shard is %s%n", provider.getLocus()); // grab the ROD iterator from the data source, and compute the first location in this shard, forwarding // the iterator to immediately before it, so that it can be added to the merging iterator primed for // next() to return the first real ROD in this shard - SeekableRODIterator it = (SeekableRODIterator)dataSource.seek(provider.getShard()); + FlashBackIterator it = (FlashBackIterator)dataSource.seek(provider.getShard()); it.seekForward(GenomeLocParser.createGenomeLoc(loc.getContigIndex(), loc.getStart()-1)); states.add(new ReferenceOrderedDataState(dataSource,it)); @@ -199,7 +201,7 @@ public class 
RodLocusView extends LocusView implements ReferenceOrderedView { */ public void close() { for( ReferenceOrderedDataState state: states ) - state.dataSource.close( state.iterator ); + state.dataSource.close( state.iterator ); rodQueue = null; tracker = null; diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataSource.java b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataSource.java index 8cab5ffc1..c86c0d4ed 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataSource.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataSource.java @@ -2,8 +2,8 @@ package org.broadinstitute.sting.gatk.datasources.simpleDataSources; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData; -import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator; import org.broadinstitute.sting.gatk.datasources.shards.Shard; +import org.broadinstitute.sting.gatk.refdata.utils.FlashBackIterator; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.StingException; @@ -68,15 +68,15 @@ public class ReferenceOrderedDataSource implements SimpleDataSource { */ public Iterator seek( Shard shard ) { DataStreamSegment dataStreamSegment = shard.getGenomeLocs().size() != 0 ? new MappedStreamSegment(shard.getGenomeLocs().get(0)) : new EntireStream(); - SeekableRODIterator iterator = iteratorPool.iterator(dataStreamSegment); - return iterator; + FlashBackIterator RODIterator = iteratorPool.iterator(dataStreamSegment); + return RODIterator; } /** * Close the specified iterator, returning it to the pool. * @param iterator Iterator to close. 
*/ - public void close( SeekableRODIterator iterator ) { + public void close( FlashBackIterator iterator ) { this.iteratorPool.release(iterator); } @@ -85,9 +85,8 @@ public class ReferenceOrderedDataSource implements SimpleDataSource { /** * A pool of reference-ordered data iterators. */ -class ReferenceOrderedDataPool extends ResourcePool { +class ReferenceOrderedDataPool extends ResourcePool { private final ReferenceOrderedData rod; - public ReferenceOrderedDataPool( ReferenceOrderedData rod ) { this.rod = rod; } @@ -97,8 +96,8 @@ class ReferenceOrderedDataPool extends ResourcePool resources ) { + public FlashBackIterator selectBestExistingResource( DataStreamSegment segment, List resources ) { if(segment instanceof MappedStreamSegment) { GenomeLoc position = ((MappedStreamSegment)segment).getFirstLocation(); - //######################################### -//## System.out.printf("Searching for iterator at locus %s; %d resources available%n", position, resources.size()); - for( SeekableRODIterator iterator: resources ) { -//##System.out.printf("Examining iterator at position %s [last query location: %s]%n", iterator.position(),iterator.lastQueryLocation()); - if( (iterator.position() == null && iterator.hasNext()) || - (iterator.position() != null && iterator.position().isBefore(position)) ) - return iterator; + + for( FlashBackIterator RODIterator : resources ) { + + if( (RODIterator.position() == null && RODIterator.hasNext()) || + (RODIterator.position() != null && RODIterator.position().isBefore(position)) ) + return RODIterator; + if ((RODIterator.position() != null && RODIterator.canFlashBackTo(position))) { + RODIterator.flashBackTo(position); + return RODIterator; + } + } -//##System.out.printf("Failed to find iterator at locus %s%n", position); return null; } else if(segment instanceof EntireStream) { @@ -135,15 +137,15 @@ class ReferenceOrderedDataPool extends ResourcePool implements Comparable, Cloneable { +public class RODRecordListImpl extends 
AbstractList implements Comparable, Cloneable, RODRecordList { private List records; private GenomeLoc location = null; private String name = null; - public RODRecordList(String name) { + public RODRecordListImpl(String name) { records = new ArrayList(); this.name = name; } @@ -37,7 +37,7 @@ public class RODRecordList extends AbstractList implement * @param data * @param loc */ - public RODRecordList(String name, Collection data, GenomeLoc loc) { + public RODRecordListImpl(String name, Collection data, GenomeLoc loc) { this.records = new ArrayList(data==null?0:data.size()); this.name = name; this.location = loc; diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java b/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java index 12219f2ad..19617d334 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java @@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.refdata; import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; +import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.StingException; @@ -43,8 +44,8 @@ public class RefMetaDataTracker { if ( map.containsKey(luName) ) { RODRecordList value = map.get(luName) ; if ( value != null ) { - List l = value.getRecords(); - if ( l != null & l.size() > 0 ) return value.getRecords().get(0); + List l = value; + if ( l != null & l.size() > 0 ) return value.get(0); } } return defaultValue; @@ -76,9 +77,9 @@ public class RefMetaDataTracker { for ( Map.Entry datum : map.entrySet() ) { final String rodName = datum.getKey(); if ( rodName.startsWith(luName) ) { - if ( trackData == null ) trackData = new RODRecordList(name); + if ( trackData == null ) trackData = new RODRecordListImpl(name); //System.out.printf("Adding bindings from %s 
to %s at %s%n", rodName, name, datum.getValue().getLocation()); - trackData.add(datum.getValue(), true); + ((RODRecordListImpl)trackData).add(datum.getValue(), true); } } } @@ -88,7 +89,7 @@ public class RefMetaDataTracker { else if ( defaultValue == null ) return null; else - return new RODRecordList(defaultValue.getName(), + return new RODRecordListImpl(defaultValue.getName(), Collections.singletonList(defaultValue), defaultValue.getLocation()); } @@ -144,7 +145,7 @@ public class RefMetaDataTracker { List l = new ArrayList(); for ( RODRecordList rl : map.values() ) { if ( rl == null ) continue; // how do we get null value stored for a track? shouldn't the track be missing from the map alltogether? - l.addAll(rl.getRecords()); + l.addAll(rl); } return l; @@ -285,7 +286,7 @@ public class RefMetaDataTracker { } private void addVariantContexts(Collection contexts, RODRecordList rodList, EnumSet allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) { - for ( ReferenceOrderedDatum rec : rodList.getRecords() ) { + for ( ReferenceOrderedDatum rec : rodList ) { if ( VariantContextAdaptors.canBeConvertedToVariantContext(rec) ) { // ok, we might actually be able to turn this record in a variant context VariantContext vc = VariantContextAdaptors.toVariantContext(rodList.getName(), rec); diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/ReferenceOrderedData.java b/java/src/org/broadinstitute/sting/gatk/refdata/ReferenceOrderedData.java index 41a65d6f9..19b66548a 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/ReferenceOrderedData.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/ReferenceOrderedData.java @@ -5,6 +5,8 @@ import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack; import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackCreationException; import org.broadinstitute.sting.gatk.refdata.tracks.RODRMDTrack; import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder; +import 
org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator; +import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; import org.broadinstitute.sting.oneoffprojects.refdata.HapmapVCFROD; import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.Utils; @@ -21,7 +23,7 @@ import java.util.*; * Time: 10:47:14 AM * To change this template use File | Settings | File Templates. */ -public class ReferenceOrderedData implements Iterable> { // }, RMDTrackBuilder { +public class ReferenceOrderedData implements Iterable { // }, RMDTrackBuilder { private String name; private File file = null; // private String fieldDelimiter; @@ -235,7 +237,7 @@ public class ReferenceOrderedData implements return this.name.equals(name) && type.isAssignableFrom(this.type); } - public SeekableRODIterator iterator() { + public LocationAwareSeekableRODIterator iterator() { Iterator it; try { Method m = type.getDeclaredMethod("createIterator", String.class, java.io.File.class); diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/SeekableRODIterator.java b/java/src/org/broadinstitute/sting/gatk/refdata/SeekableRODIterator.java index 6d21f62be..43f778b19 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/SeekableRODIterator.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/SeekableRODIterator.java @@ -1,7 +1,8 @@ package org.broadinstitute.sting.gatk.refdata; import org.broadinstitute.sting.gatk.iterators.PushbackIterator; -import org.broadinstitute.sting.gatk.iterators.PeekingIterator; +import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator; +import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.StingException; @@ -33,7 +34,7 @@ import java.util.LinkedList; * Time: 6:20:46 PM * To change this template use File | Settings | File 
Templates. */ -public class SeekableRODIterator implements Iterator> { +public class SeekableRODIterator implements LocationAwareSeekableRODIterator { private PushbackIterator it; List records = null; // here we will keep a pile of records overlaping with current position; when we iterate // and step out of record's scope, we purge it from the list @@ -161,12 +162,12 @@ public class SeekableRODIterator implements Iterator } int that_contig = r.getLocation().getContigIndex(); if ( curr_contig > that_contig ) - throw new StingException("SeekableRODIterator: contig " +r.getLocation().getContig() + + throw new StingException("LocationAwareSeekableRODIterator: contig " +r.getLocation().getContig() + " occurs out of order in track " + r.getName() ); if ( curr_contig < that_contig ) break; // next record is on a higher contig, we do not need it yet... if ( r.getLocation().getStart() < curr_position ) - throw new StingException("SeekableRODIterator: track "+r.getName() + + throw new StingException("LocationAwareSeekableRODIterator: track "+r.getName() + " is out of coordinate order on contig "+r.getLocation().getContig()); if ( r.getLocation().getStart() > curr_position ) break; // next record starts after the current position; we do not need it yet @@ -182,7 +183,7 @@ public class SeekableRODIterator implements Iterator // 'records' and current position are fully updated. Last, we need to set the location of the whole track // (collection of ROD records) to the genomic site we are currently looking at, and return the list - return new RODRecordList(name,records, GenomeLocParser.createGenomeLoc(curr_contig,curr_position)); + return new RODRecordListImpl(name,records, GenomeLocParser.createGenomeLoc(curr_contig,curr_position)); } /** @@ -200,22 +201,9 @@ public class SeekableRODIterator implements Iterator * method. 
*/ public void remove() { - throw new UnsupportedOperationException("SeekableRODIterator does not implement remove() operation"); + throw new UnsupportedOperationException("LocationAwareSeekableRODIterator does not implement remove() operation"); } - /** - * - */ - public GenomeLoc lastQueryLocation() { - if ( curr_contig < 0 ) return null; - if ( curr_query_end > curr_position ) { - return GenomeLocParser.createGenomeLoc(curr_contig,curr_position,curr_query_end); - } - else { - return GenomeLocParser.createGenomeLoc(curr_contig,curr_position); - } - - } /** * Returns the current "position" (not location!! ;) ) of this iterator. This method is used by the sharding @@ -322,7 +310,7 @@ public class SeekableRODIterator implements Iterator } if ( records.size() > 0 ) { - return new RODRecordList(name,records,interval.clone()); + return new RODRecordListImpl(name,records,interval.clone()); } else { return null; } diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/rodDbSNP.java b/java/src/org/broadinstitute/sting/gatk/refdata/rodDbSNP.java index 8f6ef29ee..fdd23c379 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/rodDbSNP.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/rodDbSNP.java @@ -1,6 +1,7 @@ package org.broadinstitute.sting.gatk.refdata; import net.sf.samtools.util.SequenceUtil; +import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; import org.broadinstitute.sting.utils.*; import java.util.ArrayList; diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/rodRefSeq.java b/java/src/org/broadinstitute/sting/gatk/refdata/rodRefSeq.java index fec7106e2..10b79543f 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/rodRefSeq.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/rodRefSeq.java @@ -1,5 +1,6 @@ package org.broadinstitute.sting.gatk.refdata; +import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; import org.broadinstitute.sting.utils.GenomeLoc; import 
org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.GenomeLocParser; diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RODRMDTrack.java b/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RODRMDTrack.java index f4c348720..863582da9 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RODRMDTrack.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RODRMDTrack.java @@ -23,10 +23,11 @@ package org.broadinstitute.sting.gatk.refdata.tracks; -import org.broadinstitute.sting.gatk.refdata.RODRecordList; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData; import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator; import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; +import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator; +import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; import java.io.File; import java.util.Iterator; @@ -72,9 +73,9 @@ public class RODRMDTrack extends RMDTrack { class SRIToIterator implements Iterator { private RODRecordList list = null; - private SeekableRODIterator iterator = null; + private LocationAwareSeekableRODIterator iterator = null; - SRIToIterator(SeekableRODIterator iter) { + SRIToIterator(LocationAwareSeekableRODIterator iter) { iterator = iter; } diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/utils/FlashBackIterator.java b/java/src/org/broadinstitute/sting/gatk/refdata/utils/FlashBackIterator.java new file mode 100644 index 000000000..5df43635d --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/refdata/utils/FlashBackIterator.java @@ -0,0 +1,127 @@ +package org.broadinstitute.sting.gatk.refdata.utils; + +import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; +import org.broadinstitute.sting.utils.GenomeLoc; + +import java.util.Comparator; +import java.util.LinkedList; +import java.util.List; + + +/** + * + * @author aaron + * + * Class FlashBackIterator 
+ * + * better than acid washed jeans...more like a Delorean that flies through time + * + * This iterator buffers a certain amount of ROD data to 'flash back' to. This + * is needed for using ROD's in read traversals, because between shards we sometimes + * (actually often) need to go back to before the current iterators location and + * get RODs that overlap the current read. + */ +public class FlashBackIterator implements LocationAwareSeekableRODIterator { + private LocationAwareSeekableRODIterator iterator; + private LinkedList list = new LinkedList(); + private int MAX_QUEUE = 5000; + private boolean usingQueue = false; + + public FlashBackIterator(LocationAwareSeekableRODIterator iterator) { + this.iterator = iterator; + } + + @Override + public GenomeLoc peekNextLocation() { + return iterator.peekNextLocation(); + } + + @Override + public GenomeLoc position() { + return (usingQueue) ? list.getFirst().getLocation() : iterator.position(); + } + + @Override + public RODRecordList seekForward(GenomeLoc interval) { + RODRecordList lt = iterator.seekForward(interval); + if (lt != null) list.addLast(new ComparableList(lt)); + return lt; + } + + @Override + public boolean hasNext() { + if (usingQueue) return (list.size() > 0 || iterator.hasNext()); + return iterator.hasNext(); + } + + @Override + public RODRecordList next() { + RODRecordList ret; + if (!usingQueue || list.size() < 1) { + usingQueue = false; + ret = iterator.next(); + list.addLast(new ComparableList(ret)); + if (list.size() > MAX_QUEUE) list.removeFirst(); + } else { + ret = list.getFirst().getList(); + list.removeFirst(); + } + return ret; + } + + @Override + public void remove() { + throw new UnsupportedOperationException("We don't support remove"); + } + + /** + * can we flash back to the specified location? 
+ * + * @param location the location to try and flash back to + * + * @return true if we can, false otherwise + */ + public boolean canFlashBackTo(GenomeLoc location) { + GenomeLoc farthestBack = (list.size() > 0) ? list.getFirst().getLocation() : iterator.peekNextLocation(); + System.err.println("farthestBack = " + farthestBack + " loc = " + location); + return (!farthestBack.isPast(location)); + } + + /** + * flashback! Throws an unsupported operation exception + * + * @param location where to flash back to + */ + public void flashBackTo(GenomeLoc location) { + if (!canFlashBackTo(location)) throw new UnsupportedOperationException("we can't flash back to " + location); + if (list.size() > 0 && !list.getLast().getLocation().isBefore(location)) + usingQueue = true; + } +} + +class ComparableList implements Comparator { + private RODRecordList list; + private GenomeLoc location = null; + public ComparableList(RODRecordList list) { + this.list = list; + if (list != null && list.size() != 0) location = list.get(0).getLocation(); + else throw new IllegalStateException("Bad voodoo!"); + } + + @Override + public int compare(ComparableList list1, ComparableList list2) { + if (list1.location == null && list2.location == null) + return 0; + if (list1.location == null) return 1; + if (list2.location == null) return -1; + return (list1.location.compareTo(list2.location)); + } + + public GenomeLoc getLocation() { + return location; + } + + public RODRecordList getList() { + return list; + } +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/utils/LocationAwareSeekableRODIterator.java b/java/src/org/broadinstitute/sting/gatk/refdata/utils/LocationAwareSeekableRODIterator.java new file mode 100644 index 000000000..df2cc8b33 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/refdata/utils/LocationAwareSeekableRODIterator.java @@ -0,0 +1,23 @@ +package org.broadinstitute.sting.gatk.refdata.utils; + +import 
org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; +import org.broadinstitute.sting.utils.GenomeLoc; + +import java.util.Iterator; +import java.util.List; + +/** + * @author aaron + *

+ * Interface LocationAwareSeekableRODIterator + *

+ * combine iteration with a position aware interface + */ +public interface LocationAwareSeekableRODIterator extends Iterator { + public GenomeLoc peekNextLocation(); + + public GenomeLoc position(); + + public RODRecordList seekForward(GenomeLoc interval); + +} diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/utils/RODRecordList.java b/java/src/org/broadinstitute/sting/gatk/refdata/utils/RODRecordList.java new file mode 100644 index 000000000..ca1b2631c --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/refdata/utils/RODRecordList.java @@ -0,0 +1,20 @@ +package org.broadinstitute.sting.gatk.refdata.utils; + +import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; +import org.broadinstitute.sting.utils.GenomeLoc; + +import java.util.List; + + +/** + * @author aaron + *

+ * Interface RODRecordList + *

+ * make the RODRecord list an interface, so we can stub in other implementations + * during testing. + */ +public interface RODRecordList extends List, Comparable { + public GenomeLoc getLocation(); + public String getName(); +} diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java index 7f4cb182c..fa4fadb78 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java @@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator; import org.broadinstitute.sting.gatk.contexts.*; import org.broadinstitute.sting.gatk.refdata.*; +import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; import org.broadinstitute.sting.utils.*; @@ -187,11 +188,11 @@ public class VariantAnnotator extends LocusWalker { RODRecordList rods = tracker.getTrackData("variant", null); // ignore places where we don't have a variant - if ( rods == null || rods.getRecords().size() == 0 ) + if ( rods == null || rods.size() == 0 ) return 0; Map annotations = new HashMap(); - VariationRod variant = (VariationRod)rods.getRecords().get(0); + VariationRod variant = (VariationRod)rods.get(0); // if the reference base is not ambiguous, we can annotate if ( BaseUtils.simpleBaseToBaseIndex(ref.getBase()) != -1 ) { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/fasta/PickSequenomProbes.java b/java/src/org/broadinstitute/sting/gatk/walkers/fasta/PickSequenomProbes.java index 8d93fe34e..0f08239ff 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/fasta/PickSequenomProbes.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/fasta/PickSequenomProbes.java @@ -4,6 +4,8 @@ import 
org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.*; +import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator; +import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.cmdLine.Argument; @@ -26,7 +28,7 @@ public class PickSequenomProbes extends RefWalker { String project_id = null; private byte [] maskFlags = new byte[401]; - private SeekableRODIterator snpMaskIterator=null; + private LocationAwareSeekableRODIterator snpMaskIterator=null; public void initialize() { if ( SNP_MASK != null ) { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java index 97251a172..8086033a8 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java @@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.walkers.filters; import org.broadinstitute.sting.gatk.contexts.*; import org.broadinstitute.sting.gatk.refdata.*; +import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; import org.broadinstitute.sting.utils.*; @@ -117,10 +118,10 @@ public class VariantFiltrationWalker extends RodWalker { RODRecordList rods = tracker.getTrackData("variant", null); // ignore places where we don't have a variant - if ( rods == null || rods.getRecords().size() == 0 ) + if ( rods == null || rods.size() == 0 ) return 0; - RodVCF variant = (RodVCF)rods.getRecords().get(0); + 
RodVCF variant = (RodVCF)rods.get(0); Pair varContext = new Pair(tracker, variant); // if we're still initializing the context, do so @@ -148,7 +149,7 @@ public class VariantFiltrationWalker extends RodWalker { // test for SNP mask, if present RODRecordList mask = context.first.getTrackData("mask", null); - if ( mask != null && mask.getRecords().size() > 0 ) + if ( mask != null && mask.size() > 0 ) addFilter(filterString, MASK_NAME); // test for clustered SNPs if requested diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelGenotyperV2Walker.java b/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelGenotyperV2Walker.java index b1ef8812c..5e4115ac8 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelGenotyperV2Walker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelGenotyperV2Walker.java @@ -1,5 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.indels; +import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator; +import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; import org.broadinstitute.sting.gatk.walkers.ReadFilters; import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.gatk.filters.Platform454Filter; @@ -80,7 +82,7 @@ public class IndelGenotyperV2Walker extends ReadWalker { boolean outOfContigUserWarned = false; - private SeekableRODIterator refseqIterator=null; + private LocationAwareSeekableRODIterator refseqIterator=null; private Set normalReadGroups; // we are going to remember which read groups are normals and which are tumors in order to be able private Set tumorReadGroups ; // to properly assign the reads coming from a merged stream @@ -581,7 +583,7 @@ public class IndelGenotyperV2Walker extends ReadWalker { else b.append(annUnknown); // we have no idea what this is. this may actually happen when we have a fully non-coding exon... 
} b.append('\t'); - b.append(((Transcript)ann.getRecords().get(0)).getGeneName()); // there is at least one transcript in the list, guaranteed + b.append(((Transcript)ann.get(0)).getGeneName()); // there is at least one transcript in the list, guaranteed // while ( it.hasNext() ) { // // t.getGeneName() // } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRodWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRodWalker.java index b5974385b..7ba31f3a8 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRodWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRodWalker.java @@ -3,8 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.qc; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; -import org.broadinstitute.sting.gatk.refdata.RODRecordList; +import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.cmdLine.Argument; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/qc/VCFValidator.java b/java/src/org/broadinstitute/sting/gatk/walkers/qc/VCFValidator.java index 0ae947aae..a301abd1e 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/qc/VCFValidator.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/qc/VCFValidator.java @@ -1,13 +1,12 @@ package org.broadinstitute.sting.gatk.walkers.qc; +import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.gatk.walkers.Requires; import org.broadinstitute.sting.gatk.walkers.RMD; import org.broadinstitute.sting.gatk.refdata.RodVCF; import 
org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.RODRecordList; -import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.utils.StingException; @@ -27,7 +26,7 @@ public class VCFValidator extends RodWalker { if ( tracker != null ) { RODRecordList rodlist = tracker.getTrackData("vcf", null); if ( rodlist != null ) { - RodVCF rod = (RodVCF)rodlist.getRecords().get(0); + RodVCF rod = (RodVCF)rodlist.get(0); if ( (rod.isSNP() || rod.isReference()) && Character.toUpperCase(rod.getReference().charAt(0)) != Character.toUpperCase(ref.getBase()) ) throw new StingException("The reference base (" + ref.getBase() + ") does not match the base from the VCF record (" + rod.getReference() + ")"); } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/ChipConcordance.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/ChipConcordance.java index 84ba40a1f..03dc790bc 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/ChipConcordance.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/ChipConcordance.java @@ -1,5 +1,6 @@ package org.broadinstitute.sting.gatk.walkers.varianteval; +import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; import org.broadinstitute.sting.utils.genotype.Variation; import org.broadinstitute.sting.utils.genotype.Genotype; import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype; @@ -79,7 +80,7 @@ public abstract class ChipConcordance extends BasicVariantAnalysis { HashMap chips = new HashMap(); for ( String name : rodNames ) { RODRecordList rods = tracker.getTrackData(name, null); - Variation chip = (rods == null ? null : (Variation)rods.getRecords().get(0)); + Variation chip = (rods == null ? 
null : (Variation)rods.get(0)); if ( chip != null ) { // chips must be Genotypes if ( !(chip instanceof VariantBackedByGenotype) ) diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantDBCoverage.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantDBCoverage.java index 44296e64a..00682af9f 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantDBCoverage.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantDBCoverage.java @@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.walkers.varianteval; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.refdata.*; +import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.genotype.Variation; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java index aae2bd684..02e6f22f1 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java @@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.walkers.varianteval; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.gatk.refdata.*; import org.broadinstitute.sting.utils.StingException; diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/HapmapPoolAllelicInfoWalker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/HapmapPoolAllelicInfoWalker.java index dc5acd7cb..cc28250e3 100755 --- 
a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/HapmapPoolAllelicInfoWalker.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/HapmapPoolAllelicInfoWalker.java @@ -1,9 +1,8 @@ package org.broadinstitute.sting.oneoffprojects.walkers; +import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; import org.broadinstitute.sting.gatk.walkers.LocusWalker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; -import org.broadinstitute.sting.gatk.refdata.RODRecordList; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.utils.cmdLine.Argument; @@ -136,7 +135,7 @@ public class HapmapPoolAllelicInfoWalker extends LocusWalker> chips = new ArrayList >(rodNames.length); for ( String name : rodNames ) { RODRecordList rods = tracker.getTrackData(name, null); - Variation chip = (rods == null ? null : (Variation)rods.getRecords().get(0)); + Variation chip = (rods == null ? 
null : (Variation)rods.get(0)); if ( chip != null ) { // chips must be Genotypes if ( !(chip instanceof VariantBackedByGenotype) ) diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/annotator/ThousandGenomesAnnotator.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/annotator/ThousandGenomesAnnotator.java index f60bdcdb9..888f7c88e 100644 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/annotator/ThousandGenomesAnnotator.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/annotator/ThousandGenomesAnnotator.java @@ -2,16 +2,13 @@ package org.broadinstitute.sting.oneoffprojects.walkers.annotator; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; -import org.broadinstitute.sting.gatk.refdata.RODRecordList; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; +import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotation; import org.broadinstitute.sting.oneoffprojects.refdata.HapmapVCFROD; import org.broadinstitute.sting.utils.genotype.Variation; import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine; -import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord; -import java.util.List; import java.util.Map; /** @@ -42,9 +39,9 @@ public class ThousandGenomesAnnotator implements VariantAnnotation { if ( pilot1 == null && pilot2 == null) { return "0"; } else { - if ( pilot1 != null && ! ( (HapmapVCFROD) pilot1.getRecords().get(0)).getRecord().isFiltered() ) { + if ( pilot1 != null && ! ( (HapmapVCFROD) pilot1.get(0)).getRecord().isFiltered() ) { return "1"; - } else if ( pilot2 != null && ! ( (HapmapVCFROD) pilot2.getRecords().get(0)).getRecord().isFiltered() ) { + } else if ( pilot2 != null && ! 
( (HapmapVCFROD) pilot2.get(0)).getRecord().isFiltered() ) { return "1"; } else { return "0"; diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/diagnostics/SNPDensity.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/diagnostics/SNPDensity.java index 68efeedaa..6bced46b3 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/diagnostics/SNPDensity.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/diagnostics/SNPDensity.java @@ -1,5 +1,6 @@ package org.broadinstitute.sting.playground.gatk.walkers.diagnostics; +import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.gatk.refdata.*; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/hybridselection/HybSelPerformanceWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/hybridselection/HybSelPerformanceWalker.java index 0c233bd4f..6f89a473f 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/hybridselection/HybSelPerformanceWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/hybridselection/HybSelPerformanceWalker.java @@ -2,6 +2,8 @@ package org.broadinstitute.sting.playground.gatk.walkers.hybridselection; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator; +import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; import org.broadinstitute.sting.gatk.walkers.LocusWalker; import org.broadinstitute.sting.gatk.walkers.By; import org.broadinstitute.sting.gatk.walkers.DataSource; @@ -56,7 +58,7 @@ public class HybSelPerformanceWalker extends LocusWalker { RODRecordList rods = tracker.getTrackData("variant", null); // ignore places where we don't have a 
variant - if ( rods == null || rods.getRecords().size() == 0 ) + if ( rods == null || rods.size() == 0 ) return 0; - RodVCF variant = (RodVCF)rods.getRecords().get(0); + RodVCF variant = (RodVCF)rods.get(0); boolean someoneMatched = false; for ( MatchExp exp : matchExpressions ) { Map infoMap = new HashMap(variant.mCurrentRecord.getInfoValues()); diff --git a/java/src/org/broadinstitute/sting/utils/MergingIterator.java b/java/src/org/broadinstitute/sting/utils/MergingIterator.java index 06942f182..fa3278942 100644 --- a/java/src/org/broadinstitute/sting/utils/MergingIterator.java +++ b/java/src/org/broadinstitute/sting/utils/MergingIterator.java @@ -1,10 +1,8 @@ package org.broadinstitute.sting.utils; -import org.broadinstitute.sting.gatk.iterators.PeekingIterator; -import org.broadinstitute.sting.gatk.iterators.PushbackIterator; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; -import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator; -import org.broadinstitute.sting.gatk.refdata.RODRecordList; +import org.broadinstitute.sting.gatk.refdata.utils.FlashBackIterator; +import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; import java.util.*; @@ -12,17 +10,17 @@ public class MergingIterator implements Iterator, Iterable queue = new PriorityQueue(); private class Element implements Comparable { - public SeekableRODIterator it = null; + public FlashBackIterator it = null; //public E value = null; public GenomeLoc nextLoc = null; - public Element(Iterator> it) { - if ( it instanceof SeekableRODIterator ) { - this.it = (SeekableRODIterator)it; + public Element(Iterator it) { + if ( it instanceof FlashBackIterator) { + this.it = (FlashBackIterator)it; if ( ! 
it.hasNext() ) throw new StingException("Iterator is empty"); update(); } else { - throw new StingException("Iterator passed to MergingIterator is not SeekableRODIterator"); + throw new StingException("Iterator passed to MergingIterator is not LocationAwareSeekableRODIterator"); } } @@ -57,12 +55,12 @@ public class MergingIterator implements Iterator, Iterable> it) { + public MergingIterator(Iterator it) { add(it); } - public MergingIterator(Collection>> its) { - for ( Iterator> it : its ) { + public MergingIterator(Collection> its) { + for ( Iterator it : its ) { add(it); } } @@ -71,7 +69,7 @@ public class MergingIterator implements Iterator, Iterable> it) { + public void add(Iterator it) { if ( it.hasNext() ) queue.add(new Element(it)); } diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataPoolTest.java b/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataPoolTest.java index c4d51be7b..f5f2fe01e 100755 --- a/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataPoolTest.java +++ b/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataPoolTest.java @@ -1,5 +1,6 @@ package org.broadinstitute.sting.gatk.datasources.simpleDataSources; +import org.broadinstitute.sting.gatk.refdata.utils.FlashBackIterator; import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; @@ -55,12 +56,12 @@ public class ReferenceOrderedDataPoolTest extends BaseTest { @Test public void testCreateSingleIterator() { ResourcePool iteratorPool = new ReferenceOrderedDataPool(rod); - SeekableRODIterator iterator = (SeekableRODIterator)iteratorPool.iterator( new MappedStreamSegment(testSite1) ); + FlashBackIterator iterator = (FlashBackIterator)iteratorPool.iterator( new MappedStreamSegment(testSite1) ); Assert.assertEquals("Number of iterators in the pool is incorrect", 1, iteratorPool.numIterators()); 
Assert.assertEquals("Number of available iterators in the pool is incorrect", 0, iteratorPool.numAvailableIterators()); - TabularROD datum = (TabularROD)iterator.next().getRecords().get(0); + TabularROD datum = (TabularROD)iterator.next().get(0); assertTrue(datum.getLocation().equals(testSite1)); assertTrue(datum.get("COL1").equals("A")); @@ -76,36 +77,36 @@ public class ReferenceOrderedDataPoolTest extends BaseTest { @Test public void testCreateMultipleIterators() { ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(rod); - SeekableRODIterator iterator1 = iteratorPool.iterator( new MappedStreamSegment(testSite1) ); + FlashBackIterator iterator1 = iteratorPool.iterator( new MappedStreamSegment(testSite1) ); // Create a new iterator at position 2. - SeekableRODIterator iterator2 = iteratorPool.iterator( new MappedStreamSegment(testSite2) ); + FlashBackIterator iterator2 = iteratorPool.iterator( new MappedStreamSegment(testSite2) ); Assert.assertEquals("Number of iterators in the pool is incorrect", 2, iteratorPool.numIterators()); Assert.assertEquals("Number of available iterators in the pool is incorrect", 0, iteratorPool.numAvailableIterators()); // Test out-of-order access: first iterator2, then iterator1. // Ugh...first call to a region needs to be a seek. 
- TabularROD datum = (TabularROD)iterator2.seekForward(testSite2).getRecords().get(0); + TabularROD datum = (TabularROD)iterator2.seekForward(testSite2).get(0); assertTrue(datum.getLocation().equals(testSite2)); assertTrue(datum.get("COL1").equals("C")); assertTrue(datum.get("COL2").equals("D")); assertTrue(datum.get("COL3").equals("E")); - datum = (TabularROD)iterator1.next().getRecords().get(0); + datum = (TabularROD)iterator1.next().get(0); assertTrue(datum.getLocation().equals(testSite1)); assertTrue(datum.get("COL1").equals("A")); assertTrue(datum.get("COL2").equals("B")); assertTrue(datum.get("COL3").equals("C")); // Advance iterator2, and make sure both iterator's contents are still correct. - datum = (TabularROD)iterator2.next().getRecords().get(0); + datum = (TabularROD)iterator2.next().get(0); assertTrue(datum.getLocation().equals(testSite3)); assertTrue(datum.get("COL1").equals("F")); assertTrue(datum.get("COL2").equals("G")); assertTrue(datum.get("COL3").equals("H")); - datum = (TabularROD)iterator1.next().getRecords().get(0); + datum = (TabularROD)iterator1.next().get(0); assertTrue(datum.getLocation().equals(testSite2)); assertTrue(datum.get("COL1").equals("C")); assertTrue(datum.get("COL2").equals("D")); @@ -126,12 +127,12 @@ public class ReferenceOrderedDataPoolTest extends BaseTest { @Test public void testIteratorConservation() { ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(rod); - SeekableRODIterator iterator = iteratorPool.iterator( new MappedStreamSegment(testSite1) ); + FlashBackIterator iterator = iteratorPool.iterator( new MappedStreamSegment(testSite1) ); Assert.assertEquals("Number of iterators in the pool is incorrect", 1, iteratorPool.numIterators()); Assert.assertEquals("Number of available iterators in the pool is incorrect", 0, iteratorPool.numAvailableIterators()); - TabularROD datum = (TabularROD)iterator.next().getRecords().get(0); + TabularROD datum = (TabularROD)iterator.next().get(0); 
assertTrue(datum.getLocation().equals(testSite1)); assertTrue(datum.get("COL1").equals("A")); assertTrue(datum.get("COL2").equals("B")); @@ -146,7 +147,7 @@ public class ReferenceOrderedDataPoolTest extends BaseTest { Assert.assertEquals("Number of iterators in the pool is incorrect", 1, iteratorPool.numIterators()); Assert.assertEquals("Number of available iterators in the pool is incorrect", 0, iteratorPool.numAvailableIterators()); - datum = (TabularROD)iterator.seekForward(testSite3).getRecords().get(0); + datum = (TabularROD)iterator.seekForward(testSite3).get(0); assertTrue(datum.getLocation().equals(testSite3)); assertTrue(datum.get("COL1").equals("F")); assertTrue(datum.get("COL2").equals("G")); @@ -161,12 +162,12 @@ public class ReferenceOrderedDataPoolTest extends BaseTest { @Test public void testIteratorCreation() { ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(rod); - SeekableRODIterator iterator = iteratorPool.iterator( new MappedStreamSegment(testSite3) ); + FlashBackIterator iterator = iteratorPool.iterator( new MappedStreamSegment(testSite3) ); Assert.assertEquals("Number of iterators in the pool is incorrect", 1, iteratorPool.numIterators()); Assert.assertEquals("Number of available iterators in the pool is incorrect", 0, iteratorPool.numAvailableIterators()); - TabularROD datum = (TabularROD)iterator.seekForward(testSite3).getRecords().get(0); + TabularROD datum = (TabularROD)iterator.seekForward(testSite3).get(0); assertTrue(datum.getLocation().equals(testSite3)); assertTrue(datum.get("COL1").equals("F")); assertTrue(datum.get("COL2").equals("G")); @@ -181,7 +182,7 @@ public class ReferenceOrderedDataPoolTest extends BaseTest { Assert.assertEquals("Number of iterators in the pool is incorrect", 2, iteratorPool.numIterators()); Assert.assertEquals("Number of available iterators in the pool is incorrect", 1, iteratorPool.numAvailableIterators()); - datum = (TabularROD)iterator.next().getRecords().get(0); + datum = 
(TabularROD)iterator.next().get(0); assertTrue(datum.getLocation().equals(testSite1)); assertTrue(datum.get("COL1").equals("A")); assertTrue(datum.get("COL2").equals("B")); diff --git a/java/test/org/broadinstitute/sting/gatk/refdata/TabularRODTest.java b/java/test/org/broadinstitute/sting/gatk/refdata/TabularRODTest.java index 0d5a8e4a9..bd8888f4a 100755 --- a/java/test/org/broadinstitute/sting/gatk/refdata/TabularRODTest.java +++ b/java/test/org/broadinstitute/sting/gatk/refdata/TabularRODTest.java @@ -4,6 +4,8 @@ package org.broadinstitute.sting.gatk.refdata; // the imports for unit testing. +import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator; +import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; import org.junit.*; import static org.junit.Assert.assertTrue; import org.broadinstitute.sting.BaseTest; @@ -26,7 +28,7 @@ import net.sf.picard.reference.ReferenceSequenceFile; public class TabularRODTest extends BaseTest { private static ReferenceSequenceFile seq; private ReferenceOrderedData ROD; - private SeekableRODIterator iter; + private LocationAwareSeekableRODIterator iter; @BeforeClass @@ -49,7 +51,7 @@ public class TabularRODTest extends BaseTest { public void test1() { logger.warn("Executing test1"); RODRecordList oneList = iter.next(); - TabularROD one = (TabularROD)oneList.getRecords().get(0); + TabularROD one = (TabularROD)oneList.get(0); assertTrue(one.size() == 4); assertTrue(one.getLocation().equals(GenomeLocParser.createGenomeLoc("chrM", 10))); assertTrue(one.get("COL1").equals("A")); @@ -62,8 +64,8 @@ public class TabularRODTest extends BaseTest { logger.warn("Executing test2"); RODRecordList oneList = iter.next(); RODRecordList twoList = iter.next(); - TabularROD one = (TabularROD)oneList.getRecords().get(0); - TabularROD two = (TabularROD)twoList.getRecords().get(0); + TabularROD one = (TabularROD)oneList.get(0); + TabularROD two = (TabularROD)twoList.get(0); assertTrue(two.size() == 4); 
assertTrue(two.getLocation().equals(GenomeLocParser.createGenomeLoc("chrM", 20))); assertTrue(two.get("COL1").equals("C")); @@ -77,9 +79,9 @@ public class TabularRODTest extends BaseTest { RODRecordList oneList = iter.next(); RODRecordList twoList = iter.next(); RODRecordList threeList = iter.next(); - TabularROD one = (TabularROD)oneList.getRecords().get(0); - TabularROD two = (TabularROD)twoList.getRecords().get(0); - TabularROD three = (TabularROD)threeList.getRecords().get(0); + TabularROD one = (TabularROD)oneList.get(0); + TabularROD two = (TabularROD)twoList.get(0); + TabularROD three = (TabularROD)threeList.get(0); assertTrue(three.size() == 4); assertTrue(three.getLocation().equals(GenomeLocParser.createGenomeLoc("chrM", 30))); assertTrue(three.get("COL1").equals("F")); @@ -93,9 +95,9 @@ public class TabularRODTest extends BaseTest { RODRecordList oneList = iter.next(); RODRecordList twoList = iter.next(); RODRecordList threeList = iter.next(); - TabularROD one = (TabularROD)oneList.getRecords().get(0); - TabularROD two = (TabularROD)twoList.getRecords().get(0); - TabularROD three = (TabularROD)threeList.getRecords().get(0); + TabularROD one = (TabularROD)oneList.get(0); + TabularROD two = (TabularROD)twoList.get(0); + TabularROD three = (TabularROD)threeList.get(0); assertTrue(!iter.hasNext()); } @@ -103,7 +105,7 @@ public class TabularRODTest extends BaseTest { public void testSeek() { logger.warn("Executing testSeek"); RODRecordList twoList = iter.seekForward(GenomeLocParser.createGenomeLoc("chrM", 20)); - TabularROD two = (TabularROD)twoList.getRecords().get(0); + TabularROD two = (TabularROD)twoList.get(0); assertTrue(two.size() == 4); assertTrue(two.getLocation().equals(GenomeLocParser.createGenomeLoc("chrM", 20))); assertTrue(two.get("COL1").equals("C")); @@ -115,7 +117,7 @@ public class TabularRODTest extends BaseTest { public void testToString() { logger.warn("Executing testToString"); RODRecordList oneList = iter.next(); - TabularROD one = 
(TabularROD)oneList.getRecords().get(0); + TabularROD one = (TabularROD)oneList.get(0); assertTrue(one.toString().equals("chrM:10\tA\tB\tC")); } @@ -124,11 +126,11 @@ public class TabularRODTest extends BaseTest { public void testDelim1() { File file2 = new File(testDir + "TabularDataTest2.dat"); ReferenceOrderedData ROD_commas = new ReferenceOrderedData("tableTest", file2, TabularROD.class); - SeekableRODIterator iter_commas = ROD_commas.iterator(); + LocationAwareSeekableRODIterator iter_commas = ROD_commas.iterator(); logger.warn("Executing testDelim1"); RODRecordList one2List = iter_commas.next(); - TabularROD one2 = (TabularROD)one2List.getRecords().get(0); + TabularROD one2 = (TabularROD)one2List.get(0); assertTrue(one2.size() == 5); assertTrue(one2.getLocation().equals(GenomeLocParser.createGenomeLoc("chrM", 10))); assertTrue(one2.get("COL1").equals("A")); @@ -142,11 +144,11 @@ public class TabularRODTest extends BaseTest { TabularROD.setDelimiter(",",","); File file2 = new File(testDir + "TabularDataTest2.dat"); ReferenceOrderedData ROD_commas = new ReferenceOrderedData("tableTest", file2, TabularROD.class); - SeekableRODIterator iter_commas = ROD_commas.iterator(); + LocationAwareSeekableRODIterator iter_commas = ROD_commas.iterator(); logger.warn("Executing testDelim1"); RODRecordList one2List = iter_commas.next(); - TabularROD one2 = (TabularROD)one2List.getRecords().get(0); + TabularROD one2 = (TabularROD)one2List.get(0); assertTrue(one2.size() == 5); assertTrue(one2.getLocation().equals(GenomeLocParser.createGenomeLoc("chrM", 10))); assertTrue(one2.get("COL1").equals("A")); @@ -187,10 +189,10 @@ public class TabularRODTest extends BaseTest { out.println(row.toString()); ReferenceOrderedData ROD_commas = new ReferenceOrderedData("tableTest", outputFile, TabularROD.class); - SeekableRODIterator iter_commas = ROD_commas.iterator(); + LocationAwareSeekableRODIterator iter_commas = ROD_commas.iterator(); RODRecordList oneList = iter_commas.next(); - 
TabularROD one = (TabularROD)oneList.getRecords().get(0); + TabularROD one = (TabularROD)oneList.get(0); assertTrue(one.size() == 4); assertTrue(one.getLocation().equals(GenomeLocParser.createGenomeLoc("chrM", 1))); assertTrue(one.get("col1").equals("1")); @@ -198,7 +200,7 @@ public class TabularRODTest extends BaseTest { assertTrue(one.get("col3").equals("3")); RODRecordList twoList = iter_commas.next(); - TabularROD two = (TabularROD)twoList.getRecords().get(0); + TabularROD two = (TabularROD)twoList.get(0); assertTrue(two.size() == 4); assertTrue(two.getLocation().equals(GenomeLocParser.createGenomeLoc("chrM", 2))); assertTrue(two.get("col1").equals("3")); diff --git a/java/test/org/broadinstitute/sting/gatk/refdata/utils/FlashBackIteratorTest.java b/java/test/org/broadinstitute/sting/gatk/refdata/utils/FlashBackIteratorTest.java new file mode 100644 index 000000000..db704f8f8 --- /dev/null +++ b/java/test/org/broadinstitute/sting/gatk/refdata/utils/FlashBackIteratorTest.java @@ -0,0 +1,239 @@ +package org.broadinstitute.sting.gatk.refdata.utils; + +import net.sf.samtools.SAMFileHeader; +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.GenomeLocSortedSet; +import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.util.AbstractList; +import java.util.ArrayList; +import java.util.List; + + +/** + * @author aaron + *

+ * Class FlashBackIteratorTest + *

+ * just like a greatful dead show...this will be prone to flashbacks + */ +public class FlashBackIteratorTest extends BaseTest { + private SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(NUMBER_OF_CHROMOSOMES, STARTING_CHROMOSOME, CHROMOSOME_SIZE); + private static final int NUMBER_OF_CHROMOSOMES = 5; + private static final int STARTING_CHROMOSOME = 1; + private static final int CHROMOSOME_SIZE = 1000; + + @Before + public void setup() { + GenomeLocParser.setupRefContigOrdering(header.getSequenceDictionary()); + } + + @Test + public void testBasicIteration() { + GenomeLoc loc = GenomeLocParser.createGenomeLoc(0,0,0); + FlashBackIterator iter = new FlashBackIterator(new FakeSeekableRODIterator(loc)); + GenomeLoc lastLocation = null; + for (int x = 0; x < 10; x++) { + iter.next(); + GenomeLoc cur = iter.position(); + if (lastLocation != null) { + Assert.assertTrue(lastLocation.isBefore(cur)); + } + lastLocation = cur; + } + } + + @Test + public void testBasicIterationThenFlashBack() { + GenomeLoc loc = GenomeLocParser.createGenomeLoc(0,0,0); + FlashBackIterator iter = new FlashBackIterator(new FakeSeekableRODIterator(loc)); + GenomeLoc lastLocation = null; + for (int x = 0; x < 10; x++) { + iter.next(); + GenomeLoc cur = iter.position(); + if (lastLocation != null) { + Assert.assertTrue(lastLocation.isBefore(cur)); + } + lastLocation = cur; + } + iter.flashBackTo(GenomeLocParser.createGenomeLoc(0,2)); + } + + @Test + public void testBasicIterationThenFlashBackThenIterate() { + GenomeLoc loc = GenomeLocParser.createGenomeLoc(0,0,0); + FlashBackIterator iter = new FlashBackIterator(new FakeSeekableRODIterator(loc)); + GenomeLoc lastLocation = null; + for (int x = 0; x < 10; x++) { + iter.next(); + GenomeLoc cur = iter.position(); + if (lastLocation != null) { + Assert.assertTrue(lastLocation.isBefore(cur)); + } + lastLocation = cur; + } + iter.flashBackTo(GenomeLocParser.createGenomeLoc(0,1)); + int count = 0; + while (iter.hasNext()) { + count++; 
+ iter.next(); + } + Assert.assertEquals(10,count); + } + +} + + +class FakeSeekableRODIterator implements LocationAwareSeekableRODIterator { + + // current location + private GenomeLoc location; + private FakeRODatum curROD; + private int recordCount = 10; + public FakeSeekableRODIterator(GenomeLoc startingLoc) { + this.location = GenomeLocParser.createGenomeLoc(startingLoc.getContigIndex(),startingLoc.getStart()+1,startingLoc.getStop()+1);; + } + + @Override + public GenomeLoc peekNextLocation() { + System.err.println("Peek Next -> " + location); + return location; + } + + @Override + public GenomeLoc position() { + return location; + } + + @Override + public RODRecordList seekForward(GenomeLoc interval) { + this.location = interval; + return next(); + } + + @Override + public boolean hasNext() { + return (recordCount > 0); + } + + @Override + public RODRecordList next() { + RODRecordList list = new FakeRODRecordList(); + curROD = new FakeRODatum(location); + location = GenomeLocParser.createGenomeLoc(location.getContigIndex(),location.getStart()+1,location.getStop()+1); + list.add(curROD); + recordCount--; + return list; + } + + @Override + public void remove() { + throw new IllegalStateException("GRRR"); + } +} + + +/** for testing only */ +class FakeRODatum implements ReferenceOrderedDatum { + + final GenomeLoc location; + + public FakeRODatum(GenomeLoc location) { + this.location = location; + } + + @Override + public String getName() { + return "false"; + } + + @Override + public boolean parseLine(Object header, String[] parts) throws IOException { + return false; + } + + @Override + public String toSimpleString() { + return ""; + } + + @Override + public String repl() { + return ""; + } + + /** + * Used by the ROD system to determine how to split input lines + * + * @return Regex string delimiter separating fields + */ + @Override + public String delimiterRegex() { + return ""; + } + + @Override + public GenomeLoc getLocation() { + return location; + } + + 
@Override + public int compareTo(ReferenceOrderedDatum that) { + return location.compareTo(that.getLocation()); + } + + /** + * Backdoor hook to read header, meta-data, etc. associated with the file. Will be + * called by the ROD system before streaming starts + * + * @param source source data file on disk from which this rod stream will be pulled + * + * @return a header object that will be passed to parseLine command + */ + @Override + public Object initialize(File source) throws FileNotFoundException { + return null; + } +} + +class FakeRODRecordList extends AbstractList implements RODRecordList { + private final List list = new ArrayList(); + + public boolean add(ReferenceOrderedDatum data) { + return list.add(data); + } + + @Override + public ReferenceOrderedDatum get(int i) { + return list.get(i); + } + + @Override + public int size() { + return list.size(); + } + + @Override + public GenomeLoc getLocation() { + return list.get(0).getLocation(); + } + + @Override + public String getName() { + return "test"; + } + + @Override + public int compareTo(RODRecordList rodRecordList) { + return this.list.get(0).getLocation().compareTo(rodRecordList.getLocation()); + } +} \ No newline at end of file