Part I of GSA-462: Consistent RODBinding access across Ref and Read trackers
-- ReadMetaDataTracker is dead! Long live the RefMetaDataTracker. Read walkers will soon just take RefMetaDataTracker objects. In this commit they take a class that trivially extends them
-- Rewrote ReadBasedReferenceOrderedView to produce RefMetaDataTrackers not the old class.
-- This new implementation produces thread-safe objects (i.e., holds no points to shared state). Suitable for use (to be tested) with nano scheduling
-- Simplified interfaces to use the simplest data structures (PeekableIterator) not the LocusAwareSeekableIterator, since I both hate those classes and this is on the long term trajectory to remove those from the GATK entirely.
-- Massively expanded DataProvider unit tests for ReadBasedReferenceOrderedView
-- Note that the old implementation of offset -> ROD in ReadRefMetaDataTracker was broken for any read not completely matching the reference. Rather than provide broken code the ReadMetaDataTracker only provides a "bag of RODs" interface. If you want to work with the relationship between the read and the RODs in your tool you need to manage the CIGAR element itself.
-- This commit breaks the new read walker BQSR, but Ryan knows this is coming
-- Subsequent commit will be retiring / fixing ValidateRODForReads
This commit is contained in:
parent
8fc6a0a68b
commit
972be8b4a4
|
|
@ -0,0 +1,143 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.providers;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import net.sf.picard.util.PeekableIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.RODRecordListImpl;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.LinkedList;
|
||||
import java.util.ListIterator;
|
||||
|
||||
/**
|
||||
* Key algorithmic helper for ReadBasedReferenceOrderedData
|
||||
*
|
||||
* Takes a single iterator of features, and provides a single capability that returns
|
||||
* the list of RODs that overlap an interval. Allows sequential getOverlapping calls
|
||||
* from intervals provided that these intervals always have increasing getStart() values.
|
||||
*
|
||||
*/
|
||||
class IntervalOverlappingRODsFromStream {
|
||||
/**
|
||||
* Only held for QC purposes
|
||||
*/
|
||||
GenomeLoc lastQuery = null;
|
||||
|
||||
private final String name;
|
||||
private final LinkedList<GATKFeature> currentFeatures = new LinkedList<GATKFeature>();
|
||||
private final PeekableIterator<RODRecordList> futureFeatures;
|
||||
|
||||
/**
|
||||
* Create a new IntervalOverlappingRODsFromStream that reads elements from futureFeatures and
|
||||
* returns RODRecordLists having name
|
||||
*
|
||||
* @param name
|
||||
* @param futureFeatures
|
||||
*/
|
||||
IntervalOverlappingRODsFromStream(final String name, final PeekableIterator<RODRecordList> futureFeatures) {
|
||||
if ( futureFeatures == null ) throw new IllegalArgumentException("futureFeatures cannot be null");
|
||||
|
||||
this.name = name;
|
||||
this.futureFeatures = futureFeatures;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the list of RODs overlapping loc from this stream of RODs.
|
||||
*
|
||||
* Sequential calls to this function must obey the rule that loc2.getStart >= loc1.getStart
|
||||
*
|
||||
* @param loc the interval to query
|
||||
* @return a non-null RODRecordList containing the overlapping RODs, which may be empty
|
||||
*/
|
||||
@Ensures({"overlaps(loc, result)",
|
||||
"! futureFeatures.hasNext() || futureFeatures.peek().getLocation().isPast(loc)",
|
||||
"result != null"})
|
||||
public RODRecordList getOverlapping(final GenomeLoc loc) {
|
||||
if ( lastQuery != null && loc.getStart() < lastQuery.getStart() )
|
||||
throw new IllegalArgumentException(String.format("BUG: query interval (%s) starts before the previous interval %s", loc, lastQuery));
|
||||
|
||||
trimCurrentFeaturesToLoc(loc);
|
||||
readOverlappingFutureFeatures(loc);
|
||||
return new RODRecordListImpl(name, subsetToOverlapping(loc, currentFeatures), loc);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* For contract assurance. Checks that all bindings in loc overlap
|
||||
*
|
||||
* @param loc
|
||||
* @param bindings
|
||||
* @return
|
||||
*/
|
||||
@Requires({"loc != null", "bindings != null"})
|
||||
private boolean overlaps(final GenomeLoc loc, final RODRecordList bindings) {
|
||||
for ( final GATKFeature feature : bindings )
|
||||
if ( ! feature.getLocation().overlapsP(loc) )
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Subset the features in all to those that overlap with loc
|
||||
*
|
||||
* The current features list contains everything read that cannot be thrown away yet, but not
|
||||
* everything in there necessarily overlaps with loc. Subset to just those that do overlap
|
||||
*
|
||||
* @param loc the location that features must overlap
|
||||
* @param all the list of all features
|
||||
* @return a subset of all that overlaps with loc
|
||||
*/
|
||||
@Requires({"loc != null", "all != null"})
|
||||
@Ensures("result.size() <= all.size()")
|
||||
private Collection<GATKFeature> subsetToOverlapping(final GenomeLoc loc, final Collection<GATKFeature> all) {
|
||||
final LinkedList<GATKFeature> overlapping = new LinkedList<GATKFeature>();
|
||||
for ( final GATKFeature feature : all )
|
||||
if ( feature.getLocation().overlapsP(loc) )
|
||||
overlapping.add(feature);
|
||||
return overlapping;
|
||||
}
|
||||
|
||||
/**
|
||||
* Update function. Remove all elements of currentFeatures that end before loc
|
||||
*
|
||||
* @param loc the location to use
|
||||
*/
|
||||
@Requires("loc != null")
|
||||
@Ensures("currentFeatures.size() <= old(currentFeatures.size())")
|
||||
private void trimCurrentFeaturesToLoc(final GenomeLoc loc) {
|
||||
final ListIterator<GATKFeature> it = currentFeatures.listIterator();
|
||||
while ( it.hasNext() ) {
|
||||
final GATKFeature feature = it.next();
|
||||
if ( feature.getLocation().isBefore(loc) )
|
||||
it.remove();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Update function: Read all elements from futureFeatures that overlap with loc
|
||||
*
|
||||
* Stops at the first element that starts before the end of loc, or the stream empties
|
||||
*
|
||||
* @param loc
|
||||
*/
|
||||
@Requires("loc != null")
|
||||
@Ensures("currentFeatures.size() >= old(currentFeatures.size())")
|
||||
private void readOverlappingFutureFeatures(final GenomeLoc loc) {
|
||||
while ( futureFeatures.hasNext() ) {
|
||||
final GenomeLoc nextLoc = futureFeatures.peek().getLocation();
|
||||
if ( nextLoc.isBefore(loc) ) {
|
||||
futureFeatures.next(); // next rod element is before loc, throw it away and keep looking
|
||||
} else if ( nextLoc.isPast(loc) ) {
|
||||
break; // next element is past loc, stop looking but don't pop it
|
||||
} else if ( nextLoc.overlapsP(loc) ) {
|
||||
// add overlapping elements to our current features, removing from stream
|
||||
for ( final GATKFeature feature : futureFeatures.next() ) {
|
||||
currentFeatures.add(feature);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -23,40 +23,63 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.datasources.providers;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import net.sf.picard.util.PeekableIterator;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.TreeMap;
|
||||
|
||||
/** a ROD view for reads. This provides the Read traversals a way of getting a ReadMetaDataTracker */
|
||||
public class ReadBasedReferenceOrderedView implements View {
|
||||
private final WindowedData window;
|
||||
// a list of the RMDDataState (location->iterators)
|
||||
private final List<RMDDataState> states = new ArrayList<RMDDataState>(1);
|
||||
private final static ReadMetaDataTracker EMPTY_TRACKER = new ReadMetaDataTracker();
|
||||
|
||||
public ReadBasedReferenceOrderedView(ShardDataProvider provider) {
|
||||
window = new WindowedData(provider);
|
||||
/**
|
||||
* Used to get genome locs for reads
|
||||
*/
|
||||
private final GenomeLocParser genomeLocParser;
|
||||
|
||||
/**
|
||||
* The total extent of all reads in this span. We create iterators from our RODs
|
||||
* from the start of this span, to the end.
|
||||
*/
|
||||
private final GenomeLoc shardSpan;
|
||||
|
||||
public ReadBasedReferenceOrderedView(final ShardDataProvider provider) {
|
||||
this(provider.getGenomeLocParser(), provider.getShard().getLocation());
|
||||
provider.register(this);
|
||||
|
||||
if ( provider.getReferenceOrderedData() != null && ! shardSpan.isUnmapped() ) {
|
||||
for (ReferenceOrderedDataSource dataSource : provider.getReferenceOrderedData())
|
||||
states.add(new RMDDataState(dataSource, dataSource.seek(shardSpan)));
|
||||
}
|
||||
}
|
||||
|
||||
private ReadBasedReferenceOrderedView(final GenomeLocParser genomeLocParser, final GenomeLoc shardSpan) {
|
||||
this.genomeLocParser = genomeLocParser;
|
||||
this.shardSpan = shardSpan;
|
||||
}
|
||||
|
||||
/**
|
||||
* for testing only please
|
||||
*
|
||||
* @param data the window provider
|
||||
* Testing constructor
|
||||
*/
|
||||
ReadBasedReferenceOrderedView(WindowedData data) {
|
||||
window = data;
|
||||
}
|
||||
|
||||
public ReadMetaDataTracker getReferenceOrderedDataForRead(SAMRecord read) {
|
||||
return window.getTracker(read);
|
||||
protected ReadBasedReferenceOrderedView(final GenomeLocParser genomeLocParser,
|
||||
final GenomeLoc shardSpan,
|
||||
final List<String> names,
|
||||
final List<PeekableIterator<RODRecordList>> featureSources) {
|
||||
this(genomeLocParser, shardSpan);
|
||||
for ( int i = 0; i < names.size(); i++ )
|
||||
states.add(new RMDDataState(names.get(i), featureSources.get(i)));
|
||||
}
|
||||
|
||||
public Collection<Class<? extends View>> getConflictingViews() {
|
||||
|
|
@ -65,74 +88,6 @@ public class ReadBasedReferenceOrderedView implements View {
|
|||
return classes;
|
||||
}
|
||||
|
||||
public void close() {
|
||||
if (window != null) window.close();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/** stores a window of data, dropping RODs if we've passed the new reads start point. */
|
||||
class WindowedData {
|
||||
// the queue of possibly in-frame RODs; RODs are removed as soon as they are out of scope
|
||||
private final TreeMap<Integer, RODMetaDataContainer> mapping = new TreeMap<Integer, RODMetaDataContainer>();
|
||||
|
||||
// our current location from the last read we processed
|
||||
private GenomeLoc currentLoc;
|
||||
|
||||
// a list of the RMDDataState (location->iterators)
|
||||
private List<RMDDataState> states;
|
||||
|
||||
// the provider; where we get all our information
|
||||
private final ShardDataProvider provider;
|
||||
|
||||
/**
|
||||
* our log, which we want to capture anything from this class
|
||||
*/
|
||||
private static Logger logger = Logger.getLogger(WindowedData.class);
|
||||
|
||||
/**
|
||||
* create a WindowedData given a shard provider
|
||||
*
|
||||
* @param provider the ShardDataProvider
|
||||
*/
|
||||
public WindowedData(ShardDataProvider provider) {
|
||||
this.provider = provider;
|
||||
}
|
||||
|
||||
/**
|
||||
* load the states dynamically, since the only way to get a genome loc is from the read (the shard doesn't have one)
|
||||
*
|
||||
* @param provider the ShardDataProvider
|
||||
* @param rec the current read
|
||||
*/
|
||||
private void getStates(ShardDataProvider provider, SAMRecord rec) {
|
||||
|
||||
int stop = Integer.MAX_VALUE;
|
||||
// figure out the appropriate alignment stop
|
||||
if (provider.hasReference()) {
|
||||
stop = provider.getReference().getSequenceDictionary().getSequence(rec.getReferenceIndex()).getSequenceLength();
|
||||
}
|
||||
|
||||
// calculate the range of positions we need to look at
|
||||
GenomeLoc range = provider.getGenomeLocParser().createGenomeLoc(rec.getReferenceName(),
|
||||
rec.getAlignmentStart(),
|
||||
stop);
|
||||
states = new ArrayList<RMDDataState>();
|
||||
if (provider.getReferenceOrderedData() != null)
|
||||
for (ReferenceOrderedDataSource dataSource : provider.getReferenceOrderedData())
|
||||
states.add(new RMDDataState(dataSource, dataSource.seek(range)));
|
||||
}
|
||||
|
||||
/**
|
||||
* this function is for testing only
|
||||
*
|
||||
* @param states a list of RMDDataState to initialize with
|
||||
*/
|
||||
WindowedData(List<RMDDataState> states) {
|
||||
this.states = states;
|
||||
provider = null;
|
||||
}
|
||||
|
||||
/**
|
||||
* create a ReadMetaDataTracker given the current read
|
||||
*
|
||||
|
|
@ -140,60 +95,65 @@ class WindowedData {
|
|||
*
|
||||
* @return a ReadMetaDataTracker for the read, from which you can get ROD -> read alignments
|
||||
*/
|
||||
public ReadMetaDataTracker getTracker(SAMRecord rec) {
|
||||
updatePosition(rec);
|
||||
return new ReadMetaDataTracker(provider.getGenomeLocParser(), rec, mapping);
|
||||
@Requires("rec != null")
|
||||
@Ensures("result != null")
|
||||
public ReadMetaDataTracker getReferenceOrderedDataForRead(final SAMRecord rec) {
|
||||
if ( rec.getReadUnmappedFlag() )
|
||||
// empty RODs for unmapped reads
|
||||
return new ReadMetaDataTracker();
|
||||
else
|
||||
return getReferenceOrderedDataForInterval(genomeLocParser.createGenomeLoc(rec));
|
||||
}
|
||||
|
||||
/**
|
||||
* update the position we're storing
|
||||
*
|
||||
* @param rec the read to use for start and end
|
||||
*/
|
||||
private void updatePosition(SAMRecord rec) {
|
||||
if (states == null) getStates(this.provider, rec);
|
||||
currentLoc = provider.getGenomeLocParser().createGenomeLoc(rec);
|
||||
|
||||
// flush the queue looking for records we've passed over
|
||||
while (mapping.size() > 0 && mapping.firstKey() < currentLoc.getStart())
|
||||
mapping.pollFirstEntry(); // toss away records that we've passed
|
||||
|
||||
// add new data to the queue
|
||||
for (RMDDataState state : states) {
|
||||
// move into position
|
||||
while (state.iterator.hasNext() && state.iterator.peekNextLocation().isBefore(currentLoc))
|
||||
state.iterator.next();
|
||||
while (state.iterator.hasNext() && state.iterator.peekNextLocation().overlapsP(currentLoc)) {
|
||||
RODRecordList list = state.iterator.next();
|
||||
for (GATKFeature datum : list) {
|
||||
if (!mapping.containsKey(list.getLocation().getStart()))
|
||||
mapping.put(list.getLocation().getStart(), new RODMetaDataContainer());
|
||||
mapping.get(list.getLocation().getStart()).addEntry(datum);
|
||||
}
|
||||
}
|
||||
@Requires({"interval != null", "shardSpan.containsP(interval)"})
|
||||
@Ensures("result != null")
|
||||
public ReadMetaDataTracker getReferenceOrderedDataForInterval(final GenomeLoc interval) {
|
||||
if ( states.isEmpty() ) // optimization for no bindings (common for read walkers)
|
||||
return EMPTY_TRACKER;
|
||||
else {
|
||||
final List<RODRecordList> bindings = new ArrayList<RODRecordList>(states.size());
|
||||
for ( final RMDDataState state : states )
|
||||
bindings.add(state.stream.getOverlapping(interval));
|
||||
return new ReadMetaDataTracker(bindings);
|
||||
}
|
||||
}
|
||||
|
||||
/** Closes the current view. */
|
||||
/**
|
||||
* Closes the current view.
|
||||
*/
|
||||
public void close() {
|
||||
if (states == null) return;
|
||||
for (RMDDataState state : states)
|
||||
state.dataSource.close( state.iterator );
|
||||
for (final RMDDataState state : states)
|
||||
state.close();
|
||||
|
||||
// Clear out the existing data so that post-close() accesses to this data will fail-fast.
|
||||
states = null;
|
||||
states.clear();
|
||||
}
|
||||
|
||||
/** Models the traversal state of a given ROD lane. */
|
||||
private static class RMDDataState {
|
||||
public final ReferenceOrderedDataSource dataSource;
|
||||
public final IntervalOverlappingRODsFromStream stream;
|
||||
private final LocationAwareSeekableRODIterator iterator;
|
||||
|
||||
}
|
||||
public RMDDataState(ReferenceOrderedDataSource dataSource, LocationAwareSeekableRODIterator iterator) {
|
||||
this.dataSource = dataSource;
|
||||
this.iterator = iterator;
|
||||
this.stream = new IntervalOverlappingRODsFromStream(dataSource.getName(), new PeekableIterator<RODRecordList>(iterator));
|
||||
}
|
||||
|
||||
/** Models the traversal state of a given ROD lane. */
|
||||
class RMDDataState {
|
||||
public final ReferenceOrderedDataSource dataSource;
|
||||
public final LocationAwareSeekableRODIterator iterator;
|
||||
/**
|
||||
* For testing
|
||||
*/
|
||||
public RMDDataState(final String name, final PeekableIterator<RODRecordList> iterator) {
|
||||
this.dataSource = null;
|
||||
this.iterator = null;
|
||||
this.stream = new IntervalOverlappingRODsFromStream(name, new PeekableIterator<RODRecordList>(iterator));
|
||||
}
|
||||
|
||||
public RMDDataState(ReferenceOrderedDataSource dataSource, LocationAwareSeekableRODIterator iterator) {
|
||||
this.dataSource = dataSource;
|
||||
this.iterator = iterator;
|
||||
public void close() {
|
||||
if ( dataSource != null )
|
||||
dataSource.close( iterator );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -26,6 +26,7 @@ package org.broadinstitute.sting.gatk.refdata;
|
|||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.RODMetaDataContainer;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
|
||||
|
|
@ -39,141 +40,12 @@ import java.util.*;
|
|||
* <p/>
|
||||
* a read-based meta data tracker
|
||||
*/
|
||||
public class ReadMetaDataTracker {
|
||||
/**
|
||||
* The parser, used to create new GenomeLocs.
|
||||
*/
|
||||
private final GenomeLocParser genomeLocParser;
|
||||
|
||||
private final SAMRecord record;
|
||||
|
||||
// the buffer of positions and RODs we've stored
|
||||
private final TreeMap<Integer, RODMetaDataContainer> mapping;
|
||||
|
||||
/**
|
||||
* create a read meta data tracker, given the read and a queue of RODatum positions
|
||||
*
|
||||
* @param record the read to create offset from
|
||||
* @param mapping the mapping of reference ordered datum
|
||||
*/
|
||||
public ReadMetaDataTracker(GenomeLocParser genomeLocParser, SAMRecord record, TreeMap<Integer, RODMetaDataContainer> mapping) {
|
||||
this.genomeLocParser = genomeLocParser;
|
||||
this.record = record;
|
||||
this.mapping = mapping;
|
||||
public class ReadMetaDataTracker extends RefMetaDataTracker {
|
||||
public ReadMetaDataTracker() {
|
||||
super();
|
||||
}
|
||||
|
||||
/**
|
||||
* create an alignment of read position to reference ordered datum
|
||||
*
|
||||
* @param record the SAMRecord
|
||||
* @param queue the queue (as a tree set)
|
||||
* @param cl the class name, null if not filtered by classname
|
||||
* @param name the datum track name, null if not filtered by name
|
||||
*
|
||||
* @return a mapping from the position in the read to the reference ordered datum
|
||||
*/
|
||||
private Map<Integer, Collection<GATKFeature>> createReadAlignment(SAMRecord record, TreeMap<Integer, RODMetaDataContainer> queue, Class cl, String name) {
|
||||
if (name != null && cl != null) throw new IllegalStateException("Both a class and name cannot be specified");
|
||||
Map<Integer, Collection<GATKFeature>> ret = new LinkedHashMap<Integer, Collection<GATKFeature>>();
|
||||
GenomeLoc location = genomeLocParser.createGenomeLoc(record);
|
||||
int length = record.getReadLength();
|
||||
for (Integer loc : queue.keySet()) {
|
||||
Integer position = loc - location.getStart();
|
||||
if (position >= 0 && position < length) {
|
||||
Collection<GATKFeature> set;
|
||||
if (cl != null)
|
||||
set = queue.get(loc).getSet(cl);
|
||||
else
|
||||
set = queue.get(loc).getSet(name);
|
||||
if (set != null && set.size() > 0)
|
||||
ret.put(position, set);
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* create an alignment of read position to reference ordered datum
|
||||
*
|
||||
* @return a mapping from the position in the read to the reference ordered datum
|
||||
*/
|
||||
private Map<Integer, Collection<GATKFeature>> createGenomeLocAlignment(SAMRecord record, TreeMap<Integer, RODMetaDataContainer> mapping, Class cl, String name) {
|
||||
Map<Integer, Collection<GATKFeature>> ret = new LinkedHashMap<Integer, Collection<GATKFeature>>();
|
||||
int start = record.getAlignmentStart();
|
||||
int stop = record.getAlignmentEnd();
|
||||
for (Integer location : mapping.keySet()) {
|
||||
if (location >= start && location <= stop)
|
||||
if (cl != null)
|
||||
ret.put(location, mapping.get(location).getSet(cl));
|
||||
else
|
||||
ret.put(location, mapping.get(location).getSet(name));
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the position mapping, from read offset to ROD
|
||||
*
|
||||
* @return a mapping of read offset to ROD(s)
|
||||
*/
|
||||
public Map<Integer, Collection<GATKFeature>> getReadOffsetMapping() {
|
||||
return createReadAlignment(record, mapping, null, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* get the position mapping, from read offset to ROD
|
||||
*
|
||||
* @return a mapping of genome loc position to ROD(s)
|
||||
*/
|
||||
public Map<Integer, Collection<GATKFeature>> getContigOffsetMapping() {
|
||||
return createGenomeLocAlignment(record, mapping, null, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* get the position mapping, from read offset to ROD
|
||||
*
|
||||
* @return a mapping of read offset to ROD(s)
|
||||
*/
|
||||
public Map<Integer, Collection<GATKFeature>> getReadOffsetMapping(String name) {
|
||||
return createReadAlignment(record, mapping, null, name);
|
||||
}
|
||||
|
||||
/**
|
||||
* get the position mapping, from read offset to ROD
|
||||
*
|
||||
* @return a mapping of genome loc position to ROD(s)
|
||||
*/
|
||||
public Map<Integer, Collection<GATKFeature>> getContigOffsetMapping(String name) {
|
||||
return createGenomeLocAlignment(record, mapping, null, name);
|
||||
}
|
||||
|
||||
/**
|
||||
* get the position mapping, from read offset to ROD
|
||||
*
|
||||
* @return a mapping of read offset to ROD(s)
|
||||
*/
|
||||
public Map<Integer, Collection<GATKFeature>> getReadOffsetMapping(Class cl) {
|
||||
return createReadAlignment(record, mapping, cl, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* get the position mapping, from read offset to ROD
|
||||
*
|
||||
* @return a mapping of genome loc position to ROD(s)
|
||||
*/
|
||||
public Map<Integer, Collection<GATKFeature>> getContigOffsetMapping(Class cl) {
|
||||
return createGenomeLocAlignment(record, mapping, cl, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* get the list of all the RODS overlapping this read, without any information about their position
|
||||
* @return a Collection (no order guaranteed), of all the RODs covering this read
|
||||
*/
|
||||
public List<GATKFeature> getAllCoveringRods() {
|
||||
List<GATKFeature> ret = new ArrayList<GATKFeature>();
|
||||
for (Map.Entry<Integer, RODMetaDataContainer> entry : mapping.entrySet())
|
||||
ret.addAll(entry.getValue().getSet());
|
||||
return ret;
|
||||
public ReadMetaDataTracker(Collection<RODRecordList> allBindings) {
|
||||
super(allBindings);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -620,16 +620,11 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
}
|
||||
|
||||
private void populateKnownIndels(ReadMetaDataTracker metaDataTracker, ReferenceContext ref) {
|
||||
for ( Collection<GATKFeature> rods : metaDataTracker.getContigOffsetMapping().values() ) {
|
||||
Iterator<GATKFeature> rodIter = rods.iterator();
|
||||
while ( rodIter.hasNext() ) {
|
||||
Object rod = rodIter.next().getUnderlyingObject();
|
||||
if ( indelRodsSeen.contains(rod) )
|
||||
continue;
|
||||
indelRodsSeen.add(rod);
|
||||
if ( rod instanceof VariantContext )
|
||||
knownIndelsToTry.add((VariantContext)rod);
|
||||
}
|
||||
for ( final VariantContext vc : metaDataTracker.getValues(known) ) {
|
||||
if ( indelRodsSeen.contains(vc) )
|
||||
continue;
|
||||
indelRodsSeen.add(vc);
|
||||
knownIndelsToTry.add(vc);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -125,6 +125,15 @@ public class GenomeLoc implements Comparable<GenomeLoc>, Serializable, HasGenome
|
|||
return ! discontinuousP( that );
|
||||
}
|
||||
|
||||
/**
|
||||
* Return true if this GenomeLoc represents the UNMAPPED location
|
||||
* @return
|
||||
*/
|
||||
public final boolean isUnmapped() {
|
||||
return isUnmapped(this);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns a new GenomeLoc that represents the entire span of this and that. Requires that
|
||||
* this and that GenomeLoc are contiguous and both mapped
|
||||
|
|
|
|||
|
|
@ -1,207 +1,347 @@
|
|||
/*
|
||||
* Copyright (c) 2010. The Broad Institute
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
* Copyright (c) 2010. The Broad Institute
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.datasources.providers;
|
||||
|
||||
import net.sf.picard.util.PeekableIterator;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.SAMSequenceDictionary;
|
||||
import org.testng.Assert;
|
||||
import org.broad.tribble.BasicFeature;
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.commandline.RodBinding;
|
||||
import org.broadinstitute.sting.gatk.refdata.RODRecordListImpl;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTrackerUnitTest;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
|
||||
import org.testng.annotations.BeforeMethod;
|
||||
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* <p/>
|
||||
* Class ReadBasedReferenceOrderedViewUnitTest
|
||||
* <p/>
|
||||
* test out the ReadBasedReferenceOrderedView class
|
||||
* @author depristo
|
||||
*/
|
||||
public class ReadBasedReferenceOrderedViewUnitTest extends BaseTest {
|
||||
private GenomeLocParser genomeLocParser;
|
||||
|
||||
private static int startingChr = 1;
|
||||
private static int endingChr = 2;
|
||||
private static int readCount = 100;
|
||||
private static int DEFAULT_READ_LENGTH = ArtificialSAMUtils.DEFAULT_READ_LENGTH;
|
||||
private static String contig;
|
||||
private static SAMFileHeader header;
|
||||
|
||||
private GenomeLocParser genomeLocParser;
|
||||
|
||||
@BeforeClass
|
||||
public void beforeClass() {
|
||||
header = ArtificialSAMUtils.createArtificialSamHeader((endingChr - startingChr) + 1, startingChr, readCount + DEFAULT_READ_LENGTH);
|
||||
contig = header.getSequence(0).getSequenceName();
|
||||
genomeLocParser = new GenomeLocParser(header.getSequenceDictionary());
|
||||
|
||||
initializeTests();
|
||||
}
|
||||
|
||||
@BeforeMethod
|
||||
public void beforeEach() {
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCreateReadMetaDataTrackerOnePerSite() {
|
||||
// make ten reads,
|
||||
List<SAMRecord> records = new ArrayList<SAMRecord>();
|
||||
for (int x = 1; x < 11; x++) {
|
||||
SAMRecord rec = ArtificialSAMUtils.createArtificialRead(header, "name", 0, x, 10);
|
||||
private class CompareFeatures implements Comparator<Feature> {
|
||||
@Override
|
||||
public int compare(Feature o1, Feature o2) {
|
||||
return genomeLocParser.createGenomeLoc(o1).compareTo(genomeLocParser.createGenomeLoc(o2));
|
||||
}
|
||||
GenomeLoc start = genomeLocParser.createGenomeLoc(header.getSequenceDictionary().getSequence(0).getSequenceName(), 0, 0);
|
||||
List<RMDDataState> list = new ArrayList<RMDDataState>();
|
||||
list.add(new RMDDataState(null, new FakePeekingRODIterator(genomeLocParser,start, "fakeName")));
|
||||
ReadBasedReferenceOrderedView view = new ReadBasedReferenceOrderedView(new WindowedData(list));
|
||||
}
|
||||
|
||||
for (SAMRecord rec : records) {
|
||||
ReadMetaDataTracker tracker = view.getReferenceOrderedDataForRead(rec);
|
||||
Map<Integer, Collection<GATKFeature>> map = tracker.getReadOffsetMapping();
|
||||
for (Integer i : map.keySet()) {
|
||||
Assert.assertEquals(map.get(i).size(), 1);
|
||||
private class ReadMetaDataTrackerRODStreamTest extends TestDataProvider {
|
||||
final List<Feature> allFeatures;
|
||||
final List<GenomeLoc> intervals;
|
||||
|
||||
public ReadMetaDataTrackerRODStreamTest(final List<Feature> allFeatures, final GenomeLoc interval) {
|
||||
this(allFeatures, Collections.singletonList(interval));
|
||||
}
|
||||
|
||||
public ReadMetaDataTrackerRODStreamTest(final List<Feature> allFeatures, final List<GenomeLoc> intervals) {
|
||||
super(ReadMetaDataTrackerRODStreamTest.class);
|
||||
this.allFeatures = new ArrayList<Feature>(allFeatures);
|
||||
Collections.sort(this.allFeatures, new CompareFeatures());
|
||||
this.intervals = new ArrayList<GenomeLoc>(intervals);
|
||||
Collections.sort(this.intervals);
|
||||
setName(String.format("%s nFeatures %d intervals %s", getClass().getSimpleName(), allFeatures.size(),
|
||||
intervals.size() == 1 ? intervals.get(0) : "size " + intervals.size()));
|
||||
}
|
||||
|
||||
public PeekableIterator<RODRecordList> getIterator(final String name) {
|
||||
return new PeekableIterator<RODRecordList>(new TribbleIteratorFromCollection(name, genomeLocParser, allFeatures));
|
||||
}
|
||||
|
||||
public Set<Feature> getExpectedOverlaps(final GenomeLoc interval) {
|
||||
final Set<Feature> overlapping = new HashSet<Feature>();
|
||||
for ( final Feature f : allFeatures )
|
||||
if ( genomeLocParser.createGenomeLoc(f).overlapsP(interval) )
|
||||
overlapping.add(f);
|
||||
return overlapping;
|
||||
}
|
||||
}
|
||||
|
||||
public void initializeTests() {
|
||||
final List<Feature> handPickedFeatures = new ArrayList<Feature>();
|
||||
|
||||
handPickedFeatures.add(new BasicFeature(contig, 1, 1));
|
||||
handPickedFeatures.add(new BasicFeature(contig, 2, 5));
|
||||
handPickedFeatures.add(new BasicFeature(contig, 4, 4));
|
||||
handPickedFeatures.add(new BasicFeature(contig, 6, 6));
|
||||
handPickedFeatures.add(new BasicFeature(contig, 9, 10));
|
||||
handPickedFeatures.add(new BasicFeature(contig, 10, 10));
|
||||
handPickedFeatures.add(new BasicFeature(contig, 10, 11));
|
||||
handPickedFeatures.add(new BasicFeature(contig, 13, 20));
|
||||
|
||||
createTestsForFeatures(handPickedFeatures);
|
||||
|
||||
// test in the present of a large spanning element
|
||||
{
|
||||
List<Feature> oneLargeSpan = new ArrayList<Feature>(handPickedFeatures);
|
||||
oneLargeSpan.add(new BasicFeature(contig, 1, 100));
|
||||
createTestsForFeatures(oneLargeSpan);
|
||||
}
|
||||
|
||||
// test in the presence of a partially spanning element
|
||||
{
|
||||
List<Feature> partialSpanStart = new ArrayList<Feature>(handPickedFeatures);
|
||||
partialSpanStart.add(new BasicFeature(contig, 1, 6));
|
||||
createTestsForFeatures(partialSpanStart);
|
||||
}
|
||||
|
||||
// test in the presence of a partially spanning element at the end
|
||||
{
|
||||
List<Feature> partialSpanEnd = new ArrayList<Feature>(handPickedFeatures);
|
||||
partialSpanEnd.add(new BasicFeature(contig, 10, 100));
|
||||
createTestsForFeatures(partialSpanEnd);
|
||||
}
|
||||
|
||||
// no data at all
|
||||
final GenomeLoc loc = genomeLocParser.createGenomeLoc(contig, 5, 5);
|
||||
new ReadMetaDataTrackerRODStreamTest(Collections.<Feature>emptyList(), loc);
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// tests for the lower level IntervalOverlappingRODsFromStream
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
@DataProvider(name = "ReadMetaDataTrackerRODStreamTest")
|
||||
public Object[][] createReadMetaDataTrackerRODStreamTest() {
|
||||
return ReadMetaDataTrackerRODStreamTest.getTests(ReadMetaDataTrackerRODStreamTest.class);
|
||||
}
|
||||
|
||||
private GenomeLoc span(final List<GenomeLoc> features) {
|
||||
int featuresStart = 1; for ( final GenomeLoc f : features ) featuresStart = Math.min(featuresStart, f.getStart());
|
||||
int featuresStop = 1; for ( final GenomeLoc f : features ) featuresStop = Math.max(featuresStop, f.getStop());
|
||||
return genomeLocParser.createGenomeLoc(contig, featuresStart, featuresStop);
|
||||
}
|
||||
|
||||
private void createTestsForFeatures(final List<Feature> features) {
|
||||
int featuresStart = 1; for ( final Feature f : features ) featuresStart = Math.min(featuresStart, f.getStart());
|
||||
int featuresStop = 1; for ( final Feature f : features ) featuresStop = Math.max(featuresStop, f.getEnd());
|
||||
|
||||
for ( final int size : Arrays.asList(1, 5, 10, 100, 1000) ) {
|
||||
final List<GenomeLoc> allIntervals = new ArrayList<GenomeLoc>();
|
||||
// regularly spaced
|
||||
for ( int start = featuresStart; start < featuresStop; start++) {
|
||||
final GenomeLoc loc = genomeLocParser.createGenomeLoc(contig, start, start + size - 1);
|
||||
allIntervals.add(loc);
|
||||
new ReadMetaDataTrackerRODStreamTest(features, loc);
|
||||
}
|
||||
Assert.assertEquals(map.keySet().size(), 10);
|
||||
|
||||
// starting and stopping at every feature
|
||||
for ( final Feature f : features ) {
|
||||
// just at the feature
|
||||
allIntervals.add(genomeLocParser.createGenomeLoc(contig, f.getStart(), f.getEnd()));
|
||||
new ReadMetaDataTrackerRODStreamTest(features, allIntervals.get(allIntervals.size() - 1));
|
||||
|
||||
// up to end
|
||||
allIntervals.add(genomeLocParser.createGenomeLoc(contig, f.getStart() - 1, f.getEnd()));
|
||||
new ReadMetaDataTrackerRODStreamTest(features, allIntervals.get(allIntervals.size() - 1));
|
||||
|
||||
// missing by 1
|
||||
allIntervals.add(genomeLocParser.createGenomeLoc(contig, f.getStart() + 1, f.getEnd() + 1));
|
||||
new ReadMetaDataTrackerRODStreamTest(features, allIntervals.get(allIntervals.size() - 1));
|
||||
|
||||
// just spanning
|
||||
allIntervals.add(genomeLocParser.createGenomeLoc(contig, f.getStart() - 1, f.getEnd() + 1));
|
||||
new ReadMetaDataTrackerRODStreamTest(features, allIntervals.get(allIntervals.size() - 1));
|
||||
}
|
||||
|
||||
new ReadMetaDataTrackerRODStreamTest(features, allIntervals);
|
||||
}
|
||||
}
|
||||
|
||||
@Test(enabled = true, dataProvider = "ReadMetaDataTrackerRODStreamTest")
|
||||
public void runReadMetaDataTrackerRODStreamTest_singleQuery(final ReadMetaDataTrackerRODStreamTest data) {
|
||||
if ( data.intervals.size() == 1 ) {
|
||||
final String name = "testName";
|
||||
final PeekableIterator<RODRecordList> iterator = data.getIterator(name);
|
||||
final IntervalOverlappingRODsFromStream stream = new IntervalOverlappingRODsFromStream(name, iterator);
|
||||
testRODStream(data, stream, Collections.singletonList(data.intervals.get(0)));
|
||||
}
|
||||
}
|
||||
|
||||
@Test(enabled = true, dataProvider = "ReadMetaDataTrackerRODStreamTest", dependsOnMethods = "runReadMetaDataTrackerRODStreamTest_singleQuery")
|
||||
public void runReadMetaDataTrackerRODStreamTest_multipleQueries(final ReadMetaDataTrackerRODStreamTest data) {
|
||||
if ( data.intervals.size() > 1 ) {
|
||||
final String name = "testName";
|
||||
final PeekableIterator<RODRecordList> iterator = data.getIterator(name);
|
||||
final IntervalOverlappingRODsFromStream stream = new IntervalOverlappingRODsFromStream(name, iterator);
|
||||
testRODStream(data, stream, data.intervals);
|
||||
}
|
||||
}
|
||||
|
||||
private void testRODStream(final ReadMetaDataTrackerRODStreamTest test, final IntervalOverlappingRODsFromStream stream, final List<GenomeLoc> intervals) {
|
||||
for ( final GenomeLoc interval : intervals ) {
|
||||
final RODRecordList query = stream.getOverlapping(interval);
|
||||
final HashSet<Feature> queryFeatures = new HashSet<Feature>();
|
||||
for ( final GATKFeature f : query ) queryFeatures.add((Feature)f.getUnderlyingObject());
|
||||
final Set<Feature> overlaps = test.getExpectedOverlaps(interval);
|
||||
|
||||
Assert.assertEquals(queryFeatures.size(), overlaps.size(), "IntervalOverlappingRODsFromStream didn't return the expected set of overlapping features." +
|
||||
" Expected size = " + overlaps.size() + " but saw " + queryFeatures.size());
|
||||
|
||||
BaseTest.assertEqualsSet(queryFeatures, overlaps, "IntervalOverlappingRODsFromStream didn't return the expected set of overlapping features." +
|
||||
" Expected = " + Utils.join(",", overlaps) + " but saw " + Utils.join(",", queryFeatures));
|
||||
}
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// tests for the higher level tracker itself
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
@DataProvider(name = "ReadMetaDataTrackerTests")
|
||||
public Object[][] createTrackerTests() {
|
||||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
|
||||
final Object[][] singleTests = ReadMetaDataTrackerRODStreamTest.getTests(ReadMetaDataTrackerRODStreamTest.class);
|
||||
final List<ReadMetaDataTrackerRODStreamTest> multiSiteTests = new ArrayList<ReadMetaDataTrackerRODStreamTest>();
|
||||
for ( final Object[] singleTest : singleTests ) {
|
||||
if ( ((ReadMetaDataTrackerRODStreamTest)singleTest[0]).intervals.size() > 1 )
|
||||
multiSiteTests.add((ReadMetaDataTrackerRODStreamTest)singleTest[0]);
|
||||
}
|
||||
|
||||
// all pairwise tests
|
||||
for ( List<ReadMetaDataTrackerRODStreamTest> singleTest : Utils.makePermutations(multiSiteTests, 2, false)) {
|
||||
tests.add(new Object[]{singleTest});
|
||||
}
|
||||
|
||||
// all 3 way pairwise tests
|
||||
for ( List<ReadMetaDataTrackerRODStreamTest> singleTest : Utils.makePermutations(multiSiteTests, 3, false)) {
|
||||
tests.add(new Object[]{singleTest});
|
||||
}
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
}
|
||||
@Test(enabled = true, dataProvider = "ReadMetaDataTrackerTests", dependsOnMethods = "runReadMetaDataTrackerRODStreamTest_multipleQueries")
|
||||
public void runReadMetaDataTrackerTest(final List<ReadMetaDataTrackerRODStreamTest> RODs) {
|
||||
final List<String> names = new ArrayList<String>();
|
||||
final List<PeekableIterator<RODRecordList>> iterators = new ArrayList<PeekableIterator<RODRecordList>>();
|
||||
final List<GenomeLoc> intervals = new ArrayList<GenomeLoc>();
|
||||
final List<RodBinding<Feature>> rodBindings = new ArrayList<RodBinding<Feature>>();
|
||||
|
||||
for ( int i = 0; i < RODs.size(); i++ ) {
|
||||
final RodBinding<Feature> rodBinding = new RodBinding<Feature>(Feature.class, "name"+i);
|
||||
rodBindings.add(rodBinding);
|
||||
final String name = rodBinding.getName();
|
||||
names.add(name);
|
||||
iterators.add(RODs.get(i).getIterator(name));
|
||||
intervals.addAll(RODs.get(i).intervals);
|
||||
}
|
||||
|
||||
class FakePeekingRODIterator implements LocationAwareSeekableRODIterator {
|
||||
private GenomeLocParser genomeLocParser;
|
||||
Collections.sort(intervals);
|
||||
final GenomeLoc span = span(intervals);
|
||||
final ReadBasedReferenceOrderedView view = new ReadBasedReferenceOrderedView(genomeLocParser, span, names, iterators);
|
||||
|
||||
// current location
|
||||
private GenomeLoc location;
|
||||
private GATKFeature curROD;
|
||||
private final String name;
|
||||
for ( final GenomeLoc interval : intervals ) {
|
||||
final ReadMetaDataTracker tracker = view.getReferenceOrderedDataForInterval(interval);
|
||||
|
||||
public FakePeekingRODIterator(GenomeLocParser genomeLocParser, GenomeLoc startingLoc, String name) {
|
||||
this.name = name;
|
||||
this.location = genomeLocParser.createGenomeLoc(startingLoc.getContig(), startingLoc.getStart() + 1, startingLoc.getStop() + 1);
|
||||
for ( int i = 0; i < RODs.size(); i++ ) {
|
||||
final ReadMetaDataTrackerRODStreamTest test = RODs.get(i);
|
||||
final List<Feature> queryFeaturesList = tracker.getValues(rodBindings.get(i));
|
||||
final Set<Feature> queryFeatures = new HashSet<Feature>(queryFeaturesList);
|
||||
final Set<Feature> overlaps = test.getExpectedOverlaps(interval);
|
||||
|
||||
Assert.assertEquals(queryFeatures.size(), overlaps.size(), "IntervalOverlappingRODsFromStream didn't return the expected set of overlapping features." +
|
||||
" Expected size = " + overlaps.size() + " but saw " + queryFeatures.size());
|
||||
|
||||
BaseTest.assertEqualsSet(queryFeatures, overlaps, "IntervalOverlappingRODsFromStream didn't return the expected set of overlapping features." +
|
||||
" Expected = " + Utils.join(",", overlaps) + " but saw " + Utils.join(",", queryFeatures));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the header associated with the backing input stream.
|
||||
* @return the ROD header.
|
||||
* Created with IntelliJ IDEA.
|
||||
* User: depristo
|
||||
* Date: 8/29/12
|
||||
* Time: 1:19 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
@Override
|
||||
public Object getHeader() {
|
||||
return null;
|
||||
}
|
||||
static class TribbleIteratorFromCollection implements Iterator<RODRecordList> {
|
||||
// current location
|
||||
private final String name;
|
||||
final Queue<GATKFeature> gatkFeatures;
|
||||
|
||||
/**
|
||||
* Gets the sequence dictionary associated with the backing input stream.
|
||||
* @return sequence dictionary from the ROD header.
|
||||
*/
|
||||
@Override
|
||||
public SAMSequenceDictionary getSequenceDictionary() {
|
||||
return null;
|
||||
}
|
||||
public TribbleIteratorFromCollection(final String name, final GenomeLocParser genomeLocParser, final List<Feature> features) {
|
||||
this.name = name;
|
||||
|
||||
this.gatkFeatures = new LinkedList<GATKFeature>();
|
||||
for ( final Feature f : features )
|
||||
gatkFeatures.add(new GATKFeature.TribbleGATKFeature(genomeLocParser, f, name));
|
||||
}
|
||||
|
||||
@Override
|
||||
public GenomeLoc peekNextLocation() {
|
||||
System.err.println("Peek Next -> " + location);
|
||||
return location;
|
||||
}
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
return ! gatkFeatures.isEmpty();
|
||||
}
|
||||
|
||||
@Override
|
||||
public GenomeLoc position() {
|
||||
return location;
|
||||
}
|
||||
@Override
|
||||
public RODRecordList next() {
|
||||
final GATKFeature first = gatkFeatures.poll();
|
||||
final Collection<GATKFeature> myFeatures = new LinkedList<GATKFeature>();
|
||||
myFeatures.add(first);
|
||||
while ( gatkFeatures.peek() != null && gatkFeatures.peek().getLocation().getStart() == first.getStart() )
|
||||
myFeatures.add(gatkFeatures.poll());
|
||||
|
||||
@Override
|
||||
public RODRecordList seekForward(GenomeLoc interval) {
|
||||
while (location.isBefore(interval))
|
||||
next();
|
||||
return next(); // we always move by one, we know the next location will be right
|
||||
}
|
||||
GenomeLoc loc = first.getLocation();
|
||||
for ( final GATKFeature feature : myFeatures )
|
||||
loc = loc.merge(feature.getLocation());
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
return true; // we always have next
|
||||
}
|
||||
return new RODRecordListImpl(name, myFeatures, loc); // is this safe?
|
||||
}
|
||||
|
||||
@Override
|
||||
public RODRecordList next() {
|
||||
System.err.println("Next -> " + location);
|
||||
curROD = new ReadMetaDataTrackerUnitTest.FakeRODatum(location, name);
|
||||
location = genomeLocParser.createGenomeLoc(location.getContig(), location.getStart() + 1, location.getStop() + 1);
|
||||
FakeRODRecordList list = new FakeRODRecordList();
|
||||
list.add(curROD);
|
||||
return list;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void remove() {
|
||||
throw new IllegalStateException("GRRR");
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
// nothing to do
|
||||
@Override public void remove() { throw new IllegalStateException("GRRR"); }
|
||||
}
|
||||
}
|
||||
|
||||
class FakeRODRecordList extends AbstractList<GATKFeature> implements RODRecordList {
|
||||
private final List<GATKFeature> list = new ArrayList<GATKFeature>();
|
||||
|
||||
public boolean add(GATKFeature data) {
|
||||
return list.add(data);
|
||||
}
|
||||
|
||||
@Override
|
||||
public GATKFeature get(int i) {
|
||||
return list.get(i);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int size() {
|
||||
return list.size();
|
||||
}
|
||||
|
||||
@Override
|
||||
public GenomeLoc getLocation() {
|
||||
return list.get(0).getLocation();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getName() {
|
||||
return "test";
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareTo(RODRecordList rodRecordList) {
|
||||
return this.list.get(0).getLocation().compareTo(rodRecordList.getLocation());
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,276 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010. The Broad Institute
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.refdata;
|
||||
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.testng.Assert;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.RODMetaDataContainer;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
|
||||
import org.testng.annotations.BeforeMethod;
|
||||
|
||||
import org.testng.annotations.BeforeClass;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* <p/>
|
||||
* Class ReadMetaDataTrackerUnitTest
|
||||
* <p/>
|
||||
* test out the ReadMetaDataTracker
|
||||
*/
|
||||
public class ReadMetaDataTrackerUnitTest extends BaseTest {
|
||||
private static int startingChr = 1;
|
||||
private static int endingChr = 2;
|
||||
private static int readCount = 100;
|
||||
private static int DEFAULT_READ_LENGTH = ArtificialSAMUtils.DEFAULT_READ_LENGTH;
|
||||
private static SAMFileHeader header;
|
||||
private Set<String> nameSet;
|
||||
|
||||
private GenomeLocParser genomeLocParser;
|
||||
|
||||
@BeforeClass
|
||||
public void beforeClass() {
|
||||
header = ArtificialSAMUtils.createArtificialSamHeader((endingChr - startingChr) + 1, startingChr, readCount + DEFAULT_READ_LENGTH);
|
||||
genomeLocParser = new GenomeLocParser(header.getSequenceDictionary());
|
||||
}
|
||||
|
||||
@BeforeMethod
|
||||
public void beforeEach() {
|
||||
nameSet = new TreeSet<String>();
|
||||
nameSet.add("default");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void twoRodsAtEachReadBase() {
|
||||
nameSet.add("default2");
|
||||
ReadMetaDataTracker tracker = getRMDT(1, nameSet, true);
|
||||
|
||||
// count the positions
|
||||
int count = 0;
|
||||
for (Integer x : tracker.getReadOffsetMapping().keySet()) {
|
||||
count++;
|
||||
Assert.assertEquals(tracker.getReadOffsetMapping().get(x).size(), 2);
|
||||
}
|
||||
Assert.assertEquals(count, 10);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void rodAtEachReadBase() {
|
||||
|
||||
ReadMetaDataTracker tracker = getRMDT(1, nameSet, true);
|
||||
|
||||
// count the positions
|
||||
int count = 0;
|
||||
for (Integer x : tracker.getReadOffsetMapping().keySet()) {
|
||||
count++;
|
||||
Assert.assertEquals(tracker.getReadOffsetMapping().get(x).size(), 1);
|
||||
}
|
||||
Assert.assertEquals(count, 10);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void filterByName() {
|
||||
nameSet.add("default2");
|
||||
ReadMetaDataTracker tracker = getRMDT(1, nameSet, true);
|
||||
|
||||
// count the positions
|
||||
int count = 0;
|
||||
Map<Integer, Collection<GATKFeature>> map = tracker.getReadOffsetMapping("default");
|
||||
for (Integer x : map.keySet()) {
|
||||
count++;
|
||||
Assert.assertEquals(map.get(x).size(), 1);
|
||||
}
|
||||
Assert.assertEquals(count, 10);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void filterByDupType() {
|
||||
nameSet.add("default2");
|
||||
ReadMetaDataTracker tracker = getRMDT(1, nameSet, false); // create both RODs of the same type
|
||||
// count the positions
|
||||
int count = 0;
|
||||
Map<Integer, Collection<GATKFeature>> map = tracker.getReadOffsetMapping(FakeRODatum.class);
|
||||
for (Integer x : map.keySet()) {
|
||||
count++;
|
||||
Assert.assertEquals(map.get(x).size(), 2);
|
||||
}
|
||||
Assert.assertEquals(count, 10);
|
||||
}
|
||||
|
||||
// @Test this test can be uncommented to determine the speed impacts of any changes to the RODs for reads system
|
||||
|
||||
public void filterByMassiveDupType() {
|
||||
|
||||
for (int y = 0; y < 20; y++) {
|
||||
nameSet.add("default" + String.valueOf(y));
|
||||
long firstTime = System.currentTimeMillis();
|
||||
for (int lp = 0; lp < 1000; lp++) {
|
||||
ReadMetaDataTracker tracker = getRMDT(1, nameSet, false); // create both RODs of the same type
|
||||
// count the positions
|
||||
int count = 0;
|
||||
Map<Integer, Collection<GATKFeature>> map = tracker.getReadOffsetMapping(FakeRODatum.class);
|
||||
for (Integer x : map.keySet()) {
|
||||
count++;
|
||||
Assert.assertEquals(map.get(x).size(), y + 2);
|
||||
}
|
||||
Assert.assertEquals(count, 10);
|
||||
}
|
||||
System.err.println(y + " = " + (System.currentTimeMillis() - firstTime));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void filterByType() {
|
||||
nameSet.add("default2");
|
||||
ReadMetaDataTracker tracker = getRMDT(1, nameSet, true);
|
||||
|
||||
// count the positions
|
||||
int count = 0;
|
||||
Map<Integer, Collection<GATKFeature>> map = tracker.getReadOffsetMapping(Fake2RODatum.class);
|
||||
for (int x : map.keySet()) {
|
||||
count++;
|
||||
Assert.assertEquals(map.get(x).size(), 1);
|
||||
}
|
||||
Assert.assertEquals(count, 10);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void sparceRODsForRead() {
|
||||
ReadMetaDataTracker tracker = getRMDT(7, nameSet, true);
|
||||
|
||||
// count the positions
|
||||
int count = 0;
|
||||
for (Integer x : tracker.getReadOffsetMapping().keySet()) {
|
||||
count++;
|
||||
Assert.assertEquals(tracker.getReadOffsetMapping().get(x).size(), 1);
|
||||
}
|
||||
Assert.assertEquals(count, 2);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void rodByGenomeLoc() {
|
||||
ReadMetaDataTracker tracker = getRMDT(1, nameSet, true);
|
||||
|
||||
// count the positions
|
||||
int count = 0;
|
||||
for (Integer x : tracker.getContigOffsetMapping().keySet()) {
|
||||
count++;
|
||||
Assert.assertEquals(tracker.getContigOffsetMapping().get(x).size(), 1);
|
||||
}
|
||||
Assert.assertEquals(count, 10);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* create a ReadMetaDataTracker given:
|
||||
*
|
||||
* @param incr the spacing between site locations
|
||||
* @param names the names of the reference ordered data to create: one will be created at every location for each name
|
||||
*
|
||||
* @return a ReadMetaDataTracker
|
||||
*/
|
||||
private ReadMetaDataTracker getRMDT(int incr, Set<String> names, boolean alternateTypes) {
|
||||
SAMRecord record = ArtificialSAMUtils.createArtificialRead(header, "name", 0, 1, 10);
|
||||
TreeMap<Integer, RODMetaDataContainer> data = new TreeMap<Integer, RODMetaDataContainer>();
|
||||
for (int x = 0; x < record.getAlignmentEnd(); x += incr) {
|
||||
GenomeLoc loc = genomeLocParser.createGenomeLoc(record.getReferenceName(), record.getAlignmentStart() + x, record.getAlignmentStart() + x);
|
||||
RODMetaDataContainer set = new RODMetaDataContainer();
|
||||
|
||||
int cnt = 0;
|
||||
for (String name : names) {
|
||||
if (alternateTypes)
|
||||
set.addEntry((cnt % 2 == 0) ? new FakeRODatum(loc, name) : new Fake2RODatum(loc, name));
|
||||
else
|
||||
set.addEntry(new FakeRODatum(loc, name));
|
||||
cnt++;
|
||||
}
|
||||
data.put(record.getAlignmentStart() + x, set);
|
||||
}
|
||||
ReadMetaDataTracker tracker = new ReadMetaDataTracker(genomeLocParser, record, data);
|
||||
return tracker;
|
||||
}
|
||||
|
||||
|
||||
/** for testing, we want a fake rod with a different classname, for the get-by-class-name functions */
|
||||
static public class Fake2RODatum extends FakeRODatum {
|
||||
|
||||
public Fake2RODatum(GenomeLoc location, String name) {
|
||||
super(location, name);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/** for testing only */
|
||||
static public class FakeRODatum extends GATKFeature {
|
||||
|
||||
final GenomeLoc location;
|
||||
final String name;
|
||||
|
||||
public FakeRODatum(GenomeLoc location, String name) {
|
||||
super(name);
|
||||
this.location = location;
|
||||
this.name = name;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getName() {
|
||||
return name;
|
||||
}
|
||||
|
||||
@Override
|
||||
public GenomeLoc getLocation() {
|
||||
return this.location;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object getUnderlyingObject() {
|
||||
return null; //To change body of implemented methods use File | Settings | File Templates.
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getChr() {
|
||||
return location.getContig();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getStart() {
|
||||
return (int)this.location.getStart();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getEnd() {
|
||||
return (int)this.location.getStop();
|
||||
}
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue