Part I of GSA-462: Consistent RODBinding access across Ref and Read trackers

-- ReadMetaDataTracker is dead!  Long live the RefMetaDataTracker.  Read walkers will soon just take RefMetaDataTracker objects.  In this commit they take a class that trivially extends them
-- Rewrote ReadBasedReferenceOrderedView to produce RefMetaDataTrackers not the old class.
    -- This new implementation produces thread-safe objects (i.e., holds no points to shared state).  Suitable for use (to be tested) with nano scheduling
    -- Simplified interfaces to use the simplest data structures (PeekableIterator) not the LocusAwareSeekableIterator, since I both hate those classes and this is on the long term trajectory to remove those from the GATK entirely.
-- Massively expanded DataProvider unit tests for ReadBasedReferenceOrderedView
-- Note that the old implementation of offset -> ROD in ReadRefMetaDataTracker was broken for any read not completely matching the reference.  Rather than provide broken code the ReadMetaDataTracker only provides a "bag of RODs" interface.  If you want to work with the relationship between the read and the RODs in your tool you need to manage the CIGAR element itself.
    -- This commit breaks the new read walker BQSR, but Ryan knows this is coming
-- Subsequent commit will be retiring / fixing ValidateRODForReads
This commit is contained in:
Mark DePristo 2012-08-29 16:58:21 -04:00
parent 8fc6a0a68b
commit 972be8b4a4
7 changed files with 537 additions and 694 deletions

View File

@ -0,0 +1,143 @@
package org.broadinstitute.sting.gatk.datasources.providers;
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import net.sf.picard.util.PeekableIterator;
import org.broadinstitute.sting.gatk.refdata.RODRecordListImpl;
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
import org.broadinstitute.sting.utils.GenomeLoc;
import java.util.Collection;
import java.util.LinkedList;
import java.util.ListIterator;
/**
* Key algorithmic helper for ReadBasedReferenceOrderedData
*
* Takes a single iterator of features, and provides a single capability that returns
* the list of RODs that overlap an interval. Allows sequential getOverlapping calls
* from intervals provided that these intervals always have increasing getStart() values.
*
*/
class IntervalOverlappingRODsFromStream {
/**
* Only held for QC purposes
*/
GenomeLoc lastQuery = null;
private final String name;
private final LinkedList<GATKFeature> currentFeatures = new LinkedList<GATKFeature>();
private final PeekableIterator<RODRecordList> futureFeatures;
/**
* Create a new IntervalOverlappingRODsFromStream that reads elements from futureFeatures and
* returns RODRecordLists having name
*
* @param name
* @param futureFeatures
*/
IntervalOverlappingRODsFromStream(final String name, final PeekableIterator<RODRecordList> futureFeatures) {
if ( futureFeatures == null ) throw new IllegalArgumentException("futureFeatures cannot be null");
this.name = name;
this.futureFeatures = futureFeatures;
}
/**
* Get the list of RODs overlapping loc from this stream of RODs.
*
* Sequential calls to this function must obey the rule that loc2.getStart >= loc1.getStart
*
* @param loc the interval to query
* @return a non-null RODRecordList containing the overlapping RODs, which may be empty
*/
@Ensures({"overlaps(loc, result)",
"! futureFeatures.hasNext() || futureFeatures.peek().getLocation().isPast(loc)",
"result != null"})
public RODRecordList getOverlapping(final GenomeLoc loc) {
if ( lastQuery != null && loc.getStart() < lastQuery.getStart() )
throw new IllegalArgumentException(String.format("BUG: query interval (%s) starts before the previous interval %s", loc, lastQuery));
trimCurrentFeaturesToLoc(loc);
readOverlappingFutureFeatures(loc);
return new RODRecordListImpl(name, subsetToOverlapping(loc, currentFeatures), loc);
}
/**
* For contract assurance. Checks that all bindings in loc overlap
*
* @param loc
* @param bindings
* @return
*/
@Requires({"loc != null", "bindings != null"})
private boolean overlaps(final GenomeLoc loc, final RODRecordList bindings) {
for ( final GATKFeature feature : bindings )
if ( ! feature.getLocation().overlapsP(loc) )
return false;
return true;
}
/**
* Subset the features in all to those that overlap with loc
*
* The current features list contains everything read that cannot be thrown away yet, but not
* everything in there necessarily overlaps with loc. Subset to just those that do overlap
*
* @param loc the location that features must overlap
* @param all the list of all features
* @return a subset of all that overlaps with loc
*/
@Requires({"loc != null", "all != null"})
@Ensures("result.size() <= all.size()")
private Collection<GATKFeature> subsetToOverlapping(final GenomeLoc loc, final Collection<GATKFeature> all) {
final LinkedList<GATKFeature> overlapping = new LinkedList<GATKFeature>();
for ( final GATKFeature feature : all )
if ( feature.getLocation().overlapsP(loc) )
overlapping.add(feature);
return overlapping;
}
/**
* Update function. Remove all elements of currentFeatures that end before loc
*
* @param loc the location to use
*/
@Requires("loc != null")
@Ensures("currentFeatures.size() <= old(currentFeatures.size())")
private void trimCurrentFeaturesToLoc(final GenomeLoc loc) {
final ListIterator<GATKFeature> it = currentFeatures.listIterator();
while ( it.hasNext() ) {
final GATKFeature feature = it.next();
if ( feature.getLocation().isBefore(loc) )
it.remove();
}
}
/**
* Update function: Read all elements from futureFeatures that overlap with loc
*
* Stops at the first element that starts before the end of loc, or the stream empties
*
* @param loc
*/
@Requires("loc != null")
@Ensures("currentFeatures.size() >= old(currentFeatures.size())")
private void readOverlappingFutureFeatures(final GenomeLoc loc) {
while ( futureFeatures.hasNext() ) {
final GenomeLoc nextLoc = futureFeatures.peek().getLocation();
if ( nextLoc.isBefore(loc) ) {
futureFeatures.next(); // next rod element is before loc, throw it away and keep looking
} else if ( nextLoc.isPast(loc) ) {
break; // next element is past loc, stop looking but don't pop it
} else if ( nextLoc.overlapsP(loc) ) {
// add overlapping elements to our current features, removing from stream
for ( final GATKFeature feature : futureFeatures.next() ) {
currentFeatures.add(feature);
}
}
}
}
}

View File

@ -23,40 +23,63 @@
package org.broadinstitute.sting.gatk.datasources.providers;
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import net.sf.picard.util.PeekableIterator;
import net.sf.samtools.SAMRecord;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.TreeMap;
/** a ROD view for reads. This provides the Read traversals a way of getting a ReadMetaDataTracker */
public class ReadBasedReferenceOrderedView implements View {
private final WindowedData window;
// a list of the RMDDataState (location->iterators)
private final List<RMDDataState> states = new ArrayList<RMDDataState>(1);
private final static ReadMetaDataTracker EMPTY_TRACKER = new ReadMetaDataTracker();
public ReadBasedReferenceOrderedView(ShardDataProvider provider) {
window = new WindowedData(provider);
/**
* Used to get genome locs for reads
*/
private final GenomeLocParser genomeLocParser;
/**
* The total extent of all reads in this span. We create iterators from our RODs
* from the start of this span, to the end.
*/
private final GenomeLoc shardSpan;
public ReadBasedReferenceOrderedView(final ShardDataProvider provider) {
this(provider.getGenomeLocParser(), provider.getShard().getLocation());
provider.register(this);
if ( provider.getReferenceOrderedData() != null && ! shardSpan.isUnmapped() ) {
for (ReferenceOrderedDataSource dataSource : provider.getReferenceOrderedData())
states.add(new RMDDataState(dataSource, dataSource.seek(shardSpan)));
}
}
private ReadBasedReferenceOrderedView(final GenomeLocParser genomeLocParser, final GenomeLoc shardSpan) {
this.genomeLocParser = genomeLocParser;
this.shardSpan = shardSpan;
}
/**
* for testing only please
*
* @param data the window provider
* Testing constructor
*/
ReadBasedReferenceOrderedView(WindowedData data) {
window = data;
}
public ReadMetaDataTracker getReferenceOrderedDataForRead(SAMRecord read) {
return window.getTracker(read);
protected ReadBasedReferenceOrderedView(final GenomeLocParser genomeLocParser,
final GenomeLoc shardSpan,
final List<String> names,
final List<PeekableIterator<RODRecordList>> featureSources) {
this(genomeLocParser, shardSpan);
for ( int i = 0; i < names.size(); i++ )
states.add(new RMDDataState(names.get(i), featureSources.get(i)));
}
public Collection<Class<? extends View>> getConflictingViews() {
@ -65,74 +88,6 @@ public class ReadBasedReferenceOrderedView implements View {
return classes;
}
public void close() {
if (window != null) window.close();
}
}
/** stores a window of data, dropping RODs if we've passed the new reads start point. */
class WindowedData {
// the queue of possibly in-frame RODs; RODs are removed as soon as they are out of scope
private final TreeMap<Integer, RODMetaDataContainer> mapping = new TreeMap<Integer, RODMetaDataContainer>();
// our current location from the last read we processed
private GenomeLoc currentLoc;
// a list of the RMDDataState (location->iterators)
private List<RMDDataState> states;
// the provider; where we get all our information
private final ShardDataProvider provider;
/**
* our log, which we want to capture anything from this class
*/
private static Logger logger = Logger.getLogger(WindowedData.class);
/**
* create a WindowedData given a shard provider
*
* @param provider the ShardDataProvider
*/
public WindowedData(ShardDataProvider provider) {
this.provider = provider;
}
/**
* load the states dynamically, since the only way to get a genome loc is from the read (the shard doesn't have one)
*
* @param provider the ShardDataProvider
* @param rec the current read
*/
private void getStates(ShardDataProvider provider, SAMRecord rec) {
int stop = Integer.MAX_VALUE;
// figure out the appropriate alignment stop
if (provider.hasReference()) {
stop = provider.getReference().getSequenceDictionary().getSequence(rec.getReferenceIndex()).getSequenceLength();
}
// calculate the range of positions we need to look at
GenomeLoc range = provider.getGenomeLocParser().createGenomeLoc(rec.getReferenceName(),
rec.getAlignmentStart(),
stop);
states = new ArrayList<RMDDataState>();
if (provider.getReferenceOrderedData() != null)
for (ReferenceOrderedDataSource dataSource : provider.getReferenceOrderedData())
states.add(new RMDDataState(dataSource, dataSource.seek(range)));
}
/**
* this function is for testing only
*
* @param states a list of RMDDataState to initialize with
*/
WindowedData(List<RMDDataState> states) {
this.states = states;
provider = null;
}
/**
* create a ReadMetaDataTracker given the current read
*
@ -140,60 +95,65 @@ class WindowedData {
*
* @return a ReadMetaDataTracker for the read, from which you can get ROD -> read alignments
*/
public ReadMetaDataTracker getTracker(SAMRecord rec) {
updatePosition(rec);
return new ReadMetaDataTracker(provider.getGenomeLocParser(), rec, mapping);
@Requires("rec != null")
@Ensures("result != null")
public ReadMetaDataTracker getReferenceOrderedDataForRead(final SAMRecord rec) {
if ( rec.getReadUnmappedFlag() )
// empty RODs for unmapped reads
return new ReadMetaDataTracker();
else
return getReferenceOrderedDataForInterval(genomeLocParser.createGenomeLoc(rec));
}
/**
* update the position we're storing
*
* @param rec the read to use for start and end
*/
private void updatePosition(SAMRecord rec) {
if (states == null) getStates(this.provider, rec);
currentLoc = provider.getGenomeLocParser().createGenomeLoc(rec);
// flush the queue looking for records we've passed over
while (mapping.size() > 0 && mapping.firstKey() < currentLoc.getStart())
mapping.pollFirstEntry(); // toss away records that we've passed
// add new data to the queue
for (RMDDataState state : states) {
// move into position
while (state.iterator.hasNext() && state.iterator.peekNextLocation().isBefore(currentLoc))
state.iterator.next();
while (state.iterator.hasNext() && state.iterator.peekNextLocation().overlapsP(currentLoc)) {
RODRecordList list = state.iterator.next();
for (GATKFeature datum : list) {
if (!mapping.containsKey(list.getLocation().getStart()))
mapping.put(list.getLocation().getStart(), new RODMetaDataContainer());
mapping.get(list.getLocation().getStart()).addEntry(datum);
}
}
@Requires({"interval != null", "shardSpan.containsP(interval)"})
@Ensures("result != null")
public ReadMetaDataTracker getReferenceOrderedDataForInterval(final GenomeLoc interval) {
if ( states.isEmpty() ) // optimization for no bindings (common for read walkers)
return EMPTY_TRACKER;
else {
final List<RODRecordList> bindings = new ArrayList<RODRecordList>(states.size());
for ( final RMDDataState state : states )
bindings.add(state.stream.getOverlapping(interval));
return new ReadMetaDataTracker(bindings);
}
}
/** Closes the current view. */
/**
* Closes the current view.
*/
public void close() {
if (states == null) return;
for (RMDDataState state : states)
state.dataSource.close( state.iterator );
for (final RMDDataState state : states)
state.close();
// Clear out the existing data so that post-close() accesses to this data will fail-fast.
states = null;
states.clear();
}
/** Models the traversal state of a given ROD lane. */
private static class RMDDataState {
public final ReferenceOrderedDataSource dataSource;
public final IntervalOverlappingRODsFromStream stream;
private final LocationAwareSeekableRODIterator iterator;
}
public RMDDataState(ReferenceOrderedDataSource dataSource, LocationAwareSeekableRODIterator iterator) {
this.dataSource = dataSource;
this.iterator = iterator;
this.stream = new IntervalOverlappingRODsFromStream(dataSource.getName(), new PeekableIterator<RODRecordList>(iterator));
}
/** Models the traversal state of a given ROD lane. */
class RMDDataState {
public final ReferenceOrderedDataSource dataSource;
public final LocationAwareSeekableRODIterator iterator;
/**
* For testing
*/
public RMDDataState(final String name, final PeekableIterator<RODRecordList> iterator) {
this.dataSource = null;
this.iterator = null;
this.stream = new IntervalOverlappingRODsFromStream(name, new PeekableIterator<RODRecordList>(iterator));
}
public RMDDataState(ReferenceOrderedDataSource dataSource, LocationAwareSeekableRODIterator iterator) {
this.dataSource = dataSource;
this.iterator = iterator;
public void close() {
if ( dataSource != null )
dataSource.close( iterator );
}
}
}

View File

@ -26,6 +26,7 @@ package org.broadinstitute.sting.gatk.refdata;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.datasources.providers.RODMetaDataContainer;
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
@ -39,141 +40,12 @@ import java.util.*;
* <p/>
* a read-based meta data tracker
*/
public class ReadMetaDataTracker {
/**
* The parser, used to create new GenomeLocs.
*/
private final GenomeLocParser genomeLocParser;
private final SAMRecord record;
// the buffer of positions and RODs we've stored
private final TreeMap<Integer, RODMetaDataContainer> mapping;
/**
* create a read meta data tracker, given the read and a queue of RODatum positions
*
* @param record the read to create offset from
* @param mapping the mapping of reference ordered datum
*/
public ReadMetaDataTracker(GenomeLocParser genomeLocParser, SAMRecord record, TreeMap<Integer, RODMetaDataContainer> mapping) {
this.genomeLocParser = genomeLocParser;
this.record = record;
this.mapping = mapping;
public class ReadMetaDataTracker extends RefMetaDataTracker {
public ReadMetaDataTracker() {
super();
}
/**
* create an alignment of read position to reference ordered datum
*
* @param record the SAMRecord
* @param queue the queue (as a tree set)
* @param cl the class name, null if not filtered by classname
* @param name the datum track name, null if not filtered by name
*
* @return a mapping from the position in the read to the reference ordered datum
*/
private Map<Integer, Collection<GATKFeature>> createReadAlignment(SAMRecord record, TreeMap<Integer, RODMetaDataContainer> queue, Class cl, String name) {
if (name != null && cl != null) throw new IllegalStateException("Both a class and name cannot be specified");
Map<Integer, Collection<GATKFeature>> ret = new LinkedHashMap<Integer, Collection<GATKFeature>>();
GenomeLoc location = genomeLocParser.createGenomeLoc(record);
int length = record.getReadLength();
for (Integer loc : queue.keySet()) {
Integer position = loc - location.getStart();
if (position >= 0 && position < length) {
Collection<GATKFeature> set;
if (cl != null)
set = queue.get(loc).getSet(cl);
else
set = queue.get(loc).getSet(name);
if (set != null && set.size() > 0)
ret.put(position, set);
}
}
return ret;
}
/**
* create an alignment of read position to reference ordered datum
*
* @return a mapping from the position in the read to the reference ordered datum
*/
private Map<Integer, Collection<GATKFeature>> createGenomeLocAlignment(SAMRecord record, TreeMap<Integer, RODMetaDataContainer> mapping, Class cl, String name) {
Map<Integer, Collection<GATKFeature>> ret = new LinkedHashMap<Integer, Collection<GATKFeature>>();
int start = record.getAlignmentStart();
int stop = record.getAlignmentEnd();
for (Integer location : mapping.keySet()) {
if (location >= start && location <= stop)
if (cl != null)
ret.put(location, mapping.get(location).getSet(cl));
else
ret.put(location, mapping.get(location).getSet(name));
}
return ret;
}
/**
* get the position mapping, from read offset to ROD
*
* @return a mapping of read offset to ROD(s)
*/
public Map<Integer, Collection<GATKFeature>> getReadOffsetMapping() {
return createReadAlignment(record, mapping, null, null);
}
/**
* get the position mapping, from read offset to ROD
*
* @return a mapping of genome loc position to ROD(s)
*/
public Map<Integer, Collection<GATKFeature>> getContigOffsetMapping() {
return createGenomeLocAlignment(record, mapping, null, null);
}
/**
* get the position mapping, from read offset to ROD
*
* @return a mapping of read offset to ROD(s)
*/
public Map<Integer, Collection<GATKFeature>> getReadOffsetMapping(String name) {
return createReadAlignment(record, mapping, null, name);
}
/**
* get the position mapping, from read offset to ROD
*
* @return a mapping of genome loc position to ROD(s)
*/
public Map<Integer, Collection<GATKFeature>> getContigOffsetMapping(String name) {
return createGenomeLocAlignment(record, mapping, null, name);
}
/**
* get the position mapping, from read offset to ROD
*
* @return a mapping of read offset to ROD(s)
*/
public Map<Integer, Collection<GATKFeature>> getReadOffsetMapping(Class cl) {
return createReadAlignment(record, mapping, cl, null);
}
/**
* get the position mapping, from read offset to ROD
*
* @return a mapping of genome loc position to ROD(s)
*/
public Map<Integer, Collection<GATKFeature>> getContigOffsetMapping(Class cl) {
return createGenomeLocAlignment(record, mapping, cl, null);
}
/**
* get the list of all the RODS overlapping this read, without any information about their position
* @return a Collection (no order guaranteed), of all the RODs covering this read
*/
public List<GATKFeature> getAllCoveringRods() {
List<GATKFeature> ret = new ArrayList<GATKFeature>();
for (Map.Entry<Integer, RODMetaDataContainer> entry : mapping.entrySet())
ret.addAll(entry.getValue().getSet());
return ret;
public ReadMetaDataTracker(Collection<RODRecordList> allBindings) {
super(allBindings);
}
}

View File

@ -620,16 +620,11 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
}
private void populateKnownIndels(ReadMetaDataTracker metaDataTracker, ReferenceContext ref) {
for ( Collection<GATKFeature> rods : metaDataTracker.getContigOffsetMapping().values() ) {
Iterator<GATKFeature> rodIter = rods.iterator();
while ( rodIter.hasNext() ) {
Object rod = rodIter.next().getUnderlyingObject();
if ( indelRodsSeen.contains(rod) )
continue;
indelRodsSeen.add(rod);
if ( rod instanceof VariantContext )
knownIndelsToTry.add((VariantContext)rod);
}
for ( final VariantContext vc : metaDataTracker.getValues(known) ) {
if ( indelRodsSeen.contains(vc) )
continue;
indelRodsSeen.add(vc);
knownIndelsToTry.add(vc);
}
}

View File

@ -125,6 +125,15 @@ public class GenomeLoc implements Comparable<GenomeLoc>, Serializable, HasGenome
return ! discontinuousP( that );
}
/**
* Return true if this GenomeLoc represents the UNMAPPED location
* @return
*/
public final boolean isUnmapped() {
return isUnmapped(this);
}
/**
* Returns a new GenomeLoc that represents the entire span of this and that. Requires that
* this and that GenomeLoc are contiguous and both mapped

View File

@ -1,207 +1,347 @@
/*
* Copyright (c) 2010. The Broad Institute
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
* Copyright (c) 2010. The Broad Institute
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.datasources.providers;
import net.sf.picard.util.PeekableIterator;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMSequenceDictionary;
import org.testng.Assert;
import org.broad.tribble.BasicFeature;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.refdata.RODRecordListImpl;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTrackerUnitTest;
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.testng.annotations.BeforeMethod;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.*;
/**
* @author aaron
* <p/>
* Class ReadBasedReferenceOrderedViewUnitTest
* <p/>
* test out the ReadBasedReferenceOrderedView class
* @author depristo
*/
public class ReadBasedReferenceOrderedViewUnitTest extends BaseTest {
private GenomeLocParser genomeLocParser;
private static int startingChr = 1;
private static int endingChr = 2;
private static int readCount = 100;
private static int DEFAULT_READ_LENGTH = ArtificialSAMUtils.DEFAULT_READ_LENGTH;
private static String contig;
private static SAMFileHeader header;
private GenomeLocParser genomeLocParser;
@BeforeClass
public void beforeClass() {
header = ArtificialSAMUtils.createArtificialSamHeader((endingChr - startingChr) + 1, startingChr, readCount + DEFAULT_READ_LENGTH);
contig = header.getSequence(0).getSequenceName();
genomeLocParser = new GenomeLocParser(header.getSequenceDictionary());
initializeTests();
}
@BeforeMethod
public void beforeEach() {
}
@Test
public void testCreateReadMetaDataTrackerOnePerSite() {
// make ten reads,
List<SAMRecord> records = new ArrayList<SAMRecord>();
for (int x = 1; x < 11; x++) {
SAMRecord rec = ArtificialSAMUtils.createArtificialRead(header, "name", 0, x, 10);
private class CompareFeatures implements Comparator<Feature> {
@Override
public int compare(Feature o1, Feature o2) {
return genomeLocParser.createGenomeLoc(o1).compareTo(genomeLocParser.createGenomeLoc(o2));
}
GenomeLoc start = genomeLocParser.createGenomeLoc(header.getSequenceDictionary().getSequence(0).getSequenceName(), 0, 0);
List<RMDDataState> list = new ArrayList<RMDDataState>();
list.add(new RMDDataState(null, new FakePeekingRODIterator(genomeLocParser,start, "fakeName")));
ReadBasedReferenceOrderedView view = new ReadBasedReferenceOrderedView(new WindowedData(list));
}
for (SAMRecord rec : records) {
ReadMetaDataTracker tracker = view.getReferenceOrderedDataForRead(rec);
Map<Integer, Collection<GATKFeature>> map = tracker.getReadOffsetMapping();
for (Integer i : map.keySet()) {
Assert.assertEquals(map.get(i).size(), 1);
private class ReadMetaDataTrackerRODStreamTest extends TestDataProvider {
final List<Feature> allFeatures;
final List<GenomeLoc> intervals;
public ReadMetaDataTrackerRODStreamTest(final List<Feature> allFeatures, final GenomeLoc interval) {
this(allFeatures, Collections.singletonList(interval));
}
public ReadMetaDataTrackerRODStreamTest(final List<Feature> allFeatures, final List<GenomeLoc> intervals) {
super(ReadMetaDataTrackerRODStreamTest.class);
this.allFeatures = new ArrayList<Feature>(allFeatures);
Collections.sort(this.allFeatures, new CompareFeatures());
this.intervals = new ArrayList<GenomeLoc>(intervals);
Collections.sort(this.intervals);
setName(String.format("%s nFeatures %d intervals %s", getClass().getSimpleName(), allFeatures.size(),
intervals.size() == 1 ? intervals.get(0) : "size " + intervals.size()));
}
public PeekableIterator<RODRecordList> getIterator(final String name) {
return new PeekableIterator<RODRecordList>(new TribbleIteratorFromCollection(name, genomeLocParser, allFeatures));
}
public Set<Feature> getExpectedOverlaps(final GenomeLoc interval) {
final Set<Feature> overlapping = new HashSet<Feature>();
for ( final Feature f : allFeatures )
if ( genomeLocParser.createGenomeLoc(f).overlapsP(interval) )
overlapping.add(f);
return overlapping;
}
}
public void initializeTests() {
final List<Feature> handPickedFeatures = new ArrayList<Feature>();
handPickedFeatures.add(new BasicFeature(contig, 1, 1));
handPickedFeatures.add(new BasicFeature(contig, 2, 5));
handPickedFeatures.add(new BasicFeature(contig, 4, 4));
handPickedFeatures.add(new BasicFeature(contig, 6, 6));
handPickedFeatures.add(new BasicFeature(contig, 9, 10));
handPickedFeatures.add(new BasicFeature(contig, 10, 10));
handPickedFeatures.add(new BasicFeature(contig, 10, 11));
handPickedFeatures.add(new BasicFeature(contig, 13, 20));
createTestsForFeatures(handPickedFeatures);
// test in the present of a large spanning element
{
List<Feature> oneLargeSpan = new ArrayList<Feature>(handPickedFeatures);
oneLargeSpan.add(new BasicFeature(contig, 1, 100));
createTestsForFeatures(oneLargeSpan);
}
// test in the presence of a partially spanning element
{
List<Feature> partialSpanStart = new ArrayList<Feature>(handPickedFeatures);
partialSpanStart.add(new BasicFeature(contig, 1, 6));
createTestsForFeatures(partialSpanStart);
}
// test in the presence of a partially spanning element at the end
{
List<Feature> partialSpanEnd = new ArrayList<Feature>(handPickedFeatures);
partialSpanEnd.add(new BasicFeature(contig, 10, 100));
createTestsForFeatures(partialSpanEnd);
}
// no data at all
final GenomeLoc loc = genomeLocParser.createGenomeLoc(contig, 5, 5);
new ReadMetaDataTrackerRODStreamTest(Collections.<Feature>emptyList(), loc);
}
// --------------------------------------------------------------------------------
//
// tests for the lower level IntervalOverlappingRODsFromStream
//
// --------------------------------------------------------------------------------
@DataProvider(name = "ReadMetaDataTrackerRODStreamTest")
public Object[][] createReadMetaDataTrackerRODStreamTest() {
return ReadMetaDataTrackerRODStreamTest.getTests(ReadMetaDataTrackerRODStreamTest.class);
}
private GenomeLoc span(final List<GenomeLoc> features) {
int featuresStart = 1; for ( final GenomeLoc f : features ) featuresStart = Math.min(featuresStart, f.getStart());
int featuresStop = 1; for ( final GenomeLoc f : features ) featuresStop = Math.max(featuresStop, f.getStop());
return genomeLocParser.createGenomeLoc(contig, featuresStart, featuresStop);
}
private void createTestsForFeatures(final List<Feature> features) {
int featuresStart = 1; for ( final Feature f : features ) featuresStart = Math.min(featuresStart, f.getStart());
int featuresStop = 1; for ( final Feature f : features ) featuresStop = Math.max(featuresStop, f.getEnd());
for ( final int size : Arrays.asList(1, 5, 10, 100, 1000) ) {
final List<GenomeLoc> allIntervals = new ArrayList<GenomeLoc>();
// regularly spaced
for ( int start = featuresStart; start < featuresStop; start++) {
final GenomeLoc loc = genomeLocParser.createGenomeLoc(contig, start, start + size - 1);
allIntervals.add(loc);
new ReadMetaDataTrackerRODStreamTest(features, loc);
}
Assert.assertEquals(map.keySet().size(), 10);
// starting and stopping at every feature
for ( final Feature f : features ) {
// just at the feature
allIntervals.add(genomeLocParser.createGenomeLoc(contig, f.getStart(), f.getEnd()));
new ReadMetaDataTrackerRODStreamTest(features, allIntervals.get(allIntervals.size() - 1));
// up to end
allIntervals.add(genomeLocParser.createGenomeLoc(contig, f.getStart() - 1, f.getEnd()));
new ReadMetaDataTrackerRODStreamTest(features, allIntervals.get(allIntervals.size() - 1));
// missing by 1
allIntervals.add(genomeLocParser.createGenomeLoc(contig, f.getStart() + 1, f.getEnd() + 1));
new ReadMetaDataTrackerRODStreamTest(features, allIntervals.get(allIntervals.size() - 1));
// just spanning
allIntervals.add(genomeLocParser.createGenomeLoc(contig, f.getStart() - 1, f.getEnd() + 1));
new ReadMetaDataTrackerRODStreamTest(features, allIntervals.get(allIntervals.size() - 1));
}
new ReadMetaDataTrackerRODStreamTest(features, allIntervals);
}
}
@Test(enabled = true, dataProvider = "ReadMetaDataTrackerRODStreamTest")
public void runReadMetaDataTrackerRODStreamTest_singleQuery(final ReadMetaDataTrackerRODStreamTest data) {
if ( data.intervals.size() == 1 ) {
final String name = "testName";
final PeekableIterator<RODRecordList> iterator = data.getIterator(name);
final IntervalOverlappingRODsFromStream stream = new IntervalOverlappingRODsFromStream(name, iterator);
testRODStream(data, stream, Collections.singletonList(data.intervals.get(0)));
}
}
@Test(enabled = true, dataProvider = "ReadMetaDataTrackerRODStreamTest", dependsOnMethods = "runReadMetaDataTrackerRODStreamTest_singleQuery")
public void runReadMetaDataTrackerRODStreamTest_multipleQueries(final ReadMetaDataTrackerRODStreamTest data) {
if ( data.intervals.size() > 1 ) {
final String name = "testName";
final PeekableIterator<RODRecordList> iterator = data.getIterator(name);
final IntervalOverlappingRODsFromStream stream = new IntervalOverlappingRODsFromStream(name, iterator);
testRODStream(data, stream, data.intervals);
}
}
private void testRODStream(final ReadMetaDataTrackerRODStreamTest test, final IntervalOverlappingRODsFromStream stream, final List<GenomeLoc> intervals) {
for ( final GenomeLoc interval : intervals ) {
final RODRecordList query = stream.getOverlapping(interval);
final HashSet<Feature> queryFeatures = new HashSet<Feature>();
for ( final GATKFeature f : query ) queryFeatures.add((Feature)f.getUnderlyingObject());
final Set<Feature> overlaps = test.getExpectedOverlaps(interval);
Assert.assertEquals(queryFeatures.size(), overlaps.size(), "IntervalOverlappingRODsFromStream didn't return the expected set of overlapping features." +
" Expected size = " + overlaps.size() + " but saw " + queryFeatures.size());
BaseTest.assertEqualsSet(queryFeatures, overlaps, "IntervalOverlappingRODsFromStream didn't return the expected set of overlapping features." +
" Expected = " + Utils.join(",", overlaps) + " but saw " + Utils.join(",", queryFeatures));
}
}
// --------------------------------------------------------------------------------
//
// tests for the higher level tracker itself
//
// --------------------------------------------------------------------------------
@DataProvider(name = "ReadMetaDataTrackerTests")
public Object[][] createTrackerTests() {
List<Object[]> tests = new ArrayList<Object[]>();
final Object[][] singleTests = ReadMetaDataTrackerRODStreamTest.getTests(ReadMetaDataTrackerRODStreamTest.class);
final List<ReadMetaDataTrackerRODStreamTest> multiSiteTests = new ArrayList<ReadMetaDataTrackerRODStreamTest>();
for ( final Object[] singleTest : singleTests ) {
if ( ((ReadMetaDataTrackerRODStreamTest)singleTest[0]).intervals.size() > 1 )
multiSiteTests.add((ReadMetaDataTrackerRODStreamTest)singleTest[0]);
}
// all pairwise tests
for ( List<ReadMetaDataTrackerRODStreamTest> singleTest : Utils.makePermutations(multiSiteTests, 2, false)) {
tests.add(new Object[]{singleTest});
}
// all 3 way pairwise tests
for ( List<ReadMetaDataTrackerRODStreamTest> singleTest : Utils.makePermutations(multiSiteTests, 3, false)) {
tests.add(new Object[]{singleTest});
}
return tests.toArray(new Object[][]{});
}
}
@Test(enabled = true, dataProvider = "ReadMetaDataTrackerTests", dependsOnMethods = "runReadMetaDataTrackerRODStreamTest_multipleQueries")
public void runReadMetaDataTrackerTest(final List<ReadMetaDataTrackerRODStreamTest> RODs) {
final List<String> names = new ArrayList<String>();
final List<PeekableIterator<RODRecordList>> iterators = new ArrayList<PeekableIterator<RODRecordList>>();
final List<GenomeLoc> intervals = new ArrayList<GenomeLoc>();
final List<RodBinding<Feature>> rodBindings = new ArrayList<RodBinding<Feature>>();
for ( int i = 0; i < RODs.size(); i++ ) {
final RodBinding<Feature> rodBinding = new RodBinding<Feature>(Feature.class, "name"+i);
rodBindings.add(rodBinding);
final String name = rodBinding.getName();
names.add(name);
iterators.add(RODs.get(i).getIterator(name));
intervals.addAll(RODs.get(i).intervals);
}
class FakePeekingRODIterator implements LocationAwareSeekableRODIterator {
private GenomeLocParser genomeLocParser;
Collections.sort(intervals);
final GenomeLoc span = span(intervals);
final ReadBasedReferenceOrderedView view = new ReadBasedReferenceOrderedView(genomeLocParser, span, names, iterators);
// current location
private GenomeLoc location;
private GATKFeature curROD;
private final String name;
for ( final GenomeLoc interval : intervals ) {
final ReadMetaDataTracker tracker = view.getReferenceOrderedDataForInterval(interval);
public FakePeekingRODIterator(GenomeLocParser genomeLocParser, GenomeLoc startingLoc, String name) {
this.name = name;
this.location = genomeLocParser.createGenomeLoc(startingLoc.getContig(), startingLoc.getStart() + 1, startingLoc.getStop() + 1);
for ( int i = 0; i < RODs.size(); i++ ) {
final ReadMetaDataTrackerRODStreamTest test = RODs.get(i);
final List<Feature> queryFeaturesList = tracker.getValues(rodBindings.get(i));
final Set<Feature> queryFeatures = new HashSet<Feature>(queryFeaturesList);
final Set<Feature> overlaps = test.getExpectedOverlaps(interval);
Assert.assertEquals(queryFeatures.size(), overlaps.size(), "IntervalOverlappingRODsFromStream didn't return the expected set of overlapping features." +
" Expected size = " + overlaps.size() + " but saw " + queryFeatures.size());
BaseTest.assertEqualsSet(queryFeatures, overlaps, "IntervalOverlappingRODsFromStream didn't return the expected set of overlapping features." +
" Expected = " + Utils.join(",", overlaps) + " but saw " + Utils.join(",", queryFeatures));
}
}
}
/**
* Gets the header associated with the backing input stream.
* @return the ROD header.
* Created with IntelliJ IDEA.
* User: depristo
* Date: 8/29/12
* Time: 1:19 PM
* To change this template use File | Settings | File Templates.
*/
@Override
public Object getHeader() {
return null;
}
static class TribbleIteratorFromCollection implements Iterator<RODRecordList> {
// current location
private final String name;
final Queue<GATKFeature> gatkFeatures;
/**
* Gets the sequence dictionary associated with the backing input stream.
* @return sequence dictionary from the ROD header.
*/
@Override
public SAMSequenceDictionary getSequenceDictionary() {
return null;
}
public TribbleIteratorFromCollection(final String name, final GenomeLocParser genomeLocParser, final List<Feature> features) {
this.name = name;
this.gatkFeatures = new LinkedList<GATKFeature>();
for ( final Feature f : features )
gatkFeatures.add(new GATKFeature.TribbleGATKFeature(genomeLocParser, f, name));
}
@Override
public GenomeLoc peekNextLocation() {
System.err.println("Peek Next -> " + location);
return location;
}
@Override
public boolean hasNext() {
return ! gatkFeatures.isEmpty();
}
@Override
public GenomeLoc position() {
return location;
}
@Override
public RODRecordList next() {
final GATKFeature first = gatkFeatures.poll();
final Collection<GATKFeature> myFeatures = new LinkedList<GATKFeature>();
myFeatures.add(first);
while ( gatkFeatures.peek() != null && gatkFeatures.peek().getLocation().getStart() == first.getStart() )
myFeatures.add(gatkFeatures.poll());
@Override
public RODRecordList seekForward(GenomeLoc interval) {
while (location.isBefore(interval))
next();
return next(); // we always move by one, we know the next location will be right
}
GenomeLoc loc = first.getLocation();
for ( final GATKFeature feature : myFeatures )
loc = loc.merge(feature.getLocation());
@Override
public boolean hasNext() {
return true; // we always have next
}
return new RODRecordListImpl(name, myFeatures, loc); // is this safe?
}
@Override
public RODRecordList next() {
System.err.println("Next -> " + location);
curROD = new ReadMetaDataTrackerUnitTest.FakeRODatum(location, name);
location = genomeLocParser.createGenomeLoc(location.getContig(), location.getStart() + 1, location.getStop() + 1);
FakeRODRecordList list = new FakeRODRecordList();
list.add(curROD);
return list;
}
@Override
public void remove() {
throw new IllegalStateException("GRRR");
}
@Override
public void close() {
// nothing to do
@Override public void remove() { throw new IllegalStateException("GRRR"); }
}
}
class FakeRODRecordList extends AbstractList<GATKFeature> implements RODRecordList {
private final List<GATKFeature> list = new ArrayList<GATKFeature>();
public boolean add(GATKFeature data) {
return list.add(data);
}
@Override
public GATKFeature get(int i) {
return list.get(i);
}
@Override
public int size() {
return list.size();
}
@Override
public GenomeLoc getLocation() {
return list.get(0).getLocation();
}
@Override
public String getName() {
return "test";
}
@Override
public int compareTo(RODRecordList rodRecordList) {
return this.list.get(0).getLocation().compareTo(rodRecordList.getLocation());
}
}

View File

@ -1,276 +0,0 @@
/*
* Copyright (c) 2010. The Broad Institute
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.refdata;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMRecord;
import org.testng.Assert;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.datasources.providers.RODMetaDataContainer;
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
import java.util.*;
/**
* @author aaron
* <p/>
* Class ReadMetaDataTrackerUnitTest
* <p/>
* test out the ReadMetaDataTracker
*/
public class ReadMetaDataTrackerUnitTest extends BaseTest {
private static int startingChr = 1;
private static int endingChr = 2;
private static int readCount = 100;
private static int DEFAULT_READ_LENGTH = ArtificialSAMUtils.DEFAULT_READ_LENGTH;
private static SAMFileHeader header;
private Set<String> nameSet;
private GenomeLocParser genomeLocParser;
@BeforeClass
public void beforeClass() {
header = ArtificialSAMUtils.createArtificialSamHeader((endingChr - startingChr) + 1, startingChr, readCount + DEFAULT_READ_LENGTH);
genomeLocParser = new GenomeLocParser(header.getSequenceDictionary());
}
@BeforeMethod
public void beforeEach() {
nameSet = new TreeSet<String>();
nameSet.add("default");
}
@Test
public void twoRodsAtEachReadBase() {
nameSet.add("default2");
ReadMetaDataTracker tracker = getRMDT(1, nameSet, true);
// count the positions
int count = 0;
for (Integer x : tracker.getReadOffsetMapping().keySet()) {
count++;
Assert.assertEquals(tracker.getReadOffsetMapping().get(x).size(), 2);
}
Assert.assertEquals(count, 10);
}
@Test
public void rodAtEachReadBase() {
ReadMetaDataTracker tracker = getRMDT(1, nameSet, true);
// count the positions
int count = 0;
for (Integer x : tracker.getReadOffsetMapping().keySet()) {
count++;
Assert.assertEquals(tracker.getReadOffsetMapping().get(x).size(), 1);
}
Assert.assertEquals(count, 10);
}
@Test
public void filterByName() {
nameSet.add("default2");
ReadMetaDataTracker tracker = getRMDT(1, nameSet, true);
// count the positions
int count = 0;
Map<Integer, Collection<GATKFeature>> map = tracker.getReadOffsetMapping("default");
for (Integer x : map.keySet()) {
count++;
Assert.assertEquals(map.get(x).size(), 1);
}
Assert.assertEquals(count, 10);
}
@Test
public void filterByDupType() {
nameSet.add("default2");
ReadMetaDataTracker tracker = getRMDT(1, nameSet, false); // create both RODs of the same type
// count the positions
int count = 0;
Map<Integer, Collection<GATKFeature>> map = tracker.getReadOffsetMapping(FakeRODatum.class);
for (Integer x : map.keySet()) {
count++;
Assert.assertEquals(map.get(x).size(), 2);
}
Assert.assertEquals(count, 10);
}
// @Test this test can be uncommented to determine the speed impacts of any changes to the RODs for reads system
public void filterByMassiveDupType() {
for (int y = 0; y < 20; y++) {
nameSet.add("default" + String.valueOf(y));
long firstTime = System.currentTimeMillis();
for (int lp = 0; lp < 1000; lp++) {
ReadMetaDataTracker tracker = getRMDT(1, nameSet, false); // create both RODs of the same type
// count the positions
int count = 0;
Map<Integer, Collection<GATKFeature>> map = tracker.getReadOffsetMapping(FakeRODatum.class);
for (Integer x : map.keySet()) {
count++;
Assert.assertEquals(map.get(x).size(), y + 2);
}
Assert.assertEquals(count, 10);
}
System.err.println(y + " = " + (System.currentTimeMillis() - firstTime));
}
}
@Test
public void filterByType() {
nameSet.add("default2");
ReadMetaDataTracker tracker = getRMDT(1, nameSet, true);
// count the positions
int count = 0;
Map<Integer, Collection<GATKFeature>> map = tracker.getReadOffsetMapping(Fake2RODatum.class);
for (int x : map.keySet()) {
count++;
Assert.assertEquals(map.get(x).size(), 1);
}
Assert.assertEquals(count, 10);
}
@Test
public void sparceRODsForRead() {
ReadMetaDataTracker tracker = getRMDT(7, nameSet, true);
// count the positions
int count = 0;
for (Integer x : tracker.getReadOffsetMapping().keySet()) {
count++;
Assert.assertEquals(tracker.getReadOffsetMapping().get(x).size(), 1);
}
Assert.assertEquals(count, 2);
}
@Test
public void rodByGenomeLoc() {
ReadMetaDataTracker tracker = getRMDT(1, nameSet, true);
// count the positions
int count = 0;
for (Integer x : tracker.getContigOffsetMapping().keySet()) {
count++;
Assert.assertEquals(tracker.getContigOffsetMapping().get(x).size(), 1);
}
Assert.assertEquals(count, 10);
}
/**
* create a ReadMetaDataTracker given:
*
* @param incr the spacing between site locations
* @param names the names of the reference ordered data to create: one will be created at every location for each name
*
* @return a ReadMetaDataTracker
*/
private ReadMetaDataTracker getRMDT(int incr, Set<String> names, boolean alternateTypes) {
SAMRecord record = ArtificialSAMUtils.createArtificialRead(header, "name", 0, 1, 10);
TreeMap<Integer, RODMetaDataContainer> data = new TreeMap<Integer, RODMetaDataContainer>();
for (int x = 0; x < record.getAlignmentEnd(); x += incr) {
GenomeLoc loc = genomeLocParser.createGenomeLoc(record.getReferenceName(), record.getAlignmentStart() + x, record.getAlignmentStart() + x);
RODMetaDataContainer set = new RODMetaDataContainer();
int cnt = 0;
for (String name : names) {
if (alternateTypes)
set.addEntry((cnt % 2 == 0) ? new FakeRODatum(loc, name) : new Fake2RODatum(loc, name));
else
set.addEntry(new FakeRODatum(loc, name));
cnt++;
}
data.put(record.getAlignmentStart() + x, set);
}
ReadMetaDataTracker tracker = new ReadMetaDataTracker(genomeLocParser, record, data);
return tracker;
}
/** for testing, we want a fake rod with a different classname, for the get-by-class-name functions */
static public class Fake2RODatum extends FakeRODatum {
public Fake2RODatum(GenomeLoc location, String name) {
super(location, name);
}
}
/** for testing only */
static public class FakeRODatum extends GATKFeature {
final GenomeLoc location;
final String name;
public FakeRODatum(GenomeLoc location, String name) {
super(name);
this.location = location;
this.name = name;
}
@Override
public String getName() {
return name;
}
@Override
public GenomeLoc getLocation() {
return this.location;
}
@Override
public Object getUnderlyingObject() {
return null; //To change body of implemented methods use File | Settings | File Templates.
}
@Override
public String getChr() {
return location.getContig();
}
@Override
public int getStart() {
return (int)this.location.getStart();
}
@Override
public int getEnd() {
return (int)this.location.getStop();
}
}
}