RefMetaDataTracker cleanup and unit tests
You now have to provide an explicit list of RODRecordLists upfront to the constructor. RefMetaDataTracker is now immutable. Changes in engine to incorporate these differences. Extensive UnitTests for RefMetaDataTracker now.
This commit is contained in:
parent
39b4e76fde
commit
3b799db61a
|
|
@ -4,6 +4,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|||
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
||||
import java.util.ArrayList;
|
||||
|
|
@ -51,10 +52,13 @@ public class ManagingReferenceOrderedView implements ReferenceOrderedView {
|
|||
* @return A tracker containing information about this locus.
|
||||
*/
|
||||
public RefMetaDataTracker getReferenceOrderedDataAtLocus( GenomeLoc loc, ReferenceContext referenceContext ) {
|
||||
RefMetaDataTracker tracks = new RefMetaDataTracker(states.size(), referenceContext);
|
||||
List<RODRecordList> bindings = states.isEmpty() ? Collections.<RODRecordList>emptyList() : new ArrayList<RODRecordList>(states.size());
|
||||
|
||||
for ( ReferenceOrderedDataState state: states )
|
||||
tracks.bind( state.dataSource.getName(), state.iterator.seekForward(loc) );
|
||||
return tracks;
|
||||
// todo -- warning, I removed the reference to the name from states
|
||||
bindings.add( state.iterator.seekForward(loc) );
|
||||
|
||||
return new RefMetaDataTracker(bindings, referenceContext);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -96,21 +96,12 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
|
|||
}
|
||||
|
||||
rodQueue = new RODMergingIterator(iterators);
|
||||
|
||||
//throw new StingException("RodLocusView currently disabled");
|
||||
}
|
||||
|
||||
public RefMetaDataTracker getReferenceOrderedDataAtLocus( GenomeLoc loc, ReferenceContext referenceContext ) {
|
||||
RefMetaDataTracker t = new RefMetaDataTracker(allTracksHere.size(), referenceContext);
|
||||
for ( RODRecordList track : allTracksHere ) {
|
||||
if ( ! t.hasValues(track.getName()) )
|
||||
t.bind(track.getName(), track);
|
||||
}
|
||||
|
||||
// special case the interval again -- add it into the ROD
|
||||
if ( interval != null ) { t.bind(interval.getName(), interval); }
|
||||
|
||||
return t;
|
||||
if ( interval != null ) { allTracksHere.add(interval); }
|
||||
return new RefMetaDataTracker(allTracksHere, referenceContext);
|
||||
}
|
||||
|
||||
public boolean hasNext() {
|
||||
|
|
|
|||
|
|
@ -29,18 +29,13 @@ import java.util.*;
|
|||
* Time: 3:05:23 PM
|
||||
*/
|
||||
public class RefMetaDataTracker {
|
||||
// TODO: this should be a list, not a map, actually
|
||||
|
||||
private final static RODRecordList EMPTY_ROD_RECORD_LIST = new RODRecordListImpl("EMPTY");
|
||||
|
||||
final Map<String, RODRecordList> map;
|
||||
final ReferenceContext ref;
|
||||
protected static Logger logger = Logger.getLogger(RefMetaDataTracker.class);
|
||||
|
||||
public RefMetaDataTracker(int nBindings, ReferenceContext ref) {
|
||||
this.ref = ref;
|
||||
if ( nBindings == 0 )
|
||||
map = Collections.emptyMap();
|
||||
else
|
||||
map = new HashMap<String, RODRecordList>(nBindings);
|
||||
}
|
||||
|
||||
final protected static Logger logger = Logger.getLogger(RefMetaDataTracker.class);
|
||||
|
||||
// ------------------------------------------------------------------------------------------
|
||||
//
|
||||
|
|
@ -50,18 +45,18 @@ public class RefMetaDataTracker {
|
|||
//
|
||||
// ------------------------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Binds the list of reference ordered data records (RMDs) to track name at this site. Should be used only by the traversal
|
||||
* system to provide access to RMDs in a structured way to the walkers.
|
||||
*
|
||||
* DO NOT USE THIS FUNCTION UNLESS YOU ARE THE GATK ENGINE
|
||||
*
|
||||
* @param name the name of the track
|
||||
* @param rod the collection of RMD data
|
||||
*/
|
||||
public void bind(final String name, RODRecordList rod) {
|
||||
//logger.debug(String.format("Binding %s to %s", name, rod));
|
||||
map.put(canonicalName(name), maybeConvertToVariantContext(rod));
|
||||
public RefMetaDataTracker(final Collection<RODRecordList> allBindings, final ReferenceContext ref) {
|
||||
this.ref = ref;
|
||||
if ( allBindings.isEmpty() )
|
||||
map = Collections.emptyMap();
|
||||
else {
|
||||
map = new HashMap<String, RODRecordList>(allBindings.size());
|
||||
for ( RODRecordList rod : allBindings ) {
|
||||
//logger.debug(String.format("Binding %s to %s", name, rod));
|
||||
if ( rod != null )
|
||||
map.put(canonicalName(rod.getName()), maybeConvertToVariantContext(rod));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -81,24 +76,13 @@ public class RefMetaDataTracker {
|
|||
final VariantContext vc = VariantContextAdaptors.toVariantContext(bindings.getName(), rec.getUnderlyingObject(), ref);
|
||||
if ( vc != null ) // it's possible that the conversion failed, but we continue along anyway
|
||||
values.add(new GATKFeature.TribbleGATKFeature(ref.getGenomeLocParser(), vc, rec.getName()));
|
||||
}
|
||||
} else
|
||||
values.add(rec);
|
||||
}
|
||||
|
||||
return new RODRecordListImpl(bindings.getName(), values, bindings.getLocation());
|
||||
}
|
||||
|
||||
// /**
|
||||
// * Temporary setting for putting a reference context into the system.
|
||||
// *
|
||||
// * DO NOT USE THIS FUNCTION UNLESS YOU ARE THE GATK ENGINE
|
||||
// *
|
||||
// * @param ref
|
||||
// */
|
||||
// public void setRef(final ReferenceContext ref) {
|
||||
// this.ref = ref;
|
||||
// }
|
||||
|
||||
|
||||
// ------------------------------------------------------------------------------------------
|
||||
//
|
||||
//
|
||||
|
|
@ -107,58 +91,47 @@ public class RefMetaDataTracker {
|
|||
//
|
||||
// ------------------------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* No-assumption version of getValues(name, class). Returns Objects.
|
||||
*/
|
||||
public List<Object> getValues(final String name) {
|
||||
return getValues(name, Object.class);
|
||||
public <T> List<T> getValues(Class<T> type) {
|
||||
return addValues(map.keySet(), type, new ArrayList<T>(), null, false, false);
|
||||
}
|
||||
public <T> List<T> getValues(Class<T> type, final GenomeLoc onlyAtThisLoc) {
|
||||
return addValues(map.keySet(), type, new ArrayList<T>(), onlyAtThisLoc, true, false);
|
||||
}
|
||||
public <T> List<T> getValues(Class<T> type, final String name) {
|
||||
return addValues(name, type, new ArrayList<T>(), getTrackDataByName(name), null, false, false);
|
||||
}
|
||||
public <T> List<T> getValues(Class<T> type, final String name, final GenomeLoc onlyAtThisLoc) {
|
||||
return addValues(name, type, new ArrayList<T>(), getTrackDataByName(name), onlyAtThisLoc, true, false);
|
||||
}
|
||||
public <T> List<T> getValues(Class<T> type, final Collection<String> names) {
|
||||
return addValues(names, type, new ArrayList<T>(), null, false, false);
|
||||
}
|
||||
public <T> List<T> getValues(Class<T> type, final Collection<String> names, final GenomeLoc onlyAtThisLoc) {
|
||||
return addValues(names, type, new ArrayList<T>(), onlyAtThisLoc, true, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* get all the reference meta data associated with a track name.
|
||||
* @param name the name of the track we're looking for
|
||||
* @param clazz the expected class of the elements bound to rod name
|
||||
* @return a list of objects, representing the underlying objects that the tracks produce. I.e. for a
|
||||
* dbSNP RMD this will be a RodDbSNP, etc.
|
||||
*
|
||||
* Important: The list returned by this function is guaranteed not to be null, but may be empty!
|
||||
*/
|
||||
public <T> List<T> getValues(final String name, final Class<T> clazz) {
|
||||
RODRecordList list = getTrackDataByName(name);
|
||||
|
||||
if (list == null)
|
||||
return Collections.emptyList();
|
||||
else {
|
||||
return addValues(name, clazz, new ArrayList<T>(), list, list.getLocation(), false, false);
|
||||
}
|
||||
public <T> T getFirstValue(Class<T> type) {
|
||||
return safeGetFirst(getValues(type));
|
||||
}
|
||||
public <T> T getFirstValue(Class<T> type, final GenomeLoc onlyAtThisLoc) {
|
||||
return safeGetFirst(getValues(type, onlyAtThisLoc));
|
||||
}
|
||||
public <T> T getFirstValue(Class<T> type, final String name) {
|
||||
return safeGetFirst(getValues(type, name));
|
||||
}
|
||||
public <T> T getFirstValue(Class<T> type, final String name, final GenomeLoc onlyAtThisLoc) {
|
||||
return safeGetFirst(getValues(type, name, onlyAtThisLoc));
|
||||
}
|
||||
public <T> T getFirstValue(Class<T> type, final Collection<String> names) {
|
||||
return safeGetFirst(getValues(type, names));
|
||||
}
|
||||
public <T> T getFirstValue(Class<T> type, final Collection<String> names, final GenomeLoc onlyAtThisLoc) {
|
||||
return safeGetFirst(getValues(type, names, onlyAtThisLoc));
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* get a singleton record, given the name and a type. This function will return the first record at the
|
||||
* current position seen. The object is cast into a type clazz, or throws an error if this isn't possible.
|
||||
*
|
||||
* * WARNING: we now support more than one RMD at a single position for all tracks. If there are
|
||||
* multiple RMD objects at this location, there is no contract for which object this method will pick, and which object gets
|
||||
* picked may change from time to time! BE WARNED!
|
||||
*
|
||||
* @param name the name of the track
|
||||
* @param clazz the underlying type to return
|
||||
* @param <T> the type to parameterize on, matching the clazz argument
|
||||
* @return a record of type T, or null if no record is present.
|
||||
*/
|
||||
public <T> T getFirstValue(final String name, final Class<T> clazz) {
|
||||
RODRecordList objects = getTrackDataByName(name);
|
||||
|
||||
// if empty or null return null;
|
||||
if (objects == null || objects.size() < 1) return null;
|
||||
|
||||
Object obj = objects.get(0).getUnderlyingObject();
|
||||
if (!(clazz.isAssignableFrom(obj.getClass())))
|
||||
throw new UserException.CommandLineException("Unable to case track named " + name + " to type of " + clazz.toString()
|
||||
+ " it's of type " + obj.getClass());
|
||||
else
|
||||
return (T)obj;
|
||||
final private <T> T safeGetFirst(List<T> l) {
|
||||
// todo: should we be warning people here? Throwing an error?
|
||||
return l.isEmpty() ? null : l.get(0);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -195,8 +168,7 @@ public class RefMetaDataTracker {
|
|||
* Important: The list returned by this function is guaranteed not to be null, but may be empty!
|
||||
*/
|
||||
public List<GATKFeature> getValuesAsGATKFeatures(final String name) {
|
||||
List<GATKFeature> feat = getTrackDataByName(name);
|
||||
return (feat == null) ? new ArrayList<GATKFeature>() : feat; // to satisfy the above requirement that we don't return null
|
||||
return getTrackDataByName(name);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -209,7 +181,7 @@ public class RefMetaDataTracker {
|
|||
LinkedList<RODRecordList> bound = new LinkedList<RODRecordList>();
|
||||
|
||||
for ( RODRecordList value : map.values() ) {
|
||||
if ( value != null && value.size() != 0 ) bound.add(value);
|
||||
if ( value.size() != 0 ) bound.add(value);
|
||||
}
|
||||
|
||||
return bound;
|
||||
|
|
@ -222,13 +194,79 @@ public class RefMetaDataTracker {
|
|||
public int getNumberOfTracksWithValue() {
|
||||
int n = 0;
|
||||
for ( RODRecordList value : map.values() ) {
|
||||
if ( value != null && ! value.isEmpty() ) {
|
||||
if ( ! value.isEmpty() ) {
|
||||
n++;
|
||||
}
|
||||
}
|
||||
return n;
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------------------------
|
||||
//
|
||||
//
|
||||
// old style Generic accessors
|
||||
//
|
||||
// TODO -- DELETE ME
|
||||
//
|
||||
//
|
||||
// ------------------------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* No-assumption version of getValues(name, class). Returns Objects.
|
||||
*/
|
||||
@Deprecated
|
||||
public List<Object> getValues(final String name) {
|
||||
return getValues(name, Object.class);
|
||||
}
|
||||
|
||||
/**
|
||||
* get all the reference meta data associated with a track name.
|
||||
* @param name the name of the track we're looking for
|
||||
* @param clazz the expected class of the elements bound to rod name
|
||||
* @return a list of objects, representing the underlying objects that the tracks produce. I.e. for a
|
||||
* dbSNP RMD this will be a RodDbSNP, etc.
|
||||
*
|
||||
* Important: The list returned by this function is guaranteed not to be null, but may be empty!
|
||||
*/
|
||||
@Deprecated
|
||||
public <T> List<T> getValues(final String name, final Class<T> clazz) {
|
||||
RODRecordList list = getTrackDataByName(name);
|
||||
|
||||
if (list.isEmpty())
|
||||
return Collections.emptyList();
|
||||
else {
|
||||
return addValues(name, clazz, new ArrayList<T>(), list, list.getLocation(), false, false);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* get a singleton record, given the name and a type. This function will return the first record at the
|
||||
* current position seen. The object is cast into a type clazz, or throws an error if this isn't possible.
|
||||
*
|
||||
* * WARNING: we now support more than one RMD at a single position for all tracks. If there are
|
||||
* multiple RMD objects at this location, there is no contract for which object this method will pick, and which object gets
|
||||
* picked may change from time to time! BE WARNED!
|
||||
*
|
||||
* @param name the name of the track
|
||||
* @param clazz the underlying type to return
|
||||
* @param <T> the type to parameterize on, matching the clazz argument
|
||||
* @return a record of type T, or null if no record is present.
|
||||
*/
|
||||
@Deprecated
|
||||
public <T> T getFirstValue(final String name, final Class<T> clazz) {
|
||||
RODRecordList objects = getTrackDataByName(name);
|
||||
|
||||
if (objects.isEmpty()) return null;
|
||||
|
||||
Object obj = objects.get(0).getUnderlyingObject();
|
||||
if (!(clazz.isAssignableFrom(obj.getClass())))
|
||||
throw new UserException.CommandLineException("Unable to case track named " + name + " to type of " + clazz.toString()
|
||||
+ " it's of type " + obj.getClass());
|
||||
else
|
||||
return (T)obj;
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------------------------
|
||||
//
|
||||
//
|
||||
|
|
@ -244,6 +282,7 @@ public class RefMetaDataTracker {
|
|||
*
|
||||
* @return variant context
|
||||
*/
|
||||
@Deprecated
|
||||
public List<VariantContext> getAllVariantContexts() {
|
||||
return getAllVariantContexts(null, false, false);
|
||||
}
|
||||
|
|
@ -254,6 +293,7 @@ public class RefMetaDataTracker {
|
|||
* @param curLocation
|
||||
* @return
|
||||
*/
|
||||
@Deprecated
|
||||
public List<VariantContext> getAllVariantContexts(final GenomeLoc curLocation) {
|
||||
return getAllVariantContexts(curLocation, true, false);
|
||||
}
|
||||
|
|
@ -275,6 +315,7 @@ public class RefMetaDataTracker {
|
|||
* @param takeFirstOnly do we take the first rod only?
|
||||
* @return variant context
|
||||
*/
|
||||
@Deprecated
|
||||
public List<VariantContext> getAllVariantContexts(final GenomeLoc curLocation,
|
||||
final boolean requireStartHere,
|
||||
final boolean takeFirstOnly) {
|
||||
|
|
@ -299,6 +340,7 @@ public class RefMetaDataTracker {
|
|||
* @param takeFirstOnly do we take the first rod only?
|
||||
* @return variant context
|
||||
*/
|
||||
@Deprecated
|
||||
public List<VariantContext> getVariantContexts(final String name,
|
||||
final GenomeLoc curLocation,
|
||||
final boolean requireStartHere,
|
||||
|
|
@ -306,6 +348,7 @@ public class RefMetaDataTracker {
|
|||
return getVariantContexts(Arrays.asList(name), curLocation, requireStartHere, takeFirstOnly);
|
||||
}
|
||||
|
||||
@Deprecated
|
||||
public List<VariantContext> getVariantContexts(final Collection<String> names,
|
||||
final GenomeLoc curLocation,
|
||||
final boolean requireStartHere,
|
||||
|
|
@ -314,9 +357,7 @@ public class RefMetaDataTracker {
|
|||
|
||||
for ( String name : names ) {
|
||||
RODRecordList rodList = getTrackDataByName(name); // require that the name is an exact match
|
||||
|
||||
if ( rodList != null )
|
||||
addVariantContexts(contexts, rodList, curLocation, requireStartHere, takeFirstOnly );
|
||||
addVariantContexts(contexts, rodList, curLocation, requireStartHere, takeFirstOnly );
|
||||
}
|
||||
|
||||
return contexts;
|
||||
|
|
@ -332,6 +373,7 @@ public class RefMetaDataTracker {
|
|||
* @param requireStartHere do we require the rod to start at this location?
|
||||
* @return variant context
|
||||
*/
|
||||
@Deprecated
|
||||
public VariantContext getVariantContext(final String name,
|
||||
final GenomeLoc curLocation,
|
||||
final boolean requireStartHere) {
|
||||
|
|
@ -354,11 +396,13 @@ public class RefMetaDataTracker {
|
|||
* @param curLocation
|
||||
* @return
|
||||
*/
|
||||
@Deprecated
|
||||
public VariantContext getVariantContext(final String name,
|
||||
final GenomeLoc curLocation) {
|
||||
return getVariantContext(name, curLocation, true);
|
||||
}
|
||||
|
||||
@Deprecated
|
||||
private void addVariantContexts(final List<VariantContext> contexts,
|
||||
final RODRecordList rodList,
|
||||
final GenomeLoc curLocation,
|
||||
|
|
@ -367,13 +411,27 @@ public class RefMetaDataTracker {
|
|||
addValues("xxx", VariantContext.class, contexts, rodList, curLocation, requireStartHere, takeFirstOnly);
|
||||
}
|
||||
|
||||
private static <T> List<T> addValues(final String name,
|
||||
final Class<T> type,
|
||||
final List<T> values,
|
||||
final RODRecordList rodList,
|
||||
final GenomeLoc curLocation,
|
||||
final boolean requireStartHere,
|
||||
final boolean takeFirstOnly ) {
|
||||
private <T> List<T> addValues(final Collection<String> names,
|
||||
final Class<T> type,
|
||||
final List<T> values,
|
||||
final GenomeLoc curLocation,
|
||||
final boolean requireStartHere,
|
||||
final boolean takeFirstOnly ) {
|
||||
for ( String name : names ) {
|
||||
RODRecordList rodList = getTrackDataByName(name); // require that the name is an exact match
|
||||
addValues(name, type, values, rodList, curLocation, requireStartHere, takeFirstOnly );
|
||||
}
|
||||
|
||||
return values;
|
||||
}
|
||||
|
||||
private <T> List<T> addValues(final String name,
|
||||
final Class<T> type,
|
||||
final List<T> values,
|
||||
final RODRecordList rodList,
|
||||
final GenomeLoc curLocation,
|
||||
final boolean requireStartHere,
|
||||
final boolean takeFirstOnly ) {
|
||||
for ( GATKFeature rec : rodList ) {
|
||||
if ( ! requireStartHere || rec.getLocation().getStart() == curLocation.getStart() ) { // ok, we are going to keep this thing
|
||||
Object obj = rec.getUnderlyingObject();
|
||||
|
|
@ -406,7 +464,8 @@ public class RefMetaDataTracker {
|
|||
*/
|
||||
private RODRecordList getTrackDataByName(final String name) {
|
||||
final String luName = canonicalName(name);
|
||||
return map.get(luName);
|
||||
RODRecordList l = map.get(luName);
|
||||
return l == null ? EMPTY_ROD_RECORD_LIST : l;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -0,0 +1,275 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.refdata;
|
||||
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broad.tribble.dbsnp.DbSNPCodec;
|
||||
import org.broad.tribble.dbsnp.DbSNPFeature;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.features.table.TableFeature;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.*;
|
||||
import java.util.*;
|
||||
import java.util.List;
|
||||
|
||||
public class RefMetaDataTrackerUnitTest {
|
||||
final protected static Logger logger = Logger.getLogger(RefMetaDataTrackerUnitTest.class);
|
||||
private static SAMFileHeader header;
|
||||
private ReferenceContext context;
|
||||
private GenomeLocParser genomeLocParser;
|
||||
private GenomeLoc locus;
|
||||
private final static int START_POS = 10;
|
||||
Allele A,C,G,T;
|
||||
VariantContext AC_SNP, AG_SNP, AT_SNP;
|
||||
TableFeature span10_10, span1_20, span10_20;
|
||||
DbSNPFeature dbsnp1, dbsnp2;
|
||||
|
||||
@BeforeClass
|
||||
public void beforeClass() {
|
||||
header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 100);
|
||||
genomeLocParser = new GenomeLocParser(header.getSequenceDictionary());
|
||||
locus = genomeLocParser.createGenomeLoc("chr1", START_POS, START_POS);
|
||||
context = new ReferenceContext(genomeLocParser, locus, (byte)'A');
|
||||
A = Allele.create("A", true);
|
||||
C = Allele.create("C");
|
||||
G = Allele.create("G");
|
||||
T = Allele.create("T");
|
||||
AC_SNP = new VariantContext("x", "chr1", START_POS, START_POS, Arrays.asList(A, C));
|
||||
AG_SNP = new VariantContext("x", "chr1", START_POS, START_POS, Arrays.asList(A, G));
|
||||
AT_SNP = new VariantContext("x", "chr1", START_POS, START_POS, Arrays.asList(A, T));
|
||||
span10_10 = makeSpan(10, 10);
|
||||
span1_20 = makeSpan(1, 20);
|
||||
span10_20 = makeSpan(10, 20);
|
||||
|
||||
// dbsnp records
|
||||
DbSNPCodec dbsnpCodec = new DbSNPCodec();
|
||||
String line1 = Utils.join("\t", "585 chr1 9 9 rs56289060 0 + - - -/C genomic insertion unknown 0 0 unknown between 1".split(" +"));
|
||||
String line2 = Utils.join("\t", "585 chr1 9 10 rs55998931 0 + C C C/T genomic single unknown 0 0 unknown exact 1".split(" +"));
|
||||
dbsnp1 = (DbSNPFeature)dbsnpCodec.decode(line1);
|
||||
dbsnp2 = (DbSNPFeature)dbsnpCodec.decode(line2);
|
||||
}
|
||||
|
||||
private class MyTest extends BaseTest.TestDataProvider {
|
||||
public RODRecordList AValues, BValues;
|
||||
|
||||
private MyTest(Class c, final List<? extends Feature> AValues, final List<? extends Feature> BValues) {
|
||||
super(c);
|
||||
this.AValues = AValues == null ? null : makeRODRecord("A", AValues);
|
||||
this.BValues = BValues == null ? null : makeRODRecord("B", BValues);
|
||||
}
|
||||
|
||||
private MyTest(final List<? extends Feature> AValues, final List<? extends Feature> BValues) {
|
||||
super(MyTest.class);
|
||||
this.AValues = AValues == null ? null : makeRODRecord("A", AValues);
|
||||
this.BValues = BValues == null ? null : makeRODRecord("B", BValues);
|
||||
}
|
||||
|
||||
private final RODRecordList makeRODRecord(String name, List<? extends Feature> features) {
|
||||
List<GATKFeature> x = new ArrayList<GATKFeature>();
|
||||
for ( Feature f : features )
|
||||
x.add(new GATKFeature.TribbleGATKFeature(genomeLocParser, f, name));
|
||||
return new RODRecordListImpl(name, x, locus);
|
||||
}
|
||||
|
||||
public List<GATKFeature> expected(String name) {
|
||||
if ( name.equals("A+B") ) return allValues();
|
||||
if ( name.equals("A") ) return expectedAValues();
|
||||
if ( name.equals("B") ) return expectedBValues();
|
||||
throw new RuntimeException("FAIL");
|
||||
}
|
||||
|
||||
public List<GATKFeature> allValues() {
|
||||
List<GATKFeature> x = new ArrayList<GATKFeature>();
|
||||
x.addAll(expectedAValues());
|
||||
x.addAll(expectedBValues());
|
||||
return x;
|
||||
}
|
||||
|
||||
public List<GATKFeature> expectedAValues() {
|
||||
return AValues == null ? Collections.<GATKFeature>emptyList() : AValues;
|
||||
}
|
||||
|
||||
public List<GATKFeature> expectedBValues() {
|
||||
return BValues == null ? Collections.<GATKFeature>emptyList() : BValues;
|
||||
}
|
||||
|
||||
public RefMetaDataTracker makeTracker() {
|
||||
List<RODRecordList> x = new ArrayList<RODRecordList>();
|
||||
if ( AValues != null ) x.add(AValues);
|
||||
if ( BValues != null ) x.add(BValues);
|
||||
return new RefMetaDataTracker(x, context);
|
||||
}
|
||||
|
||||
public int nBoundTracks() {
|
||||
int n = 0;
|
||||
if ( AValues != null ) n++;
|
||||
if ( BValues != null ) n++;
|
||||
return n;
|
||||
}
|
||||
}
|
||||
|
||||
private class MyTestAdaptors extends MyTest {
|
||||
private MyTestAdaptors(final List<? extends Feature> AValues) {
|
||||
super(MyTestAdaptors.class, AValues, null);
|
||||
}
|
||||
}
|
||||
|
||||
private final TableFeature makeSpan(int start, int stop) {
|
||||
return new TableFeature(genomeLocParser.createGenomeLoc("chr1", start, stop),
|
||||
Collections.<String>emptyList(), Collections.<String>emptyList());
|
||||
}
|
||||
|
||||
@DataProvider(name = "tests")
|
||||
public Object[][] createTests() {
|
||||
new MyTest(null, null);
|
||||
new MyTest(Arrays.asList(AC_SNP), null);
|
||||
new MyTest(Arrays.asList(AC_SNP, AT_SNP), null);
|
||||
new MyTest(Arrays.asList(AC_SNP), Arrays.asList(AG_SNP));
|
||||
new MyTest(Arrays.asList(AC_SNP, AT_SNP), Arrays.asList(AG_SNP));
|
||||
new MyTest(Arrays.asList(AC_SNP, AT_SNP), Arrays.asList(span10_10));
|
||||
new MyTest(Arrays.asList(AC_SNP, AT_SNP), Arrays.asList(span10_10, span10_20));
|
||||
new MyTest(Arrays.asList(AC_SNP, AT_SNP), Arrays.asList(span10_10, span10_20, span1_20));
|
||||
|
||||
// for the require-start-here getters: spans that do (10-10, 10-20) and do not (1-20) start at the test locus
|
||||
new MyTest(Arrays.asList(span1_20), null);
|
||||
new MyTest(Arrays.asList(span10_10, span10_20), null);
|
||||
new MyTest(Arrays.asList(span10_10, span10_20, span1_20), null);
|
||||
|
||||
return MyTest.getTests(MyTest.class);
|
||||
}
|
||||
|
||||
@Test(enabled = true, dataProvider = "tests")
|
||||
public void testRawBindings(MyTest test) {
|
||||
logger.warn("Testing " + test + " for number of bound tracks");
|
||||
RefMetaDataTracker tracker = test.makeTracker();
|
||||
Assert.assertEquals(tracker.getNumberOfTracksWithValue(), test.nBoundTracks());
|
||||
|
||||
testSimpleBindings("A", tracker, test.AValues);
|
||||
testSimpleBindings("B", tracker, test.BValues);
|
||||
}
|
||||
|
||||
private void testSimpleBindings(String name, RefMetaDataTracker tracker, RODRecordList expected) {
|
||||
List<Feature> asValues = tracker.getValues(Feature.class, name);
|
||||
List<GATKFeature> asFeatures = tracker.getValuesAsGATKFeatures(name);
|
||||
|
||||
Assert.assertEquals(tracker.hasValues(name), expected != null);
|
||||
Assert.assertEquals(asFeatures.size(), expected == null ? 0 : expected.size());
|
||||
Assert.assertEquals(asValues.size(), expected == null ? 0 : expected.size());
|
||||
|
||||
if ( expected != null ) {
|
||||
for ( GATKFeature e : expected ) {
|
||||
boolean foundFeature = false;
|
||||
for ( GATKFeature f : asFeatures ) {
|
||||
if ( e.getUnderlyingObject() == f.getUnderlyingObject() ) foundFeature = true;
|
||||
}
|
||||
Assert.assertTrue(foundFeature, "Never found expected GATKFeature " + e + " bound to " + name + " in " + tracker);
|
||||
|
||||
boolean foundValue = false;
|
||||
for ( Feature f : asValues ) {
|
||||
if ( e.getUnderlyingObject() == f ) foundValue = true;
|
||||
}
|
||||
Assert.assertTrue(foundValue, "Never found expected value of " + e.getUnderlyingObject() + " bound to " + name + " in " + tracker);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test(enabled = true, dataProvider = "tests")
|
||||
public void testGetters(MyTest test) {
|
||||
logger.warn("Testing " + test + " for getFirst() methods");
|
||||
RefMetaDataTracker tracker = test.makeTracker();
|
||||
|
||||
for ( String name : Arrays.asList("A+B", "A", "B") ) {
|
||||
List<Feature> v1 = name.equals("A+B") ? tracker.getValues(Feature.class) : tracker.getValues(Feature.class, name);
|
||||
testGetter(name, v1, test.expected(name), true, tracker);
|
||||
|
||||
List<Feature> v2 = name.equals("A+B") ? tracker.getValues(Feature.class, locus) : tracker.getValues(Feature.class, name, locus);
|
||||
testGetter(name, v2, startingHere(test.expected(name)), true, tracker);
|
||||
|
||||
Feature v3 = name.equals("A+B") ? tracker.getFirstValue(Feature.class) : tracker.getFirstValue(Feature.class, name);
|
||||
testGetter(name, Arrays.asList(v3), test.expected(name), false, tracker);
|
||||
|
||||
Feature v4 = name.equals("A+B") ? tracker.getFirstValue(Feature.class, locus) : tracker.getFirstValue(Feature.class, name, locus);
|
||||
testGetter(name, Arrays.asList(v4), startingHere(test.expected(name)), false, tracker);
|
||||
}
|
||||
}
|
||||
|
||||
private List<GATKFeature> startingHere(List<GATKFeature> l) {
|
||||
List<GATKFeature> x = new ArrayList<GATKFeature>();
|
||||
for ( GATKFeature f : l ) if ( f.getStart() == locus.getStart() ) x.add(f);
|
||||
return x;
|
||||
}
|
||||
|
||||
private void testGetter(String name, List<Feature> got, List<GATKFeature> expected, boolean requireExact, RefMetaDataTracker tracker) {
|
||||
if ( got.size() == 1 && got.get(0) == null )
|
||||
got = Collections.emptyList();
|
||||
|
||||
if ( requireExact )
|
||||
Assert.assertEquals(got.size(), expected.size());
|
||||
|
||||
boolean foundAny = false;
|
||||
for ( GATKFeature e : expected ) {
|
||||
boolean found1 = false;
|
||||
for ( Feature got1 : got ) {
|
||||
if ( e.getUnderlyingObject() == got1 )
|
||||
found1 = true;
|
||||
}
|
||||
if ( requireExact )
|
||||
Assert.assertTrue(found1, "Never found expected GATKFeature " + e + " bound to " + name + " in " + tracker);
|
||||
foundAny = found1 || foundAny;
|
||||
}
|
||||
|
||||
if ( ! requireExact && ! expected.isEmpty() )
|
||||
Assert.assertTrue(foundAny, "Never found any got values matching one of the expected values bound to " + name + " in " + tracker);
|
||||
}
|
||||
|
||||
@Test(enabled = true, dataProvider = "testAdaptors")
|
||||
public void testAdaptors(MyTestAdaptors test) {
|
||||
logger.warn("Testing " + test + " for number of bound tracks");
|
||||
RefMetaDataTracker tracker = test.makeTracker();
|
||||
Assert.assertEquals(tracker.getNumberOfTracksWithValue(), test.nBoundTracks());
|
||||
|
||||
// all of the objects should be of type VariantContext
|
||||
for ( Feature v : tracker.getValues(Feature.class) )
|
||||
Assert.assertEquals(v.getClass(), VariantContext.class, "Conversion failed from dbsnp to variant context in RefMetaDataTracker");
|
||||
}
|
||||
|
||||
@DataProvider(name = "testAdaptors")
|
||||
public Object[][] createTestAdaptors() {
|
||||
new MyTestAdaptors(Arrays.asList(dbsnp1));
|
||||
new MyTestAdaptors(Arrays.asList(dbsnp1, dbsnp2));
|
||||
return MyTestAdaptors.getTests(MyTestAdaptors.class);
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue