Fixed issue where all comp tracks were being treated as known tracks. Fixed issue where multiple JEXL expressions were causing an exception because the underlying object did not implement the Comparable interface. Fixed issue where variants being compared to the known track were not being checked for equality of variation type. Fixed issue where functional annotations were not being iterated over properly. Refactored a lot of helper methods into a separate VariantEvalUtils utility class. Significantly expanded the test suite using a small VCF with SNPs, indels, and non-variant loci which makes it much easier to see what the proper answer should be, and included the appropriate grep and awk commands in the comments to confirm the values.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5204 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
c4707631e2
commit
1085bbf303
|
|
@ -1,7 +1,7 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.varianteval;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broad.tribble.vcf.VCFConstants;
|
||||
import org.broad.tribble.vcf.VCFHeader;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
|
|
@ -17,21 +17,14 @@ import org.broadinstitute.sting.gatk.walkers.Reference;
|
|||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
|
||||
import org.broadinstitute.sting.gatk.walkers.Window;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.StandardEval;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.VariantEvaluator;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.RequiredStratification;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.StandardStratification;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.VariantStratifier;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.AnalysisModuleScanner;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.NewEvaluationContext;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.StateKey;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.*;
|
||||
import org.broadinstitute.sting.gatk.walkers.variantrecalibration.Tranche;
|
||||
import org.broadinstitute.sting.gatk.walkers.variantrecalibration.VariantRecalibrator;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.report.utils.TableType;
|
||||
|
|
@ -43,7 +36,7 @@ import java.lang.reflect.Field;
|
|||
import java.util.*;
|
||||
|
||||
/**
|
||||
* General-purpose tool for variant evaluation (% in dbSNP, genotype concordance, Ts/Tv ratios, and a lot more)
|
||||
* General-purpose tool for variant evaluation (% in dbSNP, genotype concordance, Ti/Tv ratios, and a lot more)
|
||||
*/
|
||||
@Reference(window=@Window(start=-50, stop=50))
|
||||
public class VariantEvalWalker extends RodWalker<Integer, Integer> implements TreeReducible<Integer> {
|
||||
|
|
@ -99,7 +92,7 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
|
|||
private String TRANCHE_FILENAME = null;
|
||||
|
||||
// Variables
|
||||
private Set<VariantContextUtils.JexlVCMatchExp> jexlExpressions = new TreeSet<VariantContextUtils.JexlVCMatchExp>();
|
||||
private Set<SortableJexlVCMatchExp> jexlExpressions = new TreeSet<SortableJexlVCMatchExp>();
|
||||
private Set<String> compNames = new TreeSet<String>();
|
||||
private Set<String> knownNames = new TreeSet<String>();
|
||||
private Set<String> evalNames = new TreeSet<String>();
|
||||
|
|
@ -118,240 +111,17 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
|
|||
private GATKReport report = null;
|
||||
|
||||
// Public constants
|
||||
public static String ALL_SAMPLE_NAME = "all";
|
||||
private static String ALL_SAMPLE_NAME = "all";
|
||||
|
||||
/**
|
||||
* List all of the available evaluation modules, then exit successfully
|
||||
*/
|
||||
private void listModulesAndExit() {
|
||||
List<Class<? extends VariantStratifier>> vsClasses = new PluginManager<VariantStratifier>( VariantStratifier.class ).getPlugins();
|
||||
List<Class<? extends VariantEvaluator>> veClasses = new PluginManager<VariantEvaluator>( VariantEvaluator.class ).getPlugins();
|
||||
|
||||
logger.info("Available stratification modules:");
|
||||
logger.info("(Standard modules are starred)");
|
||||
for (Class<? extends VariantStratifier> vsClass : vsClasses) {
|
||||
logger.info("\t" + vsClass.getSimpleName() + (RequiredStratification.class.isAssignableFrom(vsClass) || StandardStratification.class.isAssignableFrom(vsClass) ? "*" : ""));
|
||||
}
|
||||
logger.info("");
|
||||
|
||||
logger.info("Available evaluation modules:");
|
||||
logger.info("(Standard modules are starred)");
|
||||
for (Class<? extends VariantEvaluator> veClass : veClasses) {
|
||||
logger.info("\t" + veClass.getSimpleName() + (StandardEval.class.isAssignableFrom(veClass) ? "*" : ""));
|
||||
}
|
||||
logger.info("");
|
||||
|
||||
System.exit(0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize required, standard and user-specified stratification objects
|
||||
*
|
||||
* @param noStandardStrats don't use the standard stratifications
|
||||
* @param modulesToUse the list of stratification modules to use
|
||||
* @return set of stratifications to use
|
||||
*/
|
||||
private TreeSet<VariantStratifier> initializeStratificationObjects(boolean noStandardStrats, String[] modulesToUse) {
|
||||
TreeSet<VariantStratifier> strats = new TreeSet<VariantStratifier>();
|
||||
Set<String> stratsToUse = new HashSet<String>();
|
||||
|
||||
// Create a map for all stratification modules for easy lookup.
|
||||
HashMap<String, Class<? extends VariantStratifier>> classMap = new HashMap<String, Class<? extends VariantStratifier>>();
|
||||
for ( Class<? extends VariantStratifier> c : new PluginManager<VariantStratifier>( VariantStratifier.class ).getPlugins() ) {
|
||||
classMap.put(c.getSimpleName(), c);
|
||||
}
|
||||
|
||||
// We must use all required stratification modules.
|
||||
for ( Class<? extends RequiredStratification> reqClass : new PluginManager<RequiredStratification>( RequiredStratification.class ).getPlugins() ) {
|
||||
if ( classMap.containsKey(reqClass.getSimpleName()) ) {
|
||||
stratsToUse.add(reqClass.getSimpleName());
|
||||
}
|
||||
}
|
||||
|
||||
// By default, use standard stratification modules.
|
||||
if ( !noStandardStrats ) {
|
||||
for ( Class<? extends StandardStratification> stdClass : new PluginManager<StandardStratification>( StandardStratification.class ).getPlugins() ) {
|
||||
if ( classMap.containsKey(stdClass.getSimpleName()) ) {
|
||||
stratsToUse.add(stdClass.getSimpleName());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Now add the user-selected modules
|
||||
stratsToUse.addAll(Arrays.asList(modulesToUse));
|
||||
|
||||
// Instantiate the stratifications
|
||||
for ( String module : stratsToUse ) {
|
||||
if ( !classMap.containsKey(module) ) {
|
||||
throw new UserException.CommandLineException("Module " + module + " could not be found; please check that you have specified the class name correctly");
|
||||
}
|
||||
|
||||
if ( classMap.containsKey(module) ) {
|
||||
Class<? extends VariantStratifier> c = classMap.get(module);
|
||||
|
||||
try {
|
||||
VariantStratifier vs = c.newInstance();
|
||||
vs.initialize(jexlExpressions, compNames, knownNames, evalNames, sampleNamesForStratification);
|
||||
|
||||
strats.add(vs);
|
||||
} catch (InstantiationException e) {
|
||||
throw new StingException("Unable to instantiate stratification module '" + c.getSimpleName() + "'");
|
||||
} catch (IllegalAccessException e) {
|
||||
throw new StingException("Illegal access error when trying to instantiate stratification module '" + c.getSimpleName() + "'");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return strats;
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize required, standard and user-specified evaluation objects
|
||||
*
|
||||
* @param noStandardEvals don't use the standard evaluations
|
||||
* @param modulesToUse the list of evaluation modules to use
|
||||
* @return set of evaluations to use
|
||||
*/
|
||||
private Set<Class<? extends VariantEvaluator>> initializeEvaluationObjects(boolean noStandardEvals, String[] modulesToUse) {
|
||||
Set<Class<? extends VariantEvaluator>> evals = new HashSet<Class<? extends VariantEvaluator>>();
|
||||
|
||||
// Create a map for all eval modules for easy lookup.
|
||||
HashMap<String, Class<? extends VariantEvaluator>> classMap = new HashMap<String, Class<? extends VariantEvaluator>>();
|
||||
for ( Class<? extends VariantEvaluator> c : new PluginManager<VariantEvaluator>( VariantEvaluator.class ).getPlugins() ) {
|
||||
classMap.put(c.getSimpleName(), c);
|
||||
}
|
||||
|
||||
// By default, use standard eval modules.
|
||||
if ( !noStandardEvals ) {
|
||||
for ( Class<? extends StandardEval> stdClass : new PluginManager<StandardEval>( StandardEval.class ).getPlugins() ) {
|
||||
if ( classMap.containsKey(stdClass.getSimpleName()) ) {
|
||||
evals.add(classMap.get(stdClass.getSimpleName()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Get the specific classes provided.
|
||||
for ( String module : modulesToUse ) {
|
||||
if ( !classMap.containsKey(module) ) {
|
||||
throw new UserException.CommandLineException("Module " + module + " could not be found; please check that you have specified the class name correctly");
|
||||
}
|
||||
|
||||
if ( classMap.containsKey(module) ) {
|
||||
evals.add(classMap.get(module));
|
||||
}
|
||||
}
|
||||
|
||||
return evals;
|
||||
}
|
||||
|
||||
/**
|
||||
* Recursively initialize the evaluation contexts
|
||||
*
|
||||
* @param stratificationObjects the stratifications to use
|
||||
* @param evaluationObjects the evaluations to use
|
||||
* @param stratStack a stack of stratifications to apply
|
||||
* @param ec evaluation context
|
||||
* @return a map of all the evaluation contexts
|
||||
*/
|
||||
private HashMap<StateKey, NewEvaluationContext> initializeEvaluationContexts(Set<VariantStratifier> stratificationObjects, Set<Class<? extends VariantEvaluator>> evaluationObjects, Stack<VariantStratifier> stratStack, NewEvaluationContext ec) {
|
||||
HashMap<StateKey, NewEvaluationContext> ecs = new HashMap<StateKey, NewEvaluationContext>();
|
||||
|
||||
if (stratStack == null) {
|
||||
stratStack = new Stack<VariantStratifier>();
|
||||
stratStack.addAll(stratificationObjects);
|
||||
}
|
||||
|
||||
if (!stratStack.isEmpty()) {
|
||||
Stack<VariantStratifier> newStratStack = new Stack<VariantStratifier>();
|
||||
newStratStack.addAll(stratStack);
|
||||
|
||||
VariantStratifier vs = newStratStack.pop();
|
||||
|
||||
for ( String state : vs.getAllStates() ) {
|
||||
NewEvaluationContext nec = new NewEvaluationContext();
|
||||
if (ec != null) {
|
||||
nec.putAll(ec);
|
||||
}
|
||||
nec.put(vs, state);
|
||||
|
||||
ecs.putAll(initializeEvaluationContexts(stratificationObjects, evaluationObjects, newStratStack, nec));
|
||||
}
|
||||
} else {
|
||||
HashMap<StateKey, NewEvaluationContext> necs = new HashMap<StateKey, NewEvaluationContext>();
|
||||
|
||||
StateKey stateKey = new StateKey();
|
||||
for ( VariantStratifier vs : ec.keySet() ) {
|
||||
String state = ec.get(vs);
|
||||
|
||||
stateKey.put(vs.getClass().getSimpleName(), state);
|
||||
}
|
||||
|
||||
ec.addEvaluationClassList(this, stateKey, evaluationObjects);
|
||||
|
||||
necs.put(stateKey, ec);
|
||||
|
||||
return necs;
|
||||
}
|
||||
|
||||
return ecs;
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize the output report
|
||||
*
|
||||
* @param stratificationObjects the stratifications to use
|
||||
* @param evaluationObjects the evaluations to use
|
||||
* @return an initialized report object
|
||||
*/
|
||||
private GATKReport initializeGATKReport(Set<VariantStratifier> stratificationObjects, Set<Class<? extends VariantEvaluator>> evaluationObjects) {
|
||||
GATKReport report = new GATKReport();
|
||||
|
||||
for ( Class<? extends VariantEvaluator> ve : evaluationObjects ) {
|
||||
String tableName = ve.getSimpleName();
|
||||
String tableDesc = ve.getAnnotation(Analysis.class).description();
|
||||
|
||||
report.addTable(tableName, tableDesc);
|
||||
|
||||
GATKReportTable table = report.getTable(tableName);
|
||||
table.addPrimaryKey("entry", false);
|
||||
table.addColumn(tableName, tableName);
|
||||
|
||||
for ( VariantStratifier vs : stratificationObjects ) {
|
||||
String columnName = vs.getClass().getSimpleName();
|
||||
|
||||
table.addColumn(columnName, "unknown");
|
||||
}
|
||||
|
||||
try {
|
||||
VariantEvaluator vei = ve.newInstance();
|
||||
vei.initialize(this);
|
||||
|
||||
AnalysisModuleScanner scanner = new AnalysisModuleScanner(vei);
|
||||
Map<Field, DataPoint> datamap = scanner.getData();
|
||||
|
||||
for (Field field : datamap.keySet()) {
|
||||
field.setAccessible(true);
|
||||
|
||||
if (! (field.get(vei) instanceof TableType) ) {
|
||||
table.addColumn(field.getName(), 0.0);
|
||||
}
|
||||
}
|
||||
} catch (InstantiationException e) {
|
||||
throw new StingException("InstantiationException: " + e);
|
||||
} catch (IllegalAccessException e) {
|
||||
throw new StingException("IllegalAccessException: " + e);
|
||||
}
|
||||
}
|
||||
|
||||
return report;
|
||||
}
|
||||
// Utility class
|
||||
private final VariantEvalUtils variantEvalUtils = new VariantEvalUtils(this);
|
||||
|
||||
/**
|
||||
* Initialize the stratifications, evaluations, evaluation contexts, and reporting object
|
||||
*/
|
||||
public void initialize() {
|
||||
// Just list the modules, and exit quickly.
|
||||
if (LIST) { listModulesAndExit(); }
|
||||
if (LIST) { variantEvalUtils.listModulesAndExit(); }
|
||||
|
||||
// Categorize each rod as an eval or a comp rod.
|
||||
for ( ReferenceOrderedDataSource d : this.getToolkit().getRodDataSources() ) {
|
||||
|
|
@ -386,7 +156,11 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
|
|||
sampleNamesForStratification.add(ALL_SAMPLE_NAME);
|
||||
|
||||
// Initialize select expressions
|
||||
jexlExpressions.addAll(VariantContextUtils.initializeMatchExps(SELECT_NAMES, SELECT_EXPS));
|
||||
//jexlExpressions.addAll(VariantContextUtils.initializeMatchExps(SELECT_NAMES, SELECT_EXPS));
|
||||
for (VariantContextUtils.JexlVCMatchExp jexl : VariantContextUtils.initializeMatchExps(SELECT_NAMES, SELECT_EXPS)) {
|
||||
SortableJexlVCMatchExp sjexl = new SortableJexlVCMatchExp(jexl.name, jexl.exp);
|
||||
jexlExpressions.add(sjexl);
|
||||
}
|
||||
|
||||
// Add select expressions for anything in the tranches file
|
||||
if ( TRANCHE_FILENAME != null ) {
|
||||
|
|
@ -399,253 +173,14 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
|
|||
}
|
||||
|
||||
// Initialize the set of stratifications and evaluations to use
|
||||
stratificationObjects = initializeStratificationObjects(NO_STANDARD_STRATIFICATIONS, STRATIFICATIONS_TO_USE);
|
||||
Set<Class<? extends VariantEvaluator>> evaluationObjects = initializeEvaluationObjects(NO_STANDARD_MODULES, MODULES_TO_USE);
|
||||
stratificationObjects = variantEvalUtils.initializeStratificationObjects(NO_STANDARD_STRATIFICATIONS, STRATIFICATIONS_TO_USE);
|
||||
Set<Class<? extends VariantEvaluator>> evaluationObjects = variantEvalUtils.initializeEvaluationObjects(NO_STANDARD_MODULES, MODULES_TO_USE);
|
||||
|
||||
// Initialize the evaluation contexts
|
||||
evaluationContexts = initializeEvaluationContexts(stratificationObjects, evaluationObjects, null, null);
|
||||
evaluationContexts = variantEvalUtils.initializeEvaluationContexts(stratificationObjects, evaluationObjects, null, null);
|
||||
|
||||
// Initialize report table
|
||||
report = initializeGATKReport(stratificationObjects, evaluationObjects);
|
||||
}
|
||||
|
||||
/**
|
||||
* Figure out what the allowable variation types are based on the eval context
|
||||
*
|
||||
* @param tracker the reference metadata tracker
|
||||
* @param ref the reference context
|
||||
* @param compNames the comp track names
|
||||
* @param evalNames the evaluation track names
|
||||
* @return the set of allowable variation types
|
||||
*/
|
||||
private EnumSet<VariantContext.Type> getAllowableVariationTypes(RefMetaDataTracker tracker, ReferenceContext ref, Set<String> compNames, Set<String> evalNames) {
|
||||
EnumSet<VariantContext.Type> allowableTypes = EnumSet.of(VariantContext.Type.NO_VARIATION);
|
||||
|
||||
if (tracker != null) {
|
||||
Collection<VariantContext> evalvcs = tracker.getVariantContexts(ref, evalNames, null, ref.getLocus(), true, false);
|
||||
|
||||
for ( VariantContext vc : evalvcs ) {
|
||||
allowableTypes.add(vc.getType());
|
||||
}
|
||||
|
||||
if (allowableTypes.size() == 1) {
|
||||
// We didn't find any variation in the eval track, so now let's look at the comp track for allowable types
|
||||
Collection<VariantContext> compvcs = tracker.getVariantContexts(ref, compNames, null, ref.getLocus(), true, false);
|
||||
|
||||
for ( VariantContext vc : compvcs ) {
|
||||
allowableTypes.add(vc.getType());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return allowableTypes;
|
||||
}
|
||||
|
||||
/**
|
||||
* Subset a VariantContext to a single sample
|
||||
*
|
||||
* @param vc the VariantContext object containing multiple samples
|
||||
* @param sampleName the sample to pull out of the VariantContext
|
||||
* @return a new VariantContext with just the requested sample
|
||||
*/
|
||||
private VariantContext getSubsetOfVariantContext(VariantContext vc, String sampleName) {
|
||||
ArrayList<String> sampleNames = new ArrayList<String>();
|
||||
sampleNames.add(sampleName);
|
||||
|
||||
return getSubsetOfVariantContext(vc, sampleNames);
|
||||
}
|
||||
|
||||
/**
|
||||
* Subset a VariantContext to a set of samples
|
||||
*
|
||||
* @param vc the VariantContext object containing multiple samples
|
||||
* @param sampleNames the samples to pull out of the VariantContext
|
||||
* @return a new VariantContext with just the requested samples
|
||||
*/
|
||||
private VariantContext getSubsetOfVariantContext(VariantContext vc, Collection<String> sampleNames) {
|
||||
VariantContext vcsub = vc.subContextFromGenotypes(vc.getGenotypes(sampleNames).values());
|
||||
|
||||
HashMap<String,Object> newAts = new HashMap<String,Object>(vcsub.getAttributes());
|
||||
|
||||
int originalAlleleCount = vc.getHetCount() + 2*vc.getHomVarCount();
|
||||
int newAlleleCount = vcsub.getHetCount() + 2*vcsub.getHomVarCount();
|
||||
|
||||
if (originalAlleleCount == newAlleleCount && newAlleleCount == 1) {
|
||||
newAts.put("ISSINGLETON", true);
|
||||
}
|
||||
|
||||
VariantContextUtils.calculateChromosomeCounts(vcsub, newAts, true);
|
||||
vcsub = VariantContext.modifyAttributes(vcsub,newAts);
|
||||
|
||||
logger.debug(String.format("VC %s subset to %s AC%n", vc.getSource(), vc.getAttributeAsString(VCFConstants.ALLELE_COUNT_KEY)));
|
||||
|
||||
return vcsub;
|
||||
}
|
||||
|
||||
/**
|
||||
* For a list of track names, bind the variant contexts to a trackName->sampleName->VariantContext mapping.
|
||||
* Additional variant contexts per sample are automatically generated and added to the map unless the
|
||||
* sample name matches the ALL_SAMPLE_NAME constant.
|
||||
*
|
||||
* @param tracker the metadata tracker
|
||||
* @param ref the reference context
|
||||
* @param trackNames the list of track names to process
|
||||
* @param allowableTypes a set of allowable variation types
|
||||
* @param byFilter if false, only accept PASSing VariantContexts. Otherwise, accept both PASSing and filtered sites
|
||||
* @param trackPerSample if false, don't stratify per sample (and skip splitting the VariantContext into per-sample contexts, which is only needed for that stratification)
|
||||
* @param allowNoCalls if false, don't accept no-call loci from a variant track
|
||||
* @return a mapping of track names to maps of sample name to VariantContext
|
||||
*/
|
||||
private HashMap<String, HashMap<String, VariantContext>> bindVariantContexts(RefMetaDataTracker tracker, ReferenceContext ref, Set<String> trackNames, EnumSet<VariantContext.Type> allowableTypes, boolean byFilter, boolean trackPerSample, boolean allowNoCalls) {
|
||||
HashMap<String, HashMap<String, VariantContext>> bindings = new HashMap<String, HashMap<String, VariantContext>>();
|
||||
|
||||
for ( String trackName : trackNames ) {
|
||||
HashMap<String, VariantContext> vcs = new HashMap<String, VariantContext>();
|
||||
|
||||
Collection<VariantContext> contexts = tracker == null ? null : tracker.getVariantContexts(ref, trackName, allowableTypes, ref.getLocus(), true, true);
|
||||
VariantContext vc = contexts != null && contexts.size() == 1 ? contexts.iterator().next() : null;
|
||||
|
||||
// First, filter the VariantContext to represent only the samples for evaluation
|
||||
if ( vc != null ) {
|
||||
VariantContext vcsub = vc;
|
||||
|
||||
if (vc.hasGenotypes() && vc.hasGenotypes(sampleNamesForEvaluation)) {
|
||||
vcsub = getSubsetOfVariantContext(vc, sampleNamesForEvaluation);
|
||||
}
|
||||
|
||||
if ((byFilter || !vcsub.isFiltered()) && (allowNoCalls || vcsub.getType() != VariantContext.Type.NO_VARIATION)) {
|
||||
vcs.put(ALL_SAMPLE_NAME, vcsub);
|
||||
}
|
||||
|
||||
// Now, if stratifying, split the subsetted vc per sample and add each as a new context
|
||||
if ( vc.hasGenotypes() && trackPerSample ) {
|
||||
for ( String sampleName : sampleNamesForEvaluation ) {
|
||||
VariantContext samplevc = getSubsetOfVariantContext(vc, sampleName);
|
||||
|
||||
if ((byFilter || !samplevc.isFiltered()) && (allowNoCalls || samplevc.getType() != VariantContext.Type.NO_VARIATION)) {
|
||||
vcs.put(sampleName, samplevc);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bindings.put(trackName, vcs);
|
||||
}
|
||||
}
|
||||
|
||||
return bindings;
|
||||
}
|
||||
|
||||
/**
|
||||
* Maps track names to sample name to VariantContext objects. For eval tracks, VariantContexts per specified
|
||||
* sample are also included.
|
||||
*
|
||||
* @param tracker the metadata tracker
|
||||
* @param ref the reference context
|
||||
* @param compNames the list of comp names to process
|
||||
* @param evalNames the list of eval names to process
|
||||
* @return a mapping of track names to maps of sample name to VariantContext
|
||||
*/
|
||||
private HashMap<String, HashMap<String, VariantContext>> getVariantContexts(RefMetaDataTracker tracker, ReferenceContext ref, Set<String> compNames, Set<String> evalNames) {
|
||||
HashMap<String, HashMap<String, VariantContext>> vcs = new HashMap<String, HashMap<String, VariantContext>>();
|
||||
|
||||
EnumSet<VariantContext.Type> allowableTypes = getAllowableVariationTypes(tracker, ref, compNames, evalNames);
|
||||
|
||||
boolean byFilter = false;
|
||||
boolean perSampleIsEnabled = false;
|
||||
for (VariantStratifier vs : stratificationObjects) {
|
||||
if (vs.getClass().getSimpleName().equals("Filter")) {
|
||||
byFilter = true;
|
||||
} else if (vs.getClass().getSimpleName().equals("Sample")) {
|
||||
perSampleIsEnabled = true;
|
||||
}
|
||||
}
|
||||
|
||||
HashMap<String, HashMap<String, VariantContext>> evalBindings = bindVariantContexts(tracker, ref, evalNames, allowableTypes, byFilter, perSampleIsEnabled, true);
|
||||
HashMap<String, HashMap<String, VariantContext>> compBindings = bindVariantContexts(tracker, ref, compNames, allowableTypes, byFilter, false, false);
|
||||
|
||||
vcs.putAll(compBindings);
|
||||
vcs.putAll(evalBindings);
|
||||
|
||||
return vcs;
|
||||
}
|
||||
|
||||
/**
|
||||
* Recursively initialize the state keys used to look up the right evaluation context based on the state of the variant context
|
||||
*
|
||||
* @param stateMap the map of allowable states
|
||||
* @param stateStack a stack of the states
|
||||
* @param stateKey a state key object
|
||||
* @param stateKeys all the state keys
|
||||
* @return a list of state keys
|
||||
*/
|
||||
private ArrayList<StateKey> initializeStateKeys(HashMap<VariantStratifier, ArrayList<String>> stateMap, Stack<HashMap<VariantStratifier, ArrayList<String>>> stateStack, StateKey stateKey, ArrayList<StateKey> stateKeys) {
|
||||
if (stateStack == null) {
|
||||
stateStack = new Stack<HashMap<VariantStratifier, ArrayList<String>>>();
|
||||
|
||||
for ( VariantStratifier vs : stateMap.keySet() ) {
|
||||
HashMap<VariantStratifier, ArrayList<String>> oneSetOfStates = new HashMap<VariantStratifier, ArrayList<String>>();
|
||||
oneSetOfStates.put(vs, stateMap.get(vs));
|
||||
|
||||
stateStack.add(oneSetOfStates);
|
||||
}
|
||||
}
|
||||
|
||||
if (!stateStack.isEmpty()) {
|
||||
Stack<HashMap<VariantStratifier, ArrayList<String>>> newStateStack = new Stack<HashMap<VariantStratifier, ArrayList<String>>>();
|
||||
newStateStack.addAll(stateStack);
|
||||
|
||||
HashMap<VariantStratifier, ArrayList<String>> oneSetOfStates = newStateStack.pop();
|
||||
VariantStratifier vs = oneSetOfStates.keySet().iterator().next();
|
||||
|
||||
for ( String state : oneSetOfStates.get(vs)) {
|
||||
StateKey newStateKey = new StateKey();
|
||||
if (stateKey != null) {
|
||||
newStateKey.putAll(stateKey);
|
||||
}
|
||||
|
||||
newStateKey.put(vs.getClass().getSimpleName(), state);
|
||||
|
||||
initializeStateKeys(stateMap, newStateStack, newStateKey, stateKeys);
|
||||
}
|
||||
} else {
|
||||
stateKeys.add(stateKey);
|
||||
|
||||
return stateKeys;
|
||||
}
|
||||
|
||||
return stateKeys;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the number of samples being used
|
||||
* @return the number of samples
|
||||
*/
|
||||
public int getNumSamples() {
|
||||
return numSamples;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the minimum phasing quality to be used with the GenotypePhasingEvaluator module
|
||||
* @return the minimum phasing quality
|
||||
*/
|
||||
public double getMinPhaseQuality() {
|
||||
return MIN_PHASE_QUALITY;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the family structure to be used with the MendelianViolationEvaluator module
|
||||
* @return the family structure string
|
||||
*/
|
||||
public String getFamilyStructure() {
|
||||
return FAMILY_STRUCTURE;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the mendelian violation qual threshold to be used with the MendelianViolationEvaluator module
|
||||
* @return the mendelian violation qual threshold
|
||||
*/
|
||||
public double getMendelianViolationQualThreshold() {
|
||||
return MENDELIAN_VIOLATION_QUAL_THRESHOLD;
|
||||
report = variantEvalUtils.initializeGATKReport(stratificationObjects, evaluationObjects);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -659,32 +194,34 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
|
|||
}
|
||||
}
|
||||
|
||||
// track sample vc
|
||||
HashMap<String, HashMap<String, VariantContext>> vcs = getVariantContexts(tracker, ref, compNames, evalNames);
|
||||
if (tracker != null) {
|
||||
// track sample vc
|
||||
HashMap<String, HashMap<String, VariantContext>> vcs = variantEvalUtils.getVariantContexts(tracker, ref, compNames, evalNames);
|
||||
|
||||
for ( String compName : compNames ) {
|
||||
VariantContext comp = vcs.containsKey(compName) && vcs.get(compName) != null && vcs.get(compName).containsKey(ALL_SAMPLE_NAME) ? vcs.get(compName).get(ALL_SAMPLE_NAME) : null;
|
||||
for ( String compName : compNames ) {
|
||||
VariantContext comp = vcs.containsKey(compName) && vcs.get(compName) != null && vcs.get(compName).containsKey(ALL_SAMPLE_NAME) ? vcs.get(compName).get(ALL_SAMPLE_NAME) : null;
|
||||
|
||||
for ( String evalName : evalNames ) {
|
||||
for ( String sampleName : sampleNamesForStratification ) {
|
||||
VariantContext eval = vcs.containsKey(evalName) && vcs.get(evalName) != null ? vcs.get(evalName).get(sampleName) : null;
|
||||
for ( String evalName : evalNames ) {
|
||||
for ( String sampleName : sampleNamesForStratification ) {
|
||||
VariantContext eval = vcs.containsKey(evalName) && vcs.get(evalName) != null ? vcs.get(evalName).get(sampleName) : null;
|
||||
|
||||
HashMap<VariantStratifier, ArrayList<String>> stateMap = new HashMap<VariantStratifier, ArrayList<String>>();
|
||||
for ( VariantStratifier vs : stratificationObjects ) {
|
||||
ArrayList<String> states = vs.getRelevantStates(ref, comp, compName, eval, evalName, sampleName);
|
||||
stateMap.put(vs, states);
|
||||
}
|
||||
HashMap<VariantStratifier, ArrayList<String>> stateMap = new HashMap<VariantStratifier, ArrayList<String>>();
|
||||
for ( VariantStratifier vs : stratificationObjects ) {
|
||||
ArrayList<String> states = vs.getRelevantStates(ref, tracker, comp, compName, eval, evalName, sampleName);
|
||||
stateMap.put(vs, states);
|
||||
}
|
||||
|
||||
ArrayList<StateKey> stateKeys = new ArrayList<StateKey>();
|
||||
initializeStateKeys(stateMap, null, null, stateKeys);
|
||||
ArrayList<StateKey> stateKeys = new ArrayList<StateKey>();
|
||||
variantEvalUtils.initializeStateKeys(stateMap, null, null, stateKeys);
|
||||
|
||||
HashSet<StateKey> stateKeysHash = new HashSet<StateKey>(stateKeys);
|
||||
HashSet<StateKey> stateKeysHash = new HashSet<StateKey>(stateKeys);
|
||||
|
||||
for ( StateKey stateKey : stateKeysHash ) {
|
||||
NewEvaluationContext nec = evaluationContexts.get(stateKey);
|
||||
for ( StateKey stateKey : stateKeysHash ) {
|
||||
NewEvaluationContext nec = evaluationContexts.get(stateKey);
|
||||
|
||||
synchronized (nec) {
|
||||
nec.apply(tracker, ref, context, comp, eval);
|
||||
synchronized (nec) {
|
||||
nec.apply(tracker, ref, context, comp, eval);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -694,38 +231,13 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
|
|||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* A composite, 'reduce of reduces' function.
|
||||
*
|
||||
* @param lhs 'left-most' portion of data in the composite reduce.
|
||||
* @param rhs 'right-most' portion of data in the composite reduce.
|
||||
* @return The composite reduce type.
|
||||
*/
|
||||
public Integer treeReduce(Integer lhs, Integer rhs) {
|
||||
return null;
|
||||
}
|
||||
public Integer treeReduce(Integer lhs, Integer rhs) { return null; }
|
||||
|
||||
/**
|
||||
* Provide an initial value for reduce computations.
|
||||
*
|
||||
* @return Initial value of reduce.
|
||||
*/
|
||||
@Override
|
||||
public Integer reduceInit() {
|
||||
return null;
|
||||
}
|
||||
public Integer reduceInit() { return null; }
|
||||
|
||||
/**
|
||||
* Reduces a single map with the accumulator provided as the ReduceType.
|
||||
*
|
||||
* @param value result of the map.
|
||||
* @param sum accumulator for the reduce.
|
||||
* @return accumulator with result of the map taken into account.
|
||||
*/
|
||||
@Override
|
||||
public Integer reduce(Integer value, Integer sum) {
|
||||
return null;
|
||||
}
|
||||
public Integer reduce(Integer value, Integer sum) { return null; }
|
||||
|
||||
/**
|
||||
* Output the finalized report
|
||||
|
|
@ -822,4 +334,31 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
|
|||
|
||||
report.print(out);
|
||||
}
|
||||
|
||||
// Accessors
|
||||
public Logger getLogger() { return logger; }
|
||||
|
||||
public int getNumSamples() { return numSamples; }
|
||||
|
||||
public double getMinPhaseQuality() { return MIN_PHASE_QUALITY; }
|
||||
|
||||
public String getFamilyStructure() { return FAMILY_STRUCTURE; }
|
||||
|
||||
public double getMendelianViolationQualThreshold() { return MENDELIAN_VIOLATION_QUAL_THRESHOLD; }
|
||||
|
||||
public TreeSet<VariantStratifier> getStratificationObjects() { return stratificationObjects; }
|
||||
|
||||
public static String getAllSampleName() { return ALL_SAMPLE_NAME; }
|
||||
|
||||
public Set<String> getKnownNames() { return knownNames; }
|
||||
|
||||
public Set<String> getEvalNames() { return evalNames; }
|
||||
|
||||
public Set<String> getSampleNamesForEvaluation() { return sampleNamesForEvaluation; }
|
||||
|
||||
public Set<String> getSampleNamesForStratification() { return sampleNamesForStratification; }
|
||||
|
||||
public Set<String> getCompNames() { return compNames; }
|
||||
|
||||
public Set<SortableJexlVCMatchExp> getJexlExpressions() { return jexlExpressions; }
|
||||
}
|
||||
|
|
@ -19,18 +19,17 @@ import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint;
|
|||
*/
|
||||
@Analysis(description = "The overlap between eval and comp sites")
|
||||
public class CompOverlap extends VariantEvaluator implements StandardEval {
|
||||
|
||||
@DataPoint(description = "number of eval SNP sites")
|
||||
long nEvalSNPs = 0;
|
||||
long nEvalVariants = 0;
|
||||
|
||||
@DataPoint(description = "number of comp SNP sites")
|
||||
long nCompSNPs = 0;
|
||||
long nCompVariants = 0;
|
||||
|
||||
@DataPoint(description = "number of eval sites outside of comp sites")
|
||||
long novelSites = 0;
|
||||
|
||||
@DataPoint(description = "number of eval sites at comp sites")
|
||||
long nSNPsAtComp = 0;
|
||||
long nVariantsAtComp = 0;
|
||||
|
||||
@DataPoint(description = "percentage of eval sites at comp sites")
|
||||
double compRate = 0.0;
|
||||
|
|
@ -45,9 +44,9 @@ public class CompOverlap extends VariantEvaluator implements StandardEval {
|
|||
return 2; // we need to see each eval track and each comp track
|
||||
}
|
||||
|
||||
public long nNovelSites() { return nEvalSNPs - nSNPsAtComp; }
|
||||
public double compRate() { return rate(nSNPsAtComp, nEvalSNPs); }
|
||||
public double concordanceRate() { return rate(nConcordant, nSNPsAtComp); }
|
||||
public long nNovelSites() { return nEvalVariants - nVariantsAtComp; }
|
||||
public double compRate() { return rate(nVariantsAtComp, nEvalVariants); }
|
||||
public double concordanceRate() { return rate(nConcordant, nVariantsAtComp); }
|
||||
|
||||
public void finalizeEvaluation() {
|
||||
compRate = 100 * compRate();
|
||||
|
|
@ -76,21 +75,18 @@ public class CompOverlap extends VariantEvaluator implements StandardEval {
|
|||
}
|
||||
|
||||
public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
//boolean expectingIndels = false;
|
||||
boolean evalIsGood = eval != null;
|
||||
boolean expectingIndels = eval != null && eval.isIndel();
|
||||
|
||||
//boolean compIsGood = expectingIndels ? comp != null && comp.isNotFiltered() && comp.isIndel() : comp != null && comp.isNotFiltered() && comp.isSNP() ;
|
||||
//boolean evalIsGood = expectingIndels ? eval != null && eval.isIndel() : eval != null && eval.isSNP() ;
|
||||
boolean compIsGood = expectingIndels ? comp != null && comp.isNotFiltered() && comp.isIndel() : comp != null && comp.isNotFiltered() && comp.isSNP() ;
|
||||
|
||||
boolean compIsGood = comp != null && comp.isNotFiltered() && comp.isSNP() ;
|
||||
boolean evalIsGood = eval != null && eval.isSNP() ;
|
||||
|
||||
if (compIsGood) nCompSNPs++; // count the number of comp events
|
||||
if (evalIsGood) nEvalSNPs++; // count the number of eval events
|
||||
if (compIsGood) nCompVariants++; // count the number of comp events
|
||||
if (evalIsGood) nEvalVariants++; // count the number of eval events
|
||||
|
||||
if (compIsGood && evalIsGood) {
|
||||
nSNPsAtComp++;
|
||||
nVariantsAtComp++;
|
||||
|
||||
if (!discordantP(eval, comp)) { // count whether we're concordant or not with the comp value
|
||||
if (!discordantP(eval, comp)) { // count whether we're concordant or not with the comp value
|
||||
nConcordant++;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,18 +1,19 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broad.tribble.util.variantcontext.Allele;
|
||||
import org.broad.tribble.util.variantcontext.Genotype;
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broad.tribble.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.gatk.contexts.*;
|
||||
import org.broadinstitute.sting.gatk.refdata.*;
|
||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint;
|
||||
import org.broadinstitute.sting.utils.report.utils.TableType;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.report.utils.TableType;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
|
|||
|
|
@ -9,7 +9,6 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|||
import org.broadinstitute.sting.gatk.walkers.phasing.AllelePair;
|
||||
import org.broadinstitute.sting.gatk.walkers.phasing.ReadBackedPhasingWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.NewEvaluationContext;
|
||||
|
|
|
|||
|
|
@ -4,9 +4,9 @@ import org.broad.tribble.util.variantcontext.VariantContext;
|
|||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.report.utils.TableType;
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -5,9 +5,9 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
|||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.report.utils.TableType;
|
||||
|
||||
import java.util.ArrayList;
|
||||
|
|
@ -157,8 +157,7 @@ public class IndelMetricsByAC extends VariantEvaluator {
|
|||
}
|
||||
|
||||
public String toString() {
|
||||
String returnString = "";
|
||||
return returnString;
|
||||
return "";
|
||||
}
|
||||
|
||||
public void incrValue( VariantContext eval ) {
|
||||
|
|
|
|||
|
|
@ -474,10 +474,6 @@ public class IndelStatistics extends VariantEvaluator {
|
|||
return getName();
|
||||
}
|
||||
|
||||
//public String update2(VariantContext eval, VariantContext validation, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context, VariantEvalWalker.EvaluationContext group) {
|
||||
//return null;
|
||||
//}
|
||||
|
||||
public String update1(VariantContext eval, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
|
||||
if (eval != null ) {
|
||||
|
|
@ -502,13 +498,9 @@ public class IndelStatistics extends VariantEvaluator {
|
|||
|
||||
return null; // This module doesn't capture any interesting sites, so return null
|
||||
}
|
||||
public String update0(VariantContext eval, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
|
||||
return null;
|
||||
}
|
||||
public void finalizeEvaluation() {
|
||||
//
|
||||
int k=0;
|
||||
int k=0;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,18 +3,18 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators;
|
|||
import org.broad.tribble.util.variantcontext.Allele;
|
||||
import org.broad.tribble.util.variantcontext.Genotype;
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Arrays;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.List;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/**
|
||||
* Mendelian violation detection and counting
|
||||
|
|
|
|||
|
|
@ -11,8 +11,8 @@ import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.Sample;
|
|||
import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.StateKey;
|
||||
import org.broadinstitute.sting.utils.report.utils.TableType;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.report.utils.TableType;
|
||||
|
||||
import java.util.ArrayList;
|
||||
|
||||
|
|
|
|||
|
|
@ -31,9 +31,9 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
|||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.report.utils.TableType;
|
||||
|
||||
import java.util.ArrayList;
|
||||
|
|
|
|||
|
|
@ -3,6 +3,8 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications;
|
|||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Set;
|
||||
|
|
@ -13,7 +15,7 @@ public class CompRod extends VariantStratifier implements RequiredStratification
|
|||
private ArrayList<String> states;
|
||||
|
||||
@Override
|
||||
public void initialize(Set<VariantContextUtils.JexlVCMatchExp> jexlExpressions, Set<String> compNames, Set<String> knownNames, Set<String> evalNames, Set<String> sampleNames) {
|
||||
public void initialize(Set<SortableJexlVCMatchExp> jexlExpressions, Set<String> compNames, Set<String> knownNames, Set<String> evalNames, Set<String> sampleNames) {
|
||||
this.compNames = compNames;
|
||||
|
||||
states = new ArrayList<String>();
|
||||
|
|
@ -24,7 +26,7 @@ public class CompRod extends VariantStratifier implements RequiredStratification
|
|||
return states;
|
||||
}
|
||||
|
||||
public ArrayList<String> getRelevantStates(ReferenceContext ref, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
|
||||
public ArrayList<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
|
||||
ArrayList<String> relevantStates = new ArrayList<String>();
|
||||
|
||||
relevantStates.add(compName);
|
||||
|
|
|
|||
|
|
@ -3,6 +3,8 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications;
|
|||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Set;
|
||||
|
|
@ -11,7 +13,7 @@ public class CpG extends VariantStratifier implements StandardStratification {
|
|||
private ArrayList<String> states;
|
||||
|
||||
@Override
|
||||
public void initialize(Set<VariantContextUtils.JexlVCMatchExp> jexlExpressions, Set<String> compNames, Set<String> knownNames, Set<String> evalNames, Set<String> sampleNames) {
|
||||
public void initialize(Set<SortableJexlVCMatchExp> jexlExpressions, Set<String> compNames, Set<String> knownNames, Set<String> evalNames, Set<String> sampleNames) {
|
||||
states = new ArrayList<String>();
|
||||
states.add("all");
|
||||
states.add("CpG");
|
||||
|
|
@ -22,7 +24,7 @@ public class CpG extends VariantStratifier implements StandardStratification {
|
|||
return states;
|
||||
}
|
||||
|
||||
public ArrayList<String> getRelevantStates(ReferenceContext ref, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
|
||||
public ArrayList<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
|
||||
boolean isCpG = false;
|
||||
if (ref != null && ref.getBases() != null) {
|
||||
String fwRefBases = new String(ref.getBases());
|
||||
|
|
|
|||
|
|
@ -3,6 +3,8 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications;
|
|||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
|
|
@ -14,7 +16,7 @@ public class Degeneracy extends VariantStratifier {
|
|||
private HashMap<String, String> degeneracies;
|
||||
|
||||
@Override
|
||||
public void initialize(Set<VariantContextUtils.JexlVCMatchExp> jexlExpressions, Set<String> compNames, Set<String> knownNames, Set<String> evalNames, Set<String> sampleNames) {
|
||||
public void initialize(Set<SortableJexlVCMatchExp> jexlExpressions, Set<String> compNames, Set<String> knownNames, Set<String> evalNames, Set<String> sampleNames) {
|
||||
states = new ArrayList<String>();
|
||||
states.add("1-fold");
|
||||
states.add("2-fold");
|
||||
|
|
@ -52,7 +54,7 @@ public class Degeneracy extends VariantStratifier {
|
|||
return states;
|
||||
}
|
||||
|
||||
public ArrayList<String> getRelevantStates(ReferenceContext ref, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
|
||||
public ArrayList<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
|
||||
ArrayList<String> relevantStates = new ArrayList<String>();
|
||||
|
||||
relevantStates.add("all");
|
||||
|
|
@ -61,10 +63,9 @@ public class Degeneracy extends VariantStratifier {
|
|||
String type = null;
|
||||
String aa = null;
|
||||
|
||||
if (eval.getAttributeAsString("refseq.functionalClass") != null) {
|
||||
type = eval.getAttributeAsString("refseq.functionalClass");
|
||||
if (eval.hasAttribute("refseq.functionalClass")) {
|
||||
aa = eval.getAttributeAsString("refseq.variantAA");
|
||||
} else if (eval.getAttributeAsString("refseq.functionalClass_1") != null) {
|
||||
} else if (eval.hasAttribute("refseq.functionalClass_1")) {
|
||||
int annotationId = 1;
|
||||
String key;
|
||||
|
||||
|
|
@ -85,7 +86,7 @@ public class Degeneracy extends VariantStratifier {
|
|||
}
|
||||
|
||||
annotationId++;
|
||||
} while (eval.getAttributeAsString(key) != null);
|
||||
} while (eval.hasAttribute(key));
|
||||
}
|
||||
|
||||
if (aa != null && degeneracies.containsKey(aa)) {
|
||||
|
|
|
|||
|
|
@ -3,6 +3,8 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications;
|
|||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Set;
|
||||
|
|
@ -13,7 +15,7 @@ public class EvalRod extends VariantStratifier implements RequiredStratification
|
|||
private ArrayList<String> states;
|
||||
|
||||
@Override
|
||||
public void initialize(Set<VariantContextUtils.JexlVCMatchExp> jexlExpressions, Set<String> compNames, Set<String> knownNames, Set<String> evalNames, Set<String> sampleNames) {
|
||||
public void initialize(Set<SortableJexlVCMatchExp> jexlExpressions, Set<String> compNames, Set<String> knownNames, Set<String> evalNames, Set<String> sampleNames) {
|
||||
this.evalNames = evalNames;
|
||||
|
||||
states = new ArrayList<String>();
|
||||
|
|
@ -24,7 +26,7 @@ public class EvalRod extends VariantStratifier implements RequiredStratification
|
|||
return states;
|
||||
}
|
||||
|
||||
public ArrayList<String> getRelevantStates(ReferenceContext ref, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
|
||||
public ArrayList<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
|
||||
ArrayList<String> relevantStates = new ArrayList<String>();
|
||||
|
||||
relevantStates.add(evalName);
|
||||
|
|
|
|||
|
|
@ -3,6 +3,8 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications;
|
|||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Set;
|
||||
|
|
@ -12,7 +14,7 @@ public class Filter extends VariantStratifier {
|
|||
private ArrayList<String> states;
|
||||
|
||||
@Override
|
||||
public void initialize(Set<VariantContextUtils.JexlVCMatchExp> jexlExpressions, Set<String> compNames, Set<String> knownNames, Set<String> evalNames, Set<String> sampleNames) {
|
||||
public void initialize(Set<SortableJexlVCMatchExp> jexlExpressions, Set<String> compNames, Set<String> knownNames, Set<String> evalNames, Set<String> sampleNames) {
|
||||
states = new ArrayList<String>();
|
||||
states.add("called");
|
||||
states.add("filtered");
|
||||
|
|
@ -23,7 +25,7 @@ public class Filter extends VariantStratifier {
|
|||
return states;
|
||||
}
|
||||
|
||||
public ArrayList<String> getRelevantStates(ReferenceContext ref, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
|
||||
public ArrayList<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
|
||||
ArrayList<String> relevantStates = new ArrayList<String>();
|
||||
|
||||
relevantStates.add("raw");
|
||||
|
|
|
|||
|
|
@ -3,6 +3,8 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications;
|
|||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Set;
|
||||
|
|
@ -12,7 +14,7 @@ public class FunctionalClass extends VariantStratifier {
|
|||
private ArrayList<String> states;
|
||||
|
||||
@Override
|
||||
public void initialize(Set<VariantContextUtils.JexlVCMatchExp> jexlExpressions, Set<String> compNames, Set<String> knownNames, Set<String> evalNames, Set<String> sampleNames) {
|
||||
public void initialize(Set<SortableJexlVCMatchExp> jexlExpressions, Set<String> compNames, Set<String> knownNames, Set<String> evalNames, Set<String> sampleNames) {
|
||||
states = new ArrayList<String>();
|
||||
states.add("all");
|
||||
states.add("silent");
|
||||
|
|
@ -24,7 +26,7 @@ public class FunctionalClass extends VariantStratifier {
|
|||
return states;
|
||||
}
|
||||
|
||||
public ArrayList<String> getRelevantStates(ReferenceContext ref, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
|
||||
public ArrayList<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
|
||||
ArrayList<String> relevantStates = new ArrayList<String>();
|
||||
|
||||
relevantStates.add("all");
|
||||
|
|
@ -32,9 +34,9 @@ public class FunctionalClass extends VariantStratifier {
|
|||
if (eval != null && eval.isVariant()) {
|
||||
String type = null;
|
||||
|
||||
if (eval.getAttributeAsString("refseq.functionalClass") != null) {
|
||||
if (eval.hasAttribute("refseq.functionalClass")) {
|
||||
type = eval.getAttributeAsString("refseq.functionalClass");
|
||||
} else if (eval.getAttributeAsString("refseq.functionalClass_1") != null) {
|
||||
} else if (eval.hasAttribute("refseq.functionalClass_1")) {
|
||||
int annotationId = 1;
|
||||
String key;
|
||||
|
||||
|
|
@ -43,7 +45,7 @@ public class FunctionalClass extends VariantStratifier {
|
|||
|
||||
String newtype = eval.getAttributeAsString(key);
|
||||
|
||||
if ( newtype != null &&
|
||||
if ( newtype != null && !newtype.equalsIgnoreCase("null") &&
|
||||
( type == null ||
|
||||
( type.equals("silent") && !newtype.equals("silent") ) ||
|
||||
( type.equals("missense") && newtype.equals("nonsense") ) )
|
||||
|
|
@ -52,7 +54,7 @@ public class FunctionalClass extends VariantStratifier {
|
|||
}
|
||||
|
||||
annotationId++;
|
||||
} while (eval.getAttributeAsString(key) != null);
|
||||
} while (eval.hasAttribute(key));
|
||||
}
|
||||
|
||||
if (type != null) {
|
||||
|
|
|
|||
|
|
@ -3,22 +3,24 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications;
|
|||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Set;
|
||||
|
||||
public class JexlExpression extends VariantStratifier implements StandardStratification {
|
||||
// needs to know the jexl expressions
|
||||
private Set<VariantContextUtils.JexlVCMatchExp> jexlExpressions;
|
||||
private Set<SortableJexlVCMatchExp> jexlExpressions;
|
||||
private ArrayList<String> states;
|
||||
|
||||
@Override
|
||||
public void initialize(Set<VariantContextUtils.JexlVCMatchExp> jexlExpressions, Set<String> compNames, Set<String> knownNames, Set<String> evalNames, Set<String> sampleNames) {
|
||||
public void initialize(Set<SortableJexlVCMatchExp> jexlExpressions, Set<String> compNames, Set<String> knownNames, Set<String> evalNames, Set<String> sampleNames) {
|
||||
this.jexlExpressions = jexlExpressions;
|
||||
|
||||
states = new ArrayList<String>();
|
||||
states.add("none");
|
||||
for ( VariantContextUtils.JexlVCMatchExp jexlExpression : jexlExpressions ) {
|
||||
for ( SortableJexlVCMatchExp jexlExpression : jexlExpressions ) {
|
||||
states.add(jexlExpression.name);
|
||||
}
|
||||
}
|
||||
|
|
@ -27,11 +29,11 @@ public class JexlExpression extends VariantStratifier implements StandardStratif
|
|||
return states;
|
||||
}
|
||||
|
||||
public ArrayList<String> getRelevantStates(ReferenceContext ref, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
|
||||
public ArrayList<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
|
||||
ArrayList<String> relevantStates = new ArrayList<String>();
|
||||
relevantStates.add("none");
|
||||
|
||||
for ( VariantContextUtils.JexlVCMatchExp jexlExpression : jexlExpressions ) {
|
||||
for ( SortableJexlVCMatchExp jexlExpression : jexlExpressions ) {
|
||||
if (eval != null && VariantContextUtils.match(eval, jexlExpression)) {
|
||||
relevantStates.add(jexlExpression.name);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,8 +3,12 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications;
|
|||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.EnumSet;
|
||||
import java.util.Set;
|
||||
|
||||
public class Novelty extends VariantStratifier implements StandardStratification {
|
||||
|
|
@ -13,7 +17,7 @@ public class Novelty extends VariantStratifier implements StandardStratification
|
|||
private ArrayList<String> states;
|
||||
|
||||
@Override
|
||||
public void initialize(Set<VariantContextUtils.JexlVCMatchExp> jexlExpressions, Set<String> compNames, Set<String> knownNames, Set<String> evalNames, Set<String> sampleNames) {
|
||||
public void initialize(Set<SortableJexlVCMatchExp> jexlExpressions, Set<String> compNames, Set<String> knownNames, Set<String> evalNames, Set<String> sampleNames) {
|
||||
this.knownNames = knownNames;
|
||||
|
||||
states = new ArrayList<String>();
|
||||
|
|
@ -26,11 +30,29 @@ public class Novelty extends VariantStratifier implements StandardStratification
|
|||
return states;
|
||||
}
|
||||
|
||||
public ArrayList<String> getRelevantStates(ReferenceContext ref, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
|
||||
ArrayList<String> relevantStates = new ArrayList<String>();
|
||||
public ArrayList<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
|
||||
boolean isNovel = true;
|
||||
|
||||
if (tracker != null) {
|
||||
for (String knownName : knownNames) {
|
||||
if (tracker.hasROD(knownName)) {
|
||||
EnumSet<VariantContext.Type> allowableTypes = EnumSet.of(VariantContext.Type.NO_VARIATION);
|
||||
if (eval != null) {
|
||||
allowableTypes.add(eval.getType());
|
||||
}
|
||||
|
||||
Collection<VariantContext> knownComps = tracker.getVariantContexts(ref, knownName, allowableTypes, ref.getLocus(), true, true);
|
||||
|
||||
isNovel = knownComps.size() == 0;
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ArrayList<String> relevantStates = new ArrayList<String>();
|
||||
relevantStates.add("all");
|
||||
relevantStates.add(comp == null ? "novel" : "known");
|
||||
relevantStates.add(isNovel ? "novel" : "known");
|
||||
|
||||
return relevantStates;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,7 +3,8 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications;
|
|||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Set;
|
||||
|
|
@ -13,7 +14,7 @@ public class Sample extends VariantStratifier {
|
|||
private ArrayList<String> samples;
|
||||
|
||||
@Override
|
||||
public void initialize(Set<VariantContextUtils.JexlVCMatchExp> jexlExpressions, Set<String> compNames, Set<String> knownNames, Set<String> evalNames, Set<String> sampleNames) {
|
||||
public void initialize(Set<SortableJexlVCMatchExp> jexlExpressions, Set<String> compNames, Set<String> knownNames, Set<String> evalNames, Set<String> sampleNames) {
|
||||
samples = new ArrayList<String>();
|
||||
samples.addAll(sampleNames);
|
||||
}
|
||||
|
|
@ -22,7 +23,7 @@ public class Sample extends VariantStratifier {
|
|||
return samples;
|
||||
}
|
||||
|
||||
public ArrayList<String> getRelevantStates(ReferenceContext ref, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
|
||||
public ArrayList<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
|
||||
ArrayList<String> relevantStates = new ArrayList<String>();
|
||||
relevantStates.add(sampleName);
|
||||
|
||||
|
|
|
|||
|
|
@ -4,18 +4,19 @@ import org.broad.tribble.util.variantcontext.VariantContext;
|
|||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Set;
|
||||
|
||||
public abstract class VariantStratifier implements Comparable {
|
||||
public abstract void initialize(Set<VariantContextUtils.JexlVCMatchExp> jexlExpressions, Set<String> compNames, Set<String> knownNames, Set<String> evalNames, Set<String> sampleNames);
|
||||
public abstract void initialize(Set<SortableJexlVCMatchExp> jexlExpressions, Set<String> compNames, Set<String> knownNames, Set<String> evalNames, Set<String> sampleNames);
|
||||
|
||||
public ArrayList<String> getAllStates() {
|
||||
return new ArrayList<String>();
|
||||
}
|
||||
|
||||
public ArrayList<String> getRelevantStates(ReferenceContext ref, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
|
||||
public ArrayList<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
|
||||
return null;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -52,40 +52,35 @@ public class NewEvaluationContext extends HashMap<VariantStratifier, String> {
|
|||
|
||||
public void apply(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context, VariantContext comp, VariantContext eval) {
|
||||
for ( VariantEvaluator evaluation : evaluationInstances.values() ) {
|
||||
//synchronized ( this ) {
|
||||
// we always call update0 in case the evaluation tracks things like number of bases covered
|
||||
//evaluation.update0(tracker, ref, context);
|
||||
// we always call update0 in case the evaluation tracks things like number of bases covered
|
||||
|
||||
// the other updateN methods don't see a null context
|
||||
if ( tracker == null )
|
||||
continue;
|
||||
// the other updateN methods don't see a null context
|
||||
if ( tracker == null )
|
||||
continue;
|
||||
|
||||
// now call the single or paired update function
|
||||
switch ( evaluation.getComparisonOrder() ) {
|
||||
case 1:
|
||||
if (eval != null) {
|
||||
evaluation.update1(eval, tracker, ref, context);
|
||||
}
|
||||
// now call the single or paired update function
|
||||
switch ( evaluation.getComparisonOrder() ) {
|
||||
case 1:
|
||||
if (eval != null) {
|
||||
evaluation.update1(eval, tracker, ref, context);
|
||||
}
|
||||
|
||||
break;
|
||||
case 2:
|
||||
//if (eval != null) {
|
||||
evaluation.update2(eval, comp, tracker, ref, context);
|
||||
//}
|
||||
break;
|
||||
case 2:
|
||||
//if (eval != null) {
|
||||
evaluation.update2(eval, comp, tracker, ref, context);
|
||||
//}
|
||||
|
||||
break;
|
||||
default:
|
||||
throw new ReviewedStingException("BUG: Unexpected evaluation order " + evaluation);
|
||||
}
|
||||
//}
|
||||
break;
|
||||
default:
|
||||
throw new ReviewedStingException("BUG: Unexpected evaluation order " + evaluation);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
//synchronized (this) {
|
||||
for ( VariantEvaluator evaluation : evaluationInstances.values() ) {
|
||||
evaluation.update0(tracker, ref, context);
|
||||
}
|
||||
//}
|
||||
for ( VariantEvaluator evaluation : evaluationInstances.values() ) {
|
||||
evaluation.update0(tracker, ref, context);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -9,6 +9,9 @@ import java.util.HashMap;
|
|||
import java.util.Map;
|
||||
|
||||
public class VariantEvalIntegrationTest extends WalkerTest {
|
||||
private static String variantEvalTestDataRoot = validationDataLocation + "/VariantEval";
|
||||
private static String fundamentalTestVCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.snps_and_indels.vcf";
|
||||
|
||||
private static String cmdRoot = "-T VariantEval" +
|
||||
" -R " + b36KGReference;
|
||||
|
||||
|
|
@ -18,18 +21,301 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
" -B:comp_genotypes,VCF " + validationDataLocation + "yri.trio.gatk.ug.head.vcf";
|
||||
|
||||
private static String rootGZ = cmdRoot +
|
||||
" -D " + GATKDataLocation + "dbsnp_129_b36.rod" +
|
||||
" -B:eval,VCF " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf.gz" +
|
||||
" -B:comp_genotypes,VCF " + validationDataLocation + "yri.trio.gatk.ug.head.vcf.gz";
|
||||
" -D " + GATKDataLocation + "dbsnp_129_b36.rod" +
|
||||
" -B:eval,VCF " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf.gz" +
|
||||
" -B:comp_genotypes,VCF " + validationDataLocation + "yri.trio.gatk.ug.head.vcf.gz";
|
||||
|
||||
private static String[] testsEnumerations = {root, rootGZ};
|
||||
|
||||
private String cmdLineBuilder(String ... arguments) {
|
||||
String cmdline = "";
|
||||
|
||||
for ( int argIndex = 0; argIndex < arguments.length; argIndex++ ) {
|
||||
cmdline += arguments[argIndex];
|
||||
|
||||
if (argIndex < arguments.length - 1) {
|
||||
cmdline += " ";
|
||||
}
|
||||
}
|
||||
|
||||
return cmdline;
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFundamentalsCountVariantsSNPsAndIndels() {
|
||||
// nProcessedLoci = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | awk '{ print length($4) }' | ~kiran/bin/SimpleStats = 38
|
||||
// nCalledLoci = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep -c PASS = 9
|
||||
// nRefLoci = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ if ($5 == ".") print $0 }' | wc -l = 4
|
||||
// nVariantLoci = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ if ($5 != ".") print $0 }' | wc -l = 5
|
||||
// variantRate = nVariantLoci / nProcessedLoci = 0.131578947
|
||||
// nSNPs = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ if ($5 != "." && length($4) == 1 && length($5) == 1) print $0 }' | wc -l = 3
|
||||
// nInsertions = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ if ($5 != "." && length($5) > 1) print $0 }' | wc -l = 1
|
||||
// nDeletions = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ if ($5 != "." && length($4) > 1) print $0 }' | wc -l = 1
|
||||
// nNoCalls = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "[[:punct:]]/[[:punct:]]") print $0 }' | wc -l = 4
|
||||
// nHets = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "0/1" || $i ~ "1/0") print $0 }' | wc -l = 8
|
||||
// nHomRef = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "0/0") print $0 }' | wc -l = 10
|
||||
// nHomVar = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "1/1") print $0 }' | wc -l = 5
|
||||
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
cmdLineBuilder(
|
||||
"-T VariantEval",
|
||||
"-R " + b37KGReference,
|
||||
"-D " + b37dbSNP129,
|
||||
"-B:eval,VCF " + fundamentalTestVCF,
|
||||
"-noEV",
|
||||
"-EV CountVariants",
|
||||
"-noST",
|
||||
"-BTI eval",
|
||||
"-o %s"
|
||||
),
|
||||
1,
|
||||
Arrays.asList("476b495de54e1a377c6895c02a6fdf6a")
|
||||
);
|
||||
executeTest("testFundamentalsCountVariantsSNPsandIndels", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFundamentalsCountVariantsSNPsAndIndelsWithNovelty() {
|
||||
// nProcessedLociKnown = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | awk '{ print length($4) }' | ~kiran/bin/SimpleStats = 38
|
||||
// nCalledLociKnown = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | grep DBSNP129 | awk '{ if ($5 != ".") print $0 }'= 3
|
||||
// nVariantLociKnown = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | grep DBSNP129 | awk '{ if ($5 != ".") print $0 }' | wc -l = 3
|
||||
// variantRateKnown = nVariantLoci / nProcessedLoci = 0.0789473684
|
||||
// nSNPsKnown = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | grep DBSNP129 | awk '{ if ($5 != "." && length($5) == 1) print $0 }' | wc -l = 3
|
||||
// nInsertionsKnown = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | grep DBSNP129 | awk '{ if ($5 != "." && length($5) > 1) print $0 }' | wc -l = 0
|
||||
// nDeletionsKnown = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | grep DBSNP129 | awk '{ if ($5 != "." && length($4) > 1) print $0 }' | wc -l = 0
|
||||
// nNoCallsKnown = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | grep DBSNP129 | awk '{ if ($5 != ".") print $0 }' | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "[[:punct:]]/[[:punct:]]") print $0 }' | wc -l = 0
|
||||
// nHetsKnown = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | grep DBSNP129 | awk '{ if ($5 != ".") print $0 }' | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "0/1" || $i ~ "1/0") print $0 }' | wc -l = 3
|
||||
// nHomRefKnown = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | grep DBSNP129 | awk '{ if ($5 != ".") print $0 }' | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "0/0") print $0 }' | wc -l = 1
|
||||
// nHomVarKnown = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | grep DBSNP129 | awk '{ if ($5 != ".") print $0 }' | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "1/1") print $0 }' | wc -l = 5
|
||||
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
cmdLineBuilder(
|
||||
"-T VariantEval",
|
||||
"-R " + b37KGReference,
|
||||
"-D " + b37dbSNP129,
|
||||
"-B:eval,VCF " + fundamentalTestVCF,
|
||||
"-noEV",
|
||||
"-EV CountVariants",
|
||||
"-noST",
|
||||
"-ST Novelty",
|
||||
"-BTI eval",
|
||||
"-o %s"
|
||||
),
|
||||
1,
|
||||
Arrays.asList("9f4e4fff339e725f42d65063e43e7d1c")
|
||||
);
|
||||
executeTest("testFundamentalsCountVariantsSNPsandIndelsWithNovelty", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFundamentalsCountVariantsSNPsAndIndelsWithNoveltyAndFilter() {
|
||||
// nProcessedLociFail = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | awk '{ print length($4) }' | ~kiran/bin/SimpleStats = 38
|
||||
// nCalledLociFail = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep -vc PASS = 3
|
||||
// nRefLociFail = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep -v PASS | awk '{ if ($5 == ".") print $0 }' | wc -l = 1
|
||||
// nVariantLociFail = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep -v PASS | awk '{ if ($5 != ".") print $0 }' | wc -l = 2
|
||||
// nSNPsFail = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep -v PASS | awk '{ if ($5 != "." && length($4) == 1 && length($5) == 1) print $0 }' | wc -l = 1
|
||||
// nInsertionsFail = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep -v PASS | awk '{ if ($5 != "." && length($5) > 1) print $0 }' | wc -l = 0
|
||||
// nDeletionsFail = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep -v PASS | awk '{ if ($5 != "." && length($4) > 1) print $0 }' | wc -l = 1
|
||||
// nNoCallsFail = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep -v PASS | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "[[:punct:]]/[[:punct:]]") print $0 }' | wc -l = 3
|
||||
// nHetsFail = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep -v PASS | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "0/1" || $i ~ "1/0") print $0 }' | wc -l = 1
|
||||
// nHomRefFail = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep -v PASS | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "0/0") print $0 }' | wc -l = 2
|
||||
// nHomVarFail = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep -v PASS | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "1/1") print $0 }' | wc -l = 3
|
||||
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
cmdLineBuilder(
|
||||
"-T VariantEval",
|
||||
"-R " + b37KGReference,
|
||||
"-D " + b37dbSNP129,
|
||||
"-B:eval,VCF " + fundamentalTestVCF,
|
||||
"-noEV",
|
||||
"-EV CountVariants",
|
||||
"-noST",
|
||||
"-ST Novelty",
|
||||
"-ST Filter",
|
||||
"-BTI eval",
|
||||
"-o %s"
|
||||
),
|
||||
1,
|
||||
Arrays.asList("369fa4f37bcc03b8a0bc1e58bf22bf0a")
|
||||
);
|
||||
executeTest("testFundamentalsCountVariantsSNPsandIndelsWithNoveltyAndFilter", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFundamentalsCountVariantsSNPsAndIndelsWithCpG() {
|
||||
// nProcessedLoci = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | awk '{ print length($4) }' | ~kiran/bin/SimpleStats = 38
|
||||
// nCalledLoci = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep NONCPG | grep -c PASS = 8
|
||||
// nRefLoci = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep NONCPG | grep PASS | awk '{ if ($5 == ".") print $0 }' | wc -l = 3
|
||||
// nVariantLoci = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep NONCPG | grep PASS | awk '{ if ($5 != ".") print $0 }' | wc -l = 5
|
||||
// nSNPs = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep NONCPG | grep PASS | awk '{ if ($5 != "." && length($4) == 1 && length($5) == 1) print $0 }' | wc -l = 3
|
||||
// nInsertions = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep NONCPG | grep PASS | awk '{ if ($5 != "." && length($5) > 1) print $0 }' | wc -l = 1
|
||||
// nDeletions = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep NONCPG | grep PASS | awk '{ if ($5 != "." && length($4) > 1) print $0 }' | wc -l = 1
|
||||
// nNoCalls = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep NONCPG | grep PASS | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "[[:punct:]]/[[:punct:]]") print $0 }' | wc -l = 4
|
||||
// nHets = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep NONCPG | grep PASS | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "0/1" || $i ~ "1/0") print $0 }' | wc -l = 8
|
||||
// nHomRef = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep NONCPG | grep PASS | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "0/0") print $0 }' | wc -l = 10
|
||||
// nHomVar = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep NONCPG | grep PASS | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "1/1") print $0 }' | wc -l = 5
|
||||
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
cmdLineBuilder(
|
||||
"-T VariantEval",
|
||||
"-R " + b37KGReference,
|
||||
"-D " + b37dbSNP129,
|
||||
"-B:eval,VCF " + fundamentalTestVCF,
|
||||
"-noEV",
|
||||
"-EV CountVariants",
|
||||
"-noST",
|
||||
"-ST CpG",
|
||||
"-BTI eval",
|
||||
"-o %s"
|
||||
),
|
||||
1,
|
||||
Arrays.asList("891ad0d38f1a1b08b31fe1cb6a3afc04")
|
||||
);
|
||||
executeTest("testFundamentalsCountVariantsSNPsandIndelsWithCpG", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFundamentalsCountVariantsSNPsAndIndelsWithFunctionalClasses() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
cmdLineBuilder(
|
||||
"-T VariantEval",
|
||||
"-R " + b37KGReference,
|
||||
"-D " + b37dbSNP129,
|
||||
"-B:eval,VCF " + fundamentalTestVCF,
|
||||
"-noEV",
|
||||
"-EV CountVariants",
|
||||
"-noST",
|
||||
"-ST FunctionalClass",
|
||||
"-BTI eval",
|
||||
"-o %s"
|
||||
),
|
||||
1,
|
||||
Arrays.asList("d588179e2d9ed6e92a6ae1a80ac04270")
|
||||
);
|
||||
executeTest("testFundamentalsCountVariantsSNPsandIndelsWithFunctionalClass", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFundamentalsCountVariantsSNPsAndIndelsWithDegeneracy() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
cmdLineBuilder(
|
||||
"-T VariantEval",
|
||||
"-R " + b37KGReference,
|
||||
"-D " + b37dbSNP129,
|
||||
"-B:eval,VCF " + fundamentalTestVCF,
|
||||
"-noEV",
|
||||
"-EV CountVariants",
|
||||
"-noST",
|
||||
"-ST Degeneracy",
|
||||
"-BTI eval",
|
||||
"-o %s"
|
||||
),
|
||||
1,
|
||||
Arrays.asList("ceb0f5d9e0ea99eb8d00bce2f7bc1b73")
|
||||
);
|
||||
executeTest("testFundamentalsCountVariantsSNPsandIndelsWithDegeneracy", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFundamentalsCountVariantsSNPsAndIndelsWithSample() {
|
||||
// HG00513
|
||||
// nSNPs = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ if (length($4) == 1 && length($5) == 1) print $0 }' | awk '{ print $10 }' | grep -v '0/0' | grep -v '\.\/\.' | wc -l = 3
|
||||
// nInsertions = $ grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ if (length($4) == 1 && length($5) > 1) print $0 }' | awk '{ print $10 }' | grep -v '0/0' | grep -v '\.\/\.' | wc -l = 1
|
||||
// nDeletions = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ if (length($4) > 1 && length($5) == 1) print $0 }' | awk '{ print $10 }' | grep -v '0/0' | grep -v '\.\/\.' | wc -l = 0
|
||||
// nNoCalls = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ if ($10 ~ "[[:punct:]]/[[:punct:]]") print $0 }' | wc -l = 2
|
||||
// nHets = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ if ($10 ~ "0/1") print $0 }' | wc -l = 2
|
||||
// nHomRef = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ if ($10 ~ "0/0") print $0 }' | wc -l = 3
|
||||
// nHomVar = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ if ($10 ~ "1/1") print $0 }' | wc -l = 2
|
||||
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
cmdLineBuilder(
|
||||
"-T VariantEval",
|
||||
"-R " + b37KGReference,
|
||||
"-D " + b37dbSNP129,
|
||||
"-B:eval,VCF " + fundamentalTestVCF,
|
||||
"-noEV",
|
||||
"-EV CountVariants",
|
||||
"-noST",
|
||||
"-ST Sample",
|
||||
"-BTI eval",
|
||||
"-o %s"
|
||||
),
|
||||
1,
|
||||
Arrays.asList("94ce29b34b9e2e4304fc1bbf3f971a7d")
|
||||
);
|
||||
executeTest("testFundamentalsCountVariantsSNPsandIndelsWithSample", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFundamentalsCountVariantsSNPsAndIndelsWithJexlExpression() {
|
||||
// nSNPs = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20) print $0 }' | wc -l = 7
|
||||
// nRefLoci = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20 && $6 == ".") print $0 }' | wc -l = 4
|
||||
// nVariantLoci = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20 && $6 != ".") print $0 }' | wc -l = 3
|
||||
// nSNPs = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20 && length($5) == 1 && length($6) == 1 && $6 != ".") print $0 }' | wc -l = 3
|
||||
// nHomRef = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20) print $11 "\n" $12 "\n" $13 }' | grep -c '0/0' = 9
|
||||
// nHets = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20) print $11 "\n" $12 "\n" $13 }' | grep -c '0/1' = 3
|
||||
// nHomVar = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20) print $11 "\n" $12 "\n" $13 }' | grep -c '1/1' = 5
|
||||
// nNoCalls = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20) print $11 "\n" $12 "\n" $13 }' | grep -c '\.\/\.' = 4
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
cmdLineBuilder(
|
||||
"-T VariantEval",
|
||||
"-R " + b37KGReference,
|
||||
"-D " + b37dbSNP129,
|
||||
"-B:eval,VCF " + fundamentalTestVCF,
|
||||
"-noEV",
|
||||
"-EV CountVariants",
|
||||
"-noST",
|
||||
"-ST JexlExpression",
|
||||
"-select 'DP < 20'",
|
||||
"-selectName DepthSelect",
|
||||
"-BTI eval",
|
||||
"-o %s"
|
||||
),
|
||||
1,
|
||||
Arrays.asList("96de32970b204816ecd9a120b9d8782b")
|
||||
);
|
||||
executeTest("testFundamentalsCountVariantsSNPsandIndelsWithJexlExpression", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFundamentalsCountVariantsSNPsAndIndelsWithMultipleJexlExpressions() {
|
||||
// nSNPs = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20) print $0 }' | wc -l = 7
|
||||
// nRefLoci = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20 && $6 == ".") print $0 }' | wc -l = 4
|
||||
// nVariantLoci = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20 && $6 != ".") print $0 }' | wc -l = 3
|
||||
// nSNPs = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20 && length($5) == 1 && length($6) == 1 && $6 != ".") print $0 }' | wc -l = 3
|
||||
// nHomRef = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20) print $11 "\n" $12 "\n" $13 }' | grep -c '0/0' = 9
|
||||
// nHets = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20) print $11 "\n" $12 "\n" $13 }' | grep -c '0/1' = 3
|
||||
// nHomVar = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20) print $11 "\n" $12 "\n" $13 }' | grep -c '1/1' = 5
|
||||
// nNoCalls = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20) print $11 "\n" $12 "\n" $13 }' | grep -c '\.\/\.' = 4
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
cmdLineBuilder(
|
||||
"-T VariantEval",
|
||||
"-R " + b37KGReference,
|
||||
"-D " + b37dbSNP129,
|
||||
"-B:eval,VCF " + fundamentalTestVCF,
|
||||
"-noEV",
|
||||
"-EV CountVariants",
|
||||
"-noST",
|
||||
"-ST JexlExpression",
|
||||
"-select 'DP < 20'",
|
||||
"-selectName DepthLt20",
|
||||
"-select 'DP > 20'",
|
||||
"-selectName DepthGt20",
|
||||
"-BTI eval",
|
||||
"-o %s"
|
||||
),
|
||||
1,
|
||||
Arrays.asList("aea882132eb6afdc93fbc70e8d6c50e2")
|
||||
);
|
||||
executeTest("testFundamentalsCountVariantsSNPsandIndelsWithMultipleJexlExpressions", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSelect1() {
|
||||
String extraArgs = "-L 1:1-10,000,000";
|
||||
for (String tests : testsEnumerations) {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(withSelect(tests, "DP < 50", "DP50") + " " + extraArgs + " -o %s",
|
||||
1, Arrays.asList("c803c90f587796146286217ab30930a3"));
|
||||
1, Arrays.asList("4184a8d44f8c559c904e41edf464a467"));
|
||||
executeTestParallel("testSelect1", spec);
|
||||
//executeTest("testSelect1", spec);
|
||||
}
|
||||
|
|
@ -50,7 +336,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
for (String vcfFile : vcfFiles) {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(cmdRoot + " -B:eval,VCF " + validationDataLocation + vcfFile + " -B:comp,VCF " + validationDataLocation + "GenotypeConcordanceComp.vcf -noEV -EV GenotypeConcordance -o %s",
|
||||
1,
|
||||
Arrays.asList("bb16335f9510bcab2bd14a4299afd879"));
|
||||
Arrays.asList("1387fcf8d5c53ff2c820fe79cc999bcf"));
|
||||
executeTestParallel("testVEGenotypeConcordance" + vcfFile, spec);
|
||||
//executeTest("testVEGenotypeConcordance" + vcfFile, spec);
|
||||
}
|
||||
|
|
@ -60,8 +346,8 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testVESimple() {
|
||||
HashMap<String, String> expectations = new HashMap<String, String>();
|
||||
expectations.put("-L 1:1-10,000,000", "47990cfb955720421c29991954af4450");
|
||||
expectations.put("-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -mvq 0 -EV MendelianViolationEvaluator", "ce7bb86e6281b1aef1ad94f9ba7301a9");
|
||||
expectations.put("-L 1:1-10,000,000", "b28516b4d3627d2eb017a5449284a4e4");
|
||||
expectations.put("-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -mvq 0 -EV MendelianViolationEvaluator", "a49d897095586ddb72bfe9faf0291312");
|
||||
|
||||
for ( Map.Entry<String, String> entry : expectations.entrySet() ) {
|
||||
String extraArgs = entry.getKey();
|
||||
|
|
@ -84,10 +370,9 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
" -B:comp_hapmap,VCF " + validationDataLocation + "CEU_hapmap_nogt_23.vcf";
|
||||
|
||||
|
||||
String matchingMD5 = "6388fbad81b0f281298812496fd3ed6c";
|
||||
expectations.put("", matchingMD5);
|
||||
expectations.put(" -knownName comp_hapmap -knownName dbsnp", matchingMD5);
|
||||
expectations.put(" -knownName comp_hapmap", "6388fbad81b0f281298812496fd3ed6c");
|
||||
expectations.put("", "65d74eb7eea2355989c389e8fa886c06");
|
||||
expectations.put(" -knownName comp_hapmap -knownName dbsnp", "776827cd5a6fcaf8b8508813e8dc023c");
|
||||
expectations.put(" -knownName comp_hapmap", "e99a89cbca2027c983edc00d31ea4ec9");
|
||||
for (String tests : testsEnumerations) {
|
||||
for (Map.Entry<String, String> entry : expectations.entrySet()) {
|
||||
String extraArgs2 = entry.getKey();
|
||||
|
|
@ -102,6 +387,79 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCompVsEvalAC() {
|
||||
String extraArgs = "-T VariantEval -R "+b36KGReference+" -o %s -EV GenotypeConcordance -B:evalYRI,VCF /humgen/gsa-hpprojects/GATK/data/Validation_Data/yri.trio.gatk.ug.very.few.lines.vcf -B:compYRI,VCF /humgen/gsa-hpprojects/GATK/data/Validation_Data/yri.trio.gatk.fake.genotypes.ac.test.vcf";
|
||||
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("49cb4a6126c5383abd9a49a6c22b8d93"));
|
||||
executeTestParallel("testCompVsEvalAC",spec);
|
||||
//executeTest("testCompVsEvalAC",spec);
|
||||
}
|
||||
|
||||
private static String withSelect(String cmd, String select, String name) {
|
||||
return String.format("%s -select '%s' -selectName %s", cmd, select, name);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testTranches() {
|
||||
String extraArgs = "-T VariantEval -R "+ hg18Reference +" -B:eval,vcf " + validationDataLocation + "GA2.WEx.cleaned.ug.snpfiltered.indelfiltered.optimized.vcf -o %s -EV TiTvVariantEvaluator -L chr1 -noEV -tf " + testDir + "tranches.6.txt";
|
||||
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("68044a69f03ba4cc11d2061cc96e9eb5"));
|
||||
executeTestParallel("testTranches",spec);
|
||||
//executeTest("testTranches",spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCompOverlap() {
|
||||
String extraArgs = "-T VariantEval -R " + b37KGReference + " -L " + validationDataLocation + "VariantEval/pacbio.hg19.intervals -B:comphapmap,vcf " + comparisonDataLocation + "Validated/HapMap/3.3/genotypes_r27_nr.b37_fwd.vcf -B:eval,vcf " + validationDataLocation + "VariantEval/pacbio.ts.recalibrated.vcf -noEV -EV CompOverlap -sn NA12878 -noST -ST Novelty -o %s";
|
||||
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("e8b5561eb60ea98a9be4a45abee00e07"));
|
||||
executeTestParallel("testCompOverlap",spec);
|
||||
//executeTest("testCompOverlap",spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testEvalTrackWithoutGenotypes() {
|
||||
String dbsnp = GATKDataLocation + "dbsnp_129_b37.rod";
|
||||
|
||||
String extraArgs = "-T VariantEval -R " +
|
||||
b37KGReference +
|
||||
" -L 20" +
|
||||
" -D " + dbsnp +
|
||||
" -B:evalBI,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" +
|
||||
" -noST -ST Novelty -o %s";
|
||||
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("9323a6ad62dedbdb08752411960db60f"));
|
||||
executeTestParallel("testEvalTrackWithoutGenotypes",spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMultipleEvalTracksWithoutGenotypes() {
|
||||
String dbsnp = GATKDataLocation + "dbsnp_129_b37.rod";
|
||||
|
||||
String extraArgs = "-T VariantEval -R " + b37KGReference +
|
||||
" -L 20" +
|
||||
" -D " + dbsnp +
|
||||
" -B:evalBI,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" +
|
||||
" -B:evalBC,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bc.sites.vcf" +
|
||||
" -noST -ST Novelty -o %s";
|
||||
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("ef23195331affd332af1de9d261fdd0a"));
|
||||
executeTestParallel("testMultipleEvalTracksWithoutGenotypes",spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMultipleCompTracks() {
|
||||
String dbsnp = GATKDataLocation + "dbsnp_132_b37.vcf";
|
||||
|
||||
String extraArgs = "-T VariantEval" +
|
||||
" -R " + b37KGReference +
|
||||
" -B:comp,VCF " + validationDataLocation + "/VariantEval/ALL.phase1.chr20.broad.snps.genotypes.subset.vcf" +
|
||||
" -B:eval,VCF " + validationDataLocation + "/VariantEval/NA12878.hg19.HiSeq.WGS.cleaned.ug.snpfiltered.indelfiltered.optimized.cut.subset.vcf" +
|
||||
" -B:dbsnp,VCF " + dbsnp +
|
||||
" -L 20:10000000-10100000" +
|
||||
" -noST -noEV -ST Novelty -EV CompOverlap" +
|
||||
" -o %s";
|
||||
|
||||
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("3fced8e5fa7a1c952d08fead0accd3fb"));
|
||||
executeTestParallel("testMultipleCompTracks",spec);
|
||||
}
|
||||
|
||||
// @Test
|
||||
// public void testVEGenomicallyAnnotated() {
|
||||
// String vecmd = "-T VariantEval" +
|
||||
|
|
@ -129,81 +487,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
// //executeTest("testVEWriteVCF", spec);
|
||||
// }
|
||||
// }
|
||||
|
||||
@Test
|
||||
public void testCompVsEvalAC() {
|
||||
String extraArgs = "-T VariantEval -R "+b36KGReference+" -o %s -EV GenotypeConcordance -B:evalYRI,VCF /humgen/gsa-hpprojects/GATK/data/Validation_Data/yri.trio.gatk.ug.very.few.lines.vcf -B:compYRI,VCF /humgen/gsa-hpprojects/GATK/data/Validation_Data/yri.trio.gatk.fake.genotypes.ac.test.vcf";
|
||||
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("d8d59ec86ec9e00abad4ec44741de22f"));
|
||||
executeTestParallel("testCompVsEvalAC",spec);
|
||||
//executeTest("testCompVsEvalAC",spec);
|
||||
}
|
||||
|
||||
private static String withSelect(String cmd, String select, String name) {
|
||||
return String.format("%s -select '%s' -selectName %s", cmd, select, name);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testTranches() {
|
||||
String extraArgs = "-T VariantEval -R "+ hg18Reference +" -B:eval,vcf " + validationDataLocation + "GA2.WEx.cleaned.ug.snpfiltered.indelfiltered.optimized.vcf -o %s -EV TiTvVariantEvaluator -L chr1 -noEV -tf " + testDir + "tranches.6.txt";
|
||||
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("68044a69f03ba4cc11d2061cc96e9eb5"));
|
||||
executeTestParallel("testTranches",spec);
|
||||
//executeTest("testTranches",spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCompOverlap() {
|
||||
String extraArgs = "-T VariantEval -R " + b37KGReference + " -L " + validationDataLocation + "VariantEval/pacbio.hg19.intervals -B:comphapmap,vcf " + comparisonDataLocation + "Validated/HapMap/3.3/genotypes_r27_nr.b37_fwd.vcf -B:eval,vcf " + validationDataLocation + "VariantEval/pacbio.ts.recalibrated.vcf -noEV -EV CompOverlap -sn NA12878 -noST -ST Novelty -o %s";
|
||||
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("81377be26bf8fa32339d01c173428f7d"));
|
||||
executeTestParallel("testCompOverlap",spec);
|
||||
//executeTest("testCompOverlap",spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testEvalTrackWithoutGenotypes() {
|
||||
String dbsnp = GATKDataLocation + "dbsnp_129_b37.rod";
|
||||
|
||||
String extraArgs = "-T VariantEval -R " +
|
||||
b37KGReference +
|
||||
" -L 20" +
|
||||
" -D " + dbsnp +
|
||||
" -B:evalBI,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" +
|
||||
" -noST -ST Novelty -o %s";
|
||||
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("2e2c24b49f699506b967befbde5a6fa8"));
|
||||
executeTestParallel("testEvalTrackWithoutGenotypes",spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMultipleEvalTracksWithoutGenotypes() {
|
||||
String dbsnp = GATKDataLocation + "dbsnp_129_b37.rod";
|
||||
|
||||
String extraArgs = "-T VariantEval -R " + b37KGReference +
|
||||
" -L 20" +
|
||||
" -D " + dbsnp +
|
||||
" -B:evalBI,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" +
|
||||
" -B:evalBC,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bc.sites.vcf" +
|
||||
" -noST -ST Novelty -o %s";
|
||||
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("144053b8bef5a79b23d0abd17b561294"));
|
||||
executeTestParallel("testMultipleEvalTracksWithoutGenotypes",spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMultipleCompTracks() {
|
||||
String dbsnp = GATKDataLocation + "dbsnp_132_b37.vcf";
|
||||
|
||||
String extraArgs = "-T VariantEval" +
|
||||
" -R " + b37KGReference +
|
||||
" -B:comp,VCF " + validationDataLocation + "/VariantEval/ALL.phase1.chr20.broad.snps.genotypes.subset.vcf" +
|
||||
" -B:eval,VCF " + validationDataLocation + "/VariantEval/NA12878.hg19.HiSeq.WGS.cleaned.ug.snpfiltered.indelfiltered.optimized.cut.subset.vcf" +
|
||||
" -B:dbsnp,VCF " + dbsnp +
|
||||
" -L 20:10000000-10100000" +
|
||||
" -noST -noEV -ST Novelty -EV CompOverlap" +
|
||||
" -o %s";
|
||||
|
||||
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("2d2c6e7850ec964624bb032d24834e2f"));
|
||||
executeTestParallel("testMultipleCompTracks",spec);
|
||||
}
|
||||
|
||||
|
||||
//
|
||||
// @Test
|
||||
// public void testVEValidatePass() {
|
||||
// String extraArgs = "-L 1:1-10,000,000";
|
||||
|
|
@ -227,4 +511,5 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
// private static String withValidateTiTv(String cmd, double min, double max) {
|
||||
// return String.format("%s -validate 'eval.comp_genotypes.all.called.all.titv.tiTvRatio >= %2$s' -validate 'eval.comp_genotypes.all.called.all.titv.tiTvRatio <= %3$s'", cmd, min, max);
|
||||
// }
|
||||
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue