From 1085bbf303765ff87332d66fd6f7214961fa3fcc Mon Sep 17 00:00:00 2001 From: kiran Date: Sun, 6 Feb 2011 19:19:20 +0000 Subject: [PATCH] Fixed issue where all comp tracks were being treated as known tracks. Fixed issue where multiple JEXL expressions were causing an exception because the underlying object did not implement the Comparable interface. Fixed issue where variants being compared to the known track were not being checked for equality of variation type. Fixed issue where functional annotations were not being iterated over properly. Refactored a lot of helper methods into a separate VariantEvalUtils utility class. Significantly expanded the test suite using a small VCF with SNPs, indels, and non-variant loci which makes it much easier to see what the proper answer should be, and included the appropriate grep and awk commands in the comments to confirm the values. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5204 348d0f76-0448-11de-a6fe-93d51630548a --- .../varianteval/VariantEvalWalker.java | 597 ++---------------- .../varianteval/evaluators/CompOverlap.java | 30 +- .../evaluators/GenotypeConcordance.java | 11 +- .../evaluators/GenotypePhasingEvaluator.java | 1 - .../evaluators/IndelLengthHistogram.java | 2 +- .../evaluators/IndelMetricsByAC.java | 5 +- .../evaluators/IndelStatistics.java | 10 +- .../MendelianViolationEvaluator.java | 6 +- .../evaluators/SimpleMetricsByAC.java | 2 +- .../evaluators/VariantQualityScore.java | 2 +- .../varianteval/stratifications/CompRod.java | 6 +- .../varianteval/stratifications/CpG.java | 6 +- .../stratifications/Degeneracy.java | 13 +- .../varianteval/stratifications/EvalRod.java | 6 +- .../varianteval/stratifications/Filter.java | 6 +- .../stratifications/FunctionalClass.java | 14 +- .../stratifications/JexlExpression.java | 12 +- .../varianteval/stratifications/Novelty.java | 30 +- .../varianteval/stratifications/Sample.java | 7 +- .../stratifications/VariantStratifier.java | 5 +- 
.../util/NewEvaluationContext.java | 49 +- .../VariantEvalIntegrationTest.java | 457 +++++++++++--- 22 files changed, 560 insertions(+), 717 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java index 0aae62c5c..6f65132c2 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java @@ -1,7 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.varianteval; +import org.apache.log4j.Logger; import org.broad.tribble.util.variantcontext.VariantContext; -import org.broad.tribble.vcf.VCFConstants; import org.broad.tribble.vcf.VCFHeader; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; @@ -17,21 +17,14 @@ import org.broadinstitute.sting.gatk.walkers.Reference; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.gatk.walkers.TreeReducible; import org.broadinstitute.sting.gatk.walkers.Window; -import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.StandardEval; import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.VariantEvaluator; -import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.RequiredStratification; -import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.StandardStratification; import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.VariantStratifier; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis; import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint; -import org.broadinstitute.sting.gatk.walkers.varianteval.util.AnalysisModuleScanner; -import org.broadinstitute.sting.gatk.walkers.varianteval.util.NewEvaluationContext; -import org.broadinstitute.sting.gatk.walkers.varianteval.util.StateKey; 
+import org.broadinstitute.sting.gatk.walkers.varianteval.util.*; import org.broadinstitute.sting.gatk.walkers.variantrecalibration.Tranche; import org.broadinstitute.sting.gatk.walkers.variantrecalibration.VariantRecalibrator; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.classloader.PluginManager; import org.broadinstitute.sting.utils.exceptions.StingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.report.utils.TableType; @@ -43,7 +36,7 @@ import java.lang.reflect.Field; import java.util.*; /** - * General-purpose tool for variant evaluation (% in dbSNP, genotype concordance, Ts/Tv ratios, and a lot more) + * General-purpose tool for variant evaluation (% in dbSNP, genotype concordance, Ti/Tv ratios, and a lot more) */ @Reference(window=@Window(start=-50, stop=50)) public class VariantEvalWalker extends RodWalker implements TreeReducible { @@ -99,7 +92,7 @@ public class VariantEvalWalker extends RodWalker implements Tr private String TRANCHE_FILENAME = null; // Variables - private Set jexlExpressions = new TreeSet(); + private Set jexlExpressions = new TreeSet(); private Set compNames = new TreeSet(); private Set knownNames = new TreeSet(); private Set evalNames = new TreeSet(); @@ -118,240 +111,17 @@ public class VariantEvalWalker extends RodWalker implements Tr private GATKReport report = null; // Public constants - public static String ALL_SAMPLE_NAME = "all"; + private static String ALL_SAMPLE_NAME = "all"; - /** - * List all of the available evaluation modules, then exit successfully - */ - private void listModulesAndExit() { - List> vsClasses = new PluginManager( VariantStratifier.class ).getPlugins(); - List> veClasses = new PluginManager( VariantEvaluator.class ).getPlugins(); - - logger.info("Available stratification modules:"); - logger.info("(Standard modules are starred)"); - for (Class vsClass : 
vsClasses) { - logger.info("\t" + vsClass.getSimpleName() + (RequiredStratification.class.isAssignableFrom(vsClass) || StandardStratification.class.isAssignableFrom(vsClass) ? "*" : "")); - } - logger.info(""); - - logger.info("Available evaluation modules:"); - logger.info("(Standard modules are starred)"); - for (Class veClass : veClasses) { - logger.info("\t" + veClass.getSimpleName() + (StandardEval.class.isAssignableFrom(veClass) ? "*" : "")); - } - logger.info(""); - - System.exit(0); - } - - /** - * Initialize required, standard and user-specified stratification objects - * - * @param noStandardStrats don't use the standard stratifications - * @param modulesToUse the list of stratification modules to use - * @return set of stratifications to use - */ - private TreeSet initializeStratificationObjects(boolean noStandardStrats, String[] modulesToUse) { - TreeSet strats = new TreeSet(); - Set stratsToUse = new HashSet(); - - // Create a map for all stratification modules for easy lookup. - HashMap> classMap = new HashMap>(); - for ( Class c : new PluginManager( VariantStratifier.class ).getPlugins() ) { - classMap.put(c.getSimpleName(), c); - } - - // We must use all required stratification modules. - for ( Class reqClass : new PluginManager( RequiredStratification.class ).getPlugins() ) { - if ( classMap.containsKey(reqClass.getSimpleName()) ) { - stratsToUse.add(reqClass.getSimpleName()); - } - } - - // By default, use standard stratification modules. 
- if ( !noStandardStrats ) { - for ( Class stdClass : new PluginManager( StandardStratification.class ).getPlugins() ) { - if ( classMap.containsKey(stdClass.getSimpleName()) ) { - stratsToUse.add(stdClass.getSimpleName()); - } - } - } - - // Now add the user-selected modules - stratsToUse.addAll(Arrays.asList(modulesToUse)); - - // Instantiate the stratifications - for ( String module : stratsToUse ) { - if ( !classMap.containsKey(module) ) { - throw new UserException.CommandLineException("Module " + module + " could not be found; please check that you have specified the class name correctly"); - } - - if ( classMap.containsKey(module) ) { - Class c = classMap.get(module); - - try { - VariantStratifier vs = c.newInstance(); - vs.initialize(jexlExpressions, compNames, knownNames, evalNames, sampleNamesForStratification); - - strats.add(vs); - } catch (InstantiationException e) { - throw new StingException("Unable to instantiate stratification module '" + c.getSimpleName() + "'"); - } catch (IllegalAccessException e) { - throw new StingException("Illegal access error when trying to instantiate stratification module '" + c.getSimpleName() + "'"); - } - } - } - - return strats; - } - - /** - * Initialize required, standard and user-specified evaluation objects - * - * @param noStandardEvals don't use the standard evaluations - * @param modulesToUse the list of evaluation modules to use - * @return set of evaluations to use - */ - private Set> initializeEvaluationObjects(boolean noStandardEvals, String[] modulesToUse) { - Set> evals = new HashSet>(); - - // Create a map for all eval modules for easy lookup. - HashMap> classMap = new HashMap>(); - for ( Class c : new PluginManager( VariantEvaluator.class ).getPlugins() ) { - classMap.put(c.getSimpleName(), c); - } - - // By default, use standard eval modules. 
- if ( !noStandardEvals ) { - for ( Class stdClass : new PluginManager( StandardEval.class ).getPlugins() ) { - if ( classMap.containsKey(stdClass.getSimpleName()) ) { - evals.add(classMap.get(stdClass.getSimpleName())); - } - } - } - - // Get the specific classes provided. - for ( String module : modulesToUse ) { - if ( !classMap.containsKey(module) ) { - throw new UserException.CommandLineException("Module " + module + " could not be found; please check that you have specified the class name correctly"); - } - - if ( classMap.containsKey(module) ) { - evals.add(classMap.get(module)); - } - } - - return evals; - } - - /** - * Recursively initialize the evaluation contexts - * - * @param stratificationObjects the stratifications to use - * @param evaluationObjects the evaluations to use - * @param stratStack a stack of stratifications to apply - * @param ec evaluation context - * @return a map of all the evaluation contexts - */ - private HashMap initializeEvaluationContexts(Set stratificationObjects, Set> evaluationObjects, Stack stratStack, NewEvaluationContext ec) { - HashMap ecs = new HashMap(); - - if (stratStack == null) { - stratStack = new Stack(); - stratStack.addAll(stratificationObjects); - } - - if (!stratStack.isEmpty()) { - Stack newStratStack = new Stack(); - newStratStack.addAll(stratStack); - - VariantStratifier vs = newStratStack.pop(); - - for ( String state : vs.getAllStates() ) { - NewEvaluationContext nec = new NewEvaluationContext(); - if (ec != null) { - nec.putAll(ec); - } - nec.put(vs, state); - - ecs.putAll(initializeEvaluationContexts(stratificationObjects, evaluationObjects, newStratStack, nec)); - } - } else { - HashMap necs = new HashMap(); - - StateKey stateKey = new StateKey(); - for ( VariantStratifier vs : ec.keySet() ) { - String state = ec.get(vs); - - stateKey.put(vs.getClass().getSimpleName(), state); - } - - ec.addEvaluationClassList(this, stateKey, evaluationObjects); - - necs.put(stateKey, ec); - - return necs; - } - - 
return ecs; - } - - /** - * Initialize the output report - * - * @param stratificationObjects the stratifications to use - * @param evaluationObjects the evaluations to use - * @return an initialized report object - */ - private GATKReport initializeGATKReport(Set stratificationObjects, Set> evaluationObjects) { - GATKReport report = new GATKReport(); - - for ( Class ve : evaluationObjects ) { - String tableName = ve.getSimpleName(); - String tableDesc = ve.getAnnotation(Analysis.class).description(); - - report.addTable(tableName, tableDesc); - - GATKReportTable table = report.getTable(tableName); - table.addPrimaryKey("entry", false); - table.addColumn(tableName, tableName); - - for ( VariantStratifier vs : stratificationObjects ) { - String columnName = vs.getClass().getSimpleName(); - - table.addColumn(columnName, "unknown"); - } - - try { - VariantEvaluator vei = ve.newInstance(); - vei.initialize(this); - - AnalysisModuleScanner scanner = new AnalysisModuleScanner(vei); - Map datamap = scanner.getData(); - - for (Field field : datamap.keySet()) { - field.setAccessible(true); - - if (! (field.get(vei) instanceof TableType) ) { - table.addColumn(field.getName(), 0.0); - } - } - } catch (InstantiationException e) { - throw new StingException("InstantiationException: " + e); - } catch (IllegalAccessException e) { - throw new StingException("IllegalAccessException: " + e); - } - } - - return report; - } + // Utility class + private final VariantEvalUtils variantEvalUtils = new VariantEvalUtils(this); /** * Initialize the stratifications, evaluations, evaluation contexts, and reporting object */ public void initialize() { // Just list the modules, and exit quickly. - if (LIST) { listModulesAndExit(); } + if (LIST) { variantEvalUtils.listModulesAndExit(); } // Categorize each rod as an eval or a comp rod. 
for ( ReferenceOrderedDataSource d : this.getToolkit().getRodDataSources() ) { @@ -386,7 +156,11 @@ public class VariantEvalWalker extends RodWalker implements Tr sampleNamesForStratification.add(ALL_SAMPLE_NAME); // Initialize select expressions - jexlExpressions.addAll(VariantContextUtils.initializeMatchExps(SELECT_NAMES, SELECT_EXPS)); + //jexlExpressions.addAll(VariantContextUtils.initializeMatchExps(SELECT_NAMES, SELECT_EXPS)); + for (VariantContextUtils.JexlVCMatchExp jexl : VariantContextUtils.initializeMatchExps(SELECT_NAMES, SELECT_EXPS)) { + SortableJexlVCMatchExp sjexl = new SortableJexlVCMatchExp(jexl.name, jexl.exp); + jexlExpressions.add(sjexl); + } // Add select expressions for anything in the tranches file if ( TRANCHE_FILENAME != null ) { @@ -399,253 +173,14 @@ public class VariantEvalWalker extends RodWalker implements Tr } // Initialize the set of stratifications and evaluations to use - stratificationObjects = initializeStratificationObjects(NO_STANDARD_STRATIFICATIONS, STRATIFICATIONS_TO_USE); - Set> evaluationObjects = initializeEvaluationObjects(NO_STANDARD_MODULES, MODULES_TO_USE); + stratificationObjects = variantEvalUtils.initializeStratificationObjects(NO_STANDARD_STRATIFICATIONS, STRATIFICATIONS_TO_USE); + Set> evaluationObjects = variantEvalUtils.initializeEvaluationObjects(NO_STANDARD_MODULES, MODULES_TO_USE); // Initialize the evaluation contexts - evaluationContexts = initializeEvaluationContexts(stratificationObjects, evaluationObjects, null, null); + evaluationContexts = variantEvalUtils.initializeEvaluationContexts(stratificationObjects, evaluationObjects, null, null); // Initialize report table - report = initializeGATKReport(stratificationObjects, evaluationObjects); - } - - /** - * Figure out what the allowable variation types are based on the eval context - * - * @param tracker the reference metadata tracker - * @param ref the reference context - * @param compNames the comp track names - * @param evalNames the evaluation track 
names - * @return the set of allowable variation types - */ - private EnumSet getAllowableVariationTypes(RefMetaDataTracker tracker, ReferenceContext ref, Set compNames, Set evalNames) { - EnumSet allowableTypes = EnumSet.of(VariantContext.Type.NO_VARIATION); - - if (tracker != null) { - Collection evalvcs = tracker.getVariantContexts(ref, evalNames, null, ref.getLocus(), true, false); - - for ( VariantContext vc : evalvcs ) { - allowableTypes.add(vc.getType()); - } - - if (allowableTypes.size() == 1) { - // We didn't find any variation in the eval track, so now let's look at the comp track for allowable types - Collection compvcs = tracker.getVariantContexts(ref, compNames, null, ref.getLocus(), true, false); - - for ( VariantContext vc : compvcs ) { - allowableTypes.add(vc.getType()); - } - } - } - - return allowableTypes; - } - - /** - * Subset a VariantContext to a single sample - * - * @param vc the VariantContext object containing multiple samples - * @param sampleName the sample to pull out of the VariantContext - * @return a new VariantContext with just the requested sample - */ - private VariantContext getSubsetOfVariantContext(VariantContext vc, String sampleName) { - ArrayList sampleNames = new ArrayList(); - sampleNames.add(sampleName); - - return getSubsetOfVariantContext(vc, sampleNames); - } - - /** - * Subset a VariantContext to a set of samples - * - * @param vc the VariantContext object containing multiple samples - * @param sampleNames the samples to pull out of the VariantContext - * @return a new VariantContext with just the requested samples - */ - private VariantContext getSubsetOfVariantContext(VariantContext vc, Collection sampleNames) { - VariantContext vcsub = vc.subContextFromGenotypes(vc.getGenotypes(sampleNames).values()); - - HashMap newAts = new HashMap(vcsub.getAttributes()); - - int originalAlleleCount = vc.getHetCount() + 2*vc.getHomVarCount(); - int newAlleleCount = vcsub.getHetCount() + 2*vcsub.getHomVarCount(); - - if 
(originalAlleleCount == newAlleleCount && newAlleleCount == 1) { - newAts.put("ISSINGLETON", true); - } - - VariantContextUtils.calculateChromosomeCounts(vcsub, newAts, true); - vcsub = VariantContext.modifyAttributes(vcsub,newAts); - - logger.debug(String.format("VC %s subset to %s AC%n", vc.getSource(), vc.getAttributeAsString(VCFConstants.ALLELE_COUNT_KEY))); - - return vcsub; - } - - /** - * For a list of track names, bind the variant contexts to a trackName->sampleName->VariantContext mapping. - * Additional variant contexts per sample are automatically generated and added to the map unless the - * sample name matches the ALL_SAMPLE_NAME constant. - * - * @param tracker the metadata tracker - * @param ref the reference context - * @param trackNames the list of track names to process - * @param allowableTypes a set of allowable variation types - * @param byFilter if false, only accept PASSing VariantContexts. Otherwise, accept both PASSing and filtered sites - * @param trackPerSample if false, don't stratify per sample (and don't cut up the VariantContext like we would need to do this) - * @param allowNoCalls if false, don't accept no-call loci from a variant track - * @return a mapping of track names to a list of VariantContext objects - */ - private HashMap> bindVariantContexts(RefMetaDataTracker tracker, ReferenceContext ref, Set trackNames, EnumSet allowableTypes, boolean byFilter, boolean trackPerSample, boolean allowNoCalls) { - HashMap> bindings = new HashMap>(); - - for ( String trackName : trackNames ) { - HashMap vcs = new HashMap(); - - Collection contexts = tracker == null ? null : tracker.getVariantContexts(ref, trackName, allowableTypes, ref.getLocus(), true, true); - VariantContext vc = contexts != null && contexts.size() == 1 ? 
contexts.iterator().next() : null; - - // First, filter the VariantContext to represent only the samples for evaluation - if ( vc != null ) { - VariantContext vcsub = vc; - - if (vc.hasGenotypes() && vc.hasGenotypes(sampleNamesForEvaluation)) { - vcsub = getSubsetOfVariantContext(vc, sampleNamesForEvaluation); - } - - if ((byFilter || !vcsub.isFiltered()) && (allowNoCalls || vcsub.getType() != VariantContext.Type.NO_VARIATION)) { - vcs.put(ALL_SAMPLE_NAME, vcsub); - } - - // Now, if stratifying, split the subsetted vc per sample and add each as a new context - if ( vc.hasGenotypes() && trackPerSample ) { - for ( String sampleName : sampleNamesForEvaluation ) { - VariantContext samplevc = getSubsetOfVariantContext(vc, sampleName); - - if ((byFilter || !samplevc.isFiltered()) && (allowNoCalls || samplevc.getType() != VariantContext.Type.NO_VARIATION)) { - vcs.put(sampleName, samplevc); - } - } - } - - bindings.put(trackName, vcs); - } - } - - return bindings; - } - - /** - * Maps track names to sample name to VariantContext objects. For eval tracks, VariantContexts per specified - * sample are also included. 
- * - * @param tracker the metadata tracker - * @param ref the reference context - * @param compNames the list of comp names to process - * @param evalNames the list of eval names to process - * @return a mapping of track names to a list of VariantContext objects - */ - private HashMap> getVariantContexts(RefMetaDataTracker tracker, ReferenceContext ref, Set compNames, Set evalNames) { - HashMap> vcs = new HashMap>(); - - EnumSet allowableTypes = getAllowableVariationTypes(tracker, ref, compNames, evalNames); - - boolean byFilter = false; - boolean perSampleIsEnabled = false; - for (VariantStratifier vs : stratificationObjects) { - if (vs.getClass().getSimpleName().equals("Filter")) { - byFilter = true; - } else if (vs.getClass().getSimpleName().equals("Sample")) { - perSampleIsEnabled = true; - } - } - - HashMap> evalBindings = bindVariantContexts(tracker, ref, evalNames, allowableTypes, byFilter, perSampleIsEnabled, true); - HashMap> compBindings = bindVariantContexts(tracker, ref, compNames, allowableTypes, byFilter, false, false); - - vcs.putAll(compBindings); - vcs.putAll(evalBindings); - - return vcs; - } - - /** - * Recursively initialize the state keys used to look up the right evaluation context based on the state of the variant context - * - * @param stateMap the map of allowable states - * @param stateStack a stack of the states - * @param stateKey a state key object - * @param stateKeys all the state keys - * @return a list of state keys - */ - private ArrayList initializeStateKeys(HashMap> stateMap, Stack>> stateStack, StateKey stateKey, ArrayList stateKeys) { - if (stateStack == null) { - stateStack = new Stack>>(); - - for ( VariantStratifier vs : stateMap.keySet() ) { - HashMap> oneSetOfStates = new HashMap>(); - oneSetOfStates.put(vs, stateMap.get(vs)); - - stateStack.add(oneSetOfStates); - } - } - - if (!stateStack.isEmpty()) { - Stack>> newStateStack = new Stack>>(); - newStateStack.addAll(stateStack); - - HashMap> oneSetOfStates = 
newStateStack.pop(); - VariantStratifier vs = oneSetOfStates.keySet().iterator().next(); - - for ( String state : oneSetOfStates.get(vs)) { - StateKey newStateKey = new StateKey(); - if (stateKey != null) { - newStateKey.putAll(stateKey); - } - - newStateKey.put(vs.getClass().getSimpleName(), state); - - initializeStateKeys(stateMap, newStateStack, newStateKey, stateKeys); - } - } else { - stateKeys.add(stateKey); - - return stateKeys; - } - - return stateKeys; - } - - /** - * Return the number of samples being used - * @return the number of samples - */ - public int getNumSamples() { - return numSamples; - } - - /** - * Return the minimum phasing quality to be used with the GenotypePhasingEvaluator module - * @return the minimum phasing quality - */ - public double getMinPhaseQuality() { - return MIN_PHASE_QUALITY; - } - - /** - * Return the family structure to be used with the MendelianViolationEvaluator module - * @return the family structure string - */ - public String getFamilyStructure() { - return FAMILY_STRUCTURE; - } - - /** - * Return the mendelian violation qual threshold to be used with the MendelianViolationEvaluator module - * @return the mendelian violation qual threshold - */ - public double getMendelianViolationQualThreshold() { - return MENDELIAN_VIOLATION_QUAL_THRESHOLD; + report = variantEvalUtils.initializeGATKReport(stratificationObjects, evaluationObjects); } /** @@ -659,32 +194,34 @@ public class VariantEvalWalker extends RodWalker implements Tr } } - // track sample vc - HashMap> vcs = getVariantContexts(tracker, ref, compNames, evalNames); + if (tracker != null) { + // track sample vc + HashMap> vcs = variantEvalUtils.getVariantContexts(tracker, ref, compNames, evalNames); - for ( String compName : compNames ) { - VariantContext comp = vcs.containsKey(compName) && vcs.get(compName) != null && vcs.get(compName).containsKey(ALL_SAMPLE_NAME) ? 
vcs.get(compName).get(ALL_SAMPLE_NAME) : null; + for ( String compName : compNames ) { + VariantContext comp = vcs.containsKey(compName) && vcs.get(compName) != null && vcs.get(compName).containsKey(ALL_SAMPLE_NAME) ? vcs.get(compName).get(ALL_SAMPLE_NAME) : null; - for ( String evalName : evalNames ) { - for ( String sampleName : sampleNamesForStratification ) { - VariantContext eval = vcs.containsKey(evalName) && vcs.get(evalName) != null ? vcs.get(evalName).get(sampleName) : null; + for ( String evalName : evalNames ) { + for ( String sampleName : sampleNamesForStratification ) { + VariantContext eval = vcs.containsKey(evalName) && vcs.get(evalName) != null ? vcs.get(evalName).get(sampleName) : null; - HashMap> stateMap = new HashMap>(); - for ( VariantStratifier vs : stratificationObjects ) { - ArrayList states = vs.getRelevantStates(ref, comp, compName, eval, evalName, sampleName); - stateMap.put(vs, states); - } + HashMap> stateMap = new HashMap>(); + for ( VariantStratifier vs : stratificationObjects ) { + ArrayList states = vs.getRelevantStates(ref, tracker, comp, compName, eval, evalName, sampleName); + stateMap.put(vs, states); + } - ArrayList stateKeys = new ArrayList(); - initializeStateKeys(stateMap, null, null, stateKeys); + ArrayList stateKeys = new ArrayList(); + variantEvalUtils.initializeStateKeys(stateMap, null, null, stateKeys); - HashSet stateKeysHash = new HashSet(stateKeys); + HashSet stateKeysHash = new HashSet(stateKeys); - for ( StateKey stateKey : stateKeysHash ) { - NewEvaluationContext nec = evaluationContexts.get(stateKey); + for ( StateKey stateKey : stateKeysHash ) { + NewEvaluationContext nec = evaluationContexts.get(stateKey); - synchronized (nec) { - nec.apply(tracker, ref, context, comp, eval); + synchronized (nec) { + nec.apply(tracker, ref, context, comp, eval); + } } } } @@ -694,38 +231,13 @@ public class VariantEvalWalker extends RodWalker implements Tr return null; } - /** - * A composite, 'reduce of reduces' function. 
- * - * @param lhs 'left-most' portion of data in the composite reduce. - * @param rhs 'right-most' portion of data in the composite reduce. - * @return The composite reduce type. - */ - public Integer treeReduce(Integer lhs, Integer rhs) { - return null; - } + public Integer treeReduce(Integer lhs, Integer rhs) { return null; } - /** - * Provide an initial value for reduce computations. - * - * @return Initial value of reduce. - */ @Override - public Integer reduceInit() { - return null; - } + public Integer reduceInit() { return null; } - /** - * Reduces a single map with the accumulator provided as the ReduceType. - * - * @param value result of the map. - * @param sum accumulator for the reduce. - * @return accumulator with result of the map taken into account. - */ @Override - public Integer reduce(Integer value, Integer sum) { - return null; - } + public Integer reduce(Integer value, Integer sum) { return null; } /** * Output the finalized report @@ -822,4 +334,31 @@ public class VariantEvalWalker extends RodWalker implements Tr report.print(out); } + + // Accessors + public Logger getLogger() { return logger; } + + public int getNumSamples() { return numSamples; } + + public double getMinPhaseQuality() { return MIN_PHASE_QUALITY; } + + public String getFamilyStructure() { return FAMILY_STRUCTURE; } + + public double getMendelianViolationQualThreshold() { return MENDELIAN_VIOLATION_QUAL_THRESHOLD; } + + public TreeSet getStratificationObjects() { return stratificationObjects; } + + public static String getAllSampleName() { return ALL_SAMPLE_NAME; } + + public Set getKnownNames() { return knownNames; } + + public Set getEvalNames() { return evalNames; } + + public Set getSampleNamesForEvaluation() { return sampleNamesForEvaluation; } + + public Set getSampleNamesForStratification() { return sampleNamesForStratification; } + + public Set getCompNames() { return compNames; } + + public Set getJexlExpressions() { return jexlExpressions; } } \ No newline at end of 
file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CompOverlap.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CompOverlap.java index 97e721a4b..e02154bb9 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CompOverlap.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CompOverlap.java @@ -19,18 +19,17 @@ import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint; */ @Analysis(description = "The overlap between eval and comp sites") public class CompOverlap extends VariantEvaluator implements StandardEval { - @DataPoint(description = "number of eval SNP sites") - long nEvalSNPs = 0; + long nEvalVariants = 0; @DataPoint(description = "number of comp SNP sites") - long nCompSNPs = 0; + long nCompVariants = 0; @DataPoint(description = "number of eval sites outside of comp sites") long novelSites = 0; @DataPoint(description = "number of eval sites at comp sites") - long nSNPsAtComp = 0; + long nVariantsAtComp = 0; @DataPoint(description = "percentage of eval sites at comp sites") double compRate = 0.0; @@ -45,9 +44,9 @@ public class CompOverlap extends VariantEvaluator implements StandardEval { return 2; // we need to see each eval track and each comp track } - public long nNovelSites() { return nEvalSNPs - nSNPsAtComp; } - public double compRate() { return rate(nSNPsAtComp, nEvalSNPs); } - public double concordanceRate() { return rate(nConcordant, nSNPsAtComp); } + public long nNovelSites() { return nEvalVariants - nVariantsAtComp; } + public double compRate() { return rate(nVariantsAtComp, nEvalVariants); } + public double concordanceRate() { return rate(nConcordant, nVariantsAtComp); } public void finalizeEvaluation() { compRate = 100 * compRate(); @@ -76,21 +75,18 @@ public class CompOverlap extends VariantEvaluator implements StandardEval { } public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker 
tracker, ReferenceContext ref, AlignmentContext context) { - //boolean expectingIndels = false; + boolean evalIsGood = eval != null; + boolean expectingIndels = eval != null && eval.isIndel(); - //boolean compIsGood = expectingIndels ? comp != null && comp.isNotFiltered() && comp.isIndel() : comp != null && comp.isNotFiltered() && comp.isSNP() ; - //boolean evalIsGood = expectingIndels ? eval != null && eval.isIndel() : eval != null && eval.isSNP() ; + boolean compIsGood = expectingIndels ? comp != null && comp.isNotFiltered() && comp.isIndel() : comp != null && comp.isNotFiltered() && comp.isSNP() ; - boolean compIsGood = comp != null && comp.isNotFiltered() && comp.isSNP() ; - boolean evalIsGood = eval != null && eval.isSNP() ; - - if (compIsGood) nCompSNPs++; // count the number of comp events - if (evalIsGood) nEvalSNPs++; // count the number of eval events + if (compIsGood) nCompVariants++; // count the number of comp events + if (evalIsGood) nEvalVariants++; // count the number of eval events if (compIsGood && evalIsGood) { - nSNPsAtComp++; + nVariantsAtComp++; - if (!discordantP(eval, comp)) { // count whether we're concordant or not with the comp value + if (!discordantP(eval, comp)) { // count whether we're concordant or not with the comp value nConcordant++; } } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypeConcordance.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypeConcordance.java index 26fa3baab..0fdef754f 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypeConcordance.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypeConcordance.java @@ -1,18 +1,19 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators; +import org.apache.log4j.Logger; import org.broad.tribble.util.variantcontext.Allele; import org.broad.tribble.util.variantcontext.Genotype; import 
org.broad.tribble.util.variantcontext.VariantContext; import org.broad.tribble.vcf.VCFConstants; -import org.broadinstitute.sting.gatk.contexts.*; -import org.broadinstitute.sting.gatk.refdata.*; -import org.broadinstitute.sting.utils.exceptions.StingException; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis; import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint; -import org.broadinstitute.sting.utils.report.utils.TableType; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.apache.log4j.Logger; +import org.broadinstitute.sting.utils.exceptions.StingException; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.report.utils.TableType; import java.util.*; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypePhasingEvaluator.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypePhasingEvaluator.java index a2b548404..9fbcea5bc 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypePhasingEvaluator.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypePhasingEvaluator.java @@ -9,7 +9,6 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.phasing.AllelePair; import org.broadinstitute.sting.gatk.walkers.phasing.ReadBackedPhasingWalker; import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker; -import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker; import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis; import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint; import 
org.broadinstitute.sting.gatk.walkers.varianteval.util.NewEvaluationContext; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelLengthHistogram.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelLengthHistogram.java index 285695526..5364e7b87 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelLengthHistogram.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelLengthHistogram.java @@ -4,9 +4,9 @@ import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis; import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.report.utils.TableType; /** diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelMetricsByAC.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelMetricsByAC.java index 82c95a4ba..e61540c32 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelMetricsByAC.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelMetricsByAC.java @@ -5,9 +5,9 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import 
org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis; import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.report.utils.TableType; import java.util.ArrayList; @@ -157,8 +157,7 @@ public class IndelMetricsByAC extends VariantEvaluator { } public String toString() { - String returnString = ""; - return returnString; + return ""; } public void incrValue( VariantContext eval ) { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelStatistics.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelStatistics.java index 03fa2f7c6..97a5669a7 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelStatistics.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelStatistics.java @@ -474,10 +474,6 @@ public class IndelStatistics extends VariantEvaluator { return getName(); } - //public String update2(VariantContext eval, VariantContext validation, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context, VariantEvalWalker.EvaluationContext group) { - //return null; - //} - public String update1(VariantContext eval, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { if (eval != null ) { @@ -502,13 +498,9 @@ public class IndelStatistics extends VariantEvaluator { return null; // This module doesn't capture any interesting sites, so return null } - public String update0(VariantContext eval, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - return null; - } public void finalizeEvaluation() { - // - int k=0; + int k=0; } } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MendelianViolationEvaluator.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MendelianViolationEvaluator.java 
index bf162d88d..6d73310c4 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MendelianViolationEvaluator.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MendelianViolationEvaluator.java @@ -3,18 +3,18 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators; import org.broad.tribble.util.variantcontext.Allele; import org.broad.tribble.util.variantcontext.Genotype; import org.broad.tribble.util.variantcontext.VariantContext; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker; import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis; import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import java.util.List; import java.util.Arrays; -import java.util.regex.Pattern; +import java.util.List; import java.util.regex.Matcher; +import java.util.regex.Pattern; /** * Mendelian violation detection and counting diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/SimpleMetricsByAC.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/SimpleMetricsByAC.java index b7e965ead..457ca4d4d 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/SimpleMetricsByAC.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/SimpleMetricsByAC.java @@ -11,8 +11,8 @@ import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.Sample; import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis; import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint; import 
org.broadinstitute.sting.gatk.walkers.varianteval.util.StateKey; -import org.broadinstitute.sting.utils.report.utils.TableType; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.report.utils.TableType; import java.util.ArrayList; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantQualityScore.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantQualityScore.java index f79211d62..735ca70eb 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantQualityScore.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantQualityScore.java @@ -31,9 +31,9 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis; import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint; +import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.report.utils.TableType; import java.util.ArrayList; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CompRod.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CompRod.java index cf3b70432..22483f224 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CompRod.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CompRod.java @@ -3,6 +3,8 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; import org.broad.tribble.util.variantcontext.VariantContext; import 
org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp; import java.util.ArrayList; import java.util.Set; @@ -13,7 +15,7 @@ public class CompRod extends VariantStratifier implements RequiredStratification private ArrayList states; @Override - public void initialize(Set jexlExpressions, Set compNames, Set knownNames, Set evalNames, Set sampleNames) { + public void initialize(Set jexlExpressions, Set compNames, Set knownNames, Set evalNames, Set sampleNames) { this.compNames = compNames; states = new ArrayList(); @@ -24,7 +26,7 @@ public class CompRod extends VariantStratifier implements RequiredStratification return states; } - public ArrayList getRelevantStates(ReferenceContext ref, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { + public ArrayList getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { ArrayList relevantStates = new ArrayList(); relevantStates.add(compName); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CpG.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CpG.java index 20f3de1ad..9af9fd518 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CpG.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CpG.java @@ -3,6 +3,8 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; +import 
org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp; import java.util.ArrayList; import java.util.Set; @@ -11,7 +13,7 @@ public class CpG extends VariantStratifier implements StandardStratification { private ArrayList states; @Override - public void initialize(Set jexlExpressions, Set compNames, Set knownNames, Set evalNames, Set sampleNames) { + public void initialize(Set jexlExpressions, Set compNames, Set knownNames, Set evalNames, Set sampleNames) { states = new ArrayList(); states.add("all"); states.add("CpG"); @@ -22,7 +24,7 @@ public class CpG extends VariantStratifier implements StandardStratification { return states; } - public ArrayList getRelevantStates(ReferenceContext ref, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { + public ArrayList getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { boolean isCpG = false; if (ref != null && ref.getBases() != null) { String fwRefBases = new String(ref.getBases()); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Degeneracy.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Degeneracy.java index dbe2262dc..eccc737ff 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Degeneracy.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Degeneracy.java @@ -3,6 +3,8 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import 
org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp; import java.util.ArrayList; import java.util.HashMap; @@ -14,7 +16,7 @@ public class Degeneracy extends VariantStratifier { private HashMap degeneracies; @Override - public void initialize(Set jexlExpressions, Set compNames, Set knownNames, Set evalNames, Set sampleNames) { + public void initialize(Set jexlExpressions, Set compNames, Set knownNames, Set evalNames, Set sampleNames) { states = new ArrayList(); states.add("1-fold"); states.add("2-fold"); @@ -52,7 +54,7 @@ public class Degeneracy extends VariantStratifier { return states; } - public ArrayList getRelevantStates(ReferenceContext ref, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { + public ArrayList getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { ArrayList relevantStates = new ArrayList(); relevantStates.add("all"); @@ -61,10 +63,9 @@ public class Degeneracy extends VariantStratifier { String type = null; String aa = null; - if (eval.getAttributeAsString("refseq.functionalClass") != null) { - type = eval.getAttributeAsString("refseq.functionalClass"); + if (eval.hasAttribute("refseq.functionalClass")) { aa = eval.getAttributeAsString("refseq.variantAA"); - } else if (eval.getAttributeAsString("refseq.functionalClass_1") != null) { + } else if (eval.hasAttribute("refseq.functionalClass_1")) { int annotationId = 1; String key; @@ -85,7 +86,7 @@ public class Degeneracy extends VariantStratifier { } annotationId++; - } while (eval.getAttributeAsString(key) != null); + } while (eval.hasAttribute(key)); } if (aa != null && degeneracies.containsKey(aa)) { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/EvalRod.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/EvalRod.java index 1f475bb28..faf0ab245 
100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/EvalRod.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/EvalRod.java @@ -3,6 +3,8 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp; import java.util.ArrayList; import java.util.Set; @@ -13,7 +15,7 @@ public class EvalRod extends VariantStratifier implements RequiredStratification private ArrayList states; @Override - public void initialize(Set jexlExpressions, Set compNames, Set knownNames, Set evalNames, Set sampleNames) { + public void initialize(Set jexlExpressions, Set compNames, Set knownNames, Set evalNames, Set sampleNames) { this.evalNames = evalNames; states = new ArrayList(); @@ -24,7 +26,7 @@ public class EvalRod extends VariantStratifier implements RequiredStratification return states; } - public ArrayList getRelevantStates(ReferenceContext ref, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { + public ArrayList getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { ArrayList relevantStates = new ArrayList(); relevantStates.add(evalName); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Filter.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Filter.java index 35c97c704..316300396 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Filter.java +++ 
b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Filter.java @@ -3,6 +3,8 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp; import java.util.ArrayList; import java.util.Set; @@ -12,7 +14,7 @@ public class Filter extends VariantStratifier { private ArrayList states; @Override - public void initialize(Set jexlExpressions, Set compNames, Set knownNames, Set evalNames, Set sampleNames) { + public void initialize(Set jexlExpressions, Set compNames, Set knownNames, Set evalNames, Set sampleNames) { states = new ArrayList(); states.add("called"); states.add("filtered"); @@ -23,7 +25,7 @@ public class Filter extends VariantStratifier { return states; } - public ArrayList getRelevantStates(ReferenceContext ref, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { + public ArrayList getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { ArrayList relevantStates = new ArrayList(); relevantStates.add("raw"); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/FunctionalClass.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/FunctionalClass.java index 4db03e1ee..609e604a6 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/FunctionalClass.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/FunctionalClass.java @@ -3,6 +3,8 @@ package 
org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp; import java.util.ArrayList; import java.util.Set; @@ -12,7 +14,7 @@ public class FunctionalClass extends VariantStratifier { private ArrayList states; @Override - public void initialize(Set jexlExpressions, Set compNames, Set knownNames, Set evalNames, Set sampleNames) { + public void initialize(Set jexlExpressions, Set compNames, Set knownNames, Set evalNames, Set sampleNames) { states = new ArrayList(); states.add("all"); states.add("silent"); @@ -24,7 +26,7 @@ public class FunctionalClass extends VariantStratifier { return states; } - public ArrayList getRelevantStates(ReferenceContext ref, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { + public ArrayList getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { ArrayList relevantStates = new ArrayList(); relevantStates.add("all"); @@ -32,9 +34,9 @@ public class FunctionalClass extends VariantStratifier { if (eval != null && eval.isVariant()) { String type = null; - if (eval.getAttributeAsString("refseq.functionalClass") != null) { + if (eval.hasAttribute("refseq.functionalClass")) { type = eval.getAttributeAsString("refseq.functionalClass"); - } else if (eval.getAttributeAsString("refseq.functionalClass_1") != null) { + } else if (eval.hasAttribute("refseq.functionalClass_1")) { int annotationId = 1; String key; @@ -43,7 +45,7 @@ public class FunctionalClass extends VariantStratifier { String newtype = eval.getAttributeAsString(key); - if 
( newtype != null && + if ( newtype != null && !newtype.equalsIgnoreCase("null") && ( type == null || ( type.equals("silent") && !newtype.equals("silent") ) || ( type.equals("missense") && newtype.equals("nonsense") ) ) @@ -52,7 +54,7 @@ public class FunctionalClass extends VariantStratifier { } annotationId++; - } while (eval.getAttributeAsString(key) != null); + } while (eval.hasAttribute(key)); } if (type != null) { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/JexlExpression.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/JexlExpression.java index 4e94ef03e..ce980e3f5 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/JexlExpression.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/JexlExpression.java @@ -3,22 +3,24 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp; import java.util.ArrayList; import java.util.Set; public class JexlExpression extends VariantStratifier implements StandardStratification { // needs to know the jexl expressions - private Set jexlExpressions; + private Set jexlExpressions; private ArrayList states; @Override - public void initialize(Set jexlExpressions, Set compNames, Set knownNames, Set evalNames, Set sampleNames) { + public void initialize(Set jexlExpressions, Set compNames, Set knownNames, Set evalNames, Set sampleNames) { this.jexlExpressions = jexlExpressions; states = new ArrayList(); states.add("none"); - for ( VariantContextUtils.JexlVCMatchExp jexlExpression : jexlExpressions ) { + for ( 
SortableJexlVCMatchExp jexlExpression : jexlExpressions ) { states.add(jexlExpression.name); } } @@ -27,11 +29,11 @@ public class JexlExpression extends VariantStratifier implements StandardStratif return states; } - public ArrayList getRelevantStates(ReferenceContext ref, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { + public ArrayList getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { ArrayList relevantStates = new ArrayList(); relevantStates.add("none"); - for ( VariantContextUtils.JexlVCMatchExp jexlExpression : jexlExpressions ) { + for ( SortableJexlVCMatchExp jexlExpression : jexlExpressions ) { if (eval != null && VariantContextUtils.match(eval, jexlExpression)) { relevantStates.add(jexlExpression.name); } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java index a4a1822ce..e55bd496e 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java @@ -3,8 +3,12 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp; import java.util.ArrayList; +import java.util.Collection; +import java.util.EnumSet; import java.util.Set; public class Novelty extends VariantStratifier implements StandardStratification { @@ -13,7 +17,7 @@ public class Novelty extends 
VariantStratifier implements StandardStratification private ArrayList states; @Override - public void initialize(Set jexlExpressions, Set compNames, Set knownNames, Set evalNames, Set sampleNames) { + public void initialize(Set jexlExpressions, Set compNames, Set knownNames, Set evalNames, Set sampleNames) { this.knownNames = knownNames; states = new ArrayList(); @@ -26,11 +30,29 @@ public class Novelty extends VariantStratifier implements StandardStratification return states; } - public ArrayList getRelevantStates(ReferenceContext ref, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { - ArrayList relevantStates = new ArrayList(); + public ArrayList getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { + boolean isNovel = true; + if (tracker != null) { + for (String knownName : knownNames) { + if (tracker.hasROD(knownName)) { + EnumSet allowableTypes = EnumSet.of(VariantContext.Type.NO_VARIATION); + if (eval != null) { + allowableTypes.add(eval.getType()); + } + + Collection knownComps = tracker.getVariantContexts(ref, knownName, allowableTypes, ref.getLocus(), true, true); + + isNovel = knownComps.size() == 0; + + break; + } + } + } + + ArrayList relevantStates = new ArrayList(); relevantStates.add("all"); - relevantStates.add(comp == null ? "novel" : "known"); + relevantStates.add(isNovel ? 
"novel" : "known"); return relevantStates; } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Sample.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Sample.java index 4c73031d8..d95ede0b2 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Sample.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Sample.java @@ -3,7 +3,8 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; -import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp; import java.util.ArrayList; import java.util.Set; @@ -13,7 +14,7 @@ public class Sample extends VariantStratifier { private ArrayList samples; @Override - public void initialize(Set jexlExpressions, Set compNames, Set knownNames, Set evalNames, Set sampleNames) { + public void initialize(Set jexlExpressions, Set compNames, Set knownNames, Set evalNames, Set sampleNames) { samples = new ArrayList(); samples.addAll(sampleNames); } @@ -22,7 +23,7 @@ public class Sample extends VariantStratifier { return samples; } - public ArrayList getRelevantStates(ReferenceContext ref, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { + public ArrayList getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { ArrayList relevantStates = new ArrayList(); relevantStates.add(sampleName); diff --git 
a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/VariantStratifier.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/VariantStratifier.java index 5717958de..8bfdcc3d1 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/VariantStratifier.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/VariantStratifier.java @@ -4,18 +4,19 @@ import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp; import java.util.ArrayList; import java.util.Set; public abstract class VariantStratifier implements Comparable { - public abstract void initialize(Set jexlExpressions, Set compNames, Set knownNames, Set evalNames, Set sampleNames); + public abstract void initialize(Set jexlExpressions, Set compNames, Set knownNames, Set evalNames, Set sampleNames); public ArrayList getAllStates() { return new ArrayList(); } - public ArrayList getRelevantStates(ReferenceContext ref, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { + public ArrayList getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { return null; } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/NewEvaluationContext.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/NewEvaluationContext.java index 4faa9c42f..44248e169 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/NewEvaluationContext.java +++ 
b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/NewEvaluationContext.java @@ -52,40 +52,35 @@ public class NewEvaluationContext extends HashMap { public void apply(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context, VariantContext comp, VariantContext eval) { for ( VariantEvaluator evaluation : evaluationInstances.values() ) { - //synchronized ( this ) { - // we always call update0 in case the evaluation tracks things like number of bases covered - //evaluation.update0(tracker, ref, context); + // we always call update0 in case the evaluation tracks things like number of bases covered - // the other updateN methods don't see a null context - if ( tracker == null ) - continue; + // the other updateN methods don't see a null context + if ( tracker == null ) + continue; - // now call the single or paired update function - switch ( evaluation.getComparisonOrder() ) { - case 1: - if (eval != null) { - evaluation.update1(eval, tracker, ref, context); - } + // now call the single or paired update function + switch ( evaluation.getComparisonOrder() ) { + case 1: + if (eval != null) { + evaluation.update1(eval, tracker, ref, context); + } - break; - case 2: - //if (eval != null) { - evaluation.update2(eval, comp, tracker, ref, context); - //} + break; + case 2: + //if (eval != null) { + evaluation.update2(eval, comp, tracker, ref, context); + //} - break; - default: - throw new ReviewedStingException("BUG: Unexpected evaluation order " + evaluation); - } - //} + break; + default: + throw new ReviewedStingException("BUG: Unexpected evaluation order " + evaluation); + } } } public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - //synchronized (this) { - for ( VariantEvaluator evaluation : evaluationInstances.values() ) { - evaluation.update0(tracker, ref, context); - } - //} + for ( VariantEvaluator evaluation : evaluationInstances.values() ) { + evaluation.update0(tracker, ref, context); + 
} } } diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java index 9ae1b58d3..fc57e97c0 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java @@ -9,6 +9,9 @@ import java.util.HashMap; import java.util.Map; public class VariantEvalIntegrationTest extends WalkerTest { + private static String variantEvalTestDataRoot = validationDataLocation + "/VariantEval"; + private static String fundamentalTestVCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.snps_and_indels.vcf"; + private static String cmdRoot = "-T VariantEval" + " -R " + b36KGReference; @@ -18,18 +21,301 @@ public class VariantEvalIntegrationTest extends WalkerTest { " -B:comp_genotypes,VCF " + validationDataLocation + "yri.trio.gatk.ug.head.vcf"; private static String rootGZ = cmdRoot + - " -D " + GATKDataLocation + "dbsnp_129_b36.rod" + - " -B:eval,VCF " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf.gz" + - " -B:comp_genotypes,VCF " + validationDataLocation + "yri.trio.gatk.ug.head.vcf.gz"; + " -D " + GATKDataLocation + "dbsnp_129_b36.rod" + + " -B:eval,VCF " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf.gz" + + " -B:comp_genotypes,VCF " + validationDataLocation + "yri.trio.gatk.ug.head.vcf.gz"; private static String[] testsEnumerations = {root, rootGZ}; + private String cmdLineBuilder(String ... 
arguments) { + String cmdline = ""; + + for ( int argIndex = 0; argIndex < arguments.length; argIndex++ ) { + cmdline += arguments[argIndex]; + + if (argIndex < arguments.length - 1) { + cmdline += " "; + } + } + + return cmdline; + } + + @Test + public void testFundamentalsCountVariantsSNPsAndIndels() { +// nProcessedLoci = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | awk '{ print length($4) }' | ~kiran/bin/SimpleStats = 38 +// nCalledLoci = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep -c PASS = 9 +// nRefLoci = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ if ($5 == ".") print $0 }' | wc -l = 4 +// nVariantLoci = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ if ($5 != ".") print $0 }' | wc -l = 5 +// variantRate = nVariantLoci / nProcessedLoci = 0.131578947 +// nSNPs = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ if ($5 != "." && length($4) == 1 && length($5) == 1) print $0 }' | wc -l = 3 +// nInsertions = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ if ($5 != "." && length($5) > 1) print $0 }' | wc -l = 1 +// nDeletions = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ if ($5 != "." 
&& length($4) > 1) print $0 }' | wc -l = 1 +// nNoCalls = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "[[:punct:]]/[[:punct:]]") print $0 }' | wc -l = 4 +// nHets = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "0/1" || $i ~ "1/0") print $0 }' | wc -l = 8 +// nHomRef = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "0/0") print $0 }' | wc -l = 10 +// nHomVar = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "1/1") print $0 }' | wc -l = 5 + + WalkerTestSpec spec = new WalkerTestSpec( + cmdLineBuilder( + "-T VariantEval", + "-R " + b37KGReference, + "-D " + b37dbSNP129, + "-B:eval,VCF " + fundamentalTestVCF, + "-noEV", + "-EV CountVariants", + "-noST", + "-BTI eval", + "-o %s" + ), + 1, + Arrays.asList("476b495de54e1a377c6895c02a6fdf6a") + ); + executeTest("testFundamentalsCountVariantsSNPsandIndels", spec); + } + + @Test + public void testFundamentalsCountVariantsSNPsAndIndelsWithNovelty() { +// nProcessedLociKnown = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | awk '{ print length($4) }' | ~kiran/bin/SimpleStats = 38 +// nCalledLociKnown = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | grep DBSNP129 | awk '{ if ($5 != ".") print $0 }' | wc -l = 3 +// nVariantLociKnown = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | grep DBSNP129 | awk '{ if ($5 != ".") print $0 }' | wc -l = 3 +// variantRateKnown = nVariantLociKnown / nProcessedLociKnown = 0.0789473684 +// nSNPsKnown = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | grep DBSNP129 | awk '{ if ($5 != "."
&& length($5) == 1) print $0 }' | wc -l = 3 +// nInsertionsKnown = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | grep DBSNP129 | awk '{ if ($5 != "." && length($5) > 1) print $0 }' | wc -l = 0 +// nDeletionsKnown = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | grep DBSNP129 | awk '{ if ($5 != "." && length($4) > 1) print $0 }' | wc -l = 0 +// nNoCallsKnown = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | grep DBSNP129 | awk '{ if ($5 != ".") print $0 }' | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "[[:punct:]]/[[:punct:]]") print $0 }' | wc -l = 0 +// nHetsKnown = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | grep DBSNP129 | awk '{ if ($5 != ".") print $0 }' | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "0/1" || $i ~ "1/0") print $0 }' | wc -l = 3 +// nHomRefKnown = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | grep DBSNP129 | awk '{ if ($5 != ".") print $0 }' | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "0/0") print $0 }' | wc -l = 1 +// nHomVarKnown = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | grep DBSNP129 | awk '{ if ($5 != ".") print $0 }' | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "1/1") print $0 }' | wc -l = 5 + + WalkerTestSpec spec = new WalkerTestSpec( + cmdLineBuilder( + "-T VariantEval", + "-R " + b37KGReference, + "-D " + b37dbSNP129, + "-B:eval,VCF " + fundamentalTestVCF, + "-noEV", + "-EV CountVariants", + "-noST", + "-ST Novelty", + "-BTI eval", + "-o %s" + ), + 1, + Arrays.asList("9f4e4fff339e725f42d65063e43e7d1c") + ); + executeTest("testFundamentalsCountVariantsSNPsandIndelsWithNovelty", spec); + } + + @Test + public void testFundamentalsCountVariantsSNPsAndIndelsWithNoveltyAndFilter() { +// nProcessedLociFail = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | awk '{ print length($4) }' | 
~kiran/bin/SimpleStats = 38 +// nCalledLociFail = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep -vc PASS = 3 +// nRefLociFail = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep -v PASS | awk '{ if ($5 == ".") print $0 }' | wc -l = 1 +// nVariantLociFail = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep -v PASS | awk '{ if ($5 != ".") print $0 }' | wc -l = 2 +// nSNPsFail = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep -v PASS | awk '{ if ($5 != "." && length($4) == 1 && length($5) == 1) print $0 }' | wc -l = 1 +// nInsertionsFail = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep -v PASS | awk '{ if ($5 != "." && length($5) > 1) print $0 }' | wc -l = 0 +// nDeletionsFail = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep -v PASS | awk '{ if ($5 != "." && length($4) > 1) print $0 }' | wc -l = 1 +// nNoCallsFail = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep -v PASS | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "[[:punct:]]/[[:punct:]]") print $0 }' | wc -l = 3 +// nHetsFail = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep -v PASS | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "0/1" || $i ~ "1/0") print $0 }' | wc -l = 1 +// nHomRefFail = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep -v PASS | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "0/0") print $0 }' | wc -l = 2 +// nHomVarFail = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep -v PASS | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "1/1") print $0 }' | wc -l = 3 + + WalkerTestSpec spec = new WalkerTestSpec( + cmdLineBuilder( + "-T VariantEval", + "-R " + b37KGReference, + "-D " + b37dbSNP129, + "-B:eval,VCF " + fundamentalTestVCF, + "-noEV", + "-EV CountVariants", + "-noST", + "-ST Novelty", + "-ST Filter", + "-BTI eval", + "-o %s" + ), 
+ 1, + Arrays.asList("369fa4f37bcc03b8a0bc1e58bf22bf0a") + ); + executeTest("testFundamentalsCountVariantsSNPsandIndelsWithNoveltyAndFilter", spec); + } + + @Test + public void testFundamentalsCountVariantsSNPsAndIndelsWithCpG() { +// nProcessedLoci = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | awk '{ print length($4) }' | ~kiran/bin/SimpleStats = 38 +// nCalledLoci = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep NONCPG | grep -c PASS = 8 +// nRefLoci = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep NONCPG | grep PASS | awk '{ if ($5 == ".") print $0 }' | wc -l = 3 +// nVariantLoci = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep NONCPG | grep PASS | awk '{ if ($5 != ".") print $0 }' | wc -l = 5 +// nSNPs = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep NONCPG | grep PASS | awk '{ if ($5 != "." && length($4) == 1 && length($5) == 1) print $0 }' | wc -l = 3 +// nInsertions = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep NONCPG | grep PASS | awk '{ if ($5 != "." && length($5) > 1) print $0 }' | wc -l = 1 +// nDeletions = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep NONCPG | grep PASS | awk '{ if ($5 != "." 
&& length($4) > 1) print $0 }' | wc -l = 1 +// nNoCalls = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep NONCPG | grep PASS | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "[[:punct:]]/[[:punct:]]") print $0 }' | wc -l = 4 +// nHets = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep NONCPG | grep PASS | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "0/1" || $i ~ "1/0") print $0 }' | wc -l = 8 +// nHomRef = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep NONCPG | grep PASS | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "0/0") print $0 }' | wc -l = 10 +// nHomVar = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep NONCPG | grep PASS | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "1/1") print $0 }' | wc -l = 5 + + WalkerTestSpec spec = new WalkerTestSpec( + cmdLineBuilder( + "-T VariantEval", + "-R " + b37KGReference, + "-D " + b37dbSNP129, + "-B:eval,VCF " + fundamentalTestVCF, + "-noEV", + "-EV CountVariants", + "-noST", + "-ST CpG", + "-BTI eval", + "-o %s" + ), + 1, + Arrays.asList("891ad0d38f1a1b08b31fe1cb6a3afc04") + ); + executeTest("testFundamentalsCountVariantsSNPsandIndelsWithCpG", spec); + } + + @Test + public void testFundamentalsCountVariantsSNPsAndIndelsWithFunctionalClasses() { + WalkerTestSpec spec = new WalkerTestSpec( + cmdLineBuilder( + "-T VariantEval", + "-R " + b37KGReference, + "-D " + b37dbSNP129, + "-B:eval,VCF " + fundamentalTestVCF, + "-noEV", + "-EV CountVariants", + "-noST", + "-ST FunctionalClass", + "-BTI eval", + "-o %s" + ), + 1, + Arrays.asList("d588179e2d9ed6e92a6ae1a80ac04270") + ); + executeTest("testFundamentalsCountVariantsSNPsandIndelsWithFunctionalClass", spec); + } + + @Test + public void testFundamentalsCountVariantsSNPsAndIndelsWithDegeneracy() { + WalkerTestSpec spec = new WalkerTestSpec( + cmdLineBuilder( + "-T VariantEval", + "-R " + b37KGReference, + "-D " + b37dbSNP129, + "-B:eval,VCF " + fundamentalTestVCF, + "-noEV", + 
"-EV CountVariants", + "-noST", + "-ST Degeneracy", + "-BTI eval", + "-o %s" + ), + 1, + Arrays.asList("ceb0f5d9e0ea99eb8d00bce2f7bc1b73") + ); + executeTest("testFundamentalsCountVariantsSNPsandIndelsWithDegeneracy", spec); + } + + @Test + public void testFundamentalsCountVariantsSNPsAndIndelsWithSample() { +// HG00513 +// nSNPs = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ if (length($4) == 1 && length($5) == 1) print $0 }' | awk '{ print $10 }' | grep -v '0/0' | grep -v '\.\/\.' | wc -l = 3 +// nInsertions = $ grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ if (length($4) == 1 && length($5) > 1) print $0 }' | awk '{ print $10 }' | grep -v '0/0' | grep -v '\.\/\.' | wc -l = 1 +// nDeletions = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ if (length($4) > 1 && length($5) == 1) print $0 }' | awk '{ print $10 }' | grep -v '0/0' | grep -v '\.\/\.' | wc -l = 0 +// nNoCalls = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ if ($10 ~ "[[:punct:]]/[[:punct:]]") print $0 }' | wc -l = 2 +// nHets = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ if ($10 ~ "0/1") print $0 }' | wc -l = 2 +// nHomRef = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ if ($10 ~ "0/0") print $0 }' | wc -l = 3 +// nHomVar = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ if ($10 ~ "1/1") print $0 }' | wc -l = 2 + + WalkerTestSpec spec = new WalkerTestSpec( + cmdLineBuilder( + "-T VariantEval", + "-R " + b37KGReference, + "-D " + b37dbSNP129, + "-B:eval,VCF " + fundamentalTestVCF, + "-noEV", + "-EV CountVariants", + "-noST", + "-ST Sample", + "-BTI eval", + "-o %s" + ), + 1, + Arrays.asList("94ce29b34b9e2e4304fc1bbf3f971a7d") + ); + executeTest("testFundamentalsCountVariantsSNPsandIndelsWithSample", 
spec); + } + + @Test + public void testFundamentalsCountVariantsSNPsAndIndelsWithJexlExpression() { +// nCalledLoci = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20) print $0 }' | wc -l = 7 +// nRefLoci = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20 && $6 == ".") print $0 }' | wc -l = 4 +// nVariantLoci = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20 && $6 != ".") print $0 }' | wc -l = 3 +// nSNPs = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20 && length($5) == 1 && length($6) == 1 && $6 != ".") print $0 }' | wc -l = 3 +// nHomRef = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20) print $11 "\n" $12 "\n" $13 }' | grep -c '0/0' = 9 +// nHets = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20) print $11 "\n" $12 "\n" $13 }' | grep -c '0/1' = 3 +// nHomVar = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20) print $11 "\n" $12 "\n" $13 }' | grep -c '1/1' = 5 +// nNoCalls = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk
-F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20) print $11 "\n" $12 "\n" $13 }' | grep -c '\.\/\.' = 4 + WalkerTestSpec spec = new WalkerTestSpec( + cmdLineBuilder( + "-T VariantEval", + "-R " + b37KGReference, + "-D " + b37dbSNP129, + "-B:eval,VCF " + fundamentalTestVCF, + "-noEV", + "-EV CountVariants", + "-noST", + "-ST JexlExpression", + "-select 'DP < 20'", + "-selectName DepthSelect", + "-BTI eval", + "-o %s" + ), + 1, + Arrays.asList("96de32970b204816ecd9a120b9d8782b") + ); + executeTest("testFundamentalsCountVariantsSNPsandIndelsWithJexlExpression", spec); + } + + @Test + public void testFundamentalsCountVariantsSNPsAndIndelsWithMultipleJexlExpressions() { +// nCalledLoci = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20) print $0 }' | wc -l = 7 +// nRefLoci = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20 && $6 == ".") print $0 }' | wc -l = 4 +// nVariantLoci = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20 && $6 != ".") print $0 }' | wc -l = 3 +// nSNPs = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20 && length($5) == 1 && length($6) == 1 && $6 != ".") print $0 }' | wc -l = 3 +// nHomRef = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20) print $11 "\n" $12 "\n" $13 }' | grep -c
'0/0' = 9 +// nHets = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20) print $11 "\n" $12 "\n" $13 }' | grep -c '0/1' = 3 +// nHomVar = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20) print $11 "\n" $12 "\n" $13 }' | grep -c '1/1' = 5 +// nNoCalls = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20) print $11 "\n" $12 "\n" $13 }' | grep -c '\.\/\.' = 4 + WalkerTestSpec spec = new WalkerTestSpec( + cmdLineBuilder( + "-T VariantEval", + "-R " + b37KGReference, + "-D " + b37dbSNP129, + "-B:eval,VCF " + fundamentalTestVCF, + "-noEV", + "-EV CountVariants", + "-noST", + "-ST JexlExpression", + "-select 'DP < 20'", + "-selectName DepthLt20", + "-select 'DP > 20'", + "-selectName DepthGt20", + "-BTI eval", + "-o %s" + ), + 1, + Arrays.asList("aea882132eb6afdc93fbc70e8d6c50e2") + ); + executeTest("testFundamentalsCountVariantsSNPsandIndelsWithMultipleJexlExpressions", spec); + } + @Test public void testSelect1() { String extraArgs = "-L 1:1-10,000,000"; for (String tests : testsEnumerations) { WalkerTestSpec spec = new WalkerTestSpec(withSelect(tests, "DP < 50", "DP50") + " " + extraArgs + " -o %s", - 1, Arrays.asList("c803c90f587796146286217ab30930a3")); + 1, Arrays.asList("4184a8d44f8c559c904e41edf464a467")); executeTestParallel("testSelect1", spec); //executeTest("testSelect1", spec); } @@ -50,7 +336,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { for (String vcfFile : vcfFiles) { WalkerTestSpec spec = new WalkerTestSpec(cmdRoot + " -B:eval,VCF " + validationDataLocation + vcfFile + " -B:comp,VCF " + 
validationDataLocation + "GenotypeConcordanceComp.vcf -noEV -EV GenotypeConcordance -o %s", 1, - Arrays.asList("bb16335f9510bcab2bd14a4299afd879")); + Arrays.asList("1387fcf8d5c53ff2c820fe79cc999bcf")); executeTestParallel("testVEGenotypeConcordance" + vcfFile, spec); //executeTest("testVEGenotypeConcordance" + vcfFile, spec); } @@ -60,8 +346,8 @@ public class VariantEvalIntegrationTest extends WalkerTest { @Test public void testVESimple() { HashMap expectations = new HashMap(); - expectations.put("-L 1:1-10,000,000", "47990cfb955720421c29991954af4450"); - expectations.put("-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -mvq 0 -EV MendelianViolationEvaluator", "ce7bb86e6281b1aef1ad94f9ba7301a9"); + expectations.put("-L 1:1-10,000,000", "b28516b4d3627d2eb017a5449284a4e4"); + expectations.put("-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -mvq 0 -EV MendelianViolationEvaluator", "a49d897095586ddb72bfe9faf0291312"); for ( Map.Entry entry : expectations.entrySet() ) { String extraArgs = entry.getKey(); @@ -84,10 +370,9 @@ public class VariantEvalIntegrationTest extends WalkerTest { " -B:comp_hapmap,VCF " + validationDataLocation + "CEU_hapmap_nogt_23.vcf"; - String matchingMD5 = "6388fbad81b0f281298812496fd3ed6c"; - expectations.put("", matchingMD5); - expectations.put(" -knownName comp_hapmap -knownName dbsnp", matchingMD5); - expectations.put(" -knownName comp_hapmap", "6388fbad81b0f281298812496fd3ed6c"); + expectations.put("", "65d74eb7eea2355989c389e8fa886c06"); + expectations.put(" -knownName comp_hapmap -knownName dbsnp", "776827cd5a6fcaf8b8508813e8dc023c"); + expectations.put(" -knownName comp_hapmap", "e99a89cbca2027c983edc00d31ea4ec9"); for (String tests : testsEnumerations) { for (Map.Entry entry : expectations.entrySet()) { String extraArgs2 = entry.getKey(); @@ -102,6 +387,79 @@ public class VariantEvalIntegrationTest extends WalkerTest { } } + @Test + public void testCompVsEvalAC() { + String extraArgs = "-T VariantEval -R "+b36KGReference+" -o %s 
-EV GenotypeConcordance -B:evalYRI,VCF /humgen/gsa-hpprojects/GATK/data/Validation_Data/yri.trio.gatk.ug.very.few.lines.vcf -B:compYRI,VCF /humgen/gsa-hpprojects/GATK/data/Validation_Data/yri.trio.gatk.fake.genotypes.ac.test.vcf"; + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("49cb4a6126c5383abd9a49a6c22b8d93")); + executeTestParallel("testCompVsEvalAC",spec); + //executeTest("testCompVsEvalAC",spec); + } + + private static String withSelect(String cmd, String select, String name) { + return String.format("%s -select '%s' -selectName %s", cmd, select, name); + } + + @Test + public void testTranches() { + String extraArgs = "-T VariantEval -R "+ hg18Reference +" -B:eval,vcf " + validationDataLocation + "GA2.WEx.cleaned.ug.snpfiltered.indelfiltered.optimized.vcf -o %s -EV TiTvVariantEvaluator -L chr1 -noEV -tf " + testDir + "tranches.6.txt"; + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("68044a69f03ba4cc11d2061cc96e9eb5")); + executeTestParallel("testTranches",spec); + //executeTest("testTranches",spec); + } + + @Test + public void testCompOverlap() { + String extraArgs = "-T VariantEval -R " + b37KGReference + " -L " + validationDataLocation + "VariantEval/pacbio.hg19.intervals -B:comphapmap,vcf " + comparisonDataLocation + "Validated/HapMap/3.3/genotypes_r27_nr.b37_fwd.vcf -B:eval,vcf " + validationDataLocation + "VariantEval/pacbio.ts.recalibrated.vcf -noEV -EV CompOverlap -sn NA12878 -noST -ST Novelty -o %s"; + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("e8b5561eb60ea98a9be4a45abee00e07")); + executeTestParallel("testCompOverlap",spec); + //executeTest("testCompOverlap",spec); + } + + @Test + public void testEvalTrackWithoutGenotypes() { + String dbsnp = GATKDataLocation + "dbsnp_129_b37.rod"; + + String extraArgs = "-T VariantEval -R " + + b37KGReference + + " -L 20" + + " -D " + dbsnp + + " -B:evalBI,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" + + " -noST 
-ST Novelty -o %s"; + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("9323a6ad62dedbdb08752411960db60f")); + executeTestParallel("testEvalTrackWithoutGenotypes",spec); + } + + @Test + public void testMultipleEvalTracksWithoutGenotypes() { + String dbsnp = GATKDataLocation + "dbsnp_129_b37.rod"; + + String extraArgs = "-T VariantEval -R " + b37KGReference + + " -L 20" + + " -D " + dbsnp + + " -B:evalBI,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" + + " -B:evalBC,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bc.sites.vcf" + + " -noST -ST Novelty -o %s"; + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("ef23195331affd332af1de9d261fdd0a")); + executeTestParallel("testMultipleEvalTracksWithoutGenotypes",spec); + } + + @Test + public void testMultipleCompTracks() { + String dbsnp = GATKDataLocation + "dbsnp_132_b37.vcf"; + + String extraArgs = "-T VariantEval" + + " -R " + b37KGReference + + " -B:comp,VCF " + validationDataLocation + "/VariantEval/ALL.phase1.chr20.broad.snps.genotypes.subset.vcf" + + " -B:eval,VCF " + validationDataLocation + "/VariantEval/NA12878.hg19.HiSeq.WGS.cleaned.ug.snpfiltered.indelfiltered.optimized.cut.subset.vcf" + + " -B:dbsnp,VCF " + dbsnp + + " -L 20:10000000-10100000" + + " -noST -noEV -ST Novelty -EV CompOverlap" + + " -o %s"; + + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("3fced8e5fa7a1c952d08fead0accd3fb")); + executeTestParallel("testMultipleCompTracks",spec); + } + // @Test // public void testVEGenomicallyAnnotated() { // String vecmd = "-T VariantEval" + @@ -129,81 +487,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { // //executeTest("testVEWriteVCF", spec); // } // } - - @Test - public void testCompVsEvalAC() { - String extraArgs = "-T VariantEval -R "+b36KGReference+" -o %s -EV GenotypeConcordance -B:evalYRI,VCF /humgen/gsa-hpprojects/GATK/data/Validation_Data/yri.trio.gatk.ug.very.few.lines.vcf 
-B:compYRI,VCF /humgen/gsa-hpprojects/GATK/data/Validation_Data/yri.trio.gatk.fake.genotypes.ac.test.vcf"; - WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("d8d59ec86ec9e00abad4ec44741de22f")); - executeTestParallel("testCompVsEvalAC",spec); - //executeTest("testCompVsEvalAC",spec); - } - - private static String withSelect(String cmd, String select, String name) { - return String.format("%s -select '%s' -selectName %s", cmd, select, name); - } - - @Test - public void testTranches() { - String extraArgs = "-T VariantEval -R "+ hg18Reference +" -B:eval,vcf " + validationDataLocation + "GA2.WEx.cleaned.ug.snpfiltered.indelfiltered.optimized.vcf -o %s -EV TiTvVariantEvaluator -L chr1 -noEV -tf " + testDir + "tranches.6.txt"; - WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("68044a69f03ba4cc11d2061cc96e9eb5")); - executeTestParallel("testTranches",spec); - //executeTest("testTranches",spec); - } - - @Test - public void testCompOverlap() { - String extraArgs = "-T VariantEval -R " + b37KGReference + " -L " + validationDataLocation + "VariantEval/pacbio.hg19.intervals -B:comphapmap,vcf " + comparisonDataLocation + "Validated/HapMap/3.3/genotypes_r27_nr.b37_fwd.vcf -B:eval,vcf " + validationDataLocation + "VariantEval/pacbio.ts.recalibrated.vcf -noEV -EV CompOverlap -sn NA12878 -noST -ST Novelty -o %s"; - WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("81377be26bf8fa32339d01c173428f7d")); - executeTestParallel("testCompOverlap",spec); - //executeTest("testCompOverlap",spec); - } - - @Test - public void testEvalTrackWithoutGenotypes() { - String dbsnp = GATKDataLocation + "dbsnp_129_b37.rod"; - - String extraArgs = "-T VariantEval -R " + - b37KGReference + - " -L 20" + - " -D " + dbsnp + - " -B:evalBI,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" + - " -noST -ST Novelty -o %s"; - WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("2e2c24b49f699506b967befbde5a6fa8")); 
- executeTestParallel("testEvalTrackWithoutGenotypes",spec); - } - - @Test - public void testMultipleEvalTracksWithoutGenotypes() { - String dbsnp = GATKDataLocation + "dbsnp_129_b37.rod"; - - String extraArgs = "-T VariantEval -R " + b37KGReference + - " -L 20" + - " -D " + dbsnp + - " -B:evalBI,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" + - " -B:evalBC,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bc.sites.vcf" + - " -noST -ST Novelty -o %s"; - WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("144053b8bef5a79b23d0abd17b561294")); - executeTestParallel("testMultipleEvalTracksWithoutGenotypes",spec); - } - - @Test - public void testMultipleCompTracks() { - String dbsnp = GATKDataLocation + "dbsnp_132_b37.vcf"; - - String extraArgs = "-T VariantEval" + - " -R " + b37KGReference + - " -B:comp,VCF " + validationDataLocation + "/VariantEval/ALL.phase1.chr20.broad.snps.genotypes.subset.vcf" + - " -B:eval,VCF " + validationDataLocation + "/VariantEval/NA12878.hg19.HiSeq.WGS.cleaned.ug.snpfiltered.indelfiltered.optimized.cut.subset.vcf" + - " -B:dbsnp,VCF " + dbsnp + - " -L 20:10000000-10100000" + - " -noST -noEV -ST Novelty -EV CompOverlap" + - " -o %s"; - - WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("2d2c6e7850ec964624bb032d24834e2f")); - executeTestParallel("testMultipleCompTracks",spec); - } - - +// // @Test // public void testVEValidatePass() { // String extraArgs = "-L 1:1-10,000,000"; @@ -227,4 +511,5 @@ public class VariantEvalIntegrationTest extends WalkerTest { // private static String withValidateTiTv(String cmd, double min, double max) { // return String.format("%s -validate 'eval.comp_genotypes.all.called.all.titv.tiTvRatio >= %2$s' -validate 'eval.comp_genotypes.all.called.all.titv.tiTvRatio <= %3$s'", cmd, min, max); // } + }