diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java index 0aae62c5c..6f65132c2 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java @@ -1,7 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.varianteval; +import org.apache.log4j.Logger; import org.broad.tribble.util.variantcontext.VariantContext; -import org.broad.tribble.vcf.VCFConstants; import org.broad.tribble.vcf.VCFHeader; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; @@ -17,21 +17,14 @@ import org.broadinstitute.sting.gatk.walkers.Reference; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.gatk.walkers.TreeReducible; import org.broadinstitute.sting.gatk.walkers.Window; -import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.StandardEval; import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.VariantEvaluator; -import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.RequiredStratification; -import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.StandardStratification; import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.VariantStratifier; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis; import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint; -import org.broadinstitute.sting.gatk.walkers.varianteval.util.AnalysisModuleScanner; -import org.broadinstitute.sting.gatk.walkers.varianteval.util.NewEvaluationContext; -import org.broadinstitute.sting.gatk.walkers.varianteval.util.StateKey; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.*; import org.broadinstitute.sting.gatk.walkers.variantrecalibration.Tranche; import org.broadinstitute.sting.gatk.walkers.variantrecalibration.VariantRecalibrator; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.classloader.PluginManager; import org.broadinstitute.sting.utils.exceptions.StingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.report.utils.TableType; @@ -43,7 +36,7 @@ import java.lang.reflect.Field; import java.util.*; /** - * General-purpose tool for variant evaluation (% in dbSNP, genotype concordance, Ts/Tv ratios, and a lot more) + * General-purpose tool for variant evaluation (% in dbSNP, genotype concordance, Ti/Tv ratios, and a lot more) */ @Reference(window=@Window(start=-50, stop=50)) public class VariantEvalWalker extends RodWalker implements TreeReducible { @@ -99,7 +92,7 @@ public class VariantEvalWalker extends RodWalker implements Tr private String TRANCHE_FILENAME = null; // Variables - private Set jexlExpressions = new TreeSet(); + private Set jexlExpressions = new TreeSet(); private Set compNames = new TreeSet(); private Set knownNames = new TreeSet(); private Set evalNames = new TreeSet(); @@ -118,240 +111,17 @@ public class VariantEvalWalker extends RodWalker implements Tr private GATKReport report = null; // Public constants - public static String ALL_SAMPLE_NAME = "all"; + private static String ALL_SAMPLE_NAME = "all"; - /** - * List all of the available evaluation modules, then exit successfully - */ - private void listModulesAndExit() { - List> vsClasses = new PluginManager( VariantStratifier.class ).getPlugins(); - List> veClasses = new PluginManager( VariantEvaluator.class ).getPlugins(); - - logger.info("Available stratification modules:"); - logger.info("(Standard modules are starred)"); - for (Class vsClass : vsClasses) { - logger.info("\t" + vsClass.getSimpleName() + (RequiredStratification.class.isAssignableFrom(vsClass) || StandardStratification.class.isAssignableFrom(vsClass) ? "*" : "")); - } - logger.info(""); - - logger.info("Available evaluation modules:"); - logger.info("(Standard modules are starred)"); - for (Class veClass : veClasses) { - logger.info("\t" + veClass.getSimpleName() + (StandardEval.class.isAssignableFrom(veClass) ? "*" : "")); - } - logger.info(""); - - System.exit(0); - } - - /** - * Initialize required, standard and user-specified stratification objects - * - * @param noStandardStrats don't use the standard stratifications - * @param modulesToUse the list of stratification modules to use - * @return set of stratifications to use - */ - private TreeSet initializeStratificationObjects(boolean noStandardStrats, String[] modulesToUse) { - TreeSet strats = new TreeSet(); - Set stratsToUse = new HashSet(); - - // Create a map for all stratification modules for easy lookup. - HashMap> classMap = new HashMap>(); - for ( Class c : new PluginManager( VariantStratifier.class ).getPlugins() ) { - classMap.put(c.getSimpleName(), c); - } - - // We must use all required stratification modules. - for ( Class reqClass : new PluginManager( RequiredStratification.class ).getPlugins() ) { - if ( classMap.containsKey(reqClass.getSimpleName()) ) { - stratsToUse.add(reqClass.getSimpleName()); - } - } - - // By default, use standard stratification modules. - if ( !noStandardStrats ) { - for ( Class stdClass : new PluginManager( StandardStratification.class ).getPlugins() ) { - if ( classMap.containsKey(stdClass.getSimpleName()) ) { - stratsToUse.add(stdClass.getSimpleName()); - } - } - } - - // Now add the user-selected modules - stratsToUse.addAll(Arrays.asList(modulesToUse)); - - // Instantiate the stratifications - for ( String module : stratsToUse ) { - if ( !classMap.containsKey(module) ) { - throw new UserException.CommandLineException("Module " + module + " could not be found; please check that you have specified the class name correctly"); - } - - if ( classMap.containsKey(module) ) { - Class c = classMap.get(module); - - try { - VariantStratifier vs = c.newInstance(); - vs.initialize(jexlExpressions, compNames, knownNames, evalNames, sampleNamesForStratification); - - strats.add(vs); - } catch (InstantiationException e) { - throw new StingException("Unable to instantiate stratification module '" + c.getSimpleName() + "'"); - } catch (IllegalAccessException e) { - throw new StingException("Illegal access error when trying to instantiate stratification module '" + c.getSimpleName() + "'"); - } - } - } - - return strats; - } - - /** - * Initialize required, standard and user-specified evaluation objects - * - * @param noStandardEvals don't use the standard evaluations - * @param modulesToUse the list of evaluation modules to use - * @return set of evaluations to use - */ - private Set> initializeEvaluationObjects(boolean noStandardEvals, String[] modulesToUse) { - Set> evals = new HashSet>(); - - // Create a map for all eval modules for easy lookup. - HashMap> classMap = new HashMap>(); - for ( Class c : new PluginManager( VariantEvaluator.class ).getPlugins() ) { - classMap.put(c.getSimpleName(), c); - } - - // By default, use standard eval modules. - if ( !noStandardEvals ) { - for ( Class stdClass : new PluginManager( StandardEval.class ).getPlugins() ) { - if ( classMap.containsKey(stdClass.getSimpleName()) ) { - evals.add(classMap.get(stdClass.getSimpleName())); - } - } - } - - // Get the specific classes provided. - for ( String module : modulesToUse ) { - if ( !classMap.containsKey(module) ) { - throw new UserException.CommandLineException("Module " + module + " could not be found; please check that you have specified the class name correctly"); - } - - if ( classMap.containsKey(module) ) { - evals.add(classMap.get(module)); - } - } - - return evals; - } - - /** - * Recursively initialize the evaluation contexts - * - * @param stratificationObjects the stratifications to use - * @param evaluationObjects the evaluations to use - * @param stratStack a stack of stratifications to apply - * @param ec evaluation context - * @return a map of all the evaluation contexts - */ - private HashMap initializeEvaluationContexts(Set stratificationObjects, Set> evaluationObjects, Stack stratStack, NewEvaluationContext ec) { - HashMap ecs = new HashMap(); - - if (stratStack == null) { - stratStack = new Stack(); - stratStack.addAll(stratificationObjects); - } - - if (!stratStack.isEmpty()) { - Stack newStratStack = new Stack(); - newStratStack.addAll(stratStack); - - VariantStratifier vs = newStratStack.pop(); - - for ( String state : vs.getAllStates() ) { - NewEvaluationContext nec = new NewEvaluationContext(); - if (ec != null) { - nec.putAll(ec); - } - nec.put(vs, state); - - ecs.putAll(initializeEvaluationContexts(stratificationObjects, evaluationObjects, newStratStack, nec)); - } - } else { - HashMap necs = new HashMap(); - - StateKey stateKey = new StateKey(); - for ( VariantStratifier vs : ec.keySet() ) { - String state = ec.get(vs); - - stateKey.put(vs.getClass().getSimpleName(), state); - } - - ec.addEvaluationClassList(this, stateKey, evaluationObjects); - - necs.put(stateKey, ec); - - return necs; - } - - return ecs; - } - - /** - * Initialize the output report - * - * @param stratificationObjects the stratifications to use - * @param evaluationObjects the evaluations to use - * @return an initialized report object - */ - private GATKReport initializeGATKReport(Set stratificationObjects, Set> evaluationObjects) { - GATKReport report = new GATKReport(); - - for ( Class ve : evaluationObjects ) { - String tableName = ve.getSimpleName(); - String tableDesc = ve.getAnnotation(Analysis.class).description(); - - report.addTable(tableName, tableDesc); - - GATKReportTable table = report.getTable(tableName); - table.addPrimaryKey("entry", false); - table.addColumn(tableName, tableName); - - for ( VariantStratifier vs : stratificationObjects ) { - String columnName = vs.getClass().getSimpleName(); - - table.addColumn(columnName, "unknown"); - } - - try { - VariantEvaluator vei = ve.newInstance(); - vei.initialize(this); - - AnalysisModuleScanner scanner = new AnalysisModuleScanner(vei); - Map datamap = scanner.getData(); - - for (Field field : datamap.keySet()) { - field.setAccessible(true); - - if (! (field.get(vei) instanceof TableType) ) { - table.addColumn(field.getName(), 0.0); - } - } - } catch (InstantiationException e) { - throw new StingException("InstantiationException: " + e); - } catch (IllegalAccessException e) { - throw new StingException("IllegalAccessException: " + e); - } - } - - return report; - } + // Utility class + private final VariantEvalUtils variantEvalUtils = new VariantEvalUtils(this); /** * Initialize the stratifications, evaluations, evaluation contexts, and reporting object */ public void initialize() { // Just list the modules, and exit quickly. - if (LIST) { listModulesAndExit(); } + if (LIST) { variantEvalUtils.listModulesAndExit(); } // Categorize each rod as an eval or a comp rod. for ( ReferenceOrderedDataSource d : this.getToolkit().getRodDataSources() ) { @@ -386,7 +156,11 @@ public class VariantEvalWalker extends RodWalker implements Tr sampleNamesForStratification.add(ALL_SAMPLE_NAME); // Initialize select expressions - jexlExpressions.addAll(VariantContextUtils.initializeMatchExps(SELECT_NAMES, SELECT_EXPS)); + //jexlExpressions.addAll(VariantContextUtils.initializeMatchExps(SELECT_NAMES, SELECT_EXPS)); + for (VariantContextUtils.JexlVCMatchExp jexl : VariantContextUtils.initializeMatchExps(SELECT_NAMES, SELECT_EXPS)) { + SortableJexlVCMatchExp sjexl = new SortableJexlVCMatchExp(jexl.name, jexl.exp); + jexlExpressions.add(sjexl); + } // Add select expressions for anything in the tranches file if ( TRANCHE_FILENAME != null ) { @@ -399,253 +173,14 @@ public class VariantEvalWalker extends RodWalker implements Tr } // Initialize the set of stratifications and evaluations to use - stratificationObjects = initializeStratificationObjects(NO_STANDARD_STRATIFICATIONS, STRATIFICATIONS_TO_USE); - Set> evaluationObjects = initializeEvaluationObjects(NO_STANDARD_MODULES, MODULES_TO_USE); + stratificationObjects = variantEvalUtils.initializeStratificationObjects(NO_STANDARD_STRATIFICATIONS, STRATIFICATIONS_TO_USE); + Set> evaluationObjects = variantEvalUtils.initializeEvaluationObjects(NO_STANDARD_MODULES, MODULES_TO_USE); // Initialize the evaluation contexts - evaluationContexts = initializeEvaluationContexts(stratificationObjects, evaluationObjects, null, null); + evaluationContexts = variantEvalUtils.initializeEvaluationContexts(stratificationObjects, evaluationObjects, null, null); // Initialize report table - report = initializeGATKReport(stratificationObjects, evaluationObjects); - } - - /** - * Figure out what the allowable variation types are based on the eval context - * - * @param tracker the reference metadata tracker - * @param ref the reference context - * @param compNames the comp track names - * @param evalNames the evaluation track names - * @return the set of allowable variation types - */ - private EnumSet getAllowableVariationTypes(RefMetaDataTracker tracker, ReferenceContext ref, Set compNames, Set evalNames) { - EnumSet allowableTypes = EnumSet.of(VariantContext.Type.NO_VARIATION); - - if (tracker != null) { - Collection evalvcs = tracker.getVariantContexts(ref, evalNames, null, ref.getLocus(), true, false); - - for ( VariantContext vc : evalvcs ) { - allowableTypes.add(vc.getType()); - } - - if (allowableTypes.size() == 1) { - // We didn't find any variation in the eval track, so now let's look at the comp track for allowable types - Collection compvcs = tracker.getVariantContexts(ref, compNames, null, ref.getLocus(), true, false); - - for ( VariantContext vc : compvcs ) { - allowableTypes.add(vc.getType()); - } - } - } - - return allowableTypes; - } - - /** - * Subset a VariantContext to a single sample - * - * @param vc the VariantContext object containing multiple samples - * @param sampleName the sample to pull out of the VariantContext - * @return a new VariantContext with just the requested sample - */ - private VariantContext getSubsetOfVariantContext(VariantContext vc, String sampleName) { - ArrayList sampleNames = new ArrayList(); - sampleNames.add(sampleName); - - return getSubsetOfVariantContext(vc, sampleNames); - } - - /** - * Subset a VariantContext to a set of samples - * - * @param vc the VariantContext object containing multiple samples - * @param sampleNames the samples to pull out of the VariantContext - * @return a new VariantContext with just the requested samples - */ - private VariantContext getSubsetOfVariantContext(VariantContext vc, Collection sampleNames) { - VariantContext vcsub = vc.subContextFromGenotypes(vc.getGenotypes(sampleNames).values()); - - HashMap newAts = new HashMap(vcsub.getAttributes()); - - int originalAlleleCount = vc.getHetCount() + 2*vc.getHomVarCount(); - int newAlleleCount = vcsub.getHetCount() + 2*vcsub.getHomVarCount(); - - if (originalAlleleCount == newAlleleCount && newAlleleCount == 1) { - newAts.put("ISSINGLETON", true); - } - - VariantContextUtils.calculateChromosomeCounts(vcsub, newAts, true); - vcsub = VariantContext.modifyAttributes(vcsub,newAts); - - logger.debug(String.format("VC %s subset to %s AC%n", vc.getSource(), vc.getAttributeAsString(VCFConstants.ALLELE_COUNT_KEY))); - - return vcsub; - } - - /** - * For a list of track names, bind the variant contexts to a trackName->sampleName->VariantContext mapping. - * Additional variant contexts per sample are automatically generated and added to the map unless the - * sample name matches the ALL_SAMPLE_NAME constant. - * - * @param tracker the metadata tracker - * @param ref the reference context - * @param trackNames the list of track names to process - * @param allowableTypes a set of allowable variation types - * @param byFilter if false, only accept PASSing VariantContexts. Otherwise, accept both PASSing and filtered sites - * @param trackPerSample if false, don't stratify per sample (and don't cut up the VariantContext like we would need to do this) - * @param allowNoCalls if false, don't accept no-call loci from a variant track - * @return a mapping of track names to a list of VariantContext objects - */ - private HashMap> bindVariantContexts(RefMetaDataTracker tracker, ReferenceContext ref, Set trackNames, EnumSet allowableTypes, boolean byFilter, boolean trackPerSample, boolean allowNoCalls) { - HashMap> bindings = new HashMap>(); - - for ( String trackName : trackNames ) { - HashMap vcs = new HashMap(); - - Collection contexts = tracker == null ? null : tracker.getVariantContexts(ref, trackName, allowableTypes, ref.getLocus(), true, true); - VariantContext vc = contexts != null && contexts.size() == 1 ? contexts.iterator().next() : null; - - // First, filter the VariantContext to represent only the samples for evaluation - if ( vc != null ) { - VariantContext vcsub = vc; - - if (vc.hasGenotypes() && vc.hasGenotypes(sampleNamesForEvaluation)) { - vcsub = getSubsetOfVariantContext(vc, sampleNamesForEvaluation); - } - - if ((byFilter || !vcsub.isFiltered()) && (allowNoCalls || vcsub.getType() != VariantContext.Type.NO_VARIATION)) { - vcs.put(ALL_SAMPLE_NAME, vcsub); - } - - // Now, if stratifying, split the subsetted vc per sample and add each as a new context - if ( vc.hasGenotypes() && trackPerSample ) { - for ( String sampleName : sampleNamesForEvaluation ) { - VariantContext samplevc = getSubsetOfVariantContext(vc, sampleName); - - if ((byFilter || !samplevc.isFiltered()) && (allowNoCalls || samplevc.getType() != VariantContext.Type.NO_VARIATION)) { - vcs.put(sampleName, samplevc); - } - } - } - - bindings.put(trackName, vcs); - } - } - - return bindings; - } - - /** - * Maps track names to sample name to VariantContext objects. For eval tracks, VariantContexts per specified - * sample are also included. - * - * @param tracker the metadata tracker - * @param ref the reference context - * @param compNames the list of comp names to process - * @param evalNames the list of eval names to process - * @return a mapping of track names to a list of VariantContext objects - */ - private HashMap> getVariantContexts(RefMetaDataTracker tracker, ReferenceContext ref, Set compNames, Set evalNames) { - HashMap> vcs = new HashMap>(); - - EnumSet allowableTypes = getAllowableVariationTypes(tracker, ref, compNames, evalNames); - - boolean byFilter = false; - boolean perSampleIsEnabled = false; - for (VariantStratifier vs : stratificationObjects) { - if (vs.getClass().getSimpleName().equals("Filter")) { - byFilter = true; - } else if (vs.getClass().getSimpleName().equals("Sample")) { - perSampleIsEnabled = true; - } - } - - HashMap> evalBindings = bindVariantContexts(tracker, ref, evalNames, allowableTypes, byFilter, perSampleIsEnabled, true); - HashMap> compBindings = bindVariantContexts(tracker, ref, compNames, allowableTypes, byFilter, false, false); - - vcs.putAll(compBindings); - vcs.putAll(evalBindings); - - return vcs; - } - - /** - * Recursively initialize the state keys used to look up the right evaluation context based on the state of the variant context - * - * @param stateMap the map of allowable states - * @param stateStack a stack of the states - * @param stateKey a state key object - * @param stateKeys all the state keys - * @return a list of state keys - */ - private ArrayList initializeStateKeys(HashMap> stateMap, Stack>> stateStack, StateKey stateKey, ArrayList stateKeys) { - if (stateStack == null) { - stateStack = new Stack>>(); - - for ( VariantStratifier vs : stateMap.keySet() ) { - HashMap> oneSetOfStates = new HashMap>(); - oneSetOfStates.put(vs, stateMap.get(vs)); - - stateStack.add(oneSetOfStates); - } - } - - if (!stateStack.isEmpty()) { - Stack>> newStateStack = new Stack>>(); - newStateStack.addAll(stateStack); - - HashMap> oneSetOfStates = newStateStack.pop(); - VariantStratifier vs = oneSetOfStates.keySet().iterator().next(); - - for ( String state : oneSetOfStates.get(vs)) { - StateKey newStateKey = new StateKey(); - if (stateKey != null) { - newStateKey.putAll(stateKey); - } - - newStateKey.put(vs.getClass().getSimpleName(), state); - - initializeStateKeys(stateMap, newStateStack, newStateKey, stateKeys); - } - } else { - stateKeys.add(stateKey); - - return stateKeys; - } - - return stateKeys; - } - - /** - * Return the number of samples being used - * @return the number of samples - */ - public int getNumSamples() { - return numSamples; - } - - /** - * Return the minimum phasing quality to be used with the GenotypePhasingEvaluator module - * @return the minimum phasing quality - */ - public double getMinPhaseQuality() { - return MIN_PHASE_QUALITY; - } - - /** - * Return the family structure to be used with the MendelianViolationEvaluator module - * @return the family structure string - */ - public String getFamilyStructure() { - return FAMILY_STRUCTURE; - } - - /** - * Return the mendelian violation qual threshold to be used with the MendelianViolationEvaluator module - * @return the mendelian violation qual threshold - */ - public double getMendelianViolationQualThreshold() { - return MENDELIAN_VIOLATION_QUAL_THRESHOLD; + report = variantEvalUtils.initializeGATKReport(stratificationObjects, evaluationObjects); } /** @@ -659,32 +194,34 @@ public class VariantEvalWalker extends RodWalker implements Tr } } - // track sample vc - HashMap> vcs = getVariantContexts(tracker, ref, compNames, evalNames); + if (tracker != null) { + // track sample vc + HashMap> vcs = variantEvalUtils.getVariantContexts(tracker, ref, compNames, evalNames); - for ( String compName : compNames ) { - VariantContext comp = vcs.containsKey(compName) && vcs.get(compName) != null && vcs.get(compName).containsKey(ALL_SAMPLE_NAME) ? vcs.get(compName).get(ALL_SAMPLE_NAME) : null; + for ( String compName : compNames ) { + VariantContext comp = vcs.containsKey(compName) && vcs.get(compName) != null && vcs.get(compName).containsKey(ALL_SAMPLE_NAME) ? vcs.get(compName).get(ALL_SAMPLE_NAME) : null; - for ( String evalName : evalNames ) { - for ( String sampleName : sampleNamesForStratification ) { - VariantContext eval = vcs.containsKey(evalName) && vcs.get(evalName) != null ? vcs.get(evalName).get(sampleName) : null; + for ( String evalName : evalNames ) { + for ( String sampleName : sampleNamesForStratification ) { + VariantContext eval = vcs.containsKey(evalName) && vcs.get(evalName) != null ? vcs.get(evalName).get(sampleName) : null; - HashMap> stateMap = new HashMap>(); - for ( VariantStratifier vs : stratificationObjects ) { - ArrayList states = vs.getRelevantStates(ref, comp, compName, eval, evalName, sampleName); - stateMap.put(vs, states); - } + HashMap> stateMap = new HashMap>(); + for ( VariantStratifier vs : stratificationObjects ) { + ArrayList states = vs.getRelevantStates(ref, tracker, comp, compName, eval, evalName, sampleName); + stateMap.put(vs, states); + } - ArrayList stateKeys = new ArrayList(); - initializeStateKeys(stateMap, null, null, stateKeys); + ArrayList stateKeys = new ArrayList(); + variantEvalUtils.initializeStateKeys(stateMap, null, null, stateKeys); - HashSet stateKeysHash = new HashSet(stateKeys); + HashSet stateKeysHash = new HashSet(stateKeys); - for ( StateKey stateKey : stateKeysHash ) { - NewEvaluationContext nec = evaluationContexts.get(stateKey); + for ( StateKey stateKey : stateKeysHash ) { + NewEvaluationContext nec = evaluationContexts.get(stateKey); - synchronized (nec) { - nec.apply(tracker, ref, context, comp, eval); + synchronized (nec) { + nec.apply(tracker, ref, context, comp, eval); + } } } } @@ -694,38 +231,13 @@ public class VariantEvalWalker extends RodWalker implements Tr return null; } - /** - * A composite, 'reduce of reduces' function. - * - * @param lhs 'left-most' portion of data in the composite reduce. - * @param rhs 'right-most' portion of data in the composite reduce. - * @return The composite reduce type. - */ - public Integer treeReduce(Integer lhs, Integer rhs) { - return null; - } + public Integer treeReduce(Integer lhs, Integer rhs) { return null; } - /** - * Provide an initial value for reduce computations. - * - * @return Initial value of reduce. - */ @Override - public Integer reduceInit() { - return null; - } + public Integer reduceInit() { return null; } - /** - * Reduces a single map with the accumulator provided as the ReduceType. - * - * @param value result of the map. - * @param sum accumulator for the reduce. - * @return accumulator with result of the map taken into account. - */ @Override - public Integer reduce(Integer value, Integer sum) { - return null; - } + public Integer reduce(Integer value, Integer sum) { return null; } /** * Output the finalized report @@ -822,4 +334,31 @@ public class VariantEvalWalker extends RodWalker implements Tr report.print(out); } + + // Accessors + public Logger getLogger() { return logger; } + + public int getNumSamples() { return numSamples; } + + public double getMinPhaseQuality() { return MIN_PHASE_QUALITY; } + + public String getFamilyStructure() { return FAMILY_STRUCTURE; } + + public double getMendelianViolationQualThreshold() { return MENDELIAN_VIOLATION_QUAL_THRESHOLD; } + + public TreeSet getStratificationObjects() { return stratificationObjects; } + + public static String getAllSampleName() { return ALL_SAMPLE_NAME; } + + public Set getKnownNames() { return knownNames; } + + public Set getEvalNames() { return evalNames; } + + public Set getSampleNamesForEvaluation() { return sampleNamesForEvaluation; } + + public Set getSampleNamesForStratification() { return sampleNamesForStratification; } + + public Set getCompNames() { return compNames; } + + public Set getJexlExpressions() { return jexlExpressions; } } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CompOverlap.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CompOverlap.java index 97e721a4b..e02154bb9 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CompOverlap.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CompOverlap.java @@ -19,18 +19,17 @@ import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint; */ @Analysis(description = "The overlap between eval and comp sites") public class CompOverlap extends VariantEvaluator implements StandardEval { - @DataPoint(description = "number of eval SNP sites") - long nEvalSNPs = 0; + long nEvalVariants = 0; @DataPoint(description = "number of comp SNP sites") - long nCompSNPs = 0; + long nCompVariants = 0; @DataPoint(description = "number of eval sites outside of comp sites") long novelSites = 0; @DataPoint(description = "number of eval sites at comp sites") - long nSNPsAtComp = 0; + long nVariantsAtComp = 0; @DataPoint(description = "percentage of eval sites at comp sites") double compRate = 0.0; @@ -45,9 +44,9 @@ public class CompOverlap extends VariantEvaluator implements StandardEval { return 2; // we need to see each eval track and each comp track } - public long nNovelSites() { return nEvalSNPs - nSNPsAtComp; } - public double compRate() { return rate(nSNPsAtComp, nEvalSNPs); } - public double concordanceRate() { return rate(nConcordant, nSNPsAtComp); } + public long nNovelSites() { return nEvalVariants - nVariantsAtComp; } + public double compRate() { return rate(nVariantsAtComp, nEvalVariants); } + public double concordanceRate() { return rate(nConcordant, nVariantsAtComp); } public void finalizeEvaluation() { compRate = 100 * compRate(); @@ -76,21 +75,18 @@ public class CompOverlap extends VariantEvaluator implements StandardEval { } public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - //boolean expectingIndels = false; + boolean evalIsGood = eval != null; + boolean expectingIndels = eval != null && eval.isIndel(); - //boolean compIsGood = expectingIndels ? comp != null && comp.isNotFiltered() && comp.isIndel() : comp != null && comp.isNotFiltered() && comp.isSNP() ; - //boolean evalIsGood = expectingIndels ? eval != null && eval.isIndel() : eval != null && eval.isSNP() ; + boolean compIsGood = expectingIndels ? comp != null && comp.isNotFiltered() && comp.isIndel() : comp != null && comp.isNotFiltered() && comp.isSNP() ; - boolean compIsGood = comp != null && comp.isNotFiltered() && comp.isSNP() ; - boolean evalIsGood = eval != null && eval.isSNP() ; - - if (compIsGood) nCompSNPs++; // count the number of comp events - if (evalIsGood) nEvalSNPs++; // count the number of eval events + if (compIsGood) nCompVariants++; // count the number of comp events + if (evalIsGood) nEvalVariants++; // count the number of eval events if (compIsGood && evalIsGood) { - nSNPsAtComp++; + nVariantsAtComp++; - if (!discordantP(eval, comp)) { // count whether we're concordant or not with the comp value + if (!discordantP(eval, comp)) { // count whether we're concordant or not with the comp value nConcordant++; } } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypeConcordance.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypeConcordance.java index 26fa3baab..0fdef754f 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypeConcordance.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypeConcordance.java @@ -1,18 +1,19 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators; +import org.apache.log4j.Logger; import org.broad.tribble.util.variantcontext.Allele; import org.broad.tribble.util.variantcontext.Genotype; import org.broad.tribble.util.variantcontext.VariantContext; import org.broad.tribble.vcf.VCFConstants; -import org.broadinstitute.sting.gatk.contexts.*; -import org.broadinstitute.sting.gatk.refdata.*; -import org.broadinstitute.sting.utils.exceptions.StingException; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis; import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint; -import org.broadinstitute.sting.utils.report.utils.TableType; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.apache.log4j.Logger; +import org.broadinstitute.sting.utils.exceptions.StingException; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.report.utils.TableType; import java.util.*; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypePhasingEvaluator.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypePhasingEvaluator.java index a2b548404..9fbcea5bc 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypePhasingEvaluator.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypePhasingEvaluator.java @@ -9,7 +9,6 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.phasing.AllelePair; import org.broadinstitute.sting.gatk.walkers.phasing.ReadBackedPhasingWalker; import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker; -import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker; import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis; import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint; import org.broadinstitute.sting.gatk.walkers.varianteval.util.NewEvaluationContext; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelLengthHistogram.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelLengthHistogram.java index 285695526..5364e7b87 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelLengthHistogram.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelLengthHistogram.java @@ -4,9 +4,9 @@ import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis; import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.report.utils.TableType; /** diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelMetricsByAC.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelMetricsByAC.java index 82c95a4ba..e61540c32 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelMetricsByAC.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelMetricsByAC.java @@ -5,9 +5,9 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis; import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.report.utils.TableType; import java.util.ArrayList; @@ -157,8 +157,7 @@ public class IndelMetricsByAC extends VariantEvaluator { } public String toString() { - String returnString = ""; - return returnString; + return ""; } public void incrValue( VariantContext eval ) { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelStatistics.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelStatistics.java index 03fa2f7c6..97a5669a7 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelStatistics.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelStatistics.java @@ -474,10 +474,6 @@ public class IndelStatistics extends VariantEvaluator { return getName(); } - //public String update2(VariantContext eval, VariantContext validation, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context, VariantEvalWalker.EvaluationContext group) { - //return null; - //} - public String update1(VariantContext eval, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { if (eval != null ) { @@ -502,13 +498,9 @@ public class IndelStatistics extends VariantEvaluator { return null; // This module doesn't capture any interesting sites, so return null } - public String update0(VariantContext eval, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - return null; - } public void finalizeEvaluation() { - // - int k=0; + int k=0; } } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MendelianViolationEvaluator.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MendelianViolationEvaluator.java index bf162d88d..6d73310c4 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MendelianViolationEvaluator.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MendelianViolationEvaluator.java @@ -3,18 +3,18 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators; import org.broad.tribble.util.variantcontext.Allele; import org.broad.tribble.util.variantcontext.Genotype; import org.broad.tribble.util.variantcontext.VariantContext; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker; import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis; import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import java.util.List; import java.util.Arrays; -import java.util.regex.Pattern; +import java.util.List; import java.util.regex.Matcher; +import java.util.regex.Pattern; /** * Mendelian violation detection and counting diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/SimpleMetricsByAC.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/SimpleMetricsByAC.java index b7e965ead..457ca4d4d 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/SimpleMetricsByAC.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/SimpleMetricsByAC.java @@ -11,8 +11,8 @@ import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.Sample; import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis; import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint; import org.broadinstitute.sting.gatk.walkers.varianteval.util.StateKey; -import org.broadinstitute.sting.utils.report.utils.TableType; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.report.utils.TableType; import java.util.ArrayList; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantQualityScore.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantQualityScore.java index f79211d62..735ca70eb 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantQualityScore.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantQualityScore.java @@ -31,9 +31,9 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis; import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint; +import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.report.utils.TableType; import java.util.ArrayList; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CompRod.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CompRod.java index cf3b70432..22483f224 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CompRod.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CompRod.java @@ -3,6 +3,8 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp; import java.util.ArrayList; import java.util.Set; @@ -13,7 +15,7 @@ public class CompRod extends VariantStratifier implements RequiredStratification private ArrayList states; @Override - public void initialize(Set jexlExpressions, Set compNames, Set knownNames, Set evalNames, Set sampleNames) { + public void initialize(Set jexlExpressions, Set compNames, Set knownNames, Set evalNames, Set sampleNames) { this.compNames = compNames; states = new ArrayList(); @@ -24,7 +26,7 @@ public class CompRod extends VariantStratifier implements RequiredStratification return states; } - public ArrayList getRelevantStates(ReferenceContext ref, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { + public ArrayList getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { ArrayList relevantStates = new ArrayList(); relevantStates.add(compName); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CpG.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CpG.java index 20f3de1ad..9af9fd518 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CpG.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CpG.java @@ -3,6 +3,8 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp; import java.util.ArrayList; import java.util.Set; @@ -11,7 +13,7 @@ public class CpG extends VariantStratifier implements StandardStratification { private ArrayList states; @Override - public void initialize(Set jexlExpressions, Set compNames, Set knownNames, Set evalNames, Set sampleNames) { + public void initialize(Set jexlExpressions, Set compNames, Set knownNames, Set evalNames, Set sampleNames) { states = new ArrayList(); states.add("all"); states.add("CpG"); @@ -22,7 +24,7 @@ public class CpG extends VariantStratifier implements StandardStratification { return states; } - public ArrayList getRelevantStates(ReferenceContext ref, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { + public ArrayList getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { boolean isCpG = false; if (ref != null && ref.getBases() != null) { String fwRefBases = new String(ref.getBases()); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Degeneracy.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Degeneracy.java index dbe2262dc..eccc737ff 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Degeneracy.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Degeneracy.java @@ -3,6 +3,8 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp; import java.util.ArrayList; import java.util.HashMap; @@ -14,7 +16,7 @@ public class Degeneracy extends VariantStratifier { private HashMap degeneracies; @Override - public void initialize(Set jexlExpressions, Set compNames, Set knownNames, Set evalNames, Set sampleNames) { + public void initialize(Set jexlExpressions, Set compNames, Set knownNames, Set evalNames, Set sampleNames) { states = new ArrayList(); states.add("1-fold"); states.add("2-fold"); @@ -52,7 +54,7 @@ public class Degeneracy extends VariantStratifier { return states; } - public ArrayList getRelevantStates(ReferenceContext ref, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { + public ArrayList getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { ArrayList relevantStates = new ArrayList(); relevantStates.add("all"); @@ -61,10 +63,9 @@ public class Degeneracy extends VariantStratifier { String type = null; String aa = null; - if (eval.getAttributeAsString("refseq.functionalClass") != null) { - type = eval.getAttributeAsString("refseq.functionalClass"); + if (eval.hasAttribute("refseq.functionalClass")) { aa = eval.getAttributeAsString("refseq.variantAA"); - } else if (eval.getAttributeAsString("refseq.functionalClass_1") != null) { + } else if (eval.hasAttribute("refseq.functionalClass_1")) { int annotationId = 1; String key; @@ -85,7 +86,7 @@ public class Degeneracy extends VariantStratifier { } annotationId++; - } while (eval.getAttributeAsString(key) != null); + } while (eval.hasAttribute(key)); } if (aa != null && degeneracies.containsKey(aa)) { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/EvalRod.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/EvalRod.java index 1f475bb28..faf0ab245 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/EvalRod.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/EvalRod.java @@ -3,6 +3,8 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp; import java.util.ArrayList; import java.util.Set; @@ -13,7 +15,7 @@ public class EvalRod extends VariantStratifier implements RequiredStratification private ArrayList states; @Override - public void initialize(Set jexlExpressions, Set compNames, Set knownNames, Set evalNames, Set sampleNames) { + public void initialize(Set jexlExpressions, Set compNames, Set knownNames, Set evalNames, Set sampleNames) { this.evalNames = evalNames; states = new ArrayList(); @@ -24,7 +26,7 @@ public class EvalRod extends VariantStratifier implements RequiredStratification return states; } - public ArrayList getRelevantStates(ReferenceContext ref, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { + public ArrayList getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { ArrayList relevantStates = new ArrayList(); relevantStates.add(evalName); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Filter.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Filter.java index 35c97c704..316300396 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Filter.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Filter.java @@ -3,6 +3,8 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp; import java.util.ArrayList; import java.util.Set; @@ -12,7 +14,7 @@ public class Filter extends VariantStratifier { private ArrayList states; @Override - public void initialize(Set jexlExpressions, Set compNames, Set knownNames, Set evalNames, Set sampleNames) { + public void initialize(Set jexlExpressions, Set compNames, Set knownNames, Set evalNames, Set sampleNames) { states = new ArrayList(); states.add("called"); states.add("filtered"); @@ -23,7 +25,7 @@ public class Filter extends VariantStratifier { return states; } - public ArrayList getRelevantStates(ReferenceContext ref, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { + public ArrayList getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { ArrayList relevantStates = new ArrayList(); relevantStates.add("raw"); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/FunctionalClass.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/FunctionalClass.java index 4db03e1ee..609e604a6 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/FunctionalClass.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/FunctionalClass.java @@ -3,6 +3,8 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp; import java.util.ArrayList; import java.util.Set; @@ -12,7 +14,7 @@ public class FunctionalClass extends VariantStratifier { private ArrayList states; @Override - public void initialize(Set jexlExpressions, Set compNames, Set knownNames, Set evalNames, Set sampleNames) { + public void initialize(Set jexlExpressions, Set compNames, Set knownNames, Set evalNames, Set sampleNames) { states = new ArrayList(); states.add("all"); states.add("silent"); @@ -24,7 +26,7 @@ public class FunctionalClass extends VariantStratifier { return states; } - public ArrayList getRelevantStates(ReferenceContext ref, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { + public ArrayList getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { ArrayList relevantStates = new ArrayList(); relevantStates.add("all"); @@ -32,9 +34,9 @@ public class FunctionalClass extends VariantStratifier { if (eval != null && eval.isVariant()) { String type = null; - if (eval.getAttributeAsString("refseq.functionalClass") != null) { + if (eval.hasAttribute("refseq.functionalClass")) { type = eval.getAttributeAsString("refseq.functionalClass"); - } else if (eval.getAttributeAsString("refseq.functionalClass_1") != null) { + } else if (eval.hasAttribute("refseq.functionalClass_1")) { int annotationId = 1; String key; @@ -43,7 +45,7 @@ public class FunctionalClass extends VariantStratifier { String newtype = eval.getAttributeAsString(key); - if ( newtype != null && + if ( newtype != null && !newtype.equalsIgnoreCase("null") && ( type == null || ( type.equals("silent") && !newtype.equals("silent") ) || ( type.equals("missense") && newtype.equals("nonsense") ) ) @@ -52,7 +54,7 @@ public class FunctionalClass extends VariantStratifier { } annotationId++; - } while (eval.getAttributeAsString(key) != null); + } while (eval.hasAttribute(key)); } if (type != null) { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/JexlExpression.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/JexlExpression.java index 4e94ef03e..ce980e3f5 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/JexlExpression.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/JexlExpression.java @@ -3,22 +3,24 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp; import java.util.ArrayList; import java.util.Set; public class JexlExpression extends VariantStratifier implements StandardStratification { // needs to know the jexl expressions - private Set jexlExpressions; + private Set jexlExpressions; private ArrayList states; @Override - public void initialize(Set jexlExpressions, Set compNames, Set knownNames, Set evalNames, Set sampleNames) { + public void initialize(Set jexlExpressions, Set compNames, Set knownNames, Set evalNames, Set sampleNames) { this.jexlExpressions = jexlExpressions; states = new ArrayList(); states.add("none"); - for ( VariantContextUtils.JexlVCMatchExp jexlExpression : jexlExpressions ) { + for ( SortableJexlVCMatchExp jexlExpression : jexlExpressions ) { states.add(jexlExpression.name); } } @@ -27,11 +29,11 @@ public class JexlExpression extends VariantStratifier implements StandardStratif return states; } - public ArrayList getRelevantStates(ReferenceContext ref, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { + public ArrayList getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { ArrayList relevantStates = new ArrayList(); relevantStates.add("none"); - for ( VariantContextUtils.JexlVCMatchExp jexlExpression : jexlExpressions ) { + for ( SortableJexlVCMatchExp jexlExpression : jexlExpressions ) { if (eval != null && VariantContextUtils.match(eval, jexlExpression)) { relevantStates.add(jexlExpression.name); } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java index a4a1822ce..e55bd496e 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java @@ -3,8 +3,12 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp; import java.util.ArrayList; +import java.util.Collection; +import java.util.EnumSet; import java.util.Set; public class Novelty extends VariantStratifier implements StandardStratification { @@ -13,7 +17,7 @@ public class Novelty extends VariantStratifier implements StandardStratification private ArrayList states; @Override - public void initialize(Set jexlExpressions, Set compNames, Set knownNames, Set evalNames, Set sampleNames) { + public void initialize(Set jexlExpressions, Set compNames, Set knownNames, Set evalNames, Set sampleNames) { this.knownNames = knownNames; states = new ArrayList(); @@ -26,11 +30,29 @@ public class Novelty extends VariantStratifier implements StandardStratification return states; } - public ArrayList getRelevantStates(ReferenceContext ref, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { - ArrayList relevantStates = new ArrayList(); + public ArrayList getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { + boolean isNovel = true; + if (tracker != null) { + for (String knownName : knownNames) { + if (tracker.hasROD(knownName)) { + EnumSet allowableTypes = EnumSet.of(VariantContext.Type.NO_VARIATION); + if (eval != null) { + allowableTypes.add(eval.getType()); + } + + Collection knownComps = tracker.getVariantContexts(ref, knownName, allowableTypes, ref.getLocus(), true, true); + + isNovel = knownComps.size() == 0; + + break; + } + } + } + + ArrayList relevantStates = new ArrayList(); relevantStates.add("all"); - relevantStates.add(comp == null ? "novel" : "known"); + relevantStates.add(isNovel ? "novel" : "known"); return relevantStates; } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Sample.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Sample.java index 4c73031d8..d95ede0b2 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Sample.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Sample.java @@ -3,7 +3,8 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; -import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp; import java.util.ArrayList; import java.util.Set; @@ -13,7 +14,7 @@ public class Sample extends VariantStratifier { private ArrayList samples; @Override - public void initialize(Set jexlExpressions, Set compNames, Set knownNames, Set evalNames, Set sampleNames) { + public void initialize(Set jexlExpressions, Set compNames, Set knownNames, Set evalNames, Set sampleNames) { samples = new ArrayList(); samples.addAll(sampleNames); } @@ -22,7 +23,7 @@ public class Sample extends VariantStratifier { return samples; } - public ArrayList getRelevantStates(ReferenceContext ref, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { + public ArrayList getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { ArrayList relevantStates = new ArrayList(); relevantStates.add(sampleName); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/VariantStratifier.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/VariantStratifier.java index 5717958de..8bfdcc3d1 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/VariantStratifier.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/VariantStratifier.java @@ -4,18 +4,19 @@ import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp; import java.util.ArrayList; import java.util.Set; public abstract class VariantStratifier implements Comparable { - public abstract void initialize(Set jexlExpressions, Set compNames, Set knownNames, Set evalNames, Set sampleNames); + public abstract void initialize(Set jexlExpressions, Set compNames, Set knownNames, Set evalNames, Set sampleNames); public ArrayList getAllStates() { return new ArrayList(); } - public ArrayList getRelevantStates(ReferenceContext ref, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { + public ArrayList getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { return null; } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/NewEvaluationContext.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/NewEvaluationContext.java index 4faa9c42f..44248e169 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/NewEvaluationContext.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/NewEvaluationContext.java @@ -52,40 +52,35 @@ public class NewEvaluationContext extends HashMap { public void apply(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context, VariantContext comp, VariantContext eval) { for ( VariantEvaluator evaluation : evaluationInstances.values() ) { - //synchronized ( this ) { - // we always call update0 in case the evaluation tracks things like number of bases covered - //evaluation.update0(tracker, ref, context); + // we always call update0 in case the evaluation tracks things like number of bases covered - // the other updateN methods don't see a null context - if ( tracker == null ) - continue; + // the other updateN methods don't see a null context + if ( tracker == null ) + continue; - // now call the single or paired update function - switch ( evaluation.getComparisonOrder() ) { - case 1: - if (eval != null) { - evaluation.update1(eval, tracker, ref, context); - } + // now call the single or paired update function + switch ( evaluation.getComparisonOrder() ) { + case 1: + if (eval != null) { + evaluation.update1(eval, tracker, ref, context); + } - break; - case 2: - //if (eval != null) { - evaluation.update2(eval, comp, tracker, ref, context); - //} + break; + case 2: + //if (eval != null) { + evaluation.update2(eval, comp, tracker, ref, context); + //} - break; - default: - throw new ReviewedStingException("BUG: Unexpected evaluation order " + evaluation); - } - //} + break; + default: + throw new ReviewedStingException("BUG: Unexpected evaluation order " + evaluation); + } } } public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - //synchronized (this) { - for ( VariantEvaluator evaluation : evaluationInstances.values() ) { - evaluation.update0(tracker, ref, context); - } - //} + for ( VariantEvaluator evaluation : evaluationInstances.values() ) { + evaluation.update0(tracker, ref, context); + } } } diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java index 9ae1b58d3..fc57e97c0 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java @@ -9,6 +9,9 @@ import java.util.HashMap; import java.util.Map; public class VariantEvalIntegrationTest extends WalkerTest { + private static String variantEvalTestDataRoot = validationDataLocation + "/VariantEval"; + private static String fundamentalTestVCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.snps_and_indels.vcf"; + private static String cmdRoot = "-T VariantEval" + " -R " + b36KGReference; @@ -18,18 +21,301 @@ public class VariantEvalIntegrationTest extends WalkerTest { " -B:comp_genotypes,VCF " + validationDataLocation + "yri.trio.gatk.ug.head.vcf"; private static String rootGZ = cmdRoot + - " -D " + GATKDataLocation + "dbsnp_129_b36.rod" + - " -B:eval,VCF " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf.gz" + - " -B:comp_genotypes,VCF " + validationDataLocation + "yri.trio.gatk.ug.head.vcf.gz"; + " -D " + GATKDataLocation + "dbsnp_129_b36.rod" + + " -B:eval,VCF " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf.gz" + + " -B:comp_genotypes,VCF " + validationDataLocation + "yri.trio.gatk.ug.head.vcf.gz"; private static String[] testsEnumerations = {root, rootGZ}; + private String cmdLineBuilder(String ... arguments) { + String cmdline = ""; + + for ( int argIndex = 0; argIndex < arguments.length; argIndex++ ) { + cmdline += arguments[argIndex]; + + if (argIndex < arguments.length - 1) { + cmdline += " "; + } + } + + return cmdline; + } + + @Test + public void testFundamentalsCountVariantsSNPsAndIndels() { +// nProcessedLoci = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | awk '{ print length($4) }' | ~kiran/bin/SimpleStats = 38 +// nCalledLoci = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep -c PASS = 9 +// nRefLoci = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ if ($5 == ".") print $0 }' | wc -l = 4 +// nVariantLoci = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ if ($5 != ".") print $0 }' | wc -l = 5 +// variantRate = nVariantLoci / nProcessedLoci = 0.131578947 +// nSNPs = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ if ($5 != "." && length($4) == 1 && length($5) == 1) print $0 }' | wc -l = 3 +// nInsertions = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ if ($5 != "." && length($5) > 1) print $0 }' | wc -l = 1 +// nDeletions = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ if ($5 != "." && length($4) > 1) print $0 }' | wc -l = 1 +// nNoCalls = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "[[:punct:]]/[[:punct:]]") print $0 }' | wc -l = 4 +// nHets = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "0/1" || $i ~ "1/0") print $0 }' | wc -l = 8 +// nHomRef = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "0/0") print $0 }' | wc -l = 10 +// nHomVar = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "1/1") print $0 }' | wc -l = 5 + + WalkerTestSpec spec = new WalkerTestSpec( + cmdLineBuilder( + "-T VariantEval", + "-R " + b37KGReference, + "-D " + b37dbSNP129, + "-B:eval,VCF " + fundamentalTestVCF, + "-noEV", + "-EV CountVariants", + "-noST", + "-BTI eval", + "-o %s" + ), + 1, + Arrays.asList("476b495de54e1a377c6895c02a6fdf6a") + ); + executeTest("testFundamentalsCountVariantsSNPsandIndels", spec); + } + + @Test + public void testFundamentalsCountVariantsSNPsAndIndelsWithNovelty() { +// nProcessedLociKnown = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | awk '{ print length($4) }' | ~kiran/bin/SimpleStats = 38 +// nCalledLociKnown = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | grep DBSNP129 | awk '{ if ($5 != ".") print $0 }'= 3 +// nVariantLociKnown = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | grep DBSNP129 | awk '{ if ($5 != ".") print $0 }' | wc -l = 3 +// variantRateKnown = nVariantLoci / nProcessedLoci = 0.0789473684 +// nSNPsKnown = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | grep DBSNP129 | awk '{ if ($5 != "." && length($5) == 1) print $0 }' | wc -l = 3 +// nInsertionsKnown = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | grep DBSNP129 | awk '{ if ($5 != "." && length($5) > 1) print $0 }' | wc -l = 0 +// nDeletionsKnown = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | grep DBSNP129 | awk '{ if ($5 != "." && length($4) > 1) print $0 }' | wc -l = 0 +// nNoCallsKnown = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | grep DBSNP129 | awk '{ if ($5 != ".") print $0 }' | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "[[:punct:]]/[[:punct:]]") print $0 }' | wc -l = 0 +// nHetsKnown = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | grep DBSNP129 | awk '{ if ($5 != ".") print $0 }' | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "0/1" || $i ~ "1/0") print $0 }' | wc -l = 3 +// nHomRefKnown = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | grep DBSNP129 | awk '{ if ($5 != ".") print $0 }' | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "0/0") print $0 }' | wc -l = 1 +// nHomVarKnown = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | grep DBSNP129 | awk '{ if ($5 != ".") print $0 }' | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "1/1") print $0 }' | wc -l = 5 + + WalkerTestSpec spec = new WalkerTestSpec( + cmdLineBuilder( + "-T VariantEval", + "-R " + b37KGReference, + "-D " + b37dbSNP129, + "-B:eval,VCF " + fundamentalTestVCF, + "-noEV", + "-EV CountVariants", + "-noST", + "-ST Novelty", + "-BTI eval", + "-o %s" + ), + 1, + Arrays.asList("9f4e4fff339e725f42d65063e43e7d1c") + ); + executeTest("testFundamentalsCountVariantsSNPsandIndelsWithNovelty", spec); + } + + @Test + public void testFundamentalsCountVariantsSNPsAndIndelsWithNoveltyAndFilter() { +// nProcessedLociFail = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | awk '{ print length($4) }' | ~kiran/bin/SimpleStats = 38 +// nCalledLociFail = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep -vc PASS = 3 +// nRefLociFail = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep -v PASS | awk '{ if ($5 == ".") print $0 }' | wc -l = 1 +// nVariantLociFail = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep -v PASS | awk '{ if ($5 != ".") print $0 }' | wc -l = 2 +// nSNPsFail = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep -v PASS | awk '{ if ($5 != "." && length($4) == 1 && length($5) == 1) print $0 }' | wc -l = 1 +// nInsertionsFail = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep -v PASS | awk '{ if ($5 != "." && length($5) > 1) print $0 }' | wc -l = 0 +// nDeletionsFail = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep -v PASS | awk '{ if ($5 != "." && length($4) > 1) print $0 }' | wc -l = 1 +// nNoCallsFail = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep -v PASS | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "[[:punct:]]/[[:punct:]]") print $0 }' | wc -l = 3 +// nHetsFail = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep -v PASS | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "0/1" || $i ~ "1/0") print $0 }' | wc -l = 1 +// nHomRefFail = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep -v PASS | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "0/0") print $0 }' | wc -l = 2 +// nHomVarFail = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep -v PASS | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "1/1") print $0 }' | wc -l = 3 + + WalkerTestSpec spec = new WalkerTestSpec( + cmdLineBuilder( + "-T VariantEval", + "-R " + b37KGReference, + "-D " + b37dbSNP129, + "-B:eval,VCF " + fundamentalTestVCF, + "-noEV", + "-EV CountVariants", + "-noST", + "-ST Novelty", + "-ST Filter", + "-BTI eval", + "-o %s" + ), + 1, + Arrays.asList("369fa4f37bcc03b8a0bc1e58bf22bf0a") + ); + executeTest("testFundamentalsCountVariantsSNPsandIndelsWithNoveltyAndFilter", spec); + } + + @Test + public void testFundamentalsCountVariantsSNPsAndIndelsWithCpG() { +// nProcessedLoci = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | awk '{ print length($4) }' | ~kiran/bin/SimpleStats = 38 +// nCalledLoci = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep NONCPG | grep -c PASS = 8 +// nRefLoci = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep NONCPG | grep PASS | awk '{ if ($5 == ".") print $0 }' | wc -l = 3 +// nVariantLoci = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep NONCPG | grep PASS | awk '{ if ($5 != ".") print $0 }' | wc -l = 5 +// nSNPs = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep NONCPG | grep PASS | awk '{ if ($5 != "." && length($4) == 1 && length($5) == 1) print $0 }' | wc -l = 3 +// nInsertions = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep NONCPG | grep PASS | awk '{ if ($5 != "." && length($5) > 1) print $0 }' | wc -l = 1 +// nDeletions = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep NONCPG | grep PASS | awk '{ if ($5 != "." && length($4) > 1) print $0 }' | wc -l = 1 +// nNoCalls = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep NONCPG | grep PASS | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "[[:punct:]]/[[:punct:]]") print $0 }' | wc -l = 4 +// nHets = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep NONCPG | grep PASS | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "0/1" || $i ~ "1/0") print $0 }' | wc -l = 8 +// nHomRef = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep NONCPG | grep PASS | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "0/0") print $0 }' | wc -l = 10 +// nHomVar = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep NONCPG | grep PASS | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "1/1") print $0 }' | wc -l = 5 + + WalkerTestSpec spec = new WalkerTestSpec( + cmdLineBuilder( + "-T VariantEval", + "-R " + b37KGReference, + "-D " + b37dbSNP129, + "-B:eval,VCF " + fundamentalTestVCF, + "-noEV", + "-EV CountVariants", + "-noST", + "-ST CpG", + "-BTI eval", + "-o %s" + ), + 1, + Arrays.asList("891ad0d38f1a1b08b31fe1cb6a3afc04") + ); + executeTest("testFundamentalsCountVariantsSNPsandIndelsWithCpG", spec); + } + + @Test + public void testFundamentalsCountVariantsSNPsAndIndelsWithFunctionalClasses() { + WalkerTestSpec spec = new WalkerTestSpec( + cmdLineBuilder( + "-T VariantEval", + "-R " + b37KGReference, + "-D " + b37dbSNP129, + "-B:eval,VCF " + fundamentalTestVCF, + "-noEV", + "-EV CountVariants", + "-noST", + "-ST FunctionalClass", + "-BTI eval", + "-o %s" + ), + 1, + Arrays.asList("d588179e2d9ed6e92a6ae1a80ac04270") + ); + executeTest("testFundamentalsCountVariantsSNPsandIndelsWithFunctionalClass", spec); + } + + @Test + public void testFundamentalsCountVariantsSNPsAndIndelsWithDegeneracy() { + WalkerTestSpec spec = new WalkerTestSpec( + cmdLineBuilder( + "-T VariantEval", + "-R " + b37KGReference, + "-D " + b37dbSNP129, + "-B:eval,VCF " + fundamentalTestVCF, + "-noEV", + "-EV CountVariants", + "-noST", + "-ST Degeneracy", + "-BTI eval", + "-o %s" + ), + 1, + Arrays.asList("ceb0f5d9e0ea99eb8d00bce2f7bc1b73") + ); + executeTest("testFundamentalsCountVariantsSNPsandIndelsWithDegeneracy", spec); + } + + @Test + public void testFundamentalsCountVariantsSNPsAndIndelsWithSample() { +// HG00513 +// nSNPs = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ if (length($4) == 1 && length($5) == 1) print $0 }' | awk '{ print $10 }' | grep -v '0/0' | grep -v '\.\/\.' | wc -l = 3 +// nInsertions = $ grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ if (length($4) == 1 && length($5) > 1) print $0 }' | awk '{ print $10 }' | grep -v '0/0' | grep -v '\.\/\.' | wc -l = 1 +// nDeletions = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ if (length($4) > 1 && length($5) == 1) print $0 }' | awk '{ print $10 }' | grep -v '0/0' | grep -v '\.\/\.' | wc -l = 0 +// nNoCalls = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ if ($10 ~ "[[:punct:]]/[[:punct:]]") print $0 }' | wc -l = 2 +// nHets = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ if ($10 ~ "0/1") print $0 }' | wc -l = 2 +// nHomRef = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ if ($10 ~ "0/0") print $0 }' | wc -l = 3 +// nHomVar = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ if ($10 ~ "1/1") print $0 }' | wc -l = 2 + + WalkerTestSpec spec = new WalkerTestSpec( + cmdLineBuilder( + "-T VariantEval", + "-R " + b37KGReference, + "-D " + b37dbSNP129, + "-B:eval,VCF " + fundamentalTestVCF, + "-noEV", + "-EV CountVariants", + "-noST", + "-ST Sample", + "-BTI eval", + "-o %s" + ), + 1, + Arrays.asList("94ce29b34b9e2e4304fc1bbf3f971a7d") + ); + executeTest("testFundamentalsCountVariantsSNPsandIndelsWithSample", spec); + } + + @Test + public void testFundamentalsCountVariantsSNPsAndIndelsWithJexlExpression() { +// nSNPs = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20) print $0 }' | wc -l = 7 +// nRefLoci = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20 && $6 == ".") print $0 }' | wc -l = 4 +// nVariantLoci = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20 && $6 != ".") print $0 }' | wc -l = 3 +// nSNPs = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20 && length($5) == 1 && length($6) == 1 && $6 != ".") print $0 }' | wc -l = 3 +// nHomRef = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20) print $11 "\n" $12 "\n" $13 }' | grep -c '0/0' = 9 +// nHets = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20) print $11 "\n" $12 "\n" $13 }' | grep -c '0/1' = 3 +// nHomVar = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20) print $11 "\n" $12 "\n" $13 }' | grep -c '1/1' = 5 +// nNoCalls = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20) print $11 "\n" $12 "\n" $13 }' | grep -c '\.\/\.' = 4 + WalkerTestSpec spec = new WalkerTestSpec( + cmdLineBuilder( + "-T VariantEval", + "-R " + b37KGReference, + "-D " + b37dbSNP129, + "-B:eval,VCF " + fundamentalTestVCF, + "-noEV", + "-EV CountVariants", + "-noST", + "-ST JexlExpression", + "-select 'DP < 20'", + "-selectName DepthSelect", + "-BTI eval", + "-o %s" + ), + 1, + Arrays.asList("96de32970b204816ecd9a120b9d8782b") + ); + executeTest("testFundamentalsCountVariantsSNPsandIndelsWithJexlExpression", spec); + } + + @Test + public void testFundamentalsCountVariantsSNPsAndIndelsWithMultipleJexlExpressions() { +// nSNPs = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20) print $0 }' | wc -l = 7 +// nRefLoci = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20 && $6 == ".") print $0 }' | wc -l = 4 +// nVariantLoci = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20 && $6 != ".") print $0 }' | wc -l = 3 +// nSNPs = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20 && length($5) == 1 && length($6) == 1 && $6 != ".") print $0 }' | wc -l = 3 +// nHomRef = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20) print $11 "\n" $12 "\n" $13 }' | grep -c '0/0' = 9 +// nHets = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20) print $11 "\n" $12 "\n" $13 }' | grep -c '0/1' = 3 +// nHomVar = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20) print $11 "\n" $12 "\n" $13 }' | grep -c '1/1' = 5 +// nNoCalls = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20) print $11 "\n" $12 "\n" $13 }' | grep -c '\.\/\.' = 4 + WalkerTestSpec spec = new WalkerTestSpec( + cmdLineBuilder( + "-T VariantEval", + "-R " + b37KGReference, + "-D " + b37dbSNP129, + "-B:eval,VCF " + fundamentalTestVCF, + "-noEV", + "-EV CountVariants", + "-noST", + "-ST JexlExpression", + "-select 'DP < 20'", + "-selectName DepthLt20", + "-select 'DP > 20'", + "-selectName DepthGt20", + "-BTI eval", + "-o %s" + ), + 1, + Arrays.asList("aea882132eb6afdc93fbc70e8d6c50e2") + ); + executeTest("testFundamentalsCountVariantsSNPsandIndelsWithMultipleJexlExpressions", spec); + } + @Test public void testSelect1() { String extraArgs = "-L 1:1-10,000,000"; for (String tests : testsEnumerations) { WalkerTestSpec spec = new WalkerTestSpec(withSelect(tests, "DP < 50", "DP50") + " " + extraArgs + " -o %s", - 1, Arrays.asList("c803c90f587796146286217ab30930a3")); + 1, Arrays.asList("4184a8d44f8c559c904e41edf464a467")); executeTestParallel("testSelect1", spec); //executeTest("testSelect1", spec); } @@ -50,7 +336,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { for (String vcfFile : vcfFiles) { WalkerTestSpec spec = new WalkerTestSpec(cmdRoot + " -B:eval,VCF " + validationDataLocation + vcfFile + " -B:comp,VCF " + validationDataLocation + "GenotypeConcordanceComp.vcf -noEV -EV GenotypeConcordance -o %s", 1, - Arrays.asList("bb16335f9510bcab2bd14a4299afd879")); + Arrays.asList("1387fcf8d5c53ff2c820fe79cc999bcf")); executeTestParallel("testVEGenotypeConcordance" + vcfFile, spec); //executeTest("testVEGenotypeConcordance" + vcfFile, spec); } @@ -60,8 +346,8 @@ public class VariantEvalIntegrationTest extends WalkerTest { @Test public void testVESimple() { HashMap expectations = new HashMap(); - expectations.put("-L 1:1-10,000,000", "47990cfb955720421c29991954af4450"); - expectations.put("-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -mvq 0 -EV MendelianViolationEvaluator", "ce7bb86e6281b1aef1ad94f9ba7301a9"); + expectations.put("-L 1:1-10,000,000", "b28516b4d3627d2eb017a5449284a4e4"); + expectations.put("-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -mvq 0 -EV MendelianViolationEvaluator", "a49d897095586ddb72bfe9faf0291312"); for ( Map.Entry entry : expectations.entrySet() ) { String extraArgs = entry.getKey(); @@ -84,10 +370,9 @@ public class VariantEvalIntegrationTest extends WalkerTest { " -B:comp_hapmap,VCF " + validationDataLocation + "CEU_hapmap_nogt_23.vcf"; - String matchingMD5 = "6388fbad81b0f281298812496fd3ed6c"; - expectations.put("", matchingMD5); - expectations.put(" -knownName comp_hapmap -knownName dbsnp", matchingMD5); - expectations.put(" -knownName comp_hapmap", "6388fbad81b0f281298812496fd3ed6c"); + expectations.put("", "65d74eb7eea2355989c389e8fa886c06"); + expectations.put(" -knownName comp_hapmap -knownName dbsnp", "776827cd5a6fcaf8b8508813e8dc023c"); + expectations.put(" -knownName comp_hapmap", "e99a89cbca2027c983edc00d31ea4ec9"); for (String tests : testsEnumerations) { for (Map.Entry entry : expectations.entrySet()) { String extraArgs2 = entry.getKey(); @@ -102,6 +387,79 @@ public class VariantEvalIntegrationTest extends WalkerTest { } } + @Test + public void testCompVsEvalAC() { + String extraArgs = "-T VariantEval -R "+b36KGReference+" -o %s -EV GenotypeConcordance -B:evalYRI,VCF /humgen/gsa-hpprojects/GATK/data/Validation_Data/yri.trio.gatk.ug.very.few.lines.vcf -B:compYRI,VCF /humgen/gsa-hpprojects/GATK/data/Validation_Data/yri.trio.gatk.fake.genotypes.ac.test.vcf"; + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("49cb4a6126c5383abd9a49a6c22b8d93")); + executeTestParallel("testCompVsEvalAC",spec); + //executeTest("testCompVsEvalAC",spec); + } + + private static String withSelect(String cmd, String select, String name) { + return String.format("%s -select '%s' -selectName %s", cmd, select, name); + } + + @Test + public void testTranches() { + String extraArgs = "-T VariantEval -R "+ hg18Reference +" -B:eval,vcf " + validationDataLocation + "GA2.WEx.cleaned.ug.snpfiltered.indelfiltered.optimized.vcf -o %s -EV TiTvVariantEvaluator -L chr1 -noEV -tf " + testDir + "tranches.6.txt"; + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("68044a69f03ba4cc11d2061cc96e9eb5")); + executeTestParallel("testTranches",spec); + //executeTest("testTranches",spec); + } + + @Test + public void testCompOverlap() { + String extraArgs = "-T VariantEval -R " + b37KGReference + " -L " + validationDataLocation + "VariantEval/pacbio.hg19.intervals -B:comphapmap,vcf " + comparisonDataLocation + "Validated/HapMap/3.3/genotypes_r27_nr.b37_fwd.vcf -B:eval,vcf " + validationDataLocation + "VariantEval/pacbio.ts.recalibrated.vcf -noEV -EV CompOverlap -sn NA12878 -noST -ST Novelty -o %s"; + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("e8b5561eb60ea98a9be4a45abee00e07")); + executeTestParallel("testCompOverlap",spec); + //executeTest("testCompOverlap",spec); + } + + @Test + public void testEvalTrackWithoutGenotypes() { + String dbsnp = GATKDataLocation + "dbsnp_129_b37.rod"; + + String extraArgs = "-T VariantEval -R " + + b37KGReference + + " -L 20" + + " -D " + dbsnp + + " -B:evalBI,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" + + " -noST -ST Novelty -o %s"; + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("9323a6ad62dedbdb08752411960db60f")); + executeTestParallel("testEvalTrackWithoutGenotypes",spec); + } + + @Test + public void testMultipleEvalTracksWithoutGenotypes() { + String dbsnp = GATKDataLocation + "dbsnp_129_b37.rod"; + + String extraArgs = "-T VariantEval -R " + b37KGReference + + " -L 20" + + " -D " + dbsnp + + " -B:evalBI,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" + + " -B:evalBC,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bc.sites.vcf" + + " -noST -ST Novelty -o %s"; + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("ef23195331affd332af1de9d261fdd0a")); + executeTestParallel("testMultipleEvalTracksWithoutGenotypes",spec); + } + + @Test + public void testMultipleCompTracks() { + String dbsnp = GATKDataLocation + "dbsnp_132_b37.vcf"; + + String extraArgs = "-T VariantEval" + + " -R " + b37KGReference + + " -B:comp,VCF " + validationDataLocation + "/VariantEval/ALL.phase1.chr20.broad.snps.genotypes.subset.vcf" + + " -B:eval,VCF " + validationDataLocation + "/VariantEval/NA12878.hg19.HiSeq.WGS.cleaned.ug.snpfiltered.indelfiltered.optimized.cut.subset.vcf" + + " -B:dbsnp,VCF " + dbsnp + + " -L 20:10000000-10100000" + + " -noST -noEV -ST Novelty -EV CompOverlap" + + " -o %s"; + + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("3fced8e5fa7a1c952d08fead0accd3fb")); + executeTestParallel("testMultipleCompTracks",spec); + } + // @Test // public void testVEGenomicallyAnnotated() { // String vecmd = "-T VariantEval" + @@ -129,81 +487,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { // //executeTest("testVEWriteVCF", spec); // } // } - - @Test - public void testCompVsEvalAC() { - String extraArgs = "-T VariantEval -R "+b36KGReference+" -o %s -EV GenotypeConcordance -B:evalYRI,VCF /humgen/gsa-hpprojects/GATK/data/Validation_Data/yri.trio.gatk.ug.very.few.lines.vcf -B:compYRI,VCF /humgen/gsa-hpprojects/GATK/data/Validation_Data/yri.trio.gatk.fake.genotypes.ac.test.vcf"; - WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("d8d59ec86ec9e00abad4ec44741de22f")); - executeTestParallel("testCompVsEvalAC",spec); - //executeTest("testCompVsEvalAC",spec); - } - - private static String withSelect(String cmd, String select, String name) { - return String.format("%s -select '%s' -selectName %s", cmd, select, name); - } - - @Test - public void testTranches() { - String extraArgs = "-T VariantEval -R "+ hg18Reference +" -B:eval,vcf " + validationDataLocation + "GA2.WEx.cleaned.ug.snpfiltered.indelfiltered.optimized.vcf -o %s -EV TiTvVariantEvaluator -L chr1 -noEV -tf " + testDir + "tranches.6.txt"; - WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("68044a69f03ba4cc11d2061cc96e9eb5")); - executeTestParallel("testTranches",spec); - //executeTest("testTranches",spec); - } - - @Test - public void testCompOverlap() { - String extraArgs = "-T VariantEval -R " + b37KGReference + " -L " + validationDataLocation + "VariantEval/pacbio.hg19.intervals -B:comphapmap,vcf " + comparisonDataLocation + "Validated/HapMap/3.3/genotypes_r27_nr.b37_fwd.vcf -B:eval,vcf " + validationDataLocation + "VariantEval/pacbio.ts.recalibrated.vcf -noEV -EV CompOverlap -sn NA12878 -noST -ST Novelty -o %s"; - WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("81377be26bf8fa32339d01c173428f7d")); - executeTestParallel("testCompOverlap",spec); - //executeTest("testCompOverlap",spec); - } - - @Test - public void testEvalTrackWithoutGenotypes() { - String dbsnp = GATKDataLocation + "dbsnp_129_b37.rod"; - - String extraArgs = "-T VariantEval -R " + - b37KGReference + - " -L 20" + - " -D " + dbsnp + - " -B:evalBI,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" + - " -noST -ST Novelty -o %s"; - WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("2e2c24b49f699506b967befbde5a6fa8")); - executeTestParallel("testEvalTrackWithoutGenotypes",spec); - } - - @Test - public void testMultipleEvalTracksWithoutGenotypes() { - String dbsnp = GATKDataLocation + "dbsnp_129_b37.rod"; - - String extraArgs = "-T VariantEval -R " + b37KGReference + - " -L 20" + - " -D " + dbsnp + - " -B:evalBI,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" + - " -B:evalBC,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bc.sites.vcf" + - " -noST -ST Novelty -o %s"; - WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("144053b8bef5a79b23d0abd17b561294")); - executeTestParallel("testMultipleEvalTracksWithoutGenotypes",spec); - } - - @Test - public void testMultipleCompTracks() { - String dbsnp = GATKDataLocation + "dbsnp_132_b37.vcf"; - - String extraArgs = "-T VariantEval" + - " -R " + b37KGReference + - " -B:comp,VCF " + validationDataLocation + "/VariantEval/ALL.phase1.chr20.broad.snps.genotypes.subset.vcf" + - " -B:eval,VCF " + validationDataLocation + "/VariantEval/NA12878.hg19.HiSeq.WGS.cleaned.ug.snpfiltered.indelfiltered.optimized.cut.subset.vcf" + - " -B:dbsnp,VCF " + dbsnp + - " -L 20:10000000-10100000" + - " -noST -noEV -ST Novelty -EV CompOverlap" + - " -o %s"; - - WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("2d2c6e7850ec964624bb032d24834e2f")); - executeTestParallel("testMultipleCompTracks",spec); - } - - +// // @Test // public void testVEValidatePass() { // String extraArgs = "-L 1:1-10,000,000"; @@ -227,4 +511,5 @@ public class VariantEvalIntegrationTest extends WalkerTest { // private static String withValidateTiTv(String cmd, double min, double max) { // return String.format("%s -validate 'eval.comp_genotypes.all.called.all.titv.tiTvRatio >= %2$s' -validate 'eval.comp_genotypes.all.called.all.titv.tiTvRatio <= %3$s'", cmd, min, max); // } + }