diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/SortableJexlVCMatchExp.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/SortableJexlVCMatchExp.java
new file mode 100755
index 000000000..dce08da45
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/SortableJexlVCMatchExp.java
@@ -0,0 +1,30 @@
+package org.broadinstitute.sting.gatk.walkers.varianteval.util;
+
+import org.apache.commons.jexl2.*;
+import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
+
+/**
+ * A JEXL match expression that is ordered by comparing the names of the expressions.
+ */
+public class SortableJexlVCMatchExp extends VariantContextUtils.JexlVCMatchExp implements Comparable<SortableJexlVCMatchExp> {
+    /**
+     * Create a new matcher expression with name and JEXL expression exp
+     *
+     * @param name name
+     * @param exp  expression
+     */
+    public SortableJexlVCMatchExp(String name, Expression exp) {
+        super(name, exp);
+    }
+
+    /**
+     * Compare this expression to another one by delegating to the comparison of their names.
+     *
+     * @param sortableJexlVCMatchExp the expression to compare against
+     * @return the result of comparing this expression's name with the other expression's name
+     */
+    @Override
+    public int compareTo(SortableJexlVCMatchExp sortableJexlVCMatchExp) {
+        return this.name.compareTo(sortableJexlVCMatchExp.name);
+    }
+}
diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java
new file mode 100755
index 000000000..adb1b56b1
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java
@@ -0,0 +1,470 @@
+package org.broadinstitute.sting.gatk.walkers.varianteval.util;
+
+import org.apache.log4j.Logger;
+import org.broad.tribble.util.variantcontext.VariantContext;
+import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
+import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
+import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
+import org.broadinstitute.sting.gatk.report.GATKReport;
+import org.broadinstitute.sting.gatk.report.GATKReportTable;
+import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker;
+import 
org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.StandardEval;
+import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.VariantEvaluator;
+import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.RequiredStratification;
+import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.StandardStratification;
+import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.VariantStratifier;
+import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis;
+import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint;
+import org.broadinstitute.sting.utils.classloader.PluginManager;
+import org.broadinstitute.sting.utils.exceptions.StingException;
+import org.broadinstitute.sting.utils.exceptions.UserException;
+import org.broadinstitute.sting.utils.report.utils.TableType;
+
+import java.lang.reflect.Field;
+import java.util.*;
+
+/**
+ * Helper routines used by the VariantEval walker: discovery and instantiation of stratification and
+ * evaluation modules, construction of the evaluation contexts and output report, and binding of
+ * VariantContexts to track and sample names.
+ */
+public class VariantEvalUtils {
+    private final VariantEvalWalker variantEvalWalker;
+    Logger logger;
+
+    /**
+     * Create the utility object for a walker; the logger is taken from the walker.
+     *
+     * @param variantEvalWalker the walker these utilities operate on behalf of
+     */
+    public VariantEvalUtils(VariantEvalWalker variantEvalWalker) {
+        this.variantEvalWalker = variantEvalWalker;
+        this.logger = variantEvalWalker.getLogger();
+    }
+
+    /**
+     * List all of the available evaluation modules, then exit successfully
+     */
+    public void listModulesAndExit() {
+        List<Class<? extends VariantStratifier>> vsClasses = new PluginManager<VariantStratifier>( VariantStratifier.class ).getPlugins();
+        List<Class<? extends VariantEvaluator>> veClasses = new PluginManager<VariantEvaluator>( VariantEvaluator.class ).getPlugins();
+
+        logger.info("Available stratification modules:");
+        logger.info("(Standard modules are starred)");
+        for (Class<? extends VariantStratifier> vsClass : vsClasses) {
+            // Required and standard stratifications are both flagged with a star
+            boolean starred = RequiredStratification.class.isAssignableFrom(vsClass) || StandardStratification.class.isAssignableFrom(vsClass);
+            logger.info("\t" + vsClass.getSimpleName() + (starred ? "*" : ""));
+        }
+        logger.info("");
+
+        logger.info("Available evaluation modules:");
+        logger.info("(Standard modules are starred)");
+        for (Class<? extends VariantEvaluator> veClass : veClasses) {
+            logger.info("\t" + veClass.getSimpleName() + (StandardEval.class.isAssignableFrom(veClass) ? "*" : ""));
+        }
+        logger.info("");
+
+        System.exit(0);
+    }
+
+    /**
+     * Initialize required, standard and user-specified stratification objects
+     *
+     * @param noStandardStrats don't use the standard stratifications
+     * @param modulesToUse     the list of stratification modules to use (may be null, meaning none)
+     * @return set of stratifications to use
+     * @throws UserException.CommandLineException if a requested module name is not a known stratification
+     * @throws StingException                     if a stratification module cannot be instantiated
+     */
+    public TreeSet<VariantStratifier> initializeStratificationObjects(boolean noStandardStrats, String[] modulesToUse) {
+        TreeSet<VariantStratifier> strats = new TreeSet<VariantStratifier>();
+        Set<String> stratsToUse = new HashSet<String>();
+
+        // Create a map for all stratification modules for easy lookup.
+        HashMap<String, Class<? extends VariantStratifier>> classMap = new HashMap<String, Class<? extends VariantStratifier>>();
+        for (Class<? extends VariantStratifier> c : new PluginManager<VariantStratifier>(VariantStratifier.class).getPlugins()) {
+            classMap.put(c.getSimpleName(), c);
+        }
+
+        // We must use all required stratification modules.
+        for (Class<? extends RequiredStratification> reqClass : new PluginManager<RequiredStratification>(RequiredStratification.class).getPlugins()) {
+            if (classMap.containsKey(reqClass.getSimpleName())) {
+                stratsToUse.add(reqClass.getSimpleName());
+            }
+        }
+
+        // By default, use standard stratification modules.
+        if (!noStandardStrats) {
+            for (Class<? extends StandardStratification> stdClass : new PluginManager<StandardStratification>(StandardStratification.class).getPlugins()) {
+                if (classMap.containsKey(stdClass.getSimpleName())) {
+                    stratsToUse.add(stdClass.getSimpleName());
+                }
+            }
+        }
+
+        // Now add the user-selected modules
+        if (modulesToUse != null) {
+            stratsToUse.addAll(Arrays.asList(modulesToUse));
+        }
+
+        // Instantiate the stratifications
+        for (String module : stratsToUse) {
+            Class<? extends VariantStratifier> c = classMap.get(module);
+            if (c == null) {
+                throw new UserException.CommandLineException("Module " + module + " could not be found; please check that you have specified the class name correctly");
+            }
+
+            try {
+                VariantStratifier vs = c.newInstance();
+                vs.initialize(variantEvalWalker.getJexlExpressions(), variantEvalWalker.getCompNames(), variantEvalWalker.getKnownNames(), variantEvalWalker.getEvalNames(), variantEvalWalker.getSampleNamesForStratification());
+
+                strats.add(vs);
+            } catch (InstantiationException e) {
+                // Keep the original exception as the cause so the underlying failure is not lost
+                throw new StingException("Unable to instantiate stratification module '" + c.getSimpleName() + "'", e);
+            } catch (IllegalAccessException e) {
+                throw new StingException("Illegal access error when trying to instantiate stratification module '" + c.getSimpleName() + "'", e);
+            }
+        }
+
+        return strats;
+    }
+
+    /**
+     * Initialize required, standard and user-specified evaluation objects
+     *
+     * @param noStandardEvals don't use the standard evaluations
+     * @param modulesToUse    the list of evaluation modules to use (may be null, meaning none)
+     * @return set of evaluations to use
+     * @throws UserException.CommandLineException if a requested module name is not a known evaluator
+     */
+    public Set<Class<? extends VariantEvaluator>> initializeEvaluationObjects(boolean noStandardEvals, String[] modulesToUse) {
+        Set<Class<? extends VariantEvaluator>> evals = new HashSet<Class<? extends VariantEvaluator>>();
+
+        // Create a map for all eval modules for easy lookup.
+        HashMap<String, Class<? extends VariantEvaluator>> classMap = new HashMap<String, Class<? extends VariantEvaluator>>();
+        for (Class<? extends VariantEvaluator> c : new PluginManager<VariantEvaluator>(VariantEvaluator.class).getPlugins()) {
+            classMap.put(c.getSimpleName(), c);
+        }
+
+        // By default, use standard eval modules.
+        if (!noStandardEvals) {
+            for (Class<? extends StandardEval> stdClass : new PluginManager<StandardEval>(StandardEval.class).getPlugins()) {
+                Class<? extends VariantEvaluator> evalClass = classMap.get(stdClass.getSimpleName());
+                if (evalClass != null) {
+                    evals.add(evalClass);
+                }
+            }
+        }
+
+        // Get the specific classes provided.
+        if (modulesToUse != null) {
+            for (String module : modulesToUse) {
+                Class<? extends VariantEvaluator> evalClass = classMap.get(module);
+                if (evalClass == null) {
+                    throw new UserException.CommandLineException("Module " + module + " could not be found; please check that you have specified the class name correctly");
+                }
+
+                evals.add(evalClass);
+            }
+        }
+
+        return evals;
+    }
+
+    /**
+     * Recursively initialize the evaluation contexts
+     *
+     * Each level of the recursion pops one stratification off a copy of the stack and recurses once per
+     * state of that stratification; when the stack is exhausted, the accumulated context is registered
+     * under a state key built from the simple class name of each stratification and its chosen state.
+     *
+     * @param stratificationObjects the stratifications to use
+     * @param evaluationObjects     the evaluations to use
+     * @param stratStack            a stack of stratifications to apply (null on the initial call)
+     * @param ec                    evaluation context (null on the initial call)
+     * @return a map of all the evaluation contexts
+     */
+    public HashMap<StateKey, NewEvaluationContext> initializeEvaluationContexts(Set<VariantStratifier> stratificationObjects, Set<Class<? extends VariantEvaluator>> evaluationObjects, Stack<VariantStratifier> stratStack, NewEvaluationContext ec) {
+        HashMap<StateKey, NewEvaluationContext> ecs = new HashMap<StateKey, NewEvaluationContext>();
+
+        if (stratStack == null) {
+            stratStack = new Stack<VariantStratifier>();
+            stratStack.addAll(stratificationObjects);
+        }
+
+        if (stratStack.isEmpty()) {
+            // With no stratifications at all the initial call arrives here with a null context;
+            // use an empty one rather than failing with a NullPointerException.
+            if (ec == null) {
+                ec = new NewEvaluationContext();
+            }
+
+            StateKey stateKey = new StateKey();
+            for (VariantStratifier vs : ec.keySet()) {
+                stateKey.put(vs.getClass().getSimpleName(), ec.get(vs));
+            }
+
+            ec.addEvaluationClassList(variantEvalWalker, stateKey, evaluationObjects);
+            ecs.put(stateKey, ec);
+
+            return ecs;
+        }
+
+        // Work on a copy so that sibling branches of the recursion see the same remaining stack
+        Stack<VariantStratifier> newStratStack = new Stack<VariantStratifier>();
+        newStratStack.addAll(stratStack);
+
+        VariantStratifier vs = newStratStack.pop();
+
+        for (String state : vs.getAllStates()) {
+            NewEvaluationContext nec = new NewEvaluationContext();
+            if (ec != null) {
+                nec.putAll(ec);
+            }
+            nec.put(vs, state);
+
+            ecs.putAll(initializeEvaluationContexts(stratificationObjects, evaluationObjects, newStratStack, nec));
+        }
+
+        return ecs;
+    }
+
+    /**
+     * Initialize the output report
+     *
+     * One table is added per evaluation class, with a column per stratification and a column per
+     * scanned data field whose value is not a TableType.
+     *
+     * @param stratificationObjects the stratifications to use
+     * @param evaluationObjects     the evaluations to use
+     * @return an initialized report object
+     * @throws StingException if an evaluator has no Analysis annotation or cannot be instantiated
+     */
+    public GATKReport initializeGATKReport(Set<VariantStratifier> stratificationObjects, Set<Class<? extends VariantEvaluator>> evaluationObjects) {
+        GATKReport report = new GATKReport();
+
+        for (Class<? extends VariantEvaluator> ve : evaluationObjects) {
+            String tableName = ve.getSimpleName();
+
+            Analysis analysis = ve.getAnnotation(Analysis.class);
+            if (analysis == null) {
+                throw new StingException("Evaluation module '" + tableName + "' is missing the Analysis annotation");
+            }
+            String tableDesc = analysis.description();
+
+            report.addTable(tableName, tableDesc);
+
+            GATKReportTable table = report.getTable(tableName);
+            table.addPrimaryKey("entry", false);
+            table.addColumn(tableName, tableName);
+
+            for (VariantStratifier vs : stratificationObjects) {
+                String columnName = vs.getClass().getSimpleName();
+
+                table.addColumn(columnName, "unknown");
+            }
+
+            try {
+                VariantEvaluator vei = ve.newInstance();
+                vei.initialize(variantEvalWalker);
+
+                AnalysisModuleScanner scanner = new AnalysisModuleScanner(vei);
+                Map<Field, DataPoint> datamap = scanner.getData();
+
+                for (Field field : datamap.keySet()) {
+                    field.setAccessible(true);
+
+                    if (!(field.get(vei) instanceof TableType)) {
+                        table.addColumn(field.getName(), 0.0);
+                    }
+                }
+            } catch (InstantiationException e) {
+                throw new StingException("InstantiationException: " + e, e);
+            } catch (IllegalAccessException e) {
+                throw new StingException("IllegalAccessException: " + e, e);
+            }
+        }
+
+        return report;
+    }
+
+    /**
+     * Figure out what the allowable variation types are based on the eval context
+     *
+     * NO_VARIATION is always allowed. The types seen in the eval tracks are added; only if none are
+     * found there are the types seen in the comp tracks added instead.
+     *
+     * @param tracker   the reference metadata tracker
+     * @param ref       the reference context
+     * @param compNames the comp track names
+     * @param evalNames the evaluation track names
+     * @return the set of allowable variation types
+     */
+    public EnumSet<VariantContext.Type> getAllowableVariationTypes(RefMetaDataTracker tracker, ReferenceContext ref, Set<String> compNames, Set<String> evalNames) {
+        EnumSet<VariantContext.Type> allowableTypes = EnumSet.of(VariantContext.Type.NO_VARIATION);
+
+        if (tracker != null) {
+            Collection<VariantContext> evalvcs = tracker.getVariantContexts(ref, evalNames, null, ref.getLocus(), true, false);
+
+            for (VariantContext vc : evalvcs) {
+                allowableTypes.add(vc.getType());
+            }
+
+            if (allowableTypes.size() == 1) {
+                // We didn't find any variation in the eval track, so now let's look at the comp track for allowable types
+                Collection<VariantContext> compvcs = tracker.getVariantContexts(ref, compNames, null, ref.getLocus(), true, false);
+
+                for (VariantContext vc : compvcs) {
+                    allowableTypes.add(vc.getType());
+                }
+            }
+        }
+
+        return allowableTypes;
+    }
+
+    /**
+     * Subset a VariantContext to a single sample
+     *
+     * @param vc         the VariantContext object containing multiple samples
+     * @param sampleName the sample to pull out of the VariantContext
+     * @return a new VariantContext with just the requested sample
+     */
+    public VariantContext getSubsetOfVariantContext(VariantContext vc, String sampleName) {
+        ArrayList<String> sampleNames = new ArrayList<String>();
+        sampleNames.add(sampleName);
+
+        return getSubsetOfVariantContext(vc, sampleNames);
+    }
+
+    /**
+     * Subset a VariantContext to a set of samples
+     *
+     * The chromosome counts are recalculated for the subset. The "ISSINGLETON" attribute is set when
+     * the alternate allele count (hets + 2 * hom-vars) is exactly one in both the original and the subset.
+     *
+     * @param vc          the VariantContext object containing multiple samples
+     * @param sampleNames the samples to pull out of the VariantContext
+     * @return a new VariantContext with just the requested samples
+     */
+    public VariantContext getSubsetOfVariantContext(VariantContext vc, Collection<String> sampleNames) {
+        VariantContext vcsub = vc.subContextFromGenotypes(vc.getGenotypes(sampleNames).values());
+
+        HashMap<String, Object> newAts = new HashMap<String, Object>(vcsub.getAttributes());
+
+        int originalAlleleCount = vc.getHetCount() + 2 * vc.getHomVarCount();
+        int newAlleleCount = vcsub.getHetCount() + 2 * vcsub.getHomVarCount();
+
+        if (originalAlleleCount == newAlleleCount && newAlleleCount == 1) {
+            newAts.put("ISSINGLETON", true);
+        }
+
+        VariantContextUtils.calculateChromosomeCounts(vcsub, newAts, true);
+
+        return VariantContext.modifyAttributes(vcsub, newAts);
+    }
+
+    /**
+     * Decide whether a VariantContext should be bound, given the filtering and no-call policy.
+     *
+     * @param vc           the candidate VariantContext
+     * @param byFilter     if true, filtered records are accepted as well as unfiltered ones
+     * @param allowNoCalls if true, NO_VARIATION records are accepted
+     * @return true if the record satisfies both policies
+     */
+    private boolean isAcceptable(VariantContext vc, boolean byFilter, boolean allowNoCalls) {
+        return (byFilter || !vc.isFiltered()) && (allowNoCalls || vc.getType() != VariantContext.Type.NO_VARIATION);
+    }
+
+    /**
+     * For a list of track names, bind the variant contexts to a trackName->sampleName->VariantContext mapping.
+     * Additional variant contexts per sample are automatically generated and added to the map unless the sample name
+     * matches the ALL_SAMPLE_NAME constant.
+     *
+     * A track is only bound when the tracker yields exactly one VariantContext for it at this locus.
+     *
+     * @param tracker        the metadata tracker
+     * @param ref            the reference context
+     * @param trackNames     the list of track names to process
+     * @param allowableTypes a set of allowable variation types
+     * @param byFilter       if false, only accept PASSing VariantContexts.  Otherwise, accept both PASSing and filtered
+     *                       sites
+     * @param trackPerSample if false, don't stratify per sample (and don't cut up the VariantContext like we would need
+     *                       to do this)
+     * @param allowNoCalls   if false, don't accept no-call loci from a variant track
+     * @return a mapping of track names to a map of sample names to VariantContext objects
+     */
+    public HashMap<String, HashMap<String, VariantContext>> bindVariantContexts(RefMetaDataTracker tracker, ReferenceContext ref, Set<String> trackNames, EnumSet<VariantContext.Type> allowableTypes, boolean byFilter, boolean trackPerSample, boolean allowNoCalls) {
+        HashMap<String, HashMap<String, VariantContext>> bindings = new HashMap<String, HashMap<String, VariantContext>>();
+
+        for (String trackName : trackNames) {
+            Collection<VariantContext> contexts = tracker == null ? null : tracker.getVariantContexts(ref, trackName, allowableTypes, ref.getLocus(), true, true);
+            VariantContext vc = contexts != null && contexts.size() == 1 ? contexts.iterator().next() : null;
+
+            if (vc == null) {
+                continue;
+            }
+
+            HashMap<String, VariantContext> vcs = new HashMap<String, VariantContext>();
+
+            // First, filter the VariantContext to represent only the samples for evaluation
+            VariantContext vcsub = vc;
+
+            if (vc.hasGenotypes() && vc.hasGenotypes(variantEvalWalker.getSampleNamesForEvaluation())) {
+                vcsub = getSubsetOfVariantContext(vc, variantEvalWalker.getSampleNamesForEvaluation());
+            }
+
+            if (isAcceptable(vcsub, byFilter, allowNoCalls)) {
+                vcs.put(VariantEvalWalker.getAllSampleName(), vcsub);
+            }
+
+            // Now, if stratifying, add one context per sample (each is cut from the original vc, not from vcsub)
+            if (vc.hasGenotypes() && trackPerSample) {
+                for (String sampleName : variantEvalWalker.getSampleNamesForEvaluation()) {
+                    VariantContext samplevc = getSubsetOfVariantContext(vc, sampleName);
+
+                    if (isAcceptable(samplevc, byFilter, allowNoCalls)) {
+                        vcs.put(sampleName, samplevc);
+                    }
+                }
+            }
+
+            bindings.put(trackName, vcs);
+        }
+
+        return bindings;
+    }
+
+    /**
+     * Maps track names to sample name to VariantContext objects.  For eval tracks, VariantContexts per specified sample
+     * are also included.
+     *
+     * Filtered records are accepted only when a stratification whose simple class name is "Filter" is active, and
+     * per-sample contexts are produced only when one named "Sample" is active. No-call records are accepted for
+     * eval tracks but not for comp tracks.
+     *
+     * @param tracker   the metadata tracker
+     * @param ref       the reference context
+     * @param compNames the list of comp names to process
+     * @param evalNames the list of eval names to process
+     * @return a mapping of track names to a map of sample names to VariantContext objects
+     */
+    public HashMap<String, HashMap<String, VariantContext>> getVariantContexts(RefMetaDataTracker tracker, ReferenceContext ref, Set<String> compNames, Set<String> evalNames) {
+        HashMap<String, HashMap<String, VariantContext>> vcs = new HashMap<String, HashMap<String, VariantContext>>();
+
+        EnumSet<VariantContext.Type> allowableTypes = getAllowableVariationTypes(tracker, ref, compNames, evalNames);
+
+        boolean byFilter = false;
+        boolean perSampleIsEnabled = false;
+        for (VariantStratifier vs : variantEvalWalker.getStratificationObjects()) {
+            String stratName = vs.getClass().getSimpleName();
+
+            if (stratName.equals("Filter")) {
+                byFilter = true;
+            } else if (stratName.equals("Sample")) {
+                perSampleIsEnabled = true;
+            }
+        }
+
+        HashMap<String, HashMap<String, VariantContext>> evalBindings = bindVariantContexts(tracker, ref, evalNames, allowableTypes, byFilter, perSampleIsEnabled, true);
+        HashMap<String, HashMap<String, VariantContext>> compBindings = bindVariantContexts(tracker, ref, compNames, allowableTypes, byFilter, false, false);
+
+        // Eval bindings are added last, so they win if a track name appears in both sets
+        vcs.putAll(compBindings);
+        vcs.putAll(evalBindings);
+
+        return vcs;
+    }
+
+    /**
+     * Recursively initialize the state keys used to look up the right evaluation context based on the state of the
+     * variant context
+     *
+     * @param stateMap   the map of allowable states
+     * @param stateStack a stack of the states (null on the initial call)
+     * @param stateKey   a state key object (null on the initial call)
+     * @param stateKeys  all the state keys; completed keys are appended to this list
+     * @return a list of state keys (the same list that was passed in as stateKeys)
+     */
+    public ArrayList<StateKey> initializeStateKeys(HashMap<VariantStratifier, ArrayList<String>> stateMap, Stack<HashMap<VariantStratifier, ArrayList<String>>> stateStack, StateKey stateKey, ArrayList<StateKey> stateKeys) {
+        if (stateStack == null) {
+            stateStack = new Stack<HashMap<VariantStratifier, ArrayList<String>>>();
+
+            // One single-entry map per stratification, so each recursion level handles one stratification
+            for (Map.Entry<VariantStratifier, ArrayList<String>> entry : stateMap.entrySet()) {
+                HashMap<VariantStratifier, ArrayList<String>> oneSetOfStates = new HashMap<VariantStratifier, ArrayList<String>>();
+                oneSetOfStates.put(entry.getKey(), entry.getValue());
+
+                stateStack.add(oneSetOfStates);
+            }
+        }
+
+        if (stateStack.isEmpty()) {
+            // Every stratification has been assigned a state: the key is complete
+            stateKeys.add(stateKey);
+
+            return stateKeys;
+        }
+
+        Stack<HashMap<VariantStratifier, ArrayList<String>>> newStateStack = new Stack<HashMap<VariantStratifier, ArrayList<String>>>();
+        newStateStack.addAll(stateStack);
+
+        HashMap<VariantStratifier, ArrayList<String>> oneSetOfStates = newStateStack.pop();
+        VariantStratifier vs = oneSetOfStates.keySet().iterator().next();
+
+        for (String state : oneSetOfStates.get(vs)) {
+            StateKey newStateKey = new StateKey();
+            if (stateKey != null) {
+                newStateKey.putAll(stateKey);
+            }
+
+            newStateKey.put(vs.getClass().getSimpleName(), state);
+
+            initializeStateKeys(stateMap, newStateStack, newStateKey, stateKeys);
+        }
+
+        return stateKeys;
+    }
+}
\ No newline at end of file