diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/NewVariantEvalWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/NewVariantEvalWalker.java new file mode 100755 index 000000000..a1ef70956 --- /dev/null +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/NewVariantEvalWalker.java @@ -0,0 +1,565 @@ +package org.broadinstitute.sting.playground.gatk.walkers.newvarianteval; + +import org.broad.tribble.util.variantcontext.VariantContext; +import org.broad.tribble.vcf.VCFConstants; +import org.broad.tribble.vcf.VCFHeader; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; +import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; +import org.broadinstitute.sting.gatk.report.GATKReport; +import org.broadinstitute.sting.gatk.report.GATKReportTable; +import org.broadinstitute.sting.gatk.walkers.Reference; +import org.broadinstitute.sting.gatk.walkers.RodWalker; +import org.broadinstitute.sting.gatk.walkers.TreeReducible; +import org.broadinstitute.sting.gatk.walkers.Window; +import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.evaluators.StandardEval; +import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.evaluators.VariantEvaluator; +import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.stratifications.RequiredStratification; +import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.stratifications.StandardStratification; +import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.stratifications.VariantStratifier; +import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.tags.Analysis; +import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.tags.DataPoint; +import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.util.AnalysisModuleScanner; +import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.util.NewEvaluationContext; +import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.util.StateKey; +import org.broadinstitute.sting.utils.SampleUtils; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.classloader.PluginManager; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.exceptions.StingException; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.vcf.VCFUtils; + +import java.io.PrintStream; +import java.lang.reflect.Field; +import java.util.*; + +/** + * General-purpose tool for variant evaluation (% in dbSNP, genotype concordance, Ts/Tv ratios, and a lot more) + */ +@Reference(window=@Window(start=-50, stop=50)) +public class NewVariantEvalWalker extends RodWalker implements TreeReducible { + // Output arguments + @Output + protected PrintStream out; + + // Help arguments + @Argument(fullName="list", shortName="ls", doc="List the available eval modules and exit") + protected Boolean LIST = false; + + // Partitioning the data arguments + @Argument(shortName="select", doc="One or more stratifications to use when evaluating the data", required=false) + protected ArrayList SELECT_EXPS = new ArrayList(); + + @Argument(shortName="selectName", doc="Names to use for the list of stratifications (must be a 1-to-1 mapping)", required=false) + protected ArrayList SELECT_NAMES = new ArrayList(); + + @Argument(shortName="sample", doc="Derive eval and comp contexts using only these sample genotypes, when genotypes are available in the original context", required=false) + protected Set SAMPLE_EXPRESSIONS; + + @Argument(shortName="knownName", doc="Name of ROD bindings containing variant sites that should be treated as known when splitting eval rods into known and novel subsets", required=false) + protected String[] KNOWN_NAMES = {DbSNPHelper.STANDARD_DBSNP_TRACK_NAME}; + + // Stratification arguments + @Argument(fullName="stratificationModule", shortName="ST", doc="One or more specific stratification modules to apply to the eval track(s) (in addition to the standard stratifications, unless -noS is specified)", required=false) + protected String[] STRATIFICATIONS_TO_USE = {}; + + @Argument(fullName="doNotUseAllStandardStratifications", shortName="noST", doc="Do not use the standard stratification modules by default (instead, only those that are specified with the -S option)") + protected Boolean NO_STANDARD_STRATIFICATIONS = false; + + // Evaluator arguments + @Argument(fullName="evalModule", shortName="EV", doc="One or more specific eval modules to apply to the eval track(s) (in addition to the standard modules, unless -noE is specified)", required=false) + protected String[] MODULES_TO_USE = {}; + + @Argument(fullName="doNotUseAllStandardModules", shortName="noEV", doc="Do not use the standard modules by default (instead, only those that are specified with the -E option)") + protected Boolean NO_STANDARD_MODULES = false; + + // Variables + private Set jexlExpressions = new TreeSet(); + private Set compNames = new TreeSet(); + private Set knownNames = new TreeSet(); + private Set evalNames = new TreeSet(); + private Set sampleNames = new TreeSet(); + + // The list of stratifiers and evaluators to use + private TreeSet stratificationObjects = null; + private Set> evaluationObjects = null; + + // The set of all possible evaluation contexts + private HashMap evaluationContexts = null; + + // Output report + private GATKReport report = null; + + // Public constants + public static String ALL_SAMPLE_NAME = "all"; + + /** + * List all of the available evaluation modules, then exit successfully + */ + private void listModulesAndExit() { + List> vsClasses = new PluginManager( VariantStratifier.class ).getPlugins(); + List> veClasses = new PluginManager( VariantEvaluator.class ).getPlugins(); + + logger.info("Available stratifcation modules:"); + logger.info("(Standard modules are starred)"); + for (Class vsClass : vsClasses) { + logger.info("\t" + vsClass.getSimpleName() + (RequiredStratification.class.isAssignableFrom(vsClass) || StandardStratification.class.isAssignableFrom(vsClass) ? "*" : "")); + } + logger.info(""); + + logger.info("Available eval modules:"); + logger.info("(Standard modules are starred)"); + for (Class veClass : veClasses) { + logger.info("\t" + veClass.getSimpleName() + (StandardEval.class.isAssignableFrom(veClass) ? "*" : "")); + } + logger.info(""); + + System.exit(0); + } + + private TreeSet initializeStratificationObjects(boolean noStandardStrats, String[] modulesToUse) { + TreeSet strats = new TreeSet(); + Set stratsToUse = new HashSet(); + + // Create a map for all stratification modules for easy lookup. + HashMap> classMap = new HashMap>(); + for ( Class c : new PluginManager( VariantStratifier.class ).getPlugins() ) { + classMap.put(c.getSimpleName(), c); + } + + // We must use all required stratification modules. + for ( Class reqClass : new PluginManager( RequiredStratification.class ).getPlugins() ) { + if ( classMap.containsKey(reqClass.getSimpleName()) ) { + stratsToUse.add(reqClass.getSimpleName()); + } + } + + // By default, use standard stratification modules. + if ( !noStandardStrats ) { + for ( Class stdClass : new PluginManager( StandardStratification.class ).getPlugins() ) { + if ( classMap.containsKey(stdClass.getSimpleName()) ) { + stratsToUse.add(stdClass.getSimpleName()); + } + } + } + + // Now add the user-selected modules + stratsToUse.addAll(Arrays.asList(modulesToUse)); + + // Instantiate the stratifications + for ( String module : stratsToUse ) { + if ( !classMap.containsKey(module) ) { + throw new UserException.CommandLineException("Module " + module + " could not be found; please check that you have specified the class name correctly"); + } + + if ( classMap.containsKey(module) ) { + Class c = classMap.get(module); + + try { + VariantStratifier vs = c.newInstance(); + vs.initialize(jexlExpressions, compNames, knownNames, evalNames, sampleNames); + + strats.add(vs); + } catch (InstantiationException e) { + throw new StingException("Unable to instantiate stratification module '" + c.getSimpleName() + "'"); + } catch (IllegalAccessException e) { + throw new StingException("Illegal access error when trying to instantiate stratification module '" + c.getSimpleName() + "'"); + } + } + } + + return strats; + } + + private Set> initializeEvaluationObjects(boolean noStandardEvals, String[] modulesToUse) { + Set> evals = new HashSet>(); + + // Create a map for all eval modules for easy lookup. + HashMap> classMap = new HashMap>(); + for ( Class c : new PluginManager( VariantEvaluator.class ).getPlugins() ) { + classMap.put(c.getSimpleName(), c); + } + + // By default, use standard eval modules. + if ( !noStandardEvals ) { + for ( Class stdClass : new PluginManager( StandardEval.class ).getPlugins() ) { + if ( classMap.containsKey(stdClass.getSimpleName()) ) { + evals.add(classMap.get(stdClass.getSimpleName())); + } + } + } + + // Get the specific classes provided. + for ( String module : modulesToUse ) { + if ( !classMap.containsKey(module) ) { + throw new UserException.CommandLineException("Module " + module + " could not be found; please check that you have specified the class name correctly"); + } + + if ( classMap.containsKey(module) ) { + evals.add(classMap.get(module)); + } + } + + return evals; + } + + private HashMap initializeEvaluationContexts(Set stratificationObjects, Set> evaluationObjects, Stack stratStack, NewEvaluationContext ec) { + HashMap ecs = new HashMap(); + + if (stratStack == null) { + stratStack = new Stack(); + stratStack.addAll(stratificationObjects); + } + + if (!stratStack.isEmpty()) { + Stack newStratStack = new Stack(); + newStratStack.addAll(stratStack); + + NewEvaluationContext nec = new NewEvaluationContext(); + if (ec != null) { + nec.putAll(ec); + } + + VariantStratifier vs = newStratStack.pop(); + + for ( String state : vs.getAllStates() ) { + nec.put(vs, state); + + ecs.putAll(initializeEvaluationContexts(stratificationObjects, evaluationObjects, newStratStack, nec)); + } + } else { + HashMap necs = new HashMap(); + + StateKey statekey = new StateKey(); + for ( VariantStratifier vs : ec.keySet() ) { + String state = ec.get(vs); + + statekey.put(vs.getClass().getSimpleName(), state); + } + + ec.addEvaluationClassList(evaluationObjects); + + necs.put(statekey, ec); + + return necs; + } + + return ecs; + } + + private GATKReport initializeGATKReport(Set stratificationObjects, Set> evaluationObjects) { + GATKReport report = new GATKReport(); + + for ( Class ve : evaluationObjects ) { + String tableName = ve.getSimpleName(); + String tableDesc = ve.getAnnotation(Analysis.class).description(); + + report.addTable(tableName, tableDesc); + + GATKReportTable table = report.getTable(tableName); + table.addPrimaryKey("entry", false); + + for ( VariantStratifier vs : stratificationObjects ) { + String columnName = vs.getClass().getSimpleName(); + + columnName = columnName.replace("Stratifier", ""); + columnName = columnName.replace("Status", ""); + + table.addColumn(columnName, "unknown"); + } + + AnalysisModuleScanner scanner = new AnalysisModuleScanner(ve); + Map datamap = scanner.getData(); + + for (Field field : datamap.keySet()) { + table.addColumn(field.getName(), 0.0); + } + } + + return report; + } + + public void initialize() { + // Just list the modules, and exit quickly. + if (LIST) { listModulesAndExit(); } + + // Categorize each rod as an eval or a comp rod. + for ( ReferenceOrderedDataSource d : this.getToolkit().getRodDataSources() ) { + if ( d.getName().startsWith("eval") ) { + evalNames.add(d.getName()); + } else if ( d.getName().startsWith("comp") || d.getName().startsWith(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME) ) { + compNames.add(d.getName()); + } else { + logger.info(String.format("Not evaluating ROD binding '%s' because the name did not start with %s, comp, or eval", d.getName(), Utils.join(", ", KNOWN_NAMES))); + } + } + + // Set up set of known names + knownNames.addAll(Arrays.asList(KNOWN_NAMES)); + + // Now that we have all the rods categorized, determine the sample list from the eval rods. + Map vcfRods = VCFUtils.getVCFHeadersFromRods(getToolkit(), evalNames); + Set vcfSamples = SampleUtils.getSampleList(vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE); + sampleNames.addAll(SampleUtils.getSamplesFromCommandLineInput(vcfSamples, SAMPLE_EXPRESSIONS)); + + // Initialize select expressions + jexlExpressions.addAll(VariantContextUtils.initializeMatchExps(SELECT_NAMES, SELECT_EXPS)); + + // Initialize the set of stratifications and evaluations to use + stratificationObjects = initializeStratificationObjects(NO_STANDARD_STRATIFICATIONS, STRATIFICATIONS_TO_USE); + evaluationObjects = initializeEvaluationObjects(NO_STANDARD_MODULES, MODULES_TO_USE); + + // Initialize the evaluation contexts + evaluationContexts = initializeEvaluationContexts(stratificationObjects, evaluationObjects, null, null); + + // Initialize report table + report = initializeGATKReport(stratificationObjects, evaluationObjects); + } + + private EnumSet getAllowableVariationTypes(RefMetaDataTracker tracker, ReferenceContext ref, Set evalNames) { + EnumSet allowableTypes = EnumSet.of(VariantContext.Type.NO_VARIATION); + + Collection vcs = tracker.getVariantContexts(ref, evalNames, null, ref.getLocus(), true, false); + + for ( VariantContext vc : vcs ) { + allowableTypes.add(vc.getType()); + } + + return allowableTypes; + } + + /** + * For a list of track names, bind the variant contexts to a trackName->sampleName->VariantContext mapping. + * Additional variant contexts per sample are automatically generated and added to the map unless the + * sample name matches the ALL_SAMPLE_NAME constant. + * + * @param tracker the metadata tracker + * @param ref the reference context + * @param trackNames the list of track names to process + * @param sampleNames the list of samples to include + * @param allowableTypes a set of allowable variation types + * @return a mapping of track names to a list of VariantContext objects + */ + private HashMap> bindVariantContexts(RefMetaDataTracker tracker, ReferenceContext ref, Set trackNames, Set sampleNames, EnumSet allowableTypes) { + HashMap> bindings = new HashMap>(); + + for ( String trackName : trackNames ) { + Collection contexts = tracker.getVariantContexts(ref, trackName, allowableTypes, ref.getLocus(), true, true); + + VariantContext vc = contexts.size() == 1 ? contexts.iterator().next() : null; + + HashMap vcs = new HashMap(); + + if ( vc != null ) { + for ( String sample : sampleNames ) { + VariantContext vcsub = vc; + + if (!sample.equals(ALL_SAMPLE_NAME)) { + vcsub = vcsub.subContextFromGenotypes(vcsub.getGenotype(sample)); + + HashMap newAts = new HashMap(vcsub.getAttributes()); + VariantContextUtils.calculateChromosomeCounts(vcsub,newAts,true); + vcsub = VariantContext.modifyAttributes(vcsub,newAts); + + logger.debug(String.format("VC %s subset to %s AC%n", vc.getSource(), vc.getAttributeAsString(VCFConstants.ALLELE_COUNT_KEY))); + } + + vcs.put(sample, vcsub); + } + + bindings.put(trackName, vcs); + } + } + + return bindings; + } + + /** + * Maps track names to sample name to VariantContext objects. For eval tracks, VariantContexts per specified + * sample are also included. + * + * @param tracker the metadata tracker + * @param ref the reference context + * @param compNames the list of comp names to process + * @param evalNames the list of eval names to process + * @param sampleNames the list of samples to include + * @return a mapping of track names to a list of VariantContext objects + */ + private HashMap> getVariantContexts(RefMetaDataTracker tracker, ReferenceContext ref, Set compNames, Set evalNames, Set sampleNames) { + HashMap> vcs = new HashMap>(); + + Set allSamplesList = new HashSet(); + allSamplesList.add(ALL_SAMPLE_NAME); + + EnumSet allowableTypes = getAllowableVariationTypes(tracker, ref, evalNames); + + HashMap> compBindings = bindVariantContexts(tracker, ref, compNames, allSamplesList, allowableTypes); + HashMap> evalBindings = bindVariantContexts(tracker, ref, evalNames, sampleNames, allowableTypes); + + //HashMap> evalBindings; + //if (stratificationObjects.contains(SampleStratifier) { + // evalBindings = bindVariantContexts(tracker, ref, evalNames, sampleNames, allowableTypes); + //} else { + // evalBindings = bindVariantContexts(tracker, ref, evalNames, allSamplesList, allowableTypes); + //} + + vcs.putAll(compBindings); + vcs.putAll(evalBindings); + + return vcs; + } + + private ArrayList initializeStateKeys(HashMap> stateMap, Stack>> stateStack, StateKey stateKey) { + ArrayList stateKeys = new ArrayList(); + + if (stateStack == null) { + stateStack = new Stack>>(); + + for ( VariantStratifier vs : stateMap.keySet() ) { + HashMap> oneSetOfStates = new HashMap>(); + oneSetOfStates.put(vs, stateMap.get(vs)); + + stateStack.add(oneSetOfStates); + } + } + + if (!stateStack.isEmpty()) { + Stack>> newStateStack = new Stack>>(); + newStateStack.addAll(stateStack); + + StateKey newStateKey = new StateKey(); + if (stateKey != null) { + newStateKey.putAll(stateKey); + } + + HashMap> oneSetOfStates = newStateStack.pop(); + VariantStratifier vs = oneSetOfStates.keySet().iterator().next(); + + for ( String state : oneSetOfStates.get(vs)) { + newStateKey.put(vs.getClass().getSimpleName(), state); + + stateKeys.addAll(initializeStateKeys(stateMap, newStateStack, newStateKey)); + } + } else { + ArrayList newStateKeys = new ArrayList(); + + newStateKeys.add(stateKey); + + return newStateKeys; + } + + return stateKeys; + } + + @Override + public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + if (tracker != null) { + // track sample vc + HashMap> vcs = getVariantContexts(tracker, ref, compNames, evalNames, sampleNames); + + for ( String compName : compNames ) { + VariantContext comp = vcs.containsKey(compName) && vcs.get(compName) != null && vcs.get(compName).containsKey(ALL_SAMPLE_NAME) ? vcs.get(compName).get(ALL_SAMPLE_NAME) : null; + + for ( String evalName : evalNames ) { + for ( String sampleName : sampleNames ) { + VariantContext eval = vcs.get(evalName) == null ? null : vcs.get(evalName).get(sampleName); + + HashMap> stateMap = new HashMap>(); + for ( VariantStratifier vs : stratificationObjects ) { + ArrayList states = vs.getRelevantStates(ref, comp, eval, sampleName); + stateMap.put(vs, states); + } + + ArrayList stateKeys = initializeStateKeys(stateMap, null, null); + + for ( StateKey stateKey : stateKeys ) { + NewEvaluationContext nec = evaluationContexts.get(stateKey); + + nec.apply(tracker, ref, context, comp, eval); + } + + //logger.info(ref.getLocus()); + //logger.info("\tcomp: " + comp); + //logger.info("\teval: " + eval); + } + } + } + } + + return null; + } + + /** + * A composite, 'reduce of reduces' function. + * + * @param lhs 'left-most' portion of data in the composite reduce. + * @param rhs 'right-most' portion of data in the composite reduce. + * @return The composite reduce type. + */ + public Integer treeReduce(Integer lhs, Integer rhs) { + return null; + } + + /** + * Provide an initial value for reduce computations. + * + * @return Initial value of reduce. + */ + @Override + public Integer reduceInit() { + return null; + } + + /** + * Reduces a single map with the accumulator provided as the ReduceType. + * + * @param value result of the map. + * @param sum accumulator for the reduce. + * @return accumulator with result of the map taken into account. + */ + @Override + public Integer reduce(Integer value, Integer sum) { + return null; + } + + public void onTraversalDone(Integer result) { + for ( StateKey stateKey : evaluationContexts.keySet() ) { + NewEvaluationContext nec = evaluationContexts.get(stateKey); + + for ( VariantEvaluator ve : nec.getEvaluationClassList().values() ) { + ve.finalizeEvaluation(); + + AnalysisModuleScanner scanner = new AnalysisModuleScanner(ve); + Map datamap = scanner.getData(); + + GATKReportTable table = report.getTable(ve.getClass().getSimpleName()); + + for ( VariantStratifier vs : stratificationObjects ) { + String columnName = vs.getClass().getSimpleName(); + + columnName = columnName.replace("Stratifier", ""); + columnName = columnName.replace("Status", ""); + + table.set(stateKey.toString(), columnName, stateKey.get(vs.getClass().getSimpleName())); + } + + for (Field field : datamap.keySet()) { + try { + field.setAccessible(true); + table.set(stateKey.toString(), field.getName(), field.get(ve)); + } catch (IllegalAccessException e) { + throw new ReviewedStingException("Unable to access a data field in a VariantEval analysis module: " + e); + } + } + } + } + + report.print(out); + } +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/evaluators/CompEvalGenotypes.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/evaluators/CompEvalGenotypes.java new file mode 100755 index 000000000..641878acd --- /dev/null +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/evaluators/CompEvalGenotypes.java @@ -0,0 +1,35 @@ +package org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.evaluators; + +import org.broad.tribble.util.variantcontext.Genotype; +import org.broadinstitute.sting.utils.GenomeLoc; + +class CompEvalGenotypes { + private GenomeLoc loc; + private Genotype compGt; + private Genotype evalGt; + + public CompEvalGenotypes(GenomeLoc loc, Genotype compGt, Genotype evalGt) { + this.loc = loc; + this.compGt = compGt; + this.evalGt = evalGt; + } + + public GenomeLoc getLocus() { + return loc; + } + + public Genotype getCompGenotpye() { + return compGt; + } + public Genotype getEvalGenotype() { + return evalGt; + } + + public void setCompGenotype(Genotype compGt) { + this.compGt = compGt; + } + + public void setEvalGenotype(Genotype evalGt) { + this.evalGt = evalGt; + } +} diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/evaluators/CompOverlap.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/evaluators/CompOverlap.java new file mode 100755 index 000000000..d96a135a1 --- /dev/null +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/evaluators/CompOverlap.java @@ -0,0 +1,96 @@ +package org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.evaluators; + +import org.broad.tribble.util.variantcontext.Allele; +import org.broad.tribble.util.variantcontext.VariantContext; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.tags.Analysis; +import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.tags.DataPoint; + +/** + * The Broad Institute + * SOFTWARE COPYRIGHT NOTICE AGREEMENT + * This software and its documentation are copyright 2009 by the + * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. + *

+ * This software is supplied without any warranty or guaranteed support whatsoever. Neither + * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. + */ +@Analysis(description = "The overlap between eval and comp sites") +public class CompOverlap extends VariantEvaluator implements StandardEval { + + @DataPoint(description = "number of eval SNP sites") + long nEvalSNPs = 0; + + @DataPoint(description = "number of comp SNP sites") + long nCompSNPs = 0; + + @DataPoint(description = "number of eval sites outside of comp sites") + long novelSites = 0; + + @DataPoint(description = "number of eval sites at comp sites") + long nSNPsAtComp = 0; + + @DataPoint(description = "percentage of eval sites at comp sites") + double compRate = 0.0; + + @DataPoint(description = "number of concordant sites") + long nConcordant = 0; + + @DataPoint(description = "the concordance rate") + double concordantRate = 0.0; + + public int getComparisonOrder() { + return 2; // we need to see each eval track and each comp track + } + + public long nNovelSites() { return nEvalSNPs - nSNPsAtComp; } + public double compRate() { return rate(nSNPsAtComp, nEvalSNPs); } + public double concordanceRate() { return rate(nConcordant, nSNPsAtComp); } + + public void finalizeEvaluation() { + compRate = 100 * compRate(); + concordantRate = 100 * concordanceRate(); + novelSites = nNovelSites(); + } + + public boolean enabled() { + return true; + } + + /** + * Returns true if every allele in eval is also in comp + * + * @param eval eval context + * @param comp db context + * @return true if eval and db are discordant + */ + public boolean discordantP(VariantContext eval, VariantContext comp) { + for (Allele a : eval.getAlleles()) { + if (!comp.hasAllele(a, true)) + return true; + } + + return false; + } + + public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + boolean expectingIndels = false; + + boolean compIsGood = expectingIndels ? comp != null && comp.isNotFiltered() && comp.isIndel() : comp != null && comp.isNotFiltered() && comp.isSNP() ; + boolean evalIsGood = expectingIndels ? eval != null && eval.isIndel() : eval != null && eval.isSNP() ; + + if (compIsGood) nCompSNPs++; // count the number of comp events + if (evalIsGood) nEvalSNPs++; // count the number of eval events + + if (compIsGood && evalIsGood) { + nSNPsAtComp++; + + if (!discordantP(eval, comp)) // count whether we're concordant or not with the comp value + nConcordant++; + } + + return null; // we don't capture any interesting sites + } +} diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/evaluators/CountVariants.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/evaluators/CountVariants.java new file mode 100755 index 000000000..e371cd1bf --- /dev/null +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/evaluators/CountVariants.java @@ -0,0 +1,144 @@ +package org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.evaluators; + +import org.broad.tribble.util.variantcontext.Genotype; +import org.broad.tribble.util.variantcontext.VariantContext; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.tags.Analysis; +import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.tags.DataPoint; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + +@Analysis(description = "Counts different classes of variants in the sample") +public class CountVariants extends VariantEvaluator implements StandardEval { + + // the following fields are in output order: + + // basic counts on various rates found + @DataPoint(description = "Number of processed loci") + long nProcessedLoci = 0; + @DataPoint(description = "Number of called loci") + long nCalledLoci = 0; + @DataPoint(description = "Number of reference loci") + long nRefLoci = 0; + @DataPoint(description = "Number of variant loci") + long nVariantLoci = 0; + + // the following two calculations get set in the finalizeEvaluation + @DataPoint(description = "Variants per loci rate") + double variantRate = 0; + @DataPoint(description = "Number of variants per base") + double variantRatePerBp = 0; + + + @DataPoint(description = "Number of snp loci") + long nSNPs = 0; + @DataPoint(description = "Number of insertions") + long nInsertions = 0; + @DataPoint(description = "Number of deletions") + long nDeletions = 0; + @DataPoint(description = "Number of complex loci") + long nComplex = 0; + + @DataPoint(description = "Number of no calls loci") + long nNoCalls = 0; + @DataPoint(description = "Number of het loci") + long nHets = 0; + @DataPoint(description = "Number of hom ref loci") + long nHomRef = 0; + @DataPoint(description = "Number of hom var loci") + long nHomVar = 0; + @DataPoint(description = "Number of singletons") + long nSingletons = 0; + + // calculations that get set in the finalizeEvaluation method + @DataPoint(description = "heterozygosity per locus rate") + double heterozygosity = 0; + @DataPoint(description = "heterozygosity per base pair") + double heterozygosityPerBp = 0; + @DataPoint(description = "heterozygosity to homozygosity ratio") + double hetHomRatio = 0; + @DataPoint(description = "indel rate (insertion count + deletion count)") + double indelRate = 0; + @DataPoint(description = "indel rate per base pair") + double indelRatePerBp = 0; + @DataPoint(description = "deletion to insertion ratio") + double deletionInsertionRatio = 0; + + private double perLocusRate(long n) { + return rate(n, nProcessedLoci); + } + + private long perLocusRInverseRate(long n) { + return inverseRate(n, nProcessedLoci); + } + + public boolean enabled() { + return true; + } + + public int getComparisonOrder() { + return 1; // we only need to see each eval track + } + + public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + nProcessedLoci += context.getSkippedBases() + (ref == null ? 0 : 1); + } + + public String update1(VariantContext vc1, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + //nProcessedLoci++; + nCalledLoci++; + + if (vc1.isVariant()) nVariantLoci++; + switch (vc1.getType()) { + case NO_VARIATION: + nRefLoci++; + break; + case SNP: + nSNPs++; + if (vc1.getAttributeAsBoolean("ISSINGLETON")) nSingletons++; + break; + case INDEL: + if (vc1.isInsertion()) nInsertions++; + else nDeletions++; + break; + case MIXED: + nComplex++; + break; + default: + throw new ReviewedStingException("Unexpected VariantContext type " + vc1.getType()); + } + + for (Genotype g : vc1.getGenotypes().values()) { + switch (g.getType()) { + case NO_CALL: + nNoCalls++; + break; + case HOM_REF: + nHomRef++; + break; + case HET: + nHets++; + break; + case HOM_VAR: + nHomVar++; + break; + default: + throw new ReviewedStingException("BUG: Unexpected genotype type: " + g); + } + } + + return null; // we don't capture any interesting sites + } + + public void finalizeEvaluation() { + variantRate = perLocusRate(nVariantLoci); + variantRatePerBp = perLocusRInverseRate(nVariantLoci); + heterozygosity = perLocusRate(nHets); + heterozygosityPerBp = perLocusRInverseRate(nHets); + hetHomRatio = ratio(nHets, nHomVar); + indelRate = perLocusRate(nDeletions + nInsertions); + indelRatePerBp = perLocusRInverseRate(nDeletions + nInsertions); + deletionInsertionRatio = ratio(nDeletions, nInsertions); + } +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/evaluators/PhaseStats.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/evaluators/PhaseStats.java new file mode 100755 index 000000000..7e4419ecd --- /dev/null +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/evaluators/PhaseStats.java @@ -0,0 +1,54 @@ +package org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.evaluators; + +/** + * Created by IntelliJ IDEA. User: kiran Date: Nov 29, 2010 Time: 3:25:59 PM To change this template use File | Settings + * | File Templates. + */ +class PhaseStats { + public int neitherPhased; + public int onlyCompPhased; + public int onlyEvalPhased; + public int phasesAgree; + public int phasesDisagree; + + public PhaseStats() { + this.neitherPhased = 0; + this.onlyCompPhased = 0; + this.onlyEvalPhased = 0; + this.phasesAgree = 0; + this.phasesDisagree = 0; + } + + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append("Neither phased: " + neitherPhased + "\tOnly Comp: " + onlyCompPhased + "\tOnly Eval: " + onlyEvalPhased + "\tSame phase: " + phasesAgree + "\tOpposite phase: " + phasesDisagree); + return sb.toString(); + } + + public static String[] getFieldNamesArray() { + return new String[]{"total", "neither", "only_comp", "only_eval", "both", "match", "switch", "switch_rate"}; + } + + public Object getField(int index) { + switch (index) { + case (0): + return (neitherPhased + onlyCompPhased + onlyEvalPhased + phasesAgree + phasesDisagree); + case (1): + return neitherPhased; + case (2): + return onlyCompPhased; + case (3): + return onlyEvalPhased; + case (4): + return (phasesAgree + phasesDisagree); + case (5): + return phasesAgree; + case (6): + return phasesDisagree; + case (7): + return ((phasesDisagree == 0) ? 0 : ((double) phasesDisagree) / (phasesAgree + phasesDisagree)); + default: + return -1; + } + } +} diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/evaluators/SamplePreviousGenotypes.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/evaluators/SamplePreviousGenotypes.java new file mode 100755 index 000000000..0d5604efe --- /dev/null +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/evaluators/SamplePreviousGenotypes.java @@ -0,0 +1,30 @@ +package org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.evaluators; + +import org.broad.tribble.util.variantcontext.Genotype; +import org.broadinstitute.sting.utils.GenomeLoc; + +import java.util.HashMap; + +/** + * Created by IntelliJ IDEA. User: kiran Date: Nov 29, 2010 Time: 3:25:59 PM To change this template use File | Settings + * | File Templates. + */ +class SamplePreviousGenotypes { + private HashMap sampleGenotypes = null; + + public SamplePreviousGenotypes() { + this.sampleGenotypes = new HashMap(); + } + + public CompEvalGenotypes get(String sample) { + return sampleGenotypes.get(sample); + } + + public void put(String sample, CompEvalGenotypes compEvalGts) { + sampleGenotypes.put(sample, compEvalGts); + } + + public void put(String sample, GenomeLoc locus, Genotype compGt, Genotype evalGt) { + sampleGenotypes.put(sample, new CompEvalGenotypes(locus, compGt, evalGt)); + } +} diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/evaluators/StandardEval.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/evaluators/StandardEval.java new file mode 100755 index 000000000..787def0d0 --- /dev/null +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/evaluators/StandardEval.java @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2010. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.evaluators; + +public interface StandardEval {} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/evaluators/ThetaVariantEvaluator.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/evaluators/ThetaVariantEvaluator.java new file mode 100755 index 000000000..676d62183 --- /dev/null +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/evaluators/ThetaVariantEvaluator.java @@ -0,0 +1,127 @@ +package org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.evaluators; + +import org.broad.tribble.util.variantcontext.Allele; +import org.broad.tribble.util.variantcontext.Genotype; +import org.broad.tribble.util.variantcontext.VariantContext; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.tags.Analysis; +import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.tags.DataPoint; + +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; + +@Analysis(description = "Computes different estimates of theta based on variant sites and genotypes") +public class ThetaVariantEvaluator extends VariantEvaluator { + @DataPoint(description = "Average heterozygosity at variant sites; note that missing genotypes are ignored when computing this value") + double avgHet = 0.0; + @DataPoint(description = "Average pairwise differences at aligned sequences; averaged over both number of sequeneces and number of variant sites; note that missing genotypes are ignored when computing this value") + double avgAvgDiffs = 0.0; + @DataPoint(description = "Sum of heterozygosity over all variant sites; divide this by total target to get estimate of per base theta") + double totalHet = 0.0; + @DataPoint(description = "Sum of pairwise diffs over all variant sites; divide this by total target to get estimate of per base theta") + double totalAvgDiffs = 0.0; + @DataPoint(description = "Theta for entire region estimated based on number of segregating sites; divide ths by total target to get estimate of per base theta") + double thetaRegionNumSites = 0.0; + + //helper variables + double numSites = 0; + + public boolean enabled() { + return true; + } + + public int getComparisonOrder() { + return 1; + } + + public String update1(VariantContext vc, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + if (vc == null || !vc.isSNP() || !vc.hasGenotypes()) { + return null; //no interesting sites + } + + if (vc.hasGenotypes()) { + + //this maps allele to a count + ConcurrentMap alleleCounts = new ConcurrentHashMap(); + + int numHetsHere = 0; + float numGenosHere = 0; + int numIndsHere = 0; + + for (Genotype genotype : vc.getGenotypes().values()) { + numIndsHere++; + if (!genotype.isNoCall()) { + //increment stats for heterozygosity + if (genotype.isHet()) { + numHetsHere++; + } + + numGenosHere++; + //increment stats for pairwise mismatches + + for (Allele allele : genotype.getAlleles()) { + if (allele.isNonNull() && allele.isCalled()) { + String alleleString = allele.toString(); + alleleCounts.putIfAbsent(alleleString, 0); + alleleCounts.put(alleleString, alleleCounts.get(alleleString) + 1); + } + } + } + } + if (numGenosHere > 0) { + //only if have one called genotype at least + this.numSites++; + + this.totalHet += numHetsHere / numGenosHere; + + //compute based on num sites + float harmonicFactor = 0; + for (int i = 1; i <= numIndsHere; i++) { + harmonicFactor += 1.0 / i; + } + this.thetaRegionNumSites += 1.0 / harmonicFactor; + + //now compute pairwise mismatches + float numPairwise = 0; + float numDiffs = 0; + for (String allele1 : alleleCounts.keySet()) { + int allele1Count = alleleCounts.get(allele1); + + for (String allele2 : alleleCounts.keySet()) { + if (allele1.compareTo(allele2) < 0) { + continue; + } + if (allele1 .compareTo(allele2) == 0) { + numPairwise += allele1Count * (allele1Count - 1) * .5; + + } + else { + int allele2Count = alleleCounts.get(allele2); + numPairwise += allele1Count * allele2Count; + numDiffs += allele1Count * allele2Count; + } + } + } + + if (numPairwise > 0) { + this.totalAvgDiffs += numDiffs / numPairwise; + } + } + } + + return null; + } + + @Override + public void finalizeEvaluation() { + + if (this.numSites > 0) { + + this.avgHet = this.totalHet / this.numSites; + this.avgAvgDiffs = this.totalAvgDiffs / this.numSites; + + } + } +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/evaluators/TiTvVariantEvaluator.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/evaluators/TiTvVariantEvaluator.java new file mode 100755 index 000000000..52fbe5cc9 --- /dev/null +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/evaluators/TiTvVariantEvaluator.java @@ -0,0 +1,60 @@ +package org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.evaluators; + +import org.broad.tribble.util.variantcontext.VariantContext; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.tags.Analysis; +import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.tags.DataPoint; + +@Analysis(description = "Ti/Tv Variant Evaluator") +public class TiTvVariantEvaluator extends VariantEvaluator implements StandardEval { + + @DataPoint(description = "number of transition loci") + long nTi = 0; + @DataPoint(description = "number of transversion loci") + long nTv = 0; + @DataPoint(description = "the transition to transversion ratio") + double tiTvRatio = 0.0; + @DataPoint(description = "number of comp transition sites") + long nTiInComp = 0; + @DataPoint(description = "number of comp transversion sites") + long nTvInComp = 0; + @DataPoint(description = "the transition to transversion ratio for comp sites") + double TiTvRatioStandard = 0.0; + + public boolean enabled() { + return true; + } + + public int getComparisonOrder() { + return 2; // we only need to see each eval track + } + + public void updateTiTv(VariantContext vc, boolean updateStandard) { + if (vc != null && vc.isSNP() && vc.isBiallelic()) { + if (VariantContextUtils.isTransition(vc)) { + if (updateStandard) nTiInComp++; + else nTi++; + } else { + if (updateStandard) nTvInComp++; + else nTv++; + } + } + } + + public String update2(VariantContext vc1, VariantContext vc2, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + if (vc1 != null) updateTiTv(vc1, false); + if (vc2 != null) updateTiTv(vc2, true); + + return null; // we don't capture any intersting sites + } + + @Override + public void finalizeEvaluation() { + // the ti/tv ratio needs to be set (it's not calculated per-variant). + this.tiTvRatio = rate(nTi,nTv); + this.TiTvRatioStandard = rate(nTiInComp, nTvInComp); + } +} diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/evaluators/VariantEvaluator.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/evaluators/VariantEvaluator.java new file mode 100755 index 000000000..a18547b7b --- /dev/null +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/evaluators/VariantEvaluator.java @@ -0,0 +1,49 @@ +package org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.evaluators; + +import org.broad.tribble.util.variantcontext.VariantContext; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.util.NewEvaluationContext; + +public abstract class VariantEvaluator { + public abstract boolean enabled(); + + // Should return the number of VariantContexts expected as inputs to update. Can be 1 or 2 + public abstract int getComparisonOrder(); + + // called at all sites, regardless of eval context itself; useful for counting processed bases + public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { } + + public String update1(VariantContext vc1, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + return null; + } + + public String update1(VariantContext vc1, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context, NewEvaluationContext group) { + return update1(vc1, tracker, ref, context); + } + + + public String update2(VariantContext vc1, VariantContext vc2, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + return null; + } + + public String update2(VariantContext vc1, VariantContext vc2, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context, NewEvaluationContext group) { + return update2(vc1, vc2, tracker, ref, context); + } + + public void finalizeEvaluation() {} + + protected double rate(long n, long d) { + return n / (1.0 * Math.max(d, 1)); + } + + protected long inverseRate(long n, long d) { + return n == 0 ? 0 : d / Math.max(n, 1); + } + + protected double ratio(long num, long denom) { + return ((double)num) / (Math.max(denom, 1)); + } + +} diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/evaluators/VariantQualityScore.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/evaluators/VariantQualityScore.java new file mode 100755 index 000000000..494b42bfb --- /dev/null +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/evaluators/VariantQualityScore.java @@ -0,0 +1,245 @@ +/* + * Copyright (c) 2010 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.evaluators; + +/** + * @author rpoplin + * @since Apr 6, 2010 + */ + +//@Analysis(name = "Variant Quality Score", description = "Shows various stats of sets of variants binned by variant quality score") +//public class VariantQualityScore extends VariantEvaluator { +// +// // a mapping from quality score histogram bin to Ti/Tv ratio +// @DataPoint(name="TiTv by Quality", description = "the Ti/Tv ratio broken out by variant quality") +// TiTvStats titvStats; +// +// @DataPoint(name="Quality by Allele Count", description = "average variant quality for each allele count") +// AlleleCountStats alleleCountStats = null; +// +// static class TiTvStats implements TableType { +// final static int NUM_BINS = 20; +// final HashMap> qualByIsTransition = new HashMap>(); // A hashMap holds all the qualities until we are able to bin them appropriately +// final long transitionByQuality[] = new long[NUM_BINS]; +// final long transversionByQuality[] = new long[NUM_BINS]; +// final double titvByQuality[] = new double[NUM_BINS]; // the final ti/tv sets that get reported out +// +// public Object[] getRowKeys() { +// return new String[]{"sample"}; +// } +// +// public Object[] getColumnKeys() { +// final String columnKeys[] = new String[NUM_BINS]; +// for( int iii = 0; iii < NUM_BINS; iii++ ) { +// columnKeys[iii] = "titvBin" + iii; +// } +// return columnKeys; +// } +// +// public String getName() { +// return "TiTvStats"; +// } +// +// public String getCell(int x, int y) { +// return String.valueOf(titvByQuality[y]); +// } +// +// public String toString() { +// StringBuffer returnString = new StringBuffer(); +// // output the ti/tv array +// returnString.append("titvByQuality: "); +// for( int iii = 0; iii < NUM_BINS; iii++ ) { +// returnString.append(titvByQuality[iii]); +// returnString.append(" "); +// } +// return returnString.toString(); +// } +// +// public void incrValue( final double qual, final boolean isTransition ) { +// final Integer qualKey = Math.round((float) qual); +// final long numTransition = (isTransition ? 1L : 0L); +// final long numTransversion = (isTransition ? 0L : 1L); +// if( qualByIsTransition.containsKey(qualKey) ) { +// Pair transitionPair = qualByIsTransition.get(qualKey); +// transitionPair.set(transitionPair.getFirst() + numTransition, transitionPair.getSecond() + numTransversion); +// qualByIsTransition.put(qualKey, transitionPair); +// } else { +// qualByIsTransition.put(qualKey, new Pair(numTransition,numTransversion)); +// } +// } +// +// public void organizeTiTvTables() { +// for( int iii = 0; iii < NUM_BINS; iii++ ) { +// transitionByQuality[iii] = 0L; +// transversionByQuality[iii] = 0L; +// titvByQuality[iii] = 0.0; +// } +// +// int maxQual = 0; +// +// // Calculate the maximum quality score in order to normalize and histogram +// for( final Integer qual : qualByIsTransition.keySet() ) { +// if( qual > maxQual ) { +// maxQual = qual; +// } +// } +// +// final double binSize = ((double)maxQual) / ((double) (NUM_BINS-1)); +// +// for( final Integer qual : qualByIsTransition.keySet() ) { +// final int index = (int)Math.floor( ((double) qual) / binSize ); +// if( index >= 0 ) { // BUGBUG: why is there overflow here? +// Pair transitionPair = qualByIsTransition.get(qual); +// transitionByQuality[index] += transitionPair.getFirst(); +// transversionByQuality[index] += transitionPair.getSecond(); +// } +// } +// +// for( int iii = 0; iii < NUM_BINS; iii++ ) { +// if( transitionByQuality[iii] + transversionByQuality[iii] > 800L ) { // need to have a sufficient number of variants to get a useful Ti/Tv ratio +// titvByQuality[iii] = ((double) transitionByQuality[iii]) / ((double) transversionByQuality[iii]); +// } else { +// titvByQuality[iii] = 0.0; +// } +// } +// +// } +// } +// +// class AlleleCountStats implements TableType { +// final HashMap> qualityListMap = new HashMap>(); +// final HashMap qualityMap = new HashMap(); +// +// public Object[] getRowKeys() { +// final int NUM_BINS = qualityListMap.keySet().size(); +// final String rowKeys[] = new String[NUM_BINS]; +// int iii = 0; +// for( final Integer key : qualityListMap.keySet() ) { +// rowKeys[iii] = "AC" + key; +// iii++; +// } +// return rowKeys; +// +// } +// +// public Object[] getColumnKeys() { +// return new String[]{"alleleCount","avgQual"}; +// } +// +// public String getName() { +// return "AlleleCountStats"; +// } +// +// public String getCell(int x, int y) { +// int iii = 0; +// for( final Integer key : qualityListMap.keySet() ) { +// if(iii == x) { +// if(y == 0) { return String.valueOf(key); } +// else { return String.valueOf(qualityMap.get(key)); } +// } +// iii++; +// } +// return null; +// } +// +// public String toString() { +// String returnString = ""; +// // output the quality map +// returnString += "AlleleCountStats: "; +// //for( int iii = 0; iii < NUM_BINS; iii++ ) { +// // returnString += titvByQuality[iii] + " "; +// //} +// return returnString; +// } +// +// public void incrValue( final double qual, final int alleleCount ) { +// ArrayList list = qualityListMap.get(alleleCount); +// if(list==null) { list = new ArrayList(); } +// list.add(qual); +// qualityListMap.put(alleleCount, list); +// } +// +// public void organizeAlleleCountTables() { +// for( final Integer key : qualityListMap.keySet() ) { +// final ArrayList list = qualityListMap.get(key); +// double meanQual = 0.0; +// final double numQuals = (double)list.size(); +// for( Double qual : list ) { +// meanQual += qual / numQuals; +// } +// qualityMap.put(key, meanQual); +// } +// } +// } +// +// public VariantQualityScore(VariantEvalWalker parent) { +// super(parent); +// titvStats = null; +// } +// +// public String getName() { +// return "VariantQualityScore"; +// } +// +// public int getComparisonOrder() { +// return 1; // we only need to see each eval track +// } +// +// public boolean enabled() { +// return true; +// } +// +// public String toString() { +// return getName(); +// } +// +// public String update1(VariantContext eval, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { +// final String interesting = null; +// +// if( eval != null && eval.isSNP() && eval.isBiallelic() ) { //BUGBUG: only counting biallelic sites (revisit what to do with triallelic sites) +// if( titvStats == null ) { titvStats = new TiTvStats(); } +// titvStats.incrValue(eval.getPhredScaledQual(), VariantContextUtils.isTransition(eval)); +// +// if( alleleCountStats == null ) { alleleCountStats = new AlleleCountStats(); } +// int alternateAlleleCount = 0; +// for (final Allele a : eval.getAlternateAlleles()) { +// alternateAlleleCount += eval.getChromosomeCount(a); +// } +// alleleCountStats.incrValue(eval.getPhredScaledQual(), alternateAlleleCount); +// } +// +// return interesting; // This module doesn't capture any interesting sites, so return null +// } +// +// public void finalizeEvaluation() { +// if( titvStats != null ) { +// titvStats.organizeTiTvTables(); +// } +// if( alleleCountStats != null ) { +// alleleCountStats.organizeAlleleCountTables(); +// } +// } +//} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/stratifications/CompRodStratifier.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/stratifications/CompRodStratifier.java new file mode 100755 index 000000000..4acd62a79 --- /dev/null +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/stratifications/CompRodStratifier.java @@ -0,0 +1,34 @@ +package org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.stratifications; + +import org.broad.tribble.util.variantcontext.VariantContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; + +import java.util.ArrayList; +import java.util.Set; + +public class CompRodStratifier extends VariantStratifier implements RequiredStratification { + // Needs to know the comp rods + private Set compNames; + private ArrayList states; + + @Override + public void initialize(Set jexlExpressions, Set compNames, Set knownNames, Set evalNames, Set sampleNames) { + this.compNames = compNames; + + states = new ArrayList(); + states.add("comp"); + } + + public ArrayList getAllStates() { + return states; + } + + public ArrayList getRelevantStates(ReferenceContext ref, VariantContext comp, VariantContext eval, String sampleName) { + ArrayList relevantStates = new ArrayList(); + + relevantStates.add("comp"); + + return relevantStates; + } +} diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/stratifications/CpGStatusStratifier.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/stratifications/CpGStatusStratifier.java new file mode 100755 index 000000000..49523950e --- /dev/null +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/stratifications/CpGStatusStratifier.java @@ -0,0 +1,44 @@ +package org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.stratifications; + +import org.broad.tribble.util.variantcontext.VariantContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; + +import java.util.ArrayList; +import java.util.Set; + +public class CpGStatusStratifier extends VariantStratifier { + private ArrayList states; + + @Override + public void initialize(Set jexlExpressions, Set compNames, Set knownNames, Set evalNames, Set sampleNames) { + states = new ArrayList(); + states.add("all"); + states.add("CpG"); + states.add("non_CpG"); + } + + public ArrayList getAllStates() { + return states; + } + + public ArrayList getRelevantStates(ReferenceContext ref, VariantContext comp, VariantContext eval, String sampleName) { + boolean isCpG = false; + if (ref != null && ref.getBases() != null) { + String fwRefBases = new String(ref.getBases()); + + String leftFlank = fwRefBases.substring((fwRefBases.length()/2) - 1, (fwRefBases.length()/2) + 1); + String rightFlank = fwRefBases.substring((fwRefBases.length()/2), (fwRefBases.length()/2) + 2); + + if (leftFlank.equalsIgnoreCase("CG") || leftFlank.equalsIgnoreCase("GC") || rightFlank.equalsIgnoreCase("CG") || rightFlank.equalsIgnoreCase("GC")) { + isCpG = true; + } + } + + ArrayList relevantStates = new ArrayList(); + relevantStates.add("all"); + relevantStates.add(isCpG ? "CpG" : "non_CpG"); + + return relevantStates; + } +} diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/stratifications/EvalRodStratifier.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/stratifications/EvalRodStratifier.java new file mode 100755 index 000000000..d5946af3b --- /dev/null +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/stratifications/EvalRodStratifier.java @@ -0,0 +1,34 @@ +package org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.stratifications; + +import org.broad.tribble.util.variantcontext.VariantContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; + +import java.util.ArrayList; +import java.util.Set; + +public class EvalRodStratifier extends VariantStratifier implements RequiredStratification { + // needs to know the eval rods + private Set evalNames; + private ArrayList states; + + @Override + public void initialize(Set jexlExpressions, Set compNames, Set knownNames, Set evalNames, Set sampleNames) { + this.evalNames = evalNames; + + states = new ArrayList(); + states.add("eval"); + } + + public ArrayList getAllStates() { + return states; + } + + public ArrayList getRelevantStates(ReferenceContext ref, VariantContext comp, VariantContext eval, String sampleName) { + ArrayList relevantStates = new ArrayList(); + + relevantStates.add("eval"); + + return relevantStates; + } +} diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/stratifications/FilterStatusStratifier.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/stratifications/FilterStatusStratifier.java new file mode 100755 index 000000000..a58d14e5a --- /dev/null +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/stratifications/FilterStatusStratifier.java @@ -0,0 +1,36 @@ +package org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.stratifications; + +import org.broad.tribble.util.variantcontext.VariantContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; + +import java.util.ArrayList; +import java.util.Set; + +public class FilterStatusStratifier extends VariantStratifier implements StandardStratification { + // needs to know the variant context + private ArrayList states; + + @Override + public void initialize(Set jexlExpressions, Set compNames, Set knownNames, Set evalNames, Set sampleNames) { + states = new ArrayList(); + states.add("called"); + states.add("filtered"); + states.add("raw"); + } + + public ArrayList getAllStates() { + return states; + } + + public ArrayList getRelevantStates(ReferenceContext ref, VariantContext comp, VariantContext eval, String sampleName) { + ArrayList relevantStates = new ArrayList(); + + relevantStates.add("raw"); + if (eval != null) { + relevantStates.add(eval.isFiltered() ? "filtered" : "called"); + } + + return relevantStates; + } +} diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/stratifications/FunctionalClassStratifier.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/stratifications/FunctionalClassStratifier.java new file mode 100755 index 000000000..1aa10d352 --- /dev/null +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/stratifications/FunctionalClassStratifier.java @@ -0,0 +1,67 @@ +package org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.stratifications; + +import org.broad.tribble.util.variantcontext.VariantContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; + +import java.util.ArrayList; +import java.util.Set; + +public class FunctionalClassStratifier extends VariantStratifier { + // needs to know the variant context + private ArrayList states; + + @Override + public void initialize(Set jexlExpressions, Set compNames, Set knownNames, Set evalNames, Set sampleNames) { + states = new ArrayList(); + states.add("all"); + states.add("silent"); + states.add("missense"); + states.add("nonsense"); + } + + public ArrayList getAllStates() { + return states; + } + + public ArrayList getRelevantStates(ReferenceContext ref, VariantContext comp, VariantContext eval, String sampleName) { + ArrayList relevantStates = new ArrayList(); + + relevantStates.add("all"); + + if (eval != null && eval.isVariant()) { + String type = null; + + if (eval.getAttributeAsString("refseq.functionalClass") != null) { + type = eval.getAttributeAsString("refseq.functionalClass"); + } else if (eval.getAttributeAsString("refseq.functionalClass_1") != null) { + int annotationId = 1; + String key; + + do { + key = String.format("refseq.functionalClass_%d", annotationId); + + String newtype = eval.getAttributeAsString(key); + + if ( newtype != null && + ( type == null || + ( type.equals("silent") && !newtype.equals("silent") ) || + ( type.equals("missense") && newtype.equals("nonsense") ) ) + ) { + type = newtype; + } + + annotationId++; + } while (eval.getAttributeAsString(key) != null); + } + + if (type != null) { + if (type.equals("silent")) { relevantStates.add("silent"); } + else if (type.equals("missense")) { relevantStates.add("missense"); } + else if (type.equals("nonsense")) { relevantStates.add("nonsense"); } + } + } + + return relevantStates; + } +} diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/stratifications/JexlExpressionStratifier.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/stratifications/JexlExpressionStratifier.java new file mode 100755 index 000000000..7703d741f --- /dev/null +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/stratifications/JexlExpressionStratifier.java @@ -0,0 +1,43 @@ +package org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.stratifications; + +import org.broad.tribble.util.variantcontext.VariantContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; + +import java.util.ArrayList; +import java.util.Set; + +public class JexlExpressionStratifier extends VariantStratifier implements StandardStratification { + // needs to know the jexl expressions + private Set jexlExpressions; + private ArrayList states; + + @Override + public void initialize(Set jexlExpressions, Set compNames, Set knownNames, Set evalNames, Set sampleNames) { + this.jexlExpressions = jexlExpressions; + + states = new ArrayList(); + states.add("none"); + for ( VariantContextUtils.JexlVCMatchExp jexlExpression : jexlExpressions ) { + states.add(jexlExpression.name); + } + } + + public ArrayList getAllStates() { + return states; + } + + public ArrayList getRelevantStates(ReferenceContext ref, VariantContext comp, VariantContext eval, String sampleName) { + ArrayList relevantStates = new ArrayList(); + relevantStates.add("none"); + + for ( VariantContextUtils.JexlVCMatchExp jexlExpression : jexlExpressions ) { + System.out.println(jexlExpression.name + " " + jexlExpression.exp.getExpression()); + if (VariantContextUtils.match(eval, jexlExpression)) { + relevantStates.add(jexlExpression.name); + } + } + + return relevantStates; + } +} diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/stratifications/NoveltyStatusStratifier.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/stratifications/NoveltyStatusStratifier.java new file mode 100755 index 000000000..3bae86928 --- /dev/null +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/stratifications/NoveltyStatusStratifier.java @@ -0,0 +1,37 @@ +package org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.stratifications; + +import org.broad.tribble.util.variantcontext.VariantContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; + +import java.util.ArrayList; +import java.util.Set; + +public class NoveltyStatusStratifier extends VariantStratifier implements StandardStratification { + // needs the variant contexts and known names + private Set knownNames; + private ArrayList states; + + @Override + public void initialize(Set jexlExpressions, Set compNames, Set knownNames, Set evalNames, Set sampleNames) { + this.knownNames = knownNames; + + states = new ArrayList(); + states.add("all"); + states.add("known"); + states.add("novel"); + } + + public ArrayList getAllStates() { + return states; + } + + public ArrayList getRelevantStates(ReferenceContext ref, VariantContext comp, VariantContext eval, String sampleName) { + ArrayList relevantStates = new ArrayList(); + + relevantStates.add("all"); + relevantStates.add(comp == null ? "novel" : "known"); + + return relevantStates; + } +} diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/stratifications/RequiredStratification.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/stratifications/RequiredStratification.java new file mode 100755 index 000000000..d6213f3c6 --- /dev/null +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/stratifications/RequiredStratification.java @@ -0,0 +1,3 @@ +package org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.stratifications; + +public interface RequiredStratification {} diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/stratifications/SampleStratifier.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/stratifications/SampleStratifier.java new file mode 100755 index 000000000..b19b2a788 --- /dev/null +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/stratifications/SampleStratifier.java @@ -0,0 +1,32 @@ +package org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.stratifications; + +import org.broad.tribble.util.variantcontext.VariantContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; +import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.NewVariantEvalWalker; + +import java.util.ArrayList; +import java.util.Set; + +public class SampleStratifier extends VariantStratifier { + // needs the sample names + private ArrayList samples; + + @Override + public void initialize(Set jexlExpressions, Set compNames, Set knownNames, Set evalNames, Set sampleNames) { + samples = new ArrayList(); + samples.add(NewVariantEvalWalker.ALL_SAMPLE_NAME); + samples.addAll(sampleNames); + } + + public ArrayList getAllStates() { + return samples; + } + + public ArrayList getRelevantStates(ReferenceContext ref, VariantContext comp, VariantContext eval, String sampleName) { + ArrayList relevantStates = new ArrayList(); + relevantStates.add(sampleName); + + return relevantStates; + } +} diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/stratifications/StandardStratification.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/stratifications/StandardStratification.java new file mode 100755 index 000000000..e4b58c739 --- /dev/null +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/stratifications/StandardStratification.java @@ -0,0 +1,4 @@ +package org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.stratifications; + +public interface StandardStratification { +} diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/stratifications/VariantStratifier.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/stratifications/VariantStratifier.java new file mode 100755 index 000000000..9a19592c6 --- /dev/null +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/stratifications/VariantStratifier.java @@ -0,0 +1,29 @@ +package org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.stratifications; + +import org.broad.tribble.util.variantcontext.VariantContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; + +import java.util.ArrayList; +import java.util.Set; + +public abstract class VariantStratifier implements Comparable { + public abstract void initialize(Set jexlExpressions, Set compNames, Set knownNames, Set evalNames, Set sampleNames); + + public ArrayList getAllStates() { + return new ArrayList(); + } + + public boolean isApplicable(RefMetaDataTracker tracker, ReferenceContext ref, VariantContext comp, VariantContext eval, String state) { + return false; + } + + public ArrayList getRelevantStates(ReferenceContext ref, VariantContext comp, VariantContext eval, String sampleName) { + return null; + } + + public int compareTo(Object o1) { + return this.getClass().getSimpleName().compareTo(o1.getClass().getSimpleName()); + } +} diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/tags/Analysis.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/tags/Analysis.java new file mode 100755 index 000000000..8e6a6f682 --- /dev/null +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/tags/Analysis.java @@ -0,0 +1,10 @@ +package org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.tags; + +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; + +@Retention(RetentionPolicy.RUNTIME) +public @interface Analysis { + String name() default ""; // its description, required + String description(); // its description, required +} diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/tags/DataPoint.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/tags/DataPoint.java new file mode 100755 index 000000000..abdce9736 --- /dev/null +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/tags/DataPoint.java @@ -0,0 +1,9 @@ +package org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.tags; + +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; + +@Retention(RetentionPolicy.RUNTIME) +public @interface DataPoint { + String description() default ""; // the description, optional +} diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/tags/Param.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/tags/Param.java new file mode 100755 index 000000000..3c3fb91a2 --- /dev/null +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/tags/Param.java @@ -0,0 +1,19 @@ +package org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.tags; + +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; + +/** + * @author aaron + *

+ * Annotation Param + *

+ * a description annotation for a parameter; a variable used as input to the + * analysis, but not (nessasarally) an output. Some formats will store this + * information in comments, others will not include it. + */ +@Retention(RetentionPolicy.RUNTIME) +public @interface Param { + String name() default ""; // the name, defaulted to the variable name + String description(); // the description of the parameter +} diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/util/AnalysisModuleScanner.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/util/AnalysisModuleScanner.java new file mode 100755 index 000000000..fa6d2bee1 --- /dev/null +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/util/AnalysisModuleScanner.java @@ -0,0 +1,127 @@ +/* + * Copyright (c) 2010. The Broad Institute + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.util; + +import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.tags.Analysis; +import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.tags.DataPoint; +import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.tags.Param; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + +import java.lang.annotation.Annotation; +import java.lang.reflect.Field; +import java.util.LinkedHashMap; +import java.util.Map; + + +/** + * @author aaron + *

+ * Class AnalysisModuleScanner + *

+ * Given an analysis, find the annotated fields and methods. Given this module and + * the object, a Mashalling object can serialize or deserialize a analysis module. + */ +public class AnalysisModuleScanner { + + // what we extracted from the class + private Map parameters = new LinkedHashMap(); // the parameter annotations + private Map datums = new LinkedHashMap(); // the data we've discovered + private Analysis analysis; // the analysis annotation + + // private storage of the class type + private final Class cls; + + /** + * create a report scanner from the passed in class + * @param cls the target class, annotated with the @Analysis annotation + */ + public AnalysisModuleScanner(Class cls) { + this.cls = cls; + scan(); // scan the passed in class + } + + /** + * create a report scanner from the passed in class + * @param obj the target object, annotated with the @Analysis annotation + */ + public AnalysisModuleScanner(Object obj) { + this.cls = obj.getClass(); + scan(); // scan the passed in class + } + + /** scan the class and find all appropriate fields and tables */ + public void scan() { + if (cls == null || !cls.isAnnotationPresent(Analysis.class)) + throw new ReviewedStingException("The class passed in cannot be null, " + "" + + "and must contain the @Analysis annotation, class " + cls + " was the input"); + + // get the annotation off of the class + analysis = (Analysis) cls.getAnnotation(Analysis.class); + scanFields(); + } + + /** + * scan the fields of the class, extracting parameters and table annotations and their associated fields + */ + private void scanFields() { + // get the fields from the class, and extract + for ( Class superCls = cls; superCls != null; superCls=superCls.getSuperclass() ) { + for (Field f : superCls.getDeclaredFields()) + for (Annotation annotation : f.getAnnotations()) { + if (annotation.annotationType().equals(Param.class)) + parameters.put(f, (Param) annotation); + if (annotation.annotationType().equals(DataPoint.class)) + datums.put(f,(DataPoint) annotation); + } + } + } + + /** + * + * @return get the list of parameters we found + */ + public Map getParameters() { + return parameters; + } + + /** + * + * @return a map of the datum annotations found + */ + public Map getData() { + return datums; + } + + /** + * + * @return the analysis annotation found + */ + public Analysis getAnalysis() { + return analysis; + } + + public Class getModuleClass() { + return cls; + } +} diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/util/ComplexDataUtils.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/util/ComplexDataUtils.java new file mode 100755 index 000000000..7f74abcd6 --- /dev/null +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/util/ComplexDataUtils.java @@ -0,0 +1,137 @@ +package org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.util; + +import java.lang.reflect.Field; + + +/** + * @author aaron + *

+ * Class ComplexDataUtils + *

+ * This class contains methods and techniques for breaking down complex data in the output system + */ +public class ComplexDataUtils { + public static Object resolveObject(Field f, Object e) { + //try { + //return f.toGenericString(); + //return f.getName(); //f.get(e); + + + + //} catch (IllegalAccessException e1) { + // throw new ReviewedStingException("Unable to access field " + f); + //} + return null; + } + + /* + private static String extractPlainObjectOrPrimitive(Object obj) { + String value = ""; + if (obj instanceof Float || obj instanceof Double) + value = String.format("%.4f",(Double)obj); + else + value = obj.toString(); + + return value; + } + */ + + + /** + * convert any string -> object pairing into a string keyed tree + * + * @param obj the object + * @return a mapping of the name to the associated value tree. All non-leaf nodes will be Strings + */ + /* + public static Collection resolveObjects(Object obj) { // TODO: fix this, we need a way to get the name of the list from the data point + Collection nodes = new ArrayList(); + + // the simplest case, the object is null + if (obj == null) nodes.add(new Node("", "", "")); + // capture objects of type TableTable + else if (obj instanceof TableType) + nodes.add(tableToNode((TableType) obj, ((TableType) obj).getName())); + + // try to handle maps + else if (obj instanceof Map) { + throw new UnsupportedOperationException("The report generation system is currently unable to output Maps, due to their ambiguity"); + + // handle collections + } else if (obj instanceof Collection) + nodes.addAll(listToNode((Collection) obj, "collection")); + + // arrays + else if (obj.getClass().isArray()) + nodes.addAll(listToNode(Arrays.asList(obj), "array")); + + // else we have a simple object (at least try to handle it that way + else + nodes.add(extractPlainObjectOrPrimitive(obj.getClass().getSimpleName(),obj)); + + // return the collection of nodes we've parsed out + return nodes; + } + */ + + /** + * extract a (hopefully) primitive value + * @param obj the object + */ + /* + private static Node extractPlainObjectOrPrimitive(String name, Object obj) { + String value = ""; + if (obj instanceof Float || obj instanceof Double) + value = String.format("%.4f",(Double)obj); + else + value = obj.toString(); + return new Node(name, value, "value"); + } + */ + + /** + * given a TableType object, make it into a tree using maps. + * + * @param table the table type to convert into a map + * @return a node representing this table + */ + /* + private static Node tableToNode(TableType table, String name) { + Node root = new Node("table", name, "Table"); + root.setTable(); + Object[] rows = table.getRowKeys(); + Object[] cols = table.getColumnKeys(); + + // add the columns names + for (int x = 0; x < table.getRowKeys().length; x++) { + Node row = new Node("row", rows[x].toString(), "a row in a table"); + root.addChild(row); + for (int y = 0; y < table.getColumnKeys().length; y++) { + Node col = new Node("column", cols[y].toString(), "columns in a table"); + row.addChild(col); + col.addChild(extractPlainObjectOrPrimitive("cell(" + x + "," + y + ")", table.getCell(x, y))); + } + } + return root; + } + */ + + /** + * given a Collection object, make it into a tree using maps. + * + * @param coll the collection to iterate, and turn into a list + * @return a mapping of String to Object + */ + /* + private static Collection listToNode(Collection coll, String name) { + Collection nodes = new ArrayList(); + Iterator iter = coll.iterator(); + for (int x = 0; x < coll.size(); x++) { + Node value = new Node("column " + x, String.valueOf(x), "column"); + value.addChild(new Node("value " + x, iter.next().toString(), "value")); + nodes.add(value); + } + return nodes; + } + */ +} diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/util/EvaluationContext.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/util/EvaluationContext.java new file mode 100755 index 000000000..77014a3fe --- /dev/null +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/util/EvaluationContext.java @@ -0,0 +1,96 @@ +//package org.broadinstitute.sting.playground.gatk.walkers.varianteval.util; +// +//import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; +//import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.VariantEvalWalker; +//import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.evaluators.VariantEvaluator; +//import org.broadinstitute.sting.utils.Utils; +//import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException; +// +//import java.lang.reflect.Constructor; +//import java.util.Arrays; +//import java.util.HashSet; +//import java.util.Set; +// +///** +// * Created by IntelliJ IDEA. +// * User: kiran +// * Date: Dec 14, 2010 +// * Time: 11:15:32 PM +// * To change this template use File | Settings | File Templates. +// */ +//public class EvaluationContext implements Comparable { +// // useful for typing +// public String evalTrackName, compTrackName, novelty, filtered, cpgStatus, functionalClass, sample; +// public boolean enableInterestingSiteCaptures = false; +// public VariantContextUtils.JexlVCMatchExp selectExp; +// public Set evaluations; +// +// private Set> evaluationClasses; +// +// private static String RAW_SET_NAME = "raw"; +// private static String RETAINED_SET_NAME = "called"; +// private static String FILTERED_SET_NAME = "filtered"; +// private static String ALL_SET_NAME = "all"; +// private static String KNOWN_SET_NAME = "known"; +// private static String NOVEL_SET_NAME = "novel"; +// private final static String CONTEXT_SEPARATOR = "XXX"; +// +// public boolean isIgnoringFilters() { return filtered.equals(RAW_SET_NAME); } +// public boolean requiresFiltered() { return filtered.equals(FILTERED_SET_NAME); } +// public boolean requiresNotFiltered() { return filtered.equals(RETAINED_SET_NAME); } +// public boolean isIgnoringNovelty() { return novelty.equals(ALL_SET_NAME); } +// public boolean requiresNovel() { return novelty.equals(NOVEL_SET_NAME); } +// public boolean requiresKnown() { return novelty.equals(KNOWN_SET_NAME); } +// +// public boolean isSelected() { return selectExp == null; } +// +// public String getDisplayName() { +// return getName(CONTEXT_SEPARATOR); +// } +// +// public String getJexlName() { +// return getName("."); +// } +// +// private String getName(String separator) { +// return Utils.join(separator, Arrays.asList(evalTrackName, compTrackName, selectExp == null ? "all" : selectExp.name, filtered, novelty, cpgStatus, functionalClass, sample)); +// } +// +// public String toString() { return getDisplayName(); } +// +// public int compareTo(EvaluationContext other) { +// return this.getDisplayName().compareTo(other.getDisplayName()); +// } +// +// public EvaluationContext( String evalName, String compName, String novelty, String filtered, String cpgStatus, String functionalClass, String sample, VariantContextUtils.JexlVCMatchExp selectExp, Set> evaluationClasses ) { +// this.evalTrackName = evalName; +// this.compTrackName = compName; +// this.novelty = novelty; +// this.filtered = filtered; +// this.selectExp = selectExp; +// this.cpgStatus = cpgStatus; +// this.functionalClass = functionalClass; +// this.sample = sample; +// this.enableInterestingSiteCaptures = selectExp == null; +// this.evaluationClasses = evaluationClasses; +// this.evaluations = instantiateEvalationsSet(); +// } +// +// private Set instantiateEvalationsSet() { +// Set evals = new HashSet(); +// Object[] args = new Object[]{this}; +// Class[] argTypes = new Class[]{VariantEvalWalker.class}; +// +// for ( Class c : evaluationClasses ) { +// try { +// Constructor constructor = c.getConstructor(argTypes); +// VariantEvaluator eval = constructor.newInstance(args); +// evals.add(eval); +// } catch (Exception e) { +// throw new DynamicClassResolutionException(c, e); +// } +// } +// +// return evals; +// } +//} diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/util/NewEvaluationContext.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/util/NewEvaluationContext.java new file mode 100755 index 000000000..2f4eaad3c --- /dev/null +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/util/NewEvaluationContext.java @@ -0,0 +1,78 @@ +package org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.util; + +import org.broad.tribble.util.variantcontext.VariantContext; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.evaluators.VariantEvaluator; +import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.stratifications.VariantStratifier; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.exceptions.StingException; + +import java.util.HashMap; +import java.util.Set; +import java.util.TreeMap; + +public class NewEvaluationContext extends HashMap { + private TreeMap evaluationInstances; + + public String toString() { + String value = ""; + + for ( VariantStratifier key : this.keySet() ) { + value += "\t" + key.getClass().getSimpleName() + ":" + this.get(key) + "\n"; + } + + return value; + } + + public void addEvaluationClassList(Set> evaluationClasses) { + evaluationInstances = new TreeMap(); + + for ( Class c : evaluationClasses ) { + try { + VariantEvaluator eval = c.newInstance(); + evaluationInstances.put(c.getSimpleName(), eval); + } catch (InstantiationException e) { + throw new StingException("Unable to instantiate eval module '" + c.getSimpleName() + "'"); + } catch (IllegalAccessException e) { + throw new StingException("Illegal access error when trying to instantiate eval module '" + c.getSimpleName() + "'"); + } + } + } + + public TreeMap getEvaluationClassList() { + return evaluationInstances; + } + + public void apply(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context, VariantContext comp, VariantContext eval) { + for ( VariantEvaluator evaluation : evaluationInstances.values() ) { + if ( evaluation.enabled() ) { + // we always call update0 in case the evaluation tracks things like number of bases covered + evaluation.update0(tracker, ref, context); + + // the other updateN methods don't see a null context + if ( tracker == null ) + continue; + + // now call the single or paired update function + switch ( evaluation.getComparisonOrder() ) { + case 1: + if (eval != null) { + evaluation.update1(eval, tracker, ref, context); + } + + break; + case 2: + if (eval != null && comp != null) { + evaluation.update2(eval, comp, tracker, ref, context); + } + + break; + default: + throw new ReviewedStingException("BUG: Unexpected evaluation order " + evaluation); + } + } + } + } +} diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/util/StateKey.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/util/StateKey.java new file mode 100755 index 000000000..d1da0af68 --- /dev/null +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/util/StateKey.java @@ -0,0 +1,27 @@ +package org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.util; + +import java.util.TreeMap; + +public class StateKey extends TreeMap { + public int hashCode() { + int hashCode = 1; + + for (String key : this.keySet()) { + String value = this.get(key); + + hashCode *= key.hashCode() + value.hashCode(); + } + + return hashCode; + } + + public String toString() { + String value = ""; + + for ( String key : this.keySet() ) { + value += "\tstate " + key + ":" + this.get(key) + "\n"; + } + + return value; + } +}