Intermediate commit for VariantEval 3.0. Among the changes:
* Stratifications (by comp rod, by eval rod, novelty, filter status, etc.) have been generalized. They are very symmetric with evaluators now. Each stratification can have multiple states (e.g. known, novel, all). New stratifications can be added and optionally applied. Some new stratifications include: - by sample - by functional class - by CpG status * Output is to a single file in GATKReport format, rather than having the options of CSV, R, table, etc. * Rather than needing to state up front that the allowable variant type is a SNP or an indel, each eval record is inspected and the appropriate record type is fetched from the comp track. (This will require a bit more testing...) * Evaluation context (basically a single row in a VariantEval report) generation and retrieval has been overhauled. Now, every possible configuration of stratification state is generated recursively and stored in a HashMap. The key of the HashMap is a key that represents that exact state configuration. When examining a comp track and eval track, this key is computed based on the data, providing easy lookup for the appropriate evaluation context. When there are only a handful of stratification configurations, this isn't a big deal. But when operating on a file with hundreds of samples, multiplied by 3 states for novelty, 3 states for filtration, 3 states for CpG status, etc., it becomes a very big deal. There are still some known issues: * When the per-sample stratification is turned off, things are getting overcounted (too many variants are showing up when compared to the VariantEval 2.0 code). It's probably because I break out the VariantContext by sample even when not necessary, and those irrelevant contexts are still being counted. Or my recursion is overaggressively creating evaluation contexts, and they all get added up in a weird way. But that's why I'm committing now - so I can track down this issue without losing my work so far. 
* The Jexl expressions are sometimes throwing an exception that I don't yet understand (they complain of an incorrect specification on the command-line... *after* the program has made it through a few thousand records. * The request to have evaluations be smart enough to reject certain stratification states is not implemented yet. There's still some work to do before I can replace VariantEval 2.0 with VariantEval 3.0, but feel free to take a look. I'd love comments on the new code. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4946 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
e9201b81d1
commit
fdc514ded3
|
|
@ -0,0 +1,565 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.newvarianteval;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broad.tribble.vcf.VCFConstants;
|
||||
import org.broad.tribble.vcf.VCFHeader;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;
|
||||
import org.broadinstitute.sting.gatk.report.GATKReport;
|
||||
import org.broadinstitute.sting.gatk.report.GATKReportTable;
|
||||
import org.broadinstitute.sting.gatk.walkers.Reference;
|
||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
|
||||
import org.broadinstitute.sting.gatk.walkers.Window;
|
||||
import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.evaluators.StandardEval;
|
||||
import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.evaluators.VariantEvaluator;
|
||||
import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.stratifications.RequiredStratification;
|
||||
import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.stratifications.StandardStratification;
|
||||
import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.stratifications.VariantStratifier;
|
||||
import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.tags.Analysis;
|
||||
import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.tags.DataPoint;
|
||||
import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.util.AnalysisModuleScanner;
|
||||
import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.util.NewEvaluationContext;
|
||||
import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.util.StateKey;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.vcf.VCFUtils;
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.lang.reflect.Field;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* General-purpose tool for variant evaluation (% in dbSNP, genotype concordance, Ts/Tv ratios, and a lot more)
|
||||
*/
|
||||
@Reference(window=@Window(start=-50, stop=50))
|
||||
public class NewVariantEvalWalker extends RodWalker<Integer, Integer> implements TreeReducible<Integer> {
|
||||
// Output arguments
@Output
protected PrintStream out;

// Help arguments
@Argument(fullName="list", shortName="ls", doc="List the available eval modules and exit")
protected Boolean LIST = false;

// Partitioning the data arguments
@Argument(shortName="select", doc="One or more stratifications to use when evaluating the data", required=false)
protected ArrayList<String> SELECT_EXPS = new ArrayList<String>();

@Argument(shortName="selectName", doc="Names to use for the list of stratifications (must be a 1-to-1 mapping)", required=false)
protected ArrayList<String> SELECT_NAMES = new ArrayList<String>();

@Argument(shortName="sample", doc="Derive eval and comp contexts using only these sample genotypes, when genotypes are available in the original context", required=false)
protected Set<String> SAMPLE_EXPRESSIONS;

@Argument(shortName="knownName", doc="Name of ROD bindings containing variant sites that should be treated as known when splitting eval rods into known and novel subsets", required=false)
protected String[] KNOWN_NAMES = {DbSNPHelper.STANDARD_DBSNP_TRACK_NAME};

// Stratification arguments
@Argument(fullName="stratificationModule", shortName="ST", doc="One or more specific stratification modules to apply to the eval track(s) (in addition to the standard stratifications, unless -noS is specified)", required=false)
protected String[] STRATIFICATIONS_TO_USE = {};

@Argument(fullName="doNotUseAllStandardStratifications", shortName="noST", doc="Do not use the standard stratification modules by default (instead, only those that are specified with the -S option)")
protected Boolean NO_STANDARD_STRATIFICATIONS = false;

// Evaluator arguments
@Argument(fullName="evalModule", shortName="EV", doc="One or more specific eval modules to apply to the eval track(s) (in addition to the standard modules, unless -noE is specified)", required=false)
protected String[] MODULES_TO_USE = {};

@Argument(fullName="doNotUseAllStandardModules", shortName="noEV", doc="Do not use the standard modules by default (instead, only those that are specified with the -E option)")
protected Boolean NO_STANDARD_MODULES = false;

// Variables

// JEXL-based site-selection expressions, built from SELECT_NAMES/SELECT_EXPS in initialize()
private Set<VariantContextUtils.JexlVCMatchExp> jexlExpressions = new TreeSet<VariantContextUtils.JexlVCMatchExp>();

// Track-name sets, populated in initialize() by categorizing each ROD binding by name prefix
private Set<String> compNames = new TreeSet<String>();
private Set<String> knownNames = new TreeSet<String>();
private Set<String> evalNames = new TreeSet<String>();
private Set<String> sampleNames = new TreeSet<String>();

// The list of stratifiers and evaluators to use; assigned once in initialize()
private TreeSet<VariantStratifier> stratificationObjects = null;
private Set<Class<? extends VariantEvaluator>> evaluationObjects = null;

// The set of all possible evaluation contexts, keyed by the exact combination of
// stratification states that the context represents (one entry per report row)
private HashMap<StateKey, NewEvaluationContext> evaluationContexts = null;

// Output report, printed to 'out' in onTraversalDone()
private GATKReport report = null;

// Public constants

// Pseudo-sample name used when a context is not broken out per individual sample
public static String ALL_SAMPLE_NAME = "all";
|
||||
|
||||
/**
|
||||
* List all of the available evaluation modules, then exit successfully
|
||||
*/
|
||||
private void listModulesAndExit() {
|
||||
List<Class<? extends VariantStratifier>> vsClasses = new PluginManager<VariantStratifier>( VariantStratifier.class ).getPlugins();
|
||||
List<Class<? extends VariantEvaluator>> veClasses = new PluginManager<VariantEvaluator>( VariantEvaluator.class ).getPlugins();
|
||||
|
||||
logger.info("Available stratifcation modules:");
|
||||
logger.info("(Standard modules are starred)");
|
||||
for (Class<? extends VariantStratifier> vsClass : vsClasses) {
|
||||
logger.info("\t" + vsClass.getSimpleName() + (RequiredStratification.class.isAssignableFrom(vsClass) || StandardStratification.class.isAssignableFrom(vsClass) ? "*" : ""));
|
||||
}
|
||||
logger.info("");
|
||||
|
||||
logger.info("Available eval modules:");
|
||||
logger.info("(Standard modules are starred)");
|
||||
for (Class<? extends VariantEvaluator> veClass : veClasses) {
|
||||
logger.info("\t" + veClass.getSimpleName() + (StandardEval.class.isAssignableFrom(veClass) ? "*" : ""));
|
||||
}
|
||||
logger.info("");
|
||||
|
||||
System.exit(0);
|
||||
}
|
||||
|
||||
private TreeSet<VariantStratifier> initializeStratificationObjects(boolean noStandardStrats, String[] modulesToUse) {
|
||||
TreeSet<VariantStratifier> strats = new TreeSet<VariantStratifier>();
|
||||
Set<String> stratsToUse = new HashSet<String>();
|
||||
|
||||
// Create a map for all stratification modules for easy lookup.
|
||||
HashMap<String, Class<? extends VariantStratifier>> classMap = new HashMap<String, Class<? extends VariantStratifier>>();
|
||||
for ( Class<? extends VariantStratifier> c : new PluginManager<VariantStratifier>( VariantStratifier.class ).getPlugins() ) {
|
||||
classMap.put(c.getSimpleName(), c);
|
||||
}
|
||||
|
||||
// We must use all required stratification modules.
|
||||
for ( Class<? extends RequiredStratification> reqClass : new PluginManager<RequiredStratification>( RequiredStratification.class ).getPlugins() ) {
|
||||
if ( classMap.containsKey(reqClass.getSimpleName()) ) {
|
||||
stratsToUse.add(reqClass.getSimpleName());
|
||||
}
|
||||
}
|
||||
|
||||
// By default, use standard stratification modules.
|
||||
if ( !noStandardStrats ) {
|
||||
for ( Class<? extends StandardStratification> stdClass : new PluginManager<StandardStratification>( StandardStratification.class ).getPlugins() ) {
|
||||
if ( classMap.containsKey(stdClass.getSimpleName()) ) {
|
||||
stratsToUse.add(stdClass.getSimpleName());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Now add the user-selected modules
|
||||
stratsToUse.addAll(Arrays.asList(modulesToUse));
|
||||
|
||||
// Instantiate the stratifications
|
||||
for ( String module : stratsToUse ) {
|
||||
if ( !classMap.containsKey(module) ) {
|
||||
throw new UserException.CommandLineException("Module " + module + " could not be found; please check that you have specified the class name correctly");
|
||||
}
|
||||
|
||||
if ( classMap.containsKey(module) ) {
|
||||
Class<? extends VariantStratifier> c = classMap.get(module);
|
||||
|
||||
try {
|
||||
VariantStratifier vs = c.newInstance();
|
||||
vs.initialize(jexlExpressions, compNames, knownNames, evalNames, sampleNames);
|
||||
|
||||
strats.add(vs);
|
||||
} catch (InstantiationException e) {
|
||||
throw new StingException("Unable to instantiate stratification module '" + c.getSimpleName() + "'");
|
||||
} catch (IllegalAccessException e) {
|
||||
throw new StingException("Illegal access error when trying to instantiate stratification module '" + c.getSimpleName() + "'");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return strats;
|
||||
}
|
||||
|
||||
private Set<Class<? extends VariantEvaluator>> initializeEvaluationObjects(boolean noStandardEvals, String[] modulesToUse) {
|
||||
Set<Class<? extends VariantEvaluator>> evals = new HashSet<Class<? extends VariantEvaluator>>();
|
||||
|
||||
// Create a map for all eval modules for easy lookup.
|
||||
HashMap<String, Class<? extends VariantEvaluator>> classMap = new HashMap<String, Class<? extends VariantEvaluator>>();
|
||||
for ( Class<? extends VariantEvaluator> c : new PluginManager<VariantEvaluator>( VariantEvaluator.class ).getPlugins() ) {
|
||||
classMap.put(c.getSimpleName(), c);
|
||||
}
|
||||
|
||||
// By default, use standard eval modules.
|
||||
if ( !noStandardEvals ) {
|
||||
for ( Class<? extends StandardEval> stdClass : new PluginManager<StandardEval>( StandardEval.class ).getPlugins() ) {
|
||||
if ( classMap.containsKey(stdClass.getSimpleName()) ) {
|
||||
evals.add(classMap.get(stdClass.getSimpleName()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Get the specific classes provided.
|
||||
for ( String module : modulesToUse ) {
|
||||
if ( !classMap.containsKey(module) ) {
|
||||
throw new UserException.CommandLineException("Module " + module + " could not be found; please check that you have specified the class name correctly");
|
||||
}
|
||||
|
||||
if ( classMap.containsKey(module) ) {
|
||||
evals.add(classMap.get(module));
|
||||
}
|
||||
}
|
||||
|
||||
return evals;
|
||||
}
|
||||
|
||||
/**
 * Recursively generate every possible combination of stratification states and create an
 * evaluation context (one row of the final report) for each combination, keyed by the
 * StateKey that encodes that exact state configuration.
 *
 * Call externally with stratStack == null and ec == null; the stack and accumulated
 * context are internal recursion state.
 *
 * NOTE(review): within the state loop below, the SAME NewEvaluationContext instance
 * ('nec') is mutated and passed down for every state, and the leaf branch stores 'ec'
 * itself into the result map — so contexts reached through different states of the last
 * stratifier may alias one object. This looks related to the overcounting issue noted
 * in the commit message — TODO confirm before relying on per-context counts.
 *
 * @param stratificationObjects all stratifiers in play
 * @param evaluationObjects     evaluator classes to attach to each leaf context
 * @param stratStack            stratifiers not yet expanded (null on the initial call)
 * @param ec                    partially-built context (null on the initial call)
 * @return a map from every complete StateKey to its evaluation context
 */
private HashMap<StateKey, NewEvaluationContext> initializeEvaluationContexts(Set<VariantStratifier> stratificationObjects, Set<Class<? extends VariantEvaluator>> evaluationObjects, Stack<VariantStratifier> stratStack, NewEvaluationContext ec) {
    HashMap<StateKey, NewEvaluationContext> ecs = new HashMap<StateKey, NewEvaluationContext>();

    // First (external) call: seed the recursion stack with every stratifier.
    if (stratStack == null) {
        stratStack = new Stack<VariantStratifier>();
        stratStack.addAll(stratificationObjects);
    }

    if (!stratStack.isEmpty()) {
        // Copy the stack so siblings in the recursion don't see each other's pops.
        Stack<VariantStratifier> newStratStack = new Stack<VariantStratifier>();
        newStratStack.addAll(stratStack);

        NewEvaluationContext nec = new NewEvaluationContext();
        if (ec != null) {
            nec.putAll(ec);
        }

        VariantStratifier vs = newStratStack.pop();

        // Branch once per possible state of this stratifier.
        for ( String state : vs.getAllStates() ) {
            nec.put(vs, state);

            ecs.putAll(initializeEvaluationContexts(stratificationObjects, evaluationObjects, newStratStack, nec));
        }
    } else {
        // Leaf: every stratifier has a state assigned; build the StateKey for this
        // exact configuration and register the context under it.
        HashMap<StateKey, NewEvaluationContext> necs = new HashMap<StateKey, NewEvaluationContext>();

        StateKey statekey = new StateKey();
        for ( VariantStratifier vs : ec.keySet() ) {
            String state = ec.get(vs);

            statekey.put(vs.getClass().getSimpleName(), state);
        }

        ec.addEvaluationClassList(evaluationObjects);

        necs.put(statekey, ec);

        return necs;
    }

    return ecs;
}
|
||||
|
||||
private GATKReport initializeGATKReport(Set<VariantStratifier> stratificationObjects, Set<Class<? extends VariantEvaluator>> evaluationObjects) {
|
||||
GATKReport report = new GATKReport();
|
||||
|
||||
for ( Class<? extends VariantEvaluator> ve : evaluationObjects ) {
|
||||
String tableName = ve.getSimpleName();
|
||||
String tableDesc = ve.getAnnotation(Analysis.class).description();
|
||||
|
||||
report.addTable(tableName, tableDesc);
|
||||
|
||||
GATKReportTable table = report.getTable(tableName);
|
||||
table.addPrimaryKey("entry", false);
|
||||
|
||||
for ( VariantStratifier vs : stratificationObjects ) {
|
||||
String columnName = vs.getClass().getSimpleName();
|
||||
|
||||
columnName = columnName.replace("Stratifier", "");
|
||||
columnName = columnName.replace("Status", "");
|
||||
|
||||
table.addColumn(columnName, "unknown");
|
||||
}
|
||||
|
||||
AnalysisModuleScanner scanner = new AnalysisModuleScanner(ve);
|
||||
Map<Field, DataPoint> datamap = scanner.getData();
|
||||
|
||||
for (Field field : datamap.keySet()) {
|
||||
table.addColumn(field.getName(), 0.0);
|
||||
}
|
||||
}
|
||||
|
||||
return report;
|
||||
}
|
||||
|
||||
/**
 * Walker setup: categorize ROD bindings into eval/comp sets by name prefix, collect the
 * sample list from the eval VCF headers, compile the JEXL select expressions, build the
 * stratifier/evaluator sets, pre-generate every evaluation context, and create the
 * empty GATKReport.
 */
public void initialize() {
    // Just list the modules, and exit quickly.
    if (LIST) { listModulesAndExit(); }

    // Categorize each rod as an eval or a comp rod.
    for ( ReferenceOrderedDataSource d : this.getToolkit().getRodDataSources() ) {
        if ( d.getName().startsWith("eval") ) {
            evalNames.add(d.getName());
        } else if ( d.getName().startsWith("comp") || d.getName().startsWith(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME) ) {
            compNames.add(d.getName());
        } else {
            // NOTE(review): this message interpolates KNOWN_NAMES, but the check above
            // actually tests the "eval"/"comp"/dbsnp prefixes — confirm the intended wording.
            logger.info(String.format("Not evaluating ROD binding '%s' because the name did not start with %s, comp, or eval", d.getName(), Utils.join(", ", KNOWN_NAMES)));
        }
    }

    // Set up set of known names
    knownNames.addAll(Arrays.asList(KNOWN_NAMES));

    // Now that we have all the rods categorized, determine the sample list from the eval rods.
    Map<String, VCFHeader> vcfRods = VCFUtils.getVCFHeadersFromRods(getToolkit(), evalNames);
    Set<String> vcfSamples = SampleUtils.getSampleList(vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE);
    sampleNames.addAll(SampleUtils.getSamplesFromCommandLineInput(vcfSamples, SAMPLE_EXPRESSIONS));

    // Initialize select expressions
    jexlExpressions.addAll(VariantContextUtils.initializeMatchExps(SELECT_NAMES, SELECT_EXPS));

    // Initialize the set of stratifications and evaluations to use
    stratificationObjects = initializeStratificationObjects(NO_STANDARD_STRATIFICATIONS, STRATIFICATIONS_TO_USE);
    evaluationObjects = initializeEvaluationObjects(NO_STANDARD_MODULES, MODULES_TO_USE);

    // Initialize the evaluation contexts (one per stratification-state combination)
    evaluationContexts = initializeEvaluationContexts(stratificationObjects, evaluationObjects, null, null);

    // Initialize report table
    report = initializeGATKReport(stratificationObjects, evaluationObjects);
}
|
||||
|
||||
private EnumSet<VariantContext.Type> getAllowableVariationTypes(RefMetaDataTracker tracker, ReferenceContext ref, Set<String> evalNames) {
|
||||
EnumSet<VariantContext.Type> allowableTypes = EnumSet.of(VariantContext.Type.NO_VARIATION);
|
||||
|
||||
Collection<VariantContext> vcs = tracker.getVariantContexts(ref, evalNames, null, ref.getLocus(), true, false);
|
||||
|
||||
for ( VariantContext vc : vcs ) {
|
||||
allowableTypes.add(vc.getType());
|
||||
}
|
||||
|
||||
return allowableTypes;
|
||||
}
|
||||
|
||||
/**
|
||||
* For a list of track names, bind the variant contexts to a trackName->sampleName->VariantContext mapping.
|
||||
* Additional variant contexts per sample are automatically generated and added to the map unless the
|
||||
* sample name matches the ALL_SAMPLE_NAME constant.
|
||||
*
|
||||
* @param tracker the metadata tracker
|
||||
* @param ref the reference context
|
||||
* @param trackNames the list of track names to process
|
||||
* @param sampleNames the list of samples to include
|
||||
* @param allowableTypes a set of allowable variation types
|
||||
* @return a mapping of track names to a list of VariantContext objects
|
||||
*/
|
||||
private HashMap<String, HashMap<String, VariantContext>> bindVariantContexts(RefMetaDataTracker tracker, ReferenceContext ref, Set<String> trackNames, Set<String> sampleNames, EnumSet<VariantContext.Type> allowableTypes) {
|
||||
HashMap<String, HashMap<String, VariantContext>> bindings = new HashMap<String, HashMap<String, VariantContext>>();
|
||||
|
||||
for ( String trackName : trackNames ) {
|
||||
Collection<VariantContext> contexts = tracker.getVariantContexts(ref, trackName, allowableTypes, ref.getLocus(), true, true);
|
||||
|
||||
VariantContext vc = contexts.size() == 1 ? contexts.iterator().next() : null;
|
||||
|
||||
HashMap<String, VariantContext> vcs = new HashMap<String, VariantContext>();
|
||||
|
||||
if ( vc != null ) {
|
||||
for ( String sample : sampleNames ) {
|
||||
VariantContext vcsub = vc;
|
||||
|
||||
if (!sample.equals(ALL_SAMPLE_NAME)) {
|
||||
vcsub = vcsub.subContextFromGenotypes(vcsub.getGenotype(sample));
|
||||
|
||||
HashMap<String,Object> newAts = new HashMap<String,Object>(vcsub.getAttributes());
|
||||
VariantContextUtils.calculateChromosomeCounts(vcsub,newAts,true);
|
||||
vcsub = VariantContext.modifyAttributes(vcsub,newAts);
|
||||
|
||||
logger.debug(String.format("VC %s subset to %s AC%n", vc.getSource(), vc.getAttributeAsString(VCFConstants.ALLELE_COUNT_KEY)));
|
||||
}
|
||||
|
||||
vcs.put(sample, vcsub);
|
||||
}
|
||||
|
||||
bindings.put(trackName, vcs);
|
||||
}
|
||||
}
|
||||
|
||||
return bindings;
|
||||
}
|
||||
|
||||
/**
 * Maps track names to sample name to VariantContext objects. Comp tracks are bound only
 * under the ALL_SAMPLE_NAME pseudo-sample; eval tracks are bound per specified sample.
 * The allowable variation types are derived from what the eval tracks contain at this
 * locus, so the comp lookup fetches records of the matching type.
 *
 * @param tracker     the metadata tracker
 * @param ref         the reference context
 * @param compNames   the list of comp names to process
 * @param evalNames   the list of eval names to process
 * @param sampleNames the list of samples to include
 * @return a mapping of track names to a per-sample map of VariantContext objects
 */
private HashMap<String, HashMap<String, VariantContext>> getVariantContexts(RefMetaDataTracker tracker, ReferenceContext ref, Set<String> compNames, Set<String> evalNames, Set<String> sampleNames) {
    HashMap<String, HashMap<String, VariantContext>> vcs = new HashMap<String, HashMap<String, VariantContext>>();

    Set<String> allSamplesList = new HashSet<String>();
    allSamplesList.add(ALL_SAMPLE_NAME);

    EnumSet<VariantContext.Type> allowableTypes = getAllowableVariationTypes(tracker, ref, evalNames);

    HashMap<String, HashMap<String, VariantContext>> compBindings = bindVariantContexts(tracker, ref, compNames, allSamplesList, allowableTypes);
    HashMap<String, HashMap<String, VariantContext>> evalBindings = bindVariantContexts(tracker, ref, evalNames, sampleNames, allowableTypes);

    // NOTE(review): the alternative below (only break eval out per sample when the
    // sample stratifier is active) is a pending fix for the known overcounting issue
    // described in the commit message; kept here until that decision is made.
    //HashMap<String, HashMap<String, VariantContext>> evalBindings;
    //if (stratificationObjects.contains(SampleStratifier) {
    //    evalBindings = bindVariantContexts(tracker, ref, evalNames, sampleNames, allowableTypes);
    //} else {
    //    evalBindings = bindVariantContexts(tracker, ref, evalNames, allSamplesList, allowableTypes);
    //}

    vcs.putAll(compBindings);
    vcs.putAll(evalBindings);

    return vcs;
}
|
||||
|
||||
/**
 * Expand a map of stratifier -> relevant states into the list of every concrete StateKey
 * (one state chosen per stratifier) matching this site — the cross product of the
 * per-stratifier state lists.
 *
 * Call externally with stateStack == null and stateKey == null; the stack and partial
 * key are internal recursion state.
 *
 * NOTE(review): the same 'newStateKey' instance is mutated across loop iterations and
 * the leaf branch stores the received 'stateKey' object directly, so returned keys may
 * alias one another (same pattern as initializeEvaluationContexts) — TODO confirm this
 * is harmless given how the keys are consumed in map().
 *
 * @param stateMap   per-stratifier lists of states relevant to the current site
 * @param stateStack stratifier/state-list pairs not yet expanded (null on initial call)
 * @param stateKey   partially-built key (null on initial call)
 * @return every complete StateKey combination for this site
 */
private ArrayList<StateKey> initializeStateKeys(HashMap<VariantStratifier, ArrayList<String>> stateMap, Stack<HashMap<VariantStratifier, ArrayList<String>>> stateStack, StateKey stateKey) {
    ArrayList<StateKey> stateKeys = new ArrayList<StateKey>();

    // First (external) call: push one single-entry map per stratifier onto the stack.
    if (stateStack == null) {
        stateStack = new Stack<HashMap<VariantStratifier, ArrayList<String>>>();

        for ( VariantStratifier vs : stateMap.keySet() ) {
            HashMap<VariantStratifier, ArrayList<String>> oneSetOfStates = new HashMap<VariantStratifier, ArrayList<String>>();
            oneSetOfStates.put(vs, stateMap.get(vs));

            stateStack.add(oneSetOfStates);
        }
    }

    if (!stateStack.isEmpty()) {
        // Copy the stack so siblings in the recursion don't see each other's pops.
        Stack<HashMap<VariantStratifier, ArrayList<String>>> newStateStack = new Stack<HashMap<VariantStratifier, ArrayList<String>>>();
        newStateStack.addAll(stateStack);

        StateKey newStateKey = new StateKey();
        if (stateKey != null) {
            newStateKey.putAll(stateKey);
        }

        HashMap<VariantStratifier, ArrayList<String>> oneSetOfStates = newStateStack.pop();
        // Each stack entry holds exactly one stratifier (see the seeding loop above).
        VariantStratifier vs = oneSetOfStates.keySet().iterator().next();

        for ( String state : oneSetOfStates.get(vs)) {
            newStateKey.put(vs.getClass().getSimpleName(), state);

            stateKeys.addAll(initializeStateKeys(stateMap, newStateStack, newStateKey));
        }
    } else {
        // Leaf: the key is complete; return it as a singleton list.
        ArrayList<StateKey> newStateKeys = new ArrayList<StateKey>();

        newStateKeys.add(stateKey);

        return newStateKeys;
    }

    return stateKeys;
}
|
||||
|
||||
/**
 * Per-locus traversal: for each (comp, eval, sample) combination, compute the relevant
 * stratification states, expand them into every matching StateKey, and apply the
 * comp/eval pair to each matching evaluation context.
 *
 * @return always null; the evaluation state lives in evaluationContexts, not the reduce value
 */
@Override
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
    if (tracker != null) {
        // track sample vc
        HashMap<String, HashMap<String, VariantContext>> vcs = getVariantContexts(tracker, ref, compNames, evalNames, sampleNames);

        for ( String compName : compNames ) {
            // Comp tracks are always bound under the ALL_SAMPLE_NAME pseudo-sample.
            VariantContext comp = vcs.containsKey(compName) && vcs.get(compName) != null && vcs.get(compName).containsKey(ALL_SAMPLE_NAME) ? vcs.get(compName).get(ALL_SAMPLE_NAME) : null;

            // NOTE(review): known issue — eval records are broken out per sample here even
            // when per-sample stratification is off, which the commit notes identify as a
            // likely cause of overcounting relative to VariantEval 2.0.
            for ( String evalName : evalNames ) {
                for ( String sampleName : sampleNames ) {
                    VariantContext eval = vcs.get(evalName) == null ? null : vcs.get(evalName).get(sampleName);

                    // Ask each stratifier which of its states apply to this comp/eval/sample triple.
                    HashMap<VariantStratifier, ArrayList<String>> stateMap = new HashMap<VariantStratifier, ArrayList<String>>();
                    for ( VariantStratifier vs : stratificationObjects ) {
                        ArrayList<String> states = vs.getRelevantStates(ref, comp, eval, sampleName);
                        stateMap.put(vs, states);
                    }

                    // Expand the per-stratifier state lists into every concrete StateKey.
                    ArrayList<StateKey> stateKeys = initializeStateKeys(stateMap, null, null);

                    // Update every evaluation context whose key matches this site.
                    for ( StateKey stateKey : stateKeys ) {
                        NewEvaluationContext nec = evaluationContexts.get(stateKey);

                        nec.apply(tracker, ref, context, comp, eval);
                    }

                    //logger.info(ref.getLocus());
                    //logger.info("\tcomp: " + comp);
                    //logger.info("\teval: " + eval);
                }
            }
        }
    }

    return null;
}
|
||||
|
||||
/**
 * A composite, 'reduce of reduces' function. Unused by this walker: all evaluation
 * state is accumulated in evaluationContexts, so the reduce value carries no data.
 *
 * @param lhs 'left-most' portion of data in the composite reduce.
 * @param rhs 'right-most' portion of data in the composite reduce.
 * @return always null (the reduce value is unused).
 */
public Integer treeReduce(Integer lhs, Integer rhs) {
    return null;
}
|
||||
|
||||
/**
 * Provide an initial value for reduce computations. Unused by this walker: all
 * evaluation state is accumulated in evaluationContexts.
 *
 * @return always null (the reduce value is unused).
 */
@Override
public Integer reduceInit() {
    return null;
}
|
||||
|
||||
/**
 * Reduces a single map with the accumulator provided as the ReduceType. Unused by this
 * walker: all evaluation state is accumulated in evaluationContexts.
 *
 * @param value result of the map.
 * @param sum accumulator for the reduce.
 * @return always null (the reduce value is unused).
 */
@Override
public Integer reduce(Integer value, Integer sum) {
    return null;
}
|
||||
|
||||
/**
 * Traversal end: finalize every evaluator in every evaluation context, copy the
 * stratification states and @DataPoint field values into the corresponding report
 * table row (keyed by the StateKey's string form), and print the report to 'out'.
 *
 * @param result the final (unused) reduce value
 */
public void onTraversalDone(Integer result) {
    for ( StateKey stateKey : evaluationContexts.keySet() ) {
        NewEvaluationContext nec = evaluationContexts.get(stateKey);

        for ( VariantEvaluator ve : nec.getEvaluationClassList().values() ) {
            ve.finalizeEvaluation();

            AnalysisModuleScanner scanner = new AnalysisModuleScanner(ve);
            Map<Field, DataPoint> datamap = scanner.getData();

            // One table per evaluator class (created in initializeGATKReport).
            GATKReportTable table = report.getTable(ve.getClass().getSimpleName());

            // Fill in this row's stratifier-state columns; the column names use the
            // same "Stratifier"/"Status" stripping as initializeGATKReport.
            for ( VariantStratifier vs : stratificationObjects ) {
                String columnName = vs.getClass().getSimpleName();

                columnName = columnName.replace("Stratifier", "");
                columnName = columnName.replace("Status", "");

                table.set(stateKey.toString(), columnName, stateKey.get(vs.getClass().getSimpleName()));
            }

            // Fill in this row's data columns by reading each @DataPoint field reflectively.
            for (Field field : datamap.keySet()) {
                try {
                    field.setAccessible(true);
                    table.set(stateKey.toString(), field.getName(), field.get(ve));
                } catch (IllegalAccessException e) {
                    throw new ReviewedStingException("Unable to access a data field in a VariantEval analysis module: " + e);
                }
            }
        }
    }

    report.print(out);
}
|
||||
}
|
||||
|
|
@ -0,0 +1,35 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.evaluators;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
||||
class CompEvalGenotypes {
|
||||
private GenomeLoc loc;
|
||||
private Genotype compGt;
|
||||
private Genotype evalGt;
|
||||
|
||||
public CompEvalGenotypes(GenomeLoc loc, Genotype compGt, Genotype evalGt) {
|
||||
this.loc = loc;
|
||||
this.compGt = compGt;
|
||||
this.evalGt = evalGt;
|
||||
}
|
||||
|
||||
public GenomeLoc getLocus() {
|
||||
return loc;
|
||||
}
|
||||
|
||||
public Genotype getCompGenotpye() {
|
||||
return compGt;
|
||||
}
|
||||
public Genotype getEvalGenotype() {
|
||||
return evalGt;
|
||||
}
|
||||
|
||||
public void setCompGenotype(Genotype compGt) {
|
||||
this.compGt = compGt;
|
||||
}
|
||||
|
||||
public void setEvalGenotype(Genotype evalGt) {
|
||||
this.evalGt = evalGt;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,96 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.evaluators;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.Allele;
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.tags.Analysis;
|
||||
import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.tags.DataPoint;
|
||||
|
||||
/**
|
||||
* The Broad Institute
|
||||
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
|
||||
* This software and its documentation are copyright 2009 by the
|
||||
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
|
||||
* <p/>
|
||||
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
|
||||
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
||||
*/
|
||||
@Analysis(description = "The overlap between eval and comp sites")
public class CompOverlap extends VariantEvaluator implements StandardEval {

    @DataPoint(description = "number of eval SNP sites")
    long nEvalSNPs = 0;

    @DataPoint(description = "number of comp SNP sites")
    long nCompSNPs = 0;

    @DataPoint(description = "number of eval sites outside of comp sites")
    long novelSites = 0;

    @DataPoint(description = "number of eval sites at comp sites")
    long nSNPsAtComp = 0;

    @DataPoint(description = "percentage of eval sites at comp sites")
    double compRate = 0.0;

    @DataPoint(description = "number of concordant sites")
    long nConcordant = 0;

    @DataPoint(description = "the concordance rate")
    double concordantRate = 0.0;

    public int getComparisonOrder() {
        return 2; // we need to see each eval track and each comp track
    }

    // Derived statistics; copied into the @DataPoint fields in finalizeEvaluation().
    public long nNovelSites() { return nEvalSNPs - nSNPsAtComp; }
    public double compRate() { return rate(nSNPsAtComp, nEvalSNPs); }
    public double concordanceRate() { return rate(nConcordant, nSNPsAtComp); }

    /** Convert the running counts into the reported percentages and novel-site count. */
    public void finalizeEvaluation() {
        compRate = 100 * compRate();
        concordantRate = 100 * concordanceRate();
        novelSites = nNovelSites();
    }

    public boolean enabled() {
        return true;
    }

    /**
     * Returns true if any allele in eval is NOT present in comp, i.e. the two sites
     * are discordant. (Comparison is allele-by-allele via comp.hasAllele.)
     *
     * @param eval eval context
     * @param comp db context
     * @return true if eval and db are discordant
     */
    public boolean discordantP(VariantContext eval, VariantContext comp) {
        for (Allele a : eval.getAlleles()) {
            if (!comp.hasAllele(a, true))
                return true;
        }

        return false;
    }

    /**
     * Per-site update for an (eval, comp) pair: count comp and eval events, sites where
     * both occur, and sites where the two are allele-concordant.
     *
     * NOTE(review): expectingIndels is hardcoded false here, so only SNP records are
     * counted; the commit notes say per-record type dispatch still needs more testing.
     */
    public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
        boolean expectingIndels = false;

        boolean compIsGood = expectingIndels ? comp != null && comp.isNotFiltered() && comp.isIndel() : comp != null && comp.isNotFiltered() && comp.isSNP() ;
        boolean evalIsGood = expectingIndels ? eval != null && eval.isIndel() : eval != null && eval.isSNP() ;

        if (compIsGood) nCompSNPs++; // count the number of comp events
        if (evalIsGood) nEvalSNPs++; // count the number of eval events

        if (compIsGood && evalIsGood) {
            nSNPsAtComp++;

            if (!discordantP(eval, comp)) // count whether we're concordant or not with the comp value
                nConcordant++;
        }

        return null; // we don't capture any interesting sites
    }
}
|
||||
|
|
@ -0,0 +1,144 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.evaluators;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.Genotype;
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.tags.Analysis;
|
||||
import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.tags.DataPoint;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
/**
 * Counts the basic classes of variants (SNP / insertion / deletion / complex) and
 * genotypes (no-call / het / hom-ref / hom-var) observed in the eval track, then
 * derives per-locus rates and ratios in finalizeEvaluation().
 */
@Analysis(description = "Counts different classes of variants in the sample")
public class CountVariants extends VariantEvaluator implements StandardEval {

    // the following fields are in output order:

    // basic counts on various rates found
    @DataPoint(description = "Number of processed loci")
    long nProcessedLoci = 0;
    @DataPoint(description = "Number of called loci")
    long nCalledLoci = 0;
    @DataPoint(description = "Number of reference loci")
    long nRefLoci = 0;
    @DataPoint(description = "Number of variant loci")
    long nVariantLoci = 0;

    // the following two calculations get set in the finalizeEvaluation
    @DataPoint(description = "Variants per loci rate")
    double variantRate = 0;
    @DataPoint(description = "Number of variants per base")
    double variantRatePerBp = 0;

    @DataPoint(description = "Number of snp loci")
    long nSNPs = 0;
    @DataPoint(description = "Number of insertions")
    long nInsertions = 0;
    @DataPoint(description = "Number of deletions")
    long nDeletions = 0;
    @DataPoint(description = "Number of complex loci")
    long nComplex = 0;

    @DataPoint(description = "Number of no calls loci")
    long nNoCalls = 0;
    @DataPoint(description = "Number of het loci")
    long nHets = 0;
    @DataPoint(description = "Number of hom ref loci")
    long nHomRef = 0;
    @DataPoint(description = "Number of hom var loci")
    long nHomVar = 0;
    @DataPoint(description = "Number of singletons")
    long nSingletons = 0;

    // calculations that get set in the finalizeEvaluation method
    @DataPoint(description = "heterozygosity per locus rate")
    double heterozygosity = 0;
    @DataPoint(description = "heterozygosity per base pair")
    double heterozygosityPerBp = 0;
    @DataPoint(description = "heterozygosity to homozygosity ratio")
    double hetHomRatio = 0;
    @DataPoint(description = "indel rate (insertion count + deletion count)")
    double indelRate = 0;
    @DataPoint(description = "indel rate per base pair")
    double indelRatePerBp = 0;
    @DataPoint(description = "deletion to insertion ratio")
    double deletionInsertionRatio = 0;

    // rate of n per processed locus (delegates to the shared helper in VariantEvaluator)
    private double perLocusRate(long n) {
        return rate(n, nProcessedLoci);
    }

    // inverse rate: one event of n per this many processed loci
    private long perLocusRInverseRate(long n) {
        return inverseRate(n, nProcessedLoci);
    }

    public boolean enabled() {
        return true;
    }

    public int getComparisonOrder() {
        return 1; // we only need to see each eval track
    }

    // called at every locus: accumulates the denominator used by the per-locus rates
    public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
        // bases skipped since the last call, plus one for the current locus
        // (when a reference context exists)
        nProcessedLoci += context.getSkippedBases() + (ref == null ? 0 : 1);
    }

    public String update1(VariantContext vc1, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
        // nProcessedLoci is accumulated in update0(), which sees every locus
        nCalledLoci++;

        if (vc1.isVariant()) nVariantLoci++;
        // classify the site itself
        switch (vc1.getType()) {
            case NO_VARIATION:
                nRefLoci++;
                break;
            case SNP:
                nSNPs++;
                // NOTE(review): assumes an upstream step sets the ISSINGLETON attribute -- confirm
                if (vc1.getAttributeAsBoolean("ISSINGLETON")) nSingletons++;
                break;
            case INDEL:
                if (vc1.isInsertion()) nInsertions++;
                else nDeletions++;
                break;
            case MIXED:
                nComplex++;
                break;
            default:
                throw new ReviewedStingException("Unexpected VariantContext type " + vc1.getType());
        }

        // classify each sample's genotype at the site
        for (Genotype g : vc1.getGenotypes().values()) {
            switch (g.getType()) {
                case NO_CALL:
                    nNoCalls++;
                    break;
                case HOM_REF:
                    nHomRef++;
                    break;
                case HET:
                    nHets++;
                    break;
                case HOM_VAR:
                    nHomVar++;
                    break;
                default:
                    throw new ReviewedStingException("BUG: Unexpected genotype type: " + g);
            }
        }

        return null; // we don't capture any interesting sites
    }

    // derive the rates and ratios from the raw counts once traversal is complete
    public void finalizeEvaluation() {
        variantRate = perLocusRate(nVariantLoci);
        variantRatePerBp = perLocusRInverseRate(nVariantLoci);
        heterozygosity = perLocusRate(nHets);
        heterozygosityPerBp = perLocusRInverseRate(nHets);
        hetHomRatio = ratio(nHets, nHomVar);
        indelRate = perLocusRate(nDeletions + nInsertions);
        indelRatePerBp = perLocusRInverseRate(nDeletions + nInsertions);
        deletionInsertionRatio = ratio(nDeletions, nInsertions);
    }
}
|
||||
|
|
@ -0,0 +1,54 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.evaluators;
|
||||
|
||||
/**
 * Accumulates phasing-agreement counts between a comp track and an eval track:
 * how often neither, only one, or both are phased, and when both are phased,
 * whether the phases match or are switched.
 */
class PhaseStats {
    public int neitherPhased = 0;
    public int onlyCompPhased = 0;
    public int onlyEvalPhased = 0;
    public int phasesAgree = 0;
    public int phasesDisagree = 0;

    /**
     * One tab-separated line summarizing all five counters.
     */
    public String toString() {
        return "Neither phased: " + neitherPhased
                + "\tOnly Comp: " + onlyCompPhased
                + "\tOnly Eval: " + onlyEvalPhased
                + "\tSame phase: " + phasesAgree
                + "\tOpposite phase: " + phasesDisagree;
    }

    /**
     * Column names for the report, index-aligned with getField().
     */
    public static String[] getFieldNamesArray() {
        return new String[]{"total", "neither", "only_comp", "only_eval", "both", "match", "switch", "switch_rate"};
    }

    /**
     * Returns the value for the report column at the given index (see
     * getFieldNamesArray() for the ordering), or -1 for an unknown index.
     * Counts come back as Integer; "switch_rate" comes back as Double.
     */
    public Object getField(int index) {
        final int bothPhased = phasesAgree + phasesDisagree;
        switch (index) {
            case 0:
                return neitherPhased + onlyCompPhased + onlyEvalPhased + bothPhased;
            case 1:
                return neitherPhased;
            case 2:
                return onlyCompPhased;
            case 3:
                return onlyEvalPhased;
            case 4:
                return bothPhased;
            case 5:
                return phasesAgree;
            case 6:
                return phasesDisagree;
            case 7:
                // fraction of doubly-phased sites whose phases are switched
                if (phasesDisagree == 0) {
                    return 0.0;
                }
                return ((double) phasesDisagree) / bothPhased;
            default:
                return -1;
        }
    }
}
|
||||
|
|
@ -0,0 +1,30 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.evaluators;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
||||
import java.util.HashMap;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA. User: kiran Date: Nov 29, 2010 Time: 3:25:59 PM To change this template use File | Settings
|
||||
* | File Templates.
|
||||
*/
|
||||
class SamplePreviousGenotypes {
|
||||
private HashMap<String, CompEvalGenotypes> sampleGenotypes = null;
|
||||
|
||||
public SamplePreviousGenotypes() {
|
||||
this.sampleGenotypes = new HashMap<String, CompEvalGenotypes>();
|
||||
}
|
||||
|
||||
public CompEvalGenotypes get(String sample) {
|
||||
return sampleGenotypes.get(sample);
|
||||
}
|
||||
|
||||
public void put(String sample, CompEvalGenotypes compEvalGts) {
|
||||
sampleGenotypes.put(sample, compEvalGts);
|
||||
}
|
||||
|
||||
public void put(String sample, GenomeLoc locus, Genotype compGt, Genotype evalGt) {
|
||||
sampleGenotypes.put(sample, new CompEvalGenotypes(locus, compGt, evalGt));
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,28 @@
|
|||
/*
|
||||
* Copyright (c) 2010.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.evaluators;
|
||||
|
||||
/**
 * Marker interface for evaluation modules. CompOverlap, CountVariants and
 * TiTvVariantEvaluator implement it; NOTE(review): presumably it tags the
 * evaluators that are run by default -- confirm against the walker's
 * module-selection logic.
 */
public interface StandardEval {}
|
||||
|
|
@ -0,0 +1,127 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.evaluators;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.Allele;
|
||||
import org.broad.tribble.util.variantcontext.Genotype;
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.tags.Analysis;
|
||||
import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.tags.DataPoint;
|
||||
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.concurrent.ConcurrentMap;
|
||||
|
||||
@Analysis(description = "Computes different estimates of theta based on variant sites and genotypes")
|
||||
public class ThetaVariantEvaluator extends VariantEvaluator {
|
||||
@DataPoint(description = "Average heterozygosity at variant sites; note that missing genotypes are ignored when computing this value")
|
||||
double avgHet = 0.0;
|
||||
@DataPoint(description = "Average pairwise differences at aligned sequences; averaged over both number of sequeneces and number of variant sites; note that missing genotypes are ignored when computing this value")
|
||||
double avgAvgDiffs = 0.0;
|
||||
@DataPoint(description = "Sum of heterozygosity over all variant sites; divide this by total target to get estimate of per base theta")
|
||||
double totalHet = 0.0;
|
||||
@DataPoint(description = "Sum of pairwise diffs over all variant sites; divide this by total target to get estimate of per base theta")
|
||||
double totalAvgDiffs = 0.0;
|
||||
@DataPoint(description = "Theta for entire region estimated based on number of segregating sites; divide ths by total target to get estimate of per base theta")
|
||||
double thetaRegionNumSites = 0.0;
|
||||
|
||||
//helper variables
|
||||
double numSites = 0;
|
||||
|
||||
public boolean enabled() {
|
||||
return true;
|
||||
}
|
||||
|
||||
public int getComparisonOrder() {
|
||||
return 1;
|
||||
}
|
||||
|
||||
public String update1(VariantContext vc, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
if (vc == null || !vc.isSNP() || !vc.hasGenotypes()) {
|
||||
return null; //no interesting sites
|
||||
}
|
||||
|
||||
if (vc.hasGenotypes()) {
|
||||
|
||||
//this maps allele to a count
|
||||
ConcurrentMap<String, Integer> alleleCounts = new ConcurrentHashMap<String, Integer>();
|
||||
|
||||
int numHetsHere = 0;
|
||||
float numGenosHere = 0;
|
||||
int numIndsHere = 0;
|
||||
|
||||
for (Genotype genotype : vc.getGenotypes().values()) {
|
||||
numIndsHere++;
|
||||
if (!genotype.isNoCall()) {
|
||||
//increment stats for heterozygosity
|
||||
if (genotype.isHet()) {
|
||||
numHetsHere++;
|
||||
}
|
||||
|
||||
numGenosHere++;
|
||||
//increment stats for pairwise mismatches
|
||||
|
||||
for (Allele allele : genotype.getAlleles()) {
|
||||
if (allele.isNonNull() && allele.isCalled()) {
|
||||
String alleleString = allele.toString();
|
||||
alleleCounts.putIfAbsent(alleleString, 0);
|
||||
alleleCounts.put(alleleString, alleleCounts.get(alleleString) + 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (numGenosHere > 0) {
|
||||
//only if have one called genotype at least
|
||||
this.numSites++;
|
||||
|
||||
this.totalHet += numHetsHere / numGenosHere;
|
||||
|
||||
//compute based on num sites
|
||||
float harmonicFactor = 0;
|
||||
for (int i = 1; i <= numIndsHere; i++) {
|
||||
harmonicFactor += 1.0 / i;
|
||||
}
|
||||
this.thetaRegionNumSites += 1.0 / harmonicFactor;
|
||||
|
||||
//now compute pairwise mismatches
|
||||
float numPairwise = 0;
|
||||
float numDiffs = 0;
|
||||
for (String allele1 : alleleCounts.keySet()) {
|
||||
int allele1Count = alleleCounts.get(allele1);
|
||||
|
||||
for (String allele2 : alleleCounts.keySet()) {
|
||||
if (allele1.compareTo(allele2) < 0) {
|
||||
continue;
|
||||
}
|
||||
if (allele1 .compareTo(allele2) == 0) {
|
||||
numPairwise += allele1Count * (allele1Count - 1) * .5;
|
||||
|
||||
}
|
||||
else {
|
||||
int allele2Count = alleleCounts.get(allele2);
|
||||
numPairwise += allele1Count * allele2Count;
|
||||
numDiffs += allele1Count * allele2Count;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (numPairwise > 0) {
|
||||
this.totalAvgDiffs += numDiffs / numPairwise;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void finalizeEvaluation() {
|
||||
|
||||
if (this.numSites > 0) {
|
||||
|
||||
this.avgHet = this.totalHet / this.numSites;
|
||||
this.avgAvgDiffs = this.totalAvgDiffs / this.numSites;
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,60 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.evaluators;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.tags.Analysis;
|
||||
import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.tags.DataPoint;
|
||||
|
||||
/**
 * Counts transitions and transversions for both the eval and the comp track and
 * reports their ratios. Only biallelic SNPs contribute.
 */
@Analysis(description = "Ti/Tv Variant Evaluator")
public class TiTvVariantEvaluator extends VariantEvaluator implements StandardEval {

    @DataPoint(description = "number of transition loci")
    long nTi = 0;
    @DataPoint(description = "number of transversion loci")
    long nTv = 0;
    @DataPoint(description = "the transition to transversion ratio")
    double tiTvRatio = 0.0;
    @DataPoint(description = "number of comp transition sites")
    long nTiInComp = 0;
    @DataPoint(description = "number of comp transversion sites")
    long nTvInComp = 0;
    @DataPoint(description = "the transition to transversion ratio for comp sites")
    double TiTvRatioStandard = 0.0;

    public boolean enabled() {
        return true;
    }

    public int getComparisonOrder() {
        return 2; // we need to see both the eval track and the comp track
    }

    /**
     * Counts vc as a transition or a transversion, provided it is a biallelic SNP.
     *
     * @param vc             the variant to classify (no-op when null or not a biallelic SNP)
     * @param updateStandard if true, update the comp counters; otherwise the eval counters
     */
    public void updateTiTv(VariantContext vc, boolean updateStandard) {
        if (vc != null && vc.isSNP() && vc.isBiallelic()) {
            if (VariantContextUtils.isTransition(vc)) {
                if (updateStandard) nTiInComp++;
                else nTi++;
            } else {
                if (updateStandard) nTvInComp++;
                else nTv++;
            }
        }
    }

    public String update2(VariantContext vc1, VariantContext vc2, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
        if (vc1 != null) updateTiTv(vc1, false); // eval track
        if (vc2 != null) updateTiTv(vc2, true);  // comp track

        return null; // we don't capture any interesting sites
    }

    @Override
    public void finalizeEvaluation() {
        // the ti/tv ratios need to be set here (they're not calculated per-variant)
        this.tiTvRatio = rate(nTi,nTv);
        this.TiTvRatioStandard = rate(nTiInComp, nTvInComp);
    }
}
|
||||
|
|
@ -0,0 +1,49 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.evaluators;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.util.NewEvaluationContext;
|
||||
|
||||
public abstract class VariantEvaluator {
|
||||
public abstract boolean enabled();
|
||||
|
||||
// Should return the number of VariantContexts expected as inputs to update. Can be 1 or 2
|
||||
public abstract int getComparisonOrder();
|
||||
|
||||
// called at all sites, regardless of eval context itself; useful for counting processed bases
|
||||
public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { }
|
||||
|
||||
public String update1(VariantContext vc1, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
return null;
|
||||
}
|
||||
|
||||
public String update1(VariantContext vc1, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context, NewEvaluationContext group) {
|
||||
return update1(vc1, tracker, ref, context);
|
||||
}
|
||||
|
||||
|
||||
public String update2(VariantContext vc1, VariantContext vc2, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
return null;
|
||||
}
|
||||
|
||||
public String update2(VariantContext vc1, VariantContext vc2, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context, NewEvaluationContext group) {
|
||||
return update2(vc1, vc2, tracker, ref, context);
|
||||
}
|
||||
|
||||
public void finalizeEvaluation() {}
|
||||
|
||||
protected double rate(long n, long d) {
|
||||
return n / (1.0 * Math.max(d, 1));
|
||||
}
|
||||
|
||||
protected long inverseRate(long n, long d) {
|
||||
return n == 0 ? 0 : d / Math.max(n, 1);
|
||||
}
|
||||
|
||||
protected double ratio(long num, long denom) {
|
||||
return ((double)num) / (Math.max(denom, 1));
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,245 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.evaluators;
|
||||
|
||||
/**
|
||||
* @author rpoplin
|
||||
* @since Apr 6, 2010
|
||||
*/
|
||||
|
||||
//@Analysis(name = "Variant Quality Score", description = "Shows various stats of sets of variants binned by variant quality score")
|
||||
//public class VariantQualityScore extends VariantEvaluator {
|
||||
//
|
||||
// // a mapping from quality score histogram bin to Ti/Tv ratio
|
||||
// @DataPoint(name="TiTv by Quality", description = "the Ti/Tv ratio broken out by variant quality")
|
||||
// TiTvStats titvStats;
|
||||
//
|
||||
// @DataPoint(name="Quality by Allele Count", description = "average variant quality for each allele count")
|
||||
// AlleleCountStats alleleCountStats = null;
|
||||
//
|
||||
// static class TiTvStats implements TableType {
|
||||
// final static int NUM_BINS = 20;
|
||||
// final HashMap<Integer, Pair<Long,Long>> qualByIsTransition = new HashMap<Integer, Pair<Long,Long>>(); // A hashMap holds all the qualities until we are able to bin them appropriately
|
||||
// final long transitionByQuality[] = new long[NUM_BINS];
|
||||
// final long transversionByQuality[] = new long[NUM_BINS];
|
||||
// final double titvByQuality[] = new double[NUM_BINS]; // the final ti/tv sets that get reported out
|
||||
//
|
||||
// public Object[] getRowKeys() {
|
||||
// return new String[]{"sample"};
|
||||
// }
|
||||
//
|
||||
// public Object[] getColumnKeys() {
|
||||
// final String columnKeys[] = new String[NUM_BINS];
|
||||
// for( int iii = 0; iii < NUM_BINS; iii++ ) {
|
||||
// columnKeys[iii] = "titvBin" + iii;
|
||||
// }
|
||||
// return columnKeys;
|
||||
// }
|
||||
//
|
||||
// public String getName() {
|
||||
// return "TiTvStats";
|
||||
// }
|
||||
//
|
||||
// public String getCell(int x, int y) {
|
||||
// return String.valueOf(titvByQuality[y]);
|
||||
// }
|
||||
//
|
||||
// public String toString() {
|
||||
// StringBuffer returnString = new StringBuffer();
|
||||
// // output the ti/tv array
|
||||
// returnString.append("titvByQuality: ");
|
||||
// for( int iii = 0; iii < NUM_BINS; iii++ ) {
|
||||
// returnString.append(titvByQuality[iii]);
|
||||
// returnString.append(" ");
|
||||
// }
|
||||
// return returnString.toString();
|
||||
// }
|
||||
//
|
||||
// public void incrValue( final double qual, final boolean isTransition ) {
|
||||
// final Integer qualKey = Math.round((float) qual);
|
||||
// final long numTransition = (isTransition ? 1L : 0L);
|
||||
// final long numTransversion = (isTransition ? 0L : 1L);
|
||||
// if( qualByIsTransition.containsKey(qualKey) ) {
|
||||
// Pair<Long,Long> transitionPair = qualByIsTransition.get(qualKey);
|
||||
// transitionPair.set(transitionPair.getFirst() + numTransition, transitionPair.getSecond() + numTransversion);
|
||||
// qualByIsTransition.put(qualKey, transitionPair);
|
||||
// } else {
|
||||
// qualByIsTransition.put(qualKey, new Pair<Long,Long>(numTransition,numTransversion));
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// public void organizeTiTvTables() {
|
||||
// for( int iii = 0; iii < NUM_BINS; iii++ ) {
|
||||
// transitionByQuality[iii] = 0L;
|
||||
// transversionByQuality[iii] = 0L;
|
||||
// titvByQuality[iii] = 0.0;
|
||||
// }
|
||||
//
|
||||
// int maxQual = 0;
|
||||
//
|
||||
// // Calculate the maximum quality score in order to normalize and histogram
|
||||
// for( final Integer qual : qualByIsTransition.keySet() ) {
|
||||
// if( qual > maxQual ) {
|
||||
// maxQual = qual;
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// final double binSize = ((double)maxQual) / ((double) (NUM_BINS-1));
|
||||
//
|
||||
// for( final Integer qual : qualByIsTransition.keySet() ) {
|
||||
// final int index = (int)Math.floor( ((double) qual) / binSize );
|
||||
// if( index >= 0 ) { // BUGBUG: why is there overflow here?
|
||||
// Pair<Long,Long> transitionPair = qualByIsTransition.get(qual);
|
||||
// transitionByQuality[index] += transitionPair.getFirst();
|
||||
// transversionByQuality[index] += transitionPair.getSecond();
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// for( int iii = 0; iii < NUM_BINS; iii++ ) {
|
||||
// if( transitionByQuality[iii] + transversionByQuality[iii] > 800L ) { // need to have a sufficient number of variants to get a useful Ti/Tv ratio
|
||||
// titvByQuality[iii] = ((double) transitionByQuality[iii]) / ((double) transversionByQuality[iii]);
|
||||
// } else {
|
||||
// titvByQuality[iii] = 0.0;
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// class AlleleCountStats implements TableType {
|
||||
// final HashMap<Integer, ArrayList<Double>> qualityListMap = new HashMap<Integer, ArrayList<Double>>();
|
||||
// final HashMap<Integer, Double> qualityMap = new HashMap<Integer, Double>();
|
||||
//
|
||||
// public Object[] getRowKeys() {
|
||||
// final int NUM_BINS = qualityListMap.keySet().size();
|
||||
// final String rowKeys[] = new String[NUM_BINS];
|
||||
// int iii = 0;
|
||||
// for( final Integer key : qualityListMap.keySet() ) {
|
||||
// rowKeys[iii] = "AC" + key;
|
||||
// iii++;
|
||||
// }
|
||||
// return rowKeys;
|
||||
//
|
||||
// }
|
||||
//
|
||||
// public Object[] getColumnKeys() {
|
||||
// return new String[]{"alleleCount","avgQual"};
|
||||
// }
|
||||
//
|
||||
// public String getName() {
|
||||
// return "AlleleCountStats";
|
||||
// }
|
||||
//
|
||||
// public String getCell(int x, int y) {
|
||||
// int iii = 0;
|
||||
// for( final Integer key : qualityListMap.keySet() ) {
|
||||
// if(iii == x) {
|
||||
// if(y == 0) { return String.valueOf(key); }
|
||||
// else { return String.valueOf(qualityMap.get(key)); }
|
||||
// }
|
||||
// iii++;
|
||||
// }
|
||||
// return null;
|
||||
// }
|
||||
//
|
||||
// public String toString() {
|
||||
// String returnString = "";
|
||||
// // output the quality map
|
||||
// returnString += "AlleleCountStats: ";
|
||||
// //for( int iii = 0; iii < NUM_BINS; iii++ ) {
|
||||
// // returnString += titvByQuality[iii] + " ";
|
||||
// //}
|
||||
// return returnString;
|
||||
// }
|
||||
//
|
||||
// public void incrValue( final double qual, final int alleleCount ) {
|
||||
// ArrayList<Double> list = qualityListMap.get(alleleCount);
|
||||
// if(list==null) { list = new ArrayList<Double>(); }
|
||||
// list.add(qual);
|
||||
// qualityListMap.put(alleleCount, list);
|
||||
// }
|
||||
//
|
||||
// public void organizeAlleleCountTables() {
|
||||
// for( final Integer key : qualityListMap.keySet() ) {
|
||||
// final ArrayList<Double> list = qualityListMap.get(key);
|
||||
// double meanQual = 0.0;
|
||||
// final double numQuals = (double)list.size();
|
||||
// for( Double qual : list ) {
|
||||
// meanQual += qual / numQuals;
|
||||
// }
|
||||
// qualityMap.put(key, meanQual);
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// public VariantQualityScore(VariantEvalWalker parent) {
|
||||
// super(parent);
|
||||
// titvStats = null;
|
||||
// }
|
||||
//
|
||||
// public String getName() {
|
||||
// return "VariantQualityScore";
|
||||
// }
|
||||
//
|
||||
// public int getComparisonOrder() {
|
||||
// return 1; // we only need to see each eval track
|
||||
// }
|
||||
//
|
||||
// public boolean enabled() {
|
||||
// return true;
|
||||
// }
|
||||
//
|
||||
// public String toString() {
|
||||
// return getName();
|
||||
// }
|
||||
//
|
||||
// public String update1(VariantContext eval, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
// final String interesting = null;
|
||||
//
|
||||
// if( eval != null && eval.isSNP() && eval.isBiallelic() ) { //BUGBUG: only counting biallelic sites (revisit what to do with triallelic sites)
|
||||
// if( titvStats == null ) { titvStats = new TiTvStats(); }
|
||||
// titvStats.incrValue(eval.getPhredScaledQual(), VariantContextUtils.isTransition(eval));
|
||||
//
|
||||
// if( alleleCountStats == null ) { alleleCountStats = new AlleleCountStats(); }
|
||||
// int alternateAlleleCount = 0;
|
||||
// for (final Allele a : eval.getAlternateAlleles()) {
|
||||
// alternateAlleleCount += eval.getChromosomeCount(a);
|
||||
// }
|
||||
// alleleCountStats.incrValue(eval.getPhredScaledQual(), alternateAlleleCount);
|
||||
// }
|
||||
//
|
||||
// return interesting; // This module doesn't capture any interesting sites, so return null
|
||||
// }
|
||||
//
|
||||
// public void finalizeEvaluation() {
|
||||
// if( titvStats != null ) {
|
||||
// titvStats.organizeTiTvTables();
|
||||
// }
|
||||
// if( alleleCountStats != null ) {
|
||||
// alleleCountStats.organizeAlleleCountTables();
|
||||
// }
|
||||
// }
|
||||
//}
|
||||
|
|
@ -0,0 +1,34 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.stratifications;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Set;
|
||||
|
||||
public class CompRodStratifier extends VariantStratifier implements RequiredStratification {
|
||||
// Needs to know the comp rods
|
||||
private Set<String> compNames;
|
||||
private ArrayList<String> states;
|
||||
|
||||
@Override
|
||||
public void initialize(Set<VariantContextUtils.JexlVCMatchExp> jexlExpressions, Set<String> compNames, Set<String> knownNames, Set<String> evalNames, Set<String> sampleNames) {
|
||||
this.compNames = compNames;
|
||||
|
||||
states = new ArrayList<String>();
|
||||
states.add("comp");
|
||||
}
|
||||
|
||||
public ArrayList<String> getAllStates() {
|
||||
return states;
|
||||
}
|
||||
|
||||
public ArrayList<String> getRelevantStates(ReferenceContext ref, VariantContext comp, VariantContext eval, String sampleName) {
|
||||
ArrayList<String> relevantStates = new ArrayList<String>();
|
||||
|
||||
relevantStates.add("comp");
|
||||
|
||||
return relevantStates;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,44 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.stratifications;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Set;
|
||||
|
||||
public class CpGStatusStratifier extends VariantStratifier {
|
||||
private ArrayList<String> states;
|
||||
|
||||
@Override
|
||||
public void initialize(Set<VariantContextUtils.JexlVCMatchExp> jexlExpressions, Set<String> compNames, Set<String> knownNames, Set<String> evalNames, Set<String> sampleNames) {
|
||||
states = new ArrayList<String>();
|
||||
states.add("all");
|
||||
states.add("CpG");
|
||||
states.add("non_CpG");
|
||||
}
|
||||
|
||||
public ArrayList<String> getAllStates() {
|
||||
return states;
|
||||
}
|
||||
|
||||
public ArrayList<String> getRelevantStates(ReferenceContext ref, VariantContext comp, VariantContext eval, String sampleName) {
|
||||
boolean isCpG = false;
|
||||
if (ref != null && ref.getBases() != null) {
|
||||
String fwRefBases = new String(ref.getBases());
|
||||
|
||||
String leftFlank = fwRefBases.substring((fwRefBases.length()/2) - 1, (fwRefBases.length()/2) + 1);
|
||||
String rightFlank = fwRefBases.substring((fwRefBases.length()/2), (fwRefBases.length()/2) + 2);
|
||||
|
||||
if (leftFlank.equalsIgnoreCase("CG") || leftFlank.equalsIgnoreCase("GC") || rightFlank.equalsIgnoreCase("CG") || rightFlank.equalsIgnoreCase("GC")) {
|
||||
isCpG = true;
|
||||
}
|
||||
}
|
||||
|
||||
ArrayList<String> relevantStates = new ArrayList<String>();
|
||||
relevantStates.add("all");
|
||||
relevantStates.add(isCpG ? "CpG" : "non_CpG");
|
||||
|
||||
return relevantStates;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,34 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.stratifications;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Set;
|
||||
|
||||
public class EvalRodStratifier extends VariantStratifier implements RequiredStratification {
|
||||
// needs to know the eval rods
|
||||
private Set<String> evalNames;
|
||||
private ArrayList<String> states;
|
||||
|
||||
@Override
|
||||
public void initialize(Set<VariantContextUtils.JexlVCMatchExp> jexlExpressions, Set<String> compNames, Set<String> knownNames, Set<String> evalNames, Set<String> sampleNames) {
|
||||
this.evalNames = evalNames;
|
||||
|
||||
states = new ArrayList<String>();
|
||||
states.add("eval");
|
||||
}
|
||||
|
||||
public ArrayList<String> getAllStates() {
|
||||
return states;
|
||||
}
|
||||
|
||||
public ArrayList<String> getRelevantStates(ReferenceContext ref, VariantContext comp, VariantContext eval, String sampleName) {
|
||||
ArrayList<String> relevantStates = new ArrayList<String>();
|
||||
|
||||
relevantStates.add("eval");
|
||||
|
||||
return relevantStates;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,36 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.stratifications;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Set;
|
||||
|
||||
public class FilterStatusStratifier extends VariantStratifier implements StandardStratification {
|
||||
// needs to know the variant context
|
||||
private ArrayList<String> states;
|
||||
|
||||
@Override
|
||||
public void initialize(Set<VariantContextUtils.JexlVCMatchExp> jexlExpressions, Set<String> compNames, Set<String> knownNames, Set<String> evalNames, Set<String> sampleNames) {
|
||||
states = new ArrayList<String>();
|
||||
states.add("called");
|
||||
states.add("filtered");
|
||||
states.add("raw");
|
||||
}
|
||||
|
||||
public ArrayList<String> getAllStates() {
|
||||
return states;
|
||||
}
|
||||
|
||||
public ArrayList<String> getRelevantStates(ReferenceContext ref, VariantContext comp, VariantContext eval, String sampleName) {
|
||||
ArrayList<String> relevantStates = new ArrayList<String>();
|
||||
|
||||
relevantStates.add("raw");
|
||||
if (eval != null) {
|
||||
relevantStates.add(eval.isFiltered() ? "filtered" : "called");
|
||||
}
|
||||
|
||||
return relevantStates;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,67 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.stratifications;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Set;
|
||||
|
||||
public class FunctionalClassStratifier extends VariantStratifier {
|
||||
// needs to know the variant context
|
||||
private ArrayList<String> states;
|
||||
|
||||
@Override
|
||||
public void initialize(Set<VariantContextUtils.JexlVCMatchExp> jexlExpressions, Set<String> compNames, Set<String> knownNames, Set<String> evalNames, Set<String> sampleNames) {
|
||||
states = new ArrayList<String>();
|
||||
states.add("all");
|
||||
states.add("silent");
|
||||
states.add("missense");
|
||||
states.add("nonsense");
|
||||
}
|
||||
|
||||
public ArrayList<String> getAllStates() {
|
||||
return states;
|
||||
}
|
||||
|
||||
public ArrayList<String> getRelevantStates(ReferenceContext ref, VariantContext comp, VariantContext eval, String sampleName) {
|
||||
ArrayList<String> relevantStates = new ArrayList<String>();
|
||||
|
||||
relevantStates.add("all");
|
||||
|
||||
if (eval != null && eval.isVariant()) {
|
||||
String type = null;
|
||||
|
||||
if (eval.getAttributeAsString("refseq.functionalClass") != null) {
|
||||
type = eval.getAttributeAsString("refseq.functionalClass");
|
||||
} else if (eval.getAttributeAsString("refseq.functionalClass_1") != null) {
|
||||
int annotationId = 1;
|
||||
String key;
|
||||
|
||||
do {
|
||||
key = String.format("refseq.functionalClass_%d", annotationId);
|
||||
|
||||
String newtype = eval.getAttributeAsString(key);
|
||||
|
||||
if ( newtype != null &&
|
||||
( type == null ||
|
||||
( type.equals("silent") && !newtype.equals("silent") ) ||
|
||||
( type.equals("missense") && newtype.equals("nonsense") ) )
|
||||
) {
|
||||
type = newtype;
|
||||
}
|
||||
|
||||
annotationId++;
|
||||
} while (eval.getAttributeAsString(key) != null);
|
||||
}
|
||||
|
||||
if (type != null) {
|
||||
if (type.equals("silent")) { relevantStates.add("silent"); }
|
||||
else if (type.equals("missense")) { relevantStates.add("missense"); }
|
||||
else if (type.equals("nonsense")) { relevantStates.add("nonsense"); }
|
||||
}
|
||||
}
|
||||
|
||||
return relevantStates;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,43 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.stratifications;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Set;
|
||||
|
||||
public class JexlExpressionStratifier extends VariantStratifier implements StandardStratification {
|
||||
// needs to know the jexl expressions
|
||||
private Set<VariantContextUtils.JexlVCMatchExp> jexlExpressions;
|
||||
private ArrayList<String> states;
|
||||
|
||||
@Override
|
||||
public void initialize(Set<VariantContextUtils.JexlVCMatchExp> jexlExpressions, Set<String> compNames, Set<String> knownNames, Set<String> evalNames, Set<String> sampleNames) {
|
||||
this.jexlExpressions = jexlExpressions;
|
||||
|
||||
states = new ArrayList<String>();
|
||||
states.add("none");
|
||||
for ( VariantContextUtils.JexlVCMatchExp jexlExpression : jexlExpressions ) {
|
||||
states.add(jexlExpression.name);
|
||||
}
|
||||
}
|
||||
|
||||
public ArrayList<String> getAllStates() {
|
||||
return states;
|
||||
}
|
||||
|
||||
public ArrayList<String> getRelevantStates(ReferenceContext ref, VariantContext comp, VariantContext eval, String sampleName) {
|
||||
ArrayList<String> relevantStates = new ArrayList<String>();
|
||||
relevantStates.add("none");
|
||||
|
||||
for ( VariantContextUtils.JexlVCMatchExp jexlExpression : jexlExpressions ) {
|
||||
System.out.println(jexlExpression.name + " " + jexlExpression.exp.getExpression());
|
||||
if (VariantContextUtils.match(eval, jexlExpression)) {
|
||||
relevantStates.add(jexlExpression.name);
|
||||
}
|
||||
}
|
||||
|
||||
return relevantStates;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,37 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.stratifications;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Set;
|
||||
|
||||
public class NoveltyStatusStratifier extends VariantStratifier implements StandardStratification {
|
||||
// needs the variant contexts and known names
|
||||
private Set<String> knownNames;
|
||||
private ArrayList<String> states;
|
||||
|
||||
@Override
|
||||
public void initialize(Set<VariantContextUtils.JexlVCMatchExp> jexlExpressions, Set<String> compNames, Set<String> knownNames, Set<String> evalNames, Set<String> sampleNames) {
|
||||
this.knownNames = knownNames;
|
||||
|
||||
states = new ArrayList<String>();
|
||||
states.add("all");
|
||||
states.add("known");
|
||||
states.add("novel");
|
||||
}
|
||||
|
||||
public ArrayList<String> getAllStates() {
|
||||
return states;
|
||||
}
|
||||
|
||||
public ArrayList<String> getRelevantStates(ReferenceContext ref, VariantContext comp, VariantContext eval, String sampleName) {
|
||||
ArrayList<String> relevantStates = new ArrayList<String>();
|
||||
|
||||
relevantStates.add("all");
|
||||
relevantStates.add(comp == null ? "novel" : "known");
|
||||
|
||||
return relevantStates;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,3 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.stratifications;
|
||||
|
||||
/**
 * Marker interface for stratifications that are always applied
 * (e.g. the comp and eval rod stratifiers); carries no methods.
 */
public interface RequiredStratification {}
|
||||
|
|
@ -0,0 +1,32 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.stratifications;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||
import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.NewVariantEvalWalker;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Set;
|
||||
|
||||
public class SampleStratifier extends VariantStratifier {
|
||||
// needs the sample names
|
||||
private ArrayList<String> samples;
|
||||
|
||||
@Override
|
||||
public void initialize(Set<VariantContextUtils.JexlVCMatchExp> jexlExpressions, Set<String> compNames, Set<String> knownNames, Set<String> evalNames, Set<String> sampleNames) {
|
||||
samples = new ArrayList<String>();
|
||||
samples.add(NewVariantEvalWalker.ALL_SAMPLE_NAME);
|
||||
samples.addAll(sampleNames);
|
||||
}
|
||||
|
||||
public ArrayList<String> getAllStates() {
|
||||
return samples;
|
||||
}
|
||||
|
||||
public ArrayList<String> getRelevantStates(ReferenceContext ref, VariantContext comp, VariantContext eval, String sampleName) {
|
||||
ArrayList<String> relevantStates = new ArrayList<String>();
|
||||
relevantStates.add(sampleName);
|
||||
|
||||
return relevantStates;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,4 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.stratifications;
|
||||
|
||||
/**
 * Marker interface for stratifications applied by default (but which can be
 * disabled), as opposed to {@link RequiredStratification}; carries no methods.
 */
public interface StandardStratification {
}
|
||||
|
|
@ -0,0 +1,29 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.stratifications;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Set;
|
||||
|
||||
public abstract class VariantStratifier implements Comparable {
|
||||
public abstract void initialize(Set<VariantContextUtils.JexlVCMatchExp> jexlExpressions, Set<String> compNames, Set<String> knownNames, Set<String> evalNames, Set<String> sampleNames);
|
||||
|
||||
public ArrayList<String> getAllStates() {
|
||||
return new ArrayList<String>();
|
||||
}
|
||||
|
||||
public boolean isApplicable(RefMetaDataTracker tracker, ReferenceContext ref, VariantContext comp, VariantContext eval, String state) {
|
||||
return false;
|
||||
}
|
||||
|
||||
public ArrayList<String> getRelevantStates(ReferenceContext ref, VariantContext comp, VariantContext eval, String sampleName) {
|
||||
return null;
|
||||
}
|
||||
|
||||
public int compareTo(Object o1) {
|
||||
return this.getClass().getSimpleName().compareTo(o1.getClass().getSimpleName());
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,10 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.tags;
|
||||
|
||||
import java.lang.annotation.Retention;
|
||||
import java.lang.annotation.RetentionPolicy;
|
||||
|
||||
/**
 * Marks a class as a VariantEval analysis module; read reflectively at
 * runtime by AnalysisModuleScanner.
 */
@Retention(RetentionPolicy.RUNTIME)
public @interface Analysis {
    String name() default ""; // the display name; optional (defaults to empty)
    String description(); // its description, required
}
|
||||
|
|
@ -0,0 +1,9 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.tags;
|
||||
|
||||
import java.lang.annotation.Retention;
|
||||
import java.lang.annotation.RetentionPolicy;
|
||||
|
||||
/**
 * Marks a field of an @Analysis module as an output datum; read reflectively
 * at runtime by AnalysisModuleScanner.
 */
@Retention(RetentionPolicy.RUNTIME)
public @interface DataPoint {
    String description() default ""; // the description, optional
}
|
||||
|
|
@ -0,0 +1,19 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.tags;
|
||||
|
||||
import java.lang.annotation.Retention;
|
||||
import java.lang.annotation.RetentionPolicy;
|
||||
|
||||
/**
 * @author aaron
 * <p/>
 * Annotation Param
 * <p/>
 * a description annotation for a parameter; a variable used as input to the
 * analysis, but not (necessarily) an output. Some formats will store this
 * information in comments, others will not include it.
 */
@Retention(RetentionPolicy.RUNTIME)
public @interface Param {
    String name() default ""; // the name, defaulted to the variable name
    String description(); // the description of the parameter
}
|
||||
|
|
@ -0,0 +1,127 @@
|
|||
/*
|
||||
* Copyright (c) 2010. The Broad Institute
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.util;
|
||||
|
||||
import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.tags.Analysis;
|
||||
import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.tags.DataPoint;
|
||||
import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.tags.Param;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.lang.annotation.Annotation;
|
||||
import java.lang.reflect.Field;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.Map;
|
||||
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* <p/>
|
||||
* Class AnalysisModuleScanner
|
||||
* <p/>
|
||||
* Given an analysis, find the annotated fields and methods. Given this module and
|
||||
* the object, a Mashalling object can serialize or deserialize a analysis module.
|
||||
*/
|
||||
public class AnalysisModuleScanner {
|
||||
|
||||
// what we extracted from the class
|
||||
private Map<Field, Param> parameters = new LinkedHashMap<Field, Param>(); // the parameter annotations
|
||||
private Map<Field, DataPoint> datums = new LinkedHashMap<Field, DataPoint>(); // the data we've discovered
|
||||
private Analysis analysis; // the analysis annotation
|
||||
|
||||
// private storage of the class type
|
||||
private final Class cls;
|
||||
|
||||
/**
|
||||
* create a report scanner from the passed in class
|
||||
* @param cls the target class, annotated with the @Analysis annotation
|
||||
*/
|
||||
public AnalysisModuleScanner(Class cls) {
|
||||
this.cls = cls;
|
||||
scan(); // scan the passed in class
|
||||
}
|
||||
|
||||
/**
|
||||
* create a report scanner from the passed in class
|
||||
* @param obj the target object, annotated with the @Analysis annotation
|
||||
*/
|
||||
public AnalysisModuleScanner(Object obj) {
|
||||
this.cls = obj.getClass();
|
||||
scan(); // scan the passed in class
|
||||
}
|
||||
|
||||
/** scan the class and find all appropriate fields and tables */
|
||||
public void scan() {
|
||||
if (cls == null || !cls.isAnnotationPresent(Analysis.class))
|
||||
throw new ReviewedStingException("The class passed in cannot be null, " + "" +
|
||||
"and must contain the @Analysis annotation, class " + cls + " was the input");
|
||||
|
||||
// get the annotation off of the class
|
||||
analysis = (Analysis) cls.getAnnotation(Analysis.class);
|
||||
scanFields();
|
||||
}
|
||||
|
||||
/**
|
||||
* scan the fields of the class, extracting parameters and table annotations and their associated fields
|
||||
*/
|
||||
private void scanFields() {
|
||||
// get the fields from the class, and extract
|
||||
for ( Class superCls = cls; superCls != null; superCls=superCls.getSuperclass() ) {
|
||||
for (Field f : superCls.getDeclaredFields())
|
||||
for (Annotation annotation : f.getAnnotations()) {
|
||||
if (annotation.annotationType().equals(Param.class))
|
||||
parameters.put(f, (Param) annotation);
|
||||
if (annotation.annotationType().equals(DataPoint.class))
|
||||
datums.put(f,(DataPoint) annotation);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @return get the list of parameters we found
|
||||
*/
|
||||
public Map<Field, Param> getParameters() {
|
||||
return parameters;
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @return a map of the datum annotations found
|
||||
*/
|
||||
public Map<Field, DataPoint> getData() {
|
||||
return datums;
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @return the analysis annotation found
|
||||
*/
|
||||
public Analysis getAnalysis() {
|
||||
return analysis;
|
||||
}
|
||||
|
||||
public Class getModuleClass() {
|
||||
return cls;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,137 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.util;
|
||||
|
||||
import java.lang.reflect.Field;
|
||||
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* <p/>
|
||||
* Class ComplexDataUtils
|
||||
* <p/>
|
||||
* This class contains methods and techniques for breaking down complex data in the output system
|
||||
*/
|
||||
public class ComplexDataUtils {
|
||||
    /**
     * Resolve an output object from an annotated field.
     *
     * NOTE(review): currently a stub — always returns null regardless of
     * input. The commented-out code below sketches the intended reflective
     * implementation; confirm intent before callers rely on a real value.
     *
     * @param f the annotated field to resolve
     * @param e the object instance owning the field
     * @return always null for now
     */
    public static Object resolveObject(Field f, Object e) {
        //try {
            //return f.toGenericString();
            //return f.getName(); //f.get(e);



        //} catch (IllegalAccessException e1) {
        //    throw new ReviewedStingException("Unable to access field " + f);
        //}
        return null;
    }
|
||||
|
||||
/*
|
||||
private static String extractPlainObjectOrPrimitive(Object obj) {
|
||||
String value = "<null>";
|
||||
if (obj instanceof Float || obj instanceof Double)
|
||||
value = String.format("%.4f",(Double)obj);
|
||||
else
|
||||
value = obj.toString();
|
||||
|
||||
return value;
|
||||
}
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* convert any string -> object pairing into a string keyed tree
|
||||
*
|
||||
* @param obj the object
|
||||
* @return a mapping of the name to the associated value tree. All non-leaf nodes will be Strings
|
||||
*/
|
||||
/*
|
||||
public static Collection<Node> resolveObjects(Object obj) { // TODO: fix this, we need a way to get the name of the list from the data point
|
||||
Collection<Node> nodes = new ArrayList<Node>();
|
||||
|
||||
// the simplest case, the object is null
|
||||
if (obj == null) nodes.add(new Node("<null>", "<null>", "<null>"));
|
||||
// capture objects of type TableTable
|
||||
else if (obj instanceof TableType)
|
||||
nodes.add(tableToNode((TableType) obj, ((TableType) obj).getName()));
|
||||
|
||||
// try to handle maps
|
||||
else if (obj instanceof Map) {
|
||||
throw new UnsupportedOperationException("The report generation system is currently unable to output Maps, due to their ambiguity");
|
||||
|
||||
// handle collections
|
||||
} else if (obj instanceof Collection)
|
||||
nodes.addAll(listToNode((Collection) obj, "collection"));
|
||||
|
||||
// arrays
|
||||
else if (obj.getClass().isArray())
|
||||
nodes.addAll(listToNode(Arrays.asList(obj), "array"));
|
||||
|
||||
// else we have a simple object (at least try to handle it that way
|
||||
else
|
||||
nodes.add(extractPlainObjectOrPrimitive(obj.getClass().getSimpleName(),obj));
|
||||
|
||||
// return the collection of nodes we've parsed out
|
||||
return nodes;
|
||||
}
|
||||
*/
|
||||
|
||||
/**
|
||||
* extract a (hopefully) primitive value
|
||||
* @param obj the object
|
||||
*/
|
||||
/*
|
||||
private static Node extractPlainObjectOrPrimitive(String name, Object obj) {
|
||||
String value = "<null>";
|
||||
if (obj instanceof Float || obj instanceof Double)
|
||||
value = String.format("%.4f",(Double)obj);
|
||||
else
|
||||
value = obj.toString();
|
||||
return new Node(name, value, "value");
|
||||
}
|
||||
*/
|
||||
|
||||
/**
|
||||
* given a TableType object, make it into a tree using maps.
|
||||
*
|
||||
* @param table the table type to convert into a map
|
||||
* @return a node representing this table
|
||||
*/
|
||||
/*
|
||||
private static Node tableToNode(TableType table, String name) {
|
||||
Node root = new Node("table", name, "Table");
|
||||
root.setTable();
|
||||
Object[] rows = table.getRowKeys();
|
||||
Object[] cols = table.getColumnKeys();
|
||||
|
||||
// add the columns names
|
||||
for (int x = 0; x < table.getRowKeys().length; x++) {
|
||||
Node row = new Node("row", rows[x].toString(), "a row in a table");
|
||||
root.addChild(row);
|
||||
for (int y = 0; y < table.getColumnKeys().length; y++) {
|
||||
Node col = new Node("column", cols[y].toString(), "columns in a table");
|
||||
row.addChild(col);
|
||||
col.addChild(extractPlainObjectOrPrimitive("cell(" + x + "," + y + ")", table.getCell(x, y)));
|
||||
}
|
||||
}
|
||||
return root;
|
||||
}
|
||||
*/
|
||||
|
||||
/**
|
||||
* given a Collection object, make it into a tree using maps.
|
||||
*
|
||||
* @param coll the collection to iterate, and turn into a list
|
||||
* @return a mapping of String to Object
|
||||
*/
|
||||
/*
|
||||
private static Collection<Node> listToNode(Collection coll, String name) {
|
||||
Collection<Node> nodes = new ArrayList<Node>();
|
||||
Iterator<Object> iter = coll.iterator();
|
||||
for (int x = 0; x < coll.size(); x++) {
|
||||
Node value = new Node("column " + x, String.valueOf(x), "column");
|
||||
value.addChild(new Node("value " + x, iter.next().toString(), "value"));
|
||||
nodes.add(value);
|
||||
}
|
||||
return nodes;
|
||||
}
|
||||
*/
|
||||
}
|
||||
|
|
@ -0,0 +1,96 @@
|
|||
//package org.broadinstitute.sting.playground.gatk.walkers.varianteval.util;
|
||||
//
|
||||
//import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||
//import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.VariantEvalWalker;
|
||||
//import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.evaluators.VariantEvaluator;
|
||||
//import org.broadinstitute.sting.utils.Utils;
|
||||
//import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException;
|
||||
//
|
||||
//import java.lang.reflect.Constructor;
|
||||
//import java.util.Arrays;
|
||||
//import java.util.HashSet;
|
||||
//import java.util.Set;
|
||||
//
|
||||
///**
|
||||
// * Created by IntelliJ IDEA.
|
||||
// * User: kiran
|
||||
// * Date: Dec 14, 2010
|
||||
// * Time: 11:15:32 PM
|
||||
// * To change this template use File | Settings | File Templates.
|
||||
// */
|
||||
//public class EvaluationContext implements Comparable<EvaluationContext> {
|
||||
// // useful for typing
|
||||
// public String evalTrackName, compTrackName, novelty, filtered, cpgStatus, functionalClass, sample;
|
||||
// public boolean enableInterestingSiteCaptures = false;
|
||||
// public VariantContextUtils.JexlVCMatchExp selectExp;
|
||||
// public Set<VariantEvaluator> evaluations;
|
||||
//
|
||||
// private Set<Class<? extends VariantEvaluator>> evaluationClasses;
|
||||
//
|
||||
// private static String RAW_SET_NAME = "raw";
|
||||
// private static String RETAINED_SET_NAME = "called";
|
||||
// private static String FILTERED_SET_NAME = "filtered";
|
||||
// private static String ALL_SET_NAME = "all";
|
||||
// private static String KNOWN_SET_NAME = "known";
|
||||
// private static String NOVEL_SET_NAME = "novel";
|
||||
// private final static String CONTEXT_SEPARATOR = "XXX";
|
||||
//
|
||||
// public boolean isIgnoringFilters() { return filtered.equals(RAW_SET_NAME); }
|
||||
// public boolean requiresFiltered() { return filtered.equals(FILTERED_SET_NAME); }
|
||||
// public boolean requiresNotFiltered() { return filtered.equals(RETAINED_SET_NAME); }
|
||||
// public boolean isIgnoringNovelty() { return novelty.equals(ALL_SET_NAME); }
|
||||
// public boolean requiresNovel() { return novelty.equals(NOVEL_SET_NAME); }
|
||||
// public boolean requiresKnown() { return novelty.equals(KNOWN_SET_NAME); }
|
||||
//
|
||||
// public boolean isSelected() { return selectExp == null; }
|
||||
//
|
||||
// public String getDisplayName() {
|
||||
// return getName(CONTEXT_SEPARATOR);
|
||||
// }
|
||||
//
|
||||
// public String getJexlName() {
|
||||
// return getName(".");
|
||||
// }
|
||||
//
|
||||
// private String getName(String separator) {
|
||||
// return Utils.join(separator, Arrays.asList(evalTrackName, compTrackName, selectExp == null ? "all" : selectExp.name, filtered, novelty, cpgStatus, functionalClass, sample));
|
||||
// }
|
||||
//
|
||||
// public String toString() { return getDisplayName(); }
|
||||
//
|
||||
// public int compareTo(EvaluationContext other) {
|
||||
// return this.getDisplayName().compareTo(other.getDisplayName());
|
||||
// }
|
||||
//
|
||||
// public EvaluationContext( String evalName, String compName, String novelty, String filtered, String cpgStatus, String functionalClass, String sample, VariantContextUtils.JexlVCMatchExp selectExp, Set<Class<? extends VariantEvaluator>> evaluationClasses ) {
|
||||
// this.evalTrackName = evalName;
|
||||
// this.compTrackName = compName;
|
||||
// this.novelty = novelty;
|
||||
// this.filtered = filtered;
|
||||
// this.selectExp = selectExp;
|
||||
// this.cpgStatus = cpgStatus;
|
||||
// this.functionalClass = functionalClass;
|
||||
// this.sample = sample;
|
||||
// this.enableInterestingSiteCaptures = selectExp == null;
|
||||
// this.evaluationClasses = evaluationClasses;
|
||||
// this.evaluations = instantiateEvalationsSet();
|
||||
// }
|
||||
//
|
||||
// private Set<VariantEvaluator> instantiateEvalationsSet() {
|
||||
// Set<VariantEvaluator> evals = new HashSet<VariantEvaluator>();
|
||||
// Object[] args = new Object[]{this};
|
||||
// Class<?>[] argTypes = new Class<?>[]{VariantEvalWalker.class};
|
||||
//
|
||||
// for ( Class<? extends VariantEvaluator> c : evaluationClasses ) {
|
||||
// try {
|
||||
// Constructor<? extends VariantEvaluator> constructor = c.getConstructor(argTypes);
|
||||
// VariantEvaluator eval = constructor.newInstance(args);
|
||||
// evals.add(eval);
|
||||
// } catch (Exception e) {
|
||||
// throw new DynamicClassResolutionException(c, e);
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// return evals;
|
||||
// }
|
||||
//}
|
||||
|
|
@ -0,0 +1,78 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.util;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.evaluators.VariantEvaluator;
|
||||
import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.stratifications.VariantStratifier;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Set;
|
||||
import java.util.TreeMap;
|
||||
|
||||
public class NewEvaluationContext extends HashMap<VariantStratifier, String> {
|
||||
private TreeMap<String, VariantEvaluator> evaluationInstances;
|
||||
|
||||
public String toString() {
|
||||
String value = "";
|
||||
|
||||
for ( VariantStratifier key : this.keySet() ) {
|
||||
value += "\t" + key.getClass().getSimpleName() + ":" + this.get(key) + "\n";
|
||||
}
|
||||
|
||||
return value;
|
||||
}
|
||||
|
||||
public void addEvaluationClassList(Set<Class<? extends VariantEvaluator>> evaluationClasses) {
|
||||
evaluationInstances = new TreeMap<String, VariantEvaluator>();
|
||||
|
||||
for ( Class<? extends VariantEvaluator> c : evaluationClasses ) {
|
||||
try {
|
||||
VariantEvaluator eval = c.newInstance();
|
||||
evaluationInstances.put(c.getSimpleName(), eval);
|
||||
} catch (InstantiationException e) {
|
||||
throw new StingException("Unable to instantiate eval module '" + c.getSimpleName() + "'");
|
||||
} catch (IllegalAccessException e) {
|
||||
throw new StingException("Illegal access error when trying to instantiate eval module '" + c.getSimpleName() + "'");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public TreeMap<String, VariantEvaluator> getEvaluationClassList() {
|
||||
return evaluationInstances;
|
||||
}
|
||||
|
||||
public void apply(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context, VariantContext comp, VariantContext eval) {
|
||||
for ( VariantEvaluator evaluation : evaluationInstances.values() ) {
|
||||
if ( evaluation.enabled() ) {
|
||||
// we always call update0 in case the evaluation tracks things like number of bases covered
|
||||
evaluation.update0(tracker, ref, context);
|
||||
|
||||
// the other updateN methods don't see a null context
|
||||
if ( tracker == null )
|
||||
continue;
|
||||
|
||||
// now call the single or paired update function
|
||||
switch ( evaluation.getComparisonOrder() ) {
|
||||
case 1:
|
||||
if (eval != null) {
|
||||
evaluation.update1(eval, tracker, ref, context);
|
||||
}
|
||||
|
||||
break;
|
||||
case 2:
|
||||
if (eval != null && comp != null) {
|
||||
evaluation.update2(eval, comp, tracker, ref, context);
|
||||
}
|
||||
|
||||
break;
|
||||
default:
|
||||
throw new ReviewedStingException("BUG: Unexpected evaluation order " + evaluation);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,27 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.util;
|
||||
|
||||
import java.util.TreeMap;
|
||||
|
||||
public class StateKey extends TreeMap<String, String> {
|
||||
public int hashCode() {
|
||||
int hashCode = 1;
|
||||
|
||||
for (String key : this.keySet()) {
|
||||
String value = this.get(key);
|
||||
|
||||
hashCode *= key.hashCode() + value.hashCode();
|
||||
}
|
||||
|
||||
return hashCode;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
String value = "";
|
||||
|
||||
for ( String key : this.keySet() ) {
|
||||
value += "\tstate " + key + ":" + this.get(key) + "\n";
|
||||
}
|
||||
|
||||
return value;
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue