V2 of VariantEval2. Framework is essentially complete., very simple and clear now compared to VE1. Support for any number of JEXL expressions. dbSNP% evaluation added to show paired comparison evaluation. Pretty printing output tables. Performance is poor but can easily be fixed (see todo notes).
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2764 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
ed23e54abd
commit
681c196097
|
|
@ -207,7 +207,19 @@ public class Allele {
|
|||
* @return true if these alleles are equal
|
||||
*/
|
||||
public boolean equals(Allele other) {
|
||||
return isRef == other.isRef && isNull == other.isNull && isNoCall == other.isNoCall && this.basesMatch(other.getBases());
|
||||
return equals(other, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if this and other are equal. If ignoreRefState is true, then doesn't require both alleles has the
|
||||
* same ref tag
|
||||
*
|
||||
* @param other
|
||||
* @param ignoreRefState
|
||||
* @return
|
||||
*/
|
||||
public boolean equals(Allele other, boolean ignoreRefState) {
|
||||
return (isRef == other.isRef || ignoreRefState) && isNull == other.isNull && isNoCall == other.isNoCall && this.basesMatch(other.getBases());
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -1,5 +1,7 @@
|
|||
package org.broadinstitute.sting.oneoffprojects.variantcontext;
|
||||
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
|
|
@ -44,6 +46,19 @@ public class Genotype extends AttributedObject {
|
|||
return al;
|
||||
}
|
||||
|
||||
private final static String ALLELE_SEPARATOR = "/";
|
||||
public String getGenotypeString() {
|
||||
return Utils.join(ALLELE_SEPARATOR, getAllelesString());
|
||||
}
|
||||
|
||||
private List<String> getAllelesString() {
|
||||
List<String> al = new ArrayList<String>();
|
||||
for ( Allele a : alleles )
|
||||
al.add(new String(a.getBases()));
|
||||
|
||||
return al;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @return the ploidy of this genotype
|
||||
|
|
|
|||
|
|
@ -500,14 +500,19 @@ public class VariantContext extends AttributedObject {
|
|||
* @return True if this context contains Allele allele, or false otherwise
|
||||
*/
|
||||
public boolean hasAllele(Allele allele) {
|
||||
return hasAllele(allele, false);
|
||||
}
|
||||
|
||||
public boolean hasAllele(Allele allele, boolean ignoreRefState) {
|
||||
for ( Allele a : getAlleles() ) {
|
||||
if ( a.equals(allele) )
|
||||
if ( a.equals(allele, ignoreRefState) )
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Gets the alleles. This method should return all of the alleles present at the location,
|
||||
* including the reference allele. There are no constraints imposed on the ordering of alleles
|
||||
|
|
|
|||
|
|
@ -4,22 +4,35 @@ import java.util.*;
|
|||
import org.apache.commons.jexl.*;
|
||||
import org.broadinstitute.sting.oneoffprojects.variantcontext.varianteval2.VariantEvaluator;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
|
||||
|
||||
public class VariantContextUtils {
|
||||
/** */
|
||||
public static class MatchExp {
|
||||
String name;
|
||||
String expStr;
|
||||
Expression exp;
|
||||
public String name;
|
||||
public String expStr;
|
||||
public Expression exp;
|
||||
|
||||
public MatchExp(String name, String str, Expression exp) {
|
||||
this.name = name;
|
||||
this.expStr = str;
|
||||
this.exp = exp;
|
||||
}
|
||||
}
|
||||
|
||||
public static List<MatchExp> initializeMatchExps(String[] names, String[] exps) {
|
||||
if ( names == null || exps == null )
|
||||
throw new StingException("BUG: neither names nor exps can be null: names " + names + " exps=" + exps );
|
||||
|
||||
if ( names.length != exps.length )
|
||||
throw new StingException("Inconsistent number of provided filter names and expressions: names=" + names + " exps=" + exps);
|
||||
|
||||
Map<String, String> map = new HashMap<String, String>();
|
||||
for ( int i = 0; i < names.length; i++ ) { map.put(names[i], exps[i]); }
|
||||
|
||||
return VariantContextUtils.initializeMatchExps(map);
|
||||
}
|
||||
|
||||
public static List<MatchExp> initializeMatchExps(Map<String, String> names_and_exps) {
|
||||
List<MatchExp> exps = new ArrayList<MatchExp>();
|
||||
|
||||
|
|
@ -39,9 +52,65 @@ public class VariantContextUtils {
|
|||
return exps;
|
||||
}
|
||||
|
||||
// todo -- add generalize matching routine here
|
||||
// todo -- should file in all fields (loc, filter, etc) for selection
|
||||
// todo -- genotypes should be sampleNAME.field -> value bindings
|
||||
public static boolean match(VariantContext vc, MatchExp exp) {
|
||||
return match(vc,Arrays.asList(exp)).get(exp);
|
||||
}
|
||||
|
||||
public static Map<MatchExp, Boolean> match(VariantContext vc, Collection<MatchExp> exps) {
|
||||
// todo actually, we should implement a JEXL context interface to VariantContext,
|
||||
// which just looks up the values assigned statically here. Much better approach
|
||||
|
||||
Map<String, String> infoMap = new HashMap<String, String>();
|
||||
|
||||
infoMap.put("CHROM", vc.getLocation().getContig());
|
||||
infoMap.put("POS", String.valueOf(vc.getLocation().getStart()));
|
||||
infoMap.put("TYPE", vc.getType().toString());
|
||||
infoMap.put("QUAL", String.valueOf(10 * vc.getNegLog10PError()));
|
||||
|
||||
// add alleles
|
||||
infoMap.put("ALLELES", Utils.join(";", vc.getAlleles()));
|
||||
infoMap.put("N_ALLELES", String.valueOf(vc.getNAlleles()));
|
||||
|
||||
// add attributes
|
||||
addAttributesToMap(infoMap, vc.getAttributes(), "");
|
||||
|
||||
// add filter fields
|
||||
infoMap.put("FILTER", String.valueOf(vc.isFiltered() ? "1" : "0"));
|
||||
for ( Object filterCode : vc.getFilters() ) {
|
||||
infoMap.put(String.valueOf(filterCode), "1");
|
||||
}
|
||||
|
||||
// add genotypes
|
||||
// todo -- comment this back in when we figure out how to represent it nicely
|
||||
// for ( Genotype g : vc.getGenotypes().values() ) {
|
||||
// String prefix = g.getSampleName() + ".";
|
||||
// addAttributesToMap(infoMap, g.getAttributes(), prefix);
|
||||
// infoMap.put(prefix + "GT", g.getGenotypeString());
|
||||
// }
|
||||
|
||||
JexlContext jContext = JexlHelper.createContext();
|
||||
//System.out.printf(infoMap.toString());
|
||||
jContext.setVars(infoMap);
|
||||
|
||||
try {
|
||||
Map<MatchExp, Boolean> resultMap = new HashMap<MatchExp, Boolean>();
|
||||
for ( MatchExp e : exps ) {
|
||||
resultMap.put(e, (Boolean)e.exp.evaluate(jContext));
|
||||
}
|
||||
return resultMap;
|
||||
} catch (Exception e) {
|
||||
throw new StingException(e.getMessage());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private static void addAttributesToMap(Map<String, String> infoMap, Map<Object, Object> attributes, String prefix ) {
|
||||
for (Map.Entry<Object, Object> e : attributes.entrySet()) {
|
||||
infoMap.put(prefix + String.valueOf(e.getKey()), String.valueOf(e.getValue()));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
private static final String UNIQUIFIED_SUFFIX = ".unique";
|
||||
|
||||
|
|
|
|||
|
|
@ -11,20 +11,20 @@ import java.util.List;
|
|||
import java.util.Arrays;
|
||||
|
||||
public class CountVariants extends VariantEvaluator {
|
||||
int nProcessedLoci = 0;
|
||||
int nCalledLoci = 0;
|
||||
int nVariantLoci = 0;
|
||||
int nRefLoci = 0;
|
||||
long nProcessedLoci = 0;
|
||||
long nCalledLoci = 0;
|
||||
long nVariantLoci = 0;
|
||||
long nRefLoci = 0;
|
||||
|
||||
int nSNPs = 0;
|
||||
int nInsertions = 0;
|
||||
int nDeletions = 0;
|
||||
int nComplex = 0;
|
||||
long nSNPs = 0;
|
||||
long nInsertions = 0;
|
||||
long nDeletions = 0;
|
||||
long nComplex = 0;
|
||||
|
||||
int nNoCalls = 0;
|
||||
int nHets = 0;
|
||||
int nHomRef = 0;
|
||||
int nHomVar = 0;
|
||||
long nNoCalls = 0;
|
||||
long nHets = 0;
|
||||
long nHomRef = 0;
|
||||
long nHomVar = 0;
|
||||
|
||||
private double rate(long n) {
|
||||
return n / (1.0 * Math.max(nProcessedLoci, 1));
|
||||
|
|
@ -39,7 +39,7 @@ public class CountVariants extends VariantEvaluator {
|
|||
}
|
||||
|
||||
public String getName() {
|
||||
return "Counter";
|
||||
return "counter";
|
||||
}
|
||||
|
||||
public int getComparisonOrder() {
|
||||
|
|
@ -76,7 +76,7 @@ public class CountVariants extends VariantEvaluator {
|
|||
}
|
||||
|
||||
public String toString() {
|
||||
return "Counter: " + summaryLine();
|
||||
return getName() + ": " + summaryLine();
|
||||
}
|
||||
|
||||
private String summaryLine() {
|
||||
|
|
|
|||
|
|
@ -0,0 +1,129 @@
|
|||
package org.broadinstitute.sting.oneoffprojects.variantcontext.varianteval2;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.*;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||
import org.broadinstitute.sting.playground.gatk.walkers.varianteval.BasicVariantAnalysis;
|
||||
import org.broadinstitute.sting.playground.gatk.walkers.varianteval.GenotypeAnalysis;
|
||||
import org.broadinstitute.sting.playground.gatk.walkers.varianteval.PopulationAnalysis;
|
||||
import org.broadinstitute.sting.oneoffprojects.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.oneoffprojects.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.oneoffprojects.variantcontext.Allele;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Arrays;
|
||||
|
||||
/**
|
||||
* The Broad Institute
|
||||
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
|
||||
* This software and its documentation are copyright 2009 by the
|
||||
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
|
||||
* <p/>
|
||||
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
|
||||
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
||||
*/
|
||||
public class DbSNPPercentage extends VariantEvaluator {
|
||||
private long nDBSNPs = 0;
|
||||
private long nEvalSNPs = 0;
|
||||
private long nSNPsAtdbSNPs = 0;
|
||||
private long nConcordant = 0;
|
||||
|
||||
public String getName() {
|
||||
return "dbsnp_percentage";
|
||||
}
|
||||
|
||||
public int getComparisonOrder() {
|
||||
return 2; // we need to see each eval track and each comp track
|
||||
}
|
||||
|
||||
public long nDBSNPs() { return nDBSNPs; }
|
||||
public long nEvalSNPs() { return nEvalSNPs; }
|
||||
public long nSNPsAtdbSNPs() { return nSNPsAtdbSNPs; }
|
||||
public long nConcordant() { return nConcordant; }
|
||||
public long nNovelSites() { return Math.abs(nEvalSNPs() - nSNPsAtdbSNPs()); }
|
||||
|
||||
public String toString() {
|
||||
return getName() + ": " + summaryLine();
|
||||
}
|
||||
|
||||
private String summaryLine() {
|
||||
return String.format("%d %d %d %d %d %.2f %.2f",
|
||||
nDBSNPs(), nEvalSNPs(), nSNPsAtdbSNPs(), nConcordant(), nNovelSites(), 100 * dbSNPRate(), 100 * concordanceRate());
|
||||
}
|
||||
|
||||
private static List<String> HEADER =
|
||||
Arrays.asList("n_dbsnps", "n_eval_snps", "n_overlapping_snps", "n_concordant", "n_novel_snps", "dbsnp_rate", "concordance_rate");
|
||||
|
||||
// making it a table
|
||||
public List<String> getTableHeader() {
|
||||
return HEADER;
|
||||
}
|
||||
|
||||
public List<List<String>> getTableRows() {
|
||||
return Arrays.asList(Arrays.asList(summaryLine().split(" ")));
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if every allele in eval is also in dbsnp
|
||||
*
|
||||
* @param eval
|
||||
* @param dbsnp
|
||||
* @return
|
||||
*/
|
||||
public boolean discordantP(VariantContext eval, VariantContext dbsnp ) {
|
||||
for ( Allele a : eval.getAlleles() ) {
|
||||
if ( ! dbsnp.hasAllele(a, true) )
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* What fraction of the evaluated site variants were also found in the db?
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public double dbSNPRate() {
|
||||
return nSNPsAtdbSNPs() / (1.0 * Math.max(nEvalSNPs(), 1));
|
||||
}
|
||||
|
||||
public double concordanceRate() {
|
||||
return nConcordant() / (1.0 * Math.max(nSNPsAtdbSNPs(), 1));
|
||||
}
|
||||
|
||||
// public static Variation getFirstRealSNP(RODRecordList<ReferenceOrderedDatum> dbsnpList) {
|
||||
// if (dbsnpList == null)
|
||||
// return null;
|
||||
//
|
||||
// Variation dbsnp = null;
|
||||
// for (ReferenceOrderedDatum d : dbsnpList) {
|
||||
// if (((Variation) d).isSNP() && (! (d instanceof RodVCF) || ! ((RodVCF)d).isFiltered())) {
|
||||
// dbsnp = (Variation)d;
|
||||
// break;
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// return dbsnp;
|
||||
// }
|
||||
|
||||
public void update2(VariantContext eval, VariantContext dbsnp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
boolean dbSNPIsGood = dbsnp != null && dbsnp.isSNP() && dbsnp.isNotFiltered();
|
||||
boolean evalIsGood = eval != null && eval.isSNP();
|
||||
|
||||
if ( dbSNPIsGood ) nDBSNPs++; // count the number of dbSNP events
|
||||
if ( evalIsGood ) nEvalSNPs++; // count the number of dbSNP events
|
||||
|
||||
if ( dbSNPIsGood && evalIsGood ) {
|
||||
nSNPsAtdbSNPs++;
|
||||
|
||||
if ( ! discordantP(eval, dbsnp) ) // count whether we're concordant or not with the dbSNP value
|
||||
nConcordant++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -4,6 +4,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
|||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.*;
|
||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
import org.broadinstitute.sting.oneoffprojects.variantcontext.VariantContext;
|
||||
|
|
@ -18,76 +19,136 @@ import java.util.*;
|
|||
public class VariantEval2Walker extends RodWalker<Integer, Integer> {
|
||||
// todo -- add doc string
|
||||
@Argument(shortName="select", doc="", required=false)
|
||||
protected String[] SELECT_STRINGS = {};
|
||||
protected String[] SELECT_EXPS = {"QUAL > 500.0", "HARD_TO_VALIDATE==1", "GATK_STANDARD==1"};
|
||||
|
||||
// todo -- add doc string
|
||||
@Argument(shortName="selectName", doc="", required=false)
|
||||
protected String[] SELECT_NAMES = {};
|
||||
protected String[] SELECT_NAMES = {"q500plus", "low_mapq", "gatk_std_filters"};
|
||||
|
||||
@Argument(shortName="known", doc="Name of ROD bindings containing variant sites that should be treated as known when splitting eval rods into known and novel subsets", required=false)
|
||||
protected String[] KNOWN_NAMES = {"dbsnp"};
|
||||
|
||||
|
||||
private class EvaluationGroup extends HashMap<String, Set<VariantEvaluator>> {
|
||||
/** private class holding all of the information about a single evaluation group (e.g., for eval ROD) */
|
||||
private class EvaluationContext extends HashMap<String, Set<VariantEvaluator>> {
|
||||
// useful for typing
|
||||
public String trackName, contextName;
|
||||
VariantContextUtils.MatchExp selectExp;
|
||||
|
||||
public EvaluationContext(String trackName, String contextName, VariantContextUtils.MatchExp selectExp) {
|
||||
this.trackName = trackName;
|
||||
this.contextName = contextName;
|
||||
this.selectExp = selectExp;
|
||||
}
|
||||
}
|
||||
|
||||
// todo -- generalize to multiple contexts, one for each eval
|
||||
private HashMap<String, EvaluationGroup> contexts = new HashMap<String, EvaluationGroup>();
|
||||
private List<VariantContextUtils.MatchExp> selectExps = null;
|
||||
private HashMap<String, EvaluationContext> contexts = new HashMap<String, EvaluationContext>();
|
||||
private Set<String> compNames = new HashSet<String>();
|
||||
|
||||
private static String RAW_SET_NAME = "raw";
|
||||
private static String RETAINED_SET_NAME = "called";
|
||||
private static String FILTERED_SET_NAME = "filtered";
|
||||
private static String ALL_SET_NAME = "all";
|
||||
private static String KNOWN_SET_NAME = "known";
|
||||
private static String NOVEL_SET_NAME = "novel";
|
||||
|
||||
// Dynamically determined variantEvaluation classes
|
||||
private List<String> variantEvaluationNames = new ArrayList<String>();
|
||||
private List<Class<? extends VariantEvaluator>> evaluationClasses = null;
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// initialize
|
||||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
public void initialize() {
|
||||
if ( SELECT_NAMES.length != SELECT_STRINGS.length )
|
||||
throw new StingException("Inconsistent number of provided filter names and expressions.");
|
||||
Map<String, String> map = new HashMap<String, String>();
|
||||
for ( int i = 0; i < SELECT_NAMES.length; i++ ) { map.put(SELECT_NAMES[i], SELECT_STRINGS[i]); }
|
||||
determineAllEvalations();
|
||||
List<VariantContextUtils.MatchExp> selectExps = VariantContextUtils.initializeMatchExps(SELECT_NAMES, SELECT_EXPS);
|
||||
|
||||
selectExps = VariantContextUtils.initializeMatchExps(map);
|
||||
|
||||
// setup contexts
|
||||
// todo -- add selects
|
||||
contexts.put("eval", createEvaluationGroup());
|
||||
contexts.put("eval.filtered", createEvaluationGroup());
|
||||
for ( ReferenceOrderedDataSource d : this.getToolkit().getRodDataSources() ) {
|
||||
if ( d.getName().startsWith("eval") ) {
|
||||
for ( VariantContextUtils.MatchExp e : selectExps ) {
|
||||
addNewContext(d.getName(), d.getName() + "." + e.name, e);
|
||||
}
|
||||
addNewContext(d.getName(), d.getName(), null);
|
||||
} else if ( d.getName().startsWith("dbsnp") || d.getName().startsWith("hapmap") || d.getName().startsWith("comp") ) {
|
||||
compNames.add(d.getName());
|
||||
} else {
|
||||
logger.info("Not evaluating ROD binding " + d.getName());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private EvaluationGroup createEvaluationGroup() {
|
||||
EvaluationGroup group = new EvaluationGroup();
|
||||
|
||||
for ( String name : Arrays.asList("all", "known", "novel") ) {
|
||||
group.put(name, new HashSet<VariantEvaluator>(Arrays.asList(new CountVariants())));
|
||||
private void determineAllEvalations() {
|
||||
evaluationClasses = PackageUtils.getClassesImplementingInterface(VariantEvaluator.class);
|
||||
for ( VariantEvaluator e : instantiateEvalationsSet() ) {
|
||||
// for collecting purposes
|
||||
variantEvaluationNames.add(e.getName());
|
||||
logger.debug("Including VariantEvaluator " + e.getName() + " of class " + e.getClass());
|
||||
}
|
||||
|
||||
return group;
|
||||
Collections.sort(variantEvaluationNames);
|
||||
}
|
||||
|
||||
private Set<VariantEvaluator> instantiateEvalationsSet() {
|
||||
Set<VariantEvaluator> evals = new HashSet<VariantEvaluator>();
|
||||
for ( Class c : evaluationClasses ) {
|
||||
try {
|
||||
evals.add((VariantEvaluator) c.newInstance());
|
||||
} catch (InstantiationException e) {
|
||||
throw new StingException(String.format("Cannot instantiate annotation class '%s': must be concrete class", c.getSimpleName()));
|
||||
} catch (IllegalAccessException e) {
|
||||
throw new StingException(String.format("Cannot instantiate annotation class '%s': must have no-arg constructor", c.getSimpleName()));
|
||||
}
|
||||
}
|
||||
|
||||
return evals;
|
||||
}
|
||||
|
||||
private void addNewContext(String trackName, String contextName, VariantContextUtils.MatchExp selectExp) {
|
||||
EvaluationContext group = new EvaluationContext(trackName, contextName, selectExp);
|
||||
|
||||
for ( String filteredName : Arrays.asList(RAW_SET_NAME, RETAINED_SET_NAME, FILTERED_SET_NAME) ) {
|
||||
for ( String subname : Arrays.asList(ALL_SET_NAME, KNOWN_SET_NAME, NOVEL_SET_NAME) ) {
|
||||
group.put(subname + "." + filteredName, instantiateEvalationsSet());
|
||||
}
|
||||
}
|
||||
|
||||
contexts.put(contextName, group);
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// map
|
||||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
//System.out.printf("map at %s with %d skipped%n", context.getLocation(), context.getSkippedBases());
|
||||
|
||||
Map<String, VariantContext> allNamedVCs = getVariantContexts(context, tracker);
|
||||
Map<String, VariantContext> evalNamedVCs = getEvalVCs(allNamedVCs);
|
||||
Map<String, VariantContext> comps = getComparisonVCs(allNamedVCs);
|
||||
Map<String, VariantContext> comps = getCompVariantContexts(tracker, context);
|
||||
|
||||
for ( VariantEvaluator eval : getAllEvaluations() ) {
|
||||
eval.update0(tracker, ref, context);
|
||||
}
|
||||
// to enable walking over pairs where eval or comps have no elements
|
||||
for ( EvaluationContext group : contexts.values() ) {
|
||||
VariantContext vc = getVariantContext(group.trackName, tracker, context);
|
||||
|
||||
if ( evalNamedVCs.size() > 1 ) throw new StingException("VariantEval doesn't yet support for multiple independent eval tracks");
|
||||
for ( Map.Entry<String, VariantContext> evalNameVC: evalNamedVCs.entrySet() ) {
|
||||
String name = evalNameVC.getKey();
|
||||
VariantContext vc = evalNameVC.getValue();
|
||||
boolean isKnown = vcIsKnown(vc, allNamedVCs);
|
||||
//logger.debug(String.format("Updating %s of %s with variant", group.name, vc));
|
||||
for ( Map.Entry<String, Set<VariantEvaluator>> namedEvaluations : group.entrySet() ) {
|
||||
String evaluationName = namedEvaluations.getKey();
|
||||
Set<VariantEvaluator> evaluations = namedEvaluations.getValue();
|
||||
boolean evalWantsVC = applyVCtoEvaluation(evaluationName, vc, comps, group);
|
||||
|
||||
if ( vc.isFiltered() ) name = name + ".filtered";
|
||||
EvaluationGroup group = contexts.get(name);
|
||||
|
||||
for ( Set<VariantEvaluator> evaluations : Arrays.asList(group.get("all"), group.get(isKnown ? "known" : "novel")) ) {
|
||||
for ( VariantEvaluator evaluation : evaluations ) {
|
||||
// we always call update0 in case the evaluation tracks things like number of bases covered
|
||||
evaluation.update0(tracker, ref, context);
|
||||
|
||||
// now call the single or paired update function
|
||||
switch ( evaluation.getComparisonOrder() ) {
|
||||
case 1:
|
||||
evaluation.update1(vc, tracker, ref, context);
|
||||
if ( vc != null ) evaluation.update1(vc, tracker, ref, context);
|
||||
break;
|
||||
case 2:
|
||||
for ( VariantContext comp : comps.values() ) {
|
||||
evaluation.update2(vc, comp, tracker, ref, context);
|
||||
evaluation.update2( evalWantsVC ? vc : null, comp, tracker, ref, context);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
|
|
@ -100,93 +161,79 @@ public class VariantEval2Walker extends RodWalker<Integer, Integer> {
|
|||
return 0;
|
||||
}
|
||||
|
||||
private boolean vcIsKnown(VariantContext vc, Map<String, VariantContext> vcs) {
|
||||
for ( VariantContext known : getKnownVCs(vcs).values() ) {
|
||||
if ( known.isNotFiltered() && known.getType() == vc.getType() )
|
||||
private boolean applyVCtoEvaluation(String evaluationName, VariantContext vc, Map<String, VariantContext> comps, EvaluationContext group) {
|
||||
if ( vc == null )
|
||||
return true;
|
||||
|
||||
if ( evaluationName.contains(FILTERED_SET_NAME) && vc.isNotFiltered() )
|
||||
return false;
|
||||
|
||||
if ( evaluationName.contains(RETAINED_SET_NAME) && vc.isFiltered() )
|
||||
return false;
|
||||
|
||||
boolean vcKnown = vcIsKnown(vc, comps, KNOWN_NAMES);
|
||||
if ( evaluationName.contains(KNOWN_SET_NAME) && ! vcKnown )
|
||||
return false;
|
||||
else if ( evaluationName.contains(NOVEL_SET_NAME) && vcKnown )
|
||||
return false;
|
||||
|
||||
if ( group.selectExp != null && ! VariantContextUtils.match(vc, group.selectExp) )
|
||||
return false;
|
||||
|
||||
// nothing invalidated our membership in this set
|
||||
return true;
|
||||
}
|
||||
|
||||
private Map<String, VariantContext> getCompVariantContexts(RefMetaDataTracker tracker, AlignmentContext context) {
|
||||
Map<String, VariantContext> comps = new HashMap<String, VariantContext>();
|
||||
|
||||
for ( String compName : compNames ) {
|
||||
comps.put(compName, getVariantContext(compName, tracker, context));
|
||||
}
|
||||
|
||||
return comps;
|
||||
}
|
||||
|
||||
private boolean vcIsKnown(VariantContext vc, Map<String, VariantContext> comps, String[] knownNames ) {
|
||||
for ( String knownName : knownNames ) {
|
||||
VariantContext known = comps.get(knownName);
|
||||
if ( known != null && known.isNotFiltered() && known.getType() == vc.getType() )
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
// can't handle this situation
|
||||
// todo -- warning, this leads to some missing SNPs at complex loci, such as:
|
||||
// todo -- 591 1 841619 841620 rs4970464 0 - A A -/C/T genomic mixed unknown 0 0 near-gene-3 exact 1
|
||||
// todo -- 591 1 841619 841620 rs62677860 0 + A A C/T genomic single unknown 0 0 near-gene-3 exact 1
|
||||
//
|
||||
//logger.info(String.format("Ignore second+ events at locus %s in rod %s => rec is %s", context.getLocation(), rodList.getName(), rec));
|
||||
|
||||
private Map<String, VariantContext> getEvalVCs(Map<String, VariantContext> vcs) {
|
||||
return getVCsStartingWith(vcs, false);
|
||||
}
|
||||
|
||||
private Map<String, VariantContext> getComparisonVCs(Map<String, VariantContext> vcs) {
|
||||
return getVCsStartingWith(vcs, true);
|
||||
}
|
||||
|
||||
private Map<String, VariantContext> getVCsStartingWith(Map<String, VariantContext> vcs, boolean notStartsWith) {
|
||||
Map<String, VariantContext> map = new HashMap<String, VariantContext>();
|
||||
|
||||
for ( Map.Entry<String, VariantContext> elt : vcs.entrySet() ) {
|
||||
boolean startP = elt.getKey().startsWith("eval");
|
||||
|
||||
if ( (startP && ! notStartsWith) || (!startP && notStartsWith) ) {
|
||||
map.put(elt.getKey(), elt.getValue());
|
||||
}
|
||||
}
|
||||
|
||||
return map;
|
||||
}
|
||||
|
||||
private Map<String, VariantContext> getKnownVCs(Map<String, VariantContext> vcs) {
|
||||
Map<String, VariantContext> map = new HashMap<String, VariantContext>();
|
||||
|
||||
for ( Map.Entry<String, VariantContext> elt : vcs.entrySet() ) {
|
||||
for ( String known1 : KNOWN_NAMES ) {
|
||||
if ( elt.getKey().equals(known1) ) {
|
||||
map.put(elt.getKey(), elt.getValue());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return map;
|
||||
}
|
||||
|
||||
private List<String> getAllEvaluationNames() {
|
||||
List<String> names = new ArrayList<String>();
|
||||
if ( contexts.size() == 0 ) throw new IllegalStateException("Contexts shouldn't be sized 0 when calling getAllEvaluationNames()");
|
||||
|
||||
for ( VariantEvaluator eval : contexts.values().iterator().next().get("all") ) {
|
||||
names.add(eval.getName());
|
||||
}
|
||||
|
||||
return names;
|
||||
}
|
||||
|
||||
|
||||
private Map<String, VariantContext> getVariantContexts(AlignmentContext context, RefMetaDataTracker tracker) {
|
||||
Map<String, VariantContext> map = new HashMap<String, VariantContext>();
|
||||
|
||||
private VariantContext getVariantContext(String name, RefMetaDataTracker tracker, AlignmentContext context) {
|
||||
if ( tracker != null ) {
|
||||
for ( RODRecordList<ReferenceOrderedDatum> rodList : tracker.getBoundRodTracks() ) {
|
||||
boolean alreadyGrabbedOne = false;
|
||||
|
||||
RODRecordList<ReferenceOrderedDatum> rodList = tracker.getTrackData(name, null);
|
||||
if ( rodList != null ) {
|
||||
for ( ReferenceOrderedDatum rec : rodList.getRecords() ) {
|
||||
if ( rec.getLocation().getStart() == context.getLocation().getStart() ) {
|
||||
// ignore things that span this location but started earlier
|
||||
if ( alreadyGrabbedOne ) {
|
||||
// can't handle this situation
|
||||
;
|
||||
//logger.info(String.format("Ignore second+ events at locus %s in rod %s => rec is %s", context.getLocation(), rodList.getName(), rec));
|
||||
} else {
|
||||
VariantContext vc = VariantContextAdaptors.convertToVariantContext(rec);
|
||||
if ( vc != null ) {
|
||||
alreadyGrabbedOne = true;
|
||||
map.put(rec.getName(), vc);
|
||||
}
|
||||
VariantContext vc = VariantContextAdaptors.convertToVariantContext(rec);
|
||||
if ( vc != null ) {
|
||||
return vc;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return map;
|
||||
return null;
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// reduce
|
||||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
public Integer reduceInit() {
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -202,38 +249,31 @@ public class VariantEval2Walker extends RodWalker<Integer, Integer> {
|
|||
return null;
|
||||
}
|
||||
|
||||
|
||||
public List<VariantEvaluator> getAllEvaluations() {
|
||||
List<VariantEvaluator> l = new ArrayList<VariantEvaluator>();
|
||||
|
||||
for ( EvaluationGroup group : contexts.values() ) {
|
||||
for ( Set<VariantEvaluator> evals : group.values() ) {
|
||||
l.addAll(evals);
|
||||
}
|
||||
}
|
||||
|
||||
public <T extends Comparable<T>> List<T> sorted(Collection<T> c ) {
|
||||
List<T> l = new ArrayList<T>(c);
|
||||
Collections.sort(l);
|
||||
return l;
|
||||
}
|
||||
|
||||
|
||||
public void onTraversalDone(Integer result) {
|
||||
for ( String evalName : getAllEvaluationNames() ) {
|
||||
for ( String evalName : variantEvaluationNames ) {
|
||||
boolean first = true;
|
||||
for ( Map.Entry<String, EvaluationGroup> elt : contexts.entrySet() ) {
|
||||
String contextName = elt.getKey();
|
||||
EvaluationGroup group = elt.getValue();
|
||||
out.printf("%n%n");
|
||||
for ( String contextName : sorted(contexts.keySet()) ) {
|
||||
EvaluationContext group = contexts.get(contextName);
|
||||
|
||||
for ( Map.Entry<String, Set<VariantEvaluator>> namedEvalGroup : group.entrySet() ) {
|
||||
String evalSubgroupName = namedEvalGroup.getKey();
|
||||
VariantEvaluator eval = getEvalByName(evalName, namedEvalGroup.getValue());
|
||||
out.printf("%s%n", Utils.dupString('-', 80));
|
||||
for ( String evalSubgroupName : sorted(group.keySet()) ) {
|
||||
Set<VariantEvaluator> evalSet = group.get(evalSubgroupName);
|
||||
VariantEvaluator eval = getEvalByName(evalName, evalSet);
|
||||
String keyWord = contextName + "." + evalSubgroupName;
|
||||
if ( first ) {
|
||||
out.printf("%s\t%s\t%s%n", evalName, "context", Utils.join("\t", eval.getTableHeader()));
|
||||
out.printf("%20s\t%40s\t%s%n", evalName, "context", Utils.join("\t\t", eval.getTableHeader()));
|
||||
first = false;
|
||||
}
|
||||
|
||||
for ( List<String> row : eval.getTableRows() )
|
||||
out.printf("%s\t%s\t%s%n", evalName, keyWord, Utils.join("\t", row));
|
||||
out.printf("%20s\t%40s\t%s%n", evalName, keyWord, Utils.join("\t\t", row));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue