From 681c196097fedbfe43564adee645b4041fe67c7d Mon Sep 17 00:00:00 2001 From: depristo Date: Tue, 2 Feb 2010 14:18:46 +0000 Subject: [PATCH] V2 of VariantEval2. Framework is essentially complete., very simple and clear now compared to VE1. Support for any number of JEXL expressions. dbSNP% evaluation added to show paired comparison evaluation. Pretty printing output tables. Performance is poor but can easily be fixed (see todo notes). git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2764 348d0f76-0448-11de-a6fe-93d51630548a --- .../oneoffprojects/variantcontext/Allele.java | 14 +- .../variantcontext/Genotype.java | 15 + .../variantcontext/VariantContext.java | 7 +- .../variantcontext/VariantContextUtils.java | 83 ++++- .../varianteval2/CountVariants.java | 28 +- .../varianteval2/DbSNPPercentage.java | 129 ++++++++ .../varianteval2/VariantEval2Walker.java | 298 ++++++++++-------- 7 files changed, 422 insertions(+), 152 deletions(-) create mode 100755 java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/varianteval2/DbSNPPercentage.java diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/Allele.java b/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/Allele.java index b49aa6453..000487ee4 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/Allele.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/Allele.java @@ -207,7 +207,19 @@ public class Allele { * @return true if these alleles are equal */ public boolean equals(Allele other) { - return isRef == other.isRef && isNull == other.isNull && isNoCall == other.isNoCall && this.basesMatch(other.getBases()); + return equals(other, false); + } + + /** + * Returns true if this and other are equal. If ignoreRefState is true, then doesn't require both alleles has the + * same ref tag + * + * @param other + * @param ignoreRefState + * @return + */ + public boolean equals(Allele other, boolean ignoreRefState) { + return (isRef == other.isRef || ignoreRefState) && isNull == other.isNull && isNoCall == other.isNoCall && this.basesMatch(other.getBases()); } /** diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/Genotype.java b/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/Genotype.java index 2991ac819..5daef835d 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/Genotype.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/Genotype.java @@ -1,5 +1,7 @@ package org.broadinstitute.sting.oneoffprojects.variantcontext; +import org.broadinstitute.sting.utils.Utils; + import java.util.*; /** @@ -44,6 +46,19 @@ public class Genotype extends AttributedObject { return al; } + private final static String ALLELE_SEPARATOR = "/"; + public String getGenotypeString() { + return Utils.join(ALLELE_SEPARATOR, getAllelesString()); + } + + private List getAllelesString() { + List al = new ArrayList(); + for ( Allele a : alleles ) + al.add(new String(a.getBases())); + + return al; + } + /** * @return the ploidy of this genotype diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/VariantContext.java b/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/VariantContext.java index a1569a16f..a58903f9f 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/VariantContext.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/VariantContext.java @@ -500,14 +500,19 @@ public class VariantContext extends AttributedObject { * @return True if this context contains Allele allele, or false otherwise */ public boolean hasAllele(Allele allele) { + return hasAllele(allele, false); + } + + public boolean hasAllele(Allele allele, boolean ignoreRefState) { for ( Allele a : getAlleles() ) { - if ( a.equals(allele) ) + if ( a.equals(allele, ignoreRefState) ) return true; } return false; } + /** * Gets the alleles. This method should return all of the alleles present at the location, * including the reference allele. There are no constraints imposed on the ordering of alleles diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/VariantContextUtils.java b/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/VariantContextUtils.java index 629b4fe15..f2cd20f31 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/VariantContextUtils.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/VariantContextUtils.java @@ -4,22 +4,35 @@ import java.util.*; import org.apache.commons.jexl.*; import org.broadinstitute.sting.oneoffprojects.variantcontext.varianteval2.VariantEvaluator; import org.broadinstitute.sting.utils.StingException; +import org.broadinstitute.sting.utils.Utils; public class VariantContextUtils { /** */ public static class MatchExp { - String name; - String expStr; - Expression exp; + public String name; + public String expStr; + public Expression exp; public MatchExp(String name, String str, Expression exp) { this.name = name; - this.expStr = str; this.exp = exp; } } + public static List initializeMatchExps(String[] names, String[] exps) { + if ( names == null || exps == null ) + throw new StingException("BUG: neither names nor exps can be null: names " + names + " exps=" + exps ); + + if ( names.length != exps.length ) + throw new StingException("Inconsistent number of provided filter names and expressions: names=" + names + " exps=" + exps); + + Map map = new HashMap(); + for ( int i = 0; i < names.length; i++ ) { map.put(names[i], exps[i]); } + + return VariantContextUtils.initializeMatchExps(map); + } + public static List initializeMatchExps(Map names_and_exps) { List exps = new ArrayList(); @@ -39,9 +52,65 @@ public class VariantContextUtils { return exps; } - // todo -- add generalize matching routine here - // todo -- should file in all fields (loc, filter, etc) for selection - // todo -- genotypes should be sampleNAME.field -> value bindings + public static boolean match(VariantContext vc, MatchExp exp) { + return match(vc,Arrays.asList(exp)).get(exp); + } + + public static Map match(VariantContext vc, Collection exps) { + // todo actually, we should implement a JEXL context interface to VariantContext, + // which just looks up the values assigned statically here. Much better approach + + Map infoMap = new HashMap(); + + infoMap.put("CHROM", vc.getLocation().getContig()); + infoMap.put("POS", String.valueOf(vc.getLocation().getStart())); + infoMap.put("TYPE", vc.getType().toString()); + infoMap.put("QUAL", String.valueOf(10 * vc.getNegLog10PError())); + + // add alleles + infoMap.put("ALLELES", Utils.join(";", vc.getAlleles())); + infoMap.put("N_ALLELES", String.valueOf(vc.getNAlleles())); + + // add attributes + addAttributesToMap(infoMap, vc.getAttributes(), ""); + + // add filter fields + infoMap.put("FILTER", String.valueOf(vc.isFiltered() ? "1" : "0")); + for ( Object filterCode : vc.getFilters() ) { + infoMap.put(String.valueOf(filterCode), "1"); + } + + // add genotypes + // todo -- comment this back in when we figure out how to represent it nicely +// for ( Genotype g : vc.getGenotypes().values() ) { +// String prefix = g.getSampleName() + "."; +// addAttributesToMap(infoMap, g.getAttributes(), prefix); +// infoMap.put(prefix + "GT", g.getGenotypeString()); +// } + + JexlContext jContext = JexlHelper.createContext(); + //System.out.printf(infoMap.toString()); + jContext.setVars(infoMap); + + try { + Map resultMap = new HashMap(); + for ( MatchExp e : exps ) { + resultMap.put(e, (Boolean)e.exp.evaluate(jContext)); + } + return resultMap; + } catch (Exception e) { + throw new StingException(e.getMessage()); + } + + } + + private static void addAttributesToMap(Map infoMap, Map attributes, String prefix ) { + for (Map.Entry e : attributes.entrySet()) { + infoMap.put(prefix + String.valueOf(e.getKey()), String.valueOf(e.getValue())); + } + } + + private static final String UNIQUIFIED_SUFFIX = ".unique"; diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/varianteval2/CountVariants.java b/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/varianteval2/CountVariants.java index b0a22e4e5..14a66b91f 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/varianteval2/CountVariants.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/varianteval2/CountVariants.java @@ -11,20 +11,20 @@ import java.util.List; import java.util.Arrays; public class CountVariants extends VariantEvaluator { - int nProcessedLoci = 0; - int nCalledLoci = 0; - int nVariantLoci = 0; - int nRefLoci = 0; + long nProcessedLoci = 0; + long nCalledLoci = 0; + long nVariantLoci = 0; + long nRefLoci = 0; - int nSNPs = 0; - int nInsertions = 0; - int nDeletions = 0; - int nComplex = 0; + long nSNPs = 0; + long nInsertions = 0; + long nDeletions = 0; + long nComplex = 0; - int nNoCalls = 0; - int nHets = 0; - int nHomRef = 0; - int nHomVar = 0; + long nNoCalls = 0; + long nHets = 0; + long nHomRef = 0; + long nHomVar = 0; private double rate(long n) { return n / (1.0 * Math.max(nProcessedLoci, 1)); @@ -39,7 +39,7 @@ public class CountVariants extends VariantEvaluator { } public String getName() { - return "Counter"; + return "counter"; } public int getComparisonOrder() { @@ -76,7 +76,7 @@ public class CountVariants extends VariantEvaluator { } public String toString() { - return "Counter: " + summaryLine(); + return getName() + ": " + summaryLine(); } private String summaryLine() { diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/varianteval2/DbSNPPercentage.java b/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/varianteval2/DbSNPPercentage.java new file mode 100755 index 000000000..ae2d20c7f --- /dev/null +++ b/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/varianteval2/DbSNPPercentage.java @@ -0,0 +1,129 @@ +package org.broadinstitute.sting.oneoffprojects.variantcontext.varianteval2; + +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.*; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.StingException; +import org.broadinstitute.sting.utils.genotype.Variation; +import org.broadinstitute.sting.playground.gatk.walkers.varianteval.BasicVariantAnalysis; +import org.broadinstitute.sting.playground.gatk.walkers.varianteval.GenotypeAnalysis; +import org.broadinstitute.sting.playground.gatk.walkers.varianteval.PopulationAnalysis; +import org.broadinstitute.sting.oneoffprojects.variantcontext.VariantContext; +import org.broadinstitute.sting.oneoffprojects.variantcontext.Genotype; +import org.broadinstitute.sting.oneoffprojects.variantcontext.Allele; + +import java.util.ArrayList; +import java.util.List; +import java.util.Arrays; + +/** + * The Broad Institute + * SOFTWARE COPYRIGHT NOTICE AGREEMENT + * This software and its documentation are copyright 2009 by the + * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. + *

+ * This software is supplied without any warranty or guaranteed support whatsoever. Neither + * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. + */ +public class DbSNPPercentage extends VariantEvaluator { + private long nDBSNPs = 0; + private long nEvalSNPs = 0; + private long nSNPsAtdbSNPs = 0; + private long nConcordant = 0; + + public String getName() { + return "dbsnp_percentage"; + } + + public int getComparisonOrder() { + return 2; // we need to see each eval track and each comp track + } + + public long nDBSNPs() { return nDBSNPs; } + public long nEvalSNPs() { return nEvalSNPs; } + public long nSNPsAtdbSNPs() { return nSNPsAtdbSNPs; } + public long nConcordant() { return nConcordant; } + public long nNovelSites() { return Math.abs(nEvalSNPs() - nSNPsAtdbSNPs()); } + + public String toString() { + return getName() + ": " + summaryLine(); + } + + private String summaryLine() { + return String.format("%d %d %d %d %d %.2f %.2f", + nDBSNPs(), nEvalSNPs(), nSNPsAtdbSNPs(), nConcordant(), nNovelSites(), 100 * dbSNPRate(), 100 * concordanceRate()); + } + + private static List HEADER = + Arrays.asList("n_dbsnps", "n_eval_snps", "n_overlapping_snps", "n_concordant", "n_novel_snps", "dbsnp_rate", "concordance_rate"); + + // making it a table + public List getTableHeader() { + return HEADER; + } + + public List> getTableRows() { + return Arrays.asList(Arrays.asList(summaryLine().split(" "))); + } + + /** + * Returns true if every allele in eval is also in dbsnp + * + * @param eval + * @param dbsnp + * @return + */ + public boolean discordantP(VariantContext eval, VariantContext dbsnp ) { + for ( Allele a : eval.getAlleles() ) { + if ( ! dbsnp.hasAllele(a, true) ) + return true; + } + + return false; + } + + + /** + * What fraction of the evaluated site variants were also found in the db? + * + * @return + */ + public double dbSNPRate() { + return nSNPsAtdbSNPs() / (1.0 * Math.max(nEvalSNPs(), 1)); + } + + public double concordanceRate() { + return nConcordant() / (1.0 * Math.max(nSNPsAtdbSNPs(), 1)); + } + +// public static Variation getFirstRealSNP(RODRecordList dbsnpList) { +// if (dbsnpList == null) +// return null; +// +// Variation dbsnp = null; +// for (ReferenceOrderedDatum d : dbsnpList) { +// if (((Variation) d).isSNP() && (! (d instanceof RodVCF) || ! ((RodVCF)d).isFiltered())) { +// dbsnp = (Variation)d; +// break; +// } +// } +// +// return dbsnp; +// } + + public void update2(VariantContext eval, VariantContext dbsnp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + boolean dbSNPIsGood = dbsnp != null && dbsnp.isSNP() && dbsnp.isNotFiltered(); + boolean evalIsGood = eval != null && eval.isSNP(); + + if ( dbSNPIsGood ) nDBSNPs++; // count the number of dbSNP events + if ( evalIsGood ) nEvalSNPs++; // count the number of dbSNP events + + if ( dbSNPIsGood && evalIsGood ) { + nSNPsAtdbSNPs++; + + if ( ! discordantP(eval, dbsnp) ) // count whether we're concordant or not with the dbSNP value + nConcordant++; + } + } +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/varianteval2/VariantEval2Walker.java b/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/varianteval2/VariantEval2Walker.java index 0eed3a161..41ef3df4a 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/varianteval2/VariantEval2Walker.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/varianteval2/VariantEval2Walker.java @@ -4,6 +4,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.*; import org.broadinstitute.sting.gatk.walkers.RodWalker; +import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.cmdLine.Argument; import org.broadinstitute.sting.oneoffprojects.variantcontext.VariantContext; @@ -18,76 +19,136 @@ import java.util.*; public class VariantEval2Walker extends RodWalker { // todo -- add doc string @Argument(shortName="select", doc="", required=false) - protected String[] SELECT_STRINGS = {}; + protected String[] SELECT_EXPS = {"QUAL > 500.0", "HARD_TO_VALIDATE==1", "GATK_STANDARD==1"}; + // todo -- add doc string @Argument(shortName="selectName", doc="", required=false) - protected String[] SELECT_NAMES = {}; + protected String[] SELECT_NAMES = {"q500plus", "low_mapq", "gatk_std_filters"}; @Argument(shortName="known", doc="Name of ROD bindings containing variant sites that should be treated as known when splitting eval rods into known and novel subsets", required=false) protected String[] KNOWN_NAMES = {"dbsnp"}; - - private class EvaluationGroup extends HashMap> { + /** private class holding all of the information about a single evaluation group (e.g., for eval ROD) */ + private class EvaluationContext extends HashMap> { // useful for typing + public String trackName, contextName; + VariantContextUtils.MatchExp selectExp; + + public EvaluationContext(String trackName, String contextName, VariantContextUtils.MatchExp selectExp) { + this.trackName = trackName; + this.contextName = contextName; + this.selectExp = selectExp; + } } - // todo -- generalize to multiple contexts, one for each eval - private HashMap contexts = new HashMap(); - private List selectExps = null; + private HashMap contexts = new HashMap(); + private Set compNames = new HashSet(); + private static String RAW_SET_NAME = "raw"; + private static String RETAINED_SET_NAME = "called"; + private static String FILTERED_SET_NAME = "filtered"; + private static String ALL_SET_NAME = "all"; + private static String KNOWN_SET_NAME = "known"; + private static String NOVEL_SET_NAME = "novel"; + + // Dynamically determined variantEvaluation classes + private List variantEvaluationNames = new ArrayList(); + private List> evaluationClasses = null; + + // -------------------------------------------------------------------------------------------------------------- + // + // initialize + // + // -------------------------------------------------------------------------------------------------------------- public void initialize() { - if ( SELECT_NAMES.length != SELECT_STRINGS.length ) - throw new StingException("Inconsistent number of provided filter names and expressions."); - Map map = new HashMap(); - for ( int i = 0; i < SELECT_NAMES.length; i++ ) { map.put(SELECT_NAMES[i], SELECT_STRINGS[i]); } + determineAllEvalations(); + List selectExps = VariantContextUtils.initializeMatchExps(SELECT_NAMES, SELECT_EXPS); - selectExps = VariantContextUtils.initializeMatchExps(map); - - // setup contexts - // todo -- add selects - contexts.put("eval", createEvaluationGroup()); - contexts.put("eval.filtered", createEvaluationGroup()); + for ( ReferenceOrderedDataSource d : this.getToolkit().getRodDataSources() ) { + if ( d.getName().startsWith("eval") ) { + for ( VariantContextUtils.MatchExp e : selectExps ) { + addNewContext(d.getName(), d.getName() + "." + e.name, e); + } + addNewContext(d.getName(), d.getName(), null); + } else if ( d.getName().startsWith("dbsnp") || d.getName().startsWith("hapmap") || d.getName().startsWith("comp") ) { + compNames.add(d.getName()); + } else { + logger.info("Not evaluating ROD binding " + d.getName()); + } + } } - private EvaluationGroup createEvaluationGroup() { - EvaluationGroup group = new EvaluationGroup(); - - for ( String name : Arrays.asList("all", "known", "novel") ) { - group.put(name, new HashSet(Arrays.asList(new CountVariants()))); + private void determineAllEvalations() { + evaluationClasses = PackageUtils.getClassesImplementingInterface(VariantEvaluator.class); + for ( VariantEvaluator e : instantiateEvalationsSet() ) { + // for collecting purposes + variantEvaluationNames.add(e.getName()); + logger.debug("Including VariantEvaluator " + e.getName() + " of class " + e.getClass()); } - return group; + Collections.sort(variantEvaluationNames); } + private Set instantiateEvalationsSet() { + Set evals = new HashSet(); + for ( Class c : evaluationClasses ) { + try { + evals.add((VariantEvaluator) c.newInstance()); + } catch (InstantiationException e) { + throw new StingException(String.format("Cannot instantiate annotation class '%s': must be concrete class", c.getSimpleName())); + } catch (IllegalAccessException e) { + throw new StingException(String.format("Cannot instantiate annotation class '%s': must have no-arg constructor", c.getSimpleName())); + } + } + + return evals; + } + + private void addNewContext(String trackName, String contextName, VariantContextUtils.MatchExp selectExp) { + EvaluationContext group = new EvaluationContext(trackName, contextName, selectExp); + + for ( String filteredName : Arrays.asList(RAW_SET_NAME, RETAINED_SET_NAME, FILTERED_SET_NAME) ) { + for ( String subname : Arrays.asList(ALL_SET_NAME, KNOWN_SET_NAME, NOVEL_SET_NAME) ) { + group.put(subname + "." + filteredName, instantiateEvalationsSet()); + } + } + + contexts.put(contextName, group); + } + + // -------------------------------------------------------------------------------------------------------------- + // + // map + // + // -------------------------------------------------------------------------------------------------------------- + public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { //System.out.printf("map at %s with %d skipped%n", context.getLocation(), context.getSkippedBases()); - Map allNamedVCs = getVariantContexts(context, tracker); - Map evalNamedVCs = getEvalVCs(allNamedVCs); - Map comps = getComparisonVCs(allNamedVCs); + Map comps = getCompVariantContexts(tracker, context); - for ( VariantEvaluator eval : getAllEvaluations() ) { - eval.update0(tracker, ref, context); - } + // to enable walking over pairs where eval or comps have no elements + for ( EvaluationContext group : contexts.values() ) { + VariantContext vc = getVariantContext(group.trackName, tracker, context); - if ( evalNamedVCs.size() > 1 ) throw new StingException("VariantEval doesn't yet support for multiple independent eval tracks"); - for ( Map.Entry evalNameVC: evalNamedVCs.entrySet() ) { - String name = evalNameVC.getKey(); - VariantContext vc = evalNameVC.getValue(); - boolean isKnown = vcIsKnown(vc, allNamedVCs); + //logger.debug(String.format("Updating %s of %s with variant", group.name, vc)); + for ( Map.Entry> namedEvaluations : group.entrySet() ) { + String evaluationName = namedEvaluations.getKey(); + Set evaluations = namedEvaluations.getValue(); + boolean evalWantsVC = applyVCtoEvaluation(evaluationName, vc, comps, group); - if ( vc.isFiltered() ) name = name + ".filtered"; - EvaluationGroup group = contexts.get(name); - - for ( Set evaluations : Arrays.asList(group.get("all"), group.get(isKnown ? "known" : "novel")) ) { for ( VariantEvaluator evaluation : evaluations ) { + // we always call update0 in case the evaluation tracks things like number of bases covered + evaluation.update0(tracker, ref, context); + + // now call the single or paired update function switch ( evaluation.getComparisonOrder() ) { case 1: - evaluation.update1(vc, tracker, ref, context); + if ( vc != null ) evaluation.update1(vc, tracker, ref, context); break; case 2: for ( VariantContext comp : comps.values() ) { - evaluation.update2(vc, comp, tracker, ref, context); + evaluation.update2( evalWantsVC ? vc : null, comp, tracker, ref, context); } break; default: @@ -100,93 +161,79 @@ public class VariantEval2Walker extends RodWalker { return 0; } - private boolean vcIsKnown(VariantContext vc, Map vcs) { - for ( VariantContext known : getKnownVCs(vcs).values() ) { - if ( known.isNotFiltered() && known.getType() == vc.getType() ) + private boolean applyVCtoEvaluation(String evaluationName, VariantContext vc, Map comps, EvaluationContext group) { + if ( vc == null ) + return true; + + if ( evaluationName.contains(FILTERED_SET_NAME) && vc.isNotFiltered() ) + return false; + + if ( evaluationName.contains(RETAINED_SET_NAME) && vc.isFiltered() ) + return false; + + boolean vcKnown = vcIsKnown(vc, comps, KNOWN_NAMES); + if ( evaluationName.contains(KNOWN_SET_NAME) && ! vcKnown ) + return false; + else if ( evaluationName.contains(NOVEL_SET_NAME) && vcKnown ) + return false; + + if ( group.selectExp != null && ! VariantContextUtils.match(vc, group.selectExp) ) + return false; + + // nothing invalidated our membership in this set + return true; + } + + private Map getCompVariantContexts(RefMetaDataTracker tracker, AlignmentContext context) { + Map comps = new HashMap(); + + for ( String compName : compNames ) { + comps.put(compName, getVariantContext(compName, tracker, context)); + } + + return comps; + } + + private boolean vcIsKnown(VariantContext vc, Map comps, String[] knownNames ) { + for ( String knownName : knownNames ) { + VariantContext known = comps.get(knownName); + if ( known != null && known.isNotFiltered() && known.getType() == vc.getType() ) return true; } return false; } +// can't handle this situation +// todo -- warning, this leads to some missing SNPs at complex loci, such as: +// todo -- 591 1 841619 841620 rs4970464 0 - A A -/C/T genomic mixed unknown 0 0 near-gene-3 exact 1 +// todo -- 591 1 841619 841620 rs62677860 0 + A A C/T genomic single unknown 0 0 near-gene-3 exact 1 +// +//logger.info(String.format("Ignore second+ events at locus %s in rod %s => rec is %s", context.getLocation(), rodList.getName(), rec)); - private Map getEvalVCs(Map vcs) { - return getVCsStartingWith(vcs, false); - } - - private Map getComparisonVCs(Map vcs) { - return getVCsStartingWith(vcs, true); - } - - private Map getVCsStartingWith(Map vcs, boolean notStartsWith) { - Map map = new HashMap(); - - for ( Map.Entry elt : vcs.entrySet() ) { - boolean startP = elt.getKey().startsWith("eval"); - - if ( (startP && ! notStartsWith) || (!startP && notStartsWith) ) { - map.put(elt.getKey(), elt.getValue()); - } - } - - return map; - } - - private Map getKnownVCs(Map vcs) { - Map map = new HashMap(); - - for ( Map.Entry elt : vcs.entrySet() ) { - for ( String known1 : KNOWN_NAMES ) { - if ( elt.getKey().equals(known1) ) { - map.put(elt.getKey(), elt.getValue()); - } - } - } - - return map; - } - - private List getAllEvaluationNames() { - List names = new ArrayList(); - if ( contexts.size() == 0 ) throw new IllegalStateException("Contexts shouldn't be sized 0 when calling getAllEvaluationNames()"); - - for ( VariantEvaluator eval : contexts.values().iterator().next().get("all") ) { - names.add(eval.getName()); - } - - return names; - } - - - private Map getVariantContexts(AlignmentContext context, RefMetaDataTracker tracker) { - Map map = new HashMap(); - + private VariantContext getVariantContext(String name, RefMetaDataTracker tracker, AlignmentContext context) { if ( tracker != null ) { - for ( RODRecordList rodList : tracker.getBoundRodTracks() ) { - boolean alreadyGrabbedOne = false; - + RODRecordList rodList = tracker.getTrackData(name, null); + if ( rodList != null ) { for ( ReferenceOrderedDatum rec : rodList.getRecords() ) { if ( rec.getLocation().getStart() == context.getLocation().getStart() ) { - // ignore things that span this location but started earlier - if ( alreadyGrabbedOne ) { - // can't handle this situation - ; - //logger.info(String.format("Ignore second+ events at locus %s in rod %s => rec is %s", context.getLocation(), rodList.getName(), rec)); - } else { - VariantContext vc = VariantContextAdaptors.convertToVariantContext(rec); - if ( vc != null ) { - alreadyGrabbedOne = true; - map.put(rec.getName(), vc); - } + VariantContext vc = VariantContextAdaptors.convertToVariantContext(rec); + if ( vc != null ) { + return vc; } } } } } - return map; + return null; } + // -------------------------------------------------------------------------------------------------------------- + // + // reduce + // + // -------------------------------------------------------------------------------------------------------------- public Integer reduceInit() { return 0; } @@ -202,38 +249,31 @@ public class VariantEval2Walker extends RodWalker { return null; } - - public List getAllEvaluations() { - List l = new ArrayList(); - - for ( EvaluationGroup group : contexts.values() ) { - for ( Set evals : group.values() ) { - l.addAll(evals); - } - } - + public > List sorted(Collection c ) { + List l = new ArrayList(c); + Collections.sort(l); return l; } - public void onTraversalDone(Integer result) { - for ( String evalName : getAllEvaluationNames() ) { + for ( String evalName : variantEvaluationNames ) { boolean first = true; - for ( Map.Entry elt : contexts.entrySet() ) { - String contextName = elt.getKey(); - EvaluationGroup group = elt.getValue(); + out.printf("%n%n"); + for ( String contextName : sorted(contexts.keySet()) ) { + EvaluationContext group = contexts.get(contextName); - for ( Map.Entry> namedEvalGroup : group.entrySet() ) { - String evalSubgroupName = namedEvalGroup.getKey(); - VariantEvaluator eval = getEvalByName(evalName, namedEvalGroup.getValue()); + out.printf("%s%n", Utils.dupString('-', 80)); + for ( String evalSubgroupName : sorted(group.keySet()) ) { + Set evalSet = group.get(evalSubgroupName); + VariantEvaluator eval = getEvalByName(evalName, evalSet); String keyWord = contextName + "." + evalSubgroupName; if ( first ) { - out.printf("%s\t%s\t%s%n", evalName, "context", Utils.join("\t", eval.getTableHeader())); + out.printf("%20s\t%40s\t%s%n", evalName, "context", Utils.join("\t\t", eval.getTableHeader())); first = false; } for ( List row : eval.getTableRows() ) - out.printf("%s\t%s\t%s%n", evalName, keyWord, Utils.join("\t", row)); + out.printf("%20s\t%40s\t%s%n", evalName, keyWord, Utils.join("\t\t", row)); } } }