Massive improvements to VE2 infrastructure. Now supports VCF writing of interesting sites; multiple comp and eval tracks. Eric will be taking it over and expanding functionality over the next few weeks until it's ready to replace VE1

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2832 348d0f76-0448-11de-a6fe-93d51630548a
2010-02-12 15:26:52 +00:00 · 2010-02-12 15:26:52 +00:00 · 5f74fffa02
parent 197dd540b5
commit 5f74fffa02
4 changed files with 231 additions and 147 deletions
--- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/DbSNPPercentage.java
+++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/DbSNPPercentage.java
@ -30,7 +30,7 @@ public class DbSNPPercentage extends VariantEvaluator {
    }

    public String getName() {
-        return "dbsnp_percentage";
+        return "dbOverlap";
    }

    public int getComparisonOrder() {
@ -43,6 +43,15 @@ public class DbSNPPercentage extends VariantEvaluator {
    public long nConcordant()    { return nConcordant; }
    public long nNovelSites()    { return Math.abs(nEvalSNPs() - nSNPsAtdbSNPs()); }

+
+    /**
+     * What fraction of the evaluated site variants were also found in the db?
+     *
+     * @return
+     */
+    public double dbSNPRate()           { return rate(nSNPsAtdbSNPs(), nEvalSNPs()); }
+    public double concordanceRate()     { return rate(nConcordant(), nSNPsAtdbSNPs()); }
+    
    public String toString() {
        return getName() + ": " + summaryLine();
    }
@ -53,7 +62,8 @@ public class DbSNPPercentage extends VariantEvaluator {
    }

    private static List<String> HEADER =
-            Arrays.asList("n_dbsnps", "n_eval_snps", "n_overlapping_snps", "n_concordant", "n_novel_snps", "dbsnp_rate", "concordance_rate");
+            Arrays.asList("n_dbsnps", "n_eval_snps", "n_overlapping_snps", "n_concordant",
+                    "n_novel_snps", "percent_eval_in_comp", "concordance_rate");

    // making it a table
    public List<String> getTableHeader() {
@ -82,15 +92,6 @@ public class DbSNPPercentage extends VariantEvaluator {
        return false;
    }

-
-    /**
-     * What fraction of the evaluated site variants were also found in the db?
-     *
-     * @return
-     */
-    public double dbSNPRate()           { return rate(nSNPsAtdbSNPs(), nEvalSNPs()); }
-    public double concordanceRate()     { return rate(nConcordant(), nSNPsAtdbSNPs()); }
-
    public String update2(VariantContext eval, VariantContext dbsnp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
        boolean dbSNPIsGood = dbsnp != null && dbsnp.isSNP() && dbsnp.isNotFiltered();
        boolean evalIsGood = eval != null && eval.isSNP();
--- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/TiTvVariantEvaluator.java
+++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/TiTvVariantEvaluator.java
@ -40,6 +40,10 @@ public class TiTvVariantEvaluator extends VariantEvaluator {
        if ( vc1 != null ) updateTiTv(vc1, false);
        if ( vc2 != null ) updateTiTv(vc2, true);

+        //if ( vc1 == null && vc2 != null && vc2.isSNP() && vc2.isBiallelic() )
+        //    System.out.printf("VC2 = %s%n", vc2);
+        //if ( vc2 != null && vc2.getName().equals("dbsnp") )
+
        return null; // we don't capture any intersting sites
    }

--- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/VariantEval2Walker.java
+++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/VariantEval2Walker.java
@ -21,11 +21,10 @@ import java.io.File;
 // todo -- evalations should support comment lines
 // todo -- add Mendelian variable explanations (nDeNovo and nMissingTransmissions)

-// todo -- interesting sites should support VCF generation, so that FN, FP, DeNovo, etc calls get put into a single VCF and
-// todo -- an explanation added to the INFO field as to why it showed up there.
-
 //
 // todo -- write a simple column table system and have the evaluators return this instead of the list<list<string>> objects
+//
+
 // todo -- site frequency spectrum eval (freq. of variants in eval as a function of their AC and AN numbers)
 // todo -- multiple sample concordance tool (genotypes in eval vs. genotypes in truth)
 // todo -- allele freqeuncy discovery tool (FREQ in true vs. discovery counts in eval).  Needs to process subset of samples in true (pools)
@ -33,10 +32,18 @@ import java.io.File;
 // todo -- HWEs
 // todo -- Validation data analysis from VE1?  What is it and should we transition it over?
 // todo -- indel metrics [count of sizes in/del should be in CountVariants]
+
+//
+// todo -- Performance
 // todo -- create JEXL context implementing object that simply looks up values for JEXL evaluations.  Throws error for unknown fields
 //

+//
 // todo -- port over SNP density evaluator.
+// todo -- make it work with intervals correctly
+//
+
+// todo -- counts of snps per target [target name, gene, etc]

 // todo -- add subgroup of known variants as to those at hapmap sites [it's in the dbSNP record]

@ -58,6 +65,25 @@ import java.io.File;
 // todo -- write or find a simple way to organize the table like output of variant eval 2.  A generic table of strings?
 //

+// todo Extend VariantEval, our general-purpose tool for SNP evaluation, to differentiate Ti/Tv at CpG islands and also
+// todo classify (and count) variants into coding, non-coding, synonomous/non-symonomous, 2/4 fold degenerate sites, etc.
+// todo Assume that the incoming VCF has the annotations (you don't need to do this) but VE2 should split up results by
+// todo these catogies automatically (using the default selects)
+//
+// todo -- We agreed to report two standard values for variant evaluation from Êhere out. ÊOne, we will continue to report
+// todo -- the dbSNP 129 rate. Additionally, we will start to report the % of variants found that have already been seen in
+// todo -- 1000 Genomes. ÊThis should be implemented as another standard comp_1kg binding, pointing to only variants
+// todo -- discovered and released by 1KG.  Might need to make this data set ourselves and keep it in GATK/data like
+// todo -- dbsnp rod
+//
+// todo -- aux. plotting routines for VE2
+//
+// todo -- Provide separate dbsnp rates for het only calls and any call where there is at least one hom-var genotype,
+// todo -- since hets are much more likely to be errors
+
+// todo -- Add Heng's hom run metrics
+
+
 /**
 * Test routine for new VariantContext object
 */
@ -91,6 +117,9 @@ public class VariantEval2Walker extends RodWalker<Integer, Integer> {
    @Argument(shortName="outputVCF", fullName="InterestingSitesVCF", doc="If provided, interesting sites emitted to this vcf and the INFO field annotated as to why they are interesting", required=false)
    protected String outputVCF = null;

+    /** Right now we will only be looking at SNPS */
+    EnumSet<VariantContext.Type> ALLOW_VARIANT_CONTEXT_TYPES = EnumSet.of(VariantContext.Type.SNP);
+
    // --------------------------------------------------------------------------------------------------------------
    //
    // private walker data
@ -98,22 +127,48 @@ public class VariantEval2Walker extends RodWalker<Integer, Integer> {
    // --------------------------------------------------------------------------------------------------------------

    /** private class holding all of the information about a single evaluation group (e.g., for eval ROD) */
-    private class EvaluationContext extends HashMap<String, Set<VariantEvaluator>> {
+    private class EvaluationContext implements Comparable<EvaluationContext> {
        // useful for typing
-        public String trackName, contextName;
+        public String evalTrackName, compTrackName, novelty, filtered;
        public boolean enableInterestingSiteCaptures = false;
        VariantContextUtils.JexlVCMatchExp selectExp;
+        Set<VariantEvaluator> evaluations;

-        public EvaluationContext(String trackName, String contextName, VariantContextUtils.JexlVCMatchExp selectExp, boolean enableInterestingSiteCaptures) {
-            this.trackName = trackName;
-            this.contextName = contextName;
+        public boolean isIgnoringFilters()      { return filtered.equals(RAW_SET_NAME); }
+        public boolean requiresFiltered()       { return filtered.equals(FILTERED_SET_NAME); }
+        public boolean requiresNotFiltered()    { return filtered.equals(RETAINED_SET_NAME); }
+        public boolean isIgnoringNovelty()      { return novelty.equals(ALL_SET_NAME); }
+        public boolean requiresNovel()          { return novelty.equals(NOVEL_SET_NAME); }
+        public boolean requiresKnown()          { return novelty.equals(KNOWN_SET_NAME); }
+
+        public boolean isSelected() { return selectExp == null; }
+
+        public String getDisplayName() {
+            return Utils.join(".", Arrays.asList(evalTrackName, compTrackName, selectExp == null ? "all" : selectExp.name, filtered, novelty));
+        }
+
+        public int compareTo(EvaluationContext other) {
+            return this.getDisplayName().compareTo(other.getDisplayName());
+        }
+
+        public EvaluationContext( String evalName, String compName, String novelty, String filtered, VariantContextUtils.JexlVCMatchExp selectExp ) {
+            this.evalTrackName = evalName;
+            this.compTrackName = compName;
+            this.novelty = novelty;
+            this.filtered = filtered;
            this.selectExp = selectExp;
-            this.enableInterestingSiteCaptures = enableInterestingSiteCaptures;
+            this.enableInterestingSiteCaptures = selectExp == null;
+            this.evaluations = instantiateEvalationsSet();
        }
    }

-    private HashMap<String, EvaluationContext> contexts = new HashMap<String, EvaluationContext>();
+    private List<EvaluationContext> contexts = null;
+
+    // lists of all comp and eval ROD track names
    private Set<String> compNames = new HashSet<String>();
+    private Set<String> evalNames = new HashSet<String>();
+
+    private List<String> variantEvaluationNames = new ArrayList<String>();

    private static String RAW_SET_NAME      = "raw";
    private static String RETAINED_SET_NAME = "called";
@ -122,8 +177,16 @@ public class VariantEval2Walker extends RodWalker<Integer, Integer> {
    private static String KNOWN_SET_NAME    = "known";
    private static String NOVEL_SET_NAME    = "novel";

+    private final static String CONTEXT_HEADER = "eval.comp.select.filter.novelty";
+    private final static int N_CONTEXT_NAME_PARTS = CONTEXT_HEADER.split("\\.").length;
+    private static int[] nameSizes = new int[N_CONTEXT_NAME_PARTS];
+    static {
+        int i = 0;
+        for ( String elt : CONTEXT_HEADER.split("\\.") )
+            nameSizes[i++] = elt.length();
+    }
+
    // Dynamically determined variantEvaluation classes
-    private List<String> variantEvaluationNames = new ArrayList<String>();
    private List<Class<? extends VariantEvaluator>> evaluationClasses = null;

    /** output writer for interesting sites */
@ -142,10 +205,7 @@ public class VariantEval2Walker extends RodWalker<Integer, Integer> {

        for ( ReferenceOrderedDataSource d : this.getToolkit().getRodDataSources() ) {
            if ( d.getName().startsWith("eval") ) {
-                for ( VariantContextUtils.JexlVCMatchExp e : selectExps ) {
-                    addNewContext(d.getName(), d.getName() + "." + e.name, e);
-                }
-                addNewContext(d.getName(), d.getName() + ".all", null);
+                evalNames.add(d.getName());
            } else if ( d.getName().startsWith("dbsnp") || d.getName().startsWith("hapmap") || d.getName().startsWith("comp") ) {
                compNames.add(d.getName());
            } else {
@ -153,12 +213,14 @@ public class VariantEval2Walker extends RodWalker<Integer, Integer> {
            }
        }

+        contexts = initializeEvaluationContexts(evalNames, compNames, selectExps);
        determineContextNamePartSizes();

        if ( outputVCF != null )
            writer = new VCFWriter(new File(outputVCF));
    }

+
    private void determineAllEvalations() {
        evaluationClasses = PackageUtils.getClassesImplementingInterface(VariantEvaluator.class);
        for ( VariantEvaluator e : instantiateEvalationsSet() ) {
@ -170,6 +232,33 @@ public class VariantEval2Walker extends RodWalker<Integer, Integer> {
        Collections.sort(variantEvaluationNames);
    }

+    private <T> List<T> append(List<T> selectExps, T elt) {
+        List<T> l = new ArrayList<T>(selectExps);
+        l.add(elt);
+        return l;
+    }
+
+    private List<EvaluationContext> initializeEvaluationContexts(Set<String> evalNames, Set<String> compNames, List<VariantContextUtils.JexlVCMatchExp> selectExps) {
+        List<EvaluationContext> contexts = new ArrayList<EvaluationContext>();
+
+        selectExps = append(selectExps, null);
+        for ( String evalName : evalNames ) {
+            for ( String compName : compNames ) {
+                for ( VariantContextUtils.JexlVCMatchExp e : selectExps ) {
+                    for ( String filteredName : Arrays.asList(RAW_SET_NAME, RETAINED_SET_NAME, FILTERED_SET_NAME) ) {
+                        for ( String novelty : Arrays.asList(ALL_SET_NAME, KNOWN_SET_NAME, NOVEL_SET_NAME) ) {
+                            EvaluationContext context = new EvaluationContext(evalName, compName, novelty, filteredName, e);
+                            contexts.add(context);
+                        }
+                    }
+                }
+            }
+        }
+
+        Collections.sort(contexts);
+        return contexts;
+    }
+
    private Set<VariantEvaluator> instantiateEvalationsSet() {
        Set<VariantEvaluator> evals = new HashSet<VariantEvaluator>();
        Object[] args = new Object[]{this};
@ -194,24 +283,11 @@ public class VariantEval2Walker extends RodWalker<Integer, Integer> {
        return evals;
    }

-    private void addNewContext(String trackName, String contextName, VariantContextUtils.JexlVCMatchExp selectExp) {
-        EvaluationContext group = new EvaluationContext(trackName, contextName, selectExp, selectExp == null);

-        for ( String filteredName : Arrays.asList(RAW_SET_NAME, RETAINED_SET_NAME, FILTERED_SET_NAME) ) {
-            for ( String subname : Arrays.asList(ALL_SET_NAME, KNOWN_SET_NAME, NOVEL_SET_NAME) ) {
-                String name = subname + "." + filteredName;
-                //System.out.printf("Creating group name: " + name);
-                group.put(name, instantiateEvalationsSet());
-                //group.put(name, instantiateEvalationsSet(subname == ALL_SET_NAME && filteredName == RETAINED_SET_NAME, trackName + "." + (selectExp == null ? "all" : selectExp.name) + "." + name));
-            }
-        }

-        contexts.put(contextName, group);
-    }
-
-    private boolean captureInterestingSitesOfEvalSet(String name) {
+    private boolean captureInterestingSitesOfEvalSet(EvaluationContext group) {
        //System.out.printf("checking %s%n", name);
-        return name.contains(ALL_SET_NAME + "." + RETAINED_SET_NAME);
+        return group.requiresNotFiltered() && group.isIgnoringNovelty();
    }

    // --------------------------------------------------------------------------------------------------------------
@ -220,54 +296,52 @@ public class VariantEval2Walker extends RodWalker<Integer, Integer> {
    //
    // --------------------------------------------------------------------------------------------------------------

+    // todo -- call a single function to build a map from track name -> variant context / null for all
+    //      -- eval + comp names.  Use this data structure to get data throughout rest of the loops here
    public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
        //System.out.printf("map at %s with %d skipped%n", context.getLocation(), context.getSkippedBases());

        if ( ref == null )
            return 0;

-        Collection<VariantContext> comps = getCompVariantContexts(tracker, context);
+        Map<String, VariantContext> vcs = getVariantContexts(tracker, context);
+        //Collection<VariantContext> comps = getCompVariantContexts(tracker, context);

        // to enable walking over pairs where eval or comps have no elements
-        for ( EvaluationContext group : contexts.values() ) {
-            VariantContext vc = getEvalContext(group.trackName, tracker, context);
+        for ( EvaluationContext group : contexts ) {
+            VariantContext vc = vcs.get(group.evalTrackName);

            //logger.debug(String.format("Updating %s of %s with variant", group.name, vc));
+            Set<VariantEvaluator> evaluations = group.evaluations;
+            boolean evalWantsVC = applyVCtoEvaluation(vc, vcs, group);
+            List<String> interestingReasons = new ArrayList<String>();

-            for ( Map.Entry<String, Set<VariantEvaluator>> namedEvaluations : group.entrySet() ) {
-                String evaluationName = namedEvaluations.getKey();
-                Set<VariantEvaluator> evaluations = namedEvaluations.getValue();
-                boolean evalWantsVC = applyVCtoEvaluation(evaluationName, vc, comps, group);
-                List<String> interestingReasons = new ArrayList<String>();
+            for ( VariantEvaluator evaluation : evaluations ) {
+                if ( evaluation.enabled() ) {
+                    // we always call update0 in case the evaluation tracks things like number of bases covered
+                    evaluation.update0(tracker, ref, context);

-                for ( VariantEvaluator evaluation : evaluations ) {
-                    if ( evaluation.enabled() ) {
-                        // we always call update0 in case the evaluation tracks things like number of bases covered
-                        evaluation.update0(tracker, ref, context);
-
-                        // now call the single or paired update function
-                        switch ( evaluation.getComparisonOrder() ) {
-                            case 1:
-                                if ( evalWantsVC && vc != null ) {
-                                    String interesting = evaluation.update1(vc, tracker, ref, context);
-                                    if ( interesting != null ) interestingReasons.add(interesting);
-                                }
-                                break;
-                            case 2:
-                                for ( VariantContext comp : comps ) {
-                                    String interesting = evaluation.update2( evalWantsVC ? vc : null, comp, tracker, ref, context);
-                                    if ( interesting != null ) interestingReasons.add(interesting);
-                                }
-                                break;
-                            default:
-                                throw new StingException("BUG: Unexpected evaluation order " + evaluation);
-                        }
+                    // now call the single or paired update function
+                    switch ( evaluation.getComparisonOrder() ) {
+                        case 1:
+                            if ( evalWantsVC && vc != null ) {
+                                String interesting = evaluation.update1(vc, tracker, ref, context);
+                                if ( interesting != null ) interestingReasons.add(interesting);
+                            }
+                            break;
+                        case 2:
+                            VariantContext comp = vcs.get(group.compTrackName);
+                            String interesting = evaluation.update2( evalWantsVC ? vc : null, comp, tracker, ref, context );
+                            if ( interesting != null ) interestingReasons.add(interesting);
+                            break;
+                        default:
+                            throw new StingException("BUG: Unexpected evaluation order " + evaluation);
                    }
                }
-
-                if ( group.enableInterestingSiteCaptures && captureInterestingSitesOfEvalSet(evaluationName) )
-                    writeInterestingSite(interestingReasons, vc);
            }
+
+            if ( group.enableInterestingSiteCaptures && captureInterestingSitesOfEvalSet(group) )
+                writeInterestingSite(interestingReasons, vc);
        }

        return 0;
@ -307,20 +381,20 @@ public class VariantEval2Walker extends RodWalker<Integer, Integer> {
        }
    }

-    private boolean applyVCtoEvaluation(String evaluationName, VariantContext vc, Collection<VariantContext> comps, EvaluationContext group) {
+    private boolean applyVCtoEvaluation(VariantContext vc, Map<String, VariantContext> vcs, EvaluationContext group) {
        if ( vc == null )
            return true;

-        if ( evaluationName.contains(FILTERED_SET_NAME) && vc.isNotFiltered() )
+        if ( group.requiresFiltered() && vc.isNotFiltered() )
            return false;

-        if ( evaluationName.contains(RETAINED_SET_NAME) && vc.isFiltered() )
+        if ( group.requiresNotFiltered() && vc.isFiltered() )
            return false;

-        boolean vcKnown = vcIsKnown(vc, comps, KNOWN_NAMES);
-        if ( evaluationName.contains(KNOWN_SET_NAME) && ! vcKnown )
+        boolean vcKnown = vcIsKnown(vc, vcs, KNOWN_NAMES);
+        if ( group.requiresKnown() && ! vcKnown )
            return false;
-        else if ( evaluationName.contains(NOVEL_SET_NAME) && vcKnown )
+        else if ( group.requiresNovel() && vcKnown )
            return false;

        if ( group.selectExp != null && ! VariantContextUtils.match(vc, group.selectExp) )
@ -330,14 +404,11 @@ public class VariantEval2Walker extends RodWalker<Integer, Integer> {
        return true;
    }

-    private boolean vcIsKnown(VariantContext vc, Collection<VariantContext> comps, String[] knownNames ) {
-        for ( VariantContext comp : comps ) {
-            if ( comp.isNotFiltered() && comp.getType() == vc.getType() ) {
-                for ( String knownName : knownNames ) {
-                    if ( comp.getName().equals(knownName) ) {
-                        return true;
-                    }
-                }
+    private boolean vcIsKnown(VariantContext vc, Map<String, VariantContext> vcs, String[] knownNames ) {
+        for ( String knownName : knownNames ) {
+            VariantContext known = vcs.get(knownName);
+            if ( known != null && known.isNotFiltered() && known.getType() == vc.getType() ) {
+                return true;
            }
        }

@ -351,23 +422,23 @@ public class VariantEval2Walker extends RodWalker<Integer, Integer> {
 //
 //logger.info(String.format("Ignore second+ events at locus %s in rod %s => rec is %s", context.getLocation(), rodList.getName(), rec));

-    private Collection<VariantContext> getCompVariantContexts(RefMetaDataTracker tracker, AlignmentContext context) {
+    private Map<String, VariantContext> getVariantContexts(RefMetaDataTracker tracker, AlignmentContext context) {
        // todo -- we need to deal with dbSNP where there can be multiple records at the same start site.  A potential solution is to
        // todo -- allow the variant evaluation to specify the type of variants it wants to see and only take the first such record at a site
-        Collection<VariantContext> comps = tracker.getVariantContexts(compNames, null, context.getLocation(), true, true);
-
-        // todo -- remove me when the loop works correctly for comparisons of eval x comp for each comp
-        if ( comps.size() > 1 ) throw new StingException("VariantEval2 currently only supports comparisons of N eval tracks vs. a single comparison track.  Yes, I know...");
-        return comps;
+        Map<String, VariantContext> bindings = new HashMap<String, VariantContext>();
+        bindVariantContexts(bindings, evalNames, tracker, context);
+        bindVariantContexts(bindings, compNames, tracker, context);
+        return bindings;
    }

-    private VariantContext getEvalContext(String name, RefMetaDataTracker tracker, AlignmentContext context) {
-        Collection<VariantContext> contexts = tracker.getVariantContexts(name, null, context.getLocation(), true, false);
-
-        if ( context.size() > 1 )
-            throw new StingException("Found multiple variant contexts at " + context.getLocation());
-
-        return contexts.size() == 1 ? contexts.iterator().next() : null;
+    private void bindVariantContexts(Map<String, VariantContext> map, Collection<String> names,
+                                     RefMetaDataTracker tracker, AlignmentContext context ) {
+        for ( String name : names ) {
+            Collection<VariantContext> contexts = tracker.getVariantContexts(name, ALLOW_VARIANT_CONTEXT_TYPES, context.getLocation(), true, true);
+            if ( context.size() > 1 )
+                throw new StingException("Found multiple variant contexts at " + context.getLocation());
+            map.put(name, contexts.size() == 1 ? contexts.iterator().next() : null);
+        }
    }

    // --------------------------------------------------------------------------------------------------------------
@ -390,27 +461,14 @@ public class VariantEval2Walker extends RodWalker<Integer, Integer> {
        return null;
    }

-    private final static String CONTEXT_HEADER = "track.subset.novelty.filter";
-    private final static int N_CONTEXT_NAME_PARTS = CONTEXT_HEADER.split("\\.").length;
-    private static int[] nameSizes = new int[N_CONTEXT_NAME_PARTS];
-    static {
-        int i = 0;
-        for ( String elt : CONTEXT_HEADER.split("\\.") )
-            nameSizes[i++] = elt.length();
-    }
-
    private void determineContextNamePartSizes() {
-        for ( String contextName : Utils.sorted(contexts.keySet()) ) {
-            EvaluationContext group = contexts.get(contextName);
-            for ( String evalSubgroupName : Utils.sorted(group.keySet()) ) {
-                String keyWord = contextName + "." + evalSubgroupName;
-                String[] parts = keyWord.split("\\.");
-                if ( parts.length != N_CONTEXT_NAME_PARTS ) {
-                    throw new StingException("Unexpected number of eval name parts " + keyWord + " length = " + parts.length + ", expected " + N_CONTEXT_NAME_PARTS);
-                } else {
-                    for ( int i = 0; i < parts.length; i++ )
-                        nameSizes[i] = Math.max(nameSizes[i], parts[i].length());
-                }
+        for ( EvaluationContext group : contexts ) {
+            String[] parts = group.getDisplayName().split("\\.");
+            if ( parts.length != N_CONTEXT_NAME_PARTS ) {
+                throw new StingException("Unexpected number of eval name parts " + group.getDisplayName() + " length = " + parts.length + ", expected " + N_CONTEXT_NAME_PARTS);
+            } else {
+                for ( int i = 0; i < parts.length; i++ )
+                    nameSizes[i] = Math.max(nameSizes[i], parts[i].length());
            }
        }
    }
@ -422,7 +480,7 @@ public class VariantEval2Walker extends RodWalker<Integer, Integer> {
        int i = 0;
        for ( String part : keyWord.split("\\.") ) {
            //System.out.printf("part %s %d%n", part, nameSizes[i]);
-            s.append(String.format("%"+nameSizes[i]+"s ", part));
+            s.append(String.format("%" + nameSizes[i] + "s ", part));
            i++;
        }

@ -436,25 +494,26 @@ public class VariantEval2Walker extends RodWalker<Integer, Integer> {
        for ( String evalName : variantEvaluationNames ) {
            boolean first = true;
            out.printf("%n%n");
+
            // todo -- show that comp is dbsnp, etc. is columns
-            for ( String contextName : Utils.sorted(contexts.keySet()) ) {
-                EvaluationContext group = contexts.get(contextName);
+            String lastEvalTrack = null;
+            for ( EvaluationContext group : contexts ) {
+                if ( lastEvalTrack == null || ! lastEvalTrack.equals(group.evalTrackName) ) {
+                    out.printf("%s%n", Utils.dupString('-', 80));
+                    lastEvalTrack = group.evalTrackName;
+                }

-                out.printf("%s%n", Utils.dupString('-', 80));
-                for ( String evalSubgroupName : Utils.sorted(group.keySet()) ) {
-                    Set<VariantEvaluator> evalSet = group.get(evalSubgroupName);
-                    VariantEvaluator eval = getEvalByName(evalName, evalSet);
-                    String keyWord = contextName + "." + evalSubgroupName;
-                    if ( eval.enabled() ) {
+                VariantEvaluator eval = getEvalByName(evalName, group.evaluations);
+                String keyWord = group.getDisplayName();

-                        if ( first ) {
-                            out.printf("%20s %s %s%n", evalName, formatKeyword(CONTEXT_HEADER), Utils.join("\t", eval.getTableHeader()));
-                            first = false;
-                        }
-
-                        for ( List<String> row : eval.getTableRows() )
-                            out.printf("%20s %s %s%n", evalName, formatKeyword(keyWord), Utils.join("\t", row));
+                if ( eval.enabled() ) {
+                    if ( first ) {
+                        out.printf("%20s %s %s%n", evalName, formatKeyword(CONTEXT_HEADER), Utils.join("\t", eval.getTableHeader()));
+                        first = false;
                    }
+
+                    for ( List<String> row : eval.getTableRows() )
+                        out.printf("%20s %s %s%n", evalName, formatKeyword(keyWord), Utils.join("\t", row));
                }
            }
        }
--- a/java/test/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/VariantEval2IntegrationTest.java
+++ b/java/test/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/VariantEval2IntegrationTest.java
@ -14,17 +14,14 @@ public class VariantEval2IntegrationTest extends WalkerTest {
            " -R " + oneKGLocation + "reference/human_b36_both.fasta";

    private static String root = cmdRoot +
-            " -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod" +
-            " -B eval,VCF,/humgen/gsa-hpprojects/GATK/data/Validation_Data/yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf";
-
-    static HashMap<String, String> expectations = new HashMap<String, String>();
-    static {
-        expectations.put("-L 1:1-10,000,000", "e7dba09b1856b9be86816939596a5062");
-        expectations.put("-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -MVQ 0", "c170c672ca2ef86068cc5dee9aaac022");
-    }
+            " -D " + GATKDataLocation + "dbsnp_129_b36.rod" +
+            " -B eval,VCF," + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf";

    @Test
    public void testVE2Simple() {
+        HashMap<String, String> expectations = new HashMap<String, String>();
+        expectations.put("-L 1:1-10,000,000", "d58a2a22e5fb3a3d8d90ba02de37f62b");
+        expectations.put("-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -MVQ 0", "8a928c8ad99428445e53b0b83f8ccdfa");

        for ( Map.Entry<String, String> entry : expectations.entrySet() ) {
            String extraArgs = entry.getKey();
@ -37,12 +34,35 @@ public class VariantEval2IntegrationTest extends WalkerTest {
        }
    }

+    @Test
+    public void testVE2Complex() {
+        HashMap<String, String> expectations = new HashMap<String, String>();
+        String extraArgs1 = "-L " + validationDataLocation + "chr1_b36_pilot3.interval_list -family NA19238+NA19239=NA19240 -MVQ 30" +
+                " -B dbsnp_130,dbSNP," + GATKDataLocation + "dbsnp_130_b36.rod" +
+                " -B comp_hapmap,VCF," + validationDataLocation + "CEU_hapmap_nogt_23.vcf";
+
+        String eqMD5s = "380e082222111c7bf962095d9afca8da"; // next two examples should be the same!
+        expectations.put("", eqMD5s);
+        expectations.put(" -known comp_hapmap -known dbsnp", eqMD5s);
+        expectations.put(" -known comp_hapmap", "90d7d4d0ff370e9457978b2869782aa0");
+
+        for ( Map.Entry<String, String> entry : expectations.entrySet() ) {
+            String extraArgs2 = entry.getKey();
+            String md5 = entry.getValue();
+
+            WalkerTestSpec spec = new WalkerTestSpec( root + " " + extraArgs1 + extraArgs2 + " -o %s",
+                    1, // just one output file
+                    Arrays.asList(md5));
+            executeTest("testVE2Complex", spec);
+        }
+    }
+
    @Test
    public void testVE2WriteVCF() {
        String extraArgs = "-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -MVQ 30";
        WalkerTestSpec spec = new WalkerTestSpec( root + " " + extraArgs + " -o %s -outputVCF %s",
                2,
-                Arrays.asList("c53d7638df2d7440dee1fd274d1f6384", "9ec81f7389c0971e44e4b8d2d4af3008"));
+                Arrays.asList("b7d52d13e6eb3d593395a644583e449a", "9ec81f7389c0971e44e4b8d2d4af3008"));
        executeTest("testVE2WriteVCF", spec);
    }
-}
+}