From 6be5e8286037e1a20b7b85508411db43d16a36c0 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Mon, 26 Mar 2012 07:42:15 -0400 Subject: [PATCH] VariantEval scalability optimizations -- StateKey no longer extends TreeMap. It's now a final immutable data structure that caches it's toString and hashcode values. TODO optimizations to entirely remove the TreeMap and just store the HashMap for performance and use the tree for the sorted tostring function. -- NewEvaluationContext has a method makeStateKey() that contains all of the functionality that once was spread around VEUtils -- AnalysisModuleScanner uses an annotationCache to speed up the reflections getAnnotations() call when invoked over and over on the same objects. Still expensive to convert each field to a string for the cache, but the only way around that is a complete refactoring of the toTransversalDone of VE -- VariantEvaluator base class has a cached getSimpleName() function -- VEUtils: general cleanup due to refactoring of StateKey -- VEWalker: much better iteration of map data structures. If you need access to iterate over all key/value pairs use the Map.Entry construct with entrySet. This is far better than iterating over the keys and calling get() on each key. --- .../varianteval/VariantEvalWalker.java | 13 ++-- .../evaluators/VariantEvaluator.java | 9 +++ .../util/AnalysisModuleScanner.java | 14 +++- .../util/NewEvaluationContext.java | 10 +++ .../walkers/varianteval/util/StateKey.java | 74 +++++++++++++++---- .../varianteval/util/VariantEvalUtils.java | 25 +------ 6 files changed, 101 insertions(+), 44 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java index 2b9f159ac..4bfd90eac 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java @@ -482,11 +482,13 @@ public class VariantEvalWalker extends RodWalker implements Tr public void onTraversalDone(Integer result) { logger.info("Finalizing variant report"); - for ( StateKey stateKey : evaluationContexts.keySet() ) { - NewEvaluationContext nec = evaluationContexts.get(stateKey); + for ( Map.Entry ecElt : evaluationContexts.entrySet() ) { + final StateKey stateKey = ecElt.getKey(); + final NewEvaluationContext nec = ecElt.getValue(); for ( VariantEvaluator ve : nec.getEvaluationClassList().values() ) { ve.finalizeEvaluation(); + final String veName = ve.getSimpleName(); // ve.getClass().getSimpleName(); AnalysisModuleScanner scanner = new AnalysisModuleScanner(ve); Map datamap = scanner.getData(); @@ -498,7 +500,7 @@ public class VariantEvalWalker extends RodWalker implements Tr if (field.get(ve) instanceof TableType) { TableType t = (TableType) field.get(ve); - final String subTableName = ve.getClass().getSimpleName() + "." + field.getName(); + final String subTableName = veName + "." + field.getName(); final DataPoint dataPointAnn = datamap.get(field); GATKReportTable table; @@ -539,11 +541,10 @@ public class VariantEvalWalker extends RodWalker implements Tr } } } else { - GATKReportTable table = report.getTable(ve.getClass().getSimpleName()); + GATKReportTable table = report.getTable(veName); for ( VariantStratifier vs : stratificationObjects ) { - String columnName = vs.getName(); - + final String columnName = vs.getName(); table.set(stateKey.toString(), columnName, stateKey.get(vs.getName())); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantEvaluator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantEvaluator.java index d5cf685de..226429439 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantEvaluator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantEvaluator.java @@ -8,6 +8,11 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext; public abstract class VariantEvaluator { private VariantEvalWalker walker; + private final String simpleName; + + protected VariantEvaluator() { + this.simpleName = getClass().getSimpleName(); + } public void initialize(VariantEvalWalker walker) { this.walker = walker; @@ -90,4 +95,8 @@ public abstract class VariantEvaluator { protected static String formattedRatio(final int num, final int denom) { return denom == 0 ? "NA" : String.format("%.2f", num / (1.0 * denom)); } + + public String getSimpleName() { + return simpleName; + } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/AnalysisModuleScanner.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/AnalysisModuleScanner.java index db44e9e28..793bafdd0 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/AnalysisModuleScanner.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/AnalysisModuleScanner.java @@ -27,6 +27,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.lang.annotation.Annotation; import java.lang.reflect.Field; +import java.util.HashMap; import java.util.LinkedHashMap; import java.util.Map; @@ -40,6 +41,7 @@ import java.util.Map; * the object, a Mashalling object can serialize or deserialize a analysis module. */ public class AnalysisModuleScanner { + final private static Map annotationCache = new HashMap(); // what we extracted from the class private Map datums = new LinkedHashMap(); // the data we've discovered @@ -84,12 +86,22 @@ public class AnalysisModuleScanner { // get the fields from the class, and extract for ( Class superCls = cls; superCls != null; superCls=superCls.getSuperclass() ) { for (Field f : superCls.getDeclaredFields()) - for (Annotation annotation : f.getAnnotations()) { + for (Annotation annotation : getAnnotations(f)) { if (annotation.annotationType().equals(DataPoint.class)) datums.put(f,(DataPoint) annotation); } } } + + private Annotation[] getAnnotations(final Field field) { + final String fieldName = field.toString(); + Annotation[] annotations = annotationCache.get(fieldName); + if ( annotations == null ) { + annotations = field.getAnnotations(); + annotationCache.put(fieldName, annotations); + } + return annotations; + } /** * diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/NewEvaluationContext.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/NewEvaluationContext.java index 09f2c0168..b5c6a1ecf 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/NewEvaluationContext.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/NewEvaluationContext.java @@ -36,6 +36,16 @@ public class NewEvaluationContext extends HashMap { return new TreeMap(evaluationInstances); } + public StateKey makeStateKey() { + Map map = new HashMap(size()); + + for (Map.Entry elt : this.entrySet() ) { + map.put(elt.getKey().getName(), elt.getValue()); + } + + return new StateKey(map); + } + public void apply(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context, VariantContext comp, VariantContext eval) { for ( final VariantEvaluator evaluation : evaluationInstances.values() ) { // the other updateN methods don't see a null context diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/StateKey.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/StateKey.java index 96bd9a9b7..36b09300b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/StateKey.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/StateKey.java @@ -3,25 +3,67 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.util; import java.util.Map; import java.util.TreeMap; -public class StateKey extends TreeMap { -// public int hashCode() { -// int hashCode = 1; -// -// for (final Map.Entry pair : this.entrySet()) { -// hashCode *= pair.getKey().hashCode() + pair.getValue().hashCode(); -// } -// -// return hashCode; -// } +/** + * A final constant class representing the specific state configuration + * for a VariantEvaluator instance. + * + * TODO optimizations to entirely remove the TreeMap and just store the HashMap for performance and use the tree for the sorted tostring function. + */ +public final class StateKey { + /** High-performance cache of the toString operation for a constant class */ + private final String string; + private final TreeMap states; - public String toString() { - String value = ""; + public StateKey(final Map states) { + this.states = new TreeMap(states); + this.string = formatString(); + } - for ( final String key : this.keySet() ) { - //value += "\tstate " + key + ":" + this.get(key) + "\n"; - value += String.format("%s:%s;", key, this.get(key)); + public StateKey(final StateKey toOverride, final String keyOverride, final String valueOverride) { + if ( toOverride == null ) { + this.states = new TreeMap(); + } else { + this.states = new TreeMap(toOverride.states); } - return value; + this.states.put(keyOverride, valueOverride); + this.string = formatString(); + } + + @Override + public boolean equals(final Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + + final StateKey stateKey = (StateKey) o; + + if (states != null ? !states.equals(stateKey.states) : stateKey.states != null) return false; + + return true; + } + + @Override + public int hashCode() { + return states.hashCode(); + } + + @Override + public String toString() { + return string; + } + + private final String formatString() { + StringBuilder b = new StringBuilder(); + + for ( Map.Entry entry : states.entrySet() ) { + b.append(String.format("%s:%s;", entry.getKey(), entry.getValue())); + } + + return b.toString(); + } + + // TODO -- might be slow because of tree map + public String get(final String key) { + return states.get(key); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java index 91c7140e6..9b4ae129a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java @@ -214,20 +214,9 @@ public class VariantEvalUtils { ecs.putAll(initializeEvaluationContexts(stratificationObjects, evaluationObjects, newStratStack, nec)); } } else { - HashMap necs = new HashMap(); - - StateKey stateKey = new StateKey(); - for (VariantStratifier vs : ec.keySet()) { - String state = ec.get(vs); - - stateKey.put(vs.getName(), state); - } - + final StateKey stateKey = ec.makeStateKey(); ec.addEvaluationClassList(variantEvalWalker, stateKey, evaluationObjects); - - necs.put(stateKey, ec); - - return necs; + return new HashMap(Collections.singletonMap(stateKey, ec)); } return ecs; @@ -428,14 +417,8 @@ public class VariantEvalUtils { HashMap> oneSetOfStates = newStateStack.pop(); VariantStratifier vs = oneSetOfStates.keySet().iterator().next(); - for (String state : oneSetOfStates.get(vs)) { - StateKey newStateKey = new StateKey(); - if (stateKey != null) { - newStateKey.putAll(stateKey); - } - - newStateKey.put(vs.getName(), state); - + for (final String state : oneSetOfStates.get(vs)) { + final StateKey newStateKey = new StateKey(stateKey, vs.getName(), state); initializeStateKeys(stateMap, newStateStack, newStateKey, stateKeys); } } else {