From b063bcd38d2fccf9eea8296d9bc7357edbea9668 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 23 Mar 2012 21:02:21 -0400 Subject: [PATCH] Removing update0 support in VariantEval -- Now the only use for update0, calculating the number of processed loci, is centrally tracked in the walker itself not the evaluations. -- This allows us to avoid calling update0 are every genomic base in 100ks of evaluates when there are a lot of stratifications. -- No need to modify the integration tests, this optimization doesn't change the result of the calculation --- .../varianteval/VariantEvalWalker.java | 17 +++++++--- .../varianteval/evaluators/CountVariants.java | 5 +-- .../evaluators/GenotypePhasingEvaluator.java | 9 +---- .../MendelianViolationEvaluator.java | 23 +------------ .../evaluators/MultiallelicSummary.java | 10 +++--- .../evaluators/VariantEvaluator.java | 33 +++++++++---------- .../evaluators/VariantSummary.java | 16 ++++----- .../util/NewEvaluationContext.java | 12 +------ 8 files changed, 42 insertions(+), 83 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java index 4fc7a1f41..2b9f159ac 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java @@ -214,6 +214,9 @@ public class VariantEvalWalker extends RodWalker implements Tr // Public constants private static String ALL_SAMPLE_NAME = "all"; + // the number of processed bp for this walker + long nProcessedLoci = 0; + // Utility class private final VariantEvalUtils variantEvalUtils = new VariantEvalUtils(this); @@ -326,10 +329,10 @@ public class VariantEvalWalker extends RodWalker implements Tr */ @Override public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - for ( final NewEvaluationContext nec : evaluationContexts.values() ) { - synchronized (nec) { - nec.update0(tracker, ref, context); - } + // we track the processed bp and expose this for modules instead of wasting CPU power on calculating + // the same thing over and over in evals that want the processed bp + synchronized (this) { + nProcessedLoci += context.getSkippedBases() + (ref == null ? 0 : 1); } if (tracker != null) { @@ -455,7 +458,7 @@ public class VariantEvalWalker extends RodWalker implements Tr if ( lenientMatch == null ) lenientMatch = comp; break; case NO_MATCH: - ; + // do nothing } } @@ -581,6 +584,10 @@ public class VariantEvalWalker extends RodWalker implements Tr public Set getJexlExpressions() { return jexlExpressions; } + public long getnProcessedLoci() { + return nProcessedLoci; + } + public Set getContigNames() { final TreeSet contigs = new TreeSet(); for( final SAMSequenceRecord r : getToolkit().getReferenceDataSource().getReference().getSequenceDictionary().getSequences()) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CountVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CountVariants.java index 6fc4208ee..3a2635121 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CountVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CountVariants.java @@ -89,10 +89,6 @@ public class CountVariants extends VariantEvaluator implements StandardEval { return 1; // we only need to see each eval track } - public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - nProcessedLoci += context.getSkippedBases() + (ref == null ? 0 : 1); - } - public String update1(VariantContext vc1, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { nCalledLoci++; @@ -192,6 +188,7 @@ public class CountVariants extends VariantEvaluator implements StandardEval { } public void finalizeEvaluation() { + nProcessedLoci = getWalker().getnProcessedLoci(); variantRate = perLocusRate(nVariantLoci); variantRatePerBp = perLocusRInverseRate(nVariantLoci); heterozygosity = perLocusRate(nHets); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypePhasingEvaluator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypePhasingEvaluator.java index 2f9671d90..41979798e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypePhasingEvaluator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypePhasingEvaluator.java @@ -60,6 +60,7 @@ public class GenotypePhasingEvaluator extends VariantEvaluator { double minPhaseQuality = 10.0; public void initialize(VariantEvalWalker walker) { + super.initialize(walker); this.samplePhasingStatistics = new SamplePhasingStatistics(walker.getMinPhaseQuality()); this.samplePrevGenotypes = new SamplePreviousGenotypes(); } @@ -294,14 +295,6 @@ class CompEvalGenotypes { public Genotype getEvalGenotype() { return evalGt; } - - public void setCompGenotype(Genotype compGt) { - this.compGt = compGt; - } - - public void setEvalGenotype(Genotype evalGt) { - this.evalGt = evalGt; - } } class SamplePreviousGenotypes { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MendelianViolationEvaluator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MendelianViolationEvaluator.java index 7f3bf6290..db2bf61c6 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MendelianViolationEvaluator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MendelianViolationEvaluator.java @@ -10,7 +10,6 @@ import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; import org.broadinstitute.sting.utils.MendelianViolation; import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.io.PrintStream; import java.util.Map; import java.util.Set; @@ -60,17 +59,6 @@ public class MendelianViolationEvaluator extends VariantEvaluator { @DataPoint(description = "Number of mendelian violations found", format = "%d") long nViolations; - - /*@DataPoint(description = "number of child hom ref calls where the parent was hom variant", format = "%d") - long KidHomRef_ParentHomVar; - @DataPoint(description = "number of child het calls where the parent was hom ref", format = "%d") - long KidHet_ParentsHomRef; - @DataPoint(description = "number of child het calls where the parent was hom variant", format = "%d") - long KidHet_ParentsHomVar; - @DataPoint(description = "number of child hom variant calls where the parent was hom ref", format = "%d") - long KidHomVar_ParentHomRef; - */ - @DataPoint(description="Number of mendelian violations of the type HOM_REF/HOM_REF -> HOM_VAR", format = "%d") long mvRefRef_Var; @DataPoint(description="Number of mendelian violations of the type HOM_REF/HOM_REF -> HET", format = "%d") @@ -88,12 +76,6 @@ public class MendelianViolationEvaluator extends VariantEvaluator { @DataPoint(description="Number of mendelian violations of the type HOM_VAR/HOM_VAR -> HET", format = "%d") long mvVarVar_Het; - - /*@DataPoint(description ="Number of inherited var alleles from het parents", format = "%d") - long nInheritedVar; - @DataPoint(description ="Number of inherited ref alleles from het parents", format = "%d") - long nInheritedRef;*/ - @DataPoint(description="Number of HomRef/HomRef/HomRef trios", format = "%d") long HomRefHomRef_HomRef; @DataPoint(description="Number of Het/Het/Het trios", format = "%d") @@ -120,18 +102,15 @@ public class MendelianViolationEvaluator extends VariantEvaluator { long HomVarHet_inheritedVar; MendelianViolation mv; - PrintStream mvFile; Map> families; public void initialize(VariantEvalWalker walker) { - //Changed by Laurent Francioli - 2011-06-07 - //mv = new MendelianViolation(walker.getFamilyStructure(), walker.getMendelianViolationQualThreshold()); + super.initialize(walker); mv = new MendelianViolation(walker.getMendelianViolationQualThreshold(),false); families = walker.getSampleDB().getFamilies(); } public boolean enabled() { - //return getVEWalker().FAMILY_STRUCTURE != null; return true; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MultiallelicSummary.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MultiallelicSummary.java index 1c34be4a1..90c2def0b 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MultiallelicSummary.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MultiallelicSummary.java @@ -28,11 +28,12 @@ import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker; import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.variantcontext.*; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; @Analysis(description = "Evaluation summary for multi-allelic variants") public class MultiallelicSummary extends VariantEvaluator implements StandardEval { @@ -90,10 +91,6 @@ public class MultiallelicSummary extends VariantEvaluator implements StandardEva @Override public boolean enabled() { return true; } @Override public int getComparisonOrder() { return 2; } - public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - nProcessedLoci += context.getSkippedBases() + (ref == null ? 0 : 1); - } - public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { if ( eval == null || eval.isMonomorphicInSamples() ) return null; @@ -152,6 +149,7 @@ public class MultiallelicSummary extends VariantEvaluator implements StandardEva } public void finalizeEvaluation() { + nProcessedLoci = getWalker().getnProcessedLoci(); processedMultiSnpRatio = (double)nMultiSNPs / (double)nProcessedLoci; variantMultiSnpRatio = (double)nMultiSNPs / (double)nSNPs; processedMultiIndelRatio = (double)nMultiIndels / (double)nProcessedLoci; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantEvaluator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantEvaluator.java index 7e5cf37ff..d5cf685de 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantEvaluator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantEvaluator.java @@ -4,14 +4,18 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker; -import org.broadinstitute.sting.gatk.walkers.varianteval.util.NewEvaluationContext; -import org.broadinstitute.sting.gatk.walkers.varianteval.util.StateKey; import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.Collection; - public abstract class VariantEvaluator { - public void initialize(VariantEvalWalker walker) {} + private VariantEvalWalker walker; + + public void initialize(VariantEvalWalker walker) { + this.walker = walker; + } + + public VariantEvalWalker getWalker() { + return walker; + } public abstract boolean enabled(); @@ -19,9 +23,8 @@ public abstract class VariantEvaluator { public abstract int getComparisonOrder(); // called at all sites, regardless of eval context itself; useful for counting processed bases - public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - - } + // No longer available. The processed bp is kept in VEW itself for performance reasons + // public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { public String update1(VariantContext eval, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { return null; @@ -45,17 +48,13 @@ public abstract class VariantEvaluator { return ((double)num) / (Math.max(denom, 1)); } - public boolean stateIsApplicable(StateKey stateKey) { - return true; - } - /** * Returns true if the variant in vc was a singleton in the original input evaluation * set, regardless of variant context subsetting that has occurred. - * @param eval + * @param eval the VariantContext being assessed for this previous status as a singleton * @return true if eval was originally a singleton site */ - protected static final boolean variantWasSingleton(final VariantContext eval) { + protected static boolean variantWasSingleton(final VariantContext eval) { return eval.getAttributeAsBoolean(VariantEvalWalker.IS_SINGLETON_KEY, false); } @@ -66,7 +65,7 @@ public abstract class VariantEvaluator { * @param all number of all variants * @return a String novelty rate, or NA if all == 0 */ - protected static final String formattedNoveltyRate(final int known, final int all) { + protected static String formattedNoveltyRate(final int known, final int all) { return formattedPercent(all - known, all); } @@ -77,7 +76,7 @@ public abstract class VariantEvaluator { * @param total count of all objects, including x * @return a String percent rate, or NA if total == 0 */ - protected static final String formattedPercent(final int x, final int total) { + protected static String formattedPercent(final int x, final int total) { return total == 0 ? "NA" : String.format("%.2f", x / (1.0*total)); } @@ -88,7 +87,7 @@ public abstract class VariantEvaluator { * @param denom number of observations in the denumerator * @return a String formatted ratio, or NA if all == 0 */ - protected static final String formattedRatio(final int num, final int denom) { + protected static String formattedRatio(final int num, final int denom) { return denom == 0 ? "NA" : String.format("%.2f", num / (1.0 * denom)); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantSummary.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantSummary.java index 31f9a4f78..64161ac34 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantSummary.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantSummary.java @@ -49,7 +49,6 @@ public class VariantSummary extends VariantEvaluator implements StandardEval { /** Indels with size greater than this value are tallied in the CNV column */ private final static int MAX_INDEL_LENGTH = 50; private final static double MIN_CNV_OVERLAP = 0.5; - private VariantEvalWalker walker; public enum Type { SNP, INDEL, CNV @@ -152,7 +151,7 @@ public class VariantSummary extends VariantEvaluator implements StandardEval { public void initialize(VariantEvalWalker walker) { - this.walker = walker; + super.initialize(walker); nSamples = walker.getSampleNamesForEvaluation().size(); countsPerSample = new TypeSampleMap(walker.getSampleNamesForEvaluation()); @@ -176,11 +175,7 @@ public class VariantSummary extends VariantEvaluator implements StandardEval { return 2; // we only need to see each eval track } - public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - nProcessedLoci += context.getSkippedBases() + (ref == null ? 0 : 1); - } - - private final Type getType(VariantContext vc) { + private Type getType(VariantContext vc) { switch (vc.getType()) { case SNP: return Type.SNP; @@ -196,9 +191,9 @@ public class VariantSummary extends VariantEvaluator implements StandardEval { } } - private final boolean overlapsKnownCNV(VariantContext cnv) { + private boolean overlapsKnownCNV(VariantContext cnv) { if ( knownCNVs != null ) { - final GenomeLoc loc = walker.getGenomeLocParser().createGenomeLoc(cnv, true); + final GenomeLoc loc = getWalker().getGenomeLocParser().createGenomeLoc(cnv, true); IntervalTree intervalTree = knownCNVs.get(loc.getContig()); final Iterator> nodeIt = intervalTree.overlappers(loc.getStart(), loc.getStop()); @@ -252,13 +247,14 @@ public class VariantSummary extends VariantEvaluator implements StandardEval { return null; // we don't capture any interesting sites } - private final String noveltyRate(Type type) { + private String noveltyRate(Type type) { final int all = allVariantCounts.all(type); final int known = knownVariantCounts.all(type); return formattedNoveltyRate(known, all); } public void finalizeEvaluation() { + nProcessedLoci = getWalker().getnProcessedLoci(); nSNPs = allVariantCounts.all(Type.SNP); nIndels = allVariantCounts.all(Type.INDEL); nSVs = allVariantCounts.all(Type.CNV); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/NewEvaluationContext.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/NewEvaluationContext.java index f9d8e437b..09f2c0168 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/NewEvaluationContext.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/NewEvaluationContext.java @@ -23,9 +23,7 @@ public class NewEvaluationContext extends HashMap { final VariantEvaluator eval = c.newInstance(); eval.initialize(walker); - if (eval.stateIsApplicable(stateKey)) { - evaluationInstances.put(c.getSimpleName(), eval); - } + evaluationInstances.put(c.getSimpleName(), eval); } catch (InstantiationException e) { throw new StingException("Unable to instantiate eval module '" + c.getSimpleName() + "'"); } catch (IllegalAccessException e) { @@ -40,8 +38,6 @@ public class NewEvaluationContext extends HashMap { public void apply(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context, VariantContext comp, VariantContext eval) { for ( final VariantEvaluator evaluation : evaluationInstances.values() ) { - // we always call update0 in case the evaluation tracks things like number of bases covered - // the other updateN methods don't see a null context if ( tracker == null ) continue; @@ -65,10 +61,4 @@ public class NewEvaluationContext extends HashMap { } } } - - public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - for ( final VariantEvaluator evaluation : evaluationInstances.values() ) { - evaluation.update0(tracker, ref, context); - } - } }