Removing update0 support in VariantEval

-- Now the only use for update0 — calculating the number of processed loci — is centrally tracked in the walker itself, not in the evaluations.
-- This allows us to avoid calling update0 at every genomic base in 100Ks of evaluators when there are a lot of stratifications.
-- No need to modify the integration tests, this optimization doesn't change the result of the calculation
This commit is contained in:
Mark DePristo 2012-03-23 21:02:21 -04:00
parent 0509d316d9
commit b063bcd38d
8 changed files with 42 additions and 83 deletions

View File

@ -214,6 +214,9 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
// Public constants // Public constants
private static String ALL_SAMPLE_NAME = "all"; private static String ALL_SAMPLE_NAME = "all";
// the number of processed bp for this walker
long nProcessedLoci = 0;
// Utility class // Utility class
private final VariantEvalUtils variantEvalUtils = new VariantEvalUtils(this); private final VariantEvalUtils variantEvalUtils = new VariantEvalUtils(this);
@ -326,10 +329,10 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
*/ */
@Override @Override
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
for ( final NewEvaluationContext nec : evaluationContexts.values() ) { // we track the processed bp and expose this for modules instead of wasting CPU power on calculating
synchronized (nec) { // the same thing over and over in evals that want the processed bp
nec.update0(tracker, ref, context); synchronized (this) {
} nProcessedLoci += context.getSkippedBases() + (ref == null ? 0 : 1);
} }
if (tracker != null) { if (tracker != null) {
@ -455,7 +458,7 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
if ( lenientMatch == null ) lenientMatch = comp; if ( lenientMatch == null ) lenientMatch = comp;
break; break;
case NO_MATCH: case NO_MATCH:
; // do nothing
} }
} }
@ -581,6 +584,10 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
public Set<SortableJexlVCMatchExp> getJexlExpressions() { return jexlExpressions; } public Set<SortableJexlVCMatchExp> getJexlExpressions() { return jexlExpressions; }
public long getnProcessedLoci() {
return nProcessedLoci;
}
public Set<String> getContigNames() { public Set<String> getContigNames() {
final TreeSet<String> contigs = new TreeSet<String>(); final TreeSet<String> contigs = new TreeSet<String>();
for( final SAMSequenceRecord r : getToolkit().getReferenceDataSource().getReference().getSequenceDictionary().getSequences()) { for( final SAMSequenceRecord r : getToolkit().getReferenceDataSource().getReference().getSequenceDictionary().getSequences()) {

View File

@ -89,10 +89,6 @@ public class CountVariants extends VariantEvaluator implements StandardEval {
return 1; // we only need to see each eval track return 1; // we only need to see each eval track
} }
public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
nProcessedLoci += context.getSkippedBases() + (ref == null ? 0 : 1);
}
public String update1(VariantContext vc1, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { public String update1(VariantContext vc1, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
nCalledLoci++; nCalledLoci++;
@ -192,6 +188,7 @@ public class CountVariants extends VariantEvaluator implements StandardEval {
} }
public void finalizeEvaluation() { public void finalizeEvaluation() {
nProcessedLoci = getWalker().getnProcessedLoci();
variantRate = perLocusRate(nVariantLoci); variantRate = perLocusRate(nVariantLoci);
variantRatePerBp = perLocusRInverseRate(nVariantLoci); variantRatePerBp = perLocusRInverseRate(nVariantLoci);
heterozygosity = perLocusRate(nHets); heterozygosity = perLocusRate(nHets);

View File

@ -60,6 +60,7 @@ public class GenotypePhasingEvaluator extends VariantEvaluator {
double minPhaseQuality = 10.0; double minPhaseQuality = 10.0;
public void initialize(VariantEvalWalker walker) { public void initialize(VariantEvalWalker walker) {
super.initialize(walker);
this.samplePhasingStatistics = new SamplePhasingStatistics(walker.getMinPhaseQuality()); this.samplePhasingStatistics = new SamplePhasingStatistics(walker.getMinPhaseQuality());
this.samplePrevGenotypes = new SamplePreviousGenotypes(); this.samplePrevGenotypes = new SamplePreviousGenotypes();
} }
@ -294,14 +295,6 @@ class CompEvalGenotypes {
public Genotype getEvalGenotype() { public Genotype getEvalGenotype() {
return evalGt; return evalGt;
} }
public void setCompGenotype(Genotype compGt) {
this.compGt = compGt;
}
public void setEvalGenotype(Genotype evalGt) {
this.evalGt = evalGt;
}
} }
class SamplePreviousGenotypes { class SamplePreviousGenotypes {

View File

@ -10,7 +10,6 @@ import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint;
import org.broadinstitute.sting.utils.MendelianViolation; import org.broadinstitute.sting.utils.MendelianViolation;
import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.io.PrintStream;
import java.util.Map; import java.util.Map;
import java.util.Set; import java.util.Set;
@ -60,17 +59,6 @@ public class MendelianViolationEvaluator extends VariantEvaluator {
@DataPoint(description = "Number of mendelian violations found", format = "%d") @DataPoint(description = "Number of mendelian violations found", format = "%d")
long nViolations; long nViolations;
/*@DataPoint(description = "number of child hom ref calls where the parent was hom variant", format = "%d")
long KidHomRef_ParentHomVar;
@DataPoint(description = "number of child het calls where the parent was hom ref", format = "%d")
long KidHet_ParentsHomRef;
@DataPoint(description = "number of child het calls where the parent was hom variant", format = "%d")
long KidHet_ParentsHomVar;
@DataPoint(description = "number of child hom variant calls where the parent was hom ref", format = "%d")
long KidHomVar_ParentHomRef;
*/
@DataPoint(description="Number of mendelian violations of the type HOM_REF/HOM_REF -> HOM_VAR", format = "%d") @DataPoint(description="Number of mendelian violations of the type HOM_REF/HOM_REF -> HOM_VAR", format = "%d")
long mvRefRef_Var; long mvRefRef_Var;
@DataPoint(description="Number of mendelian violations of the type HOM_REF/HOM_REF -> HET", format = "%d") @DataPoint(description="Number of mendelian violations of the type HOM_REF/HOM_REF -> HET", format = "%d")
@ -88,12 +76,6 @@ public class MendelianViolationEvaluator extends VariantEvaluator {
@DataPoint(description="Number of mendelian violations of the type HOM_VAR/HOM_VAR -> HET", format = "%d") @DataPoint(description="Number of mendelian violations of the type HOM_VAR/HOM_VAR -> HET", format = "%d")
long mvVarVar_Het; long mvVarVar_Het;
/*@DataPoint(description ="Number of inherited var alleles from het parents", format = "%d")
long nInheritedVar;
@DataPoint(description ="Number of inherited ref alleles from het parents", format = "%d")
long nInheritedRef;*/
@DataPoint(description="Number of HomRef/HomRef/HomRef trios", format = "%d") @DataPoint(description="Number of HomRef/HomRef/HomRef trios", format = "%d")
long HomRefHomRef_HomRef; long HomRefHomRef_HomRef;
@DataPoint(description="Number of Het/Het/Het trios", format = "%d") @DataPoint(description="Number of Het/Het/Het trios", format = "%d")
@ -120,18 +102,15 @@ public class MendelianViolationEvaluator extends VariantEvaluator {
long HomVarHet_inheritedVar; long HomVarHet_inheritedVar;
MendelianViolation mv; MendelianViolation mv;
PrintStream mvFile;
Map<String,Set<Sample>> families; Map<String,Set<Sample>> families;
public void initialize(VariantEvalWalker walker) { public void initialize(VariantEvalWalker walker) {
//Changed by Laurent Francioli - 2011-06-07 super.initialize(walker);
//mv = new MendelianViolation(walker.getFamilyStructure(), walker.getMendelianViolationQualThreshold());
mv = new MendelianViolation(walker.getMendelianViolationQualThreshold(),false); mv = new MendelianViolation(walker.getMendelianViolationQualThreshold(),false);
families = walker.getSampleDB().getFamilies(); families = walker.getSampleDB().getFamilies();
} }
public boolean enabled() { public boolean enabled() {
//return getVEWalker().FAMILY_STRUCTURE != null;
return true; return true;
} }

View File

@ -28,11 +28,12 @@ import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker;
import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis;
import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.variantcontext.*; import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
@Analysis(description = "Evaluation summary for multi-allelic variants") @Analysis(description = "Evaluation summary for multi-allelic variants")
public class MultiallelicSummary extends VariantEvaluator implements StandardEval { public class MultiallelicSummary extends VariantEvaluator implements StandardEval {
@ -90,10 +91,6 @@ public class MultiallelicSummary extends VariantEvaluator implements StandardEva
@Override public boolean enabled() { return true; } @Override public boolean enabled() { return true; }
@Override public int getComparisonOrder() { return 2; } @Override public int getComparisonOrder() { return 2; }
public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
nProcessedLoci += context.getSkippedBases() + (ref == null ? 0 : 1);
}
public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
if ( eval == null || eval.isMonomorphicInSamples() ) if ( eval == null || eval.isMonomorphicInSamples() )
return null; return null;
@ -152,6 +149,7 @@ public class MultiallelicSummary extends VariantEvaluator implements StandardEva
} }
public void finalizeEvaluation() { public void finalizeEvaluation() {
nProcessedLoci = getWalker().getnProcessedLoci();
processedMultiSnpRatio = (double)nMultiSNPs / (double)nProcessedLoci; processedMultiSnpRatio = (double)nMultiSNPs / (double)nProcessedLoci;
variantMultiSnpRatio = (double)nMultiSNPs / (double)nSNPs; variantMultiSnpRatio = (double)nMultiSNPs / (double)nSNPs;
processedMultiIndelRatio = (double)nMultiIndels / (double)nProcessedLoci; processedMultiIndelRatio = (double)nMultiIndels / (double)nProcessedLoci;

View File

@ -4,14 +4,18 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker; import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker;
import org.broadinstitute.sting.gatk.walkers.varianteval.util.NewEvaluationContext;
import org.broadinstitute.sting.gatk.walkers.varianteval.util.StateKey;
import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.Collection;
public abstract class VariantEvaluator { public abstract class VariantEvaluator {
public void initialize(VariantEvalWalker walker) {} private VariantEvalWalker walker;
public void initialize(VariantEvalWalker walker) {
this.walker = walker;
}
public VariantEvalWalker getWalker() {
return walker;
}
public abstract boolean enabled(); public abstract boolean enabled();
@ -19,9 +23,8 @@ public abstract class VariantEvaluator {
public abstract int getComparisonOrder(); public abstract int getComparisonOrder();
// called at all sites, regardless of eval context itself; useful for counting processed bases // called at all sites, regardless of eval context itself; useful for counting processed bases
public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { // No longer available. The processed bp is kept in VEW itself for performance reasons
// public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
}
public String update1(VariantContext eval, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { public String update1(VariantContext eval, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
return null; return null;
@ -45,17 +48,13 @@ public abstract class VariantEvaluator {
return ((double)num) / (Math.max(denom, 1)); return ((double)num) / (Math.max(denom, 1));
} }
public boolean stateIsApplicable(StateKey stateKey) {
return true;
}
/** /**
* Returns true if the variant in vc was a singleton in the original input evaluation * Returns true if the variant in vc was a singleton in the original input evaluation
* set, regardless of variant context subsetting that has occurred. * set, regardless of variant context subsetting that has occurred.
* @param eval * @param eval the VariantContext being assessed for this previous status as a singleton
* @return true if eval was originally a singleton site * @return true if eval was originally a singleton site
*/ */
protected static final boolean variantWasSingleton(final VariantContext eval) { protected static boolean variantWasSingleton(final VariantContext eval) {
return eval.getAttributeAsBoolean(VariantEvalWalker.IS_SINGLETON_KEY, false); return eval.getAttributeAsBoolean(VariantEvalWalker.IS_SINGLETON_KEY, false);
} }
@ -66,7 +65,7 @@ public abstract class VariantEvaluator {
* @param all number of all variants * @param all number of all variants
* @return a String novelty rate, or NA if all == 0 * @return a String novelty rate, or NA if all == 0
*/ */
protected static final String formattedNoveltyRate(final int known, final int all) { protected static String formattedNoveltyRate(final int known, final int all) {
return formattedPercent(all - known, all); return formattedPercent(all - known, all);
} }
@ -77,7 +76,7 @@ public abstract class VariantEvaluator {
* @param total count of all objects, including x * @param total count of all objects, including x
* @return a String percent rate, or NA if total == 0 * @return a String percent rate, or NA if total == 0
*/ */
protected static final String formattedPercent(final int x, final int total) { protected static String formattedPercent(final int x, final int total) {
return total == 0 ? "NA" : String.format("%.2f", x / (1.0*total)); return total == 0 ? "NA" : String.format("%.2f", x / (1.0*total));
} }
@ -88,7 +87,7 @@ public abstract class VariantEvaluator {
* @param denom number of observations in the denumerator * @param denom number of observations in the denumerator
* @return a String formatted ratio, or NA if all == 0 * @return a String formatted ratio, or NA if all == 0
*/ */
protected static final String formattedRatio(final int num, final int denom) { protected static String formattedRatio(final int num, final int denom) {
return denom == 0 ? "NA" : String.format("%.2f", num / (1.0 * denom)); return denom == 0 ? "NA" : String.format("%.2f", num / (1.0 * denom));
} }
} }

View File

@ -49,7 +49,6 @@ public class VariantSummary extends VariantEvaluator implements StandardEval {
/** Indels with size greater than this value are tallied in the CNV column */ /** Indels with size greater than this value are tallied in the CNV column */
private final static int MAX_INDEL_LENGTH = 50; private final static int MAX_INDEL_LENGTH = 50;
private final static double MIN_CNV_OVERLAP = 0.5; private final static double MIN_CNV_OVERLAP = 0.5;
private VariantEvalWalker walker;
public enum Type { public enum Type {
SNP, INDEL, CNV SNP, INDEL, CNV
@ -152,7 +151,7 @@ public class VariantSummary extends VariantEvaluator implements StandardEval {
public void initialize(VariantEvalWalker walker) { public void initialize(VariantEvalWalker walker) {
this.walker = walker; super.initialize(walker);
nSamples = walker.getSampleNamesForEvaluation().size(); nSamples = walker.getSampleNamesForEvaluation().size();
countsPerSample = new TypeSampleMap(walker.getSampleNamesForEvaluation()); countsPerSample = new TypeSampleMap(walker.getSampleNamesForEvaluation());
@ -176,11 +175,7 @@ public class VariantSummary extends VariantEvaluator implements StandardEval {
return 2; // we only need to see each eval track return 2; // we only need to see each eval track
} }
public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { private Type getType(VariantContext vc) {
nProcessedLoci += context.getSkippedBases() + (ref == null ? 0 : 1);
}
private final Type getType(VariantContext vc) {
switch (vc.getType()) { switch (vc.getType()) {
case SNP: case SNP:
return Type.SNP; return Type.SNP;
@ -196,9 +191,9 @@ public class VariantSummary extends VariantEvaluator implements StandardEval {
} }
} }
private final boolean overlapsKnownCNV(VariantContext cnv) { private boolean overlapsKnownCNV(VariantContext cnv) {
if ( knownCNVs != null ) { if ( knownCNVs != null ) {
final GenomeLoc loc = walker.getGenomeLocParser().createGenomeLoc(cnv, true); final GenomeLoc loc = getWalker().getGenomeLocParser().createGenomeLoc(cnv, true);
IntervalTree<GenomeLoc> intervalTree = knownCNVs.get(loc.getContig()); IntervalTree<GenomeLoc> intervalTree = knownCNVs.get(loc.getContig());
final Iterator<IntervalTree.Node<GenomeLoc>> nodeIt = intervalTree.overlappers(loc.getStart(), loc.getStop()); final Iterator<IntervalTree.Node<GenomeLoc>> nodeIt = intervalTree.overlappers(loc.getStart(), loc.getStop());
@ -252,13 +247,14 @@ public class VariantSummary extends VariantEvaluator implements StandardEval {
return null; // we don't capture any interesting sites return null; // we don't capture any interesting sites
} }
private final String noveltyRate(Type type) { private String noveltyRate(Type type) {
final int all = allVariantCounts.all(type); final int all = allVariantCounts.all(type);
final int known = knownVariantCounts.all(type); final int known = knownVariantCounts.all(type);
return formattedNoveltyRate(known, all); return formattedNoveltyRate(known, all);
} }
public void finalizeEvaluation() { public void finalizeEvaluation() {
nProcessedLoci = getWalker().getnProcessedLoci();
nSNPs = allVariantCounts.all(Type.SNP); nSNPs = allVariantCounts.all(Type.SNP);
nIndels = allVariantCounts.all(Type.INDEL); nIndels = allVariantCounts.all(Type.INDEL);
nSVs = allVariantCounts.all(Type.CNV); nSVs = allVariantCounts.all(Type.CNV);

View File

@ -23,9 +23,7 @@ public class NewEvaluationContext extends HashMap<VariantStratifier, String> {
final VariantEvaluator eval = c.newInstance(); final VariantEvaluator eval = c.newInstance();
eval.initialize(walker); eval.initialize(walker);
if (eval.stateIsApplicable(stateKey)) { evaluationInstances.put(c.getSimpleName(), eval);
evaluationInstances.put(c.getSimpleName(), eval);
}
} catch (InstantiationException e) { } catch (InstantiationException e) {
throw new StingException("Unable to instantiate eval module '" + c.getSimpleName() + "'"); throw new StingException("Unable to instantiate eval module '" + c.getSimpleName() + "'");
} catch (IllegalAccessException e) { } catch (IllegalAccessException e) {
@ -40,8 +38,6 @@ public class NewEvaluationContext extends HashMap<VariantStratifier, String> {
public void apply(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context, VariantContext comp, VariantContext eval) { public void apply(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context, VariantContext comp, VariantContext eval) {
for ( final VariantEvaluator evaluation : evaluationInstances.values() ) { for ( final VariantEvaluator evaluation : evaluationInstances.values() ) {
// we always call update0 in case the evaluation tracks things like number of bases covered
// the other updateN methods don't see a null context // the other updateN methods don't see a null context
if ( tracker == null ) if ( tracker == null )
continue; continue;
@ -65,10 +61,4 @@ public class NewEvaluationContext extends HashMap<VariantStratifier, String> {
} }
} }
} }
public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
for ( final VariantEvaluator evaluation : evaluationInstances.values() ) {
evaluation.update0(tracker, ref, context);
}
}
} }