Removing update0 support in VariantEval

-- The only use for update0, calculating the number of processed loci, is now tracked centrally in the walker itself rather than in the evaluations.
-- This allows us to avoid calling update0 at every genomic base in 100ks of evaluations when there are a lot of stratifications.
-- No need to modify the integration tests; this optimization doesn't change the result of the calculation.
This commit is contained in:
Mark DePristo 2012-03-23 21:02:21 -04:00
parent 0509d316d9
commit b063bcd38d
8 changed files with 42 additions and 83 deletions

View File

@ -214,6 +214,9 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
// Public constants
private static String ALL_SAMPLE_NAME = "all";
// the number of processed bp for this walker
long nProcessedLoci = 0;
// Utility class
private final VariantEvalUtils variantEvalUtils = new VariantEvalUtils(this);
@ -326,10 +329,10 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
*/
@Override
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
for ( final NewEvaluationContext nec : evaluationContexts.values() ) {
synchronized (nec) {
nec.update0(tracker, ref, context);
}
// we track the processed bp and expose this for modules instead of wasting CPU power on calculating
// the same thing over and over in evals that want the processed bp
synchronized (this) {
nProcessedLoci += context.getSkippedBases() + (ref == null ? 0 : 1);
}
if (tracker != null) {
@ -455,7 +458,7 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
if ( lenientMatch == null ) lenientMatch = comp;
break;
case NO_MATCH:
;
// do nothing
}
}
@ -581,6 +584,10 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
public Set<SortableJexlVCMatchExp> getJexlExpressions() { return jexlExpressions; }
/**
 * Returns the number of loci this walker has processed so far.
 *
 * <p>The counter is incremented in {@code map} inside a {@code synchronized (this)} block, so the
 * read is synchronized on the same monitor. Without it, a read of a non-volatile {@code long} is
 * neither guaranteed to be atomic nor guaranteed to observe the most recent write from another thread.
 *
 * @return the running total of processed loci (skipped bases plus sites with a reference context)
 */
public synchronized long getnProcessedLoci() {
    return nProcessedLoci;
}
public Set<String> getContigNames() {
final TreeSet<String> contigs = new TreeSet<String>();
for( final SAMSequenceRecord r : getToolkit().getReferenceDataSource().getReference().getSequenceDictionary().getSequences()) {

View File

@ -89,10 +89,6 @@ public class CountVariants extends VariantEvaluator implements StandardEval {
return 1; // we only need to see each eval track
}
public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
nProcessedLoci += context.getSkippedBases() + (ref == null ? 0 : 1);
}
public String update1(VariantContext vc1, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
nCalledLoci++;
@ -192,6 +188,7 @@ public class CountVariants extends VariantEvaluator implements StandardEval {
}
public void finalizeEvaluation() {
nProcessedLoci = getWalker().getnProcessedLoci();
variantRate = perLocusRate(nVariantLoci);
variantRatePerBp = perLocusRInverseRate(nVariantLoci);
heterozygosity = perLocusRate(nHets);

View File

@ -60,6 +60,7 @@ public class GenotypePhasingEvaluator extends VariantEvaluator {
double minPhaseQuality = 10.0;
/**
 * Prepares this evaluator for use: registers the walker with the base class and creates
 * fresh per-sample phasing bookkeeping objects.
 *
 * @param walker the walker this evaluator is attached to; also supplies the minimum phase quality
 */
public void initialize(VariantEvalWalker walker) {
// let VariantEvaluator record the walker so getWalker() works for this evaluator
super.initialize(walker);
// phasing statistics are parameterized by the walker's minimum phase quality
this.samplePhasingStatistics = new SamplePhasingStatistics(walker.getMinPhaseQuality());
this.samplePrevGenotypes = new SamplePreviousGenotypes();
}
@ -294,14 +295,6 @@ class CompEvalGenotypes {
public Genotype getEvalGenotype() {
return evalGt;
}
public void setCompGenotype(Genotype compGt) {
this.compGt = compGt;
}
public void setEvalGenotype(Genotype evalGt) {
this.evalGt = evalGt;
}
}
class SamplePreviousGenotypes {

View File

@ -10,7 +10,6 @@ import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint;
import org.broadinstitute.sting.utils.MendelianViolation;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.io.PrintStream;
import java.util.Map;
import java.util.Set;
@ -60,17 +59,6 @@ public class MendelianViolationEvaluator extends VariantEvaluator {
@DataPoint(description = "Number of mendelian violations found", format = "%d")
long nViolations;
/*@DataPoint(description = "number of child hom ref calls where the parent was hom variant", format = "%d")
long KidHomRef_ParentHomVar;
@DataPoint(description = "number of child het calls where the parent was hom ref", format = "%d")
long KidHet_ParentsHomRef;
@DataPoint(description = "number of child het calls where the parent was hom variant", format = "%d")
long KidHet_ParentsHomVar;
@DataPoint(description = "number of child hom variant calls where the parent was hom ref", format = "%d")
long KidHomVar_ParentHomRef;
*/
@DataPoint(description="Number of mendelian violations of the type HOM_REF/HOM_REF -> HOM_VAR", format = "%d")
long mvRefRef_Var;
@DataPoint(description="Number of mendelian violations of the type HOM_REF/HOM_REF -> HET", format = "%d")
@ -88,12 +76,6 @@ public class MendelianViolationEvaluator extends VariantEvaluator {
@DataPoint(description="Number of mendelian violations of the type HOM_VAR/HOM_VAR -> HET", format = "%d")
long mvVarVar_Het;
/*@DataPoint(description ="Number of inherited var alleles from het parents", format = "%d")
long nInheritedVar;
@DataPoint(description ="Number of inherited ref alleles from het parents", format = "%d")
long nInheritedRef;*/
@DataPoint(description="Number of HomRef/HomRef/HomRef trios", format = "%d")
long HomRefHomRef_HomRef;
@DataPoint(description="Number of Het/Het/Het trios", format = "%d")
@ -120,18 +102,15 @@ public class MendelianViolationEvaluator extends VariantEvaluator {
long HomVarHet_inheritedVar;
MendelianViolation mv;
PrintStream mvFile;
Map<String,Set<Sample>> families;
public void initialize(VariantEvalWalker walker) {
//Changed by Laurent Francioli - 2011-06-07
//mv = new MendelianViolation(walker.getFamilyStructure(), walker.getMendelianViolationQualThreshold());
super.initialize(walker);
mv = new MendelianViolation(walker.getMendelianViolationQualThreshold(),false);
families = walker.getSampleDB().getFamilies();
}
public boolean enabled() {
//return getVEWalker().FAMILY_STRUCTURE != null;
return true;
}

View File

@ -28,11 +28,12 @@ import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker;
import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis;
import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.variantcontext.*;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
@Analysis(description = "Evaluation summary for multi-allelic variants")
public class MultiallelicSummary extends VariantEvaluator implements StandardEval {
@ -90,10 +91,6 @@ public class MultiallelicSummary extends VariantEvaluator implements StandardEva
@Override public boolean enabled() { return true; }
@Override public int getComparisonOrder() { return 2; }
public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
nProcessedLoci += context.getSkippedBases() + (ref == null ? 0 : 1);
}
public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
if ( eval == null || eval.isMonomorphicInSamples() )
return null;
@ -152,6 +149,7 @@ public class MultiallelicSummary extends VariantEvaluator implements StandardEva
}
public void finalizeEvaluation() {
nProcessedLoci = getWalker().getnProcessedLoci();
processedMultiSnpRatio = (double)nMultiSNPs / (double)nProcessedLoci;
variantMultiSnpRatio = (double)nMultiSNPs / (double)nSNPs;
processedMultiIndelRatio = (double)nMultiIndels / (double)nProcessedLoci;

View File

@ -4,14 +4,18 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker;
import org.broadinstitute.sting.gatk.walkers.varianteval.util.NewEvaluationContext;
import org.broadinstitute.sting.gatk.walkers.varianteval.util.StateKey;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.Collection;
public abstract class VariantEvaluator {
public void initialize(VariantEvalWalker walker) {}
private VariantEvalWalker walker;
/**
 * Records the walker this evaluator is attached to so it can be retrieved later via getWalker().
 * Subclasses that override this method should call super.initialize(walker), otherwise
 * getWalker() will not return the walker.
 *
 * @param walker the VariantEvalWalker to associate with this evaluator
 */
public void initialize(VariantEvalWalker walker) {
this.walker = walker;
}
/**
 * Accessor for the walker associated with this evaluator.
 *
 * @return the walker stored by {@code initialize(VariantEvalWalker)}, or {@code null} if it has not been set
 */
public VariantEvalWalker getWalker() {
    return this.walker;
}
public abstract boolean enabled();
@ -19,9 +23,8 @@ public abstract class VariantEvaluator {
public abstract int getComparisonOrder();
// called at all sites, regardless of eval context itself; useful for counting processed bases
public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
}
// No longer available. The processed bp is kept in VEW itself for performance reasons
// public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
public String update1(VariantContext eval, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
return null;
@ -45,17 +48,13 @@ public abstract class VariantEvaluator {
return ((double)num) / (Math.max(denom, 1));
}
public boolean stateIsApplicable(StateKey stateKey) {
return true;
}
/**
* Returns true if the variant in vc was a singleton in the original input evaluation
* set, regardless of variant context subsetting that has occurred.
* @param eval
* @param eval the VariantContext being assessed for this previous status as a singleton
* @return true if eval was originally a singleton site
*/
protected static final boolean variantWasSingleton(final VariantContext eval) {
protected static boolean variantWasSingleton(final VariantContext eval) {
return eval.getAttributeAsBoolean(VariantEvalWalker.IS_SINGLETON_KEY, false);
}
@ -66,7 +65,7 @@ public abstract class VariantEvaluator {
* @param all number of all variants
* @return a String novelty rate, or NA if all == 0
*/
protected static final String formattedNoveltyRate(final int known, final int all) {
protected static String formattedNoveltyRate(final int known, final int all) {
return formattedPercent(all - known, all);
}
@ -77,7 +76,7 @@ public abstract class VariantEvaluator {
* @param total count of all objects, including x
* @return a String percent rate, or NA if total == 0
*/
protected static final String formattedPercent(final int x, final int total) {
protected static String formattedPercent(final int x, final int total) {
return total == 0 ? "NA" : String.format("%.2f", x / (1.0*total));
}
@ -88,7 +87,7 @@ public abstract class VariantEvaluator {
* @param denom number of observations in the denumerator
* @return a String formatted ratio, or NA if all == 0
*/
protected static final String formattedRatio(final int num, final int denom) {
protected static String formattedRatio(final int num, final int denom) {
return denom == 0 ? "NA" : String.format("%.2f", num / (1.0 * denom));
}
}

View File

@ -49,7 +49,6 @@ public class VariantSummary extends VariantEvaluator implements StandardEval {
/** Indels with size greater than this value are tallied in the CNV column */
private final static int MAX_INDEL_LENGTH = 50;
private final static double MIN_CNV_OVERLAP = 0.5;
private VariantEvalWalker walker;
public enum Type {
SNP, INDEL, CNV
@ -152,7 +151,7 @@ public class VariantSummary extends VariantEvaluator implements StandardEval {
public void initialize(VariantEvalWalker walker) {
this.walker = walker;
super.initialize(walker);
nSamples = walker.getSampleNamesForEvaluation().size();
countsPerSample = new TypeSampleMap(walker.getSampleNamesForEvaluation());
@ -176,11 +175,7 @@ public class VariantSummary extends VariantEvaluator implements StandardEval {
return 2; // we only need to see each eval track
}
public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
nProcessedLoci += context.getSkippedBases() + (ref == null ? 0 : 1);
}
private final Type getType(VariantContext vc) {
private Type getType(VariantContext vc) {
switch (vc.getType()) {
case SNP:
return Type.SNP;
@ -196,9 +191,9 @@ public class VariantSummary extends VariantEvaluator implements StandardEval {
}
}
private final boolean overlapsKnownCNV(VariantContext cnv) {
private boolean overlapsKnownCNV(VariantContext cnv) {
if ( knownCNVs != null ) {
final GenomeLoc loc = walker.getGenomeLocParser().createGenomeLoc(cnv, true);
final GenomeLoc loc = getWalker().getGenomeLocParser().createGenomeLoc(cnv, true);
IntervalTree<GenomeLoc> intervalTree = knownCNVs.get(loc.getContig());
final Iterator<IntervalTree.Node<GenomeLoc>> nodeIt = intervalTree.overlappers(loc.getStart(), loc.getStop());
@ -252,13 +247,14 @@ public class VariantSummary extends VariantEvaluator implements StandardEval {
return null; // we don't capture any interesting sites
}
private final String noveltyRate(Type type) {
private String noveltyRate(Type type) {
final int all = allVariantCounts.all(type);
final int known = knownVariantCounts.all(type);
return formattedNoveltyRate(known, all);
}
public void finalizeEvaluation() {
nProcessedLoci = getWalker().getnProcessedLoci();
nSNPs = allVariantCounts.all(Type.SNP);
nIndels = allVariantCounts.all(Type.INDEL);
nSVs = allVariantCounts.all(Type.CNV);

View File

@ -23,9 +23,7 @@ public class NewEvaluationContext extends HashMap<VariantStratifier, String> {
final VariantEvaluator eval = c.newInstance();
eval.initialize(walker);
if (eval.stateIsApplicable(stateKey)) {
evaluationInstances.put(c.getSimpleName(), eval);
}
evaluationInstances.put(c.getSimpleName(), eval);
} catch (InstantiationException e) {
throw new StingException("Unable to instantiate eval module '" + c.getSimpleName() + "'");
} catch (IllegalAccessException e) {
@ -40,8 +38,6 @@ public class NewEvaluationContext extends HashMap<VariantStratifier, String> {
public void apply(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context, VariantContext comp, VariantContext eval) {
for ( final VariantEvaluator evaluation : evaluationInstances.values() ) {
// we always call update0 in case the evaluation tracks things like number of bases covered
// the other updateN methods don't see a null context
if ( tracker == null )
continue;
@ -65,10 +61,4 @@ public class NewEvaluationContext extends HashMap<VariantStratifier, String> {
}
}
}
public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
for ( final VariantEvaluator evaluation : evaluationInstances.values() ) {
evaluation.update0(tracker, ref, context);
}
}
}