Removing update0 support in VariantEval
-- Now the only use for update0, calculating the number of processed loci, is centrally tracked in the walker itself, not in the evaluations. -- This allows us to avoid calling update0 at every genomic base in 100Ks of evaluators when there are a lot of stratifications. -- No need to modify the integration tests; this optimization doesn't change the result of the calculation
This commit is contained in:
parent
0509d316d9
commit
b063bcd38d
|
|
@ -214,6 +214,9 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
|
|||
// Public constants
|
||||
private static String ALL_SAMPLE_NAME = "all";
|
||||
|
||||
// the number of processed bp for this walker
|
||||
long nProcessedLoci = 0;
|
||||
|
||||
// Utility class
|
||||
private final VariantEvalUtils variantEvalUtils = new VariantEvalUtils(this);
|
||||
|
||||
|
|
@ -326,10 +329,10 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
|
|||
*/
|
||||
@Override
|
||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
for ( final NewEvaluationContext nec : evaluationContexts.values() ) {
|
||||
synchronized (nec) {
|
||||
nec.update0(tracker, ref, context);
|
||||
}
|
||||
// we track the processed bp and expose this for modules instead of wasting CPU power on calculating
|
||||
// the same thing over and over in evals that want the processed bp
|
||||
synchronized (this) {
|
||||
nProcessedLoci += context.getSkippedBases() + (ref == null ? 0 : 1);
|
||||
}
|
||||
|
||||
if (tracker != null) {
|
||||
|
|
@ -455,7 +458,7 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
|
|||
if ( lenientMatch == null ) lenientMatch = comp;
|
||||
break;
|
||||
case NO_MATCH:
|
||||
;
|
||||
// do nothing
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -581,6 +584,10 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
|
|||
|
||||
public Set<SortableJexlVCMatchExp> getJexlExpressions() { return jexlExpressions; }
|
||||
|
||||
public long getnProcessedLoci() {
|
||||
return nProcessedLoci;
|
||||
}
|
||||
|
||||
public Set<String> getContigNames() {
|
||||
final TreeSet<String> contigs = new TreeSet<String>();
|
||||
for( final SAMSequenceRecord r : getToolkit().getReferenceDataSource().getReference().getSequenceDictionary().getSequences()) {
|
||||
|
|
|
|||
|
|
@ -89,10 +89,6 @@ public class CountVariants extends VariantEvaluator implements StandardEval {
|
|||
return 1; // we only need to see each eval track
|
||||
}
|
||||
|
||||
public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
nProcessedLoci += context.getSkippedBases() + (ref == null ? 0 : 1);
|
||||
}
|
||||
|
||||
public String update1(VariantContext vc1, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
nCalledLoci++;
|
||||
|
||||
|
|
@ -192,6 +188,7 @@ public class CountVariants extends VariantEvaluator implements StandardEval {
|
|||
}
|
||||
|
||||
public void finalizeEvaluation() {
|
||||
nProcessedLoci = getWalker().getnProcessedLoci();
|
||||
variantRate = perLocusRate(nVariantLoci);
|
||||
variantRatePerBp = perLocusRInverseRate(nVariantLoci);
|
||||
heterozygosity = perLocusRate(nHets);
|
||||
|
|
|
|||
|
|
@ -60,6 +60,7 @@ public class GenotypePhasingEvaluator extends VariantEvaluator {
|
|||
double minPhaseQuality = 10.0;
|
||||
|
||||
public void initialize(VariantEvalWalker walker) {
|
||||
super.initialize(walker);
|
||||
this.samplePhasingStatistics = new SamplePhasingStatistics(walker.getMinPhaseQuality());
|
||||
this.samplePrevGenotypes = new SamplePreviousGenotypes();
|
||||
}
|
||||
|
|
@ -294,14 +295,6 @@ class CompEvalGenotypes {
|
|||
public Genotype getEvalGenotype() {
|
||||
return evalGt;
|
||||
}
|
||||
|
||||
public void setCompGenotype(Genotype compGt) {
|
||||
this.compGt = compGt;
|
||||
}
|
||||
|
||||
public void setEvalGenotype(Genotype evalGt) {
|
||||
this.evalGt = evalGt;
|
||||
}
|
||||
}
|
||||
|
||||
class SamplePreviousGenotypes {
|
||||
|
|
|
|||
|
|
@ -10,7 +10,6 @@ import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint;
|
|||
import org.broadinstitute.sting.utils.MendelianViolation;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
|
|
@ -60,17 +59,6 @@ public class MendelianViolationEvaluator extends VariantEvaluator {
|
|||
@DataPoint(description = "Number of mendelian violations found", format = "%d")
|
||||
long nViolations;
|
||||
|
||||
|
||||
/*@DataPoint(description = "number of child hom ref calls where the parent was hom variant", format = "%d")
|
||||
long KidHomRef_ParentHomVar;
|
||||
@DataPoint(description = "number of child het calls where the parent was hom ref", format = "%d")
|
||||
long KidHet_ParentsHomRef;
|
||||
@DataPoint(description = "number of child het calls where the parent was hom variant", format = "%d")
|
||||
long KidHet_ParentsHomVar;
|
||||
@DataPoint(description = "number of child hom variant calls where the parent was hom ref", format = "%d")
|
||||
long KidHomVar_ParentHomRef;
|
||||
*/
|
||||
|
||||
@DataPoint(description="Number of mendelian violations of the type HOM_REF/HOM_REF -> HOM_VAR", format = "%d")
|
||||
long mvRefRef_Var;
|
||||
@DataPoint(description="Number of mendelian violations of the type HOM_REF/HOM_REF -> HET", format = "%d")
|
||||
|
|
@ -88,12 +76,6 @@ public class MendelianViolationEvaluator extends VariantEvaluator {
|
|||
@DataPoint(description="Number of mendelian violations of the type HOM_VAR/HOM_VAR -> HET", format = "%d")
|
||||
long mvVarVar_Het;
|
||||
|
||||
|
||||
/*@DataPoint(description ="Number of inherited var alleles from het parents", format = "%d")
|
||||
long nInheritedVar;
|
||||
@DataPoint(description ="Number of inherited ref alleles from het parents", format = "%d")
|
||||
long nInheritedRef;*/
|
||||
|
||||
@DataPoint(description="Number of HomRef/HomRef/HomRef trios", format = "%d")
|
||||
long HomRefHomRef_HomRef;
|
||||
@DataPoint(description="Number of Het/Het/Het trios", format = "%d")
|
||||
|
|
@ -120,18 +102,15 @@ public class MendelianViolationEvaluator extends VariantEvaluator {
|
|||
long HomVarHet_inheritedVar;
|
||||
|
||||
MendelianViolation mv;
|
||||
PrintStream mvFile;
|
||||
Map<String,Set<Sample>> families;
|
||||
|
||||
public void initialize(VariantEvalWalker walker) {
|
||||
//Changed by Laurent Francioli - 2011-06-07
|
||||
//mv = new MendelianViolation(walker.getFamilyStructure(), walker.getMendelianViolationQualThreshold());
|
||||
super.initialize(walker);
|
||||
mv = new MendelianViolation(walker.getMendelianViolationQualThreshold(),false);
|
||||
families = walker.getSampleDB().getFamilies();
|
||||
}
|
||||
|
||||
public boolean enabled() {
|
||||
//return getVEWalker().FAMILY_STRUCTURE != null;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -28,11 +28,12 @@ import org.apache.log4j.Logger;
|
|||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
|
||||
|
||||
@Analysis(description = "Evaluation summary for multi-allelic variants")
|
||||
public class MultiallelicSummary extends VariantEvaluator implements StandardEval {
|
||||
|
|
@ -90,10 +91,6 @@ public class MultiallelicSummary extends VariantEvaluator implements StandardEva
|
|||
@Override public boolean enabled() { return true; }
|
||||
@Override public int getComparisonOrder() { return 2; }
|
||||
|
||||
public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
nProcessedLoci += context.getSkippedBases() + (ref == null ? 0 : 1);
|
||||
}
|
||||
|
||||
public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
if ( eval == null || eval.isMonomorphicInSamples() )
|
||||
return null;
|
||||
|
|
@ -152,6 +149,7 @@ public class MultiallelicSummary extends VariantEvaluator implements StandardEva
|
|||
}
|
||||
|
||||
public void finalizeEvaluation() {
|
||||
nProcessedLoci = getWalker().getnProcessedLoci();
|
||||
processedMultiSnpRatio = (double)nMultiSNPs / (double)nProcessedLoci;
|
||||
variantMultiSnpRatio = (double)nMultiSNPs / (double)nSNPs;
|
||||
processedMultiIndelRatio = (double)nMultiIndels / (double)nProcessedLoci;
|
||||
|
|
|
|||
|
|
@ -4,14 +4,18 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
|||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.NewEvaluationContext;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.StateKey;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.util.Collection;
|
||||
|
||||
public abstract class VariantEvaluator {
|
||||
public void initialize(VariantEvalWalker walker) {}
|
||||
private VariantEvalWalker walker;
|
||||
|
||||
public void initialize(VariantEvalWalker walker) {
|
||||
this.walker = walker;
|
||||
}
|
||||
|
||||
public VariantEvalWalker getWalker() {
|
||||
return walker;
|
||||
}
|
||||
|
||||
public abstract boolean enabled();
|
||||
|
||||
|
|
@ -19,9 +23,8 @@ public abstract class VariantEvaluator {
|
|||
public abstract int getComparisonOrder();
|
||||
|
||||
// called at all sites, regardless of eval context itself; useful for counting processed bases
|
||||
public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
|
||||
}
|
||||
// No longer available. The processed bp is kept in VEW itself for performance reasons
|
||||
// public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
|
||||
public String update1(VariantContext eval, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
return null;
|
||||
|
|
@ -45,17 +48,13 @@ public abstract class VariantEvaluator {
|
|||
return ((double)num) / (Math.max(denom, 1));
|
||||
}
|
||||
|
||||
public boolean stateIsApplicable(StateKey stateKey) {
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if the variant in vc was a singleton in the original input evaluation
|
||||
* set, regardless of variant context subsetting that has occurred.
|
||||
* @param eval
|
||||
* @param eval the VariantContext being assessed for this previous status as a singleton
|
||||
* @return true if eval was originally a singleton site
|
||||
*/
|
||||
protected static final boolean variantWasSingleton(final VariantContext eval) {
|
||||
protected static boolean variantWasSingleton(final VariantContext eval) {
|
||||
return eval.getAttributeAsBoolean(VariantEvalWalker.IS_SINGLETON_KEY, false);
|
||||
}
|
||||
|
||||
|
|
@ -66,7 +65,7 @@ public abstract class VariantEvaluator {
|
|||
* @param all number of all variants
|
||||
* @return a String novelty rate, or NA if all == 0
|
||||
*/
|
||||
protected static final String formattedNoveltyRate(final int known, final int all) {
|
||||
protected static String formattedNoveltyRate(final int known, final int all) {
|
||||
return formattedPercent(all - known, all);
|
||||
}
|
||||
|
||||
|
|
@ -77,7 +76,7 @@ public abstract class VariantEvaluator {
|
|||
* @param total count of all objects, including x
|
||||
* @return a String percent rate, or NA if total == 0
|
||||
*/
|
||||
protected static final String formattedPercent(final int x, final int total) {
|
||||
protected static String formattedPercent(final int x, final int total) {
|
||||
return total == 0 ? "NA" : String.format("%.2f", x / (1.0*total));
|
||||
}
|
||||
|
||||
|
|
@ -88,7 +87,7 @@ public abstract class VariantEvaluator {
|
|||
* @param denom number of observations in the denumerator
|
||||
* @return a String formatted ratio, or NA if all == 0
|
||||
*/
|
||||
protected static final String formattedRatio(final int num, final int denom) {
|
||||
protected static String formattedRatio(final int num, final int denom) {
|
||||
return denom == 0 ? "NA" : String.format("%.2f", num / (1.0 * denom));
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -49,7 +49,6 @@ public class VariantSummary extends VariantEvaluator implements StandardEval {
|
|||
/** Indels with size greater than this value are tallied in the CNV column */
|
||||
private final static int MAX_INDEL_LENGTH = 50;
|
||||
private final static double MIN_CNV_OVERLAP = 0.5;
|
||||
private VariantEvalWalker walker;
|
||||
|
||||
public enum Type {
|
||||
SNP, INDEL, CNV
|
||||
|
|
@ -152,7 +151,7 @@ public class VariantSummary extends VariantEvaluator implements StandardEval {
|
|||
|
||||
|
||||
public void initialize(VariantEvalWalker walker) {
|
||||
this.walker = walker;
|
||||
super.initialize(walker);
|
||||
|
||||
nSamples = walker.getSampleNamesForEvaluation().size();
|
||||
countsPerSample = new TypeSampleMap(walker.getSampleNamesForEvaluation());
|
||||
|
|
@ -176,11 +175,7 @@ public class VariantSummary extends VariantEvaluator implements StandardEval {
|
|||
return 2; // we only need to see each eval track
|
||||
}
|
||||
|
||||
public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
nProcessedLoci += context.getSkippedBases() + (ref == null ? 0 : 1);
|
||||
}
|
||||
|
||||
private final Type getType(VariantContext vc) {
|
||||
private Type getType(VariantContext vc) {
|
||||
switch (vc.getType()) {
|
||||
case SNP:
|
||||
return Type.SNP;
|
||||
|
|
@ -196,9 +191,9 @@ public class VariantSummary extends VariantEvaluator implements StandardEval {
|
|||
}
|
||||
}
|
||||
|
||||
private final boolean overlapsKnownCNV(VariantContext cnv) {
|
||||
private boolean overlapsKnownCNV(VariantContext cnv) {
|
||||
if ( knownCNVs != null ) {
|
||||
final GenomeLoc loc = walker.getGenomeLocParser().createGenomeLoc(cnv, true);
|
||||
final GenomeLoc loc = getWalker().getGenomeLocParser().createGenomeLoc(cnv, true);
|
||||
IntervalTree<GenomeLoc> intervalTree = knownCNVs.get(loc.getContig());
|
||||
|
||||
final Iterator<IntervalTree.Node<GenomeLoc>> nodeIt = intervalTree.overlappers(loc.getStart(), loc.getStop());
|
||||
|
|
@ -252,13 +247,14 @@ public class VariantSummary extends VariantEvaluator implements StandardEval {
|
|||
return null; // we don't capture any interesting sites
|
||||
}
|
||||
|
||||
private final String noveltyRate(Type type) {
|
||||
private String noveltyRate(Type type) {
|
||||
final int all = allVariantCounts.all(type);
|
||||
final int known = knownVariantCounts.all(type);
|
||||
return formattedNoveltyRate(known, all);
|
||||
}
|
||||
|
||||
public void finalizeEvaluation() {
|
||||
nProcessedLoci = getWalker().getnProcessedLoci();
|
||||
nSNPs = allVariantCounts.all(Type.SNP);
|
||||
nIndels = allVariantCounts.all(Type.INDEL);
|
||||
nSVs = allVariantCounts.all(Type.CNV);
|
||||
|
|
|
|||
|
|
@ -23,9 +23,7 @@ public class NewEvaluationContext extends HashMap<VariantStratifier, String> {
|
|||
final VariantEvaluator eval = c.newInstance();
|
||||
eval.initialize(walker);
|
||||
|
||||
if (eval.stateIsApplicable(stateKey)) {
|
||||
evaluationInstances.put(c.getSimpleName(), eval);
|
||||
}
|
||||
evaluationInstances.put(c.getSimpleName(), eval);
|
||||
} catch (InstantiationException e) {
|
||||
throw new StingException("Unable to instantiate eval module '" + c.getSimpleName() + "'");
|
||||
} catch (IllegalAccessException e) {
|
||||
|
|
@ -40,8 +38,6 @@ public class NewEvaluationContext extends HashMap<VariantStratifier, String> {
|
|||
|
||||
public void apply(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context, VariantContext comp, VariantContext eval) {
|
||||
for ( final VariantEvaluator evaluation : evaluationInstances.values() ) {
|
||||
// we always call update0 in case the evaluation tracks things like number of bases covered
|
||||
|
||||
// the other updateN methods don't see a null context
|
||||
if ( tracker == null )
|
||||
continue;
|
||||
|
|
@ -65,10 +61,4 @@ public class NewEvaluationContext extends HashMap<VariantStratifier, String> {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
for ( final VariantEvaluator evaluation : evaluationInstances.values() ) {
|
||||
evaluation.update0(tracker, ref, context);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue