From 695cf8367598d3cb312e590b2cb3fb6a0cd65faf Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 18 Oct 2012 07:38:32 -0400 Subject: [PATCH] More docs and contracts for classes in genotyper.afcalc -- Future protection of the output of GeneralPloidyExactAFCalc, which produces in some cases bad likelihoods (positive values) --- .../afcalc/GeneralPloidyExactAFCalc.java | 6 ++-- .../gatk/walkers/genotyper/afcalc/AFCalc.java | 28 +++++++++++++++++-- .../IndependentAllelesDiploidExactAFCalc.java | 14 ++++++++-- .../afcalc/OriginalDiploidExactAFCalc.java | 2 +- .../genotyper/afcalc/StateTracker.java | 12 +++----- 5 files changed, 45 insertions(+), 17 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/GeneralPloidyExactAFCalc.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/GeneralPloidyExactAFCalc.java index 3916c2549..9c7883ab8 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/GeneralPloidyExactAFCalc.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/GeneralPloidyExactAFCalc.java @@ -447,14 +447,16 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc { // update the MLE if necessary final int altCounts[] = Arrays.copyOfRange(set.getACcounts().getCounts(),1, set.getACcounts().getCounts().length); - getStateTracker().updateMLEifNeeded(Double.isInfinite(log10LofK) ? MathUtils.LOG10_P_OF_ZERO : log10LofK, altCounts); + // TODO -- GUILLERMO THIS CODE MAY PRODUCE POSITIVE LIKELIHOODS OR -INFINITY + getStateTracker().updateMLEifNeeded(MathUtils.goodLog10Probability(log10LofK) ? log10LofK : MathUtils.LOG10_P_OF_ZERO, altCounts); // apply the priors over each alternate allele for (final int ACcount : altCounts ) { if ( ACcount > 0 ) log10LofK += log10AlleleFrequencyPriors[ACcount]; } - getStateTracker().updateMAPifNeeded(Double.isInfinite(log10LofK) ? MathUtils.LOG10_P_OF_ZERO : log10LofK, altCounts); + // TODO -- GUILLERMO THIS CODE MAY PRODUCE POSITIVE LIKELIHOODS OR -INFINITY + getStateTracker().updateMAPifNeeded(MathUtils.goodLog10Probability(log10LofK) ? log10LofK : MathUtils.LOG10_P_OF_ZERO, altCounts); return log10LofK; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalc.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalc.java index 927fadd94..e3abdeb24 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalc.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalc.java @@ -53,6 +53,16 @@ public abstract class AFCalc implements Cloneable { private final StateTracker stateTracker; private ExactCallLogger exactCallLogger = null; + /** + * Create a new AFCalc object capable of calculating the prob. that alleles are + * segregating among nSamples with up to maxAltAlleles for SNPs and maxAltAllelesForIndels + * for indels for samples with ploidy + * + * @param nSamples number of samples, must be > 0 + * @param maxAltAlleles maxAltAlleles for SNPs + * @param maxAltAllelesForIndels for indels + * @param ploidy the ploidy, must be > 0 + */ protected AFCalc(final int nSamples, final int maxAltAlleles, final int maxAltAllelesForIndels, final int ploidy) { if ( nSamples < 0 ) throw new IllegalArgumentException("nSamples must be greater than zero " + nSamples); if ( maxAltAlleles < 1 ) throw new IllegalArgumentException("maxAltAlleles must be greater than zero " + maxAltAlleles); @@ -65,10 +75,20 @@ public abstract class AFCalc implements Cloneable { this.stateTracker = new StateTracker(Math.max(maxAltAlleles, maxAltAllelesForIndels)); } + /** + * Enable exact call logging to file + * + * @param exactCallsLog the destination file + */ public void enableProcessLog(final File exactCallsLog) { exactCallLogger = new ExactCallLogger(exactCallsLog); } + /** + * Use this logger instead of the default logger + * + * @param logger + */ public void setLogger(Logger logger) { this.logger = logger; } @@ -109,7 +129,7 @@ public abstract class AFCalc implements Cloneable { * @param log10AlleleFrequencyPriors the priors by AC vector * @return a AFCalcResult describing the result of this calculation */ - @Requires("stateTracker.getnEvaluations() > 0") + @Requires("stateTracker.getnEvaluations() >= 0") @Ensures("result != null") protected AFCalcResult getResultFromFinalState(final VariantContext vcWorking, final double[] log10AlleleFrequencyPriors) { stateTracker.setAllelesUsedInGenotyping(vcWorking.getAlleles()); @@ -144,11 +164,13 @@ public abstract class AFCalc implements Cloneable { * @param log10AlleleFrequencyPriors priors * @return a AFCalcResult object describing the results of this calculation */ - // TODO -- add consistent requires among args + @Requires({"vc != null", "log10AlleleFrequencyPriors != null"}) protected abstract AFCalcResult computeLog10PNonRef(final VariantContext vc, final double[] log10AlleleFrequencyPriors); /** + * Subset VC to the just allelesToUse, updating genotype likelihoods + * * Must be overridden by concrete subclasses * * @param vc variant context with alleles and genotype likelihoods @@ -172,7 +194,7 @@ public abstract class AFCalc implements Cloneable { return Math.max(maxAlternateAllelesToGenotype, maxAlternateAllelesForIndels); } - public StateTracker getStateTracker() { + protected StateTracker getStateTracker() { return stateTracker; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalc.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalc.java index c0edee291..2f85a5246 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalc.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalc.java @@ -303,7 +303,13 @@ import java.util.*; /** * Take the independent estimates of pNonRef for each alt allele and combine them into a single result * - * TODO -- add more docs + * Given n independent calculations for each of n alternate alleles create a single + * combined AFCalcResult with: + * + * priors for AF == 0 equal to theta^N for the nth least likely allele + * posteriors that reflect the combined chance that any alleles are segregating and corresponding + * likelihoods + * combined MLEs in the order of the alt alleles in vc * * @param sortedResultsWithThetaNPriors the pNonRef result for each allele independently */ @@ -350,8 +356,10 @@ import java.util.*; }; return new MyAFCalcResult(alleleCountsOfMLE, nEvaluations, vc.getAlleles(), - MathUtils.normalizeFromLog10(log10LikelihoodsOfAC, true), // necessary to ensure all values < 0 - MathUtils.normalizeFromLog10(log10PriorsOfAC, true), // priors incorporate multiple alt alleles, must be normalized + // necessary to ensure all values < 0 + MathUtils.normalizeFromLog10(log10LikelihoodsOfAC, true), + // priors incorporate multiple alt alleles, must be normalized + MathUtils.normalizeFromLog10(log10PriorsOfAC, true), log10pNonRefByAllele, sortedResultsWithThetaNPriors); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/OriginalDiploidExactAFCalc.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/OriginalDiploidExactAFCalc.java index 88f5e06e6..dea38e46c 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/OriginalDiploidExactAFCalc.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/OriginalDiploidExactAFCalc.java @@ -11,7 +11,7 @@ import java.util.Map; /** * Original bi-allelic ~O(N) implementation. Kept here for posterity and reference */ -public class OriginalDiploidExactAFCalc extends DiploidExactAFCalc { +class OriginalDiploidExactAFCalc extends DiploidExactAFCalc { protected OriginalDiploidExactAFCalc(int nSamples, int maxAltAlleles, int maxAltAllelesForIndels, final int ploidy) { super(nSamples, maxAltAlleles, maxAltAllelesForIndels, ploidy); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/StateTracker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/StateTracker.java index 3eb32d35e..301891a99 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/StateTracker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/StateTracker.java @@ -129,9 +129,7 @@ final class StateTracker { } /** - * Returns the likelihoods summed across all AC values for AC > 0 - * - * @return + * @return the likelihoods summed across all AC values for AC > 0 */ private double getLog10LikelihoodOfAFNotZero(final boolean capAt0) { if ( log10LikelihoodsForAFGt0Sum == null ) { @@ -144,7 +142,7 @@ final class StateTracker { } /** - * @return + * @return the log10 likelihood of AF == 0 */ private double getLog10LikelihoodOfAFzero() { return log10LikelihoodOfAFzero; @@ -157,9 +155,9 @@ final class StateTracker { * For example, that the allelesUsedInGenotyping has been set, that the alleleCountsOfMLE contains meaningful * values, etc. * - * @param log10PriorsByAC + * @param log10PriorsByAC the priors by AC * - * @return + * @return an AFCalcResult summarizing the final results of this calculation */ @Requires("allelesUsedInGenotyping != null") protected AFCalcResult toAFCalcResult(final double[] log10PriorsByAC) { @@ -167,8 +165,6 @@ final class StateTracker { final double[] log10Likelihoods = new double[]{getLog10LikelihoodOfAFzero(), getLog10LikelihoodOfAFNotZero(true)}; final double[] log10Priors = MathUtils.normalizeFromLog10(new double[]{log10PriorsByAC[0], MathUtils.log10sumLog10(log10PriorsByAC, 1)}, true); - // TODO -- replace with more meaningful computation - // TODO -- refactor this calculation into the ref calculation final Map log10pNonRefByAllele = new HashMap(allelesUsedInGenotyping.size()); for ( int i = 0; i < subACOfMLE.length; i++ ) { final Allele allele = allelesUsedInGenotyping.get(i+1);