More docs and contracts for classes in genotyper.afcalc

-- Future protection of the output of GeneralPloidyExactAFCalc, which produces in some cases bad likelihoods (positive values)
This commit is contained in:
Mark DePristo 2012-10-18 07:38:32 -04:00
parent 99c9031cb4
commit 695cf83675
5 changed files with 45 additions and 17 deletions

View File

@ -447,14 +447,16 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
// update the MLE if necessary // update the MLE if necessary
final int altCounts[] = Arrays.copyOfRange(set.getACcounts().getCounts(),1, set.getACcounts().getCounts().length); final int altCounts[] = Arrays.copyOfRange(set.getACcounts().getCounts(),1, set.getACcounts().getCounts().length);
getStateTracker().updateMLEifNeeded(Double.isInfinite(log10LofK) ? MathUtils.LOG10_P_OF_ZERO : log10LofK, altCounts); // TODO -- GUILLERMO THIS CODE MAY PRODUCE POSITIVE LIKELIHOODS OR -INFINITY
getStateTracker().updateMLEifNeeded(MathUtils.goodLog10Probability(log10LofK) ? log10LofK : MathUtils.LOG10_P_OF_ZERO, altCounts);
// apply the priors over each alternate allele // apply the priors over each alternate allele
for (final int ACcount : altCounts ) { for (final int ACcount : altCounts ) {
if ( ACcount > 0 ) if ( ACcount > 0 )
log10LofK += log10AlleleFrequencyPriors[ACcount]; log10LofK += log10AlleleFrequencyPriors[ACcount];
} }
getStateTracker().updateMAPifNeeded(Double.isInfinite(log10LofK) ? MathUtils.LOG10_P_OF_ZERO : log10LofK, altCounts); // TODO -- GUILLERMO THIS CODE MAY PRODUCE POSITIVE LIKELIHOODS OR -INFINITY
getStateTracker().updateMAPifNeeded(MathUtils.goodLog10Probability(log10LofK) ? log10LofK : MathUtils.LOG10_P_OF_ZERO, altCounts);
return log10LofK; return log10LofK;
} }

View File

@ -53,6 +53,16 @@ public abstract class AFCalc implements Cloneable {
private final StateTracker stateTracker; private final StateTracker stateTracker;
private ExactCallLogger exactCallLogger = null; private ExactCallLogger exactCallLogger = null;
/**
* Create a new AFCalc object capable of calculating the prob. that alleles are
* segregating among nSamples with up to maxAltAlleles for SNPs and maxAltAllelesForIndels
* for indels for samples with ploidy
*
* @param nSamples number of samples, must be > 0
* @param maxAltAlleles maxAltAlleles for SNPs
* @param maxAltAllelesForIndels for indels
* @param ploidy the ploidy, must be > 0
*/
protected AFCalc(final int nSamples, final int maxAltAlleles, final int maxAltAllelesForIndels, final int ploidy) { protected AFCalc(final int nSamples, final int maxAltAlleles, final int maxAltAllelesForIndels, final int ploidy) {
if ( nSamples < 0 ) throw new IllegalArgumentException("nSamples must be greater than zero " + nSamples); if ( nSamples < 0 ) throw new IllegalArgumentException("nSamples must be greater than zero " + nSamples);
if ( maxAltAlleles < 1 ) throw new IllegalArgumentException("maxAltAlleles must be greater than zero " + maxAltAlleles); if ( maxAltAlleles < 1 ) throw new IllegalArgumentException("maxAltAlleles must be greater than zero " + maxAltAlleles);
@ -65,10 +75,20 @@ public abstract class AFCalc implements Cloneable {
this.stateTracker = new StateTracker(Math.max(maxAltAlleles, maxAltAllelesForIndels)); this.stateTracker = new StateTracker(Math.max(maxAltAlleles, maxAltAllelesForIndels));
} }
/**
* Enable exact call logging to file
*
* @param exactCallsLog the destination file
*/
public void enableProcessLog(final File exactCallsLog) { public void enableProcessLog(final File exactCallsLog) {
exactCallLogger = new ExactCallLogger(exactCallsLog); exactCallLogger = new ExactCallLogger(exactCallsLog);
} }
/**
* Use this logger instead of the default logger
*
* @param logger
*/
public void setLogger(Logger logger) { public void setLogger(Logger logger) {
this.logger = logger; this.logger = logger;
} }
@ -109,7 +129,7 @@ public abstract class AFCalc implements Cloneable {
* @param log10AlleleFrequencyPriors the priors by AC vector * @param log10AlleleFrequencyPriors the priors by AC vector
* @return a AFCalcResult describing the result of this calculation * @return a AFCalcResult describing the result of this calculation
*/ */
@Requires("stateTracker.getnEvaluations() > 0") @Requires("stateTracker.getnEvaluations() >= 0")
@Ensures("result != null") @Ensures("result != null")
protected AFCalcResult getResultFromFinalState(final VariantContext vcWorking, final double[] log10AlleleFrequencyPriors) { protected AFCalcResult getResultFromFinalState(final VariantContext vcWorking, final double[] log10AlleleFrequencyPriors) {
stateTracker.setAllelesUsedInGenotyping(vcWorking.getAlleles()); stateTracker.setAllelesUsedInGenotyping(vcWorking.getAlleles());
@ -144,11 +164,13 @@ public abstract class AFCalc implements Cloneable {
* @param log10AlleleFrequencyPriors priors * @param log10AlleleFrequencyPriors priors
* @return a AFCalcResult object describing the results of this calculation * @return a AFCalcResult object describing the results of this calculation
*/ */
// TODO -- add consistent requires among args @Requires({"vc != null", "log10AlleleFrequencyPriors != null"})
protected abstract AFCalcResult computeLog10PNonRef(final VariantContext vc, protected abstract AFCalcResult computeLog10PNonRef(final VariantContext vc,
final double[] log10AlleleFrequencyPriors); final double[] log10AlleleFrequencyPriors);
/** /**
* Subset VC to the just allelesToUse, updating genotype likelihoods
*
* Must be overridden by concrete subclasses * Must be overridden by concrete subclasses
* *
* @param vc variant context with alleles and genotype likelihoods * @param vc variant context with alleles and genotype likelihoods
@ -172,7 +194,7 @@ public abstract class AFCalc implements Cloneable {
return Math.max(maxAlternateAllelesToGenotype, maxAlternateAllelesForIndels); return Math.max(maxAlternateAllelesToGenotype, maxAlternateAllelesForIndels);
} }
public StateTracker getStateTracker() { protected StateTracker getStateTracker() {
return stateTracker; return stateTracker;
} }

View File

@ -303,7 +303,13 @@ import java.util.*;
/** /**
* Take the independent estimates of pNonRef for each alt allele and combine them into a single result * Take the independent estimates of pNonRef for each alt allele and combine them into a single result
* *
* TODO -- add more docs * Given n independent calculations for each of n alternate alleles create a single
* combined AFCalcResult with:
*
* priors for AF == 0 equal to theta^N for the nth least likely allele
* posteriors that reflect the combined chance that any alleles are segregating and corresponding
* likelihoods
* combined MLEs in the order of the alt alleles in vc
* *
* @param sortedResultsWithThetaNPriors the pNonRef result for each allele independently * @param sortedResultsWithThetaNPriors the pNonRef result for each allele independently
*/ */
@ -350,8 +356,10 @@ import java.util.*;
}; };
return new MyAFCalcResult(alleleCountsOfMLE, nEvaluations, vc.getAlleles(), return new MyAFCalcResult(alleleCountsOfMLE, nEvaluations, vc.getAlleles(),
MathUtils.normalizeFromLog10(log10LikelihoodsOfAC, true), // necessary to ensure all values < 0 // necessary to ensure all values < 0
MathUtils.normalizeFromLog10(log10PriorsOfAC, true), // priors incorporate multiple alt alleles, must be normalized MathUtils.normalizeFromLog10(log10LikelihoodsOfAC, true),
// priors incorporate multiple alt alleles, must be normalized
MathUtils.normalizeFromLog10(log10PriorsOfAC, true),
log10pNonRefByAllele, sortedResultsWithThetaNPriors); log10pNonRefByAllele, sortedResultsWithThetaNPriors);
} }
} }

View File

@ -11,7 +11,7 @@ import java.util.Map;
/** /**
* Original bi-allelic ~O(N) implementation. Kept here for posterity and reference * Original bi-allelic ~O(N) implementation. Kept here for posterity and reference
*/ */
public class OriginalDiploidExactAFCalc extends DiploidExactAFCalc { class OriginalDiploidExactAFCalc extends DiploidExactAFCalc {
protected OriginalDiploidExactAFCalc(int nSamples, int maxAltAlleles, int maxAltAllelesForIndels, final int ploidy) { protected OriginalDiploidExactAFCalc(int nSamples, int maxAltAlleles, int maxAltAllelesForIndels, final int ploidy) {
super(nSamples, maxAltAlleles, maxAltAllelesForIndels, ploidy); super(nSamples, maxAltAlleles, maxAltAllelesForIndels, ploidy);
} }

View File

@ -129,9 +129,7 @@ final class StateTracker {
} }
/** /**
* Returns the likelihoods summed across all AC values for AC > 0 * @return the likelihoods summed across all AC values for AC > 0
*
* @return
*/ */
private double getLog10LikelihoodOfAFNotZero(final boolean capAt0) { private double getLog10LikelihoodOfAFNotZero(final boolean capAt0) {
if ( log10LikelihoodsForAFGt0Sum == null ) { if ( log10LikelihoodsForAFGt0Sum == null ) {
@ -144,7 +142,7 @@ final class StateTracker {
} }
/** /**
* @return * @return the log10 likelihood of AF == 0
*/ */
private double getLog10LikelihoodOfAFzero() { private double getLog10LikelihoodOfAFzero() {
return log10LikelihoodOfAFzero; return log10LikelihoodOfAFzero;
@ -157,9 +155,9 @@ final class StateTracker {
* For example, that the allelesUsedInGenotyping has been set, that the alleleCountsOfMLE contains meaningful * For example, that the allelesUsedInGenotyping has been set, that the alleleCountsOfMLE contains meaningful
* values, etc. * values, etc.
* *
* @param log10PriorsByAC * @param log10PriorsByAC the priors by AC
* *
* @return * @return an AFCalcResult summarizing the final results of this calculation
*/ */
@Requires("allelesUsedInGenotyping != null") @Requires("allelesUsedInGenotyping != null")
protected AFCalcResult toAFCalcResult(final double[] log10PriorsByAC) { protected AFCalcResult toAFCalcResult(final double[] log10PriorsByAC) {
@ -167,8 +165,6 @@ final class StateTracker {
final double[] log10Likelihoods = new double[]{getLog10LikelihoodOfAFzero(), getLog10LikelihoodOfAFNotZero(true)}; final double[] log10Likelihoods = new double[]{getLog10LikelihoodOfAFzero(), getLog10LikelihoodOfAFNotZero(true)};
final double[] log10Priors = MathUtils.normalizeFromLog10(new double[]{log10PriorsByAC[0], MathUtils.log10sumLog10(log10PriorsByAC, 1)}, true); final double[] log10Priors = MathUtils.normalizeFromLog10(new double[]{log10PriorsByAC[0], MathUtils.log10sumLog10(log10PriorsByAC, 1)}, true);
// TODO -- replace with more meaningful computation
// TODO -- refactor this calculation into the ref calculation
final Map<Allele, Double> log10pNonRefByAllele = new HashMap<Allele, Double>(allelesUsedInGenotyping.size()); final Map<Allele, Double> log10pNonRefByAllele = new HashMap<Allele, Double>(allelesUsedInGenotyping.size());
for ( int i = 0; i < subACOfMLE.length; i++ ) { for ( int i = 0; i < subACOfMLE.length; i++ ) {
final Allele allele = allelesUsedInGenotyping.get(i+1); final Allele allele = allelesUsedInGenotyping.get(i+1);