Merge AFCalcResultTracker into StateTracker, cleanup

-- These two classes were really the same, and now they are actually the same!
-- Cleanuped the interfaces, removed duplicate data
-- Added lots of contracts, some of which found numerical issues with GeneralPloidyExactAFCalc (which have been patched over but not fixed)
-- Moved goodProbability and goodProbabilityVector utilities to MathUtils.  Very useful for contracts!
This commit is contained in:
Mark DePristo 2012-10-17 20:41:33 -04:00
parent 9c63cee9fc
commit 99c9031cb4
9 changed files with 440 additions and 626 deletions

View File

@ -26,7 +26,6 @@
package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc; package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
import org.broadinstitute.sting.gatk.walkers.genotyper.GeneralPloidyGenotypeLikelihoods; import org.broadinstitute.sting.gatk.walkers.genotyper.GeneralPloidyGenotypeLikelihoods;
import org.broadinstitute.sting.gatk.walkers.genotyper.ProbabilityVector;
import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
@ -69,8 +68,8 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
@Override @Override
public AFCalcResult computeLog10PNonRef(final VariantContext vc, final double[] log10AlleleFrequencyPriors) { public AFCalcResult computeLog10PNonRef(final VariantContext vc, final double[] log10AlleleFrequencyPriors) {
combineSinglePools(vc.getGenotypes(), vc.getNAlleles(), ploidy, log10AlleleFrequencyPriors, getResultTracker()); combineSinglePools(vc.getGenotypes(), vc.getNAlleles(), ploidy, log10AlleleFrequencyPriors);
return resultFromTracker(vc, log10AlleleFrequencyPriors); return getResultFromFinalState(vc, log10AlleleFrequencyPriors);
} }
/** /**
@ -171,13 +170,11 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
* @param numAlleles Number of alternate alleles * @param numAlleles Number of alternate alleles
* @param ploidyPerPool Number of samples per pool * @param ploidyPerPool Number of samples per pool
* @param log10AlleleFrequencyPriors Frequency priors * @param log10AlleleFrequencyPriors Frequency priors
* @param resultTracker object to fill with output values
*/ */
protected static void combineSinglePools(final GenotypesContext GLs, protected void combineSinglePools(final GenotypesContext GLs,
final int numAlleles, final int numAlleles,
final int ploidyPerPool, final int ploidyPerPool,
final double[] log10AlleleFrequencyPriors, final double[] log10AlleleFrequencyPriors) {
final AFCalcResultTracker resultTracker) {
final ArrayList<double[]> genotypeLikelihoods = getGLs(GLs, true); final ArrayList<double[]> genotypeLikelihoods = getGLs(GLs, true);
@ -196,24 +193,24 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
if ( genotypeLikelihoods.size() <= 1 ) { if ( genotypeLikelihoods.size() <= 1 ) {
// no meaningful GLs at all, just set the tracker to non poly values // no meaningful GLs at all, just set the tracker to non poly values
resultTracker.reset(); // just mimic-ing call below getStateTracker().reset(); // just mimic-ing call below
resultTracker.setLog10LikelihoodOfAFzero(0.0); getStateTracker().setLog10LikelihoodOfAFzero(0.0);
} else { } else {
for (int p=1; p<genotypeLikelihoods.size(); p++) { for (int p=1; p<genotypeLikelihoods.size(); p++) {
resultTracker.reset(); // TODO -- why is this here? It makes it hard to track the n evaluation getStateTracker().reset(); // TODO -- why is this here? It makes it hard to track the n evaluation
combinedPoolLikelihoods = fastCombineMultiallelicPool(combinedPoolLikelihoods, genotypeLikelihoods.get(p), combinedPloidy, ploidyPerPool, combinedPoolLikelihoods = fastCombineMultiallelicPool(combinedPoolLikelihoods, genotypeLikelihoods.get(p),
numAlleles, log10AlleleFrequencyPriors, resultTracker); combinedPloidy, ploidyPerPool, numAlleles, log10AlleleFrequencyPriors);
combinedPloidy = ploidyPerPool + combinedPloidy; // total number of chromosomes in combinedLikelihoods combinedPloidy = ploidyPerPool + combinedPloidy; // total number of chromosomes in combinedLikelihoods
} }
} }
} }
public static CombinedPoolLikelihoods fastCombineMultiallelicPool(final CombinedPoolLikelihoods originalPool, double[] newGL, int originalPloidy, int newGLPloidy, int numAlleles, public CombinedPoolLikelihoods fastCombineMultiallelicPool(final CombinedPoolLikelihoods originalPool,
final double[] log10AlleleFrequencyPriors, double[] newGL,
final AFCalcResultTracker resultTracker) { int originalPloidy,
int newGLPloidy,
int numAlleles,
final double[] log10AlleleFrequencyPriors) {
final LinkedList<ExactACset> ACqueue = new LinkedList<ExactACset>(); final LinkedList<ExactACset> ACqueue = new LinkedList<ExactACset>();
// mapping of ExactACset indexes to the objects // mapping of ExactACset indexes to the objects
final HashMap<ExactACcounts, ExactACset> indexesToACset = new HashMap<ExactACcounts, ExactACset>(); final HashMap<ExactACcounts, ExactACset> indexesToACset = new HashMap<ExactACcounts, ExactACset>();
@ -230,16 +227,11 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
indexesToACset.put(zeroSet.getACcounts(), zeroSet); indexesToACset.put(zeroSet.getACcounts(), zeroSet);
// keep processing while we have AC conformations that need to be calculated // keep processing while we have AC conformations that need to be calculated
StateTracker stateTracker = new StateTracker();
while ( !ACqueue.isEmpty() ) { while ( !ACqueue.isEmpty() ) {
resultTracker.incNEvaluations(); getStateTracker().incNEvaluations();
// compute log10Likelihoods // compute log10Likelihoods
final ExactACset ACset = ACqueue.remove(); final ExactACset ACset = ACqueue.remove();
final double log10LofKs = calculateACConformationAndUpdateQueue(ACset, newPool, originalPool, newGL, log10AlleleFrequencyPriors, originalPloidy, newGLPloidy, resultTracker, stateTracker, ACqueue, indexesToACset); final double log10LofKs = calculateACConformationAndUpdateQueue(ACset, newPool, originalPool, newGL, log10AlleleFrequencyPriors, originalPloidy, newGLPloidy, ACqueue, indexesToACset);
// adjust max likelihood seen if needed
if ( log10LofKs > stateTracker.getMaxLog10L())
stateTracker.update(log10LofKs, ACset.getACcounts());
// clean up memory // clean up memory
indexesToACset.remove(ACset.getACcounts()); indexesToACset.remove(ACset.getACcounts());
@ -260,39 +252,32 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
* @param log10AlleleFrequencyPriors Prior object * @param log10AlleleFrequencyPriors Prior object
* @param originalPloidy Total ploidy of original combined pool * @param originalPloidy Total ploidy of original combined pool
* @param newGLPloidy Ploidy of GL vector * @param newGLPloidy Ploidy of GL vector
* @param resultTracker AFResult object
* @param stateTracker max likelihood observed so far
* @param ACqueue Queue of conformations to compute * @param ACqueue Queue of conformations to compute
* @param indexesToACset AC indices of objects in queue * @param indexesToACset AC indices of objects in queue
* @return max log likelihood * @return max log likelihood
*/ */
private static double calculateACConformationAndUpdateQueue(final ExactACset set, private double calculateACConformationAndUpdateQueue(final ExactACset set,
final CombinedPoolLikelihoods newPool, final CombinedPoolLikelihoods newPool,
final CombinedPoolLikelihoods originalPool, final CombinedPoolLikelihoods originalPool,
final double[] newGL, final double[] newGL,
final double[] log10AlleleFrequencyPriors, final double[] log10AlleleFrequencyPriors,
final int originalPloidy, final int originalPloidy,
final int newGLPloidy, final int newGLPloidy,
final AFCalcResultTracker resultTracker, final LinkedList<ExactACset> ACqueue,
final StateTracker stateTracker, final HashMap<ExactACcounts, ExactACset> indexesToACset) {
final LinkedList<ExactACset> ACqueue,
final HashMap<ExactACcounts, ExactACset> indexesToACset) {
// compute likeihood in "set" of new set based on original likelihoods // compute likeihood in "set" of new set based on original likelihoods
final int numAlleles = set.getACcounts().getCounts().length; final int numAlleles = set.getACcounts().getCounts().length;
final int newPloidy = set.getACsum(); final int newPloidy = set.getACsum();
final double log10LofK = computeLofK(set, originalPool, newGL, log10AlleleFrequencyPriors, numAlleles, originalPloidy, newGLPloidy, resultTracker); final double log10LofK = computeLofK(set, originalPool, newGL, log10AlleleFrequencyPriors, numAlleles, originalPloidy, newGLPloidy);
// add to new pool // add to new pool
if (!Double.isInfinite(log10LofK)) if (!Double.isInfinite(log10LofK))
newPool.add(set); newPool.add(set);
// TODO -- uncomment this correct line when the implementation of this model is optimized (it's too slow now to handle this fix) // TODO -- change false to true this correct line when the implementation of this model is optimized (it's too slow now to handle this fix)
//if ( log10LofK < stateTracker.maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY && stateTracker.isLowerAC(set.ACcounts) ) { if ( getStateTracker().abort(log10LofK, set.getACcounts(), false) ) {
if ( log10LofK < stateTracker.getMaxLog10L() - MAX_LOG10_ERROR_TO_STOP_EARLY ) {
if ( VERBOSE )
System.out.printf(" *** breaking early set=%s log10L=%.2f maxLog10L=%.2f%n", set.getACcounts(), log10LofK, stateTracker.getMaxLog10L());
return log10LofK; return log10LofK;
} }
@ -323,67 +308,67 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
} }
/** // /**
* Naive combiner of two multiallelic pools - number of alt alleles must be the same. // * Naive combiner of two multiallelic pools - number of alt alleles must be the same.
* Math is generalization of biallelic combiner. // * Math is generalization of biallelic combiner.
* // *
* For vector K representing an allele count conformation, // * For vector K representing an allele count conformation,
* Pr(D | AC = K) = Sum_G Pr(D|AC1 = G) Pr (D|AC2=K-G) * F(G,K) // * Pr(D | AC = K) = Sum_G Pr(D|AC1 = G) Pr (D|AC2=K-G) * F(G,K)
* where F(G,K) = choose(m1,[g0 g1 ...])*choose(m2,[...]) / choose(m1+m2,[k1 k2 ...]) // * where F(G,K) = choose(m1,[g0 g1 ...])*choose(m2,[...]) / choose(m1+m2,[k1 k2 ...])
* @param originalPool First log-likelihood pool GL vector // * @param originalPool First log-likelihood pool GL vector
* @param yy Second pool GL vector // * @param yy Second pool GL vector
* @param ploidy1 Ploidy of first pool (# of chromosomes in it) // * @param ploidy1 Ploidy of first pool (# of chromosomes in it)
* @param ploidy2 Ploidy of second pool // * @param ploidy2 Ploidy of second pool
* @param numAlleles Number of alleles // * @param numAlleles Number of alleles
* @param log10AlleleFrequencyPriors Array of biallelic priors // * @param log10AlleleFrequencyPriors Array of biallelic priors
* @param resultTracker Af calculation result object // * @param resultTracker Af calculation result object
*/ // */
public static void combineMultiallelicPoolNaively(CombinedPoolLikelihoods originalPool, double[] yy, int ploidy1, int ploidy2, int numAlleles, // public static void combineMultiallelicPoolNaively(CombinedPoolLikelihoods originalPool, double[] yy, int ploidy1, int ploidy2, int numAlleles,
final double[] log10AlleleFrequencyPriors, // final double[] log10AlleleFrequencyPriors,
final AFCalcResultTracker resultTracker) { // final AFCalcResultTracker resultTracker) {
/* ///*
final int dim1 = GenotypeLikelihoods.numLikelihoods(numAlleles, ploidy1); // final int dim1 = GenotypeLikelihoods.numLikelihoods(numAlleles, ploidy1);
final int dim2 = GenotypeLikelihoods.numLikelihoods(numAlleles, ploidy2); // final int dim2 = GenotypeLikelihoods.numLikelihoods(numAlleles, ploidy2);
//
if (dim1 != originalPool.getLength() || dim2 != yy.length) // if (dim1 != originalPool.getLength() || dim2 != yy.length)
throw new ReviewedStingException("BUG: Inconsistent vector length"); // throw new ReviewedStingException("BUG: Inconsistent vector length");
//
if (ploidy2 == 0) // if (ploidy2 == 0)
return; // return;
//
final int newPloidy = ploidy1 + ploidy2; // final int newPloidy = ploidy1 + ploidy2;
//
// Say L1(K) = Pr(D|AC1=K) * choose(m1,K) // // Say L1(K) = Pr(D|AC1=K) * choose(m1,K)
// and L2(K) = Pr(D|AC2=K) * choose(m2,K) // // and L2(K) = Pr(D|AC2=K) * choose(m2,K)
GeneralPloidyGenotypeLikelihoods.SumIterator firstIterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(numAlleles,ploidy1); // GeneralPloidyGenotypeLikelihoods.SumIterator firstIterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(numAlleles,ploidy1);
final double[] x = originalPool.getLikelihoodsAsVector(true); // final double[] x = originalPool.getLikelihoodsAsVector(true);
while(firstIterator.hasNext()) { // while(firstIterator.hasNext()) {
x[firstIterator.getLinearIndex()] += MathUtils.log10MultinomialCoefficient(ploidy1,firstIterator.getCurrentVector()); // x[firstIterator.getLinearIndex()] += MathUtils.log10MultinomialCoefficient(ploidy1,firstIterator.getCurrentVector());
firstIterator.next(); // firstIterator.next();
} // }
//
GeneralPloidyGenotypeLikelihoods.SumIterator secondIterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(numAlleles,ploidy2); // GeneralPloidyGenotypeLikelihoods.SumIterator secondIterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(numAlleles,ploidy2);
final double[] y = yy.clone(); // final double[] y = yy.clone();
while(secondIterator.hasNext()) { // while(secondIterator.hasNext()) {
y[secondIterator.getLinearIndex()] += MathUtils.log10MultinomialCoefficient(ploidy2,secondIterator.getCurrentVector()); // y[secondIterator.getLinearIndex()] += MathUtils.log10MultinomialCoefficient(ploidy2,secondIterator.getCurrentVector());
secondIterator.next(); // secondIterator.next();
} // }
//
// initialize output to -log10(choose(m1+m2,[k1 k2...]) // // initialize output to -log10(choose(m1+m2,[k1 k2...])
final int outputDim = GenotypeLikelihoods.numLikelihoods(numAlleles, newPloidy); // final int outputDim = GenotypeLikelihoods.numLikelihoods(numAlleles, newPloidy);
final GeneralPloidyGenotypeLikelihoods.SumIterator outputIterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(numAlleles,newPloidy); // final GeneralPloidyGenotypeLikelihoods.SumIterator outputIterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(numAlleles,newPloidy);
//
//
// Now, result(K) = logSum_G (L1(G)+L2(K-G)) where G are all possible vectors that sum UP to K // // Now, result(K) = logSum_G (L1(G)+L2(K-G)) where G are all possible vectors that sum UP to K
while(outputIterator.hasNext()) { // while(outputIterator.hasNext()) {
final ExactACset set = new ExactACset(1, new ExactACcounts(outputIterator.getCurrentAltVector())); // final ExactACset set = new ExactACset(1, new ExactACcounts(outputIterator.getCurrentAltVector()));
double likelihood = computeLofK(set, x,y, log10AlleleFrequencyPriors, numAlleles, ploidy1, ploidy2, result); // double likelihood = computeLofK(set, x,y, log10AlleleFrequencyPriors, numAlleles, ploidy1, ploidy2, result);
//
originalPool.add(likelihood, set, outputIterator.getLinearIndex()); // originalPool.add(likelihood, set, outputIterator.getLinearIndex());
outputIterator.next(); // outputIterator.next();
} // }
*/ //*/
} // }
/** /**
* Compute likelihood of a particular AC conformation and update AFresult object * Compute likelihood of a particular AC conformation and update AFresult object
@ -394,15 +379,13 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
* @param numAlleles Number of alleles (including ref) * @param numAlleles Number of alleles (including ref)
* @param ploidy1 Ploidy of original pool (combined) * @param ploidy1 Ploidy of original pool (combined)
* @param ploidy2 Ploidy of new pool * @param ploidy2 Ploidy of new pool
* @param resultTracker AFResult object
* @return log-likehood of requested conformation * @return log-likehood of requested conformation
*/ */
private static double computeLofK(final ExactACset set, private double computeLofK(final ExactACset set,
final CombinedPoolLikelihoods firstGLs, final CombinedPoolLikelihoods firstGLs,
final double[] secondGL, final double[] secondGL,
final double[] log10AlleleFrequencyPriors, final double[] log10AlleleFrequencyPriors,
final int numAlleles, final int ploidy1, final int ploidy2, final int numAlleles, final int ploidy1, final int ploidy2) {
final AFCalcResultTracker resultTracker) {
final int newPloidy = ploidy1 + ploidy2; final int newPloidy = ploidy1 + ploidy2;
@ -420,8 +403,8 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
final double log10Lof0 = firstGLs.getGLOfACZero() + secondGL[HOM_REF_INDEX]; final double log10Lof0 = firstGLs.getGLOfACZero() + secondGL[HOM_REF_INDEX];
set.getLog10Likelihoods()[0] = log10Lof0; set.getLog10Likelihoods()[0] = log10Lof0;
resultTracker.setLog10LikelihoodOfAFzero(log10Lof0); getStateTracker().setLog10LikelihoodOfAFzero(log10Lof0);
resultTracker.setLog10PosteriorOfAFzero(log10Lof0 + log10AlleleFrequencyPriors[0]); getStateTracker().setLog10PosteriorOfAFzero(log10Lof0 + log10AlleleFrequencyPriors[0]);
return log10Lof0; return log10Lof0;
} else { } else {
@ -464,14 +447,14 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
// update the MLE if necessary // update the MLE if necessary
final int altCounts[] = Arrays.copyOfRange(set.getACcounts().getCounts(),1, set.getACcounts().getCounts().length); final int altCounts[] = Arrays.copyOfRange(set.getACcounts().getCounts(),1, set.getACcounts().getCounts().length);
resultTracker.updateMLEifNeeded(log10LofK, altCounts); getStateTracker().updateMLEifNeeded(Double.isInfinite(log10LofK) ? MathUtils.LOG10_P_OF_ZERO : log10LofK, altCounts);
// apply the priors over each alternate allele // apply the priors over each alternate allele
for (final int ACcount : altCounts ) { for (final int ACcount : altCounts ) {
if ( ACcount > 0 ) if ( ACcount > 0 )
log10LofK += log10AlleleFrequencyPriors[ACcount]; log10LofK += log10AlleleFrequencyPriors[ACcount];
} }
resultTracker.updateMAPifNeeded(log10LofK, altCounts); getStateTracker().updateMAPifNeeded(Double.isInfinite(log10LofK) ? MathUtils.LOG10_P_OF_ZERO : log10LofK, altCounts);
return log10LofK; return log10LofK;
} }
@ -494,99 +477,6 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
return (sum == ploidy); return (sum == ploidy);
} }
/**
* Combines naively two biallelic pools (of arbitrary size).
* For two pools of size m1 and m2, we can compute the combined likelihood as:
* Pr(D|AC=k) = Sum_{j=0}^k Pr(D|AC1=j) Pr(D|AC2=k-j) * choose(m1,j)*choose(m2,k-j)/choose(m1+m2,k)
* @param originalPool Pool likelihood vector, x[k] = Pr(AC_i = k) for alt allele i
* @param newPLVector Second GL vector
* @param ploidy1 Ploidy of first pool (# of chromosomes in it)
* @param ploidy2 Ploidy of second pool
* @param log10AlleleFrequencyPriors Array of biallelic priors
* @param resultTracker Af calculation result object
* @return Combined likelihood vector
*/
public static ProbabilityVector combineBiallelicPoolsNaively(final ProbabilityVector originalPool, final double[] newPLVector,
final int ploidy1, final int ploidy2, final double[] log10AlleleFrequencyPriors,
final AFCalcResultTracker resultTracker) {
final int newPloidy = ploidy1 + ploidy2;
final double[] combinedLikelihoods = new double[1+newPloidy];
/** Pre-fill result array and incorporate weights into input vectors
* Say L1(k) = Pr(D|AC1=k) * choose(m1,k)
* and L2(k) = Pr(D|AC2=k) * choose(m2,k)
* equation reduces to
* Pr(D|AC=k) = 1/choose(m1+m2,k) * Sum_{j=0}^k L1(k) L2(k-j)
* which is just plain convolution of L1 and L2 (with pre-existing vector)
*/
// intialize result vector to -infinity
Arrays.fill(combinedLikelihoods,Double.NEGATIVE_INFINITY);
final double[] x = Arrays.copyOf(originalPool.getProbabilityVector(),1+ploidy1);
for (int k=originalPool.getProbabilityVector().length; k< x.length; k++)
x[k] = Double.NEGATIVE_INFINITY;
final double[] y = newPLVector.clone();
final double log10Lof0 = x[0]+y[0];
resultTracker.setLog10LikelihoodOfAFzero(log10Lof0);
resultTracker.setLog10PosteriorOfAFzero(log10Lof0 + log10AlleleFrequencyPriors[0]);
double maxElement = log10Lof0;
int maxElementIdx = 0;
int[] alleleCounts = new int[1];
for (int k= originalPool.getMinVal() ; k <= newPloidy; k++) {
double[] acc = new double[k+1];
Arrays.fill(acc,Double.NEGATIVE_INFINITY);
double innerMax = Double.NEGATIVE_INFINITY;
for (int j=0; j <=k; j++) {
double x1,y1;
if (k-j>=0 && k-j < y.length)
y1 = y[k-j] + MathUtils.log10BinomialCoefficient(ploidy2,k-j);
else
continue;
if (j < x.length)
x1 = x[j] + MathUtils.log10BinomialCoefficient(ploidy1,j);
else
continue;
if (Double.isInfinite(x1) || Double.isInfinite(y1))
continue;
acc[j] = x1 + y1;
if (acc[j] > innerMax)
innerMax = acc[j];
else if (acc[j] < innerMax - MAX_LOG10_ERROR_TO_STOP_EARLY)
break;
}
combinedLikelihoods[k] = MathUtils.log10sumLog10(acc) - MathUtils.log10BinomialCoefficient(newPloidy,k);
maxElementIdx = k;
double maxDiff = combinedLikelihoods[k] - maxElement;
if (maxDiff > 0)
maxElement = combinedLikelihoods[k];
else if (maxDiff < maxElement - MAX_LOG10_ERROR_TO_STOP_EARLY) {
break;
}
alleleCounts[0] = k;
resultTracker.updateMLEifNeeded(combinedLikelihoods[k],alleleCounts);
resultTracker.updateMAPifNeeded(combinedLikelihoods[k] + log10AlleleFrequencyPriors[k],alleleCounts);
}
return new ProbabilityVector(MathUtils.normalizeFromLog10(Arrays.copyOf(combinedLikelihoods,maxElementIdx+1),false, true));
}
/** /**
* From a given variant context, extract a given subset of alleles, and update genotype context accordingly, * From a given variant context, extract a given subset of alleles, and update genotype context accordingly,
* including updating the PL's, and assign genotypes accordingly * including updating the PL's, and assign genotypes accordingly
@ -675,10 +565,10 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
* *
* @return genotype * @return genotype
*/ */
private static void assignGenotype(final GenotypeBuilder gb, private void assignGenotype(final GenotypeBuilder gb,
final double[] newLikelihoods, final double[] newLikelihoods,
final List<Allele> allelesToUse, final List<Allele> allelesToUse,
final int numChromosomes) { final int numChromosomes) {
final int numNewAltAlleles = allelesToUse.size() - 1; final int numNewAltAlleles = allelesToUse.size() - 1;

View File

@ -137,18 +137,15 @@ public class GeneralPloidyAFCalculationModelUnitTest extends BaseTest {
@Test(dataProvider = "getGLs") @Test(dataProvider = "getGLs")
public void testGLs(GetGLsTest cfg) { public void testGLs(GetGLsTest cfg) {
final AFCalcResultTracker resultTracker = new AFCalcResultTracker(cfg.numAltAlleles);
final int len = GeneralPloidyGenotypeLikelihoods.getNumLikelihoodElements(1 + cfg.numAltAlleles, cfg.ploidy * cfg.GLs.size()); final int len = GeneralPloidyGenotypeLikelihoods.getNumLikelihoodElements(1 + cfg.numAltAlleles, cfg.ploidy * cfg.GLs.size());
double[] priors = new double[len]; // flat priors double[] priors = new double[len]; // flat priors
GeneralPloidyExactAFCalc.combineSinglePools(cfg.GLs, 1 + cfg.numAltAlleles, cfg.ploidy, priors, resultTracker); final GeneralPloidyExactAFCalc calc = new GeneralPloidyExactAFCalc(cfg.GLs.size(), 1 + cfg.numAltAlleles, 1 + cfg.numAltAlleles, cfg.ploidy);
calc.combineSinglePools(cfg.GLs, 1 + cfg.numAltAlleles, cfg.ploidy, priors);
int nameIndex = 1; int nameIndex = 1;
for ( int allele = 0; allele < cfg.numAltAlleles; allele++, nameIndex+=2 ) { for ( int allele = 0; allele < cfg.numAltAlleles; allele++, nameIndex+=2 ) {
int expectedAlleleCount = Integer.valueOf(cfg.name.substring(nameIndex, nameIndex+1)); int expectedAlleleCount = Integer.valueOf(cfg.name.substring(nameIndex, nameIndex+1));
int calculatedAlleleCount = resultTracker.getAlleleCountsOfMAP()[allele]; int calculatedAlleleCount = calc.getStateTracker().getAlleleCountsOfMAP()[allele];
// System.out.format( "%s Expected:%d Calc:%d\n",cfg.toString(),expectedAlleleCount, calculatedAlleleCount);
Assert.assertEquals(calculatedAlleleCount, expectedAlleleCount); Assert.assertEquals(calculatedAlleleCount, expectedAlleleCount);
} }
} }

View File

@ -50,7 +50,7 @@ public abstract class AFCalc implements Cloneable {
protected Logger logger = defaultLogger; protected Logger logger = defaultLogger;
private SimpleTimer callTimer = new SimpleTimer(); private SimpleTimer callTimer = new SimpleTimer();
private final AFCalcResultTracker resultTracker; private final StateTracker stateTracker;
private ExactCallLogger exactCallLogger = null; private ExactCallLogger exactCallLogger = null;
protected AFCalc(final int nSamples, final int maxAltAlleles, final int maxAltAllelesForIndels, final int ploidy) { protected AFCalc(final int nSamples, final int maxAltAlleles, final int maxAltAllelesForIndels, final int ploidy) {
@ -62,7 +62,7 @@ public abstract class AFCalc implements Cloneable {
this.nSamples = nSamples; this.nSamples = nSamples;
this.maxAlternateAllelesToGenotype = maxAltAlleles; this.maxAlternateAllelesToGenotype = maxAltAlleles;
this.maxAlternateAllelesForIndels = maxAltAllelesForIndels; this.maxAlternateAllelesForIndels = maxAltAllelesForIndels;
this.resultTracker = new AFCalcResultTracker(Math.max(maxAltAlleles, maxAltAllelesForIndels)); this.stateTracker = new StateTracker(Math.max(maxAltAlleles, maxAltAllelesForIndels));
} }
public void enableProcessLog(final File exactCallsLog) { public void enableProcessLog(final File exactCallsLog) {
@ -83,10 +83,10 @@ public abstract class AFCalc implements Cloneable {
public AFCalcResult getLog10PNonRef(final VariantContext vc, final double[] log10AlleleFrequencyPriors) { public AFCalcResult getLog10PNonRef(final VariantContext vc, final double[] log10AlleleFrequencyPriors) {
if ( vc == null ) throw new IllegalArgumentException("VariantContext cannot be null"); if ( vc == null ) throw new IllegalArgumentException("VariantContext cannot be null");
if ( log10AlleleFrequencyPriors == null ) throw new IllegalArgumentException("priors vector cannot be null"); if ( log10AlleleFrequencyPriors == null ) throw new IllegalArgumentException("priors vector cannot be null");
if ( resultTracker == null ) throw new IllegalArgumentException("Results object cannot be null"); if ( stateTracker == null ) throw new IllegalArgumentException("Results object cannot be null");
// reset the result, so we can store our new result there // reset the result, so we can store our new result there
resultTracker.reset(); stateTracker.reset();
final VariantContext vcWorking = reduceScope(vc); final VariantContext vcWorking = reduceScope(vc);
@ -100,10 +100,20 @@ public abstract class AFCalc implements Cloneable {
return result; return result;
} }
@Deprecated /**
protected AFCalcResult resultFromTracker(final VariantContext vcWorking, final double[] log10AlleleFrequencyPriors) { * Convert the final state of the state tracker into our result as an AFCalcResult
resultTracker.setAllelesUsedInGenotyping(vcWorking.getAlleles()); *
return resultTracker.toAFCalcResult(log10AlleleFrequencyPriors); * Assumes that stateTracker has been updated accordingly
*
* @param vcWorking the VariantContext we actually used as input to the calc model (after reduction)
* @param log10AlleleFrequencyPriors the priors by AC vector
* @return a AFCalcResult describing the result of this calculation
*/
@Requires("stateTracker.getnEvaluations() > 0")
@Ensures("result != null")
protected AFCalcResult getResultFromFinalState(final VariantContext vcWorking, final double[] log10AlleleFrequencyPriors) {
stateTracker.setAllelesUsedInGenotyping(vcWorking.getAlleles());
return stateTracker.toAFCalcResult(log10AlleleFrequencyPriors);
} }
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
@ -162,8 +172,8 @@ public abstract class AFCalc implements Cloneable {
return Math.max(maxAlternateAllelesToGenotype, maxAlternateAllelesForIndels); return Math.max(maxAlternateAllelesToGenotype, maxAlternateAllelesForIndels);
} }
public AFCalcResultTracker getResultTracker() { public StateTracker getStateTracker() {
return resultTracker; return stateTracker;
} }
} }

View File

@ -83,8 +83,8 @@ public class AFCalcResult {
if ( log10pNonRefByAllele == null ) throw new IllegalArgumentException("log10pNonRefByAllele cannot be null"); if ( log10pNonRefByAllele == null ) throw new IllegalArgumentException("log10pNonRefByAllele cannot be null");
if ( log10pNonRefByAllele.size() != allelesUsedInGenotyping.size() - 1 ) throw new IllegalArgumentException("log10pNonRefByAllele has the wrong number of elements: log10pNonRefByAllele " + log10pNonRefByAllele + " but allelesUsedInGenotyping " + allelesUsedInGenotyping); if ( log10pNonRefByAllele.size() != allelesUsedInGenotyping.size() - 1 ) throw new IllegalArgumentException("log10pNonRefByAllele has the wrong number of elements: log10pNonRefByAllele " + log10pNonRefByAllele + " but allelesUsedInGenotyping " + allelesUsedInGenotyping);
if ( ! allelesUsedInGenotyping.containsAll(log10pNonRefByAllele.keySet()) ) throw new IllegalArgumentException("log10pNonRefByAllele doesn't contain all of the alleles used in genotyping: log10pNonRefByAllele " + log10pNonRefByAllele + " but allelesUsedInGenotyping " + allelesUsedInGenotyping); if ( ! allelesUsedInGenotyping.containsAll(log10pNonRefByAllele.keySet()) ) throw new IllegalArgumentException("log10pNonRefByAllele doesn't contain all of the alleles used in genotyping: log10pNonRefByAllele " + log10pNonRefByAllele + " but allelesUsedInGenotyping " + allelesUsedInGenotyping);
if ( ! goodLog10ProbVector(log10LikelihoodsOfAC, LOG_10_ARRAY_SIZES, false) ) throw new IllegalArgumentException("log10LikelihoodsOfAC are bad " + Utils.join(",", log10LikelihoodsOfAC)); if ( ! MathUtils.goodLog10ProbVector(log10LikelihoodsOfAC, LOG_10_ARRAY_SIZES, false) ) throw new IllegalArgumentException("log10LikelihoodsOfAC are bad " + Utils.join(",", log10LikelihoodsOfAC));
if ( ! goodLog10ProbVector(log10PriorsOfAC, LOG_10_ARRAY_SIZES, true) ) throw new IllegalArgumentException("log10priors are bad " + Utils.join(",", log10PriorsOfAC)); if ( ! MathUtils.goodLog10ProbVector(log10PriorsOfAC, LOG_10_ARRAY_SIZES, true) ) throw new IllegalArgumentException("log10priors are bad " + Utils.join(",", log10PriorsOfAC));
this.alleleCountsOfMLE = alleleCountsOfMLE; this.alleleCountsOfMLE = alleleCountsOfMLE;
this.nEvaluations = nEvaluations; this.nEvaluations = nEvaluations;
@ -147,7 +147,7 @@ public class AFCalcResult {
* Due to computational / implementation constraints this may be smaller than * Due to computational / implementation constraints this may be smaller than
* the actual list of alleles requested * the actual list of alleles requested
* *
* @return a non-empty list of alleles used during genotyping * @return a non-empty list of alleles used during genotyping, the first of which is the reference allele
*/ */
@Ensures({"result != null", "! result.isEmpty()"}) @Ensures({"result != null", "! result.isEmpty()"})
public List<Allele> getAllelesUsedInGenotyping() { public List<Allele> getAllelesUsedInGenotyping() {
@ -159,7 +159,7 @@ public class AFCalcResult {
* *
* @return * @return
*/ */
@Ensures({"goodLog10Probability(result)"}) @Ensures({"MathUtils.goodLog10Probability(result)"})
public double getLog10PosteriorOfAFEq0() { public double getLog10PosteriorOfAFEq0() {
return log10PosteriorsOfAC[AF0]; return log10PosteriorsOfAC[AF0];
} }
@ -169,7 +169,7 @@ public class AFCalcResult {
* *
* @return * @return
*/ */
@Ensures({"goodLog10Probability(result)"}) @Ensures({"MathUtils.goodLog10Probability(result)"})
public double getLog10PosteriorOfAFGT0() { public double getLog10PosteriorOfAFGT0() {
return log10PosteriorsOfAC[AF1p]; return log10PosteriorsOfAC[AF1p];
} }
@ -179,7 +179,7 @@ public class AFCalcResult {
* *
* @return * @return
*/ */
@Ensures({"goodLog10Probability(result)"}) @Ensures({"MathUtils.goodLog10Probability(result)"})
public double getLog10LikelihoodOfAFEq0() { public double getLog10LikelihoodOfAFEq0() {
return log10LikelihoodsOfAC[AF0]; return log10LikelihoodsOfAC[AF0];
} }
@ -189,7 +189,7 @@ public class AFCalcResult {
* *
* @return * @return
*/ */
@Ensures({"goodLog10Probability(result)"}) @Ensures({"MathUtils.goodLog10Probability(result)"})
public double getLog10LikelihoodOfAFGT0() { public double getLog10LikelihoodOfAFGT0() {
return log10LikelihoodsOfAC[AF1p]; return log10LikelihoodsOfAC[AF1p];
} }
@ -199,7 +199,7 @@ public class AFCalcResult {
* *
* @return * @return
*/ */
@Ensures({"goodLog10Probability(result)"}) @Ensures({"MathUtils.goodLog10Probability(result)"})
public double getLog10PriorOfAFEq0() { public double getLog10PriorOfAFEq0() {
return log10PriorsOfAC[AF0]; return log10PriorsOfAC[AF0];
} }
@ -209,7 +209,7 @@ public class AFCalcResult {
* *
* @return * @return
*/ */
@Ensures({"goodLog10Probability(result)"}) @Ensures({"MathUtils.goodLog10Probability(result)"})
public double getLog10PriorOfAFGT0() { public double getLog10PriorOfAFGT0() {
return log10PriorsOfAC[AF1p]; return log10PriorsOfAC[AF1p];
} }
@ -263,7 +263,7 @@ public class AFCalcResult {
* @param allele the allele we're interested in, must be in getAllelesUsedInGenotyping * @param allele the allele we're interested in, must be in getAllelesUsedInGenotyping
* @return the log10 probability that allele is segregating at this site * @return the log10 probability that allele is segregating at this site
*/ */
@Ensures("goodLog10Probability(result)") @Ensures("MathUtils.goodLog10Probability(result)")
public double getLog10PosteriorOfAFGt0ForAllele(final Allele allele) { public double getLog10PosteriorOfAFGt0ForAllele(final Allele allele) {
final Double log10pNonRef = log10pNonRefByAllele.get(allele); final Double log10pNonRef = log10pNonRefByAllele.get(allele);
if ( log10pNonRef == null ) throw new IllegalArgumentException("Unknown allele " + allele); if ( log10pNonRef == null ) throw new IllegalArgumentException("Unknown allele " + allele);
@ -279,7 +279,7 @@ public class AFCalcResult {
* @return freshly allocated log10 normalized posteriors vector * @return freshly allocated log10 normalized posteriors vector
*/ */
@Requires("log10LikelihoodsOfAC.length == log10PriorsOfAC.length") @Requires("log10LikelihoodsOfAC.length == log10PriorsOfAC.length")
@Ensures("goodLog10ProbVector(result, LOG_10_ARRAY_SIZES, true)") @Ensures("MathUtils.goodLog10ProbVector(result, LOG_10_ARRAY_SIZES, true)")
private static double[] computePosteriors(final double[] log10LikelihoodsOfAC, final double[] log10PriorsOfAC) { private static double[] computePosteriors(final double[] log10LikelihoodsOfAC, final double[] log10PriorsOfAC) {
final double[] log10UnnormalizedPosteriors = new double[log10LikelihoodsOfAC.length]; final double[] log10UnnormalizedPosteriors = new double[log10LikelihoodsOfAC.length];
for ( int i = 0; i < log10LikelihoodsOfAC.length; i++ ) for ( int i = 0; i < log10LikelihoodsOfAC.length; i++ )
@ -287,29 +287,6 @@ public class AFCalcResult {
return MathUtils.normalizeFromLog10(log10UnnormalizedPosteriors, true, false); return MathUtils.normalizeFromLog10(log10UnnormalizedPosteriors, true, false);
} }
/**
* Check that the log10 prob vector vector is well formed
*
* @param vector
* @param expectedSize
* @param shouldSumToOne
*
* @return true if vector is well-formed, false otherwise
*/
private static boolean goodLog10ProbVector(final double[] vector, final int expectedSize, final boolean shouldSumToOne) {
if ( vector.length != expectedSize ) return false;
for ( final double pr : vector ) {
if ( ! goodLog10Probability(pr) )
return false;
}
if ( shouldSumToOne && MathUtils.compareDoubles(MathUtils.sumLog10(vector), 1.0, 1e-4) != 0 )
return false;
return true; // everything is good
}
/** /**
* Computes the offset into linear vectors indexed by alt allele for allele * Computes the offset into linear vectors indexed by alt allele for allele
* *
@ -331,14 +308,4 @@ public class AFCalcResult {
else else
return index - 1; return index - 1;
} }
/**
* Checks that the result is a well-formed log10 probability
*
* @param result a supposedly well-formed log10 probability value
* @return true if result is really well formed
*/
private static boolean goodLog10Probability(final double result) {
return result <= 0.0 && ! Double.isInfinite(result) && ! Double.isNaN(result);
}
} }

View File

@ -1,256 +0,0 @@
/*
* Copyright (c) 2010.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
import com.google.java.contract.Ensures;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* Created by IntelliJ IDEA.
* User: ebanks
* Date: Dec 14, 2011
*
* Useful helper class to communicate the results of the allele frequency calculation
*
* TODO -- WHAT IS THE CONTRACT ON MAP AC AND P NON REF?
*/
class AFCalcResultTracker {
protected static final double VALUE_NOT_CALCULATED = Double.NEGATIVE_INFINITY;
// These variables are intended to contain the MLE and MAP (and their corresponding allele counts) of the site over all alternate alleles
protected double log10MLE;
protected double log10MAP;
private final int[] alleleCountsOfMLE;
private final int[] alleleCountsOfMAP;
// The posteriors seen, not including that of AF=0
private static final int LIKELIHOODS_CACHE_SIZE = 5000;
private final double[] log10LikelihoodsMatrixValues = new double[LIKELIHOODS_CACHE_SIZE];
private int currentLikelihoodsCacheIndex = 0;
protected Double log10LikelihoodsMatrixSum = null;
// These variables are intended to contain the likelihood/posterior probability for the site's being monomorphic (i.e. AF=0 for all alternate alleles)
private double log10LikelihoodOfAFzero;
private double log10PosteriorOfAFzero;
private int[] AClimits;
int nEvaluations = 0;
/**
* The list of alleles actually used in computing the AF
*/
private List<Allele> allelesUsedInGenotyping = null;
/**
* Create a results object capability of storing results for calls with up to maxAltAlleles
*
* @param maxAltAlleles an integer >= 1
*/
public AFCalcResultTracker(final int maxAltAlleles) {
if ( maxAltAlleles < 1 ) throw new IllegalArgumentException("maxAltAlleles must be >= 0, saw " + maxAltAlleles);
alleleCountsOfMLE = new int[maxAltAlleles];
alleleCountsOfMAP = new int[maxAltAlleles];
reset();
}
/**
* Returns a vector with maxAltAlleles values containing AC values at the MLE
*
* The values of the ACs for this call are stored in the getAllelesUsedInGenotyping order,
* starting from index 0 (i.e., the first alt allele is at 0). The vector is always
* maxAltAlleles in length, and so only the first getAllelesUsedInGenotyping.size() - 1 values
* are meaningful.
*
* @return a vector with allele counts, not all of which may be meaningful
*/
@Ensures("result != null")
public int[] getAlleleCountsOfMLE() {
return alleleCountsOfMLE;
}
/**
* Returns a vector with maxAltAlleles values containing AC values at the MAP
*
* @see #getAlleleCountsOfMLE() for the encoding of results in this vector
*
* @return a non-null vector of ints
*/
@Ensures("result != null")
public int[] getAlleleCountsOfMAP() {
return alleleCountsOfMAP;
}
/**
* Returns the likelihoods summed across all AC values for AC > 0
*
* @return
*/
public double getLog10LikelihoodOfAFNotZero() {
if ( log10LikelihoodsMatrixSum == null ) {
if ( currentLikelihoodsCacheIndex == 0 ) // there's nothing to sum up, so make the sum equal to the smallest thing we have
log10LikelihoodsMatrixSum = MathUtils.LOG10_P_OF_ZERO;
else
log10LikelihoodsMatrixSum = MathUtils.log10sumLog10(log10LikelihoodsMatrixValues, 0, currentLikelihoodsCacheIndex);
}
return log10LikelihoodsMatrixSum;
}
public double getLog10LikelihoodOfAFNotZero(final boolean capAt0) {
return Math.min(getLog10LikelihoodOfAFNotZero(), capAt0 ? 0.0 : Double.POSITIVE_INFINITY);
}
/**
* TODO -- eric what is this supposed to return? my unit tests don't do what I think they should
*
* @return
*/
public double getLog10LikelihoodOfAFzero() {
return log10LikelihoodOfAFzero;
}
/**
* TODO -- eric what is this supposed to return? my unit tests don't do what I think they should
*
* @return
*/
public double getLog10PosteriorOfAFzero() {
return log10PosteriorOfAFzero;
}
protected AFCalcResult toAFCalcResult(final double[] log10PriorsByAC) {
final int [] subACOfMLE = Arrays.copyOf(alleleCountsOfMLE, allelesUsedInGenotyping.size() - 1);
final double[] log10Likelihoods = new double[]{getLog10LikelihoodOfAFzero(), getLog10LikelihoodOfAFNotZero(true)};
final double[] log10Priors = MathUtils.normalizeFromLog10(new double[]{log10PriorsByAC[0], MathUtils.log10sumLog10(log10PriorsByAC, 1)}, true);
// TODO -- replace with more meaningful computation
// TODO -- refactor this calculation into the ref calculation
final Map<Allele, Double> log10pNonRefByAllele = new HashMap<Allele, Double>(allelesUsedInGenotyping.size());
for ( int i = 0; i < subACOfMLE.length; i++ ) {
final Allele allele = allelesUsedInGenotyping.get(i+1);
final double log10PNonRef = getAlleleCountsOfMAP()[i] > 0 ? 0 : -10000; // TODO -- a total hack but in effect what the old behavior was
log10pNonRefByAllele.put(allele, log10PNonRef);
}
return new AFCalcResult(subACOfMLE, nEvaluations, allelesUsedInGenotyping, log10Likelihoods, log10Priors, log10pNonRefByAllele);
}
// --------------------------------------------------------------------------------
//
// Protected mutational methods only for use within the calculation models themselves
//
// --------------------------------------------------------------------------------
/**
* Reset the data in this results object, so that it can be used in a subsequent AF calculation
*
* Resetting of the data is done by the calculation model itself, so shouldn't be done by callers any longer
*/
protected void reset() {
log10MLE = log10MAP = log10LikelihoodOfAFzero = log10PosteriorOfAFzero = VALUE_NOT_CALCULATED;
for ( int i = 0; i < alleleCountsOfMLE.length; i++ ) {
alleleCountsOfMLE[i] = 0;
alleleCountsOfMAP[i] = 0;
}
currentLikelihoodsCacheIndex = 0;
log10LikelihoodsMatrixSum = null;
allelesUsedInGenotyping = null;
nEvaluations = 0;
Arrays.fill(log10LikelihoodsMatrixValues, Double.POSITIVE_INFINITY);
}
/**
* Tell this result we used one more evaluation cycle
*/
protected void incNEvaluations() {
nEvaluations++;
}
protected void updateMLEifNeeded(final double log10LofK, final int[] alleleCountsForK) {
addToLikelihoodsCache(log10LofK);
if ( log10LofK > log10MLE ) {
log10MLE = log10LofK;
for ( int i = 0; i < alleleCountsForK.length; i++ )
alleleCountsOfMLE[i] = alleleCountsForK[i];
}
}
protected void updateMAPifNeeded(final double log10LofK, final int[] alleleCountsForK) {
if ( log10LofK > log10MAP ) {
log10MAP = log10LofK;
for ( int i = 0; i < alleleCountsForK.length; i++ )
alleleCountsOfMAP[i] = alleleCountsForK[i];
}
}
private void addToLikelihoodsCache(final double log10LofK) {
// add to the cache
log10LikelihoodsMatrixValues[currentLikelihoodsCacheIndex++] = log10LofK;
// if we've filled up the cache, then condense by summing up all of the values and placing the sum back into the first cell
if ( currentLikelihoodsCacheIndex == LIKELIHOODS_CACHE_SIZE) {
final double temporarySum = MathUtils.log10sumLog10(log10LikelihoodsMatrixValues, 0, currentLikelihoodsCacheIndex);
Arrays.fill(log10LikelihoodsMatrixValues, Double.POSITIVE_INFINITY);
log10LikelihoodsMatrixValues[0] = temporarySum;
currentLikelihoodsCacheIndex = 1;
}
}
protected void setLog10LikelihoodOfAFzero(final double log10LikelihoodOfAFzero) {
this.log10LikelihoodOfAFzero = log10LikelihoodOfAFzero;
if ( log10LikelihoodOfAFzero > log10MLE ) {
log10MLE = log10LikelihoodOfAFzero;
Arrays.fill(alleleCountsOfMLE, 0);
}
}
protected void setLog10PosteriorOfAFzero(final double log10PosteriorOfAFzero) {
this.log10PosteriorOfAFzero = log10PosteriorOfAFzero;
if ( log10PosteriorOfAFzero > log10MAP ) {
log10MAP = log10PosteriorOfAFzero;
Arrays.fill(alleleCountsOfMAP, 0);
}
}
protected void setAllelesUsedInGenotyping(List<Allele> allelesUsedInGenotyping) {
if ( allelesUsedInGenotyping == null || allelesUsedInGenotyping.isEmpty() )
throw new IllegalArgumentException("allelesUsedInGenotyping cannot be null or empty");
this.allelesUsedInGenotyping = allelesUsedInGenotyping;
}
protected void setAClimits(int[] AClimits) {
this.AClimits = AClimits;
}
}

View File

@ -36,10 +36,6 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc {
if ( ploidy != 2 ) throw new IllegalArgumentException("ploidy must be two for DiploidExactAFCalc and subclasses but saw " + ploidy); if ( ploidy != 2 ) throw new IllegalArgumentException("ploidy must be two for DiploidExactAFCalc and subclasses but saw " + ploidy);
} }
protected StateTracker makeMaxLikelihood(VariantContext vc, AFCalcResultTracker resultTracker) {
return new StateTracker();
}
@Override @Override
protected AFCalcResult computeLog10PNonRef(final VariantContext vc, protected AFCalcResult computeLog10PNonRef(final VariantContext vc,
final double[] log10AlleleFrequencyPriors) { final double[] log10AlleleFrequencyPriors) {
@ -60,29 +56,21 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc {
ACqueue.add(zeroSet); ACqueue.add(zeroSet);
indexesToACset.put(zeroSet.getACcounts(), zeroSet); indexesToACset.put(zeroSet.getACcounts(), zeroSet);
// keep processing while we have AC conformations that need to be calculated
final StateTracker stateTracker = makeMaxLikelihood(vc, getResultTracker());
while ( !ACqueue.isEmpty() ) { while ( !ACqueue.isEmpty() ) {
getResultTracker().incNEvaluations(); // keep track of the number of evaluations getStateTracker().incNEvaluations(); // keep track of the number of evaluations
// compute log10Likelihoods // compute log10Likelihoods
final ExactACset set = ACqueue.remove(); final ExactACset set = ACqueue.remove();
if ( stateTracker.withinMaxACs(set.getACcounts()) ) { final double log10LofKs = calculateAlleleCountConformation(set, genotypeLikelihoods, numChr, ACqueue, indexesToACset, log10AlleleFrequencyPriors);
final double log10LofKs = calculateAlleleCountConformation(set, genotypeLikelihoods, stateTracker, numChr, ACqueue, indexesToACset, log10AlleleFrequencyPriors, getResultTracker());
// adjust max likelihood seen if needed // clean up memory
stateTracker.update(log10LofKs, set.getACcounts()); indexesToACset.remove(set.getACcounts());
//if ( DEBUG )
// clean up memory // System.out.printf(" *** removing used set=%s%n", set.ACcounts);
indexesToACset.remove(set.getACcounts());
//if ( DEBUG )
// System.out.printf(" *** removing used set=%s%n", set.ACcounts);
}
} }
return resultFromTracker(vc, log10AlleleFrequencyPriors); return getResultFromFinalState(vc, log10AlleleFrequencyPriors);
} }
@Override @Override
@ -153,23 +141,21 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc {
private double calculateAlleleCountConformation(final ExactACset set, private double calculateAlleleCountConformation(final ExactACset set,
final ArrayList<double[]> genotypeLikelihoods, final ArrayList<double[]> genotypeLikelihoods,
final StateTracker stateTracker,
final int numChr, final int numChr,
final LinkedList<ExactACset> ACqueue, final LinkedList<ExactACset> ACqueue,
final HashMap<ExactACcounts, ExactACset> indexesToACset, final HashMap<ExactACcounts, ExactACset> indexesToACset,
final double[] log10AlleleFrequencyPriors, final double[] log10AlleleFrequencyPriors) {
final AFCalcResultTracker resultTracker) {
//if ( DEBUG ) //if ( DEBUG )
// System.out.printf(" *** computing LofK for set=%s%n", set.ACcounts); // System.out.printf(" *** computing LofK for set=%s%n", set.ACcounts);
// compute the log10Likelihoods // compute the log10Likelihoods
computeLofK(set, genotypeLikelihoods, log10AlleleFrequencyPriors, resultTracker); computeLofK(set, genotypeLikelihoods, log10AlleleFrequencyPriors);
final double log10LofK = set.getLog10Likelihoods()[set.getLog10Likelihoods().length-1]; final double log10LofK = set.getLog10Likelihoods()[set.getLog10Likelihoods().length-1];
// can we abort early because the log10Likelihoods are so small? // can we abort early because the log10Likelihoods are so small?
if ( stateTracker.abort(log10LofK, set.getACcounts()) ) { if ( getStateTracker().abort(log10LofK, set.getACcounts(), true) ) {
//if ( DEBUG ) //if ( DEBUG )
// System.out.printf(" *** breaking early set=%s log10L=%.2f maxLog10L=%.2f%n", set.ACcounts, log10LofK, maxLog10L); // System.out.printf(" *** breaking early set=%s log10L=%.2f maxLog10L=%.2f%n", set.ACcounts, log10LofK, maxLog10L);
return log10LofK; return log10LofK;
@ -188,7 +174,7 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc {
ACcountsClone[allele]++; ACcountsClone[allele]++;
// to get to this conformation, a sample would need to be AB (remember that ref=0) // to get to this conformation, a sample would need to be AB (remember that ref=0)
final int PLindex = GenotypeLikelihoods.calculatePLindex(0, allele+1); final int PLindex = GenotypeLikelihoods.calculatePLindex(0, allele+1);
updateACset(stateTracker, ACcountsClone, numChr, set, PLindex, ACqueue, indexesToACset, genotypeLikelihoods); updateACset(ACcountsClone, numChr, set, PLindex, ACqueue, indexesToACset, genotypeLikelihoods);
} }
// add conformations for the k+2 case if it makes sense; note that the 2 new alleles may be the same or different // add conformations for the k+2 case if it makes sense; note that the 2 new alleles may be the same or different
@ -213,9 +199,9 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc {
// IMPORTANT: we must first add the cases where the 2 new alleles are different so that the queue maintains its ordering // IMPORTANT: we must first add the cases where the 2 new alleles are different so that the queue maintains its ordering
for ( DependentSet dependent : differentAlleles ) for ( DependentSet dependent : differentAlleles )
updateACset(stateTracker, dependent.ACcounts, numChr, set, dependent.PLindex, ACqueue, indexesToACset, genotypeLikelihoods); updateACset(dependent.ACcounts, numChr, set, dependent.PLindex, ACqueue, indexesToACset, genotypeLikelihoods);
for ( DependentSet dependent : sameAlleles ) for ( DependentSet dependent : sameAlleles )
updateACset(stateTracker, dependent.ACcounts, numChr, set, dependent.PLindex, ACqueue, indexesToACset, genotypeLikelihoods); updateACset(dependent.ACcounts, numChr, set, dependent.PLindex, ACqueue, indexesToACset, genotypeLikelihoods);
} }
return log10LofK; return log10LofK;
@ -223,8 +209,7 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc {
// adds the ExactACset represented by the ACcounts to the ACqueue if not already there (creating it if needed) and // adds the ExactACset represented by the ACcounts to the ACqueue if not already there (creating it if needed) and
// also pushes its value to the given callingSetIndex. // also pushes its value to the given callingSetIndex.
private void updateACset(final StateTracker stateTracker, private void updateACset(final int[] newSetCounts,
final int[] newSetCounts,
final int numChr, final int numChr,
final ExactACset dependentSet, final ExactACset dependentSet,
final int PLsetIndex, final int PLsetIndex,
@ -246,8 +231,7 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc {
private void computeLofK(final ExactACset set, private void computeLofK(final ExactACset set,
final ArrayList<double[]> genotypeLikelihoods, final ArrayList<double[]> genotypeLikelihoods,
final double[] log10AlleleFrequencyPriors, final double[] log10AlleleFrequencyPriors) {
final AFCalcResultTracker resultTracker) {
set.getLog10Likelihoods()[0] = 0.0; // the zero case set.getLog10Likelihoods()[0] = 0.0; // the zero case
final int totalK = set.getACsum(); final int totalK = set.getACsum();
@ -258,8 +242,8 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc {
set.getLog10Likelihoods()[j] = set.getLog10Likelihoods()[j-1] + genotypeLikelihoods.get(j)[HOM_REF_INDEX]; set.getLog10Likelihoods()[j] = set.getLog10Likelihoods()[j-1] + genotypeLikelihoods.get(j)[HOM_REF_INDEX];
final double log10Lof0 = set.getLog10Likelihoods()[set.getLog10Likelihoods().length-1]; final double log10Lof0 = set.getLog10Likelihoods()[set.getLog10Likelihoods().length-1];
resultTracker.setLog10LikelihoodOfAFzero(log10Lof0); getStateTracker().setLog10LikelihoodOfAFzero(log10Lof0);
resultTracker.setLog10PosteriorOfAFzero(log10Lof0 + log10AlleleFrequencyPriors[0]); getStateTracker().setLog10PosteriorOfAFzero(log10Lof0 + log10AlleleFrequencyPriors[0]);
return; return;
} }
@ -281,14 +265,15 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc {
double log10LofK = set.getLog10Likelihoods()[set.getLog10Likelihoods().length-1]; double log10LofK = set.getLog10Likelihoods()[set.getLog10Likelihoods().length-1];
// update the MLE if necessary // update the MLE if necessary
resultTracker.updateMLEifNeeded(log10LofK, set.getACcounts().getCounts()); getStateTracker().updateMLEifNeeded(log10LofK, set.getACcounts().getCounts());
// apply the priors over each alternate allele // apply the priors over each alternate allele
for ( final int ACcount : set.getACcounts().getCounts() ) { for ( final int ACcount : set.getACcounts().getCounts() ) {
if ( ACcount > 0 ) if ( ACcount > 0 )
log10LofK += log10AlleleFrequencyPriors[ACcount]; log10LofK += log10AlleleFrequencyPriors[ACcount];
} }
resultTracker.updateMAPifNeeded(log10LofK, set.getACcounts().getCounts());
getStateTracker().updateMAPifNeeded(log10LofK, set.getACcounts().getCounts());
} }
private void pushData(final ExactACset targetSet, private void pushData(final ExactACset targetSet,

View File

@ -16,10 +16,6 @@ public class OriginalDiploidExactAFCalc extends DiploidExactAFCalc {
super(nSamples, maxAltAlleles, maxAltAllelesForIndels, ploidy); super(nSamples, maxAltAlleles, maxAltAllelesForIndels, ploidy);
} }
protected StateTracker makeMaxLikelihood(final VariantContext vc, final AFCalcResultTracker resultTracker) {
return new StateTracker();
}
@Override @Override
protected AFCalcResult computeLog10PNonRef(VariantContext vc, double[] log10AlleleFrequencyPriors) { protected AFCalcResult computeLog10PNonRef(VariantContext vc, double[] log10AlleleFrequencyPriors) {
final double[] log10AlleleFrequencyLikelihoods = new double[log10AlleleFrequencyPriors.length]; final double[] log10AlleleFrequencyLikelihoods = new double[log10AlleleFrequencyPriors.length];

View File

@ -1,35 +1,85 @@
package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc; package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/** /**
* Keeps track of the best state seen by the exact model and the max states to visit * Keeps track of the state information during the exact model AF calculation.
* allowing us to abort the search before we visit the entire matrix of AC x samples *
* Tracks things like the MLE and MAP AC values, their corresponding likelhood and posterior
* values, the likelihood of the AF == 0 state, and the number of evaluations needed
* by the calculation to compute the P(AF == 0)
*/ */
final class StateTracker { final class StateTracker {
public final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6 protected static final double VALUE_NOT_CALCULATED = Double.NEGATIVE_INFINITY;
protected final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6
final private int[] maxACsToConsider;
private ExactACcounts ACsAtMax = null;
private double maxLog10L = Double.NEGATIVE_INFINITY;
public StateTracker() {
this(null);
}
public StateTracker(final int[] maxACsToConsider) {
this.maxACsToConsider = maxACsToConsider;
}
/** /**
* Update the maximum log10L seen, if log10LofKs is higher, and the corresponding ACs of this state * These variables are intended to contain the MLE and MAP (and their corresponding allele counts)
* * of the site over all alternate alleles
* @param log10LofKs the likelihood of our current configuration state
*/ */
public void update(final double log10LofKs, final ExactACcounts ACs) { protected double log10MLE;
if ( log10LofKs > getMaxLog10L()) { protected double log10MAP;
this.setMaxLog10L(log10LofKs);
this.ACsAtMax = ACs; /**
} * Returns a vector with maxAltAlleles values containing AC values at the MLE
*
* The values of the ACs for this call are stored in the getAllelesUsedInGenotyping order,
* starting from index 0 (i.e., the first alt allele is at 0). The vector is always
* maxAltAlleles in length, and so only the first getAllelesUsedInGenotyping.size() - 1 values
* are meaningful.
*/
private final int[] alleleCountsOfMLE;
private final int[] alleleCountsOfMAP;
/**
* A vector of log10 likelihood values seen, for future summation. When the size of the
* vector is exceeed -- because we've pushed more posteriors than there's space to hold
* -- we simply sum up the existing values, make that the first value, and continue.
*/
private final double[] log10LikelihoodsForAFGt0 = new double[LIKELIHOODS_CACHE_SIZE];
private static final int LIKELIHOODS_CACHE_SIZE = 5000;
private int log10LikelihoodsForAFGt0CacheIndex = 0;
/**
* The actual sum of the likelihoods. Null if the sum hasn't been computed yet
*/
protected Double log10LikelihoodsForAFGt0Sum = null;
/**
* Contains the likelihood for the site's being monomorphic (i.e. AF=0 for all alternate alleles)
*/
private double log10LikelihoodOfAFzero = 0.0;
/**
* The number of evaluates we've gone through in the AFCalc
*/
private int nEvaluations = 0;
/**
* The list of alleles actually used in computing the AF
*/
private List<Allele> allelesUsedInGenotyping = null;
/**
* Create a results object capability of storing results for calls with up to maxAltAlleles
*
* @param maxAltAlleles an integer >= 1
*/
public StateTracker(final int maxAltAlleles) {
if ( maxAltAlleles < 1 ) throw new IllegalArgumentException("maxAltAlleles must be >= 0, saw " + maxAltAlleles);
alleleCountsOfMLE = new int[maxAltAlleles];
alleleCountsOfMAP = new int[maxAltAlleles];
reset();
} }
/** /**
@ -39,58 +89,200 @@ final class StateTracker {
* @param log10LofK the log10 likelihood of the configuration we're considering analyzing * @param log10LofK the log10 likelihood of the configuration we're considering analyzing
* @return true if the configuration cannot meaningfully contribute to our likelihood sum * @return true if the configuration cannot meaningfully contribute to our likelihood sum
*/ */
public boolean tooLowLikelihood(final double log10LofK) { private boolean tooLowLikelihood(final double log10LofK) {
return log10LofK < getMaxLog10L() - MAX_LOG10_ERROR_TO_STOP_EARLY; return log10LofK < log10MLE - MAX_LOG10_ERROR_TO_STOP_EARLY;
} }
/** /**
* Are all ACs in otherACs less than or equal to their corresponding ACs in the maxACsToConsider? * @return true iff all ACs in this object are less than or equal to their corresponding ACs in the provided set
*/
private boolean isLowerAC(final ExactACcounts otherACs) {
final int[] otherACcounts = otherACs.getCounts();
for ( int i = 0; i < otherACcounts.length; i++ ) {
if ( alleleCountsOfMLE[i] > otherACcounts[i] )
return false;
}
return true;
}
/**
* Should we stop exploring paths from ACs, given it's log10LofK
* *
* @param otherACs the set of otherACs that we want to know if we should consider analyzing * @param log10LofK the log10LofK of these ACs
* @return true if otherACs is a state worth considering, or false otherwise * @param ACs the ACs of this state
* @return return true if there's no reason to continue with subpaths of AC, or false otherwise
*/ */
public boolean withinMaxACs(final ExactACcounts otherACs) { protected boolean abort( final double log10LofK, final ExactACcounts ACs, final boolean enforceLowerACs ) {
if ( maxACsToConsider == null ) return tooLowLikelihood(log10LofK) && (!enforceLowerACs || isLowerAC(ACs));
return true; }
final int[] otherACcounts = otherACs.getCounts(); @Ensures("result != null")
protected int[] getAlleleCountsOfMAP() {
return alleleCountsOfMAP;
}
for ( int i = 0; i < maxACsToConsider.length; i++ ) { @Ensures("result >= 0")
// consider one more than the max AC to collect a bit more likelihood mass protected int getnEvaluations() {
if ( otherACcounts[i] > maxACsToConsider[i] + 1 ) return nEvaluations;
return false;
}
return true;
} }
/** /**
* returns true iff all ACs in this object are less than or equal to their corresponding ACs in the provided set * Returns the likelihoods summed across all AC values for AC > 0
*
* @return
*/ */
public boolean isLowerAC(final ExactACcounts otherACs) { private double getLog10LikelihoodOfAFNotZero(final boolean capAt0) {
if ( ACsAtMax == null ) if ( log10LikelihoodsForAFGt0Sum == null ) {
return true; if ( log10LikelihoodsForAFGt0CacheIndex == 0 ) // there's nothing to sum up, so make the sum equal to the smallest thing we have
log10LikelihoodsForAFGt0Sum = MathUtils.LOG10_P_OF_ZERO;
else
log10LikelihoodsForAFGt0Sum = MathUtils.log10sumLog10(log10LikelihoodsForAFGt0, 0, log10LikelihoodsForAFGt0CacheIndex);
}
return Math.min(log10LikelihoodsForAFGt0Sum, capAt0 ? 0.0 : Double.POSITIVE_INFINITY);
}
final int[] myACcounts = this.ACsAtMax.getCounts(); /**
final int[] otherACcounts = otherACs.getCounts(); * @return
*/
private double getLog10LikelihoodOfAFzero() {
return log10LikelihoodOfAFzero;
}
for ( int i = 0; i < myACcounts.length; i++ ) { /**
if ( myACcounts[i] > otherACcounts[i] ) * Convert this state to an corresponding AFCalcResult.
return false; *
* Assumes that the values in this state have been filled in with meaningful values during the calculation.
* For example, that the allelesUsedInGenotyping has been set, that the alleleCountsOfMLE contains meaningful
* values, etc.
*
* @param log10PriorsByAC
*
* @return
*/
@Requires("allelesUsedInGenotyping != null")
protected AFCalcResult toAFCalcResult(final double[] log10PriorsByAC) {
final int [] subACOfMLE = Arrays.copyOf(alleleCountsOfMLE, allelesUsedInGenotyping.size() - 1);
final double[] log10Likelihoods = new double[]{getLog10LikelihoodOfAFzero(), getLog10LikelihoodOfAFNotZero(true)};
final double[] log10Priors = MathUtils.normalizeFromLog10(new double[]{log10PriorsByAC[0], MathUtils.log10sumLog10(log10PriorsByAC, 1)}, true);
// TODO -- replace with more meaningful computation
// TODO -- refactor this calculation into the ref calculation
final Map<Allele, Double> log10pNonRefByAllele = new HashMap<Allele, Double>(allelesUsedInGenotyping.size());
for ( int i = 0; i < subACOfMLE.length; i++ ) {
final Allele allele = allelesUsedInGenotyping.get(i+1);
final double log10PNonRef = alleleCountsOfMAP[i] > 0 ? 0 : -10000; // TODO -- a total hack but in effect what the old behavior was
log10pNonRefByAllele.put(allele, log10PNonRef);
} }
return true; return new AFCalcResult(subACOfMLE, nEvaluations, allelesUsedInGenotyping, log10Likelihoods, log10Priors, log10pNonRefByAllele);
} }
public boolean abort( final double log10LofK, final ExactACcounts ACs ) { // --------------------------------------------------------------------------------
return tooLowLikelihood(log10LofK) && isLowerAC(ACs); //
// Protected mutational methods only for use within the calculation models themselves
//
// --------------------------------------------------------------------------------
/**
* Reset the data in this results object, so that it can be used in a subsequent AF calculation
*
* Resetting of the data is done by the calculation model itself, so shouldn't be done by callers any longer
*/
protected void reset() {
log10MLE = log10MAP = log10LikelihoodOfAFzero = VALUE_NOT_CALCULATED;
log10LikelihoodsForAFGt0CacheIndex = 0;
log10LikelihoodsForAFGt0Sum = null;
allelesUsedInGenotyping = null;
nEvaluations = 0;
Arrays.fill(alleleCountsOfMLE, 0);
Arrays.fill(alleleCountsOfMAP, 0);
Arrays.fill(log10LikelihoodsForAFGt0, Double.POSITIVE_INFINITY);
} }
public double getMaxLog10L() { /**
return maxLog10L; * Tell this result we used one more evaluation cycle
*/
protected void incNEvaluations() {
nEvaluations++;
} }
public void setMaxLog10L(double maxLog10L) { /**
this.maxLog10L = maxLog10L; * Update the maximum log10 likelihoods seen, if log10LofKs is higher, and the corresponding ACs of this state
*
* @param log10LofK the likelihood of our current configuration state, cannot be the 0 state
* @param alleleCountsForK the allele counts for this state
*/
@Requires({"alleleCountsForK != null", "MathUtils.sum(alleleCountsForK) >= 0", "MathUtils.goodLog10Probability(log10LofK)"})
@Ensures("log10MLE == Math.max(log10LofK, log10MLE)")
protected void updateMLEifNeeded(final double log10LofK, final int[] alleleCountsForK) {
addToLikelihoodsCache(log10LofK);
if ( log10LofK > log10MLE ) {
log10MLE = log10LofK;
System.arraycopy(alleleCountsForK, 0, alleleCountsOfMLE, 0, alleleCountsForK.length);
}
}
/**
* Update the maximum log10 posterior seen, if log10PofKs is higher, and the corresponding ACs of this state
*
* @param log10PofK the posterior of our current configuration state
* @param alleleCountsForK the allele counts for this state
*/
@Requires({"alleleCountsForK != null", "MathUtils.sum(alleleCountsForK) >= 0", "MathUtils.goodLog10Probability(log10PofK)"})
@Ensures("log10MAP == Math.max(log10PofK, log10MAP)")
protected void updateMAPifNeeded(final double log10PofK, final int[] alleleCountsForK) {
if ( log10PofK > log10MAP ) {
log10MAP = log10PofK;
System.arraycopy(alleleCountsForK, 0, alleleCountsOfMAP, 0, alleleCountsForK.length);
}
}
@Requires({"MathUtils.goodLog10Probability(log10LofK)"})
private void addToLikelihoodsCache(final double log10LofK) {
// add to the cache
log10LikelihoodsForAFGt0[log10LikelihoodsForAFGt0CacheIndex++] = log10LofK;
// if we've filled up the cache, then condense by summing up all of the values and placing the sum back into the first cell
if ( log10LikelihoodsForAFGt0CacheIndex == LIKELIHOODS_CACHE_SIZE) {
final double temporarySum = MathUtils.log10sumLog10(log10LikelihoodsForAFGt0, 0, log10LikelihoodsForAFGt0CacheIndex);
Arrays.fill(log10LikelihoodsForAFGt0, Double.POSITIVE_INFINITY);
log10LikelihoodsForAFGt0[0] = temporarySum;
log10LikelihoodsForAFGt0CacheIndex = 1;
}
}
@Requires({"MathUtils.goodLog10Probability(log10LikelihoodOfAFzero)"})
protected void setLog10LikelihoodOfAFzero(final double log10LikelihoodOfAFzero) {
this.log10LikelihoodOfAFzero = log10LikelihoodOfAFzero;
if ( log10LikelihoodOfAFzero > log10MLE ) {
log10MLE = log10LikelihoodOfAFzero;
Arrays.fill(alleleCountsOfMLE, 0);
}
}
@Requires({"MathUtils.goodLog10Probability(log10LikelihoodOfAFzero)"})
protected void setLog10PosteriorOfAFzero(final double log10PosteriorOfAFzero) {
if ( log10PosteriorOfAFzero > log10MAP ) {
log10MAP = log10PosteriorOfAFzero;
Arrays.fill(alleleCountsOfMAP, 0);
}
}
/**
* Set the list of alleles used in genotyping
*
* @param allelesUsedInGenotyping the list of alleles, where the first allele is reference
*/
@Requires({"allelesUsedInGenotyping != null", "allelesUsedInGenotyping.size() > 1"})
protected void setAllelesUsedInGenotyping(List<Allele> allelesUsedInGenotyping) {
if ( allelesUsedInGenotyping == null || allelesUsedInGenotyping.isEmpty() )
throw new IllegalArgumentException("allelesUsedInGenotyping cannot be null or empty");
if ( allelesUsedInGenotyping.get(0).isNonReference() )
throw new IllegalArgumentException("The first element of allelesUsedInGenotyping must be the reference allele");
this.allelesUsedInGenotyping = allelesUsedInGenotyping;
} }
} }

View File

@ -1194,6 +1194,39 @@ public class MathUtils {
return getQScoreOrderStatistic(reads, offsets, (int) Math.floor(reads.size() / 2.)); return getQScoreOrderStatistic(reads, offsets, (int) Math.floor(reads.size() / 2.));
} }
/**
* Check that the log10 prob vector vector is well formed
*
* @param vector
* @param expectedSize
* @param shouldSumToOne
*
* @return true if vector is well-formed, false otherwise
*/
public static boolean goodLog10ProbVector(final double[] vector, final int expectedSize, final boolean shouldSumToOne) {
if ( vector.length != expectedSize ) return false;
for ( final double pr : vector ) {
if ( ! goodLog10Probability(pr) )
return false;
}
if ( shouldSumToOne && compareDoubles(sumLog10(vector), 1.0, 1e-4) != 0 )
return false;
return true; // everything is good
}
/**
* Checks that the result is a well-formed log10 probability
*
* @param result a supposedly well-formed log10 probability value
* @return true if result is really well formed
*/
public static boolean goodLog10Probability(final double result) {
return result <= 0.0 && ! Double.isInfinite(result) && ! Double.isNaN(result);
}
/** /**
* A utility class that computes on the fly average and standard deviation for a stream of numbers. * A utility class that computes on the fly average and standard deviation for a stream of numbers.
* The number of observations does not have to be known in advance, and can be also very big (so that * The number of observations does not have to be known in advance, and can be also very big (so that