Merge AFCalcResultTracker into StateTracker, cleanup
-- These two classes were really the same, and now they are actually the same! -- Cleaned up the interfaces, removed duplicate data -- Added lots of contracts, some of which found numerical issues with GeneralPloidyExactAFCalc (which have been patched over but not fixed) -- Moved goodLog10Probability and goodLog10ProbVector utilities to MathUtils. Very useful for contracts!
This commit is contained in:
parent
9c63cee9fc
commit
99c9031cb4
|
|
@ -26,7 +26,6 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
|
||||
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.GeneralPloidyGenotypeLikelihoods;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.ProbabilityVector;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
|
@ -69,8 +68,8 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
|
|||
|
||||
@Override
|
||||
public AFCalcResult computeLog10PNonRef(final VariantContext vc, final double[] log10AlleleFrequencyPriors) {
|
||||
combineSinglePools(vc.getGenotypes(), vc.getNAlleles(), ploidy, log10AlleleFrequencyPriors, getResultTracker());
|
||||
return resultFromTracker(vc, log10AlleleFrequencyPriors);
|
||||
combineSinglePools(vc.getGenotypes(), vc.getNAlleles(), ploidy, log10AlleleFrequencyPriors);
|
||||
return getResultFromFinalState(vc, log10AlleleFrequencyPriors);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -171,13 +170,11 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
|
|||
* @param numAlleles Number of alternate alleles
|
||||
* @param ploidyPerPool Number of samples per pool
|
||||
* @param log10AlleleFrequencyPriors Frequency priors
|
||||
* @param resultTracker object to fill with output values
|
||||
*/
|
||||
protected static void combineSinglePools(final GenotypesContext GLs,
|
||||
final int numAlleles,
|
||||
final int ploidyPerPool,
|
||||
final double[] log10AlleleFrequencyPriors,
|
||||
final AFCalcResultTracker resultTracker) {
|
||||
protected void combineSinglePools(final GenotypesContext GLs,
|
||||
final int numAlleles,
|
||||
final int ploidyPerPool,
|
||||
final double[] log10AlleleFrequencyPriors) {
|
||||
|
||||
final ArrayList<double[]> genotypeLikelihoods = getGLs(GLs, true);
|
||||
|
||||
|
|
@ -196,24 +193,24 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
|
|||
|
||||
if ( genotypeLikelihoods.size() <= 1 ) {
|
||||
// no meaningful GLs at all, just set the tracker to non poly values
|
||||
resultTracker.reset(); // just mimic-ing call below
|
||||
resultTracker.setLog10LikelihoodOfAFzero(0.0);
|
||||
getStateTracker().reset(); // just mimic-ing call below
|
||||
getStateTracker().setLog10LikelihoodOfAFzero(0.0);
|
||||
} else {
|
||||
for (int p=1; p<genotypeLikelihoods.size(); p++) {
|
||||
resultTracker.reset(); // TODO -- why is this here? It makes it hard to track the n evaluation
|
||||
combinedPoolLikelihoods = fastCombineMultiallelicPool(combinedPoolLikelihoods, genotypeLikelihoods.get(p), combinedPloidy, ploidyPerPool,
|
||||
numAlleles, log10AlleleFrequencyPriors, resultTracker);
|
||||
getStateTracker().reset(); // TODO -- why is this here? It makes it hard to track the n evaluation
|
||||
combinedPoolLikelihoods = fastCombineMultiallelicPool(combinedPoolLikelihoods, genotypeLikelihoods.get(p),
|
||||
combinedPloidy, ploidyPerPool, numAlleles, log10AlleleFrequencyPriors);
|
||||
combinedPloidy = ploidyPerPool + combinedPloidy; // total number of chromosomes in combinedLikelihoods
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public static CombinedPoolLikelihoods fastCombineMultiallelicPool(final CombinedPoolLikelihoods originalPool, double[] newGL, int originalPloidy, int newGLPloidy, int numAlleles,
|
||||
final double[] log10AlleleFrequencyPriors,
|
||||
final AFCalcResultTracker resultTracker) {
|
||||
|
||||
|
||||
|
||||
public CombinedPoolLikelihoods fastCombineMultiallelicPool(final CombinedPoolLikelihoods originalPool,
|
||||
double[] newGL,
|
||||
int originalPloidy,
|
||||
int newGLPloidy,
|
||||
int numAlleles,
|
||||
final double[] log10AlleleFrequencyPriors) {
|
||||
final LinkedList<ExactACset> ACqueue = new LinkedList<ExactACset>();
|
||||
// mapping of ExactACset indexes to the objects
|
||||
final HashMap<ExactACcounts, ExactACset> indexesToACset = new HashMap<ExactACcounts, ExactACset>();
|
||||
|
|
@ -230,16 +227,11 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
|
|||
indexesToACset.put(zeroSet.getACcounts(), zeroSet);
|
||||
|
||||
// keep processing while we have AC conformations that need to be calculated
|
||||
StateTracker stateTracker = new StateTracker();
|
||||
while ( !ACqueue.isEmpty() ) {
|
||||
resultTracker.incNEvaluations();
|
||||
getStateTracker().incNEvaluations();
|
||||
// compute log10Likelihoods
|
||||
final ExactACset ACset = ACqueue.remove();
|
||||
final double log10LofKs = calculateACConformationAndUpdateQueue(ACset, newPool, originalPool, newGL, log10AlleleFrequencyPriors, originalPloidy, newGLPloidy, resultTracker, stateTracker, ACqueue, indexesToACset);
|
||||
|
||||
// adjust max likelihood seen if needed
|
||||
if ( log10LofKs > stateTracker.getMaxLog10L())
|
||||
stateTracker.update(log10LofKs, ACset.getACcounts());
|
||||
final double log10LofKs = calculateACConformationAndUpdateQueue(ACset, newPool, originalPool, newGL, log10AlleleFrequencyPriors, originalPloidy, newGLPloidy, ACqueue, indexesToACset);
|
||||
|
||||
// clean up memory
|
||||
indexesToACset.remove(ACset.getACcounts());
|
||||
|
|
@ -260,39 +252,32 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
|
|||
* @param log10AlleleFrequencyPriors Prior object
|
||||
* @param originalPloidy Total ploidy of original combined pool
|
||||
* @param newGLPloidy Ploidy of GL vector
|
||||
* @param resultTracker AFResult object
|
||||
* @param stateTracker max likelihood observed so far
|
||||
* @param ACqueue Queue of conformations to compute
|
||||
* @param indexesToACset AC indices of objects in queue
|
||||
* @return max log likelihood
|
||||
*/
|
||||
private static double calculateACConformationAndUpdateQueue(final ExactACset set,
|
||||
final CombinedPoolLikelihoods newPool,
|
||||
final CombinedPoolLikelihoods originalPool,
|
||||
final double[] newGL,
|
||||
final double[] log10AlleleFrequencyPriors,
|
||||
final int originalPloidy,
|
||||
final int newGLPloidy,
|
||||
final AFCalcResultTracker resultTracker,
|
||||
final StateTracker stateTracker,
|
||||
final LinkedList<ExactACset> ACqueue,
|
||||
final HashMap<ExactACcounts, ExactACset> indexesToACset) {
|
||||
private double calculateACConformationAndUpdateQueue(final ExactACset set,
|
||||
final CombinedPoolLikelihoods newPool,
|
||||
final CombinedPoolLikelihoods originalPool,
|
||||
final double[] newGL,
|
||||
final double[] log10AlleleFrequencyPriors,
|
||||
final int originalPloidy,
|
||||
final int newGLPloidy,
|
||||
final LinkedList<ExactACset> ACqueue,
|
||||
final HashMap<ExactACcounts, ExactACset> indexesToACset) {
|
||||
|
||||
// compute likeihood in "set" of new set based on original likelihoods
|
||||
final int numAlleles = set.getACcounts().getCounts().length;
|
||||
final int newPloidy = set.getACsum();
|
||||
final double log10LofK = computeLofK(set, originalPool, newGL, log10AlleleFrequencyPriors, numAlleles, originalPloidy, newGLPloidy, resultTracker);
|
||||
final double log10LofK = computeLofK(set, originalPool, newGL, log10AlleleFrequencyPriors, numAlleles, originalPloidy, newGLPloidy);
|
||||
|
||||
|
||||
// add to new pool
|
||||
if (!Double.isInfinite(log10LofK))
|
||||
newPool.add(set);
|
||||
|
||||
// TODO -- uncomment this correct line when the implementation of this model is optimized (it's too slow now to handle this fix)
|
||||
//if ( log10LofK < stateTracker.maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY && stateTracker.isLowerAC(set.ACcounts) ) {
|
||||
if ( log10LofK < stateTracker.getMaxLog10L() - MAX_LOG10_ERROR_TO_STOP_EARLY ) {
|
||||
if ( VERBOSE )
|
||||
System.out.printf(" *** breaking early set=%s log10L=%.2f maxLog10L=%.2f%n", set.getACcounts(), log10LofK, stateTracker.getMaxLog10L());
|
||||
// TODO -- change false to true this correct line when the implementation of this model is optimized (it's too slow now to handle this fix)
|
||||
if ( getStateTracker().abort(log10LofK, set.getACcounts(), false) ) {
|
||||
return log10LofK;
|
||||
}
|
||||
|
||||
|
|
@ -323,67 +308,67 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
|
|||
}
|
||||
|
||||
|
||||
/**
|
||||
* Naive combiner of two multiallelic pools - number of alt alleles must be the same.
|
||||
* Math is generalization of biallelic combiner.
|
||||
*
|
||||
* For vector K representing an allele count conformation,
|
||||
* Pr(D | AC = K) = Sum_G Pr(D|AC1 = G) Pr (D|AC2=K-G) * F(G,K)
|
||||
* where F(G,K) = choose(m1,[g0 g1 ...])*choose(m2,[...]) / choose(m1+m2,[k1 k2 ...])
|
||||
* @param originalPool First log-likelihood pool GL vector
|
||||
* @param yy Second pool GL vector
|
||||
* @param ploidy1 Ploidy of first pool (# of chromosomes in it)
|
||||
* @param ploidy2 Ploidy of second pool
|
||||
* @param numAlleles Number of alleles
|
||||
* @param log10AlleleFrequencyPriors Array of biallelic priors
|
||||
* @param resultTracker Af calculation result object
|
||||
*/
|
||||
public static void combineMultiallelicPoolNaively(CombinedPoolLikelihoods originalPool, double[] yy, int ploidy1, int ploidy2, int numAlleles,
|
||||
final double[] log10AlleleFrequencyPriors,
|
||||
final AFCalcResultTracker resultTracker) {
|
||||
/*
|
||||
final int dim1 = GenotypeLikelihoods.numLikelihoods(numAlleles, ploidy1);
|
||||
final int dim2 = GenotypeLikelihoods.numLikelihoods(numAlleles, ploidy2);
|
||||
|
||||
if (dim1 != originalPool.getLength() || dim2 != yy.length)
|
||||
throw new ReviewedStingException("BUG: Inconsistent vector length");
|
||||
|
||||
if (ploidy2 == 0)
|
||||
return;
|
||||
|
||||
final int newPloidy = ploidy1 + ploidy2;
|
||||
|
||||
// Say L1(K) = Pr(D|AC1=K) * choose(m1,K)
|
||||
// and L2(K) = Pr(D|AC2=K) * choose(m2,K)
|
||||
GeneralPloidyGenotypeLikelihoods.SumIterator firstIterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(numAlleles,ploidy1);
|
||||
final double[] x = originalPool.getLikelihoodsAsVector(true);
|
||||
while(firstIterator.hasNext()) {
|
||||
x[firstIterator.getLinearIndex()] += MathUtils.log10MultinomialCoefficient(ploidy1,firstIterator.getCurrentVector());
|
||||
firstIterator.next();
|
||||
}
|
||||
|
||||
GeneralPloidyGenotypeLikelihoods.SumIterator secondIterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(numAlleles,ploidy2);
|
||||
final double[] y = yy.clone();
|
||||
while(secondIterator.hasNext()) {
|
||||
y[secondIterator.getLinearIndex()] += MathUtils.log10MultinomialCoefficient(ploidy2,secondIterator.getCurrentVector());
|
||||
secondIterator.next();
|
||||
}
|
||||
|
||||
// initialize output to -log10(choose(m1+m2,[k1 k2...])
|
||||
final int outputDim = GenotypeLikelihoods.numLikelihoods(numAlleles, newPloidy);
|
||||
final GeneralPloidyGenotypeLikelihoods.SumIterator outputIterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(numAlleles,newPloidy);
|
||||
|
||||
|
||||
// Now, result(K) = logSum_G (L1(G)+L2(K-G)) where G are all possible vectors that sum UP to K
|
||||
while(outputIterator.hasNext()) {
|
||||
final ExactACset set = new ExactACset(1, new ExactACcounts(outputIterator.getCurrentAltVector()));
|
||||
double likelihood = computeLofK(set, x,y, log10AlleleFrequencyPriors, numAlleles, ploidy1, ploidy2, result);
|
||||
|
||||
originalPool.add(likelihood, set, outputIterator.getLinearIndex());
|
||||
outputIterator.next();
|
||||
}
|
||||
*/
|
||||
}
|
||||
// /**
|
||||
// * Naive combiner of two multiallelic pools - number of alt alleles must be the same.
|
||||
// * Math is generalization of biallelic combiner.
|
||||
// *
|
||||
// * For vector K representing an allele count conformation,
|
||||
// * Pr(D | AC = K) = Sum_G Pr(D|AC1 = G) Pr (D|AC2=K-G) * F(G,K)
|
||||
// * where F(G,K) = choose(m1,[g0 g1 ...])*choose(m2,[...]) / choose(m1+m2,[k1 k2 ...])
|
||||
// * @param originalPool First log-likelihood pool GL vector
|
||||
// * @param yy Second pool GL vector
|
||||
// * @param ploidy1 Ploidy of first pool (# of chromosomes in it)
|
||||
// * @param ploidy2 Ploidy of second pool
|
||||
// * @param numAlleles Number of alleles
|
||||
// * @param log10AlleleFrequencyPriors Array of biallelic priors
|
||||
// * @param resultTracker Af calculation result object
|
||||
// */
|
||||
// public static void combineMultiallelicPoolNaively(CombinedPoolLikelihoods originalPool, double[] yy, int ploidy1, int ploidy2, int numAlleles,
|
||||
// final double[] log10AlleleFrequencyPriors,
|
||||
// final AFCalcResultTracker resultTracker) {
|
||||
///*
|
||||
// final int dim1 = GenotypeLikelihoods.numLikelihoods(numAlleles, ploidy1);
|
||||
// final int dim2 = GenotypeLikelihoods.numLikelihoods(numAlleles, ploidy2);
|
||||
//
|
||||
// if (dim1 != originalPool.getLength() || dim2 != yy.length)
|
||||
// throw new ReviewedStingException("BUG: Inconsistent vector length");
|
||||
//
|
||||
// if (ploidy2 == 0)
|
||||
// return;
|
||||
//
|
||||
// final int newPloidy = ploidy1 + ploidy2;
|
||||
//
|
||||
// // Say L1(K) = Pr(D|AC1=K) * choose(m1,K)
|
||||
// // and L2(K) = Pr(D|AC2=K) * choose(m2,K)
|
||||
// GeneralPloidyGenotypeLikelihoods.SumIterator firstIterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(numAlleles,ploidy1);
|
||||
// final double[] x = originalPool.getLikelihoodsAsVector(true);
|
||||
// while(firstIterator.hasNext()) {
|
||||
// x[firstIterator.getLinearIndex()] += MathUtils.log10MultinomialCoefficient(ploidy1,firstIterator.getCurrentVector());
|
||||
// firstIterator.next();
|
||||
// }
|
||||
//
|
||||
// GeneralPloidyGenotypeLikelihoods.SumIterator secondIterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(numAlleles,ploidy2);
|
||||
// final double[] y = yy.clone();
|
||||
// while(secondIterator.hasNext()) {
|
||||
// y[secondIterator.getLinearIndex()] += MathUtils.log10MultinomialCoefficient(ploidy2,secondIterator.getCurrentVector());
|
||||
// secondIterator.next();
|
||||
// }
|
||||
//
|
||||
// // initialize output to -log10(choose(m1+m2,[k1 k2...])
|
||||
// final int outputDim = GenotypeLikelihoods.numLikelihoods(numAlleles, newPloidy);
|
||||
// final GeneralPloidyGenotypeLikelihoods.SumIterator outputIterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(numAlleles,newPloidy);
|
||||
//
|
||||
//
|
||||
// // Now, result(K) = logSum_G (L1(G)+L2(K-G)) where G are all possible vectors that sum UP to K
|
||||
// while(outputIterator.hasNext()) {
|
||||
// final ExactACset set = new ExactACset(1, new ExactACcounts(outputIterator.getCurrentAltVector()));
|
||||
// double likelihood = computeLofK(set, x,y, log10AlleleFrequencyPriors, numAlleles, ploidy1, ploidy2, result);
|
||||
//
|
||||
// originalPool.add(likelihood, set, outputIterator.getLinearIndex());
|
||||
// outputIterator.next();
|
||||
// }
|
||||
//*/
|
||||
// }
|
||||
|
||||
/**
|
||||
* Compute likelihood of a particular AC conformation and update AFresult object
|
||||
|
|
@ -394,15 +379,13 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
|
|||
* @param numAlleles Number of alleles (including ref)
|
||||
* @param ploidy1 Ploidy of original pool (combined)
|
||||
* @param ploidy2 Ploidy of new pool
|
||||
* @param resultTracker AFResult object
|
||||
* @return log-likehood of requested conformation
|
||||
*/
|
||||
private static double computeLofK(final ExactACset set,
|
||||
final CombinedPoolLikelihoods firstGLs,
|
||||
final double[] secondGL,
|
||||
final double[] log10AlleleFrequencyPriors,
|
||||
final int numAlleles, final int ploidy1, final int ploidy2,
|
||||
final AFCalcResultTracker resultTracker) {
|
||||
private double computeLofK(final ExactACset set,
|
||||
final CombinedPoolLikelihoods firstGLs,
|
||||
final double[] secondGL,
|
||||
final double[] log10AlleleFrequencyPriors,
|
||||
final int numAlleles, final int ploidy1, final int ploidy2) {
|
||||
|
||||
final int newPloidy = ploidy1 + ploidy2;
|
||||
|
||||
|
|
@ -420,8 +403,8 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
|
|||
final double log10Lof0 = firstGLs.getGLOfACZero() + secondGL[HOM_REF_INDEX];
|
||||
set.getLog10Likelihoods()[0] = log10Lof0;
|
||||
|
||||
resultTracker.setLog10LikelihoodOfAFzero(log10Lof0);
|
||||
resultTracker.setLog10PosteriorOfAFzero(log10Lof0 + log10AlleleFrequencyPriors[0]);
|
||||
getStateTracker().setLog10LikelihoodOfAFzero(log10Lof0);
|
||||
getStateTracker().setLog10PosteriorOfAFzero(log10Lof0 + log10AlleleFrequencyPriors[0]);
|
||||
return log10Lof0;
|
||||
|
||||
} else {
|
||||
|
|
@ -464,14 +447,14 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
|
|||
|
||||
// update the MLE if necessary
|
||||
final int altCounts[] = Arrays.copyOfRange(set.getACcounts().getCounts(),1, set.getACcounts().getCounts().length);
|
||||
resultTracker.updateMLEifNeeded(log10LofK, altCounts);
|
||||
getStateTracker().updateMLEifNeeded(Double.isInfinite(log10LofK) ? MathUtils.LOG10_P_OF_ZERO : log10LofK, altCounts);
|
||||
|
||||
// apply the priors over each alternate allele
|
||||
for (final int ACcount : altCounts ) {
|
||||
if ( ACcount > 0 )
|
||||
log10LofK += log10AlleleFrequencyPriors[ACcount];
|
||||
}
|
||||
resultTracker.updateMAPifNeeded(log10LofK, altCounts);
|
||||
getStateTracker().updateMAPifNeeded(Double.isInfinite(log10LofK) ? MathUtils.LOG10_P_OF_ZERO : log10LofK, altCounts);
|
||||
|
||||
return log10LofK;
|
||||
}
|
||||
|
|
@ -494,99 +477,6 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
|
|||
return (sum == ploidy);
|
||||
}
|
||||
|
||||
/**
|
||||
* Combines naively two biallelic pools (of arbitrary size).
|
||||
* For two pools of size m1 and m2, we can compute the combined likelihood as:
|
||||
* Pr(D|AC=k) = Sum_{j=0}^k Pr(D|AC1=j) Pr(D|AC2=k-j) * choose(m1,j)*choose(m2,k-j)/choose(m1+m2,k)
|
||||
* @param originalPool Pool likelihood vector, x[k] = Pr(AC_i = k) for alt allele i
|
||||
* @param newPLVector Second GL vector
|
||||
* @param ploidy1 Ploidy of first pool (# of chromosomes in it)
|
||||
* @param ploidy2 Ploidy of second pool
|
||||
* @param log10AlleleFrequencyPriors Array of biallelic priors
|
||||
* @param resultTracker Af calculation result object
|
||||
* @return Combined likelihood vector
|
||||
*/
|
||||
public static ProbabilityVector combineBiallelicPoolsNaively(final ProbabilityVector originalPool, final double[] newPLVector,
|
||||
final int ploidy1, final int ploidy2, final double[] log10AlleleFrequencyPriors,
|
||||
final AFCalcResultTracker resultTracker) {
|
||||
|
||||
final int newPloidy = ploidy1 + ploidy2;
|
||||
|
||||
final double[] combinedLikelihoods = new double[1+newPloidy];
|
||||
|
||||
/** Pre-fill result array and incorporate weights into input vectors
|
||||
* Say L1(k) = Pr(D|AC1=k) * choose(m1,k)
|
||||
* and L2(k) = Pr(D|AC2=k) * choose(m2,k)
|
||||
* equation reduces to
|
||||
* Pr(D|AC=k) = 1/choose(m1+m2,k) * Sum_{j=0}^k L1(k) L2(k-j)
|
||||
* which is just plain convolution of L1 and L2 (with pre-existing vector)
|
||||
*/
|
||||
|
||||
// intialize result vector to -infinity
|
||||
Arrays.fill(combinedLikelihoods,Double.NEGATIVE_INFINITY);
|
||||
|
||||
final double[] x = Arrays.copyOf(originalPool.getProbabilityVector(),1+ploidy1);
|
||||
for (int k=originalPool.getProbabilityVector().length; k< x.length; k++)
|
||||
x[k] = Double.NEGATIVE_INFINITY;
|
||||
|
||||
final double[] y = newPLVector.clone();
|
||||
|
||||
|
||||
final double log10Lof0 = x[0]+y[0];
|
||||
resultTracker.setLog10LikelihoodOfAFzero(log10Lof0);
|
||||
resultTracker.setLog10PosteriorOfAFzero(log10Lof0 + log10AlleleFrequencyPriors[0]);
|
||||
|
||||
double maxElement = log10Lof0;
|
||||
int maxElementIdx = 0;
|
||||
int[] alleleCounts = new int[1];
|
||||
for (int k= originalPool.getMinVal() ; k <= newPloidy; k++) {
|
||||
double[] acc = new double[k+1];
|
||||
Arrays.fill(acc,Double.NEGATIVE_INFINITY);
|
||||
double innerMax = Double.NEGATIVE_INFINITY;
|
||||
|
||||
for (int j=0; j <=k; j++) {
|
||||
double x1,y1;
|
||||
|
||||
|
||||
if (k-j>=0 && k-j < y.length)
|
||||
y1 = y[k-j] + MathUtils.log10BinomialCoefficient(ploidy2,k-j);
|
||||
else
|
||||
continue;
|
||||
|
||||
if (j < x.length)
|
||||
x1 = x[j] + MathUtils.log10BinomialCoefficient(ploidy1,j);
|
||||
else
|
||||
continue;
|
||||
|
||||
if (Double.isInfinite(x1) || Double.isInfinite(y1))
|
||||
continue;
|
||||
acc[j] = x1 + y1;
|
||||
if (acc[j] > innerMax)
|
||||
innerMax = acc[j];
|
||||
else if (acc[j] < innerMax - MAX_LOG10_ERROR_TO_STOP_EARLY)
|
||||
break;
|
||||
}
|
||||
combinedLikelihoods[k] = MathUtils.log10sumLog10(acc) - MathUtils.log10BinomialCoefficient(newPloidy,k);
|
||||
maxElementIdx = k;
|
||||
double maxDiff = combinedLikelihoods[k] - maxElement;
|
||||
if (maxDiff > 0)
|
||||
maxElement = combinedLikelihoods[k];
|
||||
else if (maxDiff < maxElement - MAX_LOG10_ERROR_TO_STOP_EARLY) {
|
||||
break;
|
||||
}
|
||||
|
||||
alleleCounts[0] = k;
|
||||
resultTracker.updateMLEifNeeded(combinedLikelihoods[k],alleleCounts);
|
||||
resultTracker.updateMAPifNeeded(combinedLikelihoods[k] + log10AlleleFrequencyPriors[k],alleleCounts);
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
return new ProbabilityVector(MathUtils.normalizeFromLog10(Arrays.copyOf(combinedLikelihoods,maxElementIdx+1),false, true));
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* From a given variant context, extract a given subset of alleles, and update genotype context accordingly,
|
||||
* including updating the PL's, and assign genotypes accordingly
|
||||
|
|
@ -675,10 +565,10 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
|
|||
*
|
||||
* @return genotype
|
||||
*/
|
||||
private static void assignGenotype(final GenotypeBuilder gb,
|
||||
final double[] newLikelihoods,
|
||||
final List<Allele> allelesToUse,
|
||||
final int numChromosomes) {
|
||||
private void assignGenotype(final GenotypeBuilder gb,
|
||||
final double[] newLikelihoods,
|
||||
final List<Allele> allelesToUse,
|
||||
final int numChromosomes) {
|
||||
final int numNewAltAlleles = allelesToUse.size() - 1;
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -137,18 +137,15 @@ public class GeneralPloidyAFCalculationModelUnitTest extends BaseTest {
|
|||
|
||||
@Test(dataProvider = "getGLs")
|
||||
public void testGLs(GetGLsTest cfg) {
|
||||
|
||||
final AFCalcResultTracker resultTracker = new AFCalcResultTracker(cfg.numAltAlleles);
|
||||
final int len = GeneralPloidyGenotypeLikelihoods.getNumLikelihoodElements(1 + cfg.numAltAlleles, cfg.ploidy * cfg.GLs.size());
|
||||
double[] priors = new double[len]; // flat priors
|
||||
|
||||
GeneralPloidyExactAFCalc.combineSinglePools(cfg.GLs, 1 + cfg.numAltAlleles, cfg.ploidy, priors, resultTracker);
|
||||
final GeneralPloidyExactAFCalc calc = new GeneralPloidyExactAFCalc(cfg.GLs.size(), 1 + cfg.numAltAlleles, 1 + cfg.numAltAlleles, cfg.ploidy);
|
||||
calc.combineSinglePools(cfg.GLs, 1 + cfg.numAltAlleles, cfg.ploidy, priors);
|
||||
int nameIndex = 1;
|
||||
for ( int allele = 0; allele < cfg.numAltAlleles; allele++, nameIndex+=2 ) {
|
||||
int expectedAlleleCount = Integer.valueOf(cfg.name.substring(nameIndex, nameIndex+1));
|
||||
int calculatedAlleleCount = resultTracker.getAlleleCountsOfMAP()[allele];
|
||||
|
||||
// System.out.format( "%s Expected:%d Calc:%d\n",cfg.toString(),expectedAlleleCount, calculatedAlleleCount);
|
||||
int calculatedAlleleCount = calc.getStateTracker().getAlleleCountsOfMAP()[allele];
|
||||
Assert.assertEquals(calculatedAlleleCount, expectedAlleleCount);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -50,7 +50,7 @@ public abstract class AFCalc implements Cloneable {
|
|||
protected Logger logger = defaultLogger;
|
||||
|
||||
private SimpleTimer callTimer = new SimpleTimer();
|
||||
private final AFCalcResultTracker resultTracker;
|
||||
private final StateTracker stateTracker;
|
||||
private ExactCallLogger exactCallLogger = null;
|
||||
|
||||
protected AFCalc(final int nSamples, final int maxAltAlleles, final int maxAltAllelesForIndels, final int ploidy) {
|
||||
|
|
@ -62,7 +62,7 @@ public abstract class AFCalc implements Cloneable {
|
|||
this.nSamples = nSamples;
|
||||
this.maxAlternateAllelesToGenotype = maxAltAlleles;
|
||||
this.maxAlternateAllelesForIndels = maxAltAllelesForIndels;
|
||||
this.resultTracker = new AFCalcResultTracker(Math.max(maxAltAlleles, maxAltAllelesForIndels));
|
||||
this.stateTracker = new StateTracker(Math.max(maxAltAlleles, maxAltAllelesForIndels));
|
||||
}
|
||||
|
||||
public void enableProcessLog(final File exactCallsLog) {
|
||||
|
|
@ -83,10 +83,10 @@ public abstract class AFCalc implements Cloneable {
|
|||
public AFCalcResult getLog10PNonRef(final VariantContext vc, final double[] log10AlleleFrequencyPriors) {
|
||||
if ( vc == null ) throw new IllegalArgumentException("VariantContext cannot be null");
|
||||
if ( log10AlleleFrequencyPriors == null ) throw new IllegalArgumentException("priors vector cannot be null");
|
||||
if ( resultTracker == null ) throw new IllegalArgumentException("Results object cannot be null");
|
||||
if ( stateTracker == null ) throw new IllegalArgumentException("Results object cannot be null");
|
||||
|
||||
// reset the result, so we can store our new result there
|
||||
resultTracker.reset();
|
||||
stateTracker.reset();
|
||||
|
||||
final VariantContext vcWorking = reduceScope(vc);
|
||||
|
||||
|
|
@ -100,10 +100,20 @@ public abstract class AFCalc implements Cloneable {
|
|||
return result;
|
||||
}
|
||||
|
||||
@Deprecated
|
||||
protected AFCalcResult resultFromTracker(final VariantContext vcWorking, final double[] log10AlleleFrequencyPriors) {
|
||||
resultTracker.setAllelesUsedInGenotyping(vcWorking.getAlleles());
|
||||
return resultTracker.toAFCalcResult(log10AlleleFrequencyPriors);
|
||||
/**
|
||||
* Convert the final state of the state tracker into our result as an AFCalcResult
|
||||
*
|
||||
* Assumes that stateTracker has been updated accordingly
|
||||
*
|
||||
* @param vcWorking the VariantContext we actually used as input to the calc model (after reduction)
|
||||
* @param log10AlleleFrequencyPriors the priors by AC vector
|
||||
* @return a AFCalcResult describing the result of this calculation
|
||||
*/
|
||||
@Requires("stateTracker.getnEvaluations() > 0")
|
||||
@Ensures("result != null")
|
||||
protected AFCalcResult getResultFromFinalState(final VariantContext vcWorking, final double[] log10AlleleFrequencyPriors) {
|
||||
stateTracker.setAllelesUsedInGenotyping(vcWorking.getAlleles());
|
||||
return stateTracker.toAFCalcResult(log10AlleleFrequencyPriors);
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
|
@ -162,8 +172,8 @@ public abstract class AFCalc implements Cloneable {
|
|||
return Math.max(maxAlternateAllelesToGenotype, maxAlternateAllelesForIndels);
|
||||
}
|
||||
|
||||
public AFCalcResultTracker getResultTracker() {
|
||||
return resultTracker;
|
||||
public StateTracker getStateTracker() {
|
||||
return stateTracker;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -83,8 +83,8 @@ public class AFCalcResult {
|
|||
if ( log10pNonRefByAllele == null ) throw new IllegalArgumentException("log10pNonRefByAllele cannot be null");
|
||||
if ( log10pNonRefByAllele.size() != allelesUsedInGenotyping.size() - 1 ) throw new IllegalArgumentException("log10pNonRefByAllele has the wrong number of elements: log10pNonRefByAllele " + log10pNonRefByAllele + " but allelesUsedInGenotyping " + allelesUsedInGenotyping);
|
||||
if ( ! allelesUsedInGenotyping.containsAll(log10pNonRefByAllele.keySet()) ) throw new IllegalArgumentException("log10pNonRefByAllele doesn't contain all of the alleles used in genotyping: log10pNonRefByAllele " + log10pNonRefByAllele + " but allelesUsedInGenotyping " + allelesUsedInGenotyping);
|
||||
if ( ! goodLog10ProbVector(log10LikelihoodsOfAC, LOG_10_ARRAY_SIZES, false) ) throw new IllegalArgumentException("log10LikelihoodsOfAC are bad " + Utils.join(",", log10LikelihoodsOfAC));
|
||||
if ( ! goodLog10ProbVector(log10PriorsOfAC, LOG_10_ARRAY_SIZES, true) ) throw new IllegalArgumentException("log10priors are bad " + Utils.join(",", log10PriorsOfAC));
|
||||
if ( ! MathUtils.goodLog10ProbVector(log10LikelihoodsOfAC, LOG_10_ARRAY_SIZES, false) ) throw new IllegalArgumentException("log10LikelihoodsOfAC are bad " + Utils.join(",", log10LikelihoodsOfAC));
|
||||
if ( ! MathUtils.goodLog10ProbVector(log10PriorsOfAC, LOG_10_ARRAY_SIZES, true) ) throw new IllegalArgumentException("log10priors are bad " + Utils.join(",", log10PriorsOfAC));
|
||||
|
||||
this.alleleCountsOfMLE = alleleCountsOfMLE;
|
||||
this.nEvaluations = nEvaluations;
|
||||
|
|
@ -147,7 +147,7 @@ public class AFCalcResult {
|
|||
* Due to computational / implementation constraints this may be smaller than
|
||||
* the actual list of alleles requested
|
||||
*
|
||||
* @return a non-empty list of alleles used during genotyping
|
||||
* @return a non-empty list of alleles used during genotyping, the first of which is the reference allele
|
||||
*/
|
||||
@Ensures({"result != null", "! result.isEmpty()"})
|
||||
public List<Allele> getAllelesUsedInGenotyping() {
|
||||
|
|
@ -159,7 +159,7 @@ public class AFCalcResult {
|
|||
*
|
||||
* @return
|
||||
*/
|
||||
@Ensures({"goodLog10Probability(result)"})
|
||||
@Ensures({"MathUtils.goodLog10Probability(result)"})
|
||||
public double getLog10PosteriorOfAFEq0() {
|
||||
return log10PosteriorsOfAC[AF0];
|
||||
}
|
||||
|
|
@ -169,7 +169,7 @@ public class AFCalcResult {
|
|||
*
|
||||
* @return
|
||||
*/
|
||||
@Ensures({"goodLog10Probability(result)"})
|
||||
@Ensures({"MathUtils.goodLog10Probability(result)"})
|
||||
public double getLog10PosteriorOfAFGT0() {
|
||||
return log10PosteriorsOfAC[AF1p];
|
||||
}
|
||||
|
|
@ -179,7 +179,7 @@ public class AFCalcResult {
|
|||
*
|
||||
* @return
|
||||
*/
|
||||
@Ensures({"goodLog10Probability(result)"})
|
||||
@Ensures({"MathUtils.goodLog10Probability(result)"})
|
||||
public double getLog10LikelihoodOfAFEq0() {
|
||||
return log10LikelihoodsOfAC[AF0];
|
||||
}
|
||||
|
|
@ -189,7 +189,7 @@ public class AFCalcResult {
|
|||
*
|
||||
* @return
|
||||
*/
|
||||
@Ensures({"goodLog10Probability(result)"})
|
||||
@Ensures({"MathUtils.goodLog10Probability(result)"})
|
||||
public double getLog10LikelihoodOfAFGT0() {
|
||||
return log10LikelihoodsOfAC[AF1p];
|
||||
}
|
||||
|
|
@ -199,7 +199,7 @@ public class AFCalcResult {
|
|||
*
|
||||
* @return
|
||||
*/
|
||||
@Ensures({"goodLog10Probability(result)"})
|
||||
@Ensures({"MathUtils.goodLog10Probability(result)"})
|
||||
public double getLog10PriorOfAFEq0() {
|
||||
return log10PriorsOfAC[AF0];
|
||||
}
|
||||
|
|
@ -209,7 +209,7 @@ public class AFCalcResult {
|
|||
*
|
||||
* @return
|
||||
*/
|
||||
@Ensures({"goodLog10Probability(result)"})
|
||||
@Ensures({"MathUtils.goodLog10Probability(result)"})
|
||||
public double getLog10PriorOfAFGT0() {
|
||||
return log10PriorsOfAC[AF1p];
|
||||
}
|
||||
|
|
@ -263,7 +263,7 @@ public class AFCalcResult {
|
|||
* @param allele the allele we're interested in, must be in getAllelesUsedInGenotyping
|
||||
* @return the log10 probability that allele is segregating at this site
|
||||
*/
|
||||
@Ensures("goodLog10Probability(result)")
|
||||
@Ensures("MathUtils.goodLog10Probability(result)")
|
||||
public double getLog10PosteriorOfAFGt0ForAllele(final Allele allele) {
|
||||
final Double log10pNonRef = log10pNonRefByAllele.get(allele);
|
||||
if ( log10pNonRef == null ) throw new IllegalArgumentException("Unknown allele " + allele);
|
||||
|
|
@ -279,7 +279,7 @@ public class AFCalcResult {
|
|||
* @return freshly allocated log10 normalized posteriors vector
|
||||
*/
|
||||
@Requires("log10LikelihoodsOfAC.length == log10PriorsOfAC.length")
|
||||
@Ensures("goodLog10ProbVector(result, LOG_10_ARRAY_SIZES, true)")
|
||||
@Ensures("MathUtils.goodLog10ProbVector(result, LOG_10_ARRAY_SIZES, true)")
|
||||
private static double[] computePosteriors(final double[] log10LikelihoodsOfAC, final double[] log10PriorsOfAC) {
|
||||
final double[] log10UnnormalizedPosteriors = new double[log10LikelihoodsOfAC.length];
|
||||
for ( int i = 0; i < log10LikelihoodsOfAC.length; i++ )
|
||||
|
|
@ -287,29 +287,6 @@ public class AFCalcResult {
|
|||
return MathUtils.normalizeFromLog10(log10UnnormalizedPosteriors, true, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Check that the log10 prob vector vector is well formed
|
||||
*
|
||||
* @param vector
|
||||
* @param expectedSize
|
||||
* @param shouldSumToOne
|
||||
*
|
||||
* @return true if vector is well-formed, false otherwise
|
||||
*/
|
||||
private static boolean goodLog10ProbVector(final double[] vector, final int expectedSize, final boolean shouldSumToOne) {
|
||||
if ( vector.length != expectedSize ) return false;
|
||||
|
||||
for ( final double pr : vector ) {
|
||||
if ( ! goodLog10Probability(pr) )
|
||||
return false;
|
||||
}
|
||||
|
||||
if ( shouldSumToOne && MathUtils.compareDoubles(MathUtils.sumLog10(vector), 1.0, 1e-4) != 0 )
|
||||
return false;
|
||||
|
||||
return true; // everything is good
|
||||
}
|
||||
|
||||
/**
|
||||
* Computes the offset into linear vectors indexed by alt allele for allele
|
||||
*
|
||||
|
|
@ -331,14 +308,4 @@ public class AFCalcResult {
|
|||
else
|
||||
return index - 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks that the result is a well-formed log10 probability
|
||||
*
|
||||
* @param result a supposedly well-formed log10 probability value
|
||||
* @return true if result is really well formed
|
||||
*/
|
||||
private static boolean goodLog10Probability(final double result) {
|
||||
return result <= 0.0 && ! Double.isInfinite(result) && ! Double.isNaN(result);
|
||||
}
|
||||
}
|
||||
|
|
@ -1,256 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: ebanks
|
||||
* Date: Dec 14, 2011
|
||||
*
|
||||
* Useful helper class to communicate the results of the allele frequency calculation
|
||||
*
|
||||
* TODO -- WHAT IS THE CONTRACT ON MAP AC AND P NON REF?
|
||||
*/
|
||||
class AFCalcResultTracker {
|
||||
protected static final double VALUE_NOT_CALCULATED = Double.NEGATIVE_INFINITY;
|
||||
|
||||
// These variables are intended to contain the MLE and MAP (and their corresponding allele counts) of the site over all alternate alleles
|
||||
protected double log10MLE;
|
||||
protected double log10MAP;
|
||||
private final int[] alleleCountsOfMLE;
|
||||
private final int[] alleleCountsOfMAP;
|
||||
|
||||
// The posteriors seen, not including that of AF=0
|
||||
private static final int LIKELIHOODS_CACHE_SIZE = 5000;
|
||||
private final double[] log10LikelihoodsMatrixValues = new double[LIKELIHOODS_CACHE_SIZE];
|
||||
private int currentLikelihoodsCacheIndex = 0;
|
||||
protected Double log10LikelihoodsMatrixSum = null;
|
||||
|
||||
// These variables are intended to contain the likelihood/posterior probability for the site's being monomorphic (i.e. AF=0 for all alternate alleles)
|
||||
private double log10LikelihoodOfAFzero;
|
||||
private double log10PosteriorOfAFzero;
|
||||
private int[] AClimits;
|
||||
|
||||
int nEvaluations = 0;
|
||||
|
||||
/**
|
||||
* The list of alleles actually used in computing the AF
|
||||
*/
|
||||
private List<Allele> allelesUsedInGenotyping = null;
|
||||
|
||||
/**
|
||||
* Create a results object capability of storing results for calls with up to maxAltAlleles
|
||||
*
|
||||
* @param maxAltAlleles an integer >= 1
|
||||
*/
|
||||
public AFCalcResultTracker(final int maxAltAlleles) {
|
||||
if ( maxAltAlleles < 1 ) throw new IllegalArgumentException("maxAltAlleles must be >= 0, saw " + maxAltAlleles);
|
||||
|
||||
alleleCountsOfMLE = new int[maxAltAlleles];
|
||||
alleleCountsOfMAP = new int[maxAltAlleles];
|
||||
|
||||
reset();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a vector with maxAltAlleles values containing AC values at the MLE
|
||||
*
|
||||
* The values of the ACs for this call are stored in the getAllelesUsedInGenotyping order,
|
||||
* starting from index 0 (i.e., the first alt allele is at 0). The vector is always
|
||||
* maxAltAlleles in length, and so only the first getAllelesUsedInGenotyping.size() - 1 values
|
||||
* are meaningful.
|
||||
*
|
||||
* @return a vector with allele counts, not all of which may be meaningful
|
||||
*/
|
||||
@Ensures("result != null")
|
||||
public int[] getAlleleCountsOfMLE() {
|
||||
return alleleCountsOfMLE;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a vector with maxAltAlleles values containing AC values at the MAP
|
||||
*
|
||||
* @see #getAlleleCountsOfMLE() for the encoding of results in this vector
|
||||
*
|
||||
* @return a non-null vector of ints
|
||||
*/
|
||||
@Ensures("result != null")
|
||||
public int[] getAlleleCountsOfMAP() {
|
||||
return alleleCountsOfMAP;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the likelihoods summed across all AC values for AC > 0
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public double getLog10LikelihoodOfAFNotZero() {
|
||||
if ( log10LikelihoodsMatrixSum == null ) {
|
||||
if ( currentLikelihoodsCacheIndex == 0 ) // there's nothing to sum up, so make the sum equal to the smallest thing we have
|
||||
log10LikelihoodsMatrixSum = MathUtils.LOG10_P_OF_ZERO;
|
||||
else
|
||||
log10LikelihoodsMatrixSum = MathUtils.log10sumLog10(log10LikelihoodsMatrixValues, 0, currentLikelihoodsCacheIndex);
|
||||
}
|
||||
return log10LikelihoodsMatrixSum;
|
||||
}
|
||||
|
||||
public double getLog10LikelihoodOfAFNotZero(final boolean capAt0) {
|
||||
return Math.min(getLog10LikelihoodOfAFNotZero(), capAt0 ? 0.0 : Double.POSITIVE_INFINITY);
|
||||
}
|
||||
|
||||
/**
|
||||
* TODO -- eric what is this supposed to return? my unit tests don't do what I think they should
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public double getLog10LikelihoodOfAFzero() {
|
||||
return log10LikelihoodOfAFzero;
|
||||
}
|
||||
|
||||
/**
|
||||
* TODO -- eric what is this supposed to return? my unit tests don't do what I think they should
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public double getLog10PosteriorOfAFzero() {
|
||||
return log10PosteriorOfAFzero;
|
||||
}
|
||||
|
||||
protected AFCalcResult toAFCalcResult(final double[] log10PriorsByAC) {
|
||||
final int [] subACOfMLE = Arrays.copyOf(alleleCountsOfMLE, allelesUsedInGenotyping.size() - 1);
|
||||
final double[] log10Likelihoods = new double[]{getLog10LikelihoodOfAFzero(), getLog10LikelihoodOfAFNotZero(true)};
|
||||
final double[] log10Priors = MathUtils.normalizeFromLog10(new double[]{log10PriorsByAC[0], MathUtils.log10sumLog10(log10PriorsByAC, 1)}, true);
|
||||
|
||||
// TODO -- replace with more meaningful computation
|
||||
// TODO -- refactor this calculation into the ref calculation
|
||||
final Map<Allele, Double> log10pNonRefByAllele = new HashMap<Allele, Double>(allelesUsedInGenotyping.size());
|
||||
for ( int i = 0; i < subACOfMLE.length; i++ ) {
|
||||
final Allele allele = allelesUsedInGenotyping.get(i+1);
|
||||
final double log10PNonRef = getAlleleCountsOfMAP()[i] > 0 ? 0 : -10000; // TODO -- a total hack but in effect what the old behavior was
|
||||
log10pNonRefByAllele.put(allele, log10PNonRef);
|
||||
}
|
||||
|
||||
return new AFCalcResult(subACOfMLE, nEvaluations, allelesUsedInGenotyping, log10Likelihoods, log10Priors, log10pNonRefByAllele);
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// Protected mutational methods only for use within the calculation models themselves
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Reset the data in this results object, so that it can be used in a subsequent AF calculation
|
||||
*
|
||||
* Resetting of the data is done by the calculation model itself, so shouldn't be done by callers any longer
|
||||
*/
|
||||
protected void reset() {
|
||||
log10MLE = log10MAP = log10LikelihoodOfAFzero = log10PosteriorOfAFzero = VALUE_NOT_CALCULATED;
|
||||
for ( int i = 0; i < alleleCountsOfMLE.length; i++ ) {
|
||||
alleleCountsOfMLE[i] = 0;
|
||||
alleleCountsOfMAP[i] = 0;
|
||||
}
|
||||
currentLikelihoodsCacheIndex = 0;
|
||||
log10LikelihoodsMatrixSum = null;
|
||||
allelesUsedInGenotyping = null;
|
||||
nEvaluations = 0;
|
||||
Arrays.fill(log10LikelihoodsMatrixValues, Double.POSITIVE_INFINITY);
|
||||
}
|
||||
|
||||
/**
|
||||
* Tell this result we used one more evaluation cycle
|
||||
*/
|
||||
protected void incNEvaluations() {
|
||||
nEvaluations++;
|
||||
}
|
||||
|
||||
protected void updateMLEifNeeded(final double log10LofK, final int[] alleleCountsForK) {
|
||||
addToLikelihoodsCache(log10LofK);
|
||||
|
||||
if ( log10LofK > log10MLE ) {
|
||||
log10MLE = log10LofK;
|
||||
for ( int i = 0; i < alleleCountsForK.length; i++ )
|
||||
alleleCountsOfMLE[i] = alleleCountsForK[i];
|
||||
}
|
||||
}
|
||||
|
||||
protected void updateMAPifNeeded(final double log10LofK, final int[] alleleCountsForK) {
|
||||
if ( log10LofK > log10MAP ) {
|
||||
log10MAP = log10LofK;
|
||||
for ( int i = 0; i < alleleCountsForK.length; i++ )
|
||||
alleleCountsOfMAP[i] = alleleCountsForK[i];
|
||||
}
|
||||
}
|
||||
|
||||
private void addToLikelihoodsCache(final double log10LofK) {
|
||||
// add to the cache
|
||||
log10LikelihoodsMatrixValues[currentLikelihoodsCacheIndex++] = log10LofK;
|
||||
|
||||
// if we've filled up the cache, then condense by summing up all of the values and placing the sum back into the first cell
|
||||
if ( currentLikelihoodsCacheIndex == LIKELIHOODS_CACHE_SIZE) {
|
||||
final double temporarySum = MathUtils.log10sumLog10(log10LikelihoodsMatrixValues, 0, currentLikelihoodsCacheIndex);
|
||||
Arrays.fill(log10LikelihoodsMatrixValues, Double.POSITIVE_INFINITY);
|
||||
log10LikelihoodsMatrixValues[0] = temporarySum;
|
||||
currentLikelihoodsCacheIndex = 1;
|
||||
}
|
||||
}
|
||||
|
||||
protected void setLog10LikelihoodOfAFzero(final double log10LikelihoodOfAFzero) {
|
||||
this.log10LikelihoodOfAFzero = log10LikelihoodOfAFzero;
|
||||
if ( log10LikelihoodOfAFzero > log10MLE ) {
|
||||
log10MLE = log10LikelihoodOfAFzero;
|
||||
Arrays.fill(alleleCountsOfMLE, 0);
|
||||
}
|
||||
}
|
||||
|
||||
protected void setLog10PosteriorOfAFzero(final double log10PosteriorOfAFzero) {
|
||||
this.log10PosteriorOfAFzero = log10PosteriorOfAFzero;
|
||||
if ( log10PosteriorOfAFzero > log10MAP ) {
|
||||
log10MAP = log10PosteriorOfAFzero;
|
||||
Arrays.fill(alleleCountsOfMAP, 0);
|
||||
}
|
||||
}
|
||||
|
||||
protected void setAllelesUsedInGenotyping(List<Allele> allelesUsedInGenotyping) {
|
||||
if ( allelesUsedInGenotyping == null || allelesUsedInGenotyping.isEmpty() )
|
||||
throw new IllegalArgumentException("allelesUsedInGenotyping cannot be null or empty");
|
||||
|
||||
this.allelesUsedInGenotyping = allelesUsedInGenotyping;
|
||||
}
|
||||
|
||||
protected void setAClimits(int[] AClimits) {
|
||||
this.AClimits = AClimits;
|
||||
}
|
||||
}
|
||||
|
|
@ -36,10 +36,6 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc {
|
|||
if ( ploidy != 2 ) throw new IllegalArgumentException("ploidy must be two for DiploidExactAFCalc and subclasses but saw " + ploidy);
|
||||
}
|
||||
|
||||
protected StateTracker makeMaxLikelihood(VariantContext vc, AFCalcResultTracker resultTracker) {
|
||||
return new StateTracker();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected AFCalcResult computeLog10PNonRef(final VariantContext vc,
|
||||
final double[] log10AlleleFrequencyPriors) {
|
||||
|
|
@ -60,29 +56,21 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc {
|
|||
ACqueue.add(zeroSet);
|
||||
indexesToACset.put(zeroSet.getACcounts(), zeroSet);
|
||||
|
||||
// keep processing while we have AC conformations that need to be calculated
|
||||
final StateTracker stateTracker = makeMaxLikelihood(vc, getResultTracker());
|
||||
|
||||
while ( !ACqueue.isEmpty() ) {
|
||||
getResultTracker().incNEvaluations(); // keep track of the number of evaluations
|
||||
getStateTracker().incNEvaluations(); // keep track of the number of evaluations
|
||||
|
||||
// compute log10Likelihoods
|
||||
final ExactACset set = ACqueue.remove();
|
||||
|
||||
if ( stateTracker.withinMaxACs(set.getACcounts()) ) {
|
||||
final double log10LofKs = calculateAlleleCountConformation(set, genotypeLikelihoods, stateTracker, numChr, ACqueue, indexesToACset, log10AlleleFrequencyPriors, getResultTracker());
|
||||
final double log10LofKs = calculateAlleleCountConformation(set, genotypeLikelihoods, numChr, ACqueue, indexesToACset, log10AlleleFrequencyPriors);
|
||||
|
||||
// adjust max likelihood seen if needed
|
||||
stateTracker.update(log10LofKs, set.getACcounts());
|
||||
|
||||
// clean up memory
|
||||
indexesToACset.remove(set.getACcounts());
|
||||
//if ( DEBUG )
|
||||
// System.out.printf(" *** removing used set=%s%n", set.ACcounts);
|
||||
}
|
||||
// clean up memory
|
||||
indexesToACset.remove(set.getACcounts());
|
||||
//if ( DEBUG )
|
||||
// System.out.printf(" *** removing used set=%s%n", set.ACcounts);
|
||||
}
|
||||
|
||||
return resultFromTracker(vc, log10AlleleFrequencyPriors);
|
||||
return getResultFromFinalState(vc, log10AlleleFrequencyPriors);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
@ -153,23 +141,21 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc {
|
|||
|
||||
private double calculateAlleleCountConformation(final ExactACset set,
|
||||
final ArrayList<double[]> genotypeLikelihoods,
|
||||
final StateTracker stateTracker,
|
||||
final int numChr,
|
||||
final LinkedList<ExactACset> ACqueue,
|
||||
final HashMap<ExactACcounts, ExactACset> indexesToACset,
|
||||
final double[] log10AlleleFrequencyPriors,
|
||||
final AFCalcResultTracker resultTracker) {
|
||||
final double[] log10AlleleFrequencyPriors) {
|
||||
|
||||
//if ( DEBUG )
|
||||
// System.out.printf(" *** computing LofK for set=%s%n", set.ACcounts);
|
||||
|
||||
// compute the log10Likelihoods
|
||||
computeLofK(set, genotypeLikelihoods, log10AlleleFrequencyPriors, resultTracker);
|
||||
computeLofK(set, genotypeLikelihoods, log10AlleleFrequencyPriors);
|
||||
|
||||
final double log10LofK = set.getLog10Likelihoods()[set.getLog10Likelihoods().length-1];
|
||||
|
||||
// can we abort early because the log10Likelihoods are so small?
|
||||
if ( stateTracker.abort(log10LofK, set.getACcounts()) ) {
|
||||
if ( getStateTracker().abort(log10LofK, set.getACcounts(), true) ) {
|
||||
//if ( DEBUG )
|
||||
// System.out.printf(" *** breaking early set=%s log10L=%.2f maxLog10L=%.2f%n", set.ACcounts, log10LofK, maxLog10L);
|
||||
return log10LofK;
|
||||
|
|
@ -188,7 +174,7 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc {
|
|||
ACcountsClone[allele]++;
|
||||
// to get to this conformation, a sample would need to be AB (remember that ref=0)
|
||||
final int PLindex = GenotypeLikelihoods.calculatePLindex(0, allele+1);
|
||||
updateACset(stateTracker, ACcountsClone, numChr, set, PLindex, ACqueue, indexesToACset, genotypeLikelihoods);
|
||||
updateACset(ACcountsClone, numChr, set, PLindex, ACqueue, indexesToACset, genotypeLikelihoods);
|
||||
}
|
||||
|
||||
// add conformations for the k+2 case if it makes sense; note that the 2 new alleles may be the same or different
|
||||
|
|
@ -213,9 +199,9 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc {
|
|||
|
||||
// IMPORTANT: we must first add the cases where the 2 new alleles are different so that the queue maintains its ordering
|
||||
for ( DependentSet dependent : differentAlleles )
|
||||
updateACset(stateTracker, dependent.ACcounts, numChr, set, dependent.PLindex, ACqueue, indexesToACset, genotypeLikelihoods);
|
||||
updateACset(dependent.ACcounts, numChr, set, dependent.PLindex, ACqueue, indexesToACset, genotypeLikelihoods);
|
||||
for ( DependentSet dependent : sameAlleles )
|
||||
updateACset(stateTracker, dependent.ACcounts, numChr, set, dependent.PLindex, ACqueue, indexesToACset, genotypeLikelihoods);
|
||||
updateACset(dependent.ACcounts, numChr, set, dependent.PLindex, ACqueue, indexesToACset, genotypeLikelihoods);
|
||||
}
|
||||
|
||||
return log10LofK;
|
||||
|
|
@ -223,8 +209,7 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc {
|
|||
|
||||
// adds the ExactACset represented by the ACcounts to the ACqueue if not already there (creating it if needed) and
|
||||
// also pushes its value to the given callingSetIndex.
|
||||
private void updateACset(final StateTracker stateTracker,
|
||||
final int[] newSetCounts,
|
||||
private void updateACset(final int[] newSetCounts,
|
||||
final int numChr,
|
||||
final ExactACset dependentSet,
|
||||
final int PLsetIndex,
|
||||
|
|
@ -246,8 +231,7 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc {
|
|||
|
||||
private void computeLofK(final ExactACset set,
|
||||
final ArrayList<double[]> genotypeLikelihoods,
|
||||
final double[] log10AlleleFrequencyPriors,
|
||||
final AFCalcResultTracker resultTracker) {
|
||||
final double[] log10AlleleFrequencyPriors) {
|
||||
|
||||
set.getLog10Likelihoods()[0] = 0.0; // the zero case
|
||||
final int totalK = set.getACsum();
|
||||
|
|
@ -258,8 +242,8 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc {
|
|||
set.getLog10Likelihoods()[j] = set.getLog10Likelihoods()[j-1] + genotypeLikelihoods.get(j)[HOM_REF_INDEX];
|
||||
|
||||
final double log10Lof0 = set.getLog10Likelihoods()[set.getLog10Likelihoods().length-1];
|
||||
resultTracker.setLog10LikelihoodOfAFzero(log10Lof0);
|
||||
resultTracker.setLog10PosteriorOfAFzero(log10Lof0 + log10AlleleFrequencyPriors[0]);
|
||||
getStateTracker().setLog10LikelihoodOfAFzero(log10Lof0);
|
||||
getStateTracker().setLog10PosteriorOfAFzero(log10Lof0 + log10AlleleFrequencyPriors[0]);
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
@ -281,14 +265,15 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc {
|
|||
double log10LofK = set.getLog10Likelihoods()[set.getLog10Likelihoods().length-1];
|
||||
|
||||
// update the MLE if necessary
|
||||
resultTracker.updateMLEifNeeded(log10LofK, set.getACcounts().getCounts());
|
||||
getStateTracker().updateMLEifNeeded(log10LofK, set.getACcounts().getCounts());
|
||||
|
||||
// apply the priors over each alternate allele
|
||||
for ( final int ACcount : set.getACcounts().getCounts() ) {
|
||||
if ( ACcount > 0 )
|
||||
log10LofK += log10AlleleFrequencyPriors[ACcount];
|
||||
}
|
||||
resultTracker.updateMAPifNeeded(log10LofK, set.getACcounts().getCounts());
|
||||
|
||||
getStateTracker().updateMAPifNeeded(log10LofK, set.getACcounts().getCounts());
|
||||
}
|
||||
|
||||
private void pushData(final ExactACset targetSet,
|
||||
|
|
|
|||
|
|
@ -16,10 +16,6 @@ public class OriginalDiploidExactAFCalc extends DiploidExactAFCalc {
|
|||
super(nSamples, maxAltAlleles, maxAltAllelesForIndels, ploidy);
|
||||
}
|
||||
|
||||
protected StateTracker makeMaxLikelihood(final VariantContext vc, final AFCalcResultTracker resultTracker) {
|
||||
return new StateTracker();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected AFCalcResult computeLog10PNonRef(VariantContext vc, double[] log10AlleleFrequencyPriors) {
|
||||
final double[] log10AlleleFrequencyLikelihoods = new double[log10AlleleFrequencyPriors.length];
|
||||
|
|
|
|||
|
|
@ -1,35 +1,85 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Keeps track of the best state seen by the exact model and the max states to visit
|
||||
* allowing us to abort the search before we visit the entire matrix of AC x samples
|
||||
* Keeps track of the state information during the exact model AF calculation.
|
||||
*
|
||||
* Tracks things like the MLE and MAP AC values, their corresponding likelhood and posterior
|
||||
* values, the likelihood of the AF == 0 state, and the number of evaluations needed
|
||||
* by the calculation to compute the P(AF == 0)
|
||||
*/
|
||||
final class StateTracker {
|
||||
public final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6
|
||||
|
||||
final private int[] maxACsToConsider;
|
||||
|
||||
private ExactACcounts ACsAtMax = null;
|
||||
private double maxLog10L = Double.NEGATIVE_INFINITY;
|
||||
|
||||
public StateTracker() {
|
||||
this(null);
|
||||
}
|
||||
|
||||
public StateTracker(final int[] maxACsToConsider) {
|
||||
this.maxACsToConsider = maxACsToConsider;
|
||||
}
|
||||
protected static final double VALUE_NOT_CALCULATED = Double.NEGATIVE_INFINITY;
|
||||
protected final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6
|
||||
|
||||
/**
|
||||
* Update the maximum log10L seen, if log10LofKs is higher, and the corresponding ACs of this state
|
||||
*
|
||||
* @param log10LofKs the likelihood of our current configuration state
|
||||
* These variables are intended to contain the MLE and MAP (and their corresponding allele counts)
|
||||
* of the site over all alternate alleles
|
||||
*/
|
||||
public void update(final double log10LofKs, final ExactACcounts ACs) {
|
||||
if ( log10LofKs > getMaxLog10L()) {
|
||||
this.setMaxLog10L(log10LofKs);
|
||||
this.ACsAtMax = ACs;
|
||||
}
|
||||
protected double log10MLE;
|
||||
protected double log10MAP;
|
||||
|
||||
/**
|
||||
* Returns a vector with maxAltAlleles values containing AC values at the MLE
|
||||
*
|
||||
* The values of the ACs for this call are stored in the getAllelesUsedInGenotyping order,
|
||||
* starting from index 0 (i.e., the first alt allele is at 0). The vector is always
|
||||
* maxAltAlleles in length, and so only the first getAllelesUsedInGenotyping.size() - 1 values
|
||||
* are meaningful.
|
||||
*/
|
||||
private final int[] alleleCountsOfMLE;
|
||||
private final int[] alleleCountsOfMAP;
|
||||
|
||||
/**
|
||||
* A vector of log10 likelihood values seen, for future summation. When the size of the
|
||||
* vector is exceeed -- because we've pushed more posteriors than there's space to hold
|
||||
* -- we simply sum up the existing values, make that the first value, and continue.
|
||||
*/
|
||||
private final double[] log10LikelihoodsForAFGt0 = new double[LIKELIHOODS_CACHE_SIZE];
|
||||
private static final int LIKELIHOODS_CACHE_SIZE = 5000;
|
||||
private int log10LikelihoodsForAFGt0CacheIndex = 0;
|
||||
|
||||
/**
|
||||
* The actual sum of the likelihoods. Null if the sum hasn't been computed yet
|
||||
*/
|
||||
protected Double log10LikelihoodsForAFGt0Sum = null;
|
||||
|
||||
/**
|
||||
* Contains the likelihood for the site's being monomorphic (i.e. AF=0 for all alternate alleles)
|
||||
*/
|
||||
private double log10LikelihoodOfAFzero = 0.0;
|
||||
|
||||
/**
|
||||
* The number of evaluates we've gone through in the AFCalc
|
||||
*/
|
||||
private int nEvaluations = 0;
|
||||
|
||||
/**
|
||||
* The list of alleles actually used in computing the AF
|
||||
*/
|
||||
private List<Allele> allelesUsedInGenotyping = null;
|
||||
|
||||
/**
|
||||
* Create a results object capability of storing results for calls with up to maxAltAlleles
|
||||
*
|
||||
* @param maxAltAlleles an integer >= 1
|
||||
*/
|
||||
public StateTracker(final int maxAltAlleles) {
|
||||
if ( maxAltAlleles < 1 ) throw new IllegalArgumentException("maxAltAlleles must be >= 0, saw " + maxAltAlleles);
|
||||
|
||||
alleleCountsOfMLE = new int[maxAltAlleles];
|
||||
alleleCountsOfMAP = new int[maxAltAlleles];
|
||||
|
||||
reset();
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -39,58 +89,200 @@ final class StateTracker {
|
|||
* @param log10LofK the log10 likelihood of the configuration we're considering analyzing
|
||||
* @return true if the configuration cannot meaningfully contribute to our likelihood sum
|
||||
*/
|
||||
public boolean tooLowLikelihood(final double log10LofK) {
|
||||
return log10LofK < getMaxLog10L() - MAX_LOG10_ERROR_TO_STOP_EARLY;
|
||||
private boolean tooLowLikelihood(final double log10LofK) {
|
||||
return log10LofK < log10MLE - MAX_LOG10_ERROR_TO_STOP_EARLY;
|
||||
}
|
||||
|
||||
/**
|
||||
* Are all ACs in otherACs less than or equal to their corresponding ACs in the maxACsToConsider?
|
||||
* @return true iff all ACs in this object are less than or equal to their corresponding ACs in the provided set
|
||||
*/
|
||||
private boolean isLowerAC(final ExactACcounts otherACs) {
|
||||
final int[] otherACcounts = otherACs.getCounts();
|
||||
|
||||
for ( int i = 0; i < otherACcounts.length; i++ ) {
|
||||
if ( alleleCountsOfMLE[i] > otherACcounts[i] )
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Should we stop exploring paths from ACs, given it's log10LofK
|
||||
*
|
||||
* @param otherACs the set of otherACs that we want to know if we should consider analyzing
|
||||
* @return true if otherACs is a state worth considering, or false otherwise
|
||||
* @param log10LofK the log10LofK of these ACs
|
||||
* @param ACs the ACs of this state
|
||||
* @return return true if there's no reason to continue with subpaths of AC, or false otherwise
|
||||
*/
|
||||
public boolean withinMaxACs(final ExactACcounts otherACs) {
|
||||
if ( maxACsToConsider == null )
|
||||
return true;
|
||||
protected boolean abort( final double log10LofK, final ExactACcounts ACs, final boolean enforceLowerACs ) {
|
||||
return tooLowLikelihood(log10LofK) && (!enforceLowerACs || isLowerAC(ACs));
|
||||
}
|
||||
|
||||
final int[] otherACcounts = otherACs.getCounts();
|
||||
@Ensures("result != null")
|
||||
protected int[] getAlleleCountsOfMAP() {
|
||||
return alleleCountsOfMAP;
|
||||
}
|
||||
|
||||
for ( int i = 0; i < maxACsToConsider.length; i++ ) {
|
||||
// consider one more than the max AC to collect a bit more likelihood mass
|
||||
if ( otherACcounts[i] > maxACsToConsider[i] + 1 )
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
@Ensures("result >= 0")
|
||||
protected int getnEvaluations() {
|
||||
return nEvaluations;
|
||||
}
|
||||
|
||||
/**
|
||||
* returns true iff all ACs in this object are less than or equal to their corresponding ACs in the provided set
|
||||
* Returns the likelihoods summed across all AC values for AC > 0
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public boolean isLowerAC(final ExactACcounts otherACs) {
|
||||
if ( ACsAtMax == null )
|
||||
return true;
|
||||
private double getLog10LikelihoodOfAFNotZero(final boolean capAt0) {
|
||||
if ( log10LikelihoodsForAFGt0Sum == null ) {
|
||||
if ( log10LikelihoodsForAFGt0CacheIndex == 0 ) // there's nothing to sum up, so make the sum equal to the smallest thing we have
|
||||
log10LikelihoodsForAFGt0Sum = MathUtils.LOG10_P_OF_ZERO;
|
||||
else
|
||||
log10LikelihoodsForAFGt0Sum = MathUtils.log10sumLog10(log10LikelihoodsForAFGt0, 0, log10LikelihoodsForAFGt0CacheIndex);
|
||||
}
|
||||
return Math.min(log10LikelihoodsForAFGt0Sum, capAt0 ? 0.0 : Double.POSITIVE_INFINITY);
|
||||
}
|
||||
|
||||
final int[] myACcounts = this.ACsAtMax.getCounts();
|
||||
final int[] otherACcounts = otherACs.getCounts();
|
||||
/**
|
||||
* @return
|
||||
*/
|
||||
private double getLog10LikelihoodOfAFzero() {
|
||||
return log10LikelihoodOfAFzero;
|
||||
}
|
||||
|
||||
for ( int i = 0; i < myACcounts.length; i++ ) {
|
||||
if ( myACcounts[i] > otherACcounts[i] )
|
||||
return false;
|
||||
/**
|
||||
* Convert this state to an corresponding AFCalcResult.
|
||||
*
|
||||
* Assumes that the values in this state have been filled in with meaningful values during the calculation.
|
||||
* For example, that the allelesUsedInGenotyping has been set, that the alleleCountsOfMLE contains meaningful
|
||||
* values, etc.
|
||||
*
|
||||
* @param log10PriorsByAC
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
@Requires("allelesUsedInGenotyping != null")
|
||||
protected AFCalcResult toAFCalcResult(final double[] log10PriorsByAC) {
|
||||
final int [] subACOfMLE = Arrays.copyOf(alleleCountsOfMLE, allelesUsedInGenotyping.size() - 1);
|
||||
final double[] log10Likelihoods = new double[]{getLog10LikelihoodOfAFzero(), getLog10LikelihoodOfAFNotZero(true)};
|
||||
final double[] log10Priors = MathUtils.normalizeFromLog10(new double[]{log10PriorsByAC[0], MathUtils.log10sumLog10(log10PriorsByAC, 1)}, true);
|
||||
|
||||
// TODO -- replace with more meaningful computation
|
||||
// TODO -- refactor this calculation into the ref calculation
|
||||
final Map<Allele, Double> log10pNonRefByAllele = new HashMap<Allele, Double>(allelesUsedInGenotyping.size());
|
||||
for ( int i = 0; i < subACOfMLE.length; i++ ) {
|
||||
final Allele allele = allelesUsedInGenotyping.get(i+1);
|
||||
final double log10PNonRef = alleleCountsOfMAP[i] > 0 ? 0 : -10000; // TODO -- a total hack but in effect what the old behavior was
|
||||
log10pNonRefByAllele.put(allele, log10PNonRef);
|
||||
}
|
||||
|
||||
return true;
|
||||
return new AFCalcResult(subACOfMLE, nEvaluations, allelesUsedInGenotyping, log10Likelihoods, log10Priors, log10pNonRefByAllele);
|
||||
}
|
||||
|
||||
public boolean abort( final double log10LofK, final ExactACcounts ACs ) {
|
||||
return tooLowLikelihood(log10LofK) && isLowerAC(ACs);
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// Protected mutational methods only for use within the calculation models themselves
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Reset the data in this results object, so that it can be used in a subsequent AF calculation
|
||||
*
|
||||
* Resetting of the data is done by the calculation model itself, so shouldn't be done by callers any longer
|
||||
*/
|
||||
protected void reset() {
|
||||
log10MLE = log10MAP = log10LikelihoodOfAFzero = VALUE_NOT_CALCULATED;
|
||||
log10LikelihoodsForAFGt0CacheIndex = 0;
|
||||
log10LikelihoodsForAFGt0Sum = null;
|
||||
allelesUsedInGenotyping = null;
|
||||
nEvaluations = 0;
|
||||
Arrays.fill(alleleCountsOfMLE, 0);
|
||||
Arrays.fill(alleleCountsOfMAP, 0);
|
||||
Arrays.fill(log10LikelihoodsForAFGt0, Double.POSITIVE_INFINITY);
|
||||
}
|
||||
|
||||
public double getMaxLog10L() {
|
||||
return maxLog10L;
|
||||
/**
|
||||
* Tell this result we used one more evaluation cycle
|
||||
*/
|
||||
protected void incNEvaluations() {
|
||||
nEvaluations++;
|
||||
}
|
||||
|
||||
public void setMaxLog10L(double maxLog10L) {
|
||||
this.maxLog10L = maxLog10L;
|
||||
/**
|
||||
* Update the maximum log10 likelihoods seen, if log10LofKs is higher, and the corresponding ACs of this state
|
||||
*
|
||||
* @param log10LofK the likelihood of our current configuration state, cannot be the 0 state
|
||||
* @param alleleCountsForK the allele counts for this state
|
||||
*/
|
||||
@Requires({"alleleCountsForK != null", "MathUtils.sum(alleleCountsForK) >= 0", "MathUtils.goodLog10Probability(log10LofK)"})
|
||||
@Ensures("log10MLE == Math.max(log10LofK, log10MLE)")
|
||||
protected void updateMLEifNeeded(final double log10LofK, final int[] alleleCountsForK) {
|
||||
addToLikelihoodsCache(log10LofK);
|
||||
|
||||
if ( log10LofK > log10MLE ) {
|
||||
log10MLE = log10LofK;
|
||||
System.arraycopy(alleleCountsForK, 0, alleleCountsOfMLE, 0, alleleCountsForK.length);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Update the maximum log10 posterior seen, if log10PofKs is higher, and the corresponding ACs of this state
|
||||
*
|
||||
* @param log10PofK the posterior of our current configuration state
|
||||
* @param alleleCountsForK the allele counts for this state
|
||||
*/
|
||||
@Requires({"alleleCountsForK != null", "MathUtils.sum(alleleCountsForK) >= 0", "MathUtils.goodLog10Probability(log10PofK)"})
|
||||
@Ensures("log10MAP == Math.max(log10PofK, log10MAP)")
|
||||
protected void updateMAPifNeeded(final double log10PofK, final int[] alleleCountsForK) {
|
||||
if ( log10PofK > log10MAP ) {
|
||||
log10MAP = log10PofK;
|
||||
System.arraycopy(alleleCountsForK, 0, alleleCountsOfMAP, 0, alleleCountsForK.length);
|
||||
}
|
||||
}
|
||||
|
||||
@Requires({"MathUtils.goodLog10Probability(log10LofK)"})
|
||||
private void addToLikelihoodsCache(final double log10LofK) {
|
||||
// add to the cache
|
||||
log10LikelihoodsForAFGt0[log10LikelihoodsForAFGt0CacheIndex++] = log10LofK;
|
||||
|
||||
// if we've filled up the cache, then condense by summing up all of the values and placing the sum back into the first cell
|
||||
if ( log10LikelihoodsForAFGt0CacheIndex == LIKELIHOODS_CACHE_SIZE) {
|
||||
final double temporarySum = MathUtils.log10sumLog10(log10LikelihoodsForAFGt0, 0, log10LikelihoodsForAFGt0CacheIndex);
|
||||
Arrays.fill(log10LikelihoodsForAFGt0, Double.POSITIVE_INFINITY);
|
||||
log10LikelihoodsForAFGt0[0] = temporarySum;
|
||||
log10LikelihoodsForAFGt0CacheIndex = 1;
|
||||
}
|
||||
}
|
||||
|
||||
@Requires({"MathUtils.goodLog10Probability(log10LikelihoodOfAFzero)"})
|
||||
protected void setLog10LikelihoodOfAFzero(final double log10LikelihoodOfAFzero) {
|
||||
this.log10LikelihoodOfAFzero = log10LikelihoodOfAFzero;
|
||||
if ( log10LikelihoodOfAFzero > log10MLE ) {
|
||||
log10MLE = log10LikelihoodOfAFzero;
|
||||
Arrays.fill(alleleCountsOfMLE, 0);
|
||||
}
|
||||
}
|
||||
|
||||
@Requires({"MathUtils.goodLog10Probability(log10LikelihoodOfAFzero)"})
|
||||
protected void setLog10PosteriorOfAFzero(final double log10PosteriorOfAFzero) {
|
||||
if ( log10PosteriorOfAFzero > log10MAP ) {
|
||||
log10MAP = log10PosteriorOfAFzero;
|
||||
Arrays.fill(alleleCountsOfMAP, 0);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the list of alleles used in genotyping
|
||||
*
|
||||
* @param allelesUsedInGenotyping the list of alleles, where the first allele is reference
|
||||
*/
|
||||
@Requires({"allelesUsedInGenotyping != null", "allelesUsedInGenotyping.size() > 1"})
|
||||
protected void setAllelesUsedInGenotyping(List<Allele> allelesUsedInGenotyping) {
|
||||
if ( allelesUsedInGenotyping == null || allelesUsedInGenotyping.isEmpty() )
|
||||
throw new IllegalArgumentException("allelesUsedInGenotyping cannot be null or empty");
|
||||
if ( allelesUsedInGenotyping.get(0).isNonReference() )
|
||||
throw new IllegalArgumentException("The first element of allelesUsedInGenotyping must be the reference allele");
|
||||
|
||||
this.allelesUsedInGenotyping = allelesUsedInGenotyping;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1194,6 +1194,39 @@ public class MathUtils {
|
|||
return getQScoreOrderStatistic(reads, offsets, (int) Math.floor(reads.size() / 2.));
|
||||
}
|
||||
|
||||
/**
|
||||
* Check that the log10 prob vector vector is well formed
|
||||
*
|
||||
* @param vector
|
||||
* @param expectedSize
|
||||
* @param shouldSumToOne
|
||||
*
|
||||
* @return true if vector is well-formed, false otherwise
|
||||
*/
|
||||
public static boolean goodLog10ProbVector(final double[] vector, final int expectedSize, final boolean shouldSumToOne) {
|
||||
if ( vector.length != expectedSize ) return false;
|
||||
|
||||
for ( final double pr : vector ) {
|
||||
if ( ! goodLog10Probability(pr) )
|
||||
return false;
|
||||
}
|
||||
|
||||
if ( shouldSumToOne && compareDoubles(sumLog10(vector), 1.0, 1e-4) != 0 )
|
||||
return false;
|
||||
|
||||
return true; // everything is good
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks that the result is a well-formed log10 probability
|
||||
*
|
||||
* @param result a supposedly well-formed log10 probability value
|
||||
* @return true if result is really well formed
|
||||
*/
|
||||
public static boolean goodLog10Probability(final double result) {
|
||||
return result <= 0.0 && ! Double.isInfinite(result) && ! Double.isNaN(result);
|
||||
}
|
||||
|
||||
/**
|
||||
* A utility class that computes on the fly average and standard deviation for a stream of numbers.
|
||||
* The number of observations does not have to be known in advance, and can be also very big (so that
|
||||
|
|
|
|||
Loading…
Reference in New Issue