First iteration in filling up exact AF calculation with new refactored UG. Code computes EM iterations of exact AF spectrum and returns to caller.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4381 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
delangel 2010-09-29 16:21:54 +00:00
parent 2708e83198
commit 4556e3b273
1 changed files with 89 additions and 1 deletions

View File

@ -26,6 +26,7 @@
package org.broadinstitute.sting.playground.gatk.walkers.genotyper;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -34,6 +35,10 @@ import java.io.PrintStream;
public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
private int maxNumIterations = 50;
private double tol = 1e-4;
private boolean DEBUGOUT = true;
protected ExactAFCalculationModel(int N, Logger logger, PrintStream verboseWriter) {
super(N, logger, verboseWriter);
}
@ -44,7 +49,90 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
double[] log10AlleleFrequencyPriors,
double[] log10AlleleFrequencyPosteriors) {
// TODO: implement me based on
// Math requires linear math to make efficient updates.
double[] alleleFrequencyPosteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPriors);
// now that we have genotype likelihoods for each sample, we can start refining allele frequency estimate
double meanAF = computeMeanAF(alleleFrequencyPosteriors);
if (DEBUGOUT)
System.out.format("Initial Mean AF: %5.6f\n", meanAF);
for (int numIterations=0; numIterations < maxNumIterations; numIterations++) {
double oldMeanAF = meanAF;
alleleFrequencyPosteriors = updateAFEstimate(GLs, alleleFrequencyPosteriors);
meanAF = computeMeanAF(alleleFrequencyPosteriors);
if (DEBUGOUT)
System.out.format("Mean AF: %5.4f. PVariant: %5.5f\n", meanAF,1.0-alleleFrequencyPosteriors[0]);
if (Math.abs(meanAF-oldMeanAF) < tol)
break;
}
for (int k=0; k < alleleFrequencyPosteriors.length; k++)
log10AlleleFrequencyPosteriors[k] = Math.log10(alleleFrequencyPosteriors[k]);
}
private double[] updateAFEstimate(Map<String, BiallelicGenotypeLikelihoods> GLs, double[] alleleFrequencyPriors) {
int numSamples = GLs.size();
int numChr = 2*numSamples;
double[][] YMatrix = new double[1+numSamples][1+numChr];
YMatrix[0][0] = 1.0;
int j=0;
for ( String sample : GLs.keySet() ) {
j++;
double[] genotypeLikelihoods = MathUtils.normalizeFromLog10(GLs.get(sample).getLikelihoods());
double sum = 0.0;
double den = 2.0*j*(2.0*j-1);
for (int k=0; k <= 2*j; k++ ) {
double tmp = (2.0*j-k)*(2.0*j-k-1)*YMatrix[j-1][k] * genotypeLikelihoods[2];
if (k > 0)
tmp += (2.0*k)*(2.0*j-k)*YMatrix[j-1][k-1]*genotypeLikelihoods[1];
if (k > 1)
tmp += k*(k-1)*YMatrix[j-1][k-2]*genotypeLikelihoods[0];
YMatrix[j][k] = tmp/den;
sum += YMatrix[j][k];
}
// renormalize row
for (int k=0; k <= 2*j; k++ )
YMatrix[j][k] /= sum;
}
double sum = 0.0;
double[] newAF = new double[alleleFrequencyPriors.length];
for (int k=0; k <= numChr; k++) {
double prod = YMatrix[j][numChr-k] * alleleFrequencyPriors[k];
newAF[k] = prod;
sum += prod;
}
//renormalize now
for (int k=0; k < newAF.length; k++)
newAF[k] /= sum;
return newAF;
}
private double computeMeanAF(double[] afVector) {
// get now new site AF estimate
double sum = 0.0;
for (int k=0; k < afVector.length; k++)
sum += (double)k * afVector[k];
return sum/afVector.length;
}
}