First iteration in filling up exact AF calculation with new refactored UG. Code computes EM iterations of exact AF spectrum and returns to caller.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4381 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
2708e83198
commit
4556e3b273
|
|
@ -26,6 +26,7 @@
|
||||||
package org.broadinstitute.sting.playground.gatk.walkers.genotyper;
|
package org.broadinstitute.sting.playground.gatk.walkers.genotyper;
|
||||||
|
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
|
import org.broadinstitute.sting.utils.*;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
|
||||||
|
|
@ -34,6 +35,10 @@ import java.io.PrintStream;
|
||||||
|
|
||||||
public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
||||||
|
|
||||||
|
private int maxNumIterations = 50;
|
||||||
|
private double tol = 1e-4;
|
||||||
|
private boolean DEBUGOUT = true;
|
||||||
|
|
||||||
protected ExactAFCalculationModel(int N, Logger logger, PrintStream verboseWriter) {
|
protected ExactAFCalculationModel(int N, Logger logger, PrintStream verboseWriter) {
|
||||||
super(N, logger, verboseWriter);
|
super(N, logger, verboseWriter);
|
||||||
}
|
}
|
||||||
|
|
@ -44,7 +49,90 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
||||||
double[] log10AlleleFrequencyPriors,
|
double[] log10AlleleFrequencyPriors,
|
||||||
double[] log10AlleleFrequencyPosteriors) {
|
double[] log10AlleleFrequencyPosteriors) {
|
||||||
|
|
||||||
// TODO: implement me based on
|
// Math requires linear math to make efficient updates.
|
||||||
|
double[] alleleFrequencyPosteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPriors);
|
||||||
|
|
||||||
|
// now that we have genotype likelihoods for each sample, we can start refining allele frequency estimate
|
||||||
|
double meanAF = computeMeanAF(alleleFrequencyPosteriors);
|
||||||
|
if (DEBUGOUT)
|
||||||
|
System.out.format("Initial Mean AF: %5.6f\n", meanAF);
|
||||||
|
|
||||||
|
|
||||||
|
for (int numIterations=0; numIterations < maxNumIterations; numIterations++) {
|
||||||
|
double oldMeanAF = meanAF;
|
||||||
|
alleleFrequencyPosteriors = updateAFEstimate(GLs, alleleFrequencyPosteriors);
|
||||||
|
meanAF = computeMeanAF(alleleFrequencyPosteriors);
|
||||||
|
|
||||||
|
if (DEBUGOUT)
|
||||||
|
System.out.format("Mean AF: %5.4f. PVariant: %5.5f\n", meanAF,1.0-alleleFrequencyPosteriors[0]);
|
||||||
|
|
||||||
|
if (Math.abs(meanAF-oldMeanAF) < tol)
|
||||||
|
break;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int k=0; k < alleleFrequencyPosteriors.length; k++)
|
||||||
|
log10AlleleFrequencyPosteriors[k] = Math.log10(alleleFrequencyPosteriors[k]);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private double[] updateAFEstimate(Map<String, BiallelicGenotypeLikelihoods> GLs, double[] alleleFrequencyPriors) {
|
||||||
|
|
||||||
|
int numSamples = GLs.size();
|
||||||
|
int numChr = 2*numSamples;
|
||||||
|
|
||||||
|
double[][] YMatrix = new double[1+numSamples][1+numChr];
|
||||||
|
YMatrix[0][0] = 1.0;
|
||||||
|
int j=0;
|
||||||
|
|
||||||
|
for ( String sample : GLs.keySet() ) {
|
||||||
|
j++;
|
||||||
|
|
||||||
|
double[] genotypeLikelihoods = MathUtils.normalizeFromLog10(GLs.get(sample).getLikelihoods());
|
||||||
|
double sum = 0.0;
|
||||||
|
double den = 2.0*j*(2.0*j-1);
|
||||||
|
|
||||||
|
for (int k=0; k <= 2*j; k++ ) {
|
||||||
|
double tmp = (2.0*j-k)*(2.0*j-k-1)*YMatrix[j-1][k] * genotypeLikelihoods[2];
|
||||||
|
if (k > 0)
|
||||||
|
tmp += (2.0*k)*(2.0*j-k)*YMatrix[j-1][k-1]*genotypeLikelihoods[1];
|
||||||
|
if (k > 1)
|
||||||
|
tmp += k*(k-1)*YMatrix[j-1][k-2]*genotypeLikelihoods[0];
|
||||||
|
|
||||||
|
YMatrix[j][k] = tmp/den;
|
||||||
|
|
||||||
|
sum += YMatrix[j][k];
|
||||||
|
}
|
||||||
|
// renormalize row
|
||||||
|
for (int k=0; k <= 2*j; k++ )
|
||||||
|
YMatrix[j][k] /= sum;
|
||||||
|
|
||||||
|
}
|
||||||
|
double sum = 0.0;
|
||||||
|
double[] newAF = new double[alleleFrequencyPriors.length];
|
||||||
|
|
||||||
|
for (int k=0; k <= numChr; k++) {
|
||||||
|
double prod = YMatrix[j][numChr-k] * alleleFrequencyPriors[k];
|
||||||
|
newAF[k] = prod;
|
||||||
|
sum += prod;
|
||||||
|
}
|
||||||
|
//renormalize now
|
||||||
|
for (int k=0; k < newAF.length; k++)
|
||||||
|
newAF[k] /= sum;
|
||||||
|
|
||||||
|
return newAF;
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private double computeMeanAF(double[] afVector) {
|
||||||
|
// get now new site AF estimate
|
||||||
|
double sum = 0.0;
|
||||||
|
for (int k=0; k < afVector.length; k++)
|
||||||
|
sum += (double)k * afVector[k];
|
||||||
|
|
||||||
|
return sum/afVector.length;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
Loading…
Reference in New Issue