Improvement in exact allele frequency calculation model (still under test, but this is definitely better than what I had before). Instead of approximating log(10^x+10^y) as max(x,y), approximate full Jacobian formula max(x,y)+log(1+10^-abs(x-y)) with static lookup table for the second term.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4647 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
delangel 2010-11-11 01:22:35 +00:00
parent f35d1aa43f
commit 2f3be24a00
1 changed files with 65 additions and 28 deletions

View File

@ -43,16 +43,32 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
private boolean DEBUGOUT = false;
private boolean SIMPLE_GREEDY_GENOTYPER = false;
private double[] log10Cache;
private static final double[] log10Cache;
private static final double[] jacobianLogTable;
private static final int JACOBIAN_LOG_TABLE_SIZE = 101;
private static final double JACOBIAN_LOG_TABLE_STEP = 0.1;
private static final double MAX_JACOBIAN_TOLERANCE = 10.0;
private static final int MAXN = 10000;
static {
log10Cache = new double[2*MAXN];
jacobianLogTable = new double[JACOBIAN_LOG_TABLE_SIZE];
log10Cache[0] = Double.NEGATIVE_INFINITY;
for (int k=1; k < 2*MAXN; k++)
log10Cache[k] = Math.log10(k);
for (int k=0; k < JACOBIAN_LOG_TABLE_SIZE; k++) {
jacobianLogTable[k] = Math.log10(1.0+Math.pow(10.0,-((double)k)
* JACOBIAN_LOG_TABLE_STEP));
}
}
protected ExactAFCalculationModel(int N, Logger logger, PrintStream verboseWriter) {
super(N, logger, verboseWriter);
log10Cache = new double[2*N];
log10Cache[0] = Double.NEGATIVE_INFINITY;
for (int k=1; k < 2*N; k++)
log10Cache[k] = Math.log10(k);
}
public void getLog10PNonRef(RefMetaDataTracker tracker,
@ -137,41 +153,62 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
//YMatrix[j][k] = num/den;
logYMatrix[j][k] = logNum - logDenominator;
}
// int kstart = k-1;
// for (k=kstart; k <= 2*j; k++)
// logYMatrix[j][k] = logYMatrix[j][kstart];
}
for (int k=0; k <= numChr; k++)
log10AlleleFrequencyPosteriors[k] = logYMatrix[j][k] + log10AlleleFrequencyPriors[k];
}
double softMax(double[] vec) {
// compute naively log10(10^x[0] + 10^x[1]+...)
//return Math.log10(MathUtils.sumLog10(vec));
//int maxInd = MathUtils.maxElementIndex(vec);
//return vec[maxInd];
// still more efficient: inline search for max. Hard-code fact that vec has length 3
if (vec[0] > vec[1])
if (vec[2] > vec[0])
return vec[2];
else
return vec[0];
else
if (vec[2]>vec[1])
return vec[2];
else
return vec[1];
// return Math.log10(MathUtils.sumLog10(vec));
// better approximation: do Jacobian logarithm function on data pairs
double a = softMaxPair(vec[0],vec[1]);
return softMaxPair(a,vec[2]);
}
double softMaxPair(double x, double y) {
if (Double.isInfinite(x))
return y;
if (Double.isInfinite(y))
return x;
if (y >= x + MAX_JACOBIAN_TOLERANCE)
return y;
if (x >= y + MAX_JACOBIAN_TOLERANCE)
return x;
// OK, so |y-x| < tol: we use the following identity then:
// we need to compute log10(10^x + 10^y)
// By Jacobian logarithm identity, this is equal to
// max(x,y) + log10(1+10^-abs(x-y))
// we compute the second term as a table lookup
// with integer quantization
double diff = Math.abs(x-y);
double t1 =x;
if (y > x)
t1 = y;
// t has max(x,y)
// we have pre-stored correction for 0,0.1,0.2,... 10.0
int ind = (int)Math.round(diff/JACOBIAN_LOG_TABLE_STEP);
double t2 = jacobianLogTable[ind];
// gdebug+
//double z =Math.log10(1+Math.pow(10.0,-diff));
//System.out.format("x: %f, y:%f, app: %f, true: %f ind:%d\n",x,y,t2,z,ind);
//gdebug-
return t1+t2;
// return Math.log10(Math.pow(10.0,x) + Math.pow(10.0,y));
}
/**
* Can be overridden by concrete subclasses
* @param contexts alignment contexts