Improvement in exact allele frequency calculation model (still under test, but this is definitely better than what I had before). Instead of approximating log(10^x+10^y) as max(x,y), approximate full Jacobian formula max(x,y)+log(1+10^-abs(x-y)) with static lookup table for the second term.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4647 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
f35d1aa43f
commit
2f3be24a00
|
|
@ -43,16 +43,32 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
|
||||
private boolean DEBUGOUT = false;
|
||||
private boolean SIMPLE_GREEDY_GENOTYPER = false;
|
||||
private double[] log10Cache;
|
||||
private static final double[] log10Cache;
|
||||
private static final double[] jacobianLogTable;
|
||||
private static final int JACOBIAN_LOG_TABLE_SIZE = 101;
|
||||
private static final double JACOBIAN_LOG_TABLE_STEP = 0.1;
|
||||
private static final double MAX_JACOBIAN_TOLERANCE = 10.0;
|
||||
private static final int MAXN = 10000;
|
||||
|
||||
static {
|
||||
log10Cache = new double[2*MAXN];
|
||||
jacobianLogTable = new double[JACOBIAN_LOG_TABLE_SIZE];
|
||||
|
||||
log10Cache[0] = Double.NEGATIVE_INFINITY;
|
||||
for (int k=1; k < 2*MAXN; k++)
|
||||
log10Cache[k] = Math.log10(k);
|
||||
|
||||
for (int k=0; k < JACOBIAN_LOG_TABLE_SIZE; k++) {
|
||||
jacobianLogTable[k] = Math.log10(1.0+Math.pow(10.0,-((double)k)
|
||||
* JACOBIAN_LOG_TABLE_STEP));
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
protected ExactAFCalculationModel(int N, Logger logger, PrintStream verboseWriter) {
|
||||
super(N, logger, verboseWriter);
|
||||
|
||||
log10Cache = new double[2*N];
|
||||
|
||||
log10Cache[0] = Double.NEGATIVE_INFINITY;
|
||||
for (int k=1; k < 2*N; k++)
|
||||
log10Cache[k] = Math.log10(k);
|
||||
}
|
||||
|
||||
public void getLog10PNonRef(RefMetaDataTracker tracker,
|
||||
|
|
@ -137,41 +153,62 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
//YMatrix[j][k] = num/den;
|
||||
logYMatrix[j][k] = logNum - logDenominator;
|
||||
}
|
||||
// int kstart = k-1;
|
||||
// for (k=kstart; k <= 2*j; k++)
|
||||
// logYMatrix[j][k] = logYMatrix[j][kstart];
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
for (int k=0; k <= numChr; k++)
|
||||
log10AlleleFrequencyPosteriors[k] = logYMatrix[j][k] + log10AlleleFrequencyPriors[k];
|
||||
|
||||
}
|
||||
|
||||
|
||||
double softMax(double[] vec) {
|
||||
// compute naively log10(10^x[0] + 10^x[1]+...)
|
||||
//return Math.log10(MathUtils.sumLog10(vec));
|
||||
|
||||
//int maxInd = MathUtils.maxElementIndex(vec);
|
||||
//return vec[maxInd];
|
||||
|
||||
// still more efficient: inline search for max. Hard-code fact that vec has length 3
|
||||
if (vec[0] > vec[1])
|
||||
if (vec[2] > vec[0])
|
||||
return vec[2];
|
||||
else
|
||||
return vec[0];
|
||||
else
|
||||
if (vec[2]>vec[1])
|
||||
return vec[2];
|
||||
else
|
||||
return vec[1];
|
||||
|
||||
|
||||
// return Math.log10(MathUtils.sumLog10(vec));
|
||||
|
||||
// better approximation: do Jacobian logarithm function on data pairs
|
||||
double a = softMaxPair(vec[0],vec[1]);
|
||||
return softMaxPair(a,vec[2]);
|
||||
}
|
||||
|
||||
double softMaxPair(double x, double y) {
|
||||
if (Double.isInfinite(x))
|
||||
return y;
|
||||
|
||||
if (Double.isInfinite(y))
|
||||
return x;
|
||||
|
||||
if (y >= x + MAX_JACOBIAN_TOLERANCE)
|
||||
return y;
|
||||
if (x >= y + MAX_JACOBIAN_TOLERANCE)
|
||||
return x;
|
||||
|
||||
// OK, so |y-x| < tol: we use the following identity then:
|
||||
// we need to compute log10(10^x + 10^y)
|
||||
// By Jacobian logarithm identity, this is equal to
|
||||
// max(x,y) + log10(1+10^-abs(x-y))
|
||||
// we compute the second term as a table lookup
|
||||
// with integer quantization
|
||||
double diff = Math.abs(x-y);
|
||||
double t1 =x;
|
||||
if (y > x)
|
||||
t1 = y;
|
||||
// t has max(x,y)
|
||||
// we have pre-stored correction for 0,0.1,0.2,... 10.0
|
||||
int ind = (int)Math.round(diff/JACOBIAN_LOG_TABLE_STEP);
|
||||
double t2 = jacobianLogTable[ind];
|
||||
|
||||
// gdebug+
|
||||
//double z =Math.log10(1+Math.pow(10.0,-diff));
|
||||
//System.out.format("x: %f, y:%f, app: %f, true: %f ind:%d\n",x,y,t2,z,ind);
|
||||
//gdebug-
|
||||
return t1+t2;
|
||||
// return Math.log10(Math.pow(10.0,x) + Math.pow(10.0,y));
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Can be overridden by concrete subclasses
|
||||
* @param contexts alignment contexts
|
||||
|
|
|
|||
Loading…
Reference in New Issue