Intermediate optimization checkin. LinearExact model now about 10-20% faster than previous commit, by reorganizing and optimizing the if statements and genotype likelihood calculations. Next commit will include a banded implementation.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5362 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
f0f4bc3363
commit
0181d95fe4
|
|
@ -38,24 +38,32 @@ import java.util.*;
|
||||||
import java.io.PrintStream;
|
import java.io.PrintStream;
|
||||||
|
|
||||||
public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
||||||
|
//
|
||||||
|
// code for testing purposes
|
||||||
|
//
|
||||||
private final static boolean DEBUG = false;
|
private final static boolean DEBUG = false;
|
||||||
private final static boolean PRINT_LIKELIHOODS = false;
|
private final static boolean PRINT_LIKELIHOODS = false;
|
||||||
|
private final static int N_CYCLES = 1;
|
||||||
|
private SimpleTimer timerExpt = new SimpleTimer("linearExact");
|
||||||
|
private SimpleTimer timerGS = new SimpleTimer("linearExactGS");
|
||||||
|
private final static boolean COMPARE_TO_GS = false;
|
||||||
|
|
||||||
public enum ExactCalculation {
|
public enum ExactCalculation {
|
||||||
N2_GOLD_STANDARD,
|
N2_GOLD_STANDARD,
|
||||||
LINEAR_EXPERIMENTAL
|
LINEAR_EXPERIMENTAL
|
||||||
}
|
}
|
||||||
|
|
||||||
private final static boolean COMPARE_TO_GS = false;
|
|
||||||
private final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6
|
private final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6
|
||||||
|
|
||||||
private boolean SIMPLE_GREEDY_GENOTYPER = false;
|
private boolean SIMPLE_GREEDY_GENOTYPER = false;
|
||||||
private static final double[] log10Cache;
|
private static final double[] log10Cache;
|
||||||
private static final double[] jacobianLogTable;
|
private static final double[] jacobianLogTable;
|
||||||
|
// private static final int JACOBIAN_LOG_TABLE_SIZE = 100001;
|
||||||
|
// private static final double JACOBIAN_LOG_TABLE_STEP = 0.0001;
|
||||||
private static final int JACOBIAN_LOG_TABLE_SIZE = 101;
|
private static final int JACOBIAN_LOG_TABLE_SIZE = 101;
|
||||||
private static final double JACOBIAN_LOG_TABLE_STEP = 0.1;
|
private static final double JACOBIAN_LOG_TABLE_STEP = 0.1;
|
||||||
private static final double MAX_JACOBIAN_TOLERANCE = 10.0;
|
private static final double MAX_JACOBIAN_TOLERANCE = 10.0;
|
||||||
private static final int MAXN = 10000;
|
private static final int MAXN = 10000; // todo -- warning, this might be hit at some point...
|
||||||
|
|
||||||
static {
|
static {
|
||||||
log10Cache = new double[2*MAXN];
|
log10Cache = new double[2*MAXN];
|
||||||
|
|
@ -66,8 +74,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
||||||
log10Cache[k] = Math.log10(k);
|
log10Cache[k] = Math.log10(k);
|
||||||
|
|
||||||
for (int k=0; k < JACOBIAN_LOG_TABLE_SIZE; k++) {
|
for (int k=0; k < JACOBIAN_LOG_TABLE_SIZE; k++) {
|
||||||
jacobianLogTable[k] = Math.log10(1.0+Math.pow(10.0,-((double)k)
|
jacobianLogTable[k] = Math.log10(1.0+Math.pow(10.0,-((double)k) * JACOBIAN_LOG_TABLE_STEP));
|
||||||
* JACOBIAN_LOG_TABLE_STEP));
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -91,6 +98,22 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
||||||
if ( COMPARE_TO_GS ) // due to annoying special values in incoming array, we have to clone up here
|
if ( COMPARE_TO_GS ) // due to annoying special values in incoming array, we have to clone up here
|
||||||
gsPosteriors = log10AlleleFrequencyPosteriors.clone();
|
gsPosteriors = log10AlleleFrequencyPosteriors.clone();
|
||||||
|
|
||||||
|
// todo -- remove me after testing
|
||||||
|
if ( N_CYCLES > 1 ) {
|
||||||
|
for ( int i = 0; i < N_CYCLES; i++) {
|
||||||
|
timerGS.restart();
|
||||||
|
linearExactClean(GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors.clone());
|
||||||
|
timerGS.stop();
|
||||||
|
|
||||||
|
timerExpt.restart();
|
||||||
|
linearExact(GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors.clone());
|
||||||
|
timerExpt.stop();
|
||||||
|
}
|
||||||
|
|
||||||
|
System.out.printf("good = %.2f, expt = %.2f, delta = %.2f%n",
|
||||||
|
timerGS.getElapsedTime(), timerExpt.getElapsedTime(), timerExpt.getElapsedTime()-timerGS.getElapsedTime());
|
||||||
|
}
|
||||||
|
|
||||||
int lastK = -1;
|
int lastK = -1;
|
||||||
switch ( calcToUse ) {
|
switch ( calcToUse ) {
|
||||||
case N2_GOLD_STANDARD:
|
case N2_GOLD_STANDARD:
|
||||||
|
|
@ -141,37 +164,45 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
||||||
return genotypeLikelihoods;
|
return genotypeLikelihoods;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static class ExactACCache {
|
// -------------------------------------------------------------------------------------
|
||||||
|
//
|
||||||
|
// Linearized, ~O(N), implementation.
|
||||||
|
//
|
||||||
|
// -------------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A simple data structure that holds the current, prev, and prev->prev likelihoods vectors
|
||||||
|
* for the exact model calculation
|
||||||
|
*/
|
||||||
|
private final static class ExactACCache {
|
||||||
double[] kMinus2, kMinus1, kMinus0;
|
double[] kMinus2, kMinus1, kMinus0;
|
||||||
|
|
||||||
private static double[] create(int n, double defaultValue) {
|
private final static double[] create(int n) {
|
||||||
double[] v = new double[n];
|
return new double[n];
|
||||||
Arrays.fill(v, defaultValue);
|
|
||||||
return v;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public ExactACCache(int n, double defaultValue) {
|
public ExactACCache(int n) {
|
||||||
kMinus2 = create(n, defaultValue);
|
kMinus2 = create(n);
|
||||||
kMinus1 = create(n, defaultValue);
|
kMinus1 = create(n);
|
||||||
kMinus0 = create(n, defaultValue);
|
kMinus0 = create(n);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void rotate() {
|
final public void rotate() {
|
||||||
double[] tmp = kMinus2;
|
double[] tmp = kMinus2;
|
||||||
kMinus2 = kMinus1;
|
kMinus2 = kMinus1;
|
||||||
kMinus1 = kMinus0;
|
kMinus1 = kMinus0;
|
||||||
kMinus0 = tmp;
|
kMinus0 = tmp;
|
||||||
}
|
}
|
||||||
|
|
||||||
public double[] getkMinus2() {
|
final public double[] getkMinus2() {
|
||||||
return kMinus2;
|
return kMinus2;
|
||||||
}
|
}
|
||||||
|
|
||||||
public double[] getkMinus1() {
|
final public double[] getkMinus1() {
|
||||||
return kMinus1;
|
return kMinus1;
|
||||||
}
|
}
|
||||||
|
|
||||||
public double[] getkMinus0() {
|
final public double[] getkMinus0() {
|
||||||
return kMinus0;
|
return kMinus0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -179,6 +210,109 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
||||||
public int linearExact(Map<String, Genotype> GLs,
|
public int linearExact(Map<String, Genotype> GLs,
|
||||||
double[] log10AlleleFrequencyPriors,
|
double[] log10AlleleFrequencyPriors,
|
||||||
double[] log10AlleleFrequencyPosteriors) {
|
double[] log10AlleleFrequencyPosteriors) {
|
||||||
|
final int numSamples = GLs.size();
|
||||||
|
final int numChr = 2*numSamples;
|
||||||
|
final double[][] genotypeLikelihoods = getGLs(GLs);
|
||||||
|
|
||||||
|
final ExactACCache logY = new ExactACCache(numSamples+1);
|
||||||
|
logY.getkMinus0()[0] = 0.0; // the zero case
|
||||||
|
|
||||||
|
double maxLog10L = Double.NEGATIVE_INFINITY;
|
||||||
|
boolean done = false;
|
||||||
|
int lastK = -1;
|
||||||
|
|
||||||
|
for (int k=0; k <= numChr && ! done; k++ ) {
|
||||||
|
final double[] kMinus0 = logY.getkMinus0();
|
||||||
|
|
||||||
|
if ( k == 0 ) { // special case for k = 0
|
||||||
|
for ( int j=1; j <= numSamples; j++ ) {
|
||||||
|
kMinus0[j] = kMinus0[j-1] + genotypeLikelihoods[j][GenotypeType.AA.ordinal()];
|
||||||
|
}
|
||||||
|
} else { // k > 0
|
||||||
|
final double[] kMinus1 = logY.getkMinus1();
|
||||||
|
final double[] kMinus2 = logY.getkMinus2();
|
||||||
|
|
||||||
|
for ( int j=1; j <= numSamples; j++ ) {
|
||||||
|
final double[] gl = genotypeLikelihoods[j];
|
||||||
|
final double logDenominator = log10Cache[2*j] + log10Cache[2*j-1];
|
||||||
|
|
||||||
|
double aa = Double.NEGATIVE_INFINITY;
|
||||||
|
double ab = Double.NEGATIVE_INFINITY;
|
||||||
|
if (k < 2*j-1)
|
||||||
|
aa = log10Cache[2*j-k] + log10Cache[2*j-k-1] + kMinus0[j-1] + gl[GenotypeType.AA.ordinal()];
|
||||||
|
|
||||||
|
if (k < 2*j)
|
||||||
|
ab = log10Cache[2*k] + log10Cache[2*j-k]+ kMinus1[j-1] + gl[GenotypeType.AB.ordinal()];
|
||||||
|
|
||||||
|
double log10Max;
|
||||||
|
if (k > 1) {
|
||||||
|
final double bb = log10Cache[k] + log10Cache[k-1] + kMinus2[j-1] + gl[GenotypeType.BB.ordinal()];
|
||||||
|
log10Max = approximateLog10SumLog10(aa, ab, bb);
|
||||||
|
} else {
|
||||||
|
// we know we aren't considering the BB case, so we can use an optimized log10 function
|
||||||
|
log10Max = approximateLog10SumLog10(aa, ab);
|
||||||
|
}
|
||||||
|
|
||||||
|
// finally, update the L(j,k) value
|
||||||
|
kMinus0[j] = log10Max - logDenominator;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// update the posteriors vector
|
||||||
|
final double log10LofK = kMinus0[numSamples];
|
||||||
|
log10AlleleFrequencyPosteriors[k] = log10LofK + log10AlleleFrequencyPriors[k];
|
||||||
|
|
||||||
|
// can we abort early?
|
||||||
|
lastK = k;
|
||||||
|
maxLog10L = Math.max(maxLog10L, log10LofK);
|
||||||
|
if ( log10LofK < maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY ) {
|
||||||
|
if ( DEBUG ) System.out.printf(" *** breaking early k=%d log10L=%.2f maxLog10L=%.2f%n", k, log10LofK, maxLog10L);
|
||||||
|
done = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
logY.rotate();
|
||||||
|
}
|
||||||
|
|
||||||
|
return lastK;
|
||||||
|
}
|
||||||
|
|
||||||
|
final static double approximateLog10SumLog10(double a, double b, double c) {
|
||||||
|
//return softMax(new double[]{a, b, c});
|
||||||
|
return approximateLog10SumLog10(approximateLog10SumLog10(a, b), c);
|
||||||
|
}
|
||||||
|
|
||||||
|
final static double approximateLog10SumLog10(double small, double big) {
|
||||||
|
// make sure small is really the smaller value
|
||||||
|
if ( small > big ) {
|
||||||
|
final double t = big;
|
||||||
|
big = small;
|
||||||
|
small = t;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (small == Double.NEGATIVE_INFINITY || big == Double.NEGATIVE_INFINITY )
|
||||||
|
return big;
|
||||||
|
|
||||||
|
if (big >= small + MAX_JACOBIAN_TOLERANCE)
|
||||||
|
return big;
|
||||||
|
|
||||||
|
// OK, so |y-x| < tol: we use the following identity then:
|
||||||
|
// we need to compute log10(10^x + 10^y)
|
||||||
|
// By Jacobian logarithm identity, this is equal to
|
||||||
|
// max(x,y) + log10(1+10^-abs(x-y))
|
||||||
|
// we compute the second term as a table lookup
|
||||||
|
// with integer quantization
|
||||||
|
// we have pre-stored correction for 0,0.1,0.2,... 10.0
|
||||||
|
//final int ind = (int)(((big-small)/JACOBIAN_LOG_TABLE_STEP)); // hard rounding
|
||||||
|
int ind = (int)(Math.round((big-small)/JACOBIAN_LOG_TABLE_STEP)); // hard rounding
|
||||||
|
|
||||||
|
//double z =Math.log10(1+Math.pow(10.0,-diff));
|
||||||
|
//System.out.format("x: %f, y:%f, app: %f, true: %f ind:%d\n",x,y,t2,z,ind);
|
||||||
|
return big + jacobianLogTable[ind];
|
||||||
|
}
|
||||||
|
|
||||||
|
public int linearExactClean(Map<String, Genotype> GLs,
|
||||||
|
double[] log10AlleleFrequencyPriors,
|
||||||
|
double[] log10AlleleFrequencyPosteriors) {
|
||||||
int numSamples = GLs.size();
|
int numSamples = GLs.size();
|
||||||
int numChr = 2*numSamples;
|
int numChr = 2*numSamples;
|
||||||
double[][] genotypeLikelihoods = getGLs(GLs); // todo -- remove me, not sure this is helping
|
double[][] genotypeLikelihoods = getGLs(GLs); // todo -- remove me, not sure this is helping
|
||||||
|
|
@ -187,7 +321,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
||||||
// set posteriors to negative infinity by default:
|
// set posteriors to negative infinity by default:
|
||||||
//Arrays.fill(log10AlleleFrequencyPosteriors, Double.NEGATIVE_INFINITY);
|
//Arrays.fill(log10AlleleFrequencyPosteriors, Double.NEGATIVE_INFINITY);
|
||||||
|
|
||||||
ExactACCache logY = new ExactACCache(numSamples+1, Double.NEGATIVE_INFINITY);
|
ExactACCache logY = new ExactACCache(numSamples+1);
|
||||||
logY.getkMinus0()[0] = 0.0; // the zero case
|
logY.getkMinus0()[0] = 0.0; // the zero case
|
||||||
|
|
||||||
double maxLog10L = Double.NEGATIVE_INFINITY;
|
double maxLog10L = Double.NEGATIVE_INFINITY;
|
||||||
|
|
@ -251,129 +385,6 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
||||||
return lastK;
|
return lastK;
|
||||||
}
|
}
|
||||||
|
|
||||||
public int gdaN2GoldStandard(Map<String, Genotype> GLs,
|
|
||||||
double[] log10AlleleFrequencyPriors,
|
|
||||||
double[] log10AlleleFrequencyPosteriors) {
|
|
||||||
int numSamples = GLs.size();
|
|
||||||
int numChr = 2*numSamples;
|
|
||||||
|
|
||||||
double[][] logYMatrix = new double[1+numSamples][1+numChr];
|
|
||||||
|
|
||||||
for (int i=0; i <=numSamples; i++)
|
|
||||||
for (int j=0; j <=numChr; j++)
|
|
||||||
logYMatrix[i][j] = Double.NEGATIVE_INFINITY;
|
|
||||||
|
|
||||||
//YMatrix[0][0] = 1.0;
|
|
||||||
logYMatrix[0][0] = 0.0;
|
|
||||||
int j=0;
|
|
||||||
|
|
||||||
for ( Map.Entry<String, Genotype> sample : GLs.entrySet() ) {
|
|
||||||
j++;
|
|
||||||
|
|
||||||
if ( !sample.getValue().hasLikelihoods() )
|
|
||||||
continue;
|
|
||||||
|
|
||||||
//double[] genotypeLikelihoods = MathUtils.normalizeFromLog10(GLs.get(sample).getLikelihoods());
|
|
||||||
double[] genotypeLikelihoods = sample.getValue().getLikelihoods().getAsVector();
|
|
||||||
//double logDenominator = Math.log10(2.0*j*(2.0*j-1));
|
|
||||||
double logDenominator = log10Cache[2*j] + log10Cache[2*j-1];
|
|
||||||
|
|
||||||
// special treatment for k=0: iteration reduces to:
|
|
||||||
//YMatrix[j][0] = YMatrix[j-1][0]*genotypeLikelihoods[GenotypeType.AA.ordinal()];
|
|
||||||
logYMatrix[j][0] = logYMatrix[j-1][0] + genotypeLikelihoods[GenotypeType.AA.ordinal()];
|
|
||||||
|
|
||||||
for (int k=1; k <= 2*j; k++ ) {
|
|
||||||
|
|
||||||
//double num = (2.0*j-k)*(2.0*j-k-1)*YMatrix[j-1][k] * genotypeLikelihoods[GenotypeType.AA.ordinal()];
|
|
||||||
double logNumerator[];
|
|
||||||
logNumerator = new double[3];
|
|
||||||
if (k < 2*j-1)
|
|
||||||
logNumerator[0] = log10Cache[2*j-k] + log10Cache[2*j-k-1] + logYMatrix[j-1][k] +
|
|
||||||
genotypeLikelihoods[GenotypeType.AA.ordinal()];
|
|
||||||
else
|
|
||||||
logNumerator[0] = Double.NEGATIVE_INFINITY;
|
|
||||||
|
|
||||||
|
|
||||||
if (k < 2*j)
|
|
||||||
logNumerator[1] = log10Cache[2*k] + log10Cache[2*j-k]+ logYMatrix[j-1][k-1] +
|
|
||||||
genotypeLikelihoods[GenotypeType.AB.ordinal()];
|
|
||||||
else
|
|
||||||
logNumerator[1] = Double.NEGATIVE_INFINITY;
|
|
||||||
|
|
||||||
if (k > 1)
|
|
||||||
logNumerator[2] = log10Cache[k] + log10Cache[k-1] + logYMatrix[j-1][k-2] +
|
|
||||||
genotypeLikelihoods[GenotypeType.BB.ordinal()];
|
|
||||||
else
|
|
||||||
logNumerator[2] = Double.NEGATIVE_INFINITY;
|
|
||||||
|
|
||||||
double logNum = softMax(logNumerator);
|
|
||||||
|
|
||||||
//YMatrix[j][k] = num/den;
|
|
||||||
logYMatrix[j][k] = logNum - logDenominator;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
for (int k=0; k <= numChr; k++)
|
|
||||||
log10AlleleFrequencyPosteriors[k] = logYMatrix[j][k] + log10AlleleFrequencyPriors[k];
|
|
||||||
|
|
||||||
return numChr;
|
|
||||||
}
|
|
||||||
|
|
||||||
private final static void printLikelihoods(int numChr, double[][] logYMatrix, double[] log10AlleleFrequencyPriors) {
|
|
||||||
int j = logYMatrix.length - 1;
|
|
||||||
System.out.printf("-----------------------------------%n");
|
|
||||||
for (int k=0; k <= numChr; k++) {
|
|
||||||
double posterior = logYMatrix[j][k] + log10AlleleFrequencyPriors[k];
|
|
||||||
System.out.printf(" %4d\t%8.2f\t%8.2f\t%8.2f%n", k, logYMatrix[j][k], log10AlleleFrequencyPriors[k], posterior);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
double softMax(double[] vec) {
|
|
||||||
// compute naively log10(10^x[0] + 10^x[1]+...)
|
|
||||||
// return Math.log10(MathUtils.sumLog10(vec));
|
|
||||||
|
|
||||||
// better approximation: do Jacobian logarithm function on data pairs
|
|
||||||
double a = softMaxPair(vec[0],vec[1]);
|
|
||||||
return softMaxPair(a,vec[2]);
|
|
||||||
}
|
|
||||||
|
|
||||||
static public double softMaxPair(double x, double y) {
|
|
||||||
if (Double.isInfinite(x))
|
|
||||||
return y;
|
|
||||||
|
|
||||||
if (Double.isInfinite(y))
|
|
||||||
return x;
|
|
||||||
|
|
||||||
if (y >= x + MAX_JACOBIAN_TOLERANCE)
|
|
||||||
return y;
|
|
||||||
if (x >= y + MAX_JACOBIAN_TOLERANCE)
|
|
||||||
return x;
|
|
||||||
|
|
||||||
// OK, so |y-x| < tol: we use the following identity then:
|
|
||||||
// we need to compute log10(10^x + 10^y)
|
|
||||||
// By Jacobian logarithm identity, this is equal to
|
|
||||||
// max(x,y) + log10(1+10^-abs(x-y))
|
|
||||||
// we compute the second term as a table lookup
|
|
||||||
// with integer quantization
|
|
||||||
double diff = Math.abs(x-y);
|
|
||||||
double t1 =x;
|
|
||||||
if (y > x)
|
|
||||||
t1 = y;
|
|
||||||
// t has max(x,y)
|
|
||||||
// we have pre-stored correction for 0,0.1,0.2,... 10.0
|
|
||||||
int ind = (int)Math.round(diff/JACOBIAN_LOG_TABLE_STEP);
|
|
||||||
double t2 = jacobianLogTable[ind];
|
|
||||||
|
|
||||||
// gdebug+
|
|
||||||
//double z =Math.log10(1+Math.pow(10.0,-diff));
|
|
||||||
//System.out.format("x: %f, y:%f, app: %f, true: %f ind:%d\n",x,y,t2,z,ind);
|
|
||||||
//gdebug-
|
|
||||||
return t1+t2;
|
|
||||||
// return Math.log10(Math.pow(10.0,x) + Math.pow(10.0,y));
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Can be overridden by concrete subclasses
|
* Can be overridden by concrete subclasses
|
||||||
|
|
@ -501,456 +512,132 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
||||||
return calls;
|
return calls;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
// -------------------------------------------------------------------------------------
|
||||||
|
//
|
||||||
|
// Gold standard, but O(N^2), implementation.
|
||||||
|
//
|
||||||
|
// TODO -- remove me for clarity in this code
|
||||||
|
//
|
||||||
|
// -------------------------------------------------------------------------------------
|
||||||
|
public int gdaN2GoldStandard(Map<String, Genotype> GLs,
|
||||||
|
double[] log10AlleleFrequencyPriors,
|
||||||
|
double[] log10AlleleFrequencyPosteriors) {
|
||||||
|
int numSamples = GLs.size();
|
||||||
|
int numChr = 2*numSamples;
|
||||||
|
|
||||||
// working linearized version
|
double[][] logYMatrix = new double[1+numSamples][1+numChr];
|
||||||
//public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|
||||||
// private final static boolean PRINT_LIKELIHOODS = false;
|
for (int i=0; i <=numSamples; i++)
|
||||||
//
|
for (int j=0; j <=numChr; j++)
|
||||||
// public enum ExactCalculation {
|
logYMatrix[i][j] = Double.NEGATIVE_INFINITY;
|
||||||
// N2_GOLD_STANDARD,
|
|
||||||
// LINEAR_EXPERIMENTAL
|
//YMatrix[0][0] = 1.0;
|
||||||
// }
|
logYMatrix[0][0] = 0.0;
|
||||||
//
|
int j=0;
|
||||||
// private final static boolean COMPARE_TO_GS = false;
|
|
||||||
// private final static boolean PRINT_MAD_AC_POSTERIORS = false;
|
for ( Map.Entry<String, Genotype> sample : GLs.entrySet() ) {
|
||||||
// private final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6
|
j++;
|
||||||
//
|
|
||||||
// private boolean SIMPLE_GREEDY_GENOTYPER = false;
|
if ( !sample.getValue().hasLikelihoods() )
|
||||||
// private static final double[] log10Cache;
|
continue;
|
||||||
// private static final double[] jacobianLogTable;
|
|
||||||
// private static final int JACOBIAN_LOG_TABLE_SIZE = 101;
|
//double[] genotypeLikelihoods = MathUtils.normalizeFromLog10(GLs.get(sample).getLikelihoods());
|
||||||
// private static final double JACOBIAN_LOG_TABLE_STEP = 0.1;
|
double[] genotypeLikelihoods = sample.getValue().getLikelihoods().getAsVector();
|
||||||
// private static final double MAX_JACOBIAN_TOLERANCE = 10.0;
|
//double logDenominator = Math.log10(2.0*j*(2.0*j-1));
|
||||||
// private static final int MAXN = 10000;
|
double logDenominator = log10Cache[2*j] + log10Cache[2*j-1];
|
||||||
//
|
|
||||||
// static {
|
// special treatment for k=0: iteration reduces to:
|
||||||
// log10Cache = new double[2*MAXN];
|
//YMatrix[j][0] = YMatrix[j-1][0]*genotypeLikelihoods[GenotypeType.AA.ordinal()];
|
||||||
// jacobianLogTable = new double[JACOBIAN_LOG_TABLE_SIZE];
|
logYMatrix[j][0] = logYMatrix[j-1][0] + genotypeLikelihoods[GenotypeType.AA.ordinal()];
|
||||||
//
|
|
||||||
// log10Cache[0] = Double.NEGATIVE_INFINITY;
|
for (int k=1; k <= 2*j; k++ ) {
|
||||||
// for (int k=1; k < 2*MAXN; k++)
|
|
||||||
// log10Cache[k] = Math.log10(k);
|
//double num = (2.0*j-k)*(2.0*j-k-1)*YMatrix[j-1][k] * genotypeLikelihoods[GenotypeType.AA.ordinal()];
|
||||||
//
|
double logNumerator[];
|
||||||
// for (int k=0; k < JACOBIAN_LOG_TABLE_SIZE; k++) {
|
logNumerator = new double[3];
|
||||||
// jacobianLogTable[k] = Math.log10(1.0+Math.pow(10.0,-((double)k)
|
if (k < 2*j-1)
|
||||||
// * JACOBIAN_LOG_TABLE_STEP));
|
logNumerator[0] = log10Cache[2*j-k] + log10Cache[2*j-k-1] + logYMatrix[j-1][k] +
|
||||||
//
|
genotypeLikelihoods[GenotypeType.AA.ordinal()];
|
||||||
// }
|
else
|
||||||
//
|
logNumerator[0] = Double.NEGATIVE_INFINITY;
|
||||||
// }
|
|
||||||
//
|
|
||||||
// final private ExactCalculation calcToUse;
|
if (k < 2*j)
|
||||||
// protected ExactAFCalculationModel(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) {
|
logNumerator[1] = log10Cache[2*k] + log10Cache[2*j-k]+ logYMatrix[j-1][k-1] +
|
||||||
// super(UAC, N, logger, verboseWriter);
|
genotypeLikelihoods[GenotypeType.AB.ordinal()];
|
||||||
// calcToUse = UAC.EXACT_CALCULATION_TYPE;
|
else
|
||||||
// }
|
logNumerator[1] = Double.NEGATIVE_INFINITY;
|
||||||
//
|
|
||||||
// public void getLog10PNonRef(RefMetaDataTracker tracker,
|
if (k > 1)
|
||||||
// ReferenceContext ref,
|
logNumerator[2] = log10Cache[k] + log10Cache[k-1] + logYMatrix[j-1][k-2] +
|
||||||
// Map<String, Genotype> GLs,
|
genotypeLikelihoods[GenotypeType.BB.ordinal()];
|
||||||
// double[] log10AlleleFrequencyPriors,
|
else
|
||||||
// double[] log10AlleleFrequencyPosteriors) {
|
logNumerator[2] = Double.NEGATIVE_INFINITY;
|
||||||
// switch ( calcToUse ) {
|
|
||||||
// case N2_GOLD_STANDARD:
|
double logNum = softMax(logNumerator);
|
||||||
// gdaN2GoldStandard(GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors);
|
|
||||||
// break;
|
//YMatrix[j][k] = num/den;
|
||||||
// case LINEAR_EXPERIMENTAL:
|
logYMatrix[j][k] = logNum - logDenominator;
|
||||||
// madByAC(ref, GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors);
|
}
|
||||||
// break;
|
|
||||||
// }
|
}
|
||||||
// }
|
|
||||||
//
|
for (int k=0; k <= numChr; k++)
|
||||||
// private static final double[][] getGLs(Map<String, Genotype> GLs) {
|
log10AlleleFrequencyPosteriors[k] = logYMatrix[j][k] + log10AlleleFrequencyPriors[k];
|
||||||
// double[][] genotypeLikelihoods = new double[GLs.size()+1][];
|
|
||||||
//
|
return numChr;
|
||||||
// int j = 0;
|
}
|
||||||
// for ( Genotype sample : GLs.values() ) {
|
|
||||||
// j++;
|
private final static void printLikelihoods(int numChr, double[][] logYMatrix, double[] log10AlleleFrequencyPriors) {
|
||||||
//
|
int j = logYMatrix.length - 1;
|
||||||
// if ( sample.hasLikelihoods() ) {
|
System.out.printf("-----------------------------------%n");
|
||||||
// //double[] genotypeLikelihoods = MathUtils.normalizeFromLog10(GLs.get(sample).getLikelihoods());
|
for (int k=0; k <= numChr; k++) {
|
||||||
// genotypeLikelihoods[j] = sample.getLikelihoods().getAsVector();
|
double posterior = logYMatrix[j][k] + log10AlleleFrequencyPriors[k];
|
||||||
// }
|
System.out.printf(" %4d\t%8.2f\t%8.2f\t%8.2f%n", k, logYMatrix[j][k], log10AlleleFrequencyPriors[k], posterior);
|
||||||
// }
|
}
|
||||||
//
|
}
|
||||||
// return genotypeLikelihoods;
|
|
||||||
// }
|
double softMax(double[] vec) {
|
||||||
//
|
// compute naively log10(10^x[0] + 10^x[1]+...)
|
||||||
// private static class ExactACCache {
|
// return Math.log10(MathUtils.sumLog10(vec));
|
||||||
// double[] kMinus2, kMinus1, kMinus0;
|
|
||||||
//
|
// better approximation: do Jacobian logarithm function on data pairs
|
||||||
// private static double[] create(int n, double defaultValue) {
|
double a = softMaxPair(vec[0],vec[1]);
|
||||||
// double[] v = new double[n];
|
return softMaxPair(a,vec[2]);
|
||||||
// Arrays.fill(v, defaultValue);
|
}
|
||||||
// return v;
|
|
||||||
// }
|
static public double softMaxPair(double x, double y) {
|
||||||
//
|
if (Double.isInfinite(x))
|
||||||
// public ExactACCache(int nSamples, double defaultValue) {
|
return y;
|
||||||
// kMinus2 = create(nSamples, defaultValue);
|
|
||||||
// kMinus1 = create(nSamples, defaultValue);
|
if (Double.isInfinite(y))
|
||||||
// kMinus0 = create(nSamples, defaultValue);
|
return x;
|
||||||
// }
|
|
||||||
//
|
if (y >= x + MAX_JACOBIAN_TOLERANCE)
|
||||||
// public void rotate() {
|
return y;
|
||||||
// double[] tmp = kMinus2;
|
if (x >= y + MAX_JACOBIAN_TOLERANCE)
|
||||||
// kMinus2 = kMinus1;
|
return x;
|
||||||
// kMinus1 = kMinus0;
|
|
||||||
// kMinus0 = tmp;
|
// OK, so |y-x| < tol: we use the following identity then:
|
||||||
// }
|
// we need to compute log10(10^x + 10^y)
|
||||||
//
|
// By Jacobian logarithm identity, this is equal to
|
||||||
// public double[] getkMinus2() {
|
// max(x,y) + log10(1+10^-abs(x-y))
|
||||||
// return kMinus2;
|
// we compute the second term as a table lookup
|
||||||
// }
|
// with integer quantization
|
||||||
//
|
double diff = Math.abs(x-y);
|
||||||
// public double[] getkMinus1() {
|
double t1 =x;
|
||||||
// return kMinus1;
|
if (y > x)
|
||||||
// }
|
t1 = y;
|
||||||
//
|
// t has max(x,y)
|
||||||
// public double[] getkMinus0() {
|
// we have pre-stored correction for 0,0.1,0.2,... 10.0
|
||||||
// return kMinus0;
|
int ind = (int)Math.round(diff/JACOBIAN_LOG_TABLE_STEP);
|
||||||
// }
|
double t2 = jacobianLogTable[ind];
|
||||||
// }
|
|
||||||
//
|
// gdebug+
|
||||||
// public void madByAC(ReferenceContext ref,
|
//double z =Math.log10(1+Math.pow(10.0,-diff));
|
||||||
// Map<String, Genotype> GLs,
|
//System.out.format("x: %f, y:%f, app: %f, true: %f ind:%d\n",x,y,t2,z,ind);
|
||||||
// double[] log10AlleleFrequencyPriors,
|
//gdebug-
|
||||||
// double[] log10AlleleFrequencyPosteriors) {
|
return t1+t2;
|
||||||
// // todo -- remove me after testing
|
// return Math.log10(Math.pow(10.0,x) + Math.pow(10.0,y));
|
||||||
// double[] gsPosteriors = log10AlleleFrequencyPosteriors;
|
}
|
||||||
// if ( COMPARE_TO_GS ) {
|
}
|
||||||
// gsPosteriors = log10AlleleFrequencyPosteriors.clone();
|
|
||||||
// gdaN2GoldStandard(GLs, log10AlleleFrequencyPriors, gsPosteriors);
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// int numSamples = GLs.size();
|
|
||||||
// int numChr = 2*numSamples;
|
|
||||||
// double[][] genotypeLikelihoods = getGLs(GLs); // todo -- remove me, not sure this is helping
|
|
||||||
//
|
|
||||||
// // set posteriors to negative infinity by default:
|
|
||||||
// Arrays.fill(log10AlleleFrequencyPosteriors, Double.NEGATIVE_INFINITY);
|
|
||||||
//
|
|
||||||
// // todo -- replace this matrix with 3 vectors (k, k-1, k-2) and cycle through them
|
|
||||||
// // todo -- this is *CRITICAL* to reduce the algorithm to a true ~linear algorithm
|
|
||||||
// double[][] logYMatrix = new double[1+numSamples][1+numChr];
|
|
||||||
// for (int i=0; i <= numSamples; i++) // initialize
|
|
||||||
// Arrays.fill(logYMatrix[i], Double.NEGATIVE_INFINITY);
|
|
||||||
// logYMatrix[0][0] = 0.0; // the zero case
|
|
||||||
//
|
|
||||||
// double maxLog10L = Double.NEGATIVE_INFINITY;
|
|
||||||
// boolean done = false;
|
|
||||||
// int lastK = -1;
|
|
||||||
//
|
|
||||||
// // todo -- we may be able to start second loop some way down the calculation, since GdAs loop only
|
|
||||||
// // todo -- considers part of the matrix as well
|
|
||||||
// for (int k=0; k <= numChr && ! done; k++ ) {
|
|
||||||
// if ( k == 0 ) {
|
|
||||||
// // special case for k = 0
|
|
||||||
// for ( int j=1; j <= numSamples; j++ ) {
|
|
||||||
// logYMatrix[j][0] = logYMatrix[j-1][0] + genotypeLikelihoods[j][GenotypeType.AA.ordinal()];
|
|
||||||
// }
|
|
||||||
// } else { // k > 0
|
|
||||||
// for ( int j=1; j <= numSamples; j++ ) {
|
|
||||||
// double[] gl = genotypeLikelihoods[j];
|
|
||||||
// double logDenominator = log10Cache[2*j] + log10Cache[2*j-1];
|
|
||||||
//
|
|
||||||
// double[] logNumerator = {Double.NEGATIVE_INFINITY, Double.NEGATIVE_INFINITY, Double.NEGATIVE_INFINITY};
|
|
||||||
// if (k < 2*j-1)
|
|
||||||
// logNumerator[0] = log10Cache[2*j-k] + log10Cache[2*j-k-1] + logYMatrix[j-1][k] +
|
|
||||||
// gl[GenotypeType.AA.ordinal()];
|
|
||||||
//
|
|
||||||
// if (k < 2*j)
|
|
||||||
// logNumerator[1] = log10Cache[2*k] + log10Cache[2*j-k]+ logYMatrix[j-1][k-1] +
|
|
||||||
// gl[GenotypeType.AB.ordinal()];
|
|
||||||
//
|
|
||||||
// if (k > 1)
|
|
||||||
// logNumerator[2] = log10Cache[k] + log10Cache[k-1] + logYMatrix[j-1][k-2] +
|
|
||||||
// gl[GenotypeType.BB.ordinal()];
|
|
||||||
//
|
|
||||||
// logYMatrix[j][k] = softMax(logNumerator) - logDenominator;
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// // update the posteriors vector
|
|
||||||
// double log10LofK = logYMatrix[numSamples][k];
|
|
||||||
// log10AlleleFrequencyPosteriors[k] = log10LofK + log10AlleleFrequencyPriors[k];
|
|
||||||
//
|
|
||||||
// // can we abort early?
|
|
||||||
// lastK = k;
|
|
||||||
// maxLog10L = Math.max(maxLog10L, log10LofK);
|
|
||||||
// if ( log10LofK < maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY ) {
|
|
||||||
// if ( PRINT_MAD_AC_POSTERIORS )
|
|
||||||
// System.out.printf(" *** breaking early k=%d log10L=%.2f maxLog10L=%.2f%n", k, log10LofK, maxLog10L);
|
|
||||||
// done = true;
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// if ( PRINT_MAD_AC_POSTERIORS ) {
|
|
||||||
// System.out.printf("----------------------------------------%n");
|
|
||||||
// for (int k=0; k <= numChr; k++) {
|
|
||||||
// System.out.printf(" %d\t%.2f\t%.2f\t%b%n", k,
|
|
||||||
// log10AlleleFrequencyPosteriors[k], gsPosteriors[k],
|
|
||||||
// log10AlleleFrequencyPosteriors[k] == gsPosteriors[k]);
|
|
||||||
// }
|
|
||||||
// double log10thisPVar = Math.log10(MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors)[0]);
|
|
||||||
// double log10gsPVar = Math.log10(MathUtils.normalizeFromLog10(gsPosteriors)[0]);
|
|
||||||
// System.out.printf("MAD_AC\t%d\t%d\t%.2f\t%.2f\t%.6f%n",
|
|
||||||
// ref.getLocus().getStart(), lastK, log10thisPVar, log10gsPVar, log10thisPVar - log10gsPVar);
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// if ( PRINT_LIKELIHOODS ) printLikelihoods(numChr, logYMatrix, log10AlleleFrequencyPriors);
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
//
|
|
||||||
// public void gdaN2GoldStandard(Map<String, Genotype> GLs,
|
|
||||||
// double[] log10AlleleFrequencyPriors,
|
|
||||||
// double[] log10AlleleFrequencyPosteriors) {
|
|
||||||
// int numSamples = GLs.size();
|
|
||||||
// int numChr = 2*numSamples;
|
|
||||||
//
|
|
||||||
// double[][] logYMatrix = new double[1+numSamples][1+numChr];
|
|
||||||
//
|
|
||||||
// for (int i=0; i <=numSamples; i++)
|
|
||||||
// for (int j=0; j <=numChr; j++)
|
|
||||||
// logYMatrix[i][j] = Double.NEGATIVE_INFINITY;
|
|
||||||
//
|
|
||||||
// //YMatrix[0][0] = 1.0;
|
|
||||||
// logYMatrix[0][0] = 0.0;
|
|
||||||
// int j=0;
|
|
||||||
//
|
|
||||||
// for ( Map.Entry<String, Genotype> sample : GLs.entrySet() ) {
|
|
||||||
// j++;
|
|
||||||
//
|
|
||||||
// if ( !sample.getValue().hasLikelihoods() )
|
|
||||||
// continue;
|
|
||||||
//
|
|
||||||
// //double[] genotypeLikelihoods = MathUtils.normalizeFromLog10(GLs.get(sample).getLikelihoods());
|
|
||||||
// double[] genotypeLikelihoods = sample.getValue().getLikelihoods().getAsVector();
|
|
||||||
// //double logDenominator = Math.log10(2.0*j*(2.0*j-1));
|
|
||||||
// double logDenominator = log10Cache[2*j] + log10Cache[2*j-1];
|
|
||||||
//
|
|
||||||
// // special treatment for k=0: iteration reduces to:
|
|
||||||
// //YMatrix[j][0] = YMatrix[j-1][0]*genotypeLikelihoods[GenotypeType.AA.ordinal()];
|
|
||||||
// logYMatrix[j][0] = logYMatrix[j-1][0] + genotypeLikelihoods[GenotypeType.AA.ordinal()];
|
|
||||||
//
|
|
||||||
// for (int k=1; k <= 2*j; k++ ) {
|
|
||||||
//
|
|
||||||
// //double num = (2.0*j-k)*(2.0*j-k-1)*YMatrix[j-1][k] * genotypeLikelihoods[GenotypeType.AA.ordinal()];
|
|
||||||
// double logNumerator[];
|
|
||||||
// logNumerator = new double[3];
|
|
||||||
// if (k < 2*j-1)
|
|
||||||
// logNumerator[0] = log10Cache[2*j-k] + log10Cache[2*j-k-1] + logYMatrix[j-1][k] +
|
|
||||||
// genotypeLikelihoods[GenotypeType.AA.ordinal()];
|
|
||||||
// else
|
|
||||||
// logNumerator[0] = Double.NEGATIVE_INFINITY;
|
|
||||||
//
|
|
||||||
//
|
|
||||||
// if (k < 2*j)
|
|
||||||
// logNumerator[1] = log10Cache[2*k] + log10Cache[2*j-k]+ logYMatrix[j-1][k-1] +
|
|
||||||
// genotypeLikelihoods[GenotypeType.AB.ordinal()];
|
|
||||||
// else
|
|
||||||
// logNumerator[1] = Double.NEGATIVE_INFINITY;
|
|
||||||
//
|
|
||||||
// if (k > 1)
|
|
||||||
// logNumerator[2] = log10Cache[k] + log10Cache[k-1] + logYMatrix[j-1][k-2] +
|
|
||||||
// genotypeLikelihoods[GenotypeType.BB.ordinal()];
|
|
||||||
// else
|
|
||||||
// logNumerator[2] = Double.NEGATIVE_INFINITY;
|
|
||||||
//
|
|
||||||
// double logNum = softMax(logNumerator);
|
|
||||||
//
|
|
||||||
// //YMatrix[j][k] = num/den;
|
|
||||||
// logYMatrix[j][k] = logNum - logDenominator;
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
//
|
|
||||||
// for (int k=0; k <= numChr; k++)
|
|
||||||
// log10AlleleFrequencyPosteriors[k] = logYMatrix[j][k] + log10AlleleFrequencyPriors[k];
|
|
||||||
//
|
|
||||||
// if ( PRINT_LIKELIHOODS ) printLikelihoods(numChr, logYMatrix, log10AlleleFrequencyPriors);
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// private final static void printLikelihoods(int numChr, double[][] logYMatrix, double[] log10AlleleFrequencyPriors) {
|
|
||||||
// int j = logYMatrix.length - 1;
|
|
||||||
// System.out.printf("-----------------------------------%n");
|
|
||||||
// for (int k=0; k <= numChr; k++) {
|
|
||||||
// double posterior = logYMatrix[j][k] + log10AlleleFrequencyPriors[k];
|
|
||||||
// System.out.printf(" %4d\t%8.2f\t%8.2f\t%8.2f%n", k, logYMatrix[j][k], log10AlleleFrequencyPriors[k], posterior);
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// double softMax(double[] vec) {
|
|
||||||
// // compute naively log10(10^x[0] + 10^x[1]+...)
|
|
||||||
// // return Math.log10(MathUtils.sumLog10(vec));
|
|
||||||
//
|
|
||||||
// // better approximation: do Jacobian logarithm function on data pairs
|
|
||||||
// double a = softMaxPair(vec[0],vec[1]);
|
|
||||||
// return softMaxPair(a,vec[2]);
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// static public double softMaxPair(double x, double y) {
|
|
||||||
// if (Double.isInfinite(x))
|
|
||||||
// return y;
|
|
||||||
//
|
|
||||||
// if (Double.isInfinite(y))
|
|
||||||
// return x;
|
|
||||||
//
|
|
||||||
// if (y >= x + MAX_JACOBIAN_TOLERANCE)
|
|
||||||
// return y;
|
|
||||||
// if (x >= y + MAX_JACOBIAN_TOLERANCE)
|
|
||||||
// return x;
|
|
||||||
//
|
|
||||||
// // OK, so |y-x| < tol: we use the following identity then:
|
|
||||||
// // we need to compute log10(10^x + 10^y)
|
|
||||||
// // By Jacobian logarithm identity, this is equal to
|
|
||||||
// // max(x,y) + log10(1+10^-abs(x-y))
|
|
||||||
// // we compute the second term as a table lookup
|
|
||||||
// // with integer quantization
|
|
||||||
// double diff = Math.abs(x-y);
|
|
||||||
// double t1 =x;
|
|
||||||
// if (y > x)
|
|
||||||
// t1 = y;
|
|
||||||
// // t has max(x,y)
|
|
||||||
// // we have pre-stored correction for 0,0.1,0.2,... 10.0
|
|
||||||
// int ind = (int)Math.round(diff/JACOBIAN_LOG_TABLE_STEP);
|
|
||||||
// double t2 = jacobianLogTable[ind];
|
|
||||||
//
|
|
||||||
// // gdebug+
|
|
||||||
// //double z =Math.log10(1+Math.pow(10.0,-diff));
|
|
||||||
// //System.out.format("x: %f, y:%f, app: %f, true: %f ind:%d\n",x,y,t2,z,ind);
|
|
||||||
// //gdebug-
|
|
||||||
// return t1+t2;
|
|
||||||
// // return Math.log10(Math.pow(10.0,x) + Math.pow(10.0,y));
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
//
|
|
||||||
//
|
|
||||||
// /**
|
|
||||||
// * Can be overridden by concrete subclasses
|
|
||||||
// * @param vc variant context with genotype likelihoods
|
|
||||||
// * @param log10AlleleFrequencyPosteriors allele frequency results
|
|
||||||
// * @param AFofMaxLikelihood allele frequency of max likelihood
|
|
||||||
// *
|
|
||||||
// * @return calls
|
|
||||||
// */
|
|
||||||
// public Map<String, Genotype> assignGenotypes(VariantContext vc,
|
|
||||||
// double[] log10AlleleFrequencyPosteriors,
|
|
||||||
// int AFofMaxLikelihood) {
|
|
||||||
// if ( !vc.isVariant() )
|
|
||||||
// throw new UserException("The VCF record passed in does not contain an ALT allele at " + vc.getChr() + ":" + vc.getStart());
|
|
||||||
//
|
|
||||||
// Allele refAllele = vc.getReference();
|
|
||||||
// Allele altAllele = vc.getAlternateAllele(0);
|
|
||||||
//
|
|
||||||
// Map<String, Genotype> GLs = vc.getGenotypes();
|
|
||||||
// double[][] pathMetricArray = new double[GLs.size()+1][AFofMaxLikelihood+1];
|
|
||||||
// int[][] tracebackArray = new int[GLs.size()+1][AFofMaxLikelihood+1];
|
|
||||||
//
|
|
||||||
// ArrayList<String> sampleIndices = new ArrayList<String>();
|
|
||||||
// int sampleIdx = 0;
|
|
||||||
//
|
|
||||||
// // todo - optimize initialization
|
|
||||||
// for (int k=0; k <= AFofMaxLikelihood; k++)
|
|
||||||
// for (int j=0; j <= GLs.size(); j++)
|
|
||||||
// pathMetricArray[j][k] = -1e30;
|
|
||||||
//
|
|
||||||
// pathMetricArray[0][0] = 0.0;
|
|
||||||
//
|
|
||||||
// if (SIMPLE_GREEDY_GENOTYPER) {
|
|
||||||
// sampleIndices.addAll(GLs.keySet());
|
|
||||||
// sampleIdx = GLs.size();
|
|
||||||
// }
|
|
||||||
// else {
|
|
||||||
//
|
|
||||||
// for ( Map.Entry<String, Genotype> sample : GLs.entrySet() ) {
|
|
||||||
// if ( !sample.getValue().hasLikelihoods() )
|
|
||||||
// continue;
|
|
||||||
//
|
|
||||||
// double[] likelihoods = sample.getValue().getLikelihoods().getAsVector();
|
|
||||||
// sampleIndices.add(sample.getKey());
|
|
||||||
//
|
|
||||||
// for (int k=0; k <= AFofMaxLikelihood; k++) {
|
|
||||||
//
|
|
||||||
// double bestMetric = pathMetricArray[sampleIdx][k] + likelihoods[0];
|
|
||||||
// int bestIndex = k;
|
|
||||||
//
|
|
||||||
// if (k>0) {
|
|
||||||
// double m2 = pathMetricArray[sampleIdx][k-1] + likelihoods[1];
|
|
||||||
// if (m2 > bestMetric) {
|
|
||||||
// bestMetric = m2;
|
|
||||||
// bestIndex = k-1;
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// if (k>1) {
|
|
||||||
// double m2 = pathMetricArray[sampleIdx][k-2] + likelihoods[2];
|
|
||||||
// if (m2 > bestMetric) {
|
|
||||||
// bestMetric = m2;
|
|
||||||
// bestIndex = k-2;
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// pathMetricArray[sampleIdx+1][k] = bestMetric;
|
|
||||||
// tracebackArray[sampleIdx+1][k] = bestIndex;
|
|
||||||
// }
|
|
||||||
// sampleIdx++;
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// HashMap<String, Genotype> calls = new HashMap<String, Genotype>();
|
|
||||||
//
|
|
||||||
// int startIdx = AFofMaxLikelihood;
|
|
||||||
// for (int k = sampleIdx; k > 0; k--) {
|
|
||||||
// int bestGTguess;
|
|
||||||
// String sample = sampleIndices.get(k-1);
|
|
||||||
// Genotype g = GLs.get(sample);
|
|
||||||
// if ( !g.hasLikelihoods() )
|
|
||||||
// continue;
|
|
||||||
//
|
|
||||||
// if (SIMPLE_GREEDY_GENOTYPER)
|
|
||||||
// bestGTguess = Utils.findIndexOfMaxEntry(g.getLikelihoods().getAsVector());
|
|
||||||
// else {
|
|
||||||
// int newIdx = tracebackArray[k][startIdx];
|
|
||||||
// bestGTguess = startIdx - newIdx;
|
|
||||||
// startIdx = newIdx;
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// ArrayList<Allele> myAlleles = new ArrayList<Allele>();
|
|
||||||
//
|
|
||||||
// double qual;
|
|
||||||
// double[] likelihoods = g.getLikelihoods().getAsVector();
|
|
||||||
//
|
|
||||||
// if (bestGTguess == 0) {
|
|
||||||
// myAlleles.add(refAllele);
|
|
||||||
// myAlleles.add(refAllele);
|
|
||||||
// qual = likelihoods[0] - Math.max(likelihoods[1], likelihoods[2]);
|
|
||||||
// } else if(bestGTguess == 1) {
|
|
||||||
// myAlleles.add(refAllele);
|
|
||||||
// myAlleles.add(altAllele);
|
|
||||||
// qual = likelihoods[1] - Math.max(likelihoods[0], likelihoods[2]);
|
|
||||||
//
|
|
||||||
// } else {
|
|
||||||
// myAlleles.add(altAllele);
|
|
||||||
// myAlleles.add(altAllele);
|
|
||||||
// qual = likelihoods[2] - Math.max(likelihoods[1], likelihoods[0]);
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
//
|
|
||||||
// if (qual < 0) {
|
|
||||||
// // QUAL can be negative if the chosen genotype is not the most likely one individually.
|
|
||||||
// // In this case, we compute the actual genotype probability and QUAL is the likelihood of it not being the chosen on
|
|
||||||
// double[] normalized = MathUtils.normalizeFromLog10(likelihoods);
|
|
||||||
// double chosenGenotype = normalized[bestGTguess];
|
|
||||||
// qual = -1.0 * Math.log10(1.0 - chosenGenotype);
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// calls.put(sample, new Genotype(sample, myAlleles, qual, null, g.getAttributes(), false));
|
|
||||||
//
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// return calls;
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
//}
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue