Slightly optimized calculation for ~linear exact model, as well as totally incorrect banded calculation, for future development, if this proves useful.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5382 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
depristo 2011-03-06 18:47:08 +00:00
parent fc5a071de2
commit 5b8fdc5b1f
1 changed files with 84 additions and 79 deletions

View File

@ -33,6 +33,7 @@ import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.exceptions.UserException;
import sun.reflect.generics.reflectiveObjects.NotImplementedException;
import java.util.*;
import java.io.PrintStream;
@ -44,7 +45,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
private final static boolean DEBUG = false;
private final static boolean PRINT_LIKELIHOODS = false;
private final static int N_CYCLES = 1;
private SimpleTimer timerExpt = new SimpleTimer("linearExact");
private SimpleTimer timerExpt = new SimpleTimer("linearExactBanded");
private SimpleTimer timerGS = new SimpleTimer("linearExactGS");
private final static boolean COMPARE_TO_GS = false;
@ -102,11 +103,11 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
if ( N_CYCLES > 1 ) {
for ( int i = 0; i < N_CYCLES; i++) {
timerGS.restart();
linearExactClean(GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors.clone());
linearExact(GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors.clone());
timerGS.stop();
timerExpt.restart();
linearExact(GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors.clone());
linearExactBanded(GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors.clone());
timerExpt.stop();
}
@ -137,8 +138,9 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
if ( ! eq || PRINT_LIKELIHOODS ) {
System.out.printf("----------------------------------------%n");
for (int k=0; k < log10AlleleFrequencyPosteriors.length; k++) {
double x = log10AlleleFrequencyPosteriors[k];
System.out.printf(" %d\t%.2f\t%.2f\t%b%n", k,
log10AlleleFrequencyPosteriors[k], gsPosteriors[k],
x < -1e10 ? Double.NEGATIVE_INFINITY : x, gsPosteriors[k],
log10AlleleFrequencyPosteriors[k] == gsPosteriors[k]);
}
System.out.printf("MAD_AC\t%d\t%d\t%.2f\t%.2f\t%.6f%n",
@ -207,6 +209,84 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
}
}
// now with banding
public int linearExactBanded(Map<String, Genotype> GLs,
double[] log10AlleleFrequencyPriors,
double[] log10AlleleFrequencyPosteriors) {
throw new NotImplementedException();
// final int numSamples = GLs.size();
// final int numChr = 2*numSamples;
// final double[][] genotypeLikelihoods = getGLs(GLs);
//
// final ExactACCache logY = new ExactACCache(numSamples+1);
// logY.getkMinus0()[0] = 0.0; // the zero case
//
// double maxLog10L = Double.NEGATIVE_INFINITY;
// boolean done = false;
// int lastK = -1;
// final int BAND_SIZE = 10;
//
// for (int k=0; k <= numChr && ! done; k++ ) {
// final double[] kMinus0 = logY.getkMinus0();
// int jStart = Math.max(k - BAND_SIZE, 1);
// int jStop = Math.min(k + BAND_SIZE, numSamples);
//
// if ( k == 0 ) { // special case for k = 0
// for ( int j=1; j <= numSamples; j++ ) {
// kMinus0[j] = kMinus0[j-1] + genotypeLikelihoods[j][GenotypeType.AA.ordinal()];
// }
// } else { // k > 0
// final double[] kMinus1 = logY.getkMinus1();
// final double[] kMinus2 = logY.getkMinus2();
// Arrays.fill(kMinus0,0);
//
// for ( int j = jStart; j <= jStop; j++ ) {
// final double[] gl = genotypeLikelihoods[j];
// final double logDenominator = log10Cache[2*j] + log10Cache[2*j-1];
//
// double aa = Double.NEGATIVE_INFINITY;
// double ab = Double.NEGATIVE_INFINITY;
// if (k < 2*j-1)
// aa = log10Cache[2*j-k] + log10Cache[2*j-k-1] + kMinus0[j-1] + gl[GenotypeType.AA.ordinal()];
//
// if (k < 2*j)
// ab = log10Cache[2*k] + log10Cache[2*j-k]+ kMinus1[j-1] + gl[GenotypeType.AB.ordinal()];
//
// double log10Max;
// if (k > 1) {
// final double bb = log10Cache[k] + log10Cache[k-1] + kMinus2[j-1] + gl[GenotypeType.BB.ordinal()];
// log10Max = approximateLog10SumLog10(aa, ab, bb);
// } else {
// // we know we aren't considering the BB case, so we can use an optimized log10 function
// log10Max = approximateLog10SumLog10(aa, ab);
// }
//
// // finally, update the L(j,k) value
// kMinus0[j] = log10Max - logDenominator;
//
// String offset = Utils.dupString(' ',k);
// System.out.printf("%s%3d %3d %.2f%n", offset, k, j, kMinus0[j]);
// }
// }
//
// // update the posteriors vector
// final double log10LofK = kMinus0[jStop];
// log10AlleleFrequencyPosteriors[k] = log10LofK + log10AlleleFrequencyPriors[k];
//
// // can we abort early?
// lastK = k;
// maxLog10L = Math.max(maxLog10L, log10LofK);
// if ( log10LofK < maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY ) {
// if ( DEBUG ) System.out.printf(" *** breaking early k=%d log10L=%.2f maxLog10L=%.2f%n", k, log10LofK, maxLog10L);
// done = true;
// }
//
// logY.rotate();
// }
//
// return lastK;
}
public int linearExact(Map<String, Genotype> GLs,
double[] log10AlleleFrequencyPriors,
double[] log10AlleleFrequencyPosteriors) {
@ -310,81 +390,6 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
return big + jacobianLogTable[ind];
}
public int linearExactClean(Map<String, Genotype> GLs,
double[] log10AlleleFrequencyPriors,
double[] log10AlleleFrequencyPosteriors) {
int numSamples = GLs.size();
int numChr = 2*numSamples;
double[][] genotypeLikelihoods = getGLs(GLs); // todo -- remove me, not sure this is helping
double[] logNumerator = new double[3];
// set posteriors to negative infinity by default:
//Arrays.fill(log10AlleleFrequencyPosteriors, Double.NEGATIVE_INFINITY);
ExactACCache logY = new ExactACCache(numSamples+1);
logY.getkMinus0()[0] = 0.0; // the zero case
double maxLog10L = Double.NEGATIVE_INFINITY;
boolean done = false;
int lastK = -1;
// todo -- we may be able to start second loop some way down the calculation, since GdAs loop only
// todo -- considers part of the matrix as well
for (int k=0; k <= numChr && ! done; k++ ) {
double[] kMinus0 = logY.getkMinus0();
double[] kMinus1 = logY.getkMinus1();
double[] kMinus2 = logY.getkMinus2();
if ( k == 0 ) {
// special case for k = 0
for ( int j=1; j <= numSamples; j++ ) {
kMinus0[j] = kMinus0[j-1] + genotypeLikelihoods[j][GenotypeType.AA.ordinal()];
}
} else { // k > 0
for ( int j=1; j <= numSamples; j++ ) {
double[] gl = genotypeLikelihoods[j];
double logDenominator = log10Cache[2*j] + log10Cache[2*j-1];
if (k < 2*j-1)
logNumerator[0] = log10Cache[2*j-k] + log10Cache[2*j-k-1] + kMinus0[j-1] +
gl[GenotypeType.AA.ordinal()];
else
logNumerator[0] = Double.NEGATIVE_INFINITY;
if (k < 2*j)
logNumerator[1] = log10Cache[2*k] + log10Cache[2*j-k]+ kMinus1[j-1] +
gl[GenotypeType.AB.ordinal()];
else
logNumerator[1] = Double.NEGATIVE_INFINITY;
if (k > 1)
logNumerator[2] = log10Cache[k] + log10Cache[k-1] + kMinus2[j-1] +
gl[GenotypeType.BB.ordinal()];
else
logNumerator[2] = Double.NEGATIVE_INFINITY;
kMinus0[j] = softMax(logNumerator) - logDenominator;
}
}
// update the posteriors vector
double log10LofK = kMinus0[numSamples];
log10AlleleFrequencyPosteriors[k] = log10LofK + log10AlleleFrequencyPriors[k];
// can we abort early?
lastK = k;
maxLog10L = Math.max(maxLog10L, log10LofK);
if ( log10LofK < maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY ) {
if ( DEBUG ) System.out.printf(" *** breaking early k=%d log10L=%.2f maxLog10L=%.2f%n", k, log10LofK, maxLog10L);
done = true;
}
logY.rotate();
}
return lastK;
}
/**
* Can be overridden by concrete subclasses