Slightly optimized calculation for ~linear exact model, as well as totally incorrect banded calculation, for future development, if this proves useful.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5382 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
fc5a071de2
commit
5b8fdc5b1f
|
|
@ -33,6 +33,7 @@ import org.broadinstitute.sting.utils.*;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
import sun.reflect.generics.reflectiveObjects.NotImplementedException;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
import java.io.PrintStream;
|
import java.io.PrintStream;
|
||||||
|
|
@ -44,7 +45,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
||||||
private final static boolean DEBUG = false;
|
private final static boolean DEBUG = false;
|
||||||
private final static boolean PRINT_LIKELIHOODS = false;
|
private final static boolean PRINT_LIKELIHOODS = false;
|
||||||
private final static int N_CYCLES = 1;
|
private final static int N_CYCLES = 1;
|
||||||
private SimpleTimer timerExpt = new SimpleTimer("linearExact");
|
private SimpleTimer timerExpt = new SimpleTimer("linearExactBanded");
|
||||||
private SimpleTimer timerGS = new SimpleTimer("linearExactGS");
|
private SimpleTimer timerGS = new SimpleTimer("linearExactGS");
|
||||||
private final static boolean COMPARE_TO_GS = false;
|
private final static boolean COMPARE_TO_GS = false;
|
||||||
|
|
||||||
|
|
@ -102,11 +103,11 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
||||||
if ( N_CYCLES > 1 ) {
|
if ( N_CYCLES > 1 ) {
|
||||||
for ( int i = 0; i < N_CYCLES; i++) {
|
for ( int i = 0; i < N_CYCLES; i++) {
|
||||||
timerGS.restart();
|
timerGS.restart();
|
||||||
linearExactClean(GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors.clone());
|
linearExact(GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors.clone());
|
||||||
timerGS.stop();
|
timerGS.stop();
|
||||||
|
|
||||||
timerExpt.restart();
|
timerExpt.restart();
|
||||||
linearExact(GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors.clone());
|
linearExactBanded(GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors.clone());
|
||||||
timerExpt.stop();
|
timerExpt.stop();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -137,8 +138,9 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
||||||
if ( ! eq || PRINT_LIKELIHOODS ) {
|
if ( ! eq || PRINT_LIKELIHOODS ) {
|
||||||
System.out.printf("----------------------------------------%n");
|
System.out.printf("----------------------------------------%n");
|
||||||
for (int k=0; k < log10AlleleFrequencyPosteriors.length; k++) {
|
for (int k=0; k < log10AlleleFrequencyPosteriors.length; k++) {
|
||||||
|
double x = log10AlleleFrequencyPosteriors[k];
|
||||||
System.out.printf(" %d\t%.2f\t%.2f\t%b%n", k,
|
System.out.printf(" %d\t%.2f\t%.2f\t%b%n", k,
|
||||||
log10AlleleFrequencyPosteriors[k], gsPosteriors[k],
|
x < -1e10 ? Double.NEGATIVE_INFINITY : x, gsPosteriors[k],
|
||||||
log10AlleleFrequencyPosteriors[k] == gsPosteriors[k]);
|
log10AlleleFrequencyPosteriors[k] == gsPosteriors[k]);
|
||||||
}
|
}
|
||||||
System.out.printf("MAD_AC\t%d\t%d\t%.2f\t%.2f\t%.6f%n",
|
System.out.printf("MAD_AC\t%d\t%d\t%.2f\t%.2f\t%.6f%n",
|
||||||
|
|
@ -207,6 +209,84 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// now with banding
|
||||||
|
public int linearExactBanded(Map<String, Genotype> GLs,
|
||||||
|
double[] log10AlleleFrequencyPriors,
|
||||||
|
double[] log10AlleleFrequencyPosteriors) {
|
||||||
|
throw new NotImplementedException();
|
||||||
|
// final int numSamples = GLs.size();
|
||||||
|
// final int numChr = 2*numSamples;
|
||||||
|
// final double[][] genotypeLikelihoods = getGLs(GLs);
|
||||||
|
//
|
||||||
|
// final ExactACCache logY = new ExactACCache(numSamples+1);
|
||||||
|
// logY.getkMinus0()[0] = 0.0; // the zero case
|
||||||
|
//
|
||||||
|
// double maxLog10L = Double.NEGATIVE_INFINITY;
|
||||||
|
// boolean done = false;
|
||||||
|
// int lastK = -1;
|
||||||
|
// final int BAND_SIZE = 10;
|
||||||
|
//
|
||||||
|
// for (int k=0; k <= numChr && ! done; k++ ) {
|
||||||
|
// final double[] kMinus0 = logY.getkMinus0();
|
||||||
|
// int jStart = Math.max(k - BAND_SIZE, 1);
|
||||||
|
// int jStop = Math.min(k + BAND_SIZE, numSamples);
|
||||||
|
//
|
||||||
|
// if ( k == 0 ) { // special case for k = 0
|
||||||
|
// for ( int j=1; j <= numSamples; j++ ) {
|
||||||
|
// kMinus0[j] = kMinus0[j-1] + genotypeLikelihoods[j][GenotypeType.AA.ordinal()];
|
||||||
|
// }
|
||||||
|
// } else { // k > 0
|
||||||
|
// final double[] kMinus1 = logY.getkMinus1();
|
||||||
|
// final double[] kMinus2 = logY.getkMinus2();
|
||||||
|
// Arrays.fill(kMinus0,0);
|
||||||
|
//
|
||||||
|
// for ( int j = jStart; j <= jStop; j++ ) {
|
||||||
|
// final double[] gl = genotypeLikelihoods[j];
|
||||||
|
// final double logDenominator = log10Cache[2*j] + log10Cache[2*j-1];
|
||||||
|
//
|
||||||
|
// double aa = Double.NEGATIVE_INFINITY;
|
||||||
|
// double ab = Double.NEGATIVE_INFINITY;
|
||||||
|
// if (k < 2*j-1)
|
||||||
|
// aa = log10Cache[2*j-k] + log10Cache[2*j-k-1] + kMinus0[j-1] + gl[GenotypeType.AA.ordinal()];
|
||||||
|
//
|
||||||
|
// if (k < 2*j)
|
||||||
|
// ab = log10Cache[2*k] + log10Cache[2*j-k]+ kMinus1[j-1] + gl[GenotypeType.AB.ordinal()];
|
||||||
|
//
|
||||||
|
// double log10Max;
|
||||||
|
// if (k > 1) {
|
||||||
|
// final double bb = log10Cache[k] + log10Cache[k-1] + kMinus2[j-1] + gl[GenotypeType.BB.ordinal()];
|
||||||
|
// log10Max = approximateLog10SumLog10(aa, ab, bb);
|
||||||
|
// } else {
|
||||||
|
// // we know we aren't considering the BB case, so we can use an optimized log10 function
|
||||||
|
// log10Max = approximateLog10SumLog10(aa, ab);
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// // finally, update the L(j,k) value
|
||||||
|
// kMinus0[j] = log10Max - logDenominator;
|
||||||
|
//
|
||||||
|
// String offset = Utils.dupString(' ',k);
|
||||||
|
// System.out.printf("%s%3d %3d %.2f%n", offset, k, j, kMinus0[j]);
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// // update the posteriors vector
|
||||||
|
// final double log10LofK = kMinus0[jStop];
|
||||||
|
// log10AlleleFrequencyPosteriors[k] = log10LofK + log10AlleleFrequencyPriors[k];
|
||||||
|
//
|
||||||
|
// // can we abort early?
|
||||||
|
// lastK = k;
|
||||||
|
// maxLog10L = Math.max(maxLog10L, log10LofK);
|
||||||
|
// if ( log10LofK < maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY ) {
|
||||||
|
// if ( DEBUG ) System.out.printf(" *** breaking early k=%d log10L=%.2f maxLog10L=%.2f%n", k, log10LofK, maxLog10L);
|
||||||
|
// done = true;
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// logY.rotate();
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// return lastK;
|
||||||
|
}
|
||||||
|
|
||||||
public int linearExact(Map<String, Genotype> GLs,
|
public int linearExact(Map<String, Genotype> GLs,
|
||||||
double[] log10AlleleFrequencyPriors,
|
double[] log10AlleleFrequencyPriors,
|
||||||
double[] log10AlleleFrequencyPosteriors) {
|
double[] log10AlleleFrequencyPosteriors) {
|
||||||
|
|
@ -310,81 +390,6 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
||||||
return big + jacobianLogTable[ind];
|
return big + jacobianLogTable[ind];
|
||||||
}
|
}
|
||||||
|
|
||||||
public int linearExactClean(Map<String, Genotype> GLs,
|
|
||||||
double[] log10AlleleFrequencyPriors,
|
|
||||||
double[] log10AlleleFrequencyPosteriors) {
|
|
||||||
int numSamples = GLs.size();
|
|
||||||
int numChr = 2*numSamples;
|
|
||||||
double[][] genotypeLikelihoods = getGLs(GLs); // todo -- remove me, not sure this is helping
|
|
||||||
double[] logNumerator = new double[3];
|
|
||||||
|
|
||||||
// set posteriors to negative infinity by default:
|
|
||||||
//Arrays.fill(log10AlleleFrequencyPosteriors, Double.NEGATIVE_INFINITY);
|
|
||||||
|
|
||||||
ExactACCache logY = new ExactACCache(numSamples+1);
|
|
||||||
logY.getkMinus0()[0] = 0.0; // the zero case
|
|
||||||
|
|
||||||
double maxLog10L = Double.NEGATIVE_INFINITY;
|
|
||||||
boolean done = false;
|
|
||||||
int lastK = -1;
|
|
||||||
|
|
||||||
// todo -- we may be able to start second loop some way down the calculation, since GdAs loop only
|
|
||||||
// todo -- considers part of the matrix as well
|
|
||||||
for (int k=0; k <= numChr && ! done; k++ ) {
|
|
||||||
double[] kMinus0 = logY.getkMinus0();
|
|
||||||
double[] kMinus1 = logY.getkMinus1();
|
|
||||||
double[] kMinus2 = logY.getkMinus2();
|
|
||||||
|
|
||||||
if ( k == 0 ) {
|
|
||||||
// special case for k = 0
|
|
||||||
for ( int j=1; j <= numSamples; j++ ) {
|
|
||||||
kMinus0[j] = kMinus0[j-1] + genotypeLikelihoods[j][GenotypeType.AA.ordinal()];
|
|
||||||
}
|
|
||||||
} else { // k > 0
|
|
||||||
for ( int j=1; j <= numSamples; j++ ) {
|
|
||||||
double[] gl = genotypeLikelihoods[j];
|
|
||||||
double logDenominator = log10Cache[2*j] + log10Cache[2*j-1];
|
|
||||||
|
|
||||||
if (k < 2*j-1)
|
|
||||||
logNumerator[0] = log10Cache[2*j-k] + log10Cache[2*j-k-1] + kMinus0[j-1] +
|
|
||||||
gl[GenotypeType.AA.ordinal()];
|
|
||||||
else
|
|
||||||
logNumerator[0] = Double.NEGATIVE_INFINITY;
|
|
||||||
|
|
||||||
if (k < 2*j)
|
|
||||||
logNumerator[1] = log10Cache[2*k] + log10Cache[2*j-k]+ kMinus1[j-1] +
|
|
||||||
gl[GenotypeType.AB.ordinal()];
|
|
||||||
else
|
|
||||||
logNumerator[1] = Double.NEGATIVE_INFINITY;
|
|
||||||
|
|
||||||
if (k > 1)
|
|
||||||
logNumerator[2] = log10Cache[k] + log10Cache[k-1] + kMinus2[j-1] +
|
|
||||||
gl[GenotypeType.BB.ordinal()];
|
|
||||||
else
|
|
||||||
logNumerator[2] = Double.NEGATIVE_INFINITY;
|
|
||||||
|
|
||||||
kMinus0[j] = softMax(logNumerator) - logDenominator;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// update the posteriors vector
|
|
||||||
double log10LofK = kMinus0[numSamples];
|
|
||||||
log10AlleleFrequencyPosteriors[k] = log10LofK + log10AlleleFrequencyPriors[k];
|
|
||||||
|
|
||||||
// can we abort early?
|
|
||||||
lastK = k;
|
|
||||||
maxLog10L = Math.max(maxLog10L, log10LofK);
|
|
||||||
if ( log10LofK < maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY ) {
|
|
||||||
if ( DEBUG ) System.out.printf(" *** breaking early k=%d log10L=%.2f maxLog10L=%.2f%n", k, log10LofK, maxLog10L);
|
|
||||||
done = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
logY.rotate();
|
|
||||||
}
|
|
||||||
|
|
||||||
return lastK;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Can be overridden by concrete subclasses
|
* Can be overridden by concrete subclasses
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue