Refactored the static lookup arrays in MathUtils (log10Cache, log10FactorialCache, jacobianLogTable)

-They are now only computed when necessary
 -Log10Cache is dynamically resizable, either by calling get() on an out-of-range value or by calling ensureCacheContains
 -Log10FactorialCache and JacobianLogTable are initialized to a fixed size on first access and are not resizable
 -Addresses PT 69124396
This commit is contained in:
Phillip Dexheimer 2014-05-18 20:26:24 -04:00
parent b77589696e
commit c15e6fcc0e
6 changed files with 118 additions and 43 deletions

View File

@ -247,7 +247,7 @@ public class GeneralPloidyIndelGenotypeLikelihoods extends GeneralPloidyGenotype
for (int i=0; i < readHaplotypeLikelihoods.length; i++) {
double acc[] = new double[alleleList.size()];
for (int k=0; k < acc.length; k++ )
acc[k] = readHaplotypeLikelihoods[i][k] + MathUtils.log10Cache[currentCnt[k]]-LOG10_PLOIDY;
acc[k] = readHaplotypeLikelihoods[i][k] + MathUtils.Log10Cache.get(currentCnt[k])-LOG10_PLOIDY;
p1 += MathUtils.log10sumLog10(acc);
}

View File

@ -291,8 +291,8 @@ public class GeneralPloidySNPGenotypeLikelihoods extends GeneralPloidyGenotypeLi
continue;
final double acc[] = new double[ACset.getACcounts().getCounts().length];
for (int k=0; k < acc.length; k++ )
acc[k] = qualLikelihoodCache[BaseUtils.simpleBaseToBaseIndex(alleleList.get(k).getBases()[0])][BaseUtils.simpleBaseToBaseIndex(obsBase)][qual] +MathUtils.log10Cache[ACset.getACcounts().getCounts()[k]]
- LOG10_PLOIDY;
acc[k] = qualLikelihoodCache[BaseUtils.simpleBaseToBaseIndex(alleleList.get(k).getBases()[0])][BaseUtils.simpleBaseToBaseIndex(obsBase)][qual] +
MathUtils.Log10Cache.get(ACset.getACcounts().getCounts()[k]) - LOG10_PLOIDY;
p1 += MathUtils.log10sumLog10(acc);
}
}

View File

@ -155,6 +155,7 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl
this.toolkit = toolkit;
this.sampleNames = sampleNames != null ? sampleNames : toolkit.getSampleDB().getSampleNames();
numberOfGenomes = this.sampleNames.size() * configuration.samplePloidy;
MathUtils.Log10Cache.ensureCacheContains(numberOfGenomes * 2);
log10AlleleFrequencyPriorsSNPs = computeAlleleFrequencyPriors(numberOfGenomes,
configuration.snpHeterozygosity,configuration.inputPrior);
log10AlleleFrequencyPriorsIndels = computeAlleleFrequencyPriors(numberOfGenomes,

View File

@ -56,6 +56,9 @@ import htsjdk.variant.variantcontext.VariantContext;
import java.util.*;
public abstract class DiploidExactAFCalc extends ExactAFCalc {
private static final double LOG10_OF_2 = MathUtils.Log10Cache.get(2);
public DiploidExactAFCalc(final int nSamples, final int maxAltAlleles, final int ploidy) {
super(nSamples, maxAltAlleles, ploidy);
if ( ploidy != 2 ) throw new IllegalArgumentException("ploidy must be two for DiploidExactAFCalc and subclasses but saw " + ploidy);
@ -247,11 +250,11 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc {
if ( totalK < 2*j-1 ) {
final double[] gl = genotypeLikelihoods.get(j);
final double conformationValue = MathUtils.log10Cache[2*j-totalK] + MathUtils.log10Cache[2*j-totalK-1] + set.getLog10Likelihoods()[j-1] + gl[HOM_REF_INDEX];
final double conformationValue = MathUtils.Log10Cache.get(2*j-totalK) + MathUtils.Log10Cache.get(2*j-totalK-1) + set.getLog10Likelihoods()[j-1] + gl[HOM_REF_INDEX];
set.getLog10Likelihoods()[j] = MathUtils.approximateLog10SumLog10(set.getLog10Likelihoods()[j], conformationValue);
}
final double logDenominator = MathUtils.log10Cache[2*j] + MathUtils.log10Cache[2*j-1];
final double logDenominator = MathUtils.Log10Cache.get(2*j) + MathUtils.Log10Cache.get(2*j-1);
set.getLog10Likelihoods()[j] = set.getLog10Likelihoods()[j] - logDenominator;
}
@ -303,19 +306,19 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc {
// the AX het case
if ( alleles.alleleIndex1 == 0 )
return MathUtils.log10Cache[2*ACcounts[alleles.alleleIndex2-1]] + MathUtils.log10Cache[2*j-totalK];
return MathUtils.Log10Cache.get(2*ACcounts[alleles.alleleIndex2-1]) + MathUtils.Log10Cache.get(2*j-totalK);
final int k_i = ACcounts[alleles.alleleIndex1-1];
// the hom var case (e.g. BB, CC, DD)
final double coeff;
if ( alleles.alleleIndex1 == alleles.alleleIndex2 ) {
coeff = MathUtils.log10Cache[k_i] + MathUtils.log10Cache[k_i - 1];
coeff = MathUtils.Log10Cache.get(k_i) + MathUtils.Log10Cache.get(k_i - 1);
}
// the het non-ref case (e.g. BC, BD, CD)
else {
final int k_j = ACcounts[alleles.alleleIndex2-1];
coeff = MathUtils.log10Cache[2] + MathUtils.log10Cache[k_i] + MathUtils.log10Cache[k_j];
coeff = LOG10_OF_2 + MathUtils.Log10Cache.get(k_i) + MathUtils.Log10Cache.get(k_j);
}
return coeff;

View File

@ -150,19 +150,19 @@ class OriginalDiploidExactAFCalc extends DiploidExactAFCalc {
for ( int j=1; j <= numSamples; j++ ) {
final double[] gl = genotypeLikelihoods.get(j);
final double logDenominator = MathUtils.log10Cache[2*j] + MathUtils.log10Cache[2*j-1];
final double logDenominator = MathUtils.Log10Cache.get(2*j) + MathUtils.Log10Cache.get(2*j-1);
double aa = Double.NEGATIVE_INFINITY;
double ab = Double.NEGATIVE_INFINITY;
if (k < 2*j-1)
aa = MathUtils.log10Cache[2*j-k] + MathUtils.log10Cache[2*j-k-1] + kMinus0[j-1] + gl[0];
aa = MathUtils.Log10Cache.get(2*j-k) + MathUtils.Log10Cache.get(2*j-k-1) + kMinus0[j-1] + gl[0];
if (k < 2*j)
ab = MathUtils.log10Cache[2*k] + MathUtils.log10Cache[2*j-k]+ kMinus1[j-1] + gl[1];
ab = MathUtils.Log10Cache.get(2*k) + MathUtils.Log10Cache.get(2*j-k)+ kMinus1[j-1] + gl[1];
double log10Max;
if (k > 1) {
final double bb = MathUtils.log10Cache[k] + MathUtils.log10Cache[k-1] + kMinus2[j-1] + gl[2];
final double bb = MathUtils.Log10Cache.get(k) + MathUtils.Log10Cache.get(k-1) + kMinus2[j-1] + gl[2];
log10Max = MathUtils.approximateLog10SumLog10(aa, ab, bb);
} else {
// we know we aren't considering the BB case, so we can use an optimized log10 function

View File

@ -48,16 +48,6 @@ public class MathUtils {
private MathUtils() {
}
public static final double[] log10Cache;
public static final double[] log10FactorialCache;
private static final double[] jacobianLogTable;
private static final double JACOBIAN_LOG_TABLE_STEP = 0.0001;
private static final double JACOBIAN_LOG_TABLE_INV_STEP = 1.0 / JACOBIAN_LOG_TABLE_STEP;
private static final double MAX_JACOBIAN_TOLERANCE = 8.0;
private static final int JACOBIAN_LOG_TABLE_SIZE = (int) (MAX_JACOBIAN_TOLERANCE / JACOBIAN_LOG_TABLE_STEP) + 1;
private static final int MAXN = 100_000;
private static final int LOG10_CACHE_SIZE = 4 * MAXN; // we need to be able to go up to 2*(2N) when calculating some of the coefficients
/**
* The smallest log10 value we'll emit from normalizeFromLog10 and other functions
* where the real-space value is 0.0.
@ -69,22 +59,46 @@ public class MathUtils {
private static final double NATURAL_LOG_OF_TEN = Math.log(10.0);
private static final double SQUARE_ROOT_OF_TWO_TIMES_PI = Math.sqrt(2.0 * Math.PI);
static {
log10Cache = new double[LOG10_CACHE_SIZE];
log10FactorialCache = new double[LOG10_CACHE_SIZE];
jacobianLogTable = new double[JACOBIAN_LOG_TABLE_SIZE];
log10Cache[0] = Double.NEGATIVE_INFINITY;
log10FactorialCache[0] = 0.0;
for (int k = 1; k < LOG10_CACHE_SIZE; k++) {
log10Cache[k] = Math.log10(k);
log10FactorialCache[k] = log10FactorialCache[k-1] + log10Cache[k];
/**
* A helper class to maintain a cache of log10 values
*/
public static class Log10Cache {
/**
* Get the value of log10(n), expanding the cache as necessary
* @param n operand
* @return log10(n)
*/
public static double get(final int n) {
if (n < 0)
throw new ReviewedGATKException(String.format("Can't take the log of a negative number: %d", n));
if (n >= cache.length)
ensureCacheContains(Math.max(n+10, 2*cache.length));
/*
Array lookups are not atomic. It's possible that the reference to cache could be
changed between the time the reference is loaded and the data is fetched from the correct
offset. However, the value retrieved can't change, and it's guaranteed to be present in the
old reference by the conditional above.
*/
return cache[n];
}
for (int k = 0; k < JACOBIAN_LOG_TABLE_SIZE; k++) {
jacobianLogTable[k] = Math.log10(1.0 + Math.pow(10.0, -((double) k) * JACOBIAN_LOG_TABLE_STEP));
/**
* Ensures that the cache contains a value for n. After completion of ensureCacheContains(n),
* #get(n) is guaranteed to return without causing a cache expansion
* @param n desired value to be precomputed
*/
public static synchronized void ensureCacheContains(final int n) {
if (n < cache.length)
return;
final double[] newCache = new double[n + 1];
System.arraycopy(cache, 0, newCache, 0, cache.length);
for (int i=cache.length; i < newCache.length; i++)
newCache[i] = Math.log10(i);
cache = newCache;
}
//initialize with the special case: log10(0) = NEGATIVE_INFINITY
private static double[] cache = new double[] { Double.NEGATIVE_INFINITY };
}
/**
@ -98,6 +112,34 @@ public class MathUtils {
return GenomeAnalysisEngine.getRandomGenerator().nextInt(max - min + 1) + min;
}
/**
* Encapsulates the second term of Jacobian log identity for differences up to MAX_TOLERANCE
*/
private static class JacobianLogTable {
public static final double MAX_TOLERANCE = 8.0;
public static double get(final double difference) {
if (cache == null)
initialize();
final int index = fastRound(difference * INV_STEP);
return cache[index];
}
private static synchronized void initialize() {
if (cache == null) {
final int tableSize = (int) (MAX_TOLERANCE / TABLE_STEP) + 1;
cache = new double[tableSize];
for (int k = 0; k < cache.length; k++)
cache[k] = Math.log10(1.0 + Math.pow(10.0, -((double) k) * TABLE_STEP));
}
}
private static final double TABLE_STEP = 0.0001;
private static final double INV_STEP = 1.0 / TABLE_STEP;
private static double[] cache = null;
}
// A fast implementation of the Math.round() method. This method does not perform
// under/overflow checking, so this shouldn't be used in the general case (but is fine
// if one is already make those checks before calling in to the rounding).
@ -119,10 +161,9 @@ public class MathUtils {
continue;
final double diff = approxSum - vals[i];
if (diff < MathUtils.MAX_JACOBIAN_TOLERANCE) {
if (diff < JacobianLogTable.MAX_TOLERANCE) {
// See notes from the 2-inout implementation below
final int ind = fastRound(diff * MathUtils.JACOBIAN_LOG_TABLE_INV_STEP); // hard rounding
approxSum += MathUtils.jacobianLogTable[ind];
approxSum += JacobianLogTable.get(diff);
}
}
@ -145,7 +186,7 @@ public class MathUtils {
return big;
final double diff = big - small;
if (diff >= MathUtils.MAX_JACOBIAN_TOLERANCE)
if (diff >= JacobianLogTable.MAX_TOLERANCE)
return big;
// OK, so |y-x| < tol: we use the following identity then:
@ -154,8 +195,7 @@ public class MathUtils {
// max(x,y) + log10(1+10^-abs(x-y))
// we compute the second term as a table lookup with integer quantization
// we have pre-stored correction for 0,0.1,0.2,... 10.0
final int ind = fastRound(diff * MathUtils.JACOBIAN_LOG_TABLE_INV_STEP); // hard rounding
return big + MathUtils.jacobianLogTable[ind];
return big + JacobianLogTable.get(diff);
}
public static double sum(final double[] values) {
@ -1408,10 +1448,41 @@ public class MathUtils {
}
public static double log10Factorial(final int x) {
if (x >= log10FactorialCache.length || x < 0)
if (x >= Log10FactorialCache.size() || x < 0)
return log10Gamma(x + 1);
else
return log10FactorialCache[x];
return Log10FactorialCache.get(x);
}
/**
* Wrapper class so that the log10Factorial array is only calculated if it's used
*/
private static class Log10FactorialCache {
/**
* The size of the precomputed cache. Must be a positive number!
*/
private static final int CACHE_SIZE = 10_000;
public static int size() { return CACHE_SIZE; }
public static double get(final int n) {
if (cache == null)
initialize();
return cache[n];
}
private static synchronized void initialize() {
if (cache == null) {
Log10Cache.ensureCacheContains(CACHE_SIZE);
cache = new double[CACHE_SIZE];
cache[0] = 0.0;
for (int k = 1; k < cache.length; k++)
cache[k] = cache[k-1] + Log10Cache.get(k);
}
}
private static double[] cache = null;
}
/**