Refactored the static lookup arrays in MathUtils (log10Cache, log10FactorialCache, jacobianLogTable)
-They are now only computed when necessary -Log10Cache is dynamically resizable, either by calling get() on an out-of-range value or by calling ensureCacheContains -Log10FactorialCache and JacobianLogTable are initialized to a fixed size on first access and are not resizable -Addresses PT 69124396
This commit is contained in:
parent
b77589696e
commit
c15e6fcc0e
|
|
@ -247,7 +247,7 @@ public class GeneralPloidyIndelGenotypeLikelihoods extends GeneralPloidyGenotype
|
|||
for (int i=0; i < readHaplotypeLikelihoods.length; i++) {
|
||||
double acc[] = new double[alleleList.size()];
|
||||
for (int k=0; k < acc.length; k++ )
|
||||
acc[k] = readHaplotypeLikelihoods[i][k] + MathUtils.log10Cache[currentCnt[k]]-LOG10_PLOIDY;
|
||||
acc[k] = readHaplotypeLikelihoods[i][k] + MathUtils.Log10Cache.get(currentCnt[k])-LOG10_PLOIDY;
|
||||
p1 += MathUtils.log10sumLog10(acc);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -291,8 +291,8 @@ public class GeneralPloidySNPGenotypeLikelihoods extends GeneralPloidyGenotypeLi
|
|||
continue;
|
||||
final double acc[] = new double[ACset.getACcounts().getCounts().length];
|
||||
for (int k=0; k < acc.length; k++ )
|
||||
acc[k] = qualLikelihoodCache[BaseUtils.simpleBaseToBaseIndex(alleleList.get(k).getBases()[0])][BaseUtils.simpleBaseToBaseIndex(obsBase)][qual] +MathUtils.log10Cache[ACset.getACcounts().getCounts()[k]]
|
||||
- LOG10_PLOIDY;
|
||||
acc[k] = qualLikelihoodCache[BaseUtils.simpleBaseToBaseIndex(alleleList.get(k).getBases()[0])][BaseUtils.simpleBaseToBaseIndex(obsBase)][qual] +
|
||||
MathUtils.Log10Cache.get(ACset.getACcounts().getCounts()[k]) - LOG10_PLOIDY;
|
||||
p1 += MathUtils.log10sumLog10(acc);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -155,6 +155,7 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl
|
|||
this.toolkit = toolkit;
|
||||
this.sampleNames = sampleNames != null ? sampleNames : toolkit.getSampleDB().getSampleNames();
|
||||
numberOfGenomes = this.sampleNames.size() * configuration.samplePloidy;
|
||||
MathUtils.Log10Cache.ensureCacheContains(numberOfGenomes * 2);
|
||||
log10AlleleFrequencyPriorsSNPs = computeAlleleFrequencyPriors(numberOfGenomes,
|
||||
configuration.snpHeterozygosity,configuration.inputPrior);
|
||||
log10AlleleFrequencyPriorsIndels = computeAlleleFrequencyPriors(numberOfGenomes,
|
||||
|
|
|
|||
|
|
@ -56,6 +56,9 @@ import htsjdk.variant.variantcontext.VariantContext;
|
|||
import java.util.*;
|
||||
|
||||
public abstract class DiploidExactAFCalc extends ExactAFCalc {
|
||||
|
||||
private static final double LOG10_OF_2 = MathUtils.Log10Cache.get(2);
|
||||
|
||||
public DiploidExactAFCalc(final int nSamples, final int maxAltAlleles, final int ploidy) {
|
||||
super(nSamples, maxAltAlleles, ploidy);
|
||||
if ( ploidy != 2 ) throw new IllegalArgumentException("ploidy must be two for DiploidExactAFCalc and subclasses but saw " + ploidy);
|
||||
|
|
@ -247,11 +250,11 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc {
|
|||
|
||||
if ( totalK < 2*j-1 ) {
|
||||
final double[] gl = genotypeLikelihoods.get(j);
|
||||
final double conformationValue = MathUtils.log10Cache[2*j-totalK] + MathUtils.log10Cache[2*j-totalK-1] + set.getLog10Likelihoods()[j-1] + gl[HOM_REF_INDEX];
|
||||
final double conformationValue = MathUtils.Log10Cache.get(2*j-totalK) + MathUtils.Log10Cache.get(2*j-totalK-1) + set.getLog10Likelihoods()[j-1] + gl[HOM_REF_INDEX];
|
||||
set.getLog10Likelihoods()[j] = MathUtils.approximateLog10SumLog10(set.getLog10Likelihoods()[j], conformationValue);
|
||||
}
|
||||
|
||||
final double logDenominator = MathUtils.log10Cache[2*j] + MathUtils.log10Cache[2*j-1];
|
||||
final double logDenominator = MathUtils.Log10Cache.get(2*j) + MathUtils.Log10Cache.get(2*j-1);
|
||||
set.getLog10Likelihoods()[j] = set.getLog10Likelihoods()[j] - logDenominator;
|
||||
}
|
||||
|
||||
|
|
@ -303,19 +306,19 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc {
|
|||
|
||||
// the AX het case
|
||||
if ( alleles.alleleIndex1 == 0 )
|
||||
return MathUtils.log10Cache[2*ACcounts[alleles.alleleIndex2-1]] + MathUtils.log10Cache[2*j-totalK];
|
||||
return MathUtils.Log10Cache.get(2*ACcounts[alleles.alleleIndex2-1]) + MathUtils.Log10Cache.get(2*j-totalK);
|
||||
|
||||
final int k_i = ACcounts[alleles.alleleIndex1-1];
|
||||
|
||||
// the hom var case (e.g. BB, CC, DD)
|
||||
final double coeff;
|
||||
if ( alleles.alleleIndex1 == alleles.alleleIndex2 ) {
|
||||
coeff = MathUtils.log10Cache[k_i] + MathUtils.log10Cache[k_i - 1];
|
||||
coeff = MathUtils.Log10Cache.get(k_i) + MathUtils.Log10Cache.get(k_i - 1);
|
||||
}
|
||||
// the het non-ref case (e.g. BC, BD, CD)
|
||||
else {
|
||||
final int k_j = ACcounts[alleles.alleleIndex2-1];
|
||||
coeff = MathUtils.log10Cache[2] + MathUtils.log10Cache[k_i] + MathUtils.log10Cache[k_j];
|
||||
coeff = LOG10_OF_2 + MathUtils.Log10Cache.get(k_i) + MathUtils.Log10Cache.get(k_j);
|
||||
}
|
||||
|
||||
return coeff;
|
||||
|
|
|
|||
|
|
@ -150,19 +150,19 @@ class OriginalDiploidExactAFCalc extends DiploidExactAFCalc {
|
|||
|
||||
for ( int j=1; j <= numSamples; j++ ) {
|
||||
final double[] gl = genotypeLikelihoods.get(j);
|
||||
final double logDenominator = MathUtils.log10Cache[2*j] + MathUtils.log10Cache[2*j-1];
|
||||
final double logDenominator = MathUtils.Log10Cache.get(2*j) + MathUtils.Log10Cache.get(2*j-1);
|
||||
|
||||
double aa = Double.NEGATIVE_INFINITY;
|
||||
double ab = Double.NEGATIVE_INFINITY;
|
||||
if (k < 2*j-1)
|
||||
aa = MathUtils.log10Cache[2*j-k] + MathUtils.log10Cache[2*j-k-1] + kMinus0[j-1] + gl[0];
|
||||
aa = MathUtils.Log10Cache.get(2*j-k) + MathUtils.Log10Cache.get(2*j-k-1) + kMinus0[j-1] + gl[0];
|
||||
|
||||
if (k < 2*j)
|
||||
ab = MathUtils.log10Cache[2*k] + MathUtils.log10Cache[2*j-k]+ kMinus1[j-1] + gl[1];
|
||||
ab = MathUtils.Log10Cache.get(2*k) + MathUtils.Log10Cache.get(2*j-k)+ kMinus1[j-1] + gl[1];
|
||||
|
||||
double log10Max;
|
||||
if (k > 1) {
|
||||
final double bb = MathUtils.log10Cache[k] + MathUtils.log10Cache[k-1] + kMinus2[j-1] + gl[2];
|
||||
final double bb = MathUtils.Log10Cache.get(k) + MathUtils.Log10Cache.get(k-1) + kMinus2[j-1] + gl[2];
|
||||
log10Max = MathUtils.approximateLog10SumLog10(aa, ab, bb);
|
||||
} else {
|
||||
// we know we aren't considering the BB case, so we can use an optimized log10 function
|
||||
|
|
|
|||
|
|
@ -48,16 +48,6 @@ public class MathUtils {
|
|||
private MathUtils() {
|
||||
}
|
||||
|
||||
public static final double[] log10Cache;
|
||||
public static final double[] log10FactorialCache;
|
||||
private static final double[] jacobianLogTable;
|
||||
private static final double JACOBIAN_LOG_TABLE_STEP = 0.0001;
|
||||
private static final double JACOBIAN_LOG_TABLE_INV_STEP = 1.0 / JACOBIAN_LOG_TABLE_STEP;
|
||||
private static final double MAX_JACOBIAN_TOLERANCE = 8.0;
|
||||
private static final int JACOBIAN_LOG_TABLE_SIZE = (int) (MAX_JACOBIAN_TOLERANCE / JACOBIAN_LOG_TABLE_STEP) + 1;
|
||||
private static final int MAXN = 100_000;
|
||||
private static final int LOG10_CACHE_SIZE = 4 * MAXN; // we need to be able to go up to 2*(2N) when calculating some of the coefficients
|
||||
|
||||
/**
|
||||
* The smallest log10 value we'll emit from normalizeFromLog10 and other functions
|
||||
* where the real-space value is 0.0.
|
||||
|
|
@ -69,22 +59,46 @@ public class MathUtils {
|
|||
private static final double NATURAL_LOG_OF_TEN = Math.log(10.0);
|
||||
private static final double SQUARE_ROOT_OF_TWO_TIMES_PI = Math.sqrt(2.0 * Math.PI);
|
||||
|
||||
static {
|
||||
log10Cache = new double[LOG10_CACHE_SIZE];
|
||||
log10FactorialCache = new double[LOG10_CACHE_SIZE];
|
||||
jacobianLogTable = new double[JACOBIAN_LOG_TABLE_SIZE];
|
||||
|
||||
log10Cache[0] = Double.NEGATIVE_INFINITY;
|
||||
log10FactorialCache[0] = 0.0;
|
||||
for (int k = 1; k < LOG10_CACHE_SIZE; k++) {
|
||||
log10Cache[k] = Math.log10(k);
|
||||
log10FactorialCache[k] = log10FactorialCache[k-1] + log10Cache[k];
|
||||
/**
|
||||
* A helper class to maintain a cache of log10 values
|
||||
*/
|
||||
public static class Log10Cache {
|
||||
/**
|
||||
* Get the value of log10(n), expanding the cache as necessary
|
||||
* @param n operand
|
||||
* @return log10(n)
|
||||
*/
|
||||
public static double get(final int n) {
|
||||
if (n < 0)
|
||||
throw new ReviewedGATKException(String.format("Can't take the log of a negative number: %d", n));
|
||||
if (n >= cache.length)
|
||||
ensureCacheContains(Math.max(n+10, 2*cache.length));
|
||||
/*
|
||||
Array lookups are not atomic. It's possible that the reference to cache could be
|
||||
changed between the time the reference is loaded and the data is fetched from the correct
|
||||
offset. However, the value retrieved can't change, and it's guaranteed to be present in the
|
||||
old reference by the conditional above.
|
||||
*/
|
||||
return cache[n];
|
||||
}
|
||||
|
||||
for (int k = 0; k < JACOBIAN_LOG_TABLE_SIZE; k++) {
|
||||
jacobianLogTable[k] = Math.log10(1.0 + Math.pow(10.0, -((double) k) * JACOBIAN_LOG_TABLE_STEP));
|
||||
|
||||
/**
|
||||
* Ensures that the cache contains a value for n. After completion of ensureCacheContains(n),
|
||||
* #get(n) is guaranteed to return without causing a cache expansion
|
||||
* @param n desired value to be precomputed
|
||||
*/
|
||||
public static synchronized void ensureCacheContains(final int n) {
|
||||
if (n < cache.length)
|
||||
return;
|
||||
final double[] newCache = new double[n + 1];
|
||||
System.arraycopy(cache, 0, newCache, 0, cache.length);
|
||||
for (int i=cache.length; i < newCache.length; i++)
|
||||
newCache[i] = Math.log10(i);
|
||||
cache = newCache;
|
||||
}
|
||||
|
||||
//initialize with the special case: log10(0) = NEGATIVE_INFINITY
|
||||
private static double[] cache = new double[] { Double.NEGATIVE_INFINITY };
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -98,6 +112,34 @@ public class MathUtils {
|
|||
return GenomeAnalysisEngine.getRandomGenerator().nextInt(max - min + 1) + min;
|
||||
}
|
||||
|
||||
/**
|
||||
* Encapsulates the second term of Jacobian log identity for differences up to MAX_TOLERANCE
|
||||
*/
|
||||
private static class JacobianLogTable {
|
||||
|
||||
public static final double MAX_TOLERANCE = 8.0;
|
||||
|
||||
public static double get(final double difference) {
|
||||
if (cache == null)
|
||||
initialize();
|
||||
final int index = fastRound(difference * INV_STEP);
|
||||
return cache[index];
|
||||
}
|
||||
|
||||
private static synchronized void initialize() {
|
||||
if (cache == null) {
|
||||
final int tableSize = (int) (MAX_TOLERANCE / TABLE_STEP) + 1;
|
||||
cache = new double[tableSize];
|
||||
for (int k = 0; k < cache.length; k++)
|
||||
cache[k] = Math.log10(1.0 + Math.pow(10.0, -((double) k) * TABLE_STEP));
|
||||
}
|
||||
}
|
||||
|
||||
private static final double TABLE_STEP = 0.0001;
|
||||
private static final double INV_STEP = 1.0 / TABLE_STEP;
|
||||
private static double[] cache = null;
|
||||
}
|
||||
|
||||
// A fast implementation of the Math.round() method. This method does not perform
|
||||
// under/overflow checking, so this shouldn't be used in the general case (but is fine
|
||||
// if one is already make those checks before calling in to the rounding).
|
||||
|
|
@ -119,10 +161,9 @@ public class MathUtils {
|
|||
continue;
|
||||
|
||||
final double diff = approxSum - vals[i];
|
||||
if (diff < MathUtils.MAX_JACOBIAN_TOLERANCE) {
|
||||
if (diff < JacobianLogTable.MAX_TOLERANCE) {
|
||||
// See notes from the 2-inout implementation below
|
||||
final int ind = fastRound(diff * MathUtils.JACOBIAN_LOG_TABLE_INV_STEP); // hard rounding
|
||||
approxSum += MathUtils.jacobianLogTable[ind];
|
||||
approxSum += JacobianLogTable.get(diff);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -145,7 +186,7 @@ public class MathUtils {
|
|||
return big;
|
||||
|
||||
final double diff = big - small;
|
||||
if (diff >= MathUtils.MAX_JACOBIAN_TOLERANCE)
|
||||
if (diff >= JacobianLogTable.MAX_TOLERANCE)
|
||||
return big;
|
||||
|
||||
// OK, so |y-x| < tol: we use the following identity then:
|
||||
|
|
@ -154,8 +195,7 @@ public class MathUtils {
|
|||
// max(x,y) + log10(1+10^-abs(x-y))
|
||||
// we compute the second term as a table lookup with integer quantization
|
||||
// we have pre-stored correction for 0,0.1,0.2,... 10.0
|
||||
final int ind = fastRound(diff * MathUtils.JACOBIAN_LOG_TABLE_INV_STEP); // hard rounding
|
||||
return big + MathUtils.jacobianLogTable[ind];
|
||||
return big + JacobianLogTable.get(diff);
|
||||
}
|
||||
|
||||
public static double sum(final double[] values) {
|
||||
|
|
@ -1408,10 +1448,41 @@ public class MathUtils {
|
|||
}
|
||||
|
||||
public static double log10Factorial(final int x) {
|
||||
if (x >= log10FactorialCache.length || x < 0)
|
||||
if (x >= Log10FactorialCache.size() || x < 0)
|
||||
return log10Gamma(x + 1);
|
||||
else
|
||||
return log10FactorialCache[x];
|
||||
return Log10FactorialCache.get(x);
|
||||
}
|
||||
|
||||
/**
|
||||
* Wrapper class so that the log10Factorial array is only calculated if it's used
|
||||
*/
|
||||
private static class Log10FactorialCache {
|
||||
|
||||
/**
|
||||
* The size of the precomputed cache. Must be a positive number!
|
||||
*/
|
||||
private static final int CACHE_SIZE = 10_000;
|
||||
|
||||
public static int size() { return CACHE_SIZE; }
|
||||
|
||||
public static double get(final int n) {
|
||||
if (cache == null)
|
||||
initialize();
|
||||
return cache[n];
|
||||
}
|
||||
|
||||
private static synchronized void initialize() {
|
||||
if (cache == null) {
|
||||
Log10Cache.ensureCacheContains(CACHE_SIZE);
|
||||
cache = new double[CACHE_SIZE];
|
||||
cache[0] = 0.0;
|
||||
for (int k = 1; k < cache.length; k++)
|
||||
cache[k] = cache[k-1] + Log10Cache.get(k);
|
||||
}
|
||||
}
|
||||
|
||||
private static double[] cache = null;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
Loading…
Reference in New Issue