More efficient implementation of the sum of the allele frequency posteriors matrix using a pre-allocated cache as discussed in group meeting last week. Now, when the cache is filled, we safely collapse down to a single value in real space and put the un-re-centered log10 value back into the front of the cache. Thanks to all for the help and advice.
This commit is contained in:
parent
87e6bea6c1
commit
6ddf2170b6
|
|
@ -27,7 +27,6 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
|
|||
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
|
||||
/**
|
||||
|
|
@ -42,12 +41,13 @@ public class AlleleFrequencyCalculationResult {
|
|||
// These variables are intended to contain the MLE and MAP (and their corresponding allele counts) of the site over all alternate alleles
|
||||
private double log10MLE;
|
||||
private double log10MAP;
|
||||
final private int[] alleleCountsOfMLE;
|
||||
final private int[] alleleCountsOfMAP;
|
||||
private final int[] alleleCountsOfMLE;
|
||||
private final int[] alleleCountsOfMAP;
|
||||
|
||||
// The posteriors seen, not including that of AF=0
|
||||
// TODO -- better implementation needed here (see below)
|
||||
private ArrayList<Double> log10PosteriorMatrixValues = new ArrayList<Double>(100000);
|
||||
private static final int POSTERIORS_CACHE_SIZE = 5000;
|
||||
private final double[] log10PosteriorMatrixValues = new double[POSTERIORS_CACHE_SIZE];
|
||||
private int currentPosteriorsCacheIndex = 0;
|
||||
private Double log10PosteriorMatrixSum = null;
|
||||
|
||||
// These variables are intended to contain the likelihood/posterior probability for the site's being monomorphic (i.e. AF=0 for all alternate alleles)
|
||||
|
|
@ -69,14 +69,9 @@ public class AlleleFrequencyCalculationResult {
|
|||
return log10MAP;
|
||||
}
|
||||
|
||||
public double getLog10PosteriorMatrixSum() {
|
||||
public double getLog10PosteriorsMatrixSumWithoutAFzero() {
|
||||
if ( log10PosteriorMatrixSum == null ) {
|
||||
// TODO -- we absolutely need a better implementation here as we don't want to store all values from the matrix in memory;
|
||||
// TODO -- will discuss with the team what the best option is
|
||||
final double[] tmp = new double[log10PosteriorMatrixValues.size()];
|
||||
for ( int i = 0; i < tmp.length; i++ )
|
||||
tmp[i] = log10PosteriorMatrixValues.get(i);
|
||||
log10PosteriorMatrixSum = MathUtils.log10sumLog10(tmp);
|
||||
log10PosteriorMatrixSum = MathUtils.log10sumLog10(log10PosteriorMatrixValues, 0, currentPosteriorsCacheIndex);
|
||||
}
|
||||
return log10PosteriorMatrixSum;
|
||||
}
|
||||
|
|
@ -103,7 +98,7 @@ public class AlleleFrequencyCalculationResult {
|
|||
alleleCountsOfMLE[i] = 0;
|
||||
alleleCountsOfMAP[i] = 0;
|
||||
}
|
||||
log10PosteriorMatrixValues.clear();
|
||||
currentPosteriorsCacheIndex = 0;
|
||||
log10PosteriorMatrixSum = null;
|
||||
}
|
||||
|
||||
|
|
@ -116,7 +111,8 @@ public class AlleleFrequencyCalculationResult {
|
|||
}
|
||||
|
||||
public void updateMAPifNeeded(final double log10LofK, final int[] alleleCountsForK) {
|
||||
log10PosteriorMatrixValues.add(log10LofK);
|
||||
addToPosteriorsCache(log10LofK);
|
||||
|
||||
if ( log10LofK > log10MAP ) {
|
||||
log10MAP = log10LofK;
|
||||
for ( int i = 0; i < alleleCountsForK.length; i++ )
|
||||
|
|
@ -124,6 +120,18 @@ public class AlleleFrequencyCalculationResult {
|
|||
}
|
||||
}
|
||||
|
||||
private void addToPosteriorsCache(final double log10LofK) {
|
||||
// add to the cache
|
||||
log10PosteriorMatrixValues[currentPosteriorsCacheIndex++] = log10LofK;
|
||||
|
||||
// if we've filled up the cache, then condense by summing up all of the values and placing the sum back into the first cell
|
||||
if ( currentPosteriorsCacheIndex == POSTERIORS_CACHE_SIZE ) {
|
||||
final double temporarySum = MathUtils.log10sumLog10(log10PosteriorMatrixValues, 0, currentPosteriorsCacheIndex);
|
||||
log10PosteriorMatrixValues[0] = temporarySum;
|
||||
currentPosteriorsCacheIndex = 1;
|
||||
}
|
||||
}
|
||||
|
||||
public void setLog10LikelihoodOfAFzero(final double log10LikelihoodOfAFzero) {
|
||||
this.log10LikelihoodOfAFzero = log10LikelihoodOfAFzero;
|
||||
if ( log10LikelihoodOfAFzero > log10MLE ) {
|
||||
|
|
|
|||
|
|
@ -326,7 +326,7 @@ public class UnifiedGenotyperEngine {
|
|||
} else {
|
||||
phredScaledConfidence = QualityUtils.phredScaleErrorRate(PofF);
|
||||
if ( Double.isInfinite(phredScaledConfidence) ) {
|
||||
final double sum = AFresult.getLog10PosteriorMatrixSum();
|
||||
final double sum = AFresult.getLog10PosteriorsMatrixSumWithoutAFzero();
|
||||
phredScaledConfidence = (MathUtils.compareDoubles(sum, 0.0) == 0 ? 0 : -10.0 * sum);
|
||||
}
|
||||
}
|
||||
|
|
@ -369,7 +369,7 @@ public class UnifiedGenotyperEngine {
|
|||
|
||||
// the overall lod
|
||||
//double overallLog10PofNull = AFresult.log10AlleleFrequencyPosteriors[0];
|
||||
double overallLog10PofF = AFresult.getLog10PosteriorMatrixSum();
|
||||
double overallLog10PofF = AFresult.getLog10PosteriorsMatrixSumWithoutAFzero();
|
||||
//if ( DEBUG_SLOD ) System.out.println("overallLog10PofF=" + overallLog10PofF);
|
||||
|
||||
List<Allele> alternateAllelesToUse = builder.make().getAlternateAlleles();
|
||||
|
|
@ -380,7 +380,7 @@ public class UnifiedGenotyperEngine {
|
|||
afcm.get().getLog10PNonRef(vcForward, getAlleleFrequencyPriors(model), AFresult);
|
||||
//double[] normalizedLog10Posteriors = MathUtils.normalizeFromLog10(AFresult.log10AlleleFrequencyPosteriors, true);
|
||||
double forwardLog10PofNull = AFresult.getLog10PosteriorOfAFzero();
|
||||
double forwardLog10PofF = AFresult.getLog10PosteriorMatrixSum();
|
||||
double forwardLog10PofF = AFresult.getLog10PosteriorsMatrixSumWithoutAFzero();
|
||||
//if ( DEBUG_SLOD ) System.out.println("forwardLog10PofNull=" + forwardLog10PofNull + ", forwardLog10PofF=" + forwardLog10PofF);
|
||||
|
||||
// the reverse lod
|
||||
|
|
@ -389,7 +389,7 @@ public class UnifiedGenotyperEngine {
|
|||
afcm.get().getLog10PNonRef(vcReverse, getAlleleFrequencyPriors(model), AFresult);
|
||||
//normalizedLog10Posteriors = MathUtils.normalizeFromLog10(AFresult.log10AlleleFrequencyPosteriors, true);
|
||||
double reverseLog10PofNull = AFresult.getLog10PosteriorOfAFzero();
|
||||
double reverseLog10PofF = AFresult.getLog10PosteriorMatrixSum();
|
||||
double reverseLog10PofF = AFresult.getLog10PosteriorsMatrixSumWithoutAFzero();
|
||||
//if ( DEBUG_SLOD ) System.out.println("reverseLog10PofNull=" + reverseLog10PofNull + ", reverseLog10PofF=" + reverseLog10PofF);
|
||||
|
||||
double forwardLod = forwardLog10PofF + reverseLog10PofNull - overallLog10PofF;
|
||||
|
|
@ -424,7 +424,7 @@ public class UnifiedGenotyperEngine {
|
|||
|
||||
public static double[] generateNormalizedPosteriors(final AlleleFrequencyCalculationResult AFresult, final double[] normalizedPosteriors) {
|
||||
normalizedPosteriors[0] = AFresult.getLog10PosteriorOfAFzero();
|
||||
normalizedPosteriors[1] = AFresult.getLog10PosteriorMatrixSum();
|
||||
normalizedPosteriors[1] = AFresult.getLog10PosteriorsMatrixSumWithoutAFzero();
|
||||
return MathUtils.normalizeFromLog10(normalizedPosteriors);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -237,7 +237,7 @@ public class MathUtils {
|
|||
public static double log10sumLog10(double[] log10p, int start, int finish) {
|
||||
double sum = 0.0;
|
||||
|
||||
double maxValue = Utils.findMaxEntry(log10p);
|
||||
double maxValue = arrayMax(log10p, finish);
|
||||
if(maxValue == Double.NEGATIVE_INFINITY)
|
||||
return sum;
|
||||
|
||||
|
|
@ -554,7 +554,7 @@ public class MathUtils {
|
|||
|
||||
// for precision purposes, we need to add (or really subtract, since they're
|
||||
// all negative) the largest value; also, we need to convert to normal-space.
|
||||
double maxValue = Utils.findMaxEntry(array);
|
||||
double maxValue = arrayMax(array);
|
||||
|
||||
// we may decide to just normalize in log space without converting to linear space
|
||||
if (keepInLogSpace) {
|
||||
|
|
@ -627,10 +627,14 @@ public class MathUtils {
|
|||
return maxI;
|
||||
}
|
||||
|
||||
public static double arrayMax(double[] array) {
|
||||
public static double arrayMax(final double[] array) {
|
||||
return array[maxElementIndex(array)];
|
||||
}
|
||||
|
||||
public static double arrayMax(final double[] array, final int endIndex) {
|
||||
return array[maxElementIndex(array, endIndex)];
|
||||
}
|
||||
|
||||
public static double arrayMin(double[] array) {
|
||||
return array[minElementIndex(array)];
|
||||
}
|
||||
|
|
|
|||
|
|
@ -290,32 +290,6 @@ public class Utils {
|
|||
return m;
|
||||
}
|
||||
|
||||
|
||||
// returns the maximum value in the array
|
||||
public static double findMaxEntry(double[] array) {
|
||||
return findIndexAndMaxEntry(array).first;
|
||||
}
|
||||
|
||||
// returns the index of the maximum value in the array
|
||||
public static int findIndexOfMaxEntry(double[] array) {
|
||||
return findIndexAndMaxEntry(array).second;
|
||||
}
|
||||
|
||||
// returns the the maximum value and its index in the array
|
||||
private static Pair<Double, Integer> findIndexAndMaxEntry(double[] array) {
|
||||
if ( array.length == 0 )
|
||||
return new Pair<Double, Integer>(0.0, -1);
|
||||
int index = 0;
|
||||
double max = array[0];
|
||||
for (int i = 1; i < array.length; i++) {
|
||||
if ( array[i] > max ) {
|
||||
max = array[i];
|
||||
index = i;
|
||||
}
|
||||
}
|
||||
return new Pair<Double, Integer>(max, index);
|
||||
}
|
||||
|
||||
/**
|
||||
* Splits expressions in command args by spaces and returns the array of expressions.
|
||||
* Expressions may use single or double quotes to group any individual expression, but not both.
|
||||
|
|
|
|||
Loading…
Reference in New Issue