diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationResult.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationResult.java index 0867d949e..c93e780bf 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationResult.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationResult.java @@ -27,7 +27,6 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import org.broadinstitute.sting.utils.MathUtils; -import java.util.ArrayList; import java.util.Arrays; /** @@ -42,12 +41,13 @@ public class AlleleFrequencyCalculationResult { // These variables are intended to contain the MLE and MAP (and their corresponding allele counts) of the site over all alternate alleles private double log10MLE; private double log10MAP; - final private int[] alleleCountsOfMLE; - final private int[] alleleCountsOfMAP; + private final int[] alleleCountsOfMLE; + private final int[] alleleCountsOfMAP; // The posteriors seen, not including that of AF=0 - // TODO -- better implementation needed here (see below) - private ArrayList log10PosteriorMatrixValues = new ArrayList(100000); + private static final int POSTERIORS_CACHE_SIZE = 5000; + private final double[] log10PosteriorMatrixValues = new double[POSTERIORS_CACHE_SIZE]; + private int currentPosteriorsCacheIndex = 0; private Double log10PosteriorMatrixSum = null; // These variables are intended to contain the likelihood/posterior probability for the site's being monomorphic (i.e. AF=0 for all alternate alleles) @@ -69,14 +69,9 @@ public class AlleleFrequencyCalculationResult { return log10MAP; } - public double getLog10PosteriorMatrixSum() { + public double getLog10PosteriorsMatrixSumWithoutAFzero() { if ( log10PosteriorMatrixSum == null ) { - // TODO -- we absolutely need a better implementation here as we don't want to store all values from the matrix in memory; - // TODO -- will discuss with the team what the best option is - final double[] tmp = new double[log10PosteriorMatrixValues.size()]; - for ( int i = 0; i < tmp.length; i++ ) - tmp[i] = log10PosteriorMatrixValues.get(i); - log10PosteriorMatrixSum = MathUtils.log10sumLog10(tmp); + log10PosteriorMatrixSum = MathUtils.log10sumLog10(log10PosteriorMatrixValues, 0, currentPosteriorsCacheIndex); } return log10PosteriorMatrixSum; } @@ -103,7 +98,7 @@ public class AlleleFrequencyCalculationResult { alleleCountsOfMLE[i] = 0; alleleCountsOfMAP[i] = 0; } - log10PosteriorMatrixValues.clear(); + currentPosteriorsCacheIndex = 0; log10PosteriorMatrixSum = null; } @@ -116,7 +111,8 @@ public class AlleleFrequencyCalculationResult { } public void updateMAPifNeeded(final double log10LofK, final int[] alleleCountsForK) { - log10PosteriorMatrixValues.add(log10LofK); + addToPosteriorsCache(log10LofK); + if ( log10LofK > log10MAP ) { log10MAP = log10LofK; for ( int i = 0; i < alleleCountsForK.length; i++ ) @@ -124,6 +120,18 @@ public class AlleleFrequencyCalculationResult { } } + private void addToPosteriorsCache(final double log10LofK) { + // add to the cache + log10PosteriorMatrixValues[currentPosteriorsCacheIndex++] = log10LofK; + + // if we've filled up the cache, then condense by summing up all of the values and placing the sum back into the first cell + if ( currentPosteriorsCacheIndex == POSTERIORS_CACHE_SIZE ) { + final double temporarySum = MathUtils.log10sumLog10(log10PosteriorMatrixValues, 0, currentPosteriorsCacheIndex); + log10PosteriorMatrixValues[0] = temporarySum; + currentPosteriorsCacheIndex = 1; + } + } + public void setLog10LikelihoodOfAFzero(final double log10LikelihoodOfAFzero) { this.log10LikelihoodOfAFzero = log10LikelihoodOfAFzero; if ( log10LikelihoodOfAFzero > log10MLE ) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index d4206e8ef..5d926a865 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -326,7 +326,7 @@ public class UnifiedGenotyperEngine { } else { phredScaledConfidence = QualityUtils.phredScaleErrorRate(PofF); if ( Double.isInfinite(phredScaledConfidence) ) { - final double sum = AFresult.getLog10PosteriorMatrixSum(); + final double sum = AFresult.getLog10PosteriorsMatrixSumWithoutAFzero(); phredScaledConfidence = (MathUtils.compareDoubles(sum, 0.0) == 0 ? 0 : -10.0 * sum); } } @@ -369,7 +369,7 @@ public class UnifiedGenotyperEngine { // the overall lod //double overallLog10PofNull = AFresult.log10AlleleFrequencyPosteriors[0]; - double overallLog10PofF = AFresult.getLog10PosteriorMatrixSum(); + double overallLog10PofF = AFresult.getLog10PosteriorsMatrixSumWithoutAFzero(); //if ( DEBUG_SLOD ) System.out.println("overallLog10PofF=" + overallLog10PofF); List alternateAllelesToUse = builder.make().getAlternateAlleles(); @@ -380,7 +380,7 @@ public class UnifiedGenotyperEngine { afcm.get().getLog10PNonRef(vcForward, getAlleleFrequencyPriors(model), AFresult); //double[] normalizedLog10Posteriors = MathUtils.normalizeFromLog10(AFresult.log10AlleleFrequencyPosteriors, true); double forwardLog10PofNull = AFresult.getLog10PosteriorOfAFzero(); - double forwardLog10PofF = AFresult.getLog10PosteriorMatrixSum(); + double forwardLog10PofF = AFresult.getLog10PosteriorsMatrixSumWithoutAFzero(); //if ( DEBUG_SLOD ) System.out.println("forwardLog10PofNull=" + forwardLog10PofNull + ", forwardLog10PofF=" + forwardLog10PofF); // the reverse lod @@ -389,7 +389,7 @@ public class UnifiedGenotyperEngine { afcm.get().getLog10PNonRef(vcReverse, getAlleleFrequencyPriors(model), AFresult); //normalizedLog10Posteriors = MathUtils.normalizeFromLog10(AFresult.log10AlleleFrequencyPosteriors, true); double reverseLog10PofNull = AFresult.getLog10PosteriorOfAFzero(); - double reverseLog10PofF = AFresult.getLog10PosteriorMatrixSum(); + double reverseLog10PofF = AFresult.getLog10PosteriorsMatrixSumWithoutAFzero(); //if ( DEBUG_SLOD ) System.out.println("reverseLog10PofNull=" + reverseLog10PofNull + ", reverseLog10PofF=" + reverseLog10PofF); double forwardLod = forwardLog10PofF + reverseLog10PofNull - overallLog10PofF; @@ -424,7 +424,7 @@ public class UnifiedGenotyperEngine { public static double[] generateNormalizedPosteriors(final AlleleFrequencyCalculationResult AFresult, final double[] normalizedPosteriors) { normalizedPosteriors[0] = AFresult.getLog10PosteriorOfAFzero(); - normalizedPosteriors[1] = AFresult.getLog10PosteriorMatrixSum(); + normalizedPosteriors[1] = AFresult.getLog10PosteriorsMatrixSumWithoutAFzero(); return MathUtils.normalizeFromLog10(normalizedPosteriors); } diff --git a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java index c4b0165ca..5e3160452 100644 --- a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java @@ -237,7 +237,7 @@ public class MathUtils { public static double log10sumLog10(double[] log10p, int start, int finish) { double sum = 0.0; - double maxValue = Utils.findMaxEntry(log10p); + double maxValue = arrayMax(log10p, finish); if(maxValue == Double.NEGATIVE_INFINITY) return sum; @@ -554,7 +554,7 @@ public class MathUtils { // for precision purposes, we need to add (or really subtract, since they're // all negative) the largest value; also, we need to convert to normal-space. - double maxValue = Utils.findMaxEntry(array); + double maxValue = arrayMax(array); // we may decide to just normalize in log space without converting to linear space if (keepInLogSpace) { @@ -627,10 +627,14 @@ public class MathUtils { return maxI; } - public static double arrayMax(double[] array) { + public static double arrayMax(final double[] array) { return array[maxElementIndex(array)]; } + public static double arrayMax(final double[] array, final int endIndex) { + return array[maxElementIndex(array, endIndex)]; + } + public static double arrayMin(double[] array) { return array[minElementIndex(array)]; } diff --git a/public/java/src/org/broadinstitute/sting/utils/Utils.java b/public/java/src/org/broadinstitute/sting/utils/Utils.java index 4817966fe..c2c608903 100755 --- a/public/java/src/org/broadinstitute/sting/utils/Utils.java +++ b/public/java/src/org/broadinstitute/sting/utils/Utils.java @@ -290,32 +290,6 @@ public class Utils { return m; } - - // returns the maximum value in the array - public static double findMaxEntry(double[] array) { - return findIndexAndMaxEntry(array).first; - } - - // returns the index of the maximum value in the array - public static int findIndexOfMaxEntry(double[] array) { - return findIndexAndMaxEntry(array).second; - } - - // returns the the maximum value and its index in the array - private static Pair findIndexAndMaxEntry(double[] array) { - if ( array.length == 0 ) - return new Pair(0.0, -1); - int index = 0; - double max = array[0]; - for (int i = 1; i < array.length; i++) { - if ( array[i] > max ) { - max = array[i]; - index = i; - } - } - return new Pair(max, index); - } - /** * Splits expressions in command args by spaces and returns the array of expressions. * Expressions may use single or double quotes to group any individual expression, but not both.