diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactAFCalculationModelUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactAFCalculationModelUnitTest.java index 85f80d5be..ce5bb349c 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactAFCalculationModelUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactAFCalculationModelUnitTest.java @@ -123,7 +123,7 @@ public class ExactAFCalculationModelUnitTest extends BaseTest { for ( final int nSamples : Arrays.asList(1, 2, 3, 4) ) { final ExactAFCalc diploidCalc = new ReferenceDiploidExactAFCalc(nSamples, 4); - final ExactAFCalc optDiploidCalc = new ConstrainedDiploidExactAFCalc(nSamples, 4); +// final ExactAFCalc optDiploidCalc = new ConstrainedDiploidExactAFCalc(nSamples, 4); final ExactAFCalc generalCalc = new GeneralPloidyExactAFCalc(nSamples, 4, 2); final ExactAFCalc indCalc = new IndependentAllelesDiploidExactAFCalc(nSamples, 4); @@ -133,7 +133,7 @@ public class ExactAFCalculationModelUnitTest extends BaseTest { UnifiedGenotyperEngine.computeAlleleFrequencyPriors(nPriorValues - 1, humanPriors, 0.001); for ( final double[] priors : Arrays.asList(flatPriors, humanPriors) ) { // , humanPriors) ) { - for ( ExactAFCalc model : Arrays.asList(diploidCalc, generalCalc, optDiploidCalc, indCalc) ) { + for ( ExactAFCalc model : Arrays.asList(diploidCalc, generalCalc, indCalc) ) { final String priorName = priors == humanPriors ? "human" : "flat"; // bi-allelic @@ -181,13 +181,13 @@ public class ExactAFCalculationModelUnitTest extends BaseTest { final int nSamples = samples.size(); final ExactAFCalc diploidCalc = new ReferenceDiploidExactAFCalc(nSamples, 4); - final ExactAFCalc optDiploidCalc = new ConstrainedDiploidExactAFCalc(nSamples, 4); +// final ExactAFCalc optDiploidCalc = new ConstrainedDiploidExactAFCalc(nSamples, 4); final ExactAFCalc generalCalc = new GeneralPloidyExactAFCalc(nSamples, 4, 2); final ExactAFCalc indCalc = new IndependentAllelesDiploidExactAFCalc(nSamples, 4); final double[] priors = MathUtils.normalizeFromLog10(new double[2*nSamples+1], true); // flat priors - for ( ExactAFCalc model : Arrays.asList(diploidCalc, optDiploidCalc, generalCalc, indCalc) ) { + for ( ExactAFCalc model : Arrays.asList(diploidCalc, generalCalc, indCalc) ) { final GetGLsTest onlyInformative = new GetGLsTest(model, testData.nAltAlleles, testData.called, priors, "flat"); for ( int rotation = 0; rotation < nSamples; rotation++ ) { @@ -213,14 +213,14 @@ public class ExactAFCalculationModelUnitTest extends BaseTest { final AFCalcResult actual = withNonInformative.execute(); testResultSimple(withNonInformative); - compareAFCalcResults(actual, expected); + compareAFCalcResults(actual, expected, onlyInformative.getCalc()); } private void testResultSimple(final GetGLsTest cfg) { final AFCalcResult refResultTracker = cfg.executeRef(); final AFCalcResult resultTracker = cfg.execute(); - compareAFCalcResults(resultTracker, refResultTracker); + compareAFCalcResults(resultTracker, refResultTracker, cfg.getCalc()); // final int minNumberOfEvaluations = cfg.getVC().getCalledChrCount(); // Assert.assertTrue(result.getnEvaluations() >= minNumberOfEvaluations, @@ -247,7 +247,7 @@ public class ExactAFCalculationModelUnitTest extends BaseTest { // } } - private void compareAFCalcResults(final AFCalcResult actual, final AFCalcResult expected) { + private void compareAFCalcResults(final AFCalcResult actual, final AFCalcResult expected, final ExactAFCalc calc) { final double TOLERANCE = 1; // TODO -- tighten up tolerances Assert.assertEquals(actual.getLog10PriorOfAFEq0(), expected.getLog10PriorOfAFEq0(), TOLERANCE); @@ -258,6 +258,15 @@ public class ExactAFCalculationModelUnitTest extends BaseTest { Assert.assertEquals(actual.getLog10PosteriorOfAFGT0(), expected.getLog10PosteriorOfAFGT0(), TOLERANCE); Assert.assertEquals(actual.getAlleleCountsOfMLE(), expected.getAlleleCountsOfMLE()); Assert.assertEquals(actual.getAllelesUsedInGenotyping(), expected.getAllelesUsedInGenotyping()); + + for ( final Allele a : expected.getAllelesUsedInGenotyping() ) { + if ( ! a.isReference() ) { + Assert.assertEquals(actual.getAlleleCountAtMLE(a), expected.getAlleleCountAtMLE(a)); + if ( ! ( calc instanceof GeneralPloidyExactAFCalc ) ) + // TODO -- delete when general ploidy works properly with multi-allelics + Assert.assertEquals(actual.isPolymorphic(a, 0.0), expected.isPolymorphic(a, 0.0)); + } + } } @Test(enabled = true, dataProvider = "Models") @@ -429,7 +438,7 @@ public class ExactAFCalculationModelUnitTest extends BaseTest { final double nonRefPost = Math.pow(10, pHetWithPrior) / (Math.pow(10, pRefWithPrior) + Math.pow(10, pHetWithPrior)); if ( nonRefPost < 0.1 ) - Assert.assertTrue(resultTracker.isPolymorphic(-1)); + Assert.assertTrue(resultTracker.isPolymorphic(C, -1)); final int expectedMLEAC = 1; // the MLE is independent of the prior Assert.assertEquals(actualAC, expectedMLEAC, diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index 8f1473121..bfdecfa68 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -374,27 +374,23 @@ public class UnifiedGenotyperEngine { myAlleles.add(vc.getReference()); for ( int i = 0; i < vc.getAlternateAlleles().size(); i++ ) { final Allele alternateAllele = vc.getAlternateAllele(i); - final int indexOfAllele = AFresult.getAllelesUsedInGenotyping().indexOf(alternateAllele); - // the genotyping model may have stripped it out - if ( indexOfAllele == -1 ) - continue; // we are non-ref if the probability of being non-ref > the emit confidence. // the emit confidence is phred-scaled, say 30 => 10^-3. // the posterior AF > 0 is log10: -5 => 10^-5 // we are non-ref if 10^-5 < 10^-3 => -5 < -3 - final boolean isNonRef = AFresult.isPolymorphic(UAC.STANDARD_CONFIDENCE_FOR_EMITTING / -10.0); + final boolean isNonRef = AFresult.isPolymorphic(alternateAllele, UAC.STANDARD_CONFIDENCE_FOR_EMITTING / -10.0); // if the most likely AC is not 0, then this is a good alternate allele to use if ( ! isNonRef ) { myAlleles.add(alternateAllele); - alleleCountsofMLE.add(AFresult.getAlleleCountsOfMLE()[indexOfAllele-1]); + alleleCountsofMLE.add(AFresult.getAlleleCountAtMLE(alternateAllele)); bestGuessIsRef = false; } // if in GENOTYPE_GIVEN_ALLELES mode, we still want to allow the use of a poor allele else if ( UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) { myAlleles.add(alternateAllele); - alleleCountsofMLE.add(AFresult.getAlleleCountsOfMLE()[indexOfAllele-1]); + alleleCountsofMLE.add(AFresult.getAlleleCountAtMLE(alternateAllele)); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResult.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResult.java index bf15e2039..787ca8372 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResult.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResult.java @@ -32,7 +32,9 @@ import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.variantcontext.Allele; import java.util.Arrays; +import java.util.HashMap; import java.util.List; +import java.util.Map; /** * Describes the results of the AFCalc @@ -52,6 +54,8 @@ public class AFCalcResult { private final double[] log10PriorsOfAC; private final double[] log10PosteriorsOfAC; + private final Map log10pNonRefByAllele; + /** * The AC values for all ALT alleles at the MLE */ @@ -71,13 +75,17 @@ public class AFCalcResult { final int nEvaluations, final List allelesUsedInGenotyping, final double[] log10LikelihoodsOfAC, - final double[] log10PriorsOfAC) { + final double[] log10PriorsOfAC, + final Map log10pNonRefByAllele) { if ( allelesUsedInGenotyping == null || allelesUsedInGenotyping.size() < 1 ) throw new IllegalArgumentException("allelesUsedInGenotyping must be non-null list of at least 1 value " + allelesUsedInGenotyping); if ( alleleCountsOfMLE == null ) throw new IllegalArgumentException("alleleCountsOfMLE cannot be null"); - if ( alleleCountsOfMLE.length != allelesUsedInGenotyping.size() ) throw new IllegalArgumentException("alleleCountsOfMLE.length " + alleleCountsOfMLE.length + " != allelesUsedInGenotyping.size() " + allelesUsedInGenotyping.size()); + if ( alleleCountsOfMLE.length != allelesUsedInGenotyping.size() - 1) throw new IllegalArgumentException("alleleCountsOfMLE.length " + alleleCountsOfMLE.length + " != allelesUsedInGenotyping.size() " + allelesUsedInGenotyping.size()); if ( nEvaluations < 0 ) throw new IllegalArgumentException("nEvaluations must be >= 0 but saw " + nEvaluations); if ( log10LikelihoodsOfAC.length != 2 ) throw new IllegalArgumentException("log10LikelihoodsOfAC must have length equal 2"); if ( log10PriorsOfAC.length != 2 ) throw new IllegalArgumentException("log10PriorsOfAC must have length equal 2"); + if ( log10pNonRefByAllele == null ) throw new IllegalArgumentException("log10pNonRefByAllele cannot be null"); + if ( log10pNonRefByAllele.size() != allelesUsedInGenotyping.size() - 1 ) throw new IllegalArgumentException("log10pNonRefByAllele has the wrong number of elements: log10pNonRefByAllele " + log10pNonRefByAllele + " but allelesUsedInGenotyping " + allelesUsedInGenotyping); + if ( ! allelesUsedInGenotyping.containsAll(log10pNonRefByAllele.keySet()) ) throw new IllegalArgumentException("log10pNonRefByAllele doesn't contain all of the alleles used in genotyping: log10pNonRefByAllele " + log10pNonRefByAllele + " but allelesUsedInGenotyping " + allelesUsedInGenotyping); if ( ! goodLog10ProbVector(log10LikelihoodsOfAC, LOG_10_ARRAY_SIZES, false) ) throw new IllegalArgumentException("log10LikelihoodsOfAC are bad " + Utils.join(",", log10LikelihoodsOfAC)); if ( ! goodLog10ProbVector(log10PriorsOfAC, LOG_10_ARRAY_SIZES, true) ) throw new IllegalArgumentException("log10priors are bad " + Utils.join(",", log10PriorsOfAC)); @@ -88,6 +96,7 @@ public class AFCalcResult { this.log10LikelihoodsOfAC = Arrays.copyOf(log10LikelihoodsOfAC, LOG_10_ARRAY_SIZES); this.log10PriorsOfAC = Arrays.copyOf(log10PriorsOfAC, LOG_10_ARRAY_SIZES); this.log10PosteriorsOfAC = computePosteriors(log10LikelihoodsOfAC, log10PriorsOfAC); + this.log10pNonRefByAllele = new HashMap(log10pNonRefByAllele); } /** @@ -105,6 +114,17 @@ public class AFCalcResult { return alleleCountsOfMLE; } + /** + * Returns the AC of allele a la #getAlleleCountsOfMLE + * + * @param allele the allele whose AC we want to know. Error if its not in allelesUsedInGenotyping + * @throws IllegalStateException if allele isn't in allelesUsedInGenotyping + * @return the AC of allele + */ + public int getAlleleCountAtMLE(final Allele allele) { + return getAlleleCountsOfMLE()[altAlleleIndex(allele)]; + } + /** * Returns the number of cycles used to evaluate the pNonRef for this AF calculation * @@ -124,58 +144,55 @@ public class AFCalcResult { */ @Ensures({"result != null", "! result.isEmpty()"}) public List getAllelesUsedInGenotyping() { - if ( allelesUsedInGenotyping == null ) - throw new IllegalStateException("allelesUsedInGenotyping requested but not yet set"); - return allelesUsedInGenotyping; } /** - * Get the log10 normalized -- across all ACs -- posterior probability of AC == 0 + * Get the log10 normalized -- across all ACs -- posterior probability of AC == 0 for all alleles * * @return */ - @Ensures({"goodLog10Value(result)"}) + @Ensures({"goodLog10Probability(result)"}) public double getLog10PosteriorOfAFEq0() { return log10PosteriorsOfAC[AF0]; } /** - * Get the log10 normalized -- across all ACs -- posterior probability of AC > 0 + * Get the log10 normalized -- across all ACs -- posterior probability of AC > 0 for any alleles * * @return */ - @Ensures({"goodLog10Value(result)"}) + @Ensures({"goodLog10Probability(result)"}) public double getLog10PosteriorOfAFGT0() { return log10PosteriorsOfAC[AF1p]; } /** - * Get the log10 unnormalized -- across all ACs -- likelihood of AC == 0 + * Get the log10 unnormalized -- across all ACs -- likelihood of AC == 0 for all alleles * * @return */ - @Ensures({"goodLog10Value(result)"}) + @Ensures({"goodLog10Probability(result)"}) public double getLog10LikelihoodOfAFEq0() { return log10LikelihoodsOfAC[AF0]; } /** - * Get the log10 unnormalized -- across all ACs -- likelihood of AC > 0 + * Get the log10 unnormalized -- across all ACs -- likelihood of AC > 0 for any alleles * * @return */ - @Ensures({"goodLog10Value(result)"}) + @Ensures({"goodLog10Probability(result)"}) public double getLog10LikelihoodOfAFGT0() { return log10LikelihoodsOfAC[AF1p]; } /** - * Get the log10 unnormalized -- across all ACs -- prior probability of AC == 0 + * Get the log10 unnormalized -- across all ACs -- prior probability of AC == 0 for all alleles * * @return */ - @Ensures({"goodLog10Value(result)"}) + @Ensures({"goodLog10Probability(result)"}) public double getLog10PriorOfAFEq0() { return log10PriorsOfAC[AF0]; } @@ -185,7 +202,7 @@ public class AFCalcResult { * * @return */ - @Ensures({"goodLog10Value(result)"}) + @Ensures({"goodLog10Probability(result)"}) public double getLog10PriorOfAFGT0() { return log10PriorsOfAC[AF1p]; } @@ -202,8 +219,27 @@ public class AFCalcResult { * * @return true if there's enough confidence (relative to log10minPNonRef) to reject AF == 0 */ - public boolean isPolymorphic(final double log10minPNonRef) { - return getLog10PosteriorOfAFGT0() < log10minPNonRef; + public boolean isPolymorphic(final Allele allele, final double log10minPNonRef) { + return getLog10PosteriorOfAFGt0ForAllele(allele) < log10minPNonRef; + } + + /** + * Returns the log10 probability that allele is segregating + * + * Unlike the sites-level annotation, this calculation is specific to allele, and can be + * used to separately determine how much evidence there is that allele is independently + * segregating as opposed to the site being polymorphic with any allele. In the bi-allelic + * case these are obviously the same but for multiple alt alleles there can be lots of + * evidence for one allele but not so much for any other allele + * + * @param allele the allele we're interested in, must be in getAllelesUsedInGenotyping + * @return the log10 probability that allele is segregating at this site + */ + @Ensures("goodLog10Probability(result)") + public double getLog10PosteriorOfAFGt0ForAllele(final Allele allele) { + final Double log10pNonRef = log10pNonRefByAllele.get(allele); + if ( log10pNonRef == null ) throw new IllegalArgumentException("Unknown allele " + allele); + return log10pNonRef; } /** @@ -237,8 +273,8 @@ public class AFCalcResult { if ( vector.length != expectedSize ) return false; for ( final double pr : vector ) { - if ( pr > 0.0 ) return false; // log10 prob. vector should be < 0 - if ( Double.isInfinite(pr) || Double.isNaN(pr) ) return false; + if ( ! goodLog10Probability(pr) ) + return false; } if ( shouldSumToOne && MathUtils.compareDoubles(MathUtils.sumLog10(vector), 1.0, 1e-2) != 0 ) @@ -247,7 +283,35 @@ public class AFCalcResult { return true; // everything is good } - private static boolean goodLog10Value(final double result) { + /** + * Computes the offset into linear vectors indexed by alt allele for allele + * + * Things like our MLE allele count vector are indexed by alt allele index, with + * the first alt allele being 0, the second 1, etc. This function computes the index + * associated with allele. + * + * @param allele the allele whose alt index we'd like to know + * @throws IllegalArgumentException if allele isn't in allelesUsedInGenotyping + * @return an index value greater than 0 suitable for indexing into the MLE and other alt allele indexed arrays + */ + @Requires("allele != null") + @Ensures({"result >= 0", "result < allelesUsedInGenotyping.size() - 1"}) + private int altAlleleIndex(final Allele allele) { + if ( allele.isReference() ) throw new IllegalArgumentException("Cannot get the alt allele index for reference allele " + allele); + final int index = allelesUsedInGenotyping.indexOf(allele); + if ( index == -1 ) + throw new IllegalArgumentException("could not find allele " + allele + " in " + allelesUsedInGenotyping); + else + return index - 1; + } + + /** + * Checks that the result is a well-formed log10 probability + * + * @param result a supposedly well-formed log10 probability value + * @return true if result is really well formed + */ + private static boolean goodLog10Probability(final double result) { return result <= 0.0 && ! Double.isInfinite(result) && ! Double.isNaN(result); } } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResultTracker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResultTracker.java index d66d0b1d7..d1846b881 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResultTracker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResultTracker.java @@ -30,7 +30,9 @@ import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.variantcontext.Allele; import java.util.Arrays; +import java.util.HashMap; import java.util.List; +import java.util.Map; /** * Created by IntelliJ IDEA. @@ -80,26 +82,6 @@ class AFCalcResultTracker { reset(); } - /** - * Get the log10 value of the probability mass at the MLE - * - * @return a log10 prob - */ - @Ensures("goodLog10Value(result)") - public double getLog10MLE() { - return log10MLE; - } - - /** - * Get the log10 value of the probability mass at the max. a posterior (MAP) - * - * @return a log10 prob - */ - @Ensures("goodLog10Value(result)") - public double getLog10MAP() { - return log10MAP; - } - /** * Returns a vector with maxAltAlleles values containing AC values at the MLE * @@ -127,15 +109,6 @@ class AFCalcResultTracker { return alleleCountsOfMAP; } - /** - * Returns the number of cycles used to evaluate the pNonRef for this AF calculation - * - * @return the number of evaluations required to produce the answer for this AF calculation - */ - public int getnEvaluations() { - return nEvaluations; - } - /** * TODO -- eric what is this supposed to return? my unit tests don't do what I think they should * @@ -170,60 +143,21 @@ class AFCalcResultTracker { return log10PosteriorOfAFzero; } - /** - * Get the list of alleles actually used in genotyping. - * - * Due to computational / implementation constraints this may be smaller than - * the actual list of alleles requested - * - * @return a non-empty list of alleles used during genotyping - */ - @Ensures({"result != null", "! result.isEmpty()"}) - public List getAllelesUsedInGenotyping() { - if ( allelesUsedInGenotyping == null ) - throw new IllegalStateException("allelesUsedInGenotyping requested but not yet set"); - - return allelesUsedInGenotyping; - } - - /** - * Get the normalized -- across all AFs -- of AC == 0, NOT LOG10 - * @return - */ - // TODO -- this ensure cannot be enabled right now because the log10 inputs can be infinity, etc. - // TODO -- we should own these values in a more meaningful way and return good values in the case - // TODO -- where this happens, or instead thrown an error and have a function to say "was this calculation successful -// @Ensures({"result >= 0.0", "result <= 1.0"}) - public double getNormalizedPosteriorOfAFzero() { - return getNormalizedPosteriors()[0]; - } - - /** - * Get the normalized -- across all AFs -- of AC > 0, NOT LOG10 - * @return - */ - // TODO -- this ensure cannot be enabled right now because the log10 inputs can be infinity, etc. - // TODO -- we should own these values in a more meaningful way and return good values in the case - // TODO -- where this happens, or instead thrown an error and have a function to say "was this calculation successful - //@Ensures({"result >= 0.0", "result <= 1.0"}) - public double getNormalizedPosteriorOfAFGTZero() { - return getNormalizedPosteriors()[1]; - } - - private double[] getNormalizedPosteriors() { - final double[] posteriors = new double[]{ getLog10PosteriorOfAFzero(), getLog10PosteriorsMatrixSumWithoutAFzero() }; - return MathUtils.normalizeFromLog10(posteriors); - } - - public int[] getAClimits() { - return AClimits; - } - protected AFCalcResult toAFCalcResult(final double[] log10PriorsByAC) { - final int [] subACOfMLE = Arrays.copyOf(alleleCountsOfMLE, allelesUsedInGenotyping.size()); + final int [] subACOfMLE = Arrays.copyOf(alleleCountsOfMLE, allelesUsedInGenotyping.size() - 1); final double[] log10Likelihoods = new double[]{getLog10LikelihoodOfAFzero(), getLog10LikelihoodOfAFNotZero()}; final double[] log10Priors = new double[]{log10PriorsByAC[0], MathUtils.log10sumLog10(log10PriorsByAC, 1)}; - return new AFCalcResult(subACOfMLE, nEvaluations, allelesUsedInGenotyping, log10Likelihoods, log10Priors); + + // TODO -- replace with more meaningful computation + // TODO -- refactor this calculation into the ref calculation + final Map log10pNonRefByAllele = new HashMap(allelesUsedInGenotyping.size()); + for ( int i = 0; i < subACOfMLE.length; i++ ) { + final Allele allele = allelesUsedInGenotyping.get(i+1); + final double log10PNonRef = getAlleleCountsOfMAP()[i] > 0 ? 0 : -10000; // TODO -- a total hack but in effect what the old behavior was + log10pNonRefByAllele.put(allele, log10PNonRef); + } + + return new AFCalcResult(subACOfMLE, nEvaluations, allelesUsedInGenotyping, log10Likelihoods, log10Priors, log10pNonRefByAllele); } // -------------------------------------------------------------------------------- @@ -309,10 +243,6 @@ class AFCalcResultTracker { this.allelesUsedInGenotyping = allelesUsedInGenotyping; } - private static boolean goodLog10Value(final double result) { - return result <= 0.0 || Double.isInfinite(result) || Double.isNaN(result); - } - protected void setAClimits(int[] AClimits) { this.AClimits = AClimits; } diff --git a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java index b544b77a4..4abb73114 100644 --- a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java @@ -572,8 +572,22 @@ public class MathUtils { return normalizeFromLog10(array, takeLog10OfOutput, false); } - public static double[] normalizeFromLog10(double[] array, boolean takeLog10OfOutput, boolean keepInLogSpace) { + /** + * The smallest log10 value we'll emit from normalizeFromLog10 and other functions + * where the real-space value is 0.0. + */ + final static double LOG10_P_OF_ZERO = -10000; + /** + * See #normalizeFromLog10 but with the additional option to use an approximation that keeps the calculation always in log-space + * + * @param array + * @param takeLog10OfOutput + * @param keepInLogSpace + * + * @return + */ + public static double[] normalizeFromLog10(double[] array, boolean takeLog10OfOutput, boolean keepInLogSpace) { // for precision purposes, we need to add (or really subtract, since they're // all negative) the largest value; also, we need to convert to normal-space. double maxValue = arrayMax(array); @@ -598,7 +612,8 @@ public class MathUtils { for (int i = 0; i < array.length; i++) { double x = normalized[i] / sum; if (takeLog10OfOutput) - x = Math.log10(x); + x = Math.max(Math.log10(x), LOG10_P_OF_ZERO); + normalized[i] = x; }