From c74d7061fe389510187cf4aa362a0c6028d7591d Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Tue, 16 Oct 2012 08:10:22 -0400 Subject: [PATCH] Added AFCalcResultUnitTest -- Ensures that the posteriors remain within reasonable ranges. Fixed bug where normalization of posteriors = {-1e30, 0.0} => {-100000, 0.0} which isn't good. Now tests ensure that the normalization process preserves log10 precision where possible -- Updated MathUtils to make this possible --- .../afcalc/AFCalcResultUnitTest.java | 77 +++++++++++++++++++ .../genotyper/afcalc/AFCalcUnitTest.java | 37 +++++---- .../genotyper/afcalc/AFCalcResult.java | 10 +-- .../broadinstitute/sting/utils/MathUtils.java | 8 +- 4 files changed, 103 insertions(+), 29 deletions(-) create mode 100644 protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResultUnitTest.java diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResultUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResultUnitTest.java new file mode 100644 index 000000000..1070642e9 --- /dev/null +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResultUnitTest.java @@ -0,0 +1,77 @@ +package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc; + +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.testng.Assert; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +public class AFCalcResultUnitTest extends BaseTest { + private static class MyTest { + final double[] Ls, expectedPosteriors; + + private MyTest(double[] ls, double[] expectedPosteriors) { + Ls = ls; + this.expectedPosteriors = expectedPosteriors; + } + + @Override + public String toString() { + return "Ls [" + Utils.join(",", Ls) + "] expectedPosteriors [" + Utils.join(",", expectedPosteriors) + "]"; + } + } + + @DataProvider(name = "TestComputePosteriors") + public Object[][] makeTestCombineGLs() { + List tests = new ArrayList(); + + tests.add(new Object[]{new MyTest(log10Even, log10Even)}); + + for ( double L0 = -1e9; L0 < 0.0; L0 /= 10.0 ) { + for ( double L1 = -1e2; L1 < 0.0; L1 /= 100.0 ) { + final double[] input = new double[]{L0, L1}; + final double[] expected = MathUtils.normalizeFromLog10(input, true); + tests.add(new Object[]{new MyTest(input, expected)}); + } + } + + for ( double bigBadL = -1e50; bigBadL < -1e200; bigBadL *= 10 ) { + // test that a huge bad likelihood remains, even with a massive better result + for ( final double betterL : Arrays.asList(-1000.0, -100.0, -10.0, -1.0, -0.1, -0.01, -0.001, 0.0)) { + tests.add(new Object[]{new MyTest(new double[]{bigBadL, betterL}, new double[]{bigBadL, 0.0})}); + tests.add(new Object[]{new MyTest(new double[]{betterL, bigBadL}, new double[]{0.0, bigBadL})}); + } + } + + // test that a modest bad likelihood with an ~0.0 value doesn't get lost + for ( final double badL : Arrays.asList(-10000.0, -1000.0, -100.0, -10.0)) { + tests.add(new Object[]{new MyTest(new double[]{badL, -1e-9}, new double[]{badL, 0.0})}); + tests.add(new Object[]{new MyTest(new double[]{-1e-9, badL}, new double[]{0.0, badL})}); + } + + return tests.toArray(new Object[][]{}); + } + + + final static double[] log10Even = MathUtils.normalizeFromLog10(new double[]{0.5, 0.5}, true); + final static Allele C = Allele.create("C"); + final static List alleles = Arrays.asList(Allele.create("A", true), C); + + @Test(enabled = true, dataProvider = "TestComputePosteriors") + private void testComputingPosteriors(final MyTest data) { + final AFCalcResult result = new AFCalcResult(new int[]{0}, 1, alleles, data.Ls, log10Even, Collections.singletonMap(C, -1.0)); + + Assert.assertEquals(result.getLog10PosteriorOfAFEq0(), data.expectedPosteriors[0], 1e-3, "AF = 0 not expected"); + Assert.assertEquals(result.getLog10PosteriorOfAFGT0(), data.expectedPosteriors[1], 1e-3, "AF > 0 not expected"); + + final double[] actualPosteriors = new double[]{result.getLog10PosteriorOfAFEq0(), result.getLog10PosteriorOfAFGT0()}; + Assert.assertEquals(MathUtils.sumLog10(actualPosteriors), 1.0, 1e-3, "Posteriors don't sum to 1 with 1e-3 precision"); + } +} diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcUnitTest.java index e2407989b..25df0f6d2 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcUnitTest.java @@ -446,7 +446,7 @@ public class AFCalcUnitTest extends BaseTest { return tests.toArray(new Object[][]{}); } - @Test(enabled = true & ! DEBUG_ONLY, dataProvider = "Models") + @Test(enabled = true && !DEBUG_ONLY, dataProvider = "Models") public void testBiallelicPriors(final AFCalc model) { for ( int REF_PL = 10; REF_PL <= 20; REF_PL += 10 ) { @@ -454,26 +454,29 @@ public class AFCalcUnitTest extends BaseTest { for ( int log10NonRefPrior = 1; log10NonRefPrior < 10*REF_PL; log10NonRefPrior += 1 ) { final double refPrior = 1 - QualityUtils.qualToErrorProb(log10NonRefPrior); - final double[] priors = MathUtils.normalizeFromLog10(MathUtils.toLog10(new double[]{refPrior, (1-refPrior) / 2, (1-refPrior) / 2}), true); - GetGLsTest cfg = new GetGLsTest(model, 1, Arrays.asList(AB), priors, "pNonRef" + log10NonRefPrior); - final AFCalcResult resultTracker = cfg.execute(); - final int actualAC = resultTracker.getAlleleCountsOfMLE()[0]; + final double nonRefPrior = (1-refPrior) / 2; + final double[] priors = MathUtils.normalizeFromLog10(MathUtils.toLog10(new double[]{refPrior, nonRefPrior, nonRefPrior}), true); + if ( ! Double.isInfinite(priors[1]) ) { + GetGLsTest cfg = new GetGLsTest(model, 1, Arrays.asList(AB), priors, "pNonRef" + log10NonRefPrior); + final AFCalcResult resultTracker = cfg.execute(); + final int actualAC = resultTracker.getAlleleCountsOfMLE()[0]; - final double pRefWithPrior = AB.getLikelihoods().getAsVector()[0] + priors[0]; - final double pHetWithPrior = AB.getLikelihoods().getAsVector()[1] + priors[1] - Math.log10(0.5); - final double nonRefPost = Math.pow(10, pHetWithPrior) / (Math.pow(10, pRefWithPrior) + Math.pow(10, pHetWithPrior)); - final double log10NonRefPost = Math.log10(nonRefPost); + final double pRefWithPrior = AB.getLikelihoods().getAsVector()[0] + priors[0]; + final double pHetWithPrior = AB.getLikelihoods().getAsVector()[1] + priors[1] - Math.log10(0.5); + final double nonRefPost = Math.pow(10, pHetWithPrior) / (Math.pow(10, pRefWithPrior) + Math.pow(10, pHetWithPrior)); + final double log10NonRefPost = Math.log10(nonRefPost); - if ( ! Double.isInfinite(log10NonRefPost) ) - Assert.assertEquals(resultTracker.getLog10PosteriorOfAFGT0(), log10NonRefPost, 1e-2); + if ( ! Double.isInfinite(log10NonRefPost) ) + Assert.assertEquals(resultTracker.getLog10PosteriorOfAFGT0(), log10NonRefPost, 1e-2); - if ( nonRefPost >= 0.9 ) - Assert.assertTrue(resultTracker.isPolymorphic(C, -1)); + if ( nonRefPost >= 0.9 ) + Assert.assertTrue(resultTracker.isPolymorphic(C, -1)); - final int expectedMLEAC = 1; // the MLE is independent of the prior - Assert.assertEquals(actualAC, expectedMLEAC, - "actual AC with priors " + log10NonRefPrior + " not expected " - + expectedMLEAC + " priors " + Utils.join(",", priors)); + final int expectedMLEAC = 1; // the MLE is independent of the prior + Assert.assertEquals(actualAC, expectedMLEAC, + "actual AC with priors " + log10NonRefPrior + " not expected " + + expectedMLEAC + " priors " + Utils.join(",", priors)); + } } } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResult.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResult.java index 209a21d82..c737416c5 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResult.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResult.java @@ -271,15 +271,7 @@ public class AFCalcResult { final double[] log10UnnormalizedPosteriors = new double[log10LikelihoodsOfAC.length]; for ( int i = 0; i < log10LikelihoodsOfAC.length; i++ ) log10UnnormalizedPosteriors[i] = log10LikelihoodsOfAC[i] + log10PriorsOfAC[i]; - - // necessary because the posteriors may be so skewed that the log-space normalized value isn't - // good, so we have to try both log-space normalization as well as the real-space normalization if the - // result isn't good - final double[] logNormalized = MathUtils.normalizeFromLog10(log10UnnormalizedPosteriors, true, false); - if ( goodLog10ProbVector(logNormalized, logNormalized.length, true) ) - return logNormalized; - else - return MathUtils.normalizeFromLog10(log10UnnormalizedPosteriors, true, true); + return MathUtils.normalizeFromLog10(log10UnnormalizedPosteriors, true, false); } /** diff --git a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java index a1d6907a2..3740d5d7c 100644 --- a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java @@ -596,7 +596,6 @@ public class MathUtils { if (keepInLogSpace) { for (int i = 0; i < array.length; i++) { array[i] -= maxValue; - array[i] = Math.max(array[i], LOG10_P_OF_ZERO); } return array; } @@ -613,8 +612,11 @@ public class MathUtils { sum += normalized[i]; for (int i = 0; i < array.length; i++) { double x = normalized[i] / sum; - if (takeLog10OfOutput) - x = Math.max(Math.log10(x), LOG10_P_OF_ZERO); + if (takeLog10OfOutput) { + x = Math.log10(x); + if ( x < LOG10_P_OF_ZERO || Double.isInfinite(x) ) + x = array[i] - maxValue; + } normalized[i] = x; }