From c74d7061fe389510187cf4aa362a0c6028d7591d Mon Sep 17 00:00:00 2001
From: Mark DePristo <depristo@broadinstitute.org>
Date: Tue, 16 Oct 2012 08:10:22 -0400
Subject: [PATCH] Added AFCalcResultUnitTest

-- Ensures that the posteriors remain within reasonable ranges.  Fixed bug where normalization of posteriors = {-1e30, 0.0} => {-100000, 0.0} which isn't good.  Now tests ensure that the normalization process preserves log10 precision where possible
-- Updated MathUtils to make this possible
---
 .../afcalc/AFCalcResultUnitTest.java          | 77 +++++++++++++++++++
 .../genotyper/afcalc/AFCalcUnitTest.java      | 37 +++++----
 .../genotyper/afcalc/AFCalcResult.java        | 10 +--
 .../broadinstitute/sting/utils/MathUtils.java |  8 +-
 4 files changed, 103 insertions(+), 29 deletions(-)
 create mode 100644 protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResultUnitTest.java
diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResultUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResultUnitTest.java
new file mode 100644
index 000000000..1070642e9
--- /dev/null
+++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResultUnitTest.java
@@ -0,0 +1,77 @@
+package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
+
+import org.broadinstitute.sting.BaseTest;
+import org.broadinstitute.sting.utils.MathUtils;
+import org.broadinstitute.sting.utils.Utils;
+import org.broadinstitute.sting.utils.variantcontext.Allele;
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+
+public class AFCalcResultUnitTest extends BaseTest {
+    private static class MyTest {
+        final double[] Ls, expectedPosteriors;
+
+        private MyTest(double[] ls, double[] expectedPosteriors) {
+            Ls = ls;
+            this.expectedPosteriors = expectedPosteriors;
+        }
+
+        @Override
+        public String toString() {
+            return "Ls [" + Utils.join(",", Ls) + "] expectedPosteriors [" + Utils.join(",", expectedPosteriors) + "]";
+        }
+    }
+
+    @DataProvider(name = "TestComputePosteriors")
+    public Object[][] makeTestCombineGLs() {
+        List<Object[]> tests = new ArrayList<Object[]>();
+
+        tests.add(new Object[]{new MyTest(log10Even, log10Even)});
+
+        for ( double L0 = -1e9; L0 < 0.0; L0 /= 10.0 ) {
+            for ( double L1 = -1e2; L1 < 0.0; L1 /= 100.0 ) {
+                final double[] input = new double[]{L0, L1};
+                final double[] expected = MathUtils.normalizeFromLog10(input, true);
+                tests.add(new Object[]{new MyTest(input, expected)});
+            }
+        }
+
+        for ( double bigBadL = -1e50; bigBadL < -1e200; bigBadL *= 10 ) {
+            // test that a huge bad likelihood remains, even with a massive better result
+            for ( final double betterL : Arrays.asList(-1000.0, -100.0, -10.0, -1.0, -0.1, -0.01, -0.001, 0.0)) {
+                tests.add(new Object[]{new MyTest(new double[]{bigBadL, betterL}, new double[]{bigBadL, 0.0})});
+                tests.add(new Object[]{new MyTest(new double[]{betterL, bigBadL}, new double[]{0.0, bigBadL})});
+            }
+        }
+
+        // test that a modest bad likelihood with an ~0.0 value doesn't get lost
+        for ( final double badL : Arrays.asList(-10000.0, -1000.0, -100.0, -10.0)) {
+            tests.add(new Object[]{new MyTest(new double[]{badL, -1e-9}, new double[]{badL, 0.0})});
+            tests.add(new Object[]{new MyTest(new double[]{-1e-9, badL}, new double[]{0.0, badL})});
+        }
+
+        return tests.toArray(new Object[][]{});
+    }
+
+
+    final static double[] log10Even = MathUtils.normalizeFromLog10(new double[]{0.5, 0.5}, true);
+    final static Allele C = Allele.create("C");
+    final static List<Allele> alleles = Arrays.asList(Allele.create("A", true), C);
+
+    @Test(enabled = true, dataProvider = "TestComputePosteriors")
+    private void testComputingPosteriors(final MyTest data) {
+        final AFCalcResult result = new AFCalcResult(new int[]{0}, 1, alleles, data.Ls, log10Even, Collections.singletonMap(C, -1.0));
+
+        Assert.assertEquals(result.getLog10PosteriorOfAFEq0(), data.expectedPosteriors[0], 1e-3, "AF = 0 not expected");
+        Assert.assertEquals(result.getLog10PosteriorOfAFGT0(), data.expectedPosteriors[1], 1e-3, "AF > 0 not expected");
+
+        final double[] actualPosteriors = new double[]{result.getLog10PosteriorOfAFEq0(), result.getLog10PosteriorOfAFGT0()};
+        Assert.assertEquals(MathUtils.sumLog10(actualPosteriors), 1.0, 1e-3, "Posteriors don't sum to 1 with 1e-3 precision");
+    }
+}
diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcUnitTest.java
index e2407989b..25df0f6d2 100644
--- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcUnitTest.java
+++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcUnitTest.java
@@ -446,7 +446,7 @@ public class AFCalcUnitTest extends BaseTest {
         return tests.toArray(new Object[][]{});
     }
 
-    @Test(enabled = true & ! DEBUG_ONLY, dataProvider = "Models")
+    @Test(enabled = true && !DEBUG_ONLY, dataProvider = "Models")
     public void testBiallelicPriors(final AFCalc model) {
 
         for ( int REF_PL = 10; REF_PL <= 20; REF_PL += 10 ) {
@@ -454,26 +454,29 @@ public class AFCalcUnitTest extends BaseTest {
 
             for ( int log10NonRefPrior = 1; log10NonRefPrior < 10*REF_PL; log10NonRefPrior += 1 ) {
                 final double refPrior = 1 - QualityUtils.qualToErrorProb(log10NonRefPrior);
-                final double[] priors = MathUtils.normalizeFromLog10(MathUtils.toLog10(new double[]{refPrior, (1-refPrior) / 2, (1-refPrior) / 2}), true);
-                GetGLsTest cfg = new GetGLsTest(model, 1, Arrays.asList(AB), priors, "pNonRef" + log10NonRefPrior);
-                final AFCalcResult resultTracker = cfg.execute();
-                final int actualAC = resultTracker.getAlleleCountsOfMLE()[0];
+                final double nonRefPrior = (1-refPrior) / 2;
+                final double[] priors = MathUtils.normalizeFromLog10(MathUtils.toLog10(new double[]{refPrior, nonRefPrior, nonRefPrior}), true);
+                if ( ! Double.isInfinite(priors[1]) ) {
+                    GetGLsTest cfg = new GetGLsTest(model, 1, Arrays.asList(AB), priors, "pNonRef" + log10NonRefPrior);
+                    final AFCalcResult resultTracker = cfg.execute();
+                    final int actualAC = resultTracker.getAlleleCountsOfMLE()[0];
 
-                final double pRefWithPrior = AB.getLikelihoods().getAsVector()[0] + priors[0];
-                final double pHetWithPrior = AB.getLikelihoods().getAsVector()[1] + priors[1] - Math.log10(0.5);
-                final double nonRefPost = Math.pow(10, pHetWithPrior) / (Math.pow(10, pRefWithPrior) + Math.pow(10, pHetWithPrior));
-                final double log10NonRefPost = Math.log10(nonRefPost);
+                    final double pRefWithPrior = AB.getLikelihoods().getAsVector()[0] + priors[0];
+                    final double pHetWithPrior = AB.getLikelihoods().getAsVector()[1] + priors[1] - Math.log10(0.5);
+                    final double nonRefPost = Math.pow(10, pHetWithPrior) / (Math.pow(10, pRefWithPrior) + Math.pow(10, pHetWithPrior));
+                    final double log10NonRefPost = Math.log10(nonRefPost);
 
-                if ( ! Double.isInfinite(log10NonRefPost) )
-                    Assert.assertEquals(resultTracker.getLog10PosteriorOfAFGT0(), log10NonRefPost, 1e-2);
+                    if ( ! Double.isInfinite(log10NonRefPost) )
+                        Assert.assertEquals(resultTracker.getLog10PosteriorOfAFGT0(), log10NonRefPost, 1e-2);
 
-                if ( nonRefPost >= 0.9 )
-                    Assert.assertTrue(resultTracker.isPolymorphic(C, -1));
+                    if ( nonRefPost >= 0.9 )
+                        Assert.assertTrue(resultTracker.isPolymorphic(C, -1));
 
-                final int expectedMLEAC = 1; // the MLE is independent of the prior
-                Assert.assertEquals(actualAC, expectedMLEAC,
-                        "actual AC with priors " + log10NonRefPrior + " not expected "
-                                + expectedMLEAC + " priors " + Utils.join(",", priors));
+                    final int expectedMLEAC = 1; // the MLE is independent of the prior
+                    Assert.assertEquals(actualAC, expectedMLEAC,
+                            "actual AC with priors " + log10NonRefPrior + " not expected "
+                                    + expectedMLEAC + " priors " + Utils.join(",", priors));
+                }
             }
         }
     }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResult.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResult.java
index 209a21d82..c737416c5 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResult.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResult.java
@@ -271,15 +271,7 @@ public class AFCalcResult {
         final double[] log10UnnormalizedPosteriors = new double[log10LikelihoodsOfAC.length];
         for ( int i = 0; i < log10LikelihoodsOfAC.length; i++ )
             log10UnnormalizedPosteriors[i] = log10LikelihoodsOfAC[i] + log10PriorsOfAC[i];
-
-        // necessary because the posteriors may be so skewed that the log-space normalized value isn't
-        // good, so we have to try both log-space normalization as well as the real-space normalization if the
-        // result isn't good
-        final double[] logNormalized = MathUtils.normalizeFromLog10(log10UnnormalizedPosteriors, true, false);
-        if ( goodLog10ProbVector(logNormalized, logNormalized.length, true) )
-            return logNormalized;
-        else
-            return MathUtils.normalizeFromLog10(log10UnnormalizedPosteriors, true, true);
+        return MathUtils.normalizeFromLog10(log10UnnormalizedPosteriors, true, false);
     }
 
     /**
diff --git a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java
index a1d6907a2..3740d5d7c 100644
--- a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java
@@ -596,7 +596,6 @@ public class MathUtils {
         if (keepInLogSpace) {
             for (int i = 0; i < array.length; i++) {
                 array[i] -= maxValue;
-                array[i] = Math.max(array[i], LOG10_P_OF_ZERO);
             }
             return array;
         }
@@ -613,8 +612,11 @@ public class MathUtils {
             sum += normalized[i];
         for (int i = 0; i < array.length; i++) {
             double x = normalized[i] / sum;
-            if (takeLog10OfOutput)
-                x = Math.max(Math.log10(x), LOG10_P_OF_ZERO);
+            if (takeLog10OfOutput) {
+                x = Math.log10(x);
+                if ( x < LOG10_P_OF_ZERO || Double.isInfinite(x) )
+                    x = array[i] - maxValue;
+            }
 
             normalized[i] = x;
         }