Added AFCalcResultUnitTest
-- Ensures that the posteriors remain within reasonable ranges. Fixed bug where normalization of posteriors = {-1e30, 0.0} => {-100000, 0.0} which isn't good. Now tests ensure that the normalization process preserves log10 precision where possible
-- Updated MathUtils to make this possible
This commit is contained in:
parent
9b0ab4e941
commit
c74d7061fe
|
|
@ -0,0 +1,77 @@
|
||||||
|
package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.BaseTest;
|
||||||
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
|
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||||
|
import org.testng.Assert;
|
||||||
|
import org.testng.annotations.DataProvider;
|
||||||
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
public class AFCalcResultUnitTest extends BaseTest {
|
||||||
|
private static class MyTest {
|
||||||
|
final double[] Ls, expectedPosteriors;
|
||||||
|
|
||||||
|
private MyTest(double[] ls, double[] expectedPosteriors) {
|
||||||
|
Ls = ls;
|
||||||
|
this.expectedPosteriors = expectedPosteriors;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return "Ls [" + Utils.join(",", Ls) + "] expectedPosteriors [" + Utils.join(",", expectedPosteriors) + "]";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@DataProvider(name = "TestComputePosteriors")
|
||||||
|
public Object[][] makeTestCombineGLs() {
|
||||||
|
List<Object[]> tests = new ArrayList<Object[]>();
|
||||||
|
|
||||||
|
tests.add(new Object[]{new MyTest(log10Even, log10Even)});
|
||||||
|
|
||||||
|
for ( double L0 = -1e9; L0 < 0.0; L0 /= 10.0 ) {
|
||||||
|
for ( double L1 = -1e2; L1 < 0.0; L1 /= 100.0 ) {
|
||||||
|
final double[] input = new double[]{L0, L1};
|
||||||
|
final double[] expected = MathUtils.normalizeFromLog10(input, true);
|
||||||
|
tests.add(new Object[]{new MyTest(input, expected)});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for ( double bigBadL = -1e50; bigBadL < -1e200; bigBadL *= 10 ) {
|
||||||
|
// test that a huge bad likelihood remains, even with a massive better result
|
||||||
|
for ( final double betterL : Arrays.asList(-1000.0, -100.0, -10.0, -1.0, -0.1, -0.01, -0.001, 0.0)) {
|
||||||
|
tests.add(new Object[]{new MyTest(new double[]{bigBadL, betterL}, new double[]{bigBadL, 0.0})});
|
||||||
|
tests.add(new Object[]{new MyTest(new double[]{betterL, bigBadL}, new double[]{0.0, bigBadL})});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// test that a modest bad likelihood with an ~0.0 value doesn't get lost
|
||||||
|
for ( final double badL : Arrays.asList(-10000.0, -1000.0, -100.0, -10.0)) {
|
||||||
|
tests.add(new Object[]{new MyTest(new double[]{badL, -1e-9}, new double[]{badL, 0.0})});
|
||||||
|
tests.add(new Object[]{new MyTest(new double[]{-1e-9, badL}, new double[]{0.0, badL})});
|
||||||
|
}
|
||||||
|
|
||||||
|
return tests.toArray(new Object[][]{});
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
final static double[] log10Even = MathUtils.normalizeFromLog10(new double[]{0.5, 0.5}, true);
|
||||||
|
final static Allele C = Allele.create("C");
|
||||||
|
final static List<Allele> alleles = Arrays.asList(Allele.create("A", true), C);
|
||||||
|
|
||||||
|
@Test(enabled = true, dataProvider = "TestComputePosteriors")
|
||||||
|
private void testComputingPosteriors(final MyTest data) {
|
||||||
|
final AFCalcResult result = new AFCalcResult(new int[]{0}, 1, alleles, data.Ls, log10Even, Collections.singletonMap(C, -1.0));
|
||||||
|
|
||||||
|
Assert.assertEquals(result.getLog10PosteriorOfAFEq0(), data.expectedPosteriors[0], 1e-3, "AF = 0 not expected");
|
||||||
|
Assert.assertEquals(result.getLog10PosteriorOfAFGT0(), data.expectedPosteriors[1], 1e-3, "AF > 0 not expected");
|
||||||
|
|
||||||
|
final double[] actualPosteriors = new double[]{result.getLog10PosteriorOfAFEq0(), result.getLog10PosteriorOfAFGT0()};
|
||||||
|
Assert.assertEquals(MathUtils.sumLog10(actualPosteriors), 1.0, 1e-3, "Posteriors don't sum to 1 with 1e-3 precision");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -446,7 +446,7 @@ public class AFCalcUnitTest extends BaseTest {
|
||||||
return tests.toArray(new Object[][]{});
|
return tests.toArray(new Object[][]{});
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(enabled = true & ! DEBUG_ONLY, dataProvider = "Models")
|
@Test(enabled = true && !DEBUG_ONLY, dataProvider = "Models")
|
||||||
public void testBiallelicPriors(final AFCalc model) {
|
public void testBiallelicPriors(final AFCalc model) {
|
||||||
|
|
||||||
for ( int REF_PL = 10; REF_PL <= 20; REF_PL += 10 ) {
|
for ( int REF_PL = 10; REF_PL <= 20; REF_PL += 10 ) {
|
||||||
|
|
@ -454,7 +454,9 @@ public class AFCalcUnitTest extends BaseTest {
|
||||||
|
|
||||||
for ( int log10NonRefPrior = 1; log10NonRefPrior < 10*REF_PL; log10NonRefPrior += 1 ) {
|
for ( int log10NonRefPrior = 1; log10NonRefPrior < 10*REF_PL; log10NonRefPrior += 1 ) {
|
||||||
final double refPrior = 1 - QualityUtils.qualToErrorProb(log10NonRefPrior);
|
final double refPrior = 1 - QualityUtils.qualToErrorProb(log10NonRefPrior);
|
||||||
final double[] priors = MathUtils.normalizeFromLog10(MathUtils.toLog10(new double[]{refPrior, (1-refPrior) / 2, (1-refPrior) / 2}), true);
|
final double nonRefPrior = (1-refPrior) / 2;
|
||||||
|
final double[] priors = MathUtils.normalizeFromLog10(MathUtils.toLog10(new double[]{refPrior, nonRefPrior, nonRefPrior}), true);
|
||||||
|
if ( ! Double.isInfinite(priors[1]) ) {
|
||||||
GetGLsTest cfg = new GetGLsTest(model, 1, Arrays.asList(AB), priors, "pNonRef" + log10NonRefPrior);
|
GetGLsTest cfg = new GetGLsTest(model, 1, Arrays.asList(AB), priors, "pNonRef" + log10NonRefPrior);
|
||||||
final AFCalcResult resultTracker = cfg.execute();
|
final AFCalcResult resultTracker = cfg.execute();
|
||||||
final int actualAC = resultTracker.getAlleleCountsOfMLE()[0];
|
final int actualAC = resultTracker.getAlleleCountsOfMLE()[0];
|
||||||
|
|
@ -477,6 +479,7 @@ public class AFCalcUnitTest extends BaseTest {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@Test(enabled = true && ! DEBUG_ONLY, dataProvider = "Models")
|
@Test(enabled = true && ! DEBUG_ONLY, dataProvider = "Models")
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -271,15 +271,7 @@ public class AFCalcResult {
|
||||||
final double[] log10UnnormalizedPosteriors = new double[log10LikelihoodsOfAC.length];
|
final double[] log10UnnormalizedPosteriors = new double[log10LikelihoodsOfAC.length];
|
||||||
for ( int i = 0; i < log10LikelihoodsOfAC.length; i++ )
|
for ( int i = 0; i < log10LikelihoodsOfAC.length; i++ )
|
||||||
log10UnnormalizedPosteriors[i] = log10LikelihoodsOfAC[i] + log10PriorsOfAC[i];
|
log10UnnormalizedPosteriors[i] = log10LikelihoodsOfAC[i] + log10PriorsOfAC[i];
|
||||||
|
return MathUtils.normalizeFromLog10(log10UnnormalizedPosteriors, true, false);
|
||||||
// necessary because the posteriors may be so skewed that the log-space normalized value isn't
|
|
||||||
// good, so we have to try both log-space normalization as well as the real-space normalization if the
|
|
||||||
// result isn't good
|
|
||||||
final double[] logNormalized = MathUtils.normalizeFromLog10(log10UnnormalizedPosteriors, true, false);
|
|
||||||
if ( goodLog10ProbVector(logNormalized, logNormalized.length, true) )
|
|
||||||
return logNormalized;
|
|
||||||
else
|
|
||||||
return MathUtils.normalizeFromLog10(log10UnnormalizedPosteriors, true, true);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
||||||
|
|
@ -596,7 +596,6 @@ public class MathUtils {
|
||||||
if (keepInLogSpace) {
|
if (keepInLogSpace) {
|
||||||
for (int i = 0; i < array.length; i++) {
|
for (int i = 0; i < array.length; i++) {
|
||||||
array[i] -= maxValue;
|
array[i] -= maxValue;
|
||||||
array[i] = Math.max(array[i], LOG10_P_OF_ZERO);
|
|
||||||
}
|
}
|
||||||
return array;
|
return array;
|
||||||
}
|
}
|
||||||
|
|
@ -613,8 +612,11 @@ public class MathUtils {
|
||||||
sum += normalized[i];
|
sum += normalized[i];
|
||||||
for (int i = 0; i < array.length; i++) {
|
for (int i = 0; i < array.length; i++) {
|
||||||
double x = normalized[i] / sum;
|
double x = normalized[i] / sum;
|
||||||
if (takeLog10OfOutput)
|
if (takeLog10OfOutput) {
|
||||||
x = Math.max(Math.log10(x), LOG10_P_OF_ZERO);
|
x = Math.log10(x);
|
||||||
|
if ( x < LOG10_P_OF_ZERO || Double.isInfinite(x) )
|
||||||
|
x = array[i] - maxValue;
|
||||||
|
}
|
||||||
|
|
||||||
normalized[i] = x;
|
normalized[i] = x;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue