From 326f42927050ad326ec41a64f6d8c72c17ecc22f Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 19 Oct 2012 14:06:41 -0400 Subject: [PATCH] Bugfixes to make new AFCalc system pass integrationtests -- GeneralPloidyExactAFCalc turns -Infinity values into -Double.MAX_VALUE, so our calculations pass unit tests -- Bugfix for GeneralPloidyGenotypeLikelihoodsCalculationModel, return a null VC when the only allele we get from our final alleles to use method is the reference base -- Fix calculation of reference posteriors when P(AF == 0) = 0.0 and P(AF == 0) = X for some meaningful value of X. Added unit test to ensure this behavior is correct -- Fix horrible sorting bug in IndependentAllelesDiploidExactAFCalc that applied the theta^N priors in the wrong order. Add contract to ensure this doesn't ever happen again -- Bugfix in GLBasedSampleSelector, where VCs without any polymorphic alleles were being sent to the exact model -- --- ...dyGenotypeLikelihoodsCalculationModel.java | 2 +- .../afcalc/GeneralPloidyExactAFCalc.java | 4 +-- .../afcalc/AFCalcResultUnitTest.java | 5 ++++ .../genotyper/afcalc/AFCalcUnitTest.java | 2 +- .../IndependentAllelesDiploidExactAFCalc.java | 28 +++++++++++++++++-- .../GLBasedSampleSelector.java | 3 ++ 6 files changed, 37 insertions(+), 7 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyGenotypeLikelihoodsCalculationModel.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyGenotypeLikelihoodsCalculationModel.java index 4c20700ac..2522fc16e 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyGenotypeLikelihoodsCalculationModel.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyGenotypeLikelihoodsCalculationModel.java @@ -245,7 +245,7 @@ public abstract class GeneralPloidyGenotypeLikelihoodsCalculationModel extends G // find the alternate allele(s) that we should be using final List alleles = getFinalAllelesToUse(tracker, ref, allAllelesToUse, GLs); - if (alleles == null || alleles.isEmpty()) + if (alleles == null || alleles.isEmpty() || (alleles.size() == 1 && alleles.get(0).isReference())) return null; // start making the VariantContext final GenomeLoc loc = ref.getLocus(); diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/GeneralPloidyExactAFCalc.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/GeneralPloidyExactAFCalc.java index 9c7883ab8..51b7fb633 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/GeneralPloidyExactAFCalc.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/GeneralPloidyExactAFCalc.java @@ -448,7 +448,7 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc { // update the MLE if necessary final int altCounts[] = Arrays.copyOfRange(set.getACcounts().getCounts(),1, set.getACcounts().getCounts().length); // TODO -- GUILLERMO THIS CODE MAY PRODUCE POSITIVE LIKELIHOODS OR -INFINITY - getStateTracker().updateMLEifNeeded(MathUtils.goodLog10Probability(log10LofK) ? log10LofK : MathUtils.LOG10_P_OF_ZERO, altCounts); + getStateTracker().updateMLEifNeeded(Math.max(Math.min(log10LofK, 0.0), -Double.MAX_VALUE), altCounts); // apply the priors over each alternate allele for (final int ACcount : altCounts ) { @@ -456,7 +456,7 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc { log10LofK += log10AlleleFrequencyPriors[ACcount]; } // TODO -- GUILLERMO THIS CODE MAY PRODUCE POSITIVE LIKELIHOODS OR -INFINITY - getStateTracker().updateMAPifNeeded(MathUtils.goodLog10Probability(log10LofK) ? log10LofK : MathUtils.LOG10_P_OF_ZERO, altCounts); + getStateTracker().updateMAPifNeeded(Math.max(Math.min(log10LofK, 0.0), -Double.MAX_VALUE), altCounts); return log10LofK; } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResultUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResultUnitTest.java index 1070642e9..cbe2eb268 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResultUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResultUnitTest.java @@ -56,6 +56,11 @@ public class AFCalcResultUnitTest extends BaseTest { tests.add(new Object[]{new MyTest(new double[]{-1e-9, badL}, new double[]{0.0, badL})}); } + // test that a non-ref site gets reasonable posteriors with an ~0.0 value doesn't get lost + for ( final double nonRefL : Arrays.asList(-100.0, -50.0, -10.0, -9.0, -8.0, -7.0, -6.0, -5.0)) { + tests.add(new Object[]{new MyTest(new double[]{0.0, nonRefL}, new double[]{0.0, nonRefL})}); + } + return tests.toArray(new Object[][]{}); } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcUnitTest.java index 25df0f6d2..9c6c8e8ab 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcUnitTest.java @@ -185,7 +185,7 @@ public class AFCalcUnitTest extends BaseTest { testResultSimple(cfg); } - @Test(enabled = true, dataProvider = "badGLs") + @Test(enabled = true && !DEBUG_ONLY, dataProvider = "badGLs") public void testBadGLs(GetGLsTest cfg) { testResultSimple(cfg); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalc.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalc.java index 2f85a5246..ea89d3802 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalc.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalc.java @@ -100,7 +100,7 @@ import java.util.*; private final static class CompareAFCalcResultsByPNonRef implements Comparator { @Override public int compare(AFCalcResult o1, AFCalcResult o2) { - return Double.compare(o1.getLog10PosteriorOfAFGT0(), o2.getLog10PosteriorOfAFGT0()); + return -1 * Double.compare(o1.getLog10PosteriorOfAFGT0(), o2.getLog10PosteriorOfAFGT0()); } } @@ -313,6 +313,7 @@ import java.util.*; * * @param sortedResultsWithThetaNPriors the pNonRef result for each allele independently */ + @Requires("sortedByPosteriorGT(sortedResultsWithThetaNPriors)") protected AFCalcResult combineIndependentPNonRefs(final VariantContext vc, final List sortedResultsWithThetaNPriors) { int nEvaluations = 0; @@ -321,8 +322,9 @@ import java.util.*; final double[] log10PriorsOfAC = new double[2]; final Map log10pNonRefByAllele = new HashMap(nAltAlleles); - // this value is a sum in log space + // the sum of the log10 posteriors for AF == 0 and AF > 0 to determine joint probs double log10PosteriorOfACEq0Sum = 0.0; + double log10PosteriorOfACGt0Sum = 0.0; for ( final AFCalcResult sortedResultWithThetaNPriors : sortedResultsWithThetaNPriors ) { final Allele altAllele = sortedResultWithThetaNPriors.getAllelesUsedInGenotyping().get(1); @@ -337,6 +339,7 @@ import java.util.*; // the AF > 0 case requires us to store the normalized likelihood for later summation if ( sortedResultWithThetaNPriors.getLog10PosteriorOfAFGT0() > MIN_LOG10_CONFIDENCE_TO_INCLUDE_ALLELE_IN_POSTERIOR ) log10PosteriorOfACEq0Sum += sortedResultWithThetaNPriors.getLog10PosteriorOfAFEq0(); + log10PosteriorOfACGt0Sum += sortedResultWithThetaNPriors.getLog10PosteriorOfAFGT0(); // bind pNonRef for allele to the posterior value of the AF > 0 with the new adjusted prior log10pNonRefByAllele.put(altAllele, sortedResultWithThetaNPriors.getLog10PosteriorOfAFGT0()); @@ -348,7 +351,16 @@ import java.util.*; // In principle, if B_p = x and C_p = y are the probabilities of being poly for alleles B and C, // the probability of being poly is (1 - B_p) * (1 - C_p) = (1 - x) * (1 - y). We want to estimate confidently // log10((1 - x) * (1 - y)) which is log10(1 - x) + log10(1 - y). This sum is log10PosteriorOfACEq0 - final double log10PosteriorOfACGt0 = Math.max(Math.log10(1 - Math.pow(10, log10PosteriorOfACEq0Sum)), MathUtils.LOG10_P_OF_ZERO); + // + // note we need to handle the case where the posterior of AF == 0 is 0.0, in which case we + // use the summed log10PosteriorOfACGt0Sum directly. This happens in cases where + // AF > 0 : 0.0 and AF == 0 : -16, and if you use the inverse calculation you get 0.0 and MathUtils.LOG10_P_OF_ZERO + final double log10PosteriorOfACGt0; + if ( log10PosteriorOfACEq0Sum == 0.0 ) + log10PosteriorOfACGt0 = log10PosteriorOfACGt0Sum; + else + log10PosteriorOfACGt0 = Math.max(Math.log10(1 - Math.pow(10, log10PosteriorOfACEq0Sum)), MathUtils.LOG10_P_OF_ZERO); + final double[] log10LikelihoodsOfAC = new double[] { // L + prior = posterior => L = poster - prior log10PosteriorOfACEq0Sum - log10PriorsOfAC[0], @@ -362,4 +374,14 @@ import java.util.*; MathUtils.normalizeFromLog10(log10PriorsOfAC, true), log10pNonRefByAllele, sortedResultsWithThetaNPriors); } + + private static boolean sortedByPosteriorGT(final List sortedVCs) { + double lastPosteriorGt0 = sortedVCs.get(0).getLog10PosteriorOfAFGT0(); + for ( final AFCalcResult vc : sortedVCs ) { + if ( vc.getLog10PosteriorOfAFGT0() > lastPosteriorGt0 ) + return false; + lastPosteriorGt0 = vc.getLog10PosteriorOfAFGT0(); + } + return true; + } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GLBasedSampleSelector.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GLBasedSampleSelector.java index f8c871e7d..48a2d2700 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GLBasedSampleSelector.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GLBasedSampleSelector.java @@ -48,6 +48,9 @@ public class GLBasedSampleSelector extends SampleSelector { // first subset to the samples VariantContext subContext = vc.subContextFromSamples(samples); + if ( ! subContext.isPolymorphicInSamples() ) + return false; + // now check to see (using EXACT model) whether this should be variant // do we want to apply a prior? maybe user-spec? if ( flatPriors == null ) {