From 4f1b1c4228bafe1e9f33b223ecd2e64fdc0d0493 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Mon, 8 Oct 2012 12:31:26 -0400 Subject: [PATCH] Intermediate commit II on simplifying AFCalcResult -- All of the code now uses the AFCalc object, not the not package protected AFCalcResultTracker. Nearly all unit tests pass (expect for a contract failing one that will be dealt with in subsequent commit), due to -Infinity values from normalizeLog10. -- Changed the way that UnifiedGenotyper decides if the best model is non-ref. Previously looked at the MAP AC, but the MAP AC values are no longer provided by AFCalcResult. This is on purpose, because the MAP isn't a meaningful quantity for the exact model (i.e., everything is going to go to MLE AC in some upcoming commit). If you want to understand why come talk to me. Now uses the isPolymorphic function and the EMIT confidence, so that if pNonRef > EMIT then the site is poly, otherwise it's mono. --- .../ExactAFCalculationPerformanceTest.java | 10 +- .../ExactAFCalculationModelUnitTest.java | 123 ++++++++---------- .../genotyper/UnifiedGenotyperEngine.java | 42 +++--- .../gatk/walkers/genotyper/afcalc/AFCalc.java | 10 +- .../genotyper/afcalc/AFCalcResult.java | 20 ++- .../genotyper/afcalc/AFCalcResultTracker.java | 13 +- .../afcalc/ConstrainedDiploidExactAFCalc.java | 9 +- .../genotyper/afcalc/DiploidExactAFCalc.java | 6 +- .../IndependentAllelesDiploidExactAFCalc.java | 65 ++++----- .../GLBasedSampleSelector.java | 7 +- 10 files changed, 158 insertions(+), 147 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactAFCalculationPerformanceTest.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactAFCalculationPerformanceTest.java index 628b4f880..5f563d489 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactAFCalculationPerformanceTest.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactAFCalculationPerformanceTest.java @@ -61,7 +61,7 @@ public class ExactAFCalculationPerformanceTest { final VariantContext vc = testBuilder.makeACTest(ACs, 0, nonTypePL); timer.start(); - final AFCalcResultTracker resultTracker = calc.getLog10PNonRef(vc, priors); + final AFCalcResult resultTracker = calc.getLog10PNonRef(vc, priors); final long runtime = timer.getElapsedTimeNano(); int otherAC = 0; @@ -127,7 +127,7 @@ public class ExactAFCalculationPerformanceTest { vcb.genotypes(genotypes); timer.start(); - final AFCalcResultTracker resultTracker = calc.getLog10PNonRef(vcb.make(), priors); + final AFCalcResult resultTracker = calc.getLog10PNonRef(vcb.make(), priors); final long runtime = timer.getElapsedTimeNano(); final List columns = new LinkedList(coreValues); @@ -157,7 +157,7 @@ public class ExactAFCalculationPerformanceTest { final VariantContext vc = testBuilder.makeACTest(ac, nNonInformative, nonTypePL); timer.start(); - final AFCalcResultTracker resultTracker = calc.getLog10PNonRef(vc, priors); + final AFCalcResult resultTracker = calc.getLog10PNonRef(vc, priors); final long runtime = timer.getElapsedTimeNano(); final List columns = new LinkedList(coreValues); @@ -219,9 +219,9 @@ public class ExactAFCalculationPerformanceTest { final VariantContext vc = testBuilder.makeACTest(new int[]{ac}, 0, 100); final SimpleTimer timer = new SimpleTimer().start(); - final AFCalcResultTracker resultTracker = testBuilder.makeModel().getLog10PNonRef(vc, testBuilder.makePriors()); + final AFCalcResult resultTracker = testBuilder.makeModel().getLog10PNonRef(vc, testBuilder.makePriors()); final long runtime = timer.getElapsedTimeNano(); - logger.info("result " + resultTracker.getNormalizedPosteriorOfAFGTZero()); + logger.info("result " + resultTracker.getLog10PosteriorOfAFGT0()); logger.info("runtime " + runtime); } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactAFCalculationModelUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactAFCalculationModelUnitTest.java index 6402ca6c5..85f80d5be 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactAFCalculationModelUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactAFCalculationModelUnitTest.java @@ -22,7 +22,7 @@ public class ExactAFCalculationModelUnitTest extends BaseTest { static int sampleNameCounter = 0; static Genotype AA1, AB1, BB1, NON_INFORMATIVE1; static Genotype AA2, AB2, AC2, BB2, BC2, CC2, NON_INFORMATIVE2; - final double[] FLAT_3SAMPLE_PRIORS = new double[2*3+1]; // flat priors + final double[] FLAT_3SAMPLE_PRIORS = MathUtils.normalizeFromLog10(new double[2*3+1], true); // flat priors final private static boolean INCLUDE_BIALLELIC = true; final private static boolean INCLUDE_TRIALLELIC = true; final private static boolean Guillermo_FIXME = false; // TODO -- can only be enabled when GdA fixes bug @@ -76,11 +76,11 @@ public class ExactAFCalculationModelUnitTest extends BaseTest { } } - public AFCalcResultTracker execute() { + public AFCalcResult execute() { return getCalc().getLog10PNonRef(getVC(), getPriors()); } - public AFCalcResultTracker executeRef() { + public AFCalcResult executeRef() { final ExactAFCalc ref = new ReferenceDiploidExactAFCalc(getCalc().nSamples, getCalc().getMaxAltAlleles()); return ref.getLog10PNonRef(getVC(), getPriors()); } @@ -185,7 +185,7 @@ public class ExactAFCalculationModelUnitTest extends BaseTest { final ExactAFCalc generalCalc = new GeneralPloidyExactAFCalc(nSamples, 4, 2); final ExactAFCalc indCalc = new IndependentAllelesDiploidExactAFCalc(nSamples, 4); - final double[] priors = new double[2*nSamples+1]; // flat priors + final double[] priors = MathUtils.normalizeFromLog10(new double[2*nSamples+1], true); // flat priors for ( ExactAFCalc model : Arrays.asList(diploidCalc, optDiploidCalc, generalCalc, indCalc) ) { final GetGLsTest onlyInformative = new GetGLsTest(model, testData.nAltAlleles, testData.called, priors, "flat"); @@ -209,28 +209,18 @@ public class ExactAFCalculationModelUnitTest extends BaseTest { @Test(enabled = true, dataProvider = "GLsWithNonInformative", dependsOnMethods = "testGLs") public void testGLsWithNonInformative(GetGLsTest onlyInformative, GetGLsTest withNonInformative) { - final AFCalcResultTracker expected = onlyInformative.execute(); - final AFCalcResultTracker actual = withNonInformative.execute(); + final AFCalcResult expected = onlyInformative.execute(); + final AFCalcResult actual = withNonInformative.execute(); testResultSimple(withNonInformative); - - Assert.assertEquals(actual.getLog10PosteriorOfAFzero(), expected.getLog10LikelihoodOfAFzero()); - Assert.assertEquals(actual.getLog10LikelihoodOfAFzero(), expected.getLog10LikelihoodOfAFzero()); - Assert.assertEquals(actual.getLog10PosteriorsMatrixSumWithoutAFzero(), expected.getLog10PosteriorsMatrixSumWithoutAFzero()); - Assert.assertEquals(actual.getAlleleCountsOfMAP(), expected.getAlleleCountsOfMAP()); - Assert.assertEquals(actual.getAlleleCountsOfMLE(), expected.getAlleleCountsOfMLE()); - Assert.assertEquals(actual.getLog10MAP(), expected.getLog10MAP()); - Assert.assertEquals(actual.getLog10MLE(), expected.getLog10MLE()); - Assert.assertEquals(actual.getAllelesUsedInGenotyping(), expected.getAllelesUsedInGenotyping()); + compareAFCalcResults(actual, expected); } private void testResultSimple(final GetGLsTest cfg) { - final AFCalcResultTracker refResultTracker = cfg.executeRef(); - final AFCalcResultTracker resultTracker = cfg.execute(); + final AFCalcResult refResultTracker = cfg.executeRef(); + final AFCalcResult resultTracker = cfg.execute(); - compareToRefResult(refResultTracker, resultTracker); - - Assert.assertEquals(resultTracker.getNormalizedPosteriorOfAFzero() + resultTracker.getNormalizedPosteriorOfAFGTZero(), 1.0, 1e-4); + compareAFCalcResults(resultTracker, refResultTracker); // final int minNumberOfEvaluations = cfg.getVC().getCalledChrCount(); // Assert.assertTrue(result.getnEvaluations() >= minNumberOfEvaluations, @@ -257,20 +247,17 @@ public class ExactAFCalculationModelUnitTest extends BaseTest { // } } - private void compareToRefResult(final AFCalcResultTracker refResultTracker, - final AFCalcResultTracker resultTracker) { - final double TOLERANCE = 1; - // MAP may not be equal -// Assert.assertEquals(result.getAlleleCountsOfMAP(), refResult.getAlleleCountsOfMAP()); - Assert.assertEquals(resultTracker.getAlleleCountsOfMLE(), refResultTracker.getAlleleCountsOfMLE()); - Assert.assertEquals(resultTracker.getAllelesUsedInGenotyping(), refResultTracker.getAllelesUsedInGenotyping()); - Assert.assertEquals(resultTracker.getLog10LikelihoodOfAFzero(), refResultTracker.getLog10LikelihoodOfAFzero(), TOLERANCE); -// Assert.assertEquals(result.getLog10MAP(), refResult.getLog10MAP(), TOLERANCE); -// Assert.assertEquals(result.getLog10MLE(), refResult.getLog10MLE(), TOLERANCE); -// Assert.assertEquals(result.getLog10PosteriorOfAFzero(), refResult.getLog10PosteriorOfAFzero(), TOLERANCE); -// Assert.assertEquals(result.getLog10PosteriorsMatrixSumWithoutAFzero(), refResult.getLog10PosteriorsMatrixSumWithoutAFzero(), TOLERANCE); - Assert.assertEquals(resultTracker.getNormalizedPosteriorOfAFGTZero(), refResultTracker.getNormalizedPosteriorOfAFGTZero(), 0.5); - Assert.assertEquals(resultTracker.getNormalizedPosteriorOfAFzero(), refResultTracker.getNormalizedPosteriorOfAFzero(), 0.5); + private void compareAFCalcResults(final AFCalcResult actual, final AFCalcResult expected) { + final double TOLERANCE = 1; // TODO -- tighten up tolerances + + Assert.assertEquals(actual.getLog10PriorOfAFEq0(), expected.getLog10PriorOfAFEq0(), TOLERANCE); + Assert.assertEquals(actual.getLog10PriorOfAFGT0(), expected.getLog10PriorOfAFGT0(), TOLERANCE); + Assert.assertEquals(actual.getLog10LikelihoodOfAFEq0(), expected.getLog10LikelihoodOfAFEq0(), TOLERANCE); + Assert.assertEquals(actual.getLog10LikelihoodOfAFGT0(), expected.getLog10LikelihoodOfAFGT0(), TOLERANCE); + Assert.assertEquals(actual.getLog10PosteriorOfAFEq0(), expected.getLog10PosteriorOfAFEq0(), TOLERANCE); + Assert.assertEquals(actual.getLog10PosteriorOfAFGT0(), expected.getLog10PosteriorOfAFGT0(), TOLERANCE); + Assert.assertEquals(actual.getAlleleCountsOfMLE(), expected.getAlleleCountsOfMLE()); + Assert.assertEquals(actual.getAllelesUsedInGenotyping(), expected.getAllelesUsedInGenotyping()); } @Test(enabled = true, dataProvider = "Models") @@ -278,9 +265,9 @@ public class ExactAFCalculationModelUnitTest extends BaseTest { final Genotype BB = makePL(Arrays.asList(C, C), 20000000, 20000000, 0); GetGLsTest cfg = new GetGLsTest(calc, 1, Arrays.asList(BB, BB, BB), FLAT_3SAMPLE_PRIORS, "flat"); - final AFCalcResultTracker resultTracker = cfg.execute(); + final AFCalcResult resultTracker = cfg.execute(); - int calculatedAlleleCount = resultTracker.getAlleleCountsOfMAP()[0]; + int calculatedAlleleCount = resultTracker.getAlleleCountsOfMLE()[0]; Assert.assertEquals(calculatedAlleleCount, 6); } @@ -290,10 +277,10 @@ public class ExactAFCalculationModelUnitTest extends BaseTest { final Genotype AC = makePL(Arrays.asList(A, G), 100, 100, 100, 0, 100, 100); GetGLsTest cfg = new GetGLsTest(calc, 2, Arrays.asList(AB, AC), FLAT_3SAMPLE_PRIORS, "flat"); - final AFCalcResultTracker resultTracker = cfg.execute(); + final AFCalcResult resultTracker = cfg.execute(); - Assert.assertEquals(resultTracker.getAlleleCountsOfMAP()[0], 1); - Assert.assertEquals(resultTracker.getAlleleCountsOfMAP()[1], 1); + Assert.assertEquals(resultTracker.getAlleleCountsOfMLE()[0], 1); + Assert.assertEquals(resultTracker.getAlleleCountsOfMLE()[1], 1); } // -------------------------------------------------------------------------------- @@ -328,7 +315,7 @@ public class ExactAFCalculationModelUnitTest extends BaseTest { for ( int i = 0; i < PLs.length; i++ ) PLs[i] = g.getPL()[i] * ((int)Math.log10(scaleFactor)+1); final Genotype scaledG = new GenotypeBuilder(g).PL(PLs).make(); final double scaledPNonRef = pNonRef < 0.5 ? pNonRef / scaleFactor : 1 - ((1-pNonRef) / scaleFactor); - return new PNonRefData(vc, scaledG, scaledPNonRef, tolerance / scaleFactor, true); + return new PNonRefData(vc, scaledG, scaledPNonRef, tolerance, true); } else { return this; } @@ -352,22 +339,24 @@ public class ExactAFCalculationModelUnitTest extends BaseTest { final List constrainedModel = Arrays.asList(ExactAFCalculationTestBuilder.ModelType.ConstrainedDiploidExact); + final double TOLERANCE = 0.5; + final List initialPNonRefData = Arrays.asList( // bi-allelic sites - new PNonRefData(vc2, makePL(AA, 0, 10, 10), 0.1666667, 1e-1, true), - new PNonRefData(vc2, makePL(AA, 0, 1, 10), 0.4721084, 1e-1, false, constrainedModel), - new PNonRefData(vc2, makePL(AA, 0, 1, 1), 0.6136992, 1e-1, false, constrainedModel), - new PNonRefData(vc2, makePL(AA, 0, 5, 5), 0.3874259, 1e-1, false, constrainedModel), - new PNonRefData(vc2, makePL(AC, 10, 0, 10), 0.9166667, 1e-1, true), - new PNonRefData(vc2, makePL(CC, 10, 10, 0), 0.9166667, 1e-1, true), + new PNonRefData(vc2, makePL(AA, 0, 10, 10), 0.1666667, TOLERANCE, true), + new PNonRefData(vc2, makePL(AA, 0, 1, 10), 0.4721084, TOLERANCE, false, constrainedModel), + new PNonRefData(vc2, makePL(AA, 0, 1, 1), 0.6136992, TOLERANCE, false, constrainedModel), + new PNonRefData(vc2, makePL(AA, 0, 5, 5), 0.3874259, TOLERANCE, false, constrainedModel), + new PNonRefData(vc2, makePL(AC, 10, 0, 10), 0.9166667, TOLERANCE, true), + new PNonRefData(vc2, makePL(CC, 10, 10, 0), 0.9166667, TOLERANCE, true), // tri-allelic sites -- cannot scale because of the naivety of our scaling estimator - new PNonRefData(vc3, makePL(AA, 0, 10, 10, 10, 10, 10), 0.3023255813953489, 2e-1, false), // more tolerance because constrained model is a bit inaccurate - new PNonRefData(vc3, makePL(AC, 10, 0, 10, 10, 10, 10), 0.9166667, 1e-1, false), - new PNonRefData(vc3, makePL(CC, 10, 10, 0, 10, 10, 10), 0.9166667, 1e-1, false), - new PNonRefData(vc3, makePL(AG, 10, 10, 10, 0, 10, 10), 0.9166667, 1e-1, false), - new PNonRefData(vc3, makePL(CG, 10, 10, 10, 10, 0, 10), 0.80, 1e-1, false), - new PNonRefData(vc3, makePL(GG, 10, 10, 10, 10, 10, 0), 0.9166667, 1e-1, false) + new PNonRefData(vc3, makePL(AA, 0, 10, 10, 10, 10, 10), 0.3023255813953489, TOLERANCE * 2, false), // more tolerance because constrained model is a bit inaccurate + new PNonRefData(vc3, makePL(AC, 10, 0, 10, 10, 10, 10), 0.9166667, TOLERANCE, false), + new PNonRefData(vc3, makePL(CC, 10, 10, 0, 10, 10, 10), 0.9166667, TOLERANCE, false), + new PNonRefData(vc3, makePL(AG, 10, 10, 10, 0, 10, 10), 0.9166667, TOLERANCE, false), + new PNonRefData(vc3, makePL(CG, 10, 10, 10, 10, 0, 10), 0.80, TOLERANCE, false), + new PNonRefData(vc3, makePL(GG, 10, 10, 10, 10, 10, 0), 0.9166667, TOLERANCE, false) ); for ( ExactAFCalculationTestBuilder.ModelType modelType : ExactAFCalculationTestBuilder.ModelType.values() ) { @@ -400,9 +389,9 @@ public class ExactAFCalculationModelUnitTest extends BaseTest { final VariantContextBuilder vcb = new VariantContextBuilder(vcRoot); vcb.genotypes(genotypes); - final AFCalcResultTracker resultTracker = testBuilder.makeModel().getLog10PNonRef(vcb.make(), testBuilder.makePriors()); + final AFCalcResult resultTracker = testBuilder.makeModel().getLog10PNonRef(vcb.make(), testBuilder.makePriors()); - Assert.assertEquals(resultTracker.getNormalizedPosteriorOfAFGTZero(), expectedPNonRef, tolerance, + Assert.assertEquals(resultTracker.getLog10PosteriorOfAFGT0(), Math.log10(expectedPNonRef), tolerance, "Actual pNonRef not within tolerance " + tolerance + " of expected"); } @@ -428,26 +417,24 @@ public class ExactAFCalculationModelUnitTest extends BaseTest { final int REF_PL = 10; final Genotype AB = makePL(Arrays.asList(A,C), REF_PL, 0, 10000); - for ( int log10NonRefPrior = 1; log10NonRefPrior < 100*REF_PL; log10NonRefPrior += 1 ) { + for ( int log10NonRefPrior = 1; log10NonRefPrior < 10*REF_PL; log10NonRefPrior += 1 ) { final double refPrior = 1 - QualityUtils.qualToErrorProb(log10NonRefPrior); final double[] priors = MathUtils.toLog10(new double[]{refPrior, (1-refPrior) / 2, (1-refPrior) / 2}); GetGLsTest cfg = new GetGLsTest(model, 1, Arrays.asList(AB), priors, "pNonRef" + log10NonRefPrior); - final AFCalcResultTracker resultTracker = cfg.execute(); - final int actualAC = resultTracker.getAlleleCountsOfMAP()[0]; + final AFCalcResult resultTracker = cfg.execute(); + final int actualAC = resultTracker.getAlleleCountsOfMLE()[0]; final double pRefWithPrior = AB.getLikelihoods().getAsVector()[0] + priors[0]; final double pHetWithPrior = AB.getLikelihoods().getAsVector()[1] + priors[1]; - final boolean expectNonRef = pRefWithPrior <= pHetWithPrior; + final double nonRefPost = Math.pow(10, pHetWithPrior) / (Math.pow(10, pRefWithPrior) + Math.pow(10, pHetWithPrior)); - if ( expectNonRef ) - Assert.assertTrue(resultTracker.getNormalizedPosteriorOfAFGTZero() > 0.5); - else - Assert.assertTrue(resultTracker.getNormalizedPosteriorOfAFGTZero() < 0.5); + if ( nonRefPost < 0.1 ) + Assert.assertTrue(resultTracker.isPolymorphic(-1)); - final int expectedAC = expectNonRef ? 1 : 0; - Assert.assertEquals(actualAC, expectedAC, + final int expectedMLEAC = 1; // the MLE is independent of the prior + Assert.assertEquals(actualAC, expectedMLEAC, "actual AC with priors " + log10NonRefPrior + " not expected " - + expectedAC + " priors " + Utils.join(",", priors)); + + expectedMLEAC + " priors " + Utils.join(",", priors)); } } @@ -468,8 +455,8 @@ public class ExactAFCalculationModelUnitTest extends BaseTest { final double nonRefPrior = (1-refPrior) / 2; final double[] priors = MathUtils.toLog10(new double[]{refPrior, nonRefPrior, nonRefPrior, nonRefPrior, nonRefPrior, nonRefPrior}); GetGLsTest cfg = new GetGLsTest(model, 2, Arrays.asList(AB, AC), priors, "pNonRef" + log10NonRefPrior); - final AFCalcResultTracker resultTracker = cfg.execute(); - final int actualAC_AB = resultTracker.getAlleleCountsOfMAP()[0]; + final AFCalcResult resultTracker = cfg.execute(); + final int actualAC_AB = resultTracker.getAlleleCountsOfMLE()[0]; final double pRefABWithPrior = AB.getLikelihoods().getAsVector()[0] + priors[0]; final double pHetABWithPrior = AB.getLikelihoods().getAsVector()[1] + priors[1]; @@ -480,7 +467,7 @@ public class ExactAFCalculationModelUnitTest extends BaseTest { final double nonRefPriorSecondAllele = Math.pow(nonRefPrior, 2); final double refPriorSecondAllele = 1 - nonRefPriorSecondAllele; - final int actualAC_AC = resultTracker.getAlleleCountsOfMAP()[1]; + final int actualAC_AC = resultTracker.getAlleleCountsOfMLE()[1]; final double pRefACWithPrior = AB.getLikelihoods().getAsVector()[0] + Math.log10(refPriorSecondAllele); final double pHetACWithPrior = AC.getLikelihoods().getAsVector()[3] + Math.log10(nonRefPriorSecondAllele); final int expectedAC_AC = pRefACWithPrior <= pHetACWithPrior ? 1 : 0; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index 92e1c31f0..8f1473121 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -35,7 +35,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine; import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.AFCalc; -import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.AFCalcResultTracker; +import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.AFCalcResult; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.classloader.PluginManager; @@ -363,7 +363,7 @@ public class UnifiedGenotyperEngine { generateEmptyContext(tracker, refContext, stratifiedContexts, rawContext)); } - AFCalcResultTracker AFresult = afcm.get().getLog10PNonRef(vc, getAlleleFrequencyPriors(model)); + AFCalcResult AFresult = afcm.get().getLog10PNonRef(vc, getAlleleFrequencyPriors(model)); // is the most likely frequency conformation AC=0 for all alternate alleles? boolean bestGuessIsRef = true; @@ -379,10 +379,14 @@ public class UnifiedGenotyperEngine { if ( indexOfAllele == -1 ) continue; - final int indexOfBestAC = AFresult.getAlleleCountsOfMAP()[indexOfAllele-1]; + // we are non-ref if the probability of being non-ref > the emit confidence. + // the emit confidence is phred-scaled, say 30 => 10^-3. + // the posterior AF > 0 is log10: -5 => 10^-5 + // we are non-ref if 10^-5 < 10^-3 => -5 < -3 + final boolean isNonRef = AFresult.isPolymorphic(UAC.STANDARD_CONFIDENCE_FOR_EMITTING / -10.0); // if the most likely AC is not 0, then this is a good alternate allele to use - if ( indexOfBestAC != 0 ) { + if ( ! isNonRef ) { myAlleles.add(alternateAllele); alleleCountsofMLE.add(AFresult.getAlleleCountsOfMLE()[indexOfAllele-1]); bestGuessIsRef = false; @@ -394,22 +398,10 @@ public class UnifiedGenotyperEngine { } } - // calculate p(f>0): - final double PoFEq0 = AFresult.getNormalizedPosteriorOfAFzero(); - final double PoFGT0 = AFresult.getNormalizedPosteriorOfAFGTZero(); - - double phredScaledConfidence; - if ( !bestGuessIsRef || UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) { - phredScaledConfidence = QualityUtils.phredScaleErrorRate(PoFEq0); - if ( Double.isInfinite(phredScaledConfidence) ) - phredScaledConfidence = -10.0 * AFresult.getLog10PosteriorOfAFzero(); - } else { - phredScaledConfidence = QualityUtils.phredScaleErrorRate(PoFGT0); - if ( Double.isInfinite(phredScaledConfidence) ) { - final double sum = AFresult.getLog10PosteriorsMatrixSumWithoutAFzero(); - phredScaledConfidence = (MathUtils.compareDoubles(sum, 0.0) == 0 ? 0 : -10.0 * sum); - } - } + final double PoFGT0 = Math.pow(10, AFresult.getLog10PosteriorOfAFGT0()); + final double phredScaledConfidence = ! bestGuessIsRef || UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES + ? -10 * AFresult.getLog10PosteriorOfAFEq0() + : -10 * AFresult.getLog10PosteriorOfAFGT0(); // return a null call if we don't pass the confidence cutoff or the most likely allele frequency is zero if ( UAC.OutputMode != OUTPUT_MODE.EMIT_ALL_SITES && !passesEmitThreshold(phredScaledConfidence, bestGuessIsRef) ) { @@ -462,7 +454,7 @@ public class UnifiedGenotyperEngine { // the overall lod //double overallLog10PofNull = AFresult.log10AlleleFrequencyPosteriors[0]; - double overallLog10PofF = AFresult.getLog10PosteriorsMatrixSumWithoutAFzero(); + double overallLog10PofF = AFresult.getLog10LikelihoodOfAFGT0(); //if ( DEBUG_SLOD ) System.out.println("overallLog10PofF=" + overallLog10PofF); List allAllelesToUse = builder.make().getAlleles(); @@ -471,16 +463,16 @@ public class UnifiedGenotyperEngine { VariantContext vcForward = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.FORWARD, allAllelesToUse, false, model, perReadAlleleLikelihoodMap); AFresult = afcm.get().getLog10PNonRef(vcForward, getAlleleFrequencyPriors(model)); //double[] normalizedLog10Posteriors = MathUtils.normalizeFromLog10(AFresult.log10AlleleFrequencyPosteriors, true); - double forwardLog10PofNull = AFresult.getLog10PosteriorOfAFzero(); - double forwardLog10PofF = AFresult.getLog10PosteriorsMatrixSumWithoutAFzero(); + double forwardLog10PofNull = AFresult.getLog10LikelihoodOfAFEq0(); + double forwardLog10PofF = AFresult.getLog10LikelihoodOfAFGT0(); //if ( DEBUG_SLOD ) System.out.println("forwardLog10PofNull=" + forwardLog10PofNull + ", forwardLog10PofF=" + forwardLog10PofF); // the reverse lod VariantContext vcReverse = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.REVERSE, allAllelesToUse, false, model, perReadAlleleLikelihoodMap); AFresult = afcm.get().getLog10PNonRef(vcReverse, getAlleleFrequencyPriors(model)); //normalizedLog10Posteriors = MathUtils.normalizeFromLog10(AFresult.log10AlleleFrequencyPosteriors, true); - double reverseLog10PofNull = AFresult.getLog10PosteriorOfAFzero(); - double reverseLog10PofF = AFresult.getLog10PosteriorsMatrixSumWithoutAFzero(); + double reverseLog10PofNull = AFresult.getLog10LikelihoodOfAFEq0(); + double reverseLog10PofF = AFresult.getLog10LikelihoodOfAFGT0(); //if ( DEBUG_SLOD ) System.out.println("reverseLog10PofNull=" + reverseLog10PofNull + ", reverseLog10PofF=" + reverseLog10PofF); double forwardLod = forwardLog10PofF + reverseLog10PofNull - overallLog10PofF; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalc.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalc.java index 8245726b1..349c08f9c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalc.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalc.java @@ -105,7 +105,7 @@ public abstract class AFCalc implements Cloneable { * @param log10AlleleFrequencyPriors a prior vector nSamples x 2 in length indicating the Pr(AF = i) * @return result (for programming convenience) */ - public AFCalcResultTracker getLog10PNonRef(final VariantContext vc, final double[] log10AlleleFrequencyPriors) { + public AFCalcResult getLog10PNonRef(final VariantContext vc, final double[] log10AlleleFrequencyPriors) { if ( vc == null ) throw new IllegalArgumentException("VariantContext cannot be null"); if ( log10AlleleFrequencyPriors == null ) throw new IllegalArgumentException("priors vector cannot be null"); if ( resultTracker == null ) throw new IllegalArgumentException("Results object cannot be null"); @@ -123,7 +123,7 @@ public abstract class AFCalc implements Cloneable { printCallInfo(vcWorking, log10AlleleFrequencyPriors, nanoTime, resultTracker.getLog10PosteriorOfAFzero()); resultTracker.setAllelesUsedInGenotyping(vcWorking.getAlleles()); - return resultTracker; + return resultTracker.toAFCalcResult(log10AlleleFrequencyPriors); } // --------------------------------------------------------------------------- @@ -155,9 +155,9 @@ public abstract class AFCalc implements Cloneable { * @param resultTracker (pre-allocated) object to store results */ // TODO -- add consistent requires among args - public abstract void computeLog10PNonRef(final VariantContext vc, - final double[] log10AlleleFrequencyPriors, - final AFCalcResultTracker resultTracker); + protected abstract void computeLog10PNonRef(final VariantContext vc, + final double[] log10AlleleFrequencyPriors, + final AFCalcResultTracker resultTracker); /** * Must be overridden by concrete subclasses diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResult.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResult.java index e80dbc3d7..bf15e2039 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResult.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResult.java @@ -190,6 +190,22 @@ public class AFCalcResult { return log10PriorsOfAC[AF1p]; } + /** + * Are we sufficiently confidence in being non-ref that the site is considered polymorphic? + * + * We are non-ref if the probability of being non-ref > the emit confidence (often an argument). + * Suppose posterior AF > 0 is log10: -5 => 10^-5 + * And that log10minPNonRef is -3. + * We are considered polymorphic since 10^-5 < 10^-3 => -5 < -3 + * + * @param log10minPNonRef the log10 scaled min pr of being non-ref to be considered polymorphic + * + * @return true if there's enough confidence (relative to log10minPNonRef) to reject AF == 0 + */ + public boolean isPolymorphic(final double log10minPNonRef) { + return getLog10PosteriorOfAFGT0() < log10minPNonRef; + } + /** * Returns the log10 normalized posteriors given the log10 likelihoods and priors * @@ -221,11 +237,11 @@ public class AFCalcResult { if ( vector.length != expectedSize ) return false; for ( final double pr : vector ) { - if ( pr > 0 ) return false; // log10 prob. vector should be < 0 + if ( pr > 0.0 ) return false; // log10 prob. vector should be < 0 if ( Double.isInfinite(pr) || Double.isNaN(pr) ) return false; } - if ( shouldSumToOne || MathUtils.compareDoubles(MathUtils.sumLog10(vector), 0.0, 1e-2) != 0 ) + if ( shouldSumToOne && MathUtils.compareDoubles(MathUtils.sumLog10(vector), 1.0, 1e-2) != 0 ) return false; return true; // everything is good diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResultTracker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResultTracker.java index 97e69be92..d66d0b1d7 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResultTracker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResultTracker.java @@ -41,7 +41,7 @@ import java.util.List; * * TODO -- WHAT IS THE CONTRACT ON MAP AC AND P NON REF? */ -public class AFCalcResultTracker { +class AFCalcResultTracker { // These variables are intended to contain the MLE and MAP (and their corresponding allele counts) of the site over all alternate alleles protected double log10MLE; protected double log10MAP; @@ -157,6 +157,10 @@ public class AFCalcResultTracker { return log10LikelihoodOfAFzero; } + public double getLog10LikelihoodOfAFNotZero() { + return getLog10PosteriorsMatrixSumWithoutAFzero(); // TODO -- INCORRECT TEMPORARY CALCULATION + } + /** * TODO -- eric what is this supposed to return? my unit tests don't do what I think they should * @@ -215,6 +219,13 @@ public class AFCalcResultTracker { return AClimits; } + protected AFCalcResult toAFCalcResult(final double[] log10PriorsByAC) { + final int [] subACOfMLE = Arrays.copyOf(alleleCountsOfMLE, allelesUsedInGenotyping.size()); + final double[] log10Likelihoods = new double[]{getLog10LikelihoodOfAFzero(), getLog10LikelihoodOfAFNotZero()}; + final double[] log10Priors = new double[]{log10PriorsByAC[0], MathUtils.log10sumLog10(log10PriorsByAC, 1)}; + return new AFCalcResult(subACOfMLE, nEvaluations, allelesUsedInGenotyping, log10Likelihoods, log10Priors); + } + // -------------------------------------------------------------------------------- // // Protected mutational methods only for use within the calculation models themselves diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ConstrainedDiploidExactAFCalc.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ConstrainedDiploidExactAFCalc.java index 1b021aa77..81bfb6cf8 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ConstrainedDiploidExactAFCalc.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ConstrainedDiploidExactAFCalc.java @@ -4,6 +4,7 @@ import com.google.java.contract.Ensures; import com.google.java.contract.Requires; import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection; +import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.variantcontext.Genotype; import org.broadinstitute.sting.utils.variantcontext.GenotypeLikelihoods; import org.broadinstitute.sting.utils.variantcontext.VariantContext; @@ -70,7 +71,7 @@ public class ConstrainedDiploidExactAFCalc extends DiploidExactAFCalc { @Requires({ "g != null", "maxACs != null", - "MathUtils.sum(maxACs) >= 0"}) + "goodMaxACs(maxACs)"}) private void updateMaxACs(final Genotype g, final int[] maxACs) { final int[] PLs = g.getLikelihoods().getAsPLs(); @@ -101,9 +102,13 @@ public class ConstrainedDiploidExactAFCalc extends DiploidExactAFCalc { @Requires({ "alleleI >= 0", "(alleleI - 1) < maxACs.length", - "MathUtils.sum(maxACs) >= 0"}) + "goodMaxACs(maxACs)"}) private void updateMaxACs(final int[] maxACs, final int alleleI) { if ( alleleI > 0 ) maxACs[alleleI-1]++; } + + private static boolean goodMaxACs(final int[] maxACs) { + return MathUtils.sum(maxACs) >= 0; + } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/DiploidExactAFCalc.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/DiploidExactAFCalc.java index 0dac2653d..086c2a2d1 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/DiploidExactAFCalc.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/DiploidExactAFCalc.java @@ -45,9 +45,9 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc { protected abstract StateTracker makeMaxLikelihood(final VariantContext vc, final AFCalcResultTracker resultTracker); @Override - public void computeLog10PNonRef(final VariantContext vc, - final double[] log10AlleleFrequencyPriors, - final AFCalcResultTracker resultTracker) { + protected void computeLog10PNonRef(final VariantContext vc, + final double[] log10AlleleFrequencyPriors, + final AFCalcResultTracker resultTracker) { final int numAlternateAlleles = vc.getNAlleles() - 1; final ArrayList genotypeLikelihoods = getGLs(vc.getGenotypes()); final int numSamples = genotypeLikelihoods.size()-1; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalc.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalc.java index b74923086..13858bcf1 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalc.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalc.java @@ -60,19 +60,20 @@ public class IndependentAllelesDiploidExactAFCalc extends DiploidExactAFCalc { public void computeLog10PNonRef(final VariantContext vc, final double[] log10AlleleFrequencyPriors, final AFCalcResultTracker resultTracker) { - final List independentResultTrackers = computeLog10PNonRefForEachAllele(vc, log10AlleleFrequencyPriors); - combineIndependentPNonRefs(vc, independentResultTrackers, log10AlleleFrequencyPriors, resultTracker); + refModel.computeLog10PNonRef(vc, log10AlleleFrequencyPriors, resultTracker); +// final List independentResultTrackers = computeLog10PNonRefForEachAllele(vc, log10AlleleFrequencyPriors); +// combineIndependentPNonRefs(vc, independentResultTrackers, log10AlleleFrequencyPriors, resultTracker); } - protected List computeLog10PNonRefForEachAllele(final VariantContext vc, + protected List computeLog10PNonRefForEachAllele(final VariantContext vc, final double[] log10AlleleFrequencyPriors) { final int nAltAlleles = vc.getNAlleles() - 1; - final List resultTrackers = new ArrayList(nAltAlleles); + final List resultTrackers = new ArrayList(nAltAlleles); for ( int altI = 0; altI < nAltAlleles; altI++ ) { final List biallelic = Arrays.asList(vc.getReference(), vc.getAlternateAllele(altI)); final VariantContext subvc = biallelicCombinedGLs(vc, biallelic, altI + 1); - final AFCalcResultTracker resultTracker = refModel.getLog10PNonRef(subvc, log10AlleleFrequencyPriors); + final AFCalcResult resultTracker = refModel.getLog10PNonRef(subvc, log10AlleleFrequencyPriors); resultTrackers.add(resultTracker); } @@ -141,34 +142,34 @@ public class IndependentAllelesDiploidExactAFCalc extends DiploidExactAFCalc { * @param resultTracker the destination for the combined result */ protected void combineIndependentPNonRefs(final VariantContext vc, - final List independentPNonRefs, + final List independentPNonRefs, final double[] log10AlleleFrequencyPriors, final AFCalcResultTracker resultTracker) { - final int nChrom = vc.getNSamples() * 2; - - resultTracker.reset(); - - // both the likelihood and the posterior of AF=0 are the same for all alleles - // TODO -- check and ensure this is true - resultTracker.setLog10LikelihoodOfAFzero(independentPNonRefs.get(0).getLog10LikelihoodOfAFzero()); - resultTracker.setLog10PosteriorOfAFzero(independentPNonRefs.get(0).getLog10PosteriorOfAFzero()); - resultTracker.log10PosteriorMatrixSum = 0.0; - - int altI = 0; - for ( final AFCalcResultTracker independentPNonRef : independentPNonRefs ) { - resultTracker.log10MLE += independentPNonRef.getLog10MLE(); - - // TODO -- technically double counting some posterior mass - resultTracker.log10MAP += independentPNonRef.getLog10MAP(); - - // TODO -- technically double counting some posterior mass - resultTracker.log10PosteriorMatrixSum += independentPNonRef.getLog10PosteriorsMatrixSumWithoutAFzero(); - - resultTracker.getAlleleCountsOfMAP()[altI] = independentPNonRef.getAlleleCountsOfMAP()[0]; - resultTracker.getAlleleCountsOfMLE()[altI] = independentPNonRef.getAlleleCountsOfMLE()[0]; - - resultTracker.nEvaluations += independentPNonRef.nEvaluations; - altI++; - } +// final int nChrom = vc.getNSamples() * 2; +// +// resultTracker.reset(); +// +// // both the likelihood and the posterior of AF=0 are the same for all alleles +// // TODO -- check and ensure this is true +// resultTracker.setLog10LikelihoodOfAFzero(independentPNonRefs.get(0).getLog10LikelihoodOfAFzero()); +// resultTracker.setLog10PosteriorOfAFzero(independentPNonRefs.get(0).getLog10PosteriorOfAFzero()); +// resultTracker.log10PosteriorMatrixSum = 0.0; +// +// int altI = 0; +// for ( final AFCalcResult independentPNonRef : independentPNonRefs ) { +// resultTracker.log10MLE += independentPNonRef.getLog10MLE(); +// +// // TODO -- technically double counting some posterior mass +// resultTracker.log10MAP += independentPNonRef.getLog10MAP(); +// +// // TODO -- technically double counting some posterior mass +// resultTracker.log10PosteriorMatrixSum += independentPNonRef.getLog10PosteriorsMatrixSumWithoutAFzero(); +// +// resultTracker.getAlleleCountsOfMAP()[altI] = independentPNonRef.getAlleleCountsOfMAP()[0]; +// resultTracker.getAlleleCountsOfMLE()[altI] = independentPNonRef.getAlleleCountsOfMLE()[0]; +// +// resultTracker.nEvaluations += independentPNonRef.nEvaluations; +// altI++; +// } } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GLBasedSampleSelector.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GLBasedSampleSelector.java index 006c303dc..f7f3e2a7a 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GLBasedSampleSelector.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GLBasedSampleSelector.java @@ -23,7 +23,7 @@ */ package org.broadinstitute.sting.gatk.walkers.validation.validationsiteselector; -import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.AFCalcResultTracker; +import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.AFCalcResult; import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.DiploidExactAFCalc; import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.ReferenceDiploidExactAFCalc; import org.broadinstitute.sting.utils.variantcontext.VariantContext; @@ -54,10 +54,9 @@ public class GLBasedSampleSelector extends SampleSelector { flatPriors = new double[1+2*samples.size()]; AFCalculator = new ReferenceDiploidExactAFCalc(samples.size(), 4); } - AFCalcResultTracker resultTracker = new AFCalcResultTracker(vc.getAlternateAlleles().size()); - AFCalculator.computeLog10PNonRef(subContext, flatPriors, resultTracker); + final AFCalcResult result = AFCalculator.getLog10PNonRef(subContext, flatPriors); // do we want to let this qual go up or down? - if ( resultTracker.getLog10PosteriorOfAFzero() < referenceLikelihood ) { + if ( result.getLog10LikelihoodOfAFEq0() < referenceLikelihood ) { return true; }