diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactAFCalculationPerformanceTest.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactAFCalculationPerformanceTest.java index 628b4f880..5f563d489 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactAFCalculationPerformanceTest.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactAFCalculationPerformanceTest.java @@ -61,7 +61,7 @@ public class ExactAFCalculationPerformanceTest { final VariantContext vc = testBuilder.makeACTest(ACs, 0, nonTypePL); timer.start(); - final AFCalcResultTracker resultTracker = calc.getLog10PNonRef(vc, priors); + final AFCalcResult resultTracker = calc.getLog10PNonRef(vc, priors); final long runtime = timer.getElapsedTimeNano(); int otherAC = 0; @@ -127,7 +127,7 @@ public class ExactAFCalculationPerformanceTest { vcb.genotypes(genotypes); timer.start(); - final AFCalcResultTracker resultTracker = calc.getLog10PNonRef(vcb.make(), priors); + final AFCalcResult resultTracker = calc.getLog10PNonRef(vcb.make(), priors); final long runtime = timer.getElapsedTimeNano(); final List columns = new LinkedList(coreValues); @@ -157,7 +157,7 @@ public class ExactAFCalculationPerformanceTest { final VariantContext vc = testBuilder.makeACTest(ac, nNonInformative, nonTypePL); timer.start(); - final AFCalcResultTracker resultTracker = calc.getLog10PNonRef(vc, priors); + final AFCalcResult resultTracker = calc.getLog10PNonRef(vc, priors); final long runtime = timer.getElapsedTimeNano(); final List columns = new LinkedList(coreValues); @@ -219,9 +219,9 @@ public class ExactAFCalculationPerformanceTest { final VariantContext vc = testBuilder.makeACTest(new int[]{ac}, 0, 100); final SimpleTimer timer = new SimpleTimer().start(); - final AFCalcResultTracker resultTracker = testBuilder.makeModel().getLog10PNonRef(vc, testBuilder.makePriors()); + final AFCalcResult resultTracker = testBuilder.makeModel().getLog10PNonRef(vc, testBuilder.makePriors()); final long runtime = timer.getElapsedTimeNano(); - logger.info("result " + resultTracker.getNormalizedPosteriorOfAFGTZero()); + logger.info("result " + resultTracker.getLog10PosteriorOfAFGT0()); logger.info("runtime " + runtime); } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactAFCalculationModelUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactAFCalculationModelUnitTest.java index 6402ca6c5..85f80d5be 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactAFCalculationModelUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactAFCalculationModelUnitTest.java @@ -22,7 +22,7 @@ public class ExactAFCalculationModelUnitTest extends BaseTest { static int sampleNameCounter = 0; static Genotype AA1, AB1, BB1, NON_INFORMATIVE1; static Genotype AA2, AB2, AC2, BB2, BC2, CC2, NON_INFORMATIVE2; - final double[] FLAT_3SAMPLE_PRIORS = new double[2*3+1]; // flat priors + final double[] FLAT_3SAMPLE_PRIORS = MathUtils.normalizeFromLog10(new double[2*3+1], true); // flat priors final private static boolean INCLUDE_BIALLELIC = true; final private static boolean INCLUDE_TRIALLELIC = true; final private static boolean Guillermo_FIXME = false; // TODO -- can only be enabled when GdA fixes bug @@ -76,11 +76,11 @@ public class ExactAFCalculationModelUnitTest extends BaseTest { } } - public AFCalcResultTracker execute() { + public AFCalcResult execute() { return getCalc().getLog10PNonRef(getVC(), getPriors()); } - public AFCalcResultTracker executeRef() { + public AFCalcResult executeRef() { final ExactAFCalc ref = new ReferenceDiploidExactAFCalc(getCalc().nSamples, getCalc().getMaxAltAlleles()); return ref.getLog10PNonRef(getVC(), getPriors()); } @@ -185,7 +185,7 @@ public class ExactAFCalculationModelUnitTest extends BaseTest { final ExactAFCalc generalCalc = new GeneralPloidyExactAFCalc(nSamples, 4, 2); final ExactAFCalc indCalc = new IndependentAllelesDiploidExactAFCalc(nSamples, 4); - final double[] priors = new double[2*nSamples+1]; // flat priors + final double[] priors = MathUtils.normalizeFromLog10(new double[2*nSamples+1], true); // flat priors for ( ExactAFCalc model : Arrays.asList(diploidCalc, optDiploidCalc, generalCalc, indCalc) ) { final GetGLsTest onlyInformative = new GetGLsTest(model, testData.nAltAlleles, testData.called, priors, "flat"); @@ -209,28 +209,18 @@ public class ExactAFCalculationModelUnitTest extends BaseTest { @Test(enabled = true, dataProvider = "GLsWithNonInformative", dependsOnMethods = "testGLs") public void testGLsWithNonInformative(GetGLsTest onlyInformative, GetGLsTest withNonInformative) { - final AFCalcResultTracker expected = onlyInformative.execute(); - final AFCalcResultTracker actual = withNonInformative.execute(); + final AFCalcResult expected = onlyInformative.execute(); + final AFCalcResult actual = withNonInformative.execute(); testResultSimple(withNonInformative); - - Assert.assertEquals(actual.getLog10PosteriorOfAFzero(), expected.getLog10LikelihoodOfAFzero()); - Assert.assertEquals(actual.getLog10LikelihoodOfAFzero(), expected.getLog10LikelihoodOfAFzero()); - Assert.assertEquals(actual.getLog10PosteriorsMatrixSumWithoutAFzero(), expected.getLog10PosteriorsMatrixSumWithoutAFzero()); - Assert.assertEquals(actual.getAlleleCountsOfMAP(), expected.getAlleleCountsOfMAP()); - Assert.assertEquals(actual.getAlleleCountsOfMLE(), expected.getAlleleCountsOfMLE()); - Assert.assertEquals(actual.getLog10MAP(), expected.getLog10MAP()); - Assert.assertEquals(actual.getLog10MLE(), expected.getLog10MLE()); - Assert.assertEquals(actual.getAllelesUsedInGenotyping(), expected.getAllelesUsedInGenotyping()); + compareAFCalcResults(actual, expected); } private void testResultSimple(final GetGLsTest cfg) { - final AFCalcResultTracker refResultTracker = cfg.executeRef(); - final AFCalcResultTracker resultTracker = cfg.execute(); + final AFCalcResult refResultTracker = cfg.executeRef(); + final AFCalcResult resultTracker = cfg.execute(); - compareToRefResult(refResultTracker, resultTracker); - - Assert.assertEquals(resultTracker.getNormalizedPosteriorOfAFzero() + resultTracker.getNormalizedPosteriorOfAFGTZero(), 1.0, 1e-4); + compareAFCalcResults(resultTracker, refResultTracker); // final int minNumberOfEvaluations = cfg.getVC().getCalledChrCount(); // Assert.assertTrue(result.getnEvaluations() >= minNumberOfEvaluations, @@ -257,20 +247,17 @@ public class ExactAFCalculationModelUnitTest extends BaseTest { // } } - private void compareToRefResult(final AFCalcResultTracker refResultTracker, - final AFCalcResultTracker resultTracker) { - final double TOLERANCE = 1; - // MAP may not be equal -// Assert.assertEquals(result.getAlleleCountsOfMAP(), refResult.getAlleleCountsOfMAP()); - Assert.assertEquals(resultTracker.getAlleleCountsOfMLE(), refResultTracker.getAlleleCountsOfMLE()); - Assert.assertEquals(resultTracker.getAllelesUsedInGenotyping(), refResultTracker.getAllelesUsedInGenotyping()); - Assert.assertEquals(resultTracker.getLog10LikelihoodOfAFzero(), refResultTracker.getLog10LikelihoodOfAFzero(), TOLERANCE); -// Assert.assertEquals(result.getLog10MAP(), refResult.getLog10MAP(), TOLERANCE); -// Assert.assertEquals(result.getLog10MLE(), refResult.getLog10MLE(), TOLERANCE); -// Assert.assertEquals(result.getLog10PosteriorOfAFzero(), refResult.getLog10PosteriorOfAFzero(), TOLERANCE); -// Assert.assertEquals(result.getLog10PosteriorsMatrixSumWithoutAFzero(), refResult.getLog10PosteriorsMatrixSumWithoutAFzero(), TOLERANCE); - Assert.assertEquals(resultTracker.getNormalizedPosteriorOfAFGTZero(), refResultTracker.getNormalizedPosteriorOfAFGTZero(), 0.5); - Assert.assertEquals(resultTracker.getNormalizedPosteriorOfAFzero(), refResultTracker.getNormalizedPosteriorOfAFzero(), 0.5); + private void compareAFCalcResults(final AFCalcResult actual, final AFCalcResult expected) { + final double TOLERANCE = 1; // TODO -- tighten up tolerances + + Assert.assertEquals(actual.getLog10PriorOfAFEq0(), expected.getLog10PriorOfAFEq0(), TOLERANCE); + Assert.assertEquals(actual.getLog10PriorOfAFGT0(), expected.getLog10PriorOfAFGT0(), TOLERANCE); + Assert.assertEquals(actual.getLog10LikelihoodOfAFEq0(), expected.getLog10LikelihoodOfAFEq0(), TOLERANCE); + Assert.assertEquals(actual.getLog10LikelihoodOfAFGT0(), expected.getLog10LikelihoodOfAFGT0(), TOLERANCE); + Assert.assertEquals(actual.getLog10PosteriorOfAFEq0(), expected.getLog10PosteriorOfAFEq0(), TOLERANCE); + Assert.assertEquals(actual.getLog10PosteriorOfAFGT0(), expected.getLog10PosteriorOfAFGT0(), TOLERANCE); + Assert.assertEquals(actual.getAlleleCountsOfMLE(), expected.getAlleleCountsOfMLE()); + Assert.assertEquals(actual.getAllelesUsedInGenotyping(), expected.getAllelesUsedInGenotyping()); } @Test(enabled = true, dataProvider = "Models") @@ -278,9 +265,9 @@ public class ExactAFCalculationModelUnitTest extends BaseTest { final Genotype BB = makePL(Arrays.asList(C, C), 20000000, 20000000, 0); GetGLsTest cfg = new GetGLsTest(calc, 1, Arrays.asList(BB, BB, BB), FLAT_3SAMPLE_PRIORS, "flat"); - final AFCalcResultTracker resultTracker = cfg.execute(); + final AFCalcResult resultTracker = cfg.execute(); - int calculatedAlleleCount = resultTracker.getAlleleCountsOfMAP()[0]; + int calculatedAlleleCount = resultTracker.getAlleleCountsOfMLE()[0]; Assert.assertEquals(calculatedAlleleCount, 6); } @@ -290,10 +277,10 @@ public class ExactAFCalculationModelUnitTest extends BaseTest { final Genotype AC = makePL(Arrays.asList(A, G), 100, 100, 100, 0, 100, 100); GetGLsTest cfg = new GetGLsTest(calc, 2, Arrays.asList(AB, AC), FLAT_3SAMPLE_PRIORS, "flat"); - final AFCalcResultTracker resultTracker = cfg.execute(); + final AFCalcResult resultTracker = cfg.execute(); - Assert.assertEquals(resultTracker.getAlleleCountsOfMAP()[0], 1); - Assert.assertEquals(resultTracker.getAlleleCountsOfMAP()[1], 1); + Assert.assertEquals(resultTracker.getAlleleCountsOfMLE()[0], 1); + Assert.assertEquals(resultTracker.getAlleleCountsOfMLE()[1], 1); } // -------------------------------------------------------------------------------- @@ -328,7 +315,7 @@ public class ExactAFCalculationModelUnitTest extends BaseTest { for ( int i = 0; i < PLs.length; i++ ) PLs[i] = g.getPL()[i] * ((int)Math.log10(scaleFactor)+1); final Genotype scaledG = new GenotypeBuilder(g).PL(PLs).make(); final double scaledPNonRef = pNonRef < 0.5 ? pNonRef / scaleFactor : 1 - ((1-pNonRef) / scaleFactor); - return new PNonRefData(vc, scaledG, scaledPNonRef, tolerance / scaleFactor, true); + return new PNonRefData(vc, scaledG, scaledPNonRef, tolerance, true); } else { return this; } @@ -352,22 +339,24 @@ public class ExactAFCalculationModelUnitTest extends BaseTest { final List constrainedModel = Arrays.asList(ExactAFCalculationTestBuilder.ModelType.ConstrainedDiploidExact); + final double TOLERANCE = 0.5; + final List initialPNonRefData = Arrays.asList( // bi-allelic sites - new PNonRefData(vc2, makePL(AA, 0, 10, 10), 0.1666667, 1e-1, true), - new PNonRefData(vc2, makePL(AA, 0, 1, 10), 0.4721084, 1e-1, false, constrainedModel), - new PNonRefData(vc2, makePL(AA, 0, 1, 1), 0.6136992, 1e-1, false, constrainedModel), - new PNonRefData(vc2, makePL(AA, 0, 5, 5), 0.3874259, 1e-1, false, constrainedModel), - new PNonRefData(vc2, makePL(AC, 10, 0, 10), 0.9166667, 1e-1, true), - new PNonRefData(vc2, makePL(CC, 10, 10, 0), 0.9166667, 1e-1, true), + new PNonRefData(vc2, makePL(AA, 0, 10, 10), 0.1666667, TOLERANCE, true), + new PNonRefData(vc2, makePL(AA, 0, 1, 10), 0.4721084, TOLERANCE, false, constrainedModel), + new PNonRefData(vc2, makePL(AA, 0, 1, 1), 0.6136992, TOLERANCE, false, constrainedModel), + new PNonRefData(vc2, makePL(AA, 0, 5, 5), 0.3874259, TOLERANCE, false, constrainedModel), + new PNonRefData(vc2, makePL(AC, 10, 0, 10), 0.9166667, TOLERANCE, true), + new PNonRefData(vc2, makePL(CC, 10, 10, 0), 0.9166667, TOLERANCE, true), // tri-allelic sites -- cannot scale because of the naivety of our scaling estimator - new PNonRefData(vc3, makePL(AA, 0, 10, 10, 10, 10, 10), 0.3023255813953489, 2e-1, false), // more tolerance because constrained model is a bit inaccurate - new PNonRefData(vc3, makePL(AC, 10, 0, 10, 10, 10, 10), 0.9166667, 1e-1, false), - new PNonRefData(vc3, makePL(CC, 10, 10, 0, 10, 10, 10), 0.9166667, 1e-1, false), - new PNonRefData(vc3, makePL(AG, 10, 10, 10, 0, 10, 10), 0.9166667, 1e-1, false), - new PNonRefData(vc3, makePL(CG, 10, 10, 10, 10, 0, 10), 0.80, 1e-1, false), - new PNonRefData(vc3, makePL(GG, 10, 10, 10, 10, 10, 0), 0.9166667, 1e-1, false) + new PNonRefData(vc3, makePL(AA, 0, 10, 10, 10, 10, 10), 0.3023255813953489, TOLERANCE * 2, false), // more tolerance because constrained model is a bit inaccurate + new PNonRefData(vc3, makePL(AC, 10, 0, 10, 10, 10, 10), 0.9166667, TOLERANCE, false), + new PNonRefData(vc3, makePL(CC, 10, 10, 0, 10, 10, 10), 0.9166667, TOLERANCE, false), + new PNonRefData(vc3, makePL(AG, 10, 10, 10, 0, 10, 10), 0.9166667, TOLERANCE, false), + new PNonRefData(vc3, makePL(CG, 10, 10, 10, 10, 0, 10), 0.80, TOLERANCE, false), + new PNonRefData(vc3, makePL(GG, 10, 10, 10, 10, 10, 0), 0.9166667, TOLERANCE, false) ); for ( ExactAFCalculationTestBuilder.ModelType modelType : ExactAFCalculationTestBuilder.ModelType.values() ) { @@ -400,9 +389,9 @@ public class ExactAFCalculationModelUnitTest extends BaseTest { final VariantContextBuilder vcb = new VariantContextBuilder(vcRoot); vcb.genotypes(genotypes); - final AFCalcResultTracker resultTracker = testBuilder.makeModel().getLog10PNonRef(vcb.make(), testBuilder.makePriors()); + final AFCalcResult resultTracker = testBuilder.makeModel().getLog10PNonRef(vcb.make(), testBuilder.makePriors()); - Assert.assertEquals(resultTracker.getNormalizedPosteriorOfAFGTZero(), expectedPNonRef, tolerance, + Assert.assertEquals(resultTracker.getLog10PosteriorOfAFGT0(), Math.log10(expectedPNonRef), tolerance, "Actual pNonRef not within tolerance " + tolerance + " of expected"); } @@ -428,26 +417,24 @@ public class ExactAFCalculationModelUnitTest extends BaseTest { final int REF_PL = 10; final Genotype AB = makePL(Arrays.asList(A,C), REF_PL, 0, 10000); - for ( int log10NonRefPrior = 1; log10NonRefPrior < 100*REF_PL; log10NonRefPrior += 1 ) { + for ( int log10NonRefPrior = 1; log10NonRefPrior < 10*REF_PL; log10NonRefPrior += 1 ) { final double refPrior = 1 - QualityUtils.qualToErrorProb(log10NonRefPrior); final double[] priors = MathUtils.toLog10(new double[]{refPrior, (1-refPrior) / 2, (1-refPrior) / 2}); GetGLsTest cfg = new GetGLsTest(model, 1, Arrays.asList(AB), priors, "pNonRef" + log10NonRefPrior); - final AFCalcResultTracker resultTracker = cfg.execute(); - final int actualAC = resultTracker.getAlleleCountsOfMAP()[0]; + final AFCalcResult resultTracker = cfg.execute(); + final int actualAC = resultTracker.getAlleleCountsOfMLE()[0]; final double pRefWithPrior = AB.getLikelihoods().getAsVector()[0] + priors[0]; final double pHetWithPrior = AB.getLikelihoods().getAsVector()[1] + priors[1]; - final boolean expectNonRef = pRefWithPrior <= pHetWithPrior; + final double nonRefPost = Math.pow(10, pHetWithPrior) / (Math.pow(10, pRefWithPrior) + Math.pow(10, pHetWithPrior)); - if ( expectNonRef ) - Assert.assertTrue(resultTracker.getNormalizedPosteriorOfAFGTZero() > 0.5); - else - Assert.assertTrue(resultTracker.getNormalizedPosteriorOfAFGTZero() < 0.5); + if ( nonRefPost < 0.1 ) + Assert.assertTrue(resultTracker.isPolymorphic(-1)); - final int expectedAC = expectNonRef ? 1 : 0; - Assert.assertEquals(actualAC, expectedAC, + final int expectedMLEAC = 1; // the MLE is independent of the prior + Assert.assertEquals(actualAC, expectedMLEAC, "actual AC with priors " + log10NonRefPrior + " not expected " - + expectedAC + " priors " + Utils.join(",", priors)); + + expectedMLEAC + " priors " + Utils.join(",", priors)); } } @@ -468,8 +455,8 @@ public class ExactAFCalculationModelUnitTest extends BaseTest { final double nonRefPrior = (1-refPrior) / 2; final double[] priors = MathUtils.toLog10(new double[]{refPrior, nonRefPrior, nonRefPrior, nonRefPrior, nonRefPrior, nonRefPrior}); GetGLsTest cfg = new GetGLsTest(model, 2, Arrays.asList(AB, AC), priors, "pNonRef" + log10NonRefPrior); - final AFCalcResultTracker resultTracker = cfg.execute(); - final int actualAC_AB = resultTracker.getAlleleCountsOfMAP()[0]; + final AFCalcResult resultTracker = cfg.execute(); + final int actualAC_AB = resultTracker.getAlleleCountsOfMLE()[0]; final double pRefABWithPrior = AB.getLikelihoods().getAsVector()[0] + priors[0]; final double pHetABWithPrior = AB.getLikelihoods().getAsVector()[1] + priors[1]; @@ -480,7 +467,7 @@ public class ExactAFCalculationModelUnitTest extends BaseTest { final double nonRefPriorSecondAllele = Math.pow(nonRefPrior, 2); final double refPriorSecondAllele = 1 - nonRefPriorSecondAllele; - final int actualAC_AC = resultTracker.getAlleleCountsOfMAP()[1]; + final int actualAC_AC = resultTracker.getAlleleCountsOfMLE()[1]; final double pRefACWithPrior = AB.getLikelihoods().getAsVector()[0] + Math.log10(refPriorSecondAllele); final double pHetACWithPrior = AC.getLikelihoods().getAsVector()[3] + Math.log10(nonRefPriorSecondAllele); final int expectedAC_AC = pRefACWithPrior <= pHetACWithPrior ? 1 : 0; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index 92e1c31f0..8f1473121 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -35,7 +35,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine; import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.AFCalc; -import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.AFCalcResultTracker; +import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.AFCalcResult; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.classloader.PluginManager; @@ -363,7 +363,7 @@ public class UnifiedGenotyperEngine { generateEmptyContext(tracker, refContext, stratifiedContexts, rawContext)); } - AFCalcResultTracker AFresult = afcm.get().getLog10PNonRef(vc, getAlleleFrequencyPriors(model)); + AFCalcResult AFresult = afcm.get().getLog10PNonRef(vc, getAlleleFrequencyPriors(model)); // is the most likely frequency conformation AC=0 for all alternate alleles? boolean bestGuessIsRef = true; @@ -379,10 +379,14 @@ public class UnifiedGenotyperEngine { if ( indexOfAllele == -1 ) continue; - final int indexOfBestAC = AFresult.getAlleleCountsOfMAP()[indexOfAllele-1]; + // we are non-ref if the probability of being non-ref > the emit confidence. + // the emit confidence is phred-scaled, say 30 => 10^-3. + // the posterior AF > 0 is log10: -5 => 10^-5 + // we are non-ref if 10^-5 < 10^-3 => -5 < -3 + final boolean isNonRef = AFresult.isPolymorphic(UAC.STANDARD_CONFIDENCE_FOR_EMITTING / -10.0); // if the most likely AC is not 0, then this is a good alternate allele to use - if ( indexOfBestAC != 0 ) { + if ( ! isNonRef ) { myAlleles.add(alternateAllele); alleleCountsofMLE.add(AFresult.getAlleleCountsOfMLE()[indexOfAllele-1]); bestGuessIsRef = false; @@ -394,22 +398,10 @@ public class UnifiedGenotyperEngine { } } - // calculate p(f>0): - final double PoFEq0 = AFresult.getNormalizedPosteriorOfAFzero(); - final double PoFGT0 = AFresult.getNormalizedPosteriorOfAFGTZero(); - - double phredScaledConfidence; - if ( !bestGuessIsRef || UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) { - phredScaledConfidence = QualityUtils.phredScaleErrorRate(PoFEq0); - if ( Double.isInfinite(phredScaledConfidence) ) - phredScaledConfidence = -10.0 * AFresult.getLog10PosteriorOfAFzero(); - } else { - phredScaledConfidence = QualityUtils.phredScaleErrorRate(PoFGT0); - if ( Double.isInfinite(phredScaledConfidence) ) { - final double sum = AFresult.getLog10PosteriorsMatrixSumWithoutAFzero(); - phredScaledConfidence = (MathUtils.compareDoubles(sum, 0.0) == 0 ? 0 : -10.0 * sum); - } - } + final double PoFGT0 = Math.pow(10, AFresult.getLog10PosteriorOfAFGT0()); + final double phredScaledConfidence = ! bestGuessIsRef || UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES + ? -10 * AFresult.getLog10PosteriorOfAFEq0() + : -10 * AFresult.getLog10PosteriorOfAFGT0(); // return a null call if we don't pass the confidence cutoff or the most likely allele frequency is zero if ( UAC.OutputMode != OUTPUT_MODE.EMIT_ALL_SITES && !passesEmitThreshold(phredScaledConfidence, bestGuessIsRef) ) { @@ -462,7 +454,7 @@ public class UnifiedGenotyperEngine { // the overall lod //double overallLog10PofNull = AFresult.log10AlleleFrequencyPosteriors[0]; - double overallLog10PofF = AFresult.getLog10PosteriorsMatrixSumWithoutAFzero(); + double overallLog10PofF = AFresult.getLog10LikelihoodOfAFGT0(); //if ( DEBUG_SLOD ) System.out.println("overallLog10PofF=" + overallLog10PofF); List allAllelesToUse = builder.make().getAlleles(); @@ -471,16 +463,16 @@ public class UnifiedGenotyperEngine { VariantContext vcForward = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.FORWARD, allAllelesToUse, false, model, perReadAlleleLikelihoodMap); AFresult = afcm.get().getLog10PNonRef(vcForward, getAlleleFrequencyPriors(model)); //double[] normalizedLog10Posteriors = MathUtils.normalizeFromLog10(AFresult.log10AlleleFrequencyPosteriors, true); - double forwardLog10PofNull = AFresult.getLog10PosteriorOfAFzero(); - double forwardLog10PofF = AFresult.getLog10PosteriorsMatrixSumWithoutAFzero(); + double forwardLog10PofNull = AFresult.getLog10LikelihoodOfAFEq0(); + double forwardLog10PofF = AFresult.getLog10LikelihoodOfAFGT0(); //if ( DEBUG_SLOD ) System.out.println("forwardLog10PofNull=" + forwardLog10PofNull + ", forwardLog10PofF=" + forwardLog10PofF); // the reverse lod VariantContext vcReverse = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.REVERSE, allAllelesToUse, false, model, perReadAlleleLikelihoodMap); AFresult = afcm.get().getLog10PNonRef(vcReverse, getAlleleFrequencyPriors(model)); //normalizedLog10Posteriors = MathUtils.normalizeFromLog10(AFresult.log10AlleleFrequencyPosteriors, true); - double reverseLog10PofNull = AFresult.getLog10PosteriorOfAFzero(); - double reverseLog10PofF = AFresult.getLog10PosteriorsMatrixSumWithoutAFzero(); + double reverseLog10PofNull = AFresult.getLog10LikelihoodOfAFEq0(); + double reverseLog10PofF = AFresult.getLog10LikelihoodOfAFGT0(); //if ( DEBUG_SLOD ) System.out.println("reverseLog10PofNull=" + reverseLog10PofNull + ", reverseLog10PofF=" + reverseLog10PofF); double forwardLod = forwardLog10PofF + reverseLog10PofNull - overallLog10PofF; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalc.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalc.java index 8245726b1..349c08f9c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalc.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalc.java @@ -105,7 +105,7 @@ public abstract class AFCalc implements Cloneable { * @param log10AlleleFrequencyPriors a prior vector nSamples x 2 in length indicating the Pr(AF = i) * @return result (for programming convenience) */ - public AFCalcResultTracker getLog10PNonRef(final VariantContext vc, final double[] log10AlleleFrequencyPriors) { + public AFCalcResult getLog10PNonRef(final VariantContext vc, final double[] log10AlleleFrequencyPriors) { if ( vc == null ) throw new IllegalArgumentException("VariantContext cannot be null"); if ( log10AlleleFrequencyPriors == null ) throw new IllegalArgumentException("priors vector cannot be null"); if ( resultTracker == null ) throw new IllegalArgumentException("Results object cannot be null"); @@ -123,7 +123,7 @@ public abstract class AFCalc implements Cloneable { printCallInfo(vcWorking, log10AlleleFrequencyPriors, nanoTime, resultTracker.getLog10PosteriorOfAFzero()); resultTracker.setAllelesUsedInGenotyping(vcWorking.getAlleles()); - return resultTracker; + return resultTracker.toAFCalcResult(log10AlleleFrequencyPriors); } // --------------------------------------------------------------------------- @@ -155,9 +155,9 @@ public abstract class AFCalc implements Cloneable { * @param resultTracker (pre-allocated) object to store results */ // TODO -- add consistent requires among args - public abstract void computeLog10PNonRef(final VariantContext vc, - final double[] log10AlleleFrequencyPriors, - final AFCalcResultTracker resultTracker); + protected abstract void computeLog10PNonRef(final VariantContext vc, + final double[] log10AlleleFrequencyPriors, + final AFCalcResultTracker resultTracker); /** * Must be overridden by concrete subclasses diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResult.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResult.java index e80dbc3d7..bf15e2039 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResult.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResult.java @@ -190,6 +190,22 @@ public class AFCalcResult { return log10PriorsOfAC[AF1p]; } + /** + * Are we sufficiently confidence in being non-ref that the site is considered polymorphic? + * + * We are non-ref if the probability of being non-ref > the emit confidence (often an argument). + * Suppose posterior AF > 0 is log10: -5 => 10^-5 + * And that log10minPNonRef is -3. + * We are considered polymorphic since 10^-5 < 10^-3 => -5 < -3 + * + * @param log10minPNonRef the log10 scaled min pr of being non-ref to be considered polymorphic + * + * @return true if there's enough confidence (relative to log10minPNonRef) to reject AF == 0 + */ + public boolean isPolymorphic(final double log10minPNonRef) { + return getLog10PosteriorOfAFGT0() < log10minPNonRef; + } + /** * Returns the log10 normalized posteriors given the log10 likelihoods and priors * @@ -221,11 +237,11 @@ public class AFCalcResult { if ( vector.length != expectedSize ) return false; for ( final double pr : vector ) { - if ( pr > 0 ) return false; // log10 prob. vector should be < 0 + if ( pr > 0.0 ) return false; // log10 prob. vector should be < 0 if ( Double.isInfinite(pr) || Double.isNaN(pr) ) return false; } - if ( shouldSumToOne || MathUtils.compareDoubles(MathUtils.sumLog10(vector), 0.0, 1e-2) != 0 ) + if ( shouldSumToOne && MathUtils.compareDoubles(MathUtils.sumLog10(vector), 1.0, 1e-2) != 0 ) return false; return true; // everything is good diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResultTracker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResultTracker.java index 97e69be92..d66d0b1d7 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResultTracker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResultTracker.java @@ -41,7 +41,7 @@ import java.util.List; * * TODO -- WHAT IS THE CONTRACT ON MAP AC AND P NON REF? */ -public class AFCalcResultTracker { +class AFCalcResultTracker { // These variables are intended to contain the MLE and MAP (and their corresponding allele counts) of the site over all alternate alleles protected double log10MLE; protected double log10MAP; @@ -157,6 +157,10 @@ public class AFCalcResultTracker { return log10LikelihoodOfAFzero; } + public double getLog10LikelihoodOfAFNotZero() { + return getLog10PosteriorsMatrixSumWithoutAFzero(); // TODO -- INCORRECT TEMPORARY CALCULATION + } + /** * TODO -- eric what is this supposed to return? my unit tests don't do what I think they should * @@ -215,6 +219,13 @@ public class AFCalcResultTracker { return AClimits; } + protected AFCalcResult toAFCalcResult(final double[] log10PriorsByAC) { + final int [] subACOfMLE = Arrays.copyOf(alleleCountsOfMLE, allelesUsedInGenotyping.size()); + final double[] log10Likelihoods = new double[]{getLog10LikelihoodOfAFzero(), getLog10LikelihoodOfAFNotZero()}; + final double[] log10Priors = new double[]{log10PriorsByAC[0], MathUtils.log10sumLog10(log10PriorsByAC, 1)}; + return new AFCalcResult(subACOfMLE, nEvaluations, allelesUsedInGenotyping, log10Likelihoods, log10Priors); + } + // -------------------------------------------------------------------------------- // // Protected mutational methods only for use within the calculation models themselves diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ConstrainedDiploidExactAFCalc.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ConstrainedDiploidExactAFCalc.java index 1b021aa77..81bfb6cf8 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ConstrainedDiploidExactAFCalc.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ConstrainedDiploidExactAFCalc.java @@ -4,6 +4,7 @@ import com.google.java.contract.Ensures; import com.google.java.contract.Requires; import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection; +import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.variantcontext.Genotype; import org.broadinstitute.sting.utils.variantcontext.GenotypeLikelihoods; import org.broadinstitute.sting.utils.variantcontext.VariantContext; @@ -70,7 +71,7 @@ public class ConstrainedDiploidExactAFCalc extends DiploidExactAFCalc { @Requires({ "g != null", "maxACs != null", - "MathUtils.sum(maxACs) >= 0"}) + "goodMaxACs(maxACs)"}) private void updateMaxACs(final Genotype g, final int[] maxACs) { final int[] PLs = g.getLikelihoods().getAsPLs(); @@ -101,9 +102,13 @@ public class ConstrainedDiploidExactAFCalc extends DiploidExactAFCalc { @Requires({ "alleleI >= 0", "(alleleI - 1) < maxACs.length", - "MathUtils.sum(maxACs) >= 0"}) + "goodMaxACs(maxACs)"}) private void updateMaxACs(final int[] maxACs, final int alleleI) { if ( alleleI > 0 ) maxACs[alleleI-1]++; } + + private static boolean goodMaxACs(final int[] maxACs) { + return MathUtils.sum(maxACs) >= 0; + } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/DiploidExactAFCalc.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/DiploidExactAFCalc.java index 0dac2653d..086c2a2d1 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/DiploidExactAFCalc.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/DiploidExactAFCalc.java @@ -45,9 +45,9 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc { protected abstract StateTracker makeMaxLikelihood(final VariantContext vc, final AFCalcResultTracker resultTracker); @Override - public void computeLog10PNonRef(final VariantContext vc, - final double[] log10AlleleFrequencyPriors, - final AFCalcResultTracker resultTracker) { + protected void computeLog10PNonRef(final VariantContext vc, + final double[] log10AlleleFrequencyPriors, + final AFCalcResultTracker resultTracker) { final int numAlternateAlleles = vc.getNAlleles() - 1; final ArrayList genotypeLikelihoods = getGLs(vc.getGenotypes()); final int numSamples = genotypeLikelihoods.size()-1; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalc.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalc.java index b74923086..13858bcf1 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalc.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalc.java @@ -60,19 +60,20 @@ public class IndependentAllelesDiploidExactAFCalc extends DiploidExactAFCalc { public void computeLog10PNonRef(final VariantContext vc, final double[] log10AlleleFrequencyPriors, final AFCalcResultTracker resultTracker) { - final List independentResultTrackers = computeLog10PNonRefForEachAllele(vc, log10AlleleFrequencyPriors); - combineIndependentPNonRefs(vc, independentResultTrackers, log10AlleleFrequencyPriors, resultTracker); + refModel.computeLog10PNonRef(vc, log10AlleleFrequencyPriors, resultTracker); +// final List independentResultTrackers = computeLog10PNonRefForEachAllele(vc, log10AlleleFrequencyPriors); +// combineIndependentPNonRefs(vc, independentResultTrackers, log10AlleleFrequencyPriors, resultTracker); } - protected List computeLog10PNonRefForEachAllele(final VariantContext vc, + protected List computeLog10PNonRefForEachAllele(final VariantContext vc, final double[] log10AlleleFrequencyPriors) { final int nAltAlleles = vc.getNAlleles() - 1; - final List resultTrackers = new ArrayList(nAltAlleles); + final List resultTrackers = new ArrayList(nAltAlleles); for ( int altI = 0; altI < nAltAlleles; altI++ ) { final List biallelic = Arrays.asList(vc.getReference(), vc.getAlternateAllele(altI)); final VariantContext subvc = biallelicCombinedGLs(vc, biallelic, altI + 1); - final AFCalcResultTracker resultTracker = refModel.getLog10PNonRef(subvc, log10AlleleFrequencyPriors); + final AFCalcResult resultTracker = refModel.getLog10PNonRef(subvc, log10AlleleFrequencyPriors); resultTrackers.add(resultTracker); } @@ -141,34 +142,34 @@ public class IndependentAllelesDiploidExactAFCalc extends DiploidExactAFCalc { * @param resultTracker the destination for the combined result */ protected void combineIndependentPNonRefs(final VariantContext vc, - final List independentPNonRefs, + final List independentPNonRefs, final double[] log10AlleleFrequencyPriors, final AFCalcResultTracker resultTracker) { - final int nChrom = vc.getNSamples() * 2; - - resultTracker.reset(); - - // both the likelihood and the posterior of AF=0 are the same for all alleles - // TODO -- check and ensure this is true - resultTracker.setLog10LikelihoodOfAFzero(independentPNonRefs.get(0).getLog10LikelihoodOfAFzero()); - resultTracker.setLog10PosteriorOfAFzero(independentPNonRefs.get(0).getLog10PosteriorOfAFzero()); - resultTracker.log10PosteriorMatrixSum = 0.0; - - int altI = 0; - for ( final AFCalcResultTracker independentPNonRef : independentPNonRefs ) { - resultTracker.log10MLE += independentPNonRef.getLog10MLE(); - - // TODO -- technically double counting some posterior mass - resultTracker.log10MAP += independentPNonRef.getLog10MAP(); - - // TODO -- technically double counting some posterior mass - resultTracker.log10PosteriorMatrixSum += independentPNonRef.getLog10PosteriorsMatrixSumWithoutAFzero(); - - resultTracker.getAlleleCountsOfMAP()[altI] = independentPNonRef.getAlleleCountsOfMAP()[0]; - resultTracker.getAlleleCountsOfMLE()[altI] = independentPNonRef.getAlleleCountsOfMLE()[0]; - - resultTracker.nEvaluations += independentPNonRef.nEvaluations; - altI++; - } +// final int nChrom = vc.getNSamples() * 2; +// +// resultTracker.reset(); +// +// // both the likelihood and the posterior of AF=0 are the same for all alleles +// // TODO -- check and ensure this is true +// resultTracker.setLog10LikelihoodOfAFzero(independentPNonRefs.get(0).getLog10LikelihoodOfAFzero()); +// resultTracker.setLog10PosteriorOfAFzero(independentPNonRefs.get(0).getLog10PosteriorOfAFzero()); +// resultTracker.log10PosteriorMatrixSum = 0.0; +// +// int altI = 0; +// for ( final AFCalcResult independentPNonRef : independentPNonRefs ) { +// resultTracker.log10MLE += independentPNonRef.getLog10MLE(); +// +// // TODO -- technically double counting some posterior mass +// resultTracker.log10MAP += independentPNonRef.getLog10MAP(); +// +// // TODO -- technically double counting some posterior mass +// resultTracker.log10PosteriorMatrixSum += independentPNonRef.getLog10PosteriorsMatrixSumWithoutAFzero(); +// +// resultTracker.getAlleleCountsOfMAP()[altI] = independentPNonRef.getAlleleCountsOfMAP()[0]; +// resultTracker.getAlleleCountsOfMLE()[altI] = independentPNonRef.getAlleleCountsOfMLE()[0]; +// +// resultTracker.nEvaluations += independentPNonRef.nEvaluations; +// altI++; +// } } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GLBasedSampleSelector.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GLBasedSampleSelector.java index 006c303dc..f7f3e2a7a 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GLBasedSampleSelector.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GLBasedSampleSelector.java @@ -23,7 +23,7 @@ */ package org.broadinstitute.sting.gatk.walkers.validation.validationsiteselector; -import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.AFCalcResultTracker; +import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.AFCalcResult; import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.DiploidExactAFCalc; import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.ReferenceDiploidExactAFCalc; import org.broadinstitute.sting.utils.variantcontext.VariantContext; @@ -54,10 +54,9 @@ public class GLBasedSampleSelector extends SampleSelector { flatPriors = new double[1+2*samples.size()]; AFCalculator = new ReferenceDiploidExactAFCalc(samples.size(), 4); } - AFCalcResultTracker resultTracker = new AFCalcResultTracker(vc.getAlternateAlleles().size()); - AFCalculator.computeLog10PNonRef(subContext, flatPriors, resultTracker); + final AFCalcResult result = AFCalculator.getLog10PNonRef(subContext, flatPriors); // do we want to let this qual go up or down? - if ( resultTracker.getLog10PosteriorOfAFzero() < referenceLikelihood ) { + if ( result.getLog10LikelihoodOfAFEq0() < referenceLikelihood ) { return true; }