From f838815343d971d3a9ff2eda0df931aa69654c52 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Tue, 23 Oct 2012 06:47:53 -0400 Subject: [PATCH] Updating MD5s for confidence ref site estimation in IndependentAllelesDiploidExactAFCalc -- Included logic to only add priors for alleles with sufficient evidence to be called polymorphic. If no alleles are poly make sure to add priors of first allele --- .../afcalc/IndependentAllelesDiploidExactAFCalc.java | 8 ++++++++ .../genotyper/UnifiedGenotyperIntegrationTest.java | 4 ++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalc.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalc.java index 857b7e59e..d0b801a20 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalc.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalc.java @@ -329,6 +329,7 @@ import java.util.*; double log10PosteriorOfACEq0Sum = 0.0; double log10PosteriorOfACGt0Sum = 0.0; + boolean anyPoly = false; for ( final AFCalcResult sortedResultWithThetaNPriors : sortedResultsWithThetaNPriors ) { final Allele altAllele = sortedResultWithThetaNPriors.getAllelesUsedInGenotyping().get(1); final int altI = vc.getAlleles().indexOf(altAllele) - 1; @@ -338,6 +339,7 @@ import java.util.*; // the AF > 0 case requires us to store the normalized likelihood for later summation if ( sortedResultWithThetaNPriors.getLog10PosteriorOfAFGT0() > MIN_LOG10_CONFIDENCE_TO_INCLUDE_ALLELE_IN_POSTERIOR ) { + anyPoly = true; log10PosteriorOfACEq0Sum += sortedResultWithThetaNPriors.getLog10PosteriorOfAFEq0(); log10PriorsOfAC[0] += sortedResultWithThetaNPriors.getLog10PriorOfAFEq0(); log10PriorsOfAC[1] += sortedResultWithThetaNPriors.getLog10PriorOfAFGT0(); @@ -352,6 +354,12 @@ import java.util.*; nEvaluations += sortedResultWithThetaNPriors.nEvaluations; } + // If no alleles were polymorphic, make sure we have the proper priors (the defaults) for likelihood calculation + if ( ! anyPoly ) { + log10PriorsOfAC[0] = sortedResultsWithThetaNPriors.get(0).getLog10PriorOfAFEq0(); + log10PriorsOfAC[1] = sortedResultsWithThetaNPriors.get(0).getLog10PriorOfAFGT0(); + } + // In principle, if B_p = x and C_p = y are the probabilities of being poly for alleles B and C, // the probability of being poly is (1 - B_p) * (1 - C_p) = (1 - x) * (1 - y). We want to estimate confidently // log10((1 - x) * (1 - y)) which is log10(1 - x) + log10(1 - y). This sum is log10PosteriorOfACEq0 diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index a1701e3e5..dee54b9f8 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -192,12 +192,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { @Test public void testOutputParameterAllConfident() { - testOutputParameters("--output_mode EMIT_ALL_CONFIDENT_SITES", "8800c58715c2bb434b69e1873cb77de6"); + testOutputParameters("--output_mode EMIT_ALL_CONFIDENT_SITES", "f9ea04d96eeef29e71d37e60518c2579"); } @Test public void testOutputParameterAllSites() { - testOutputParameters("--output_mode EMIT_ALL_SITES", "639df9f4c029792ac2e46069efc82b20"); + testOutputParameters("--output_mode EMIT_ALL_SITES", "41c046d38ea328421df924e37e017645"); } private void testOutputParameters(final String args, final String md5) {