From 0e52b8ba5ac974a3d3f67b42e5ec287cbad92cd5 Mon Sep 17 00:00:00 2001 From: Valentin Ruano-Rubio Date: Mon, 22 Sep 2014 15:15:38 -0400 Subject: [PATCH] Fixed MLEAC and QUAL inaccuracy in GeneralPloidyExactAFCalculator. The problem whas that the MLE table calculation aborted "unlikely" genotype combinations to aggresively. This also uncovered another bug where GeneralPloidyExactAFCalculation makes a slightly different use of StateTracker as compared to DiploidExactAFCalculation. We have changed StateTracker generalizing it to be able to work with both using code behaviors. Story: ----- * https://www.pivotaltracker.com/story/show/78920568 Changes: ------- * Fixes in GeneralPloidyExactAFCalculator. * Needed changes in StateTracker API and its consequences in DiploidExactAFCalculation. * Updated affected integrated tests' MD5s after fixing the GeneralPloidyExactAF. --- .../afcalc/DiploidExactAFCalculator.java | 2 +- .../afcalc/GeneralPloidyExactAFCalculator.java | 4 +--- .../walkers/genotyper/afcalc/StateTracker.java | 15 +++++++++------ ...notyperGeneralPloidySuite1IntegrationTest.java | 4 ++-- ...notyperGeneralPloidySuite2IntegrationTest.java | 2 +- .../HaplotypeCallerGVCFIntegrationTest.java | 6 +++--- .../HaplotypeCallerIntegrationTest.java | 6 +++--- 7 files changed, 20 insertions(+), 19 deletions(-) diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/DiploidExactAFCalculator.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/DiploidExactAFCalculator.java index dc07fc2ae..e106b4902 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/DiploidExactAFCalculator.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/DiploidExactAFCalculator.java @@ -152,7 +152,7 @@ public abstract class DiploidExactAFCalculator extends ExactAFCalculator { final double log10LofK = set.getLog10Likelihoods()[set.getLog10Likelihoods().length-1]; // can we abort early because the log10Likelihoods are so small? - if ( stateTracker.abort(log10LofK, set.getACcounts(), true) ) { + if ( stateTracker.abort(log10LofK, set.getACcounts(), true, false) ) { //if ( DEBUG ) // System.out.printf(" *** breaking early set=%s log10L=%.2f maxLog10L=%.2f%n", set.ACcounts, log10LofK, maxLog10L); return log10LofK; diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/GeneralPloidyExactAFCalculator.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/GeneralPloidyExactAFCalculator.java index 316f6212d..d19b93f6b 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/GeneralPloidyExactAFCalculator.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/GeneralPloidyExactAFCalculator.java @@ -266,10 +266,8 @@ public class GeneralPloidyExactAFCalculator extends ExactAFCalculator { if (!Double.isInfinite(log10LofK)) newPool.add(set); - // TODO -- change false to true this correct line when the implementation of this model is optimized (it's too slow now to handle this fix) - if ( stateTracker.abort(log10LofK, set.getACcounts(), false) ) { + if ( stateTracker.abort(log10LofK, set.getACcounts(), true, true) ) return log10LofK; - } // iterate over higher frequencies if possible // by convention, ACcounts contained in set have full vector of possible pool ac counts including ref count. diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/StateTracker.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/StateTracker.java index 34df64d31..a128bdd23 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/StateTracker.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/StateTracker.java @@ -48,8 +48,8 @@ package org.broadinstitute.gatk.tools.walkers.genotyper.afcalc; import com.google.java.contract.Ensures; import com.google.java.contract.Requires; -import org.broadinstitute.gatk.utils.MathUtils; import htsjdk.variant.variantcontext.Allele; +import org.broadinstitute.gatk.utils.MathUtils; import java.util.Arrays; import java.util.HashMap; @@ -148,11 +148,13 @@ final class StateTracker { /** * @return true iff all ACs in this object are less than or equal to their corresponding ACs in the provided set */ - private boolean isLowerAC(final ExactACcounts otherACs) { + private boolean isLowerAC(final ExactACcounts otherACs, final boolean otherACsContainsReference) { final int[] otherACcounts = otherACs.getCounts(); - for ( int i = 0; i < otherACcounts.length; i++ ) { - if ( alleleCountsOfMLE[i] > otherACcounts[i] ) + final int firstAltAlleleIndex = otherACsContainsReference ? 1 : 0; + + for ( int i = firstAltAlleleIndex; i < otherACcounts.length; i++ ) { + if ( alleleCountsOfMLE[i - firstAltAlleleIndex] > otherACcounts[i] ) return false; } @@ -164,10 +166,11 @@ final class StateTracker { * * @param log10LofK the log10LofK of these ACs * @param ACs the ACs of this state + * @param exactACcountsContainReference whether the {@code ACs} contains the reference allele count (index == 0) beside all other alternative alleles. * @return return true if there's no reason to continue with subpaths of AC, or false otherwise */ - protected boolean abort( final double log10LofK, final ExactACcounts ACs, final boolean enforceLowerACs ) { - return tooLowLikelihood(log10LofK) && (!enforceLowerACs || isLowerAC(ACs)); + protected boolean abort(final double log10LofK, final ExactACcounts ACs, final boolean enforceLowerACs, final boolean exactACcountsContainReference) { + return tooLowLikelihood(log10LofK) && (!enforceLowerACs || isLowerAC(ACs,exactACcountsContainReference)); } @Ensures("result != null") diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite1IntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite1IntegrationTest.java index 231e1512c..41c36d48c 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite1IntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite1IntegrationTest.java @@ -69,7 +69,7 @@ public class UnifiedGenotyperGeneralPloidySuite1IntegrationTest extends WalkerTe @Test(enabled = true) public void testBOTH_GGA_Pools() { - executor.PC_LSV_Test(String.format("-A AlleleCountBySample -maxAltAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s", LSV_ALLELES), "LSV_BOTH_GGA", "BOTH", "c731cd0ddb0fa153bffa7bc19586bfa6"); + executor.PC_LSV_Test(String.format("-A AlleleCountBySample -maxAltAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s", LSV_ALLELES), "LSV_BOTH_GGA", "BOTH", "40d60700aae1a316620fd96064080dcd"); } @Test(enabled = true) @@ -79,6 +79,6 @@ public class UnifiedGenotyperGeneralPloidySuite1IntegrationTest extends WalkerTe @Test(enabled = true) public void testINDEL_maxAltAlleles2_ploidy1_Pools_noRef() { - executor.PC_LSV_Test_NoRef("-A AlleleCountBySample -maxAltAlleles 2 -ploidy 1", "LSV_INDEL_DISC_NOREF_p1", "INDEL", "54d718401c413fdbc1f801061bf8e7d1"); + executor.PC_LSV_Test_NoRef("-A AlleleCountBySample -maxAltAlleles 2 -ploidy 1", "LSV_INDEL_DISC_NOREF_p1", "INDEL", "d5047d65422426cf3ddba07768d8f6af"); } } diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite2IntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite2IntegrationTest.java index 54282b05c..1dbe1e8d1 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite2IntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite2IntegrationTest.java @@ -58,7 +58,7 @@ public class UnifiedGenotyperGeneralPloidySuite2IntegrationTest extends WalkerTe @Test(enabled = true) public void testINDEL_maxAltAlleles2_ploidy3_Pools_noRef() { - executor.PC_LSV_Test_NoRef("-A AlleleCountBySample -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","1426e311501a7bd96ecb75ed839f7c64"); + executor.PC_LSV_Test_NoRef("-A AlleleCountBySample -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","1e22151ac0c61860b06d1e5406dd09a6"); } @Test(enabled = true) diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java index 607910acb..060ff9cca 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java @@ -107,9 +107,9 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest { final String WExIntervals = "-L 20:10,000,000-10,100,000 -isr INTERSECTION -L " + hg19Chr20Intervals; // this functionality can be adapted to provide input data for whatever you might want in your data - tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.NONE, PCRFreeIntervals, "df5b803750e604bedd6d713c59f46c33"}); - tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "8e06e6502d958cab79b9275460c06389"}); - tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "f32fb9a2485c583bc22ff4f09217318c"}); + tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.NONE, PCRFreeIntervals, "7db552463cf779644335bfa09fcddf82"}); + tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "90600d209cf778fdfca6a844b8ee4acb"}); + tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "db3c99786c5726b20dbfe47e31e50d60"}); tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.NONE, WExIntervals, "eac793500fbc7de46259000dbbcdd27d"}); tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "4a431e0e387de3f791318f67d8855b0b"}); tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.GVCF, WExIntervals, "1058d3fe6553e07f002f994759c9647d"}); diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index c0b9140b2..c0b7ff730 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -96,7 +96,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerMultiSampleHaploid() { HCTest(CEUTRIO_BAM, - "-ploidy 1", "c93650f842aa4dfa4ef2b5f1b908a576"); + "-ploidy 1", "5046d3f77a56fcc4ccc8a216670effac"); } @Test @@ -131,7 +131,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerGraphBasedMultiSampleHaploid() { - HCTest(CEUTRIO_BAM, "-likelihoodEngine GraphBased -ploidy 1", "0962d7723ef1e5e20fb42e869f12e1da"); + HCTest(CEUTRIO_BAM, "-likelihoodEngine GraphBased -ploidy 1", "1425b46f3cd50040a1272c8775672fc0"); } @Test @@ -160,7 +160,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerMultiSampleGGATetraploid() { HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -ploidy 4 -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", - "874f5fc9d7eed8894f72b8d2587de4b6"); + "38ae7a6d082fc02f865f7a69a9c871d9"); } @Test