From f176c854c684dd2412b59c0767d344e62918d0be Mon Sep 17 00:00:00 2001 From: Guillermo del Angel Date: Thu, 13 Jun 2013 13:27:06 -0400 Subject: [PATCH] Swapping in logless Pair HMM for default usage with UG: -- Changed default HMM model. -- Removed check. -- Changed md5's: PL's in the high 100s change by a point or two due to new implementation. -- Resulting performance improvement is about 30 to 50% less runtime when using -glm INDEL. --- .../IndelGenotypeLikelihoodsCalculationModel.java | 12 +++++++----- .../walkers/genotyper/UnifiedArgumentCollection.java | 2 +- .../gatk/walkers/indels/PairHMMIndelErrorModel.java | 10 +++++----- ...dGenotyperGeneralPloidySuite1IntegrationTest.java | 2 +- ...dGenotyperGeneralPloidySuite2IntegrationTest.java | 2 +- .../UnifiedGenotyperIndelCallingIntegrationTest.java | 2 +- ...UnifiedGenotyperNormalCallingIntegrationTest.java | 2 +- 7 files changed, 17 insertions(+), 15 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java index c6e9ea379..0f3f7739d 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java @@ -76,7 +76,8 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood private List alleleList = new ArrayList(); - protected IndelGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) { + protected IndelGenotypeLikelihoodsCalculationModel(final UnifiedArgumentCollection UAC, + final Logger logger) { super(UAC, logger); pairModel = new PairHMMIndelErrorModel(UAC.INDEL_GAP_OPEN_PENALTY, UAC.INDEL_GAP_CONTINUATION_PENALTY, UAC.OUTPUT_DEBUG_INDEL_INFO, UAC.pairHMM); @@ -85,10 +86,11 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood ignoreSNPAllelesWhenGenotypingIndels = UAC.IGNORE_SNP_ALLELES; } - protected static List computeConsensusAlleles(ReferenceContext ref, - Map contexts, - AlignmentContextUtils.ReadOrientation contextType, - GenomeLocParser locParser, UnifiedArgumentCollection UAC) { + protected static List computeConsensusAlleles(final ReferenceContext ref, + final Map contexts, + final AlignmentContextUtils.ReadOrientation contextType, + final GenomeLocParser locParser, + final UnifiedArgumentCollection UAC) { ConsensusAlleleCounter counter = new ConsensusAlleleCounter(locParser, true, UAC.MIN_INDEL_COUNT_FOR_GENOTYPING, UAC.MIN_INDEL_FRACTION_PER_SAMPLE); return counter.computeConsensusAlleles(ref, contexts, contextType); } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java index b96b5733f..f156468cc 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java @@ -85,7 +85,7 @@ public class UnifiedArgumentCollection extends StandardCallerArgumentCollection * The PairHMM implementation to use for -glm INDEL genotype likelihood calculations. The various implementations balance a tradeoff of accuracy and runtime. */ @Argument(fullName = "pair_hmm_implementation", shortName = "pairHMM", doc = "The PairHMM implementation to use for -glm INDEL genotype likelihood calculations", required = false) - public PairHMM.HMM_IMPLEMENTATION pairHMM = PairHMM.HMM_IMPLEMENTATION.ORIGINAL; + public PairHMM.HMM_IMPLEMENTATION pairHMM = PairHMM.HMM_IMPLEMENTATION.LOGLESS_CACHING; /** * The minimum confidence needed in a given base for it to be used in variant calling. Note that the base quality of a base diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java index 7b444c4bd..c77557da6 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java @@ -54,6 +54,7 @@ import org.broadinstitute.sting.utils.clipping.ReadClipper; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.sting.utils.pairhmm.Log10PairHMM; +import org.broadinstitute.sting.utils.pairhmm.LoglessPairHMM; import org.broadinstitute.sting.utils.pairhmm.PairHMM; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; @@ -116,12 +117,11 @@ public class PairHMMIndelErrorModel { case ORIGINAL: pairHMM = new Log10PairHMM(false); break; - case LOGLESS_CACHING: //TODO: still not tested so please do not use yet - //pairHMM = new LoglessCachingPairHMM(); //TODO - add it back when the figure out how to use the protected LoglessCachingPairHMM class - throw new UserException.BadArgumentValue("pairHMM"," this option (LOGLESS_CACHING in UG) is still under development"); - //break; + case LOGLESS_CACHING: + pairHMM = new LoglessPairHMM(); + break; default: - throw new UserException.BadArgumentValue("pairHMM", "Specified pairHMM implementation is unrecognized or incompatible with the UnifiedGenotyper. Acceptable options are ORIGINAL, EXACT or LOGLESS_CACHING (the third option is still under development)."); + throw new UserException.BadArgumentValue("pairHMM", "Specified pairHMM implementation is unrecognized or incompatible with the UnifiedGenotyper. Acceptable options are ORIGINAL, EXACT or LOGLESS_CACHING."); } // fill gap penalty table, affine naive model: diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite1IntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite1IntegrationTest.java index 2d36a27d1..aaa3b1284 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite1IntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite1IntegrationTest.java @@ -79,6 +79,6 @@ public class UnifiedGenotyperGeneralPloidySuite1IntegrationTest extends WalkerTe @Test(enabled = true) public void testINDEL_maxAltAlleles2_ploidy1_Pools_noRef() { - executor.PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1", "LSV_INDEL_DISC_NOREF_p1", "INDEL", "dd28b14d732852bffbba4f22f7697227"); + executor.PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1", "LSV_INDEL_DISC_NOREF_p1", "INDEL", "98f4d78aad745c6e853b81b2e4e207b4"); } } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite2IntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite2IntegrationTest.java index 117e54ef8..0eb89adc7 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite2IntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite2IntegrationTest.java @@ -58,7 +58,7 @@ public class UnifiedGenotyperGeneralPloidySuite2IntegrationTest extends WalkerTe @Test(enabled = true) public void testINDEL_maxAltAlleles2_ploidy3_Pools_noRef() { - executor.PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","369ad0ff28bb9ce7974dc2c7343c8737"); + executor.PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","25902d7a6a0c00c60c2d5845dfaa1a4c"); } @Test(enabled = true) diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIndelCallingIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIndelCallingIntegrationTest.java index 49d429c0d..65a569cdc 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIndelCallingIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIndelCallingIntegrationTest.java @@ -136,7 +136,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L " + result.get(0).getAbsolutePath(), 1, - Arrays.asList("587bf6bad368ed81189747a84f913ab2")); + Arrays.asList("5596851d19582dd1af3901b7d703ae0a")); executeTest("test MultiSample Pilot1 CEU indels using GENOTYPE_GIVEN_ALLELES", spec2); } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperNormalCallingIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperNormalCallingIntegrationTest.java index 439039f9b..1bfbbac17 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperNormalCallingIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperNormalCallingIntegrationTest.java @@ -96,7 +96,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{ public void testMultipleSNPAlleles() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + privateTestDir + "multiallelic.snps.bam -o %s -L " + privateTestDir + "multiallelic.snps.intervals", 1, - Arrays.asList("70a21812d4dd6b72c44f60c74d508d5b")); + Arrays.asList("06c85e8eab08b67244cf38fc785aca22")); executeTest("test Multiple SNP alleles", spec); }