From 403f9de12245696d52f9e687321dcb8f9f83b73b Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Fri, 12 Apr 2013 10:42:03 -0400 Subject: [PATCH] Fix another caching issue with the PairHMM The Problem ---------- Some read x haplotype pairs were getting very low likelihood when caching is on. Turning it off seemed to give the right result. Solution -------- The HaplotypeCaller only initializes the PairHMM once and then feed it with a set of reads and haplotypes. The PairHMM always caches the matrix when the previous haplotype length is the same as the current one. This is not true when the read has changed. This commit adds another condition to zero the haplotype start index when the read changes. Summarized Changes ------------------ * Added the recacheReadValue check to flush the matrix (hapStartIndex = 0) * Updated related MD5's Bamboo link: http://gsabamboo.broadinstitute.org/browse/GSAUNSTABLE-PARALLEL9 --- ...rComplexAndSymbolicVariantsIntegrationTest.java | 6 +++--- .../HaplotypeCallerIntegrationTest.java | 14 +++++++------- .../sting/utils/pairhmm/PairHMM.java | 2 +- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java index 6d85421c4..292760e89 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java @@ -64,7 +64,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa @Test public void testHaplotypeCallerMultiSampleComplex1() { - HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "490ecf6619740c01c81a463392ef23cf"); + HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "7a035437f145b714cb844666b0736925"); } private void HCTestSymbolicVariants(String bam, String args, String md5) { @@ -88,12 +88,12 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa @Test public void testHaplotypeCallerMultiSampleGGAComplex() { HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538", - "125e93deeb3b390a14d9b777aa2a220f"); + "aacfcc50c9aa5cfbec8ae8026d937ecd"); } @Test public void testHaplotypeCallerMultiSampleGGAMultiAllelic() { HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:133041-133161 -L 20:300207-300337", - "6957fd0e8a5bc66d2572a6ca8626fa7a"); + "eae75a3dc5c2e0fbdf016dbbafe425e2"); } } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index 573cc83fd..9cd225df3 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -80,12 +80,12 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerMultiSample() { - HCTest(CEUTRIO_BAM, "", "6fa37c449a800bcd59069be03ad2fff2"); + HCTest(CEUTRIO_BAM, "", "c8598545d1c76b470a7784e6b5c2ad4a"); } @Test public void testHaplotypeCallerSingleSample() { - HCTest(NA12878_BAM, "", "6140447b34bd1d08b3ed4d473d2c2f23"); + HCTest(NA12878_BAM, "", "0b2ca4482e92b9606be904cc25ba0988"); } @Test(enabled = false) // can't annotate the rsID's yet @@ -96,7 +96,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerMultiSampleGGA() { HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", - "cbd119f3d37a9af0b3539c13b8053bd9"); + "d00a604abe02586f803b1bb9d63af0f7"); } @Test @@ -112,7 +112,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerSingleSampleIndelQualityScores() { - HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "9eeeada2f7145adfe08f538aad704982"); + HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "1eab0eb7a184d981b021a249c3bd0401"); } private void HCTestNearbySmallIntervals(String bam, String args, String md5) { @@ -149,7 +149,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerNearbySmallIntervals() { - HCTestNearbySmallIntervals(NA12878_BAM, "", "16ecd2f282bcb10dc32e7f3fe714a000"); + HCTestNearbySmallIntervals(NA12878_BAM, "", "6ab938dede6838c983f84225d4103852"); } // This problem bam came from a user on the forum and it spotted a problem where the ReadClipper @@ -166,7 +166,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void HCTestStructuralIndels() { final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "AFR.structural.indels.bam") + " --no_cmdline_in_header -o %s -minPruning 6 -L 20:8187565-8187800 -L 20:18670537-18670730"; - final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("b6fd839641ee038048626fbd1154f173")); + final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("e8466846ca420bcbcd52b97f7a661aa3")); executeTest("HCTestStructuralIndels: ", spec); } @@ -188,7 +188,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { public void HCTestReducedBam() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T HaplotypeCaller -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1, - Arrays.asList("020b1a4feb82f050894f6066dc07cc4a")); + Arrays.asList("8a62597f2c005f373efbe398ab51a2f1")); executeTest("HC calling on a ReducedRead BAM", spec); } diff --git a/public/java/src/org/broadinstitute/sting/utils/pairhmm/PairHMM.java b/public/java/src/org/broadinstitute/sting/utils/pairhmm/PairHMM.java index 33cd191f6..6b57a1354 100644 --- a/public/java/src/org/broadinstitute/sting/utils/pairhmm/PairHMM.java +++ b/public/java/src/org/broadinstitute/sting/utils/pairhmm/PairHMM.java @@ -134,7 +134,7 @@ public abstract class PairHMM { paddedReadLength = readBases.length + 1; paddedHaplotypeLength = haplotypeBases.length + 1; - final int hapStartIndex = (previousHaplotypeBases == null || haplotypeBases.length != previousHaplotypeBases.length ) ? 0 : findFirstPositionWhereHaplotypesDiffer(haplotypeBases, previousHaplotypeBases); + final int hapStartIndex = (previousHaplotypeBases == null || haplotypeBases.length != previousHaplotypeBases.length || recacheReadValues) ? 0 : findFirstPositionWhereHaplotypesDiffer(haplotypeBases, previousHaplotypeBases); double result = subComputeReadLikelihoodGivenHaplotypeLog10(haplotypeBases, readBases, readQuals, insertionGOP, deletionGOP, overallGCP, hapStartIndex, recacheReadValues);