Fix another caching issue with the PairHMM

The Problem
----------
Some read x haplotype pairs were getting very low likelihood when caching is on. Turning it off seemed to give the right result.

Solution
--------
The HaplotypeCaller only initializes the PairHMM once and then feed it with a set of reads and haplotypes. The PairHMM always caches the matrix when the previous haplotype length is the same as the current one. This is not true when the read has changed. This commit adds another condition to zero the haplotype start index when the read changes.

Summarized Changes
------------------
   * Added the recacheReadValue check to flush the matrix (hapStartIndex = 0)
   * Updated related MD5's

Bamboo link: http://gsabamboo.broadinstitute.org/browse/GSAUNSTABLE-PARALLEL9
This commit is contained in:
Mauricio Carneiro 2013-04-12 10:42:03 -04:00
parent 35293cde49
commit 403f9de122
3 changed files with 11 additions and 11 deletions

View File

@ -64,7 +64,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
@Test
public void testHaplotypeCallerMultiSampleComplex1() {
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "490ecf6619740c01c81a463392ef23cf");
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "7a035437f145b714cb844666b0736925");
}
private void HCTestSymbolicVariants(String bam, String args, String md5) {
@ -88,12 +88,12 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
@Test
public void testHaplotypeCallerMultiSampleGGAComplex() {
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538",
"125e93deeb3b390a14d9b777aa2a220f");
"aacfcc50c9aa5cfbec8ae8026d937ecd");
}
@Test
public void testHaplotypeCallerMultiSampleGGAMultiAllelic() {
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:133041-133161 -L 20:300207-300337",
"6957fd0e8a5bc66d2572a6ca8626fa7a");
"eae75a3dc5c2e0fbdf016dbbafe425e2");
}
}

View File

@ -80,12 +80,12 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerMultiSample() {
HCTest(CEUTRIO_BAM, "", "6fa37c449a800bcd59069be03ad2fff2");
HCTest(CEUTRIO_BAM, "", "c8598545d1c76b470a7784e6b5c2ad4a");
}
@Test
public void testHaplotypeCallerSingleSample() {
HCTest(NA12878_BAM, "", "6140447b34bd1d08b3ed4d473d2c2f23");
HCTest(NA12878_BAM, "", "0b2ca4482e92b9606be904cc25ba0988");
}
@Test(enabled = false) // can't annotate the rsID's yet
@ -96,7 +96,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerMultiSampleGGA() {
HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf",
"cbd119f3d37a9af0b3539c13b8053bd9");
"d00a604abe02586f803b1bb9d63af0f7");
}
@Test
@ -112,7 +112,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerSingleSampleIndelQualityScores() {
HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "9eeeada2f7145adfe08f538aad704982");
HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "1eab0eb7a184d981b021a249c3bd0401");
}
private void HCTestNearbySmallIntervals(String bam, String args, String md5) {
@ -149,7 +149,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerNearbySmallIntervals() {
HCTestNearbySmallIntervals(NA12878_BAM, "", "16ecd2f282bcb10dc32e7f3fe714a000");
HCTestNearbySmallIntervals(NA12878_BAM, "", "6ab938dede6838c983f84225d4103852");
}
// This problem bam came from a user on the forum and it spotted a problem where the ReadClipper
@ -166,7 +166,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void HCTestStructuralIndels() {
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "AFR.structural.indels.bam") + " --no_cmdline_in_header -o %s -minPruning 6 -L 20:8187565-8187800 -L 20:18670537-18670730";
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("b6fd839641ee038048626fbd1154f173"));
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("e8466846ca420bcbcd52b97f7a661aa3"));
executeTest("HCTestStructuralIndels: ", spec);
}
@ -188,7 +188,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
public void HCTestReducedBam() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T HaplotypeCaller -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1,
Arrays.asList("020b1a4feb82f050894f6066dc07cc4a"));
Arrays.asList("8a62597f2c005f373efbe398ab51a2f1"));
executeTest("HC calling on a ReducedRead BAM", spec);
}

View File

@ -134,7 +134,7 @@ public abstract class PairHMM {
paddedReadLength = readBases.length + 1;
paddedHaplotypeLength = haplotypeBases.length + 1;
final int hapStartIndex = (previousHaplotypeBases == null || haplotypeBases.length != previousHaplotypeBases.length ) ? 0 : findFirstPositionWhereHaplotypesDiffer(haplotypeBases, previousHaplotypeBases);
final int hapStartIndex = (previousHaplotypeBases == null || haplotypeBases.length != previousHaplotypeBases.length || recacheReadValues) ? 0 : findFirstPositionWhereHaplotypesDiffer(haplotypeBases, previousHaplotypeBases);
double result = subComputeReadLikelihoodGivenHaplotypeLog10(haplotypeBases, readBases, readQuals, insertionGOP, deletionGOP, overallGCP, hapStartIndex, recacheReadValues);