From 74a7674d706bae94f3f0cac175ec8f995b70fdc1 Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Tue, 8 Jul 2014 11:07:19 -0400 Subject: [PATCH] Improvements to genotyping accuracy. -- Global mismapping penalty was only applied to the reference haplotype. This led to problems with overlapping events, mostly STR haplotypes. Now the penalty is applied to every haplotype. -- We subset the reads down to only those which overlap the event (after assembly based realignment) for likelihood calculations. --- .../haplotypecaller/HaplotypeCaller.java | 4 +-- .../HaplotypeCallerGenotypingEngine.java | 36 +++++++++++++------ .../PairHMMLikelihoodCalculationEngine.java | 22 +++++------- ...lexAndSymbolicVariantsIntegrationTest.java | 8 ++--- .../HaplotypeCallerGVCFIntegrationTest.java | 16 ++++----- .../HaplotypeCallerIntegrationTest.java | 32 ++++++++--------- ...aplotypeCallerParallelIntegrationTest.java | 2 +- 7 files changed, 66 insertions(+), 54 deletions(-) diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCaller.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCaller.java index 25edd49ef..994ba209d 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCaller.java @@ -385,8 +385,8 @@ public class HaplotypeCaller extends ActiveRegionWalker, In * from another location in the genome. Suppose a read has many mismatches from the reference, say like 5, but * has a very high mapping quality of 60. Without this parameter, the read would contribute 5 * Q30 evidence * in favor of its 5 mismatch haplotype compared to reference, potentially enough to make a call off that single - * read for all of these events. With this parameter set to Q30, though, the maximum evidence against the reference - * that this (and any) read could contribute against reference is Q30. + * read for all of these events. With this parameter set to Q30, though, the maximum evidence against any haplotype + * that this (and any) read could contribute is Q30. * * Set this term to any negative number to turn off the global mapping rate */ diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGenotypingEngine.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGenotypingEngine.java index 5818cc1e2..90dda170e 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGenotypingEngine.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGenotypingEngine.java @@ -74,6 +74,7 @@ import java.util.*; public class HaplotypeCallerGenotypingEngine extends GenotypingEngine { private final static List NO_CALL = Collections.singletonList(Allele.NO_CALL); + private final static int ALLELE_EXTENSION = 2; private MergeVariantsAcrossHaplotypes crossHaplotypeEventMerger; @@ -170,7 +171,7 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine alternative allele to the result variation contexts. * * @return A CalledHaplotypes object containing a list of VC's with genotyped events and called haplotypes * @@ -249,7 +250,7 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine tests = new ArrayList<>(); for ( final int nct : Arrays.asList(1, 2, 4) ) { - tests.add(new Object[]{nct, "31a7bb9fb5bc512120b88c5ecdd81139"}); + tests.add(new Object[]{nct, "a2718251ffae9db885b7f74b33dd5b57"}); } return tests.toArray(new Object[][]{});