From f0e64850da5bb79013245d28299aeee638c33502 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 17 Apr 2013 10:30:37 -0400 Subject: [PATCH] Two sensitivity / specificity improvements to the haplotype caller -- Reduce the min read length to 10 bp in the filterNonPassingReads in the HC. Now that we filter out reads before genotyping, we have to be more tolerant of shorter, but informative, reads, in order to avoid a few FNs in shallow read data -- Reduce the min usable base qual to 8 by default in the HC. In regions with low coverage we sometimes throw out our only informative kmers because we required a contiguous run of bases with >= 16 QUAL. This is a bit too aggressive of a requirement, so I lowered it to 8. -- Together with the previous commit this results in a significant improvement in the sensitivity and specificity of the caller NA12878 MEM chr20:10-11 Name VariantType TRUE_POSITIVE FALSE_POSITIVE FALSE_NEGATIVE TRUE_NEGATIVE CALLED_NOT_IN_DB_AT_ALL branch SNPS 1216 0 2 194 0 branch INDELS 312 2 13 71 7 master SNPS 1214 0 4 194 1 master INDELS 309 2 16 71 10 -- Update MD5s in the integration tests to reflect these two new changes --- .../walkers/haplotypecaller/HaplotypeCaller.java | 2 +- .../haplotypecaller/LocalAssemblyEngine.java | 2 +- ...omplexAndSymbolicVariantsIntegrationTest.java | 6 +++--- .../HaplotypeCallerIntegrationTest.java | 16 ++++++++-------- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java index 2ecc152df..a17e25f41 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java @@ -845,7 +845,7 @@ public class HaplotypeCaller extends ActiveRegionWalker implem private List filterNonPassingReads( final org.broadinstitute.sting.utils.activeregion.ActiveRegion activeRegion ) { final List readsToRemove = new ArrayList(); for( final GATKSAMRecord rec : activeRegion.getReads() ) { - if( rec.getReadLength() < 24 || rec.getMappingQuality() < 20 || BadMateFilter.hasBadMate(rec) || (keepRG != null && !rec.getReadGroup().getId().equals(keepRG)) ) { + if( rec.getReadLength() < 10 || rec.getMappingQuality() < 20 || BadMateFilter.hasBadMate(rec) || (keepRG != null && !rec.getReadGroup().getId().equals(keepRG)) ) { readsToRemove.add(rec); } } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngine.java index 23cbc3265..4c0483ad6 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngine.java @@ -60,7 +60,7 @@ import java.util.List; * Date: Mar 14, 2011 */ public abstract class LocalAssemblyEngine { - public static final byte DEFAULT_MIN_BASE_QUALITY_TO_USE = (byte) 16; + public static final byte DEFAULT_MIN_BASE_QUALITY_TO_USE = (byte) 8; protected PrintStream graphWriter = null; protected byte minBaseQualityToUseInAssembly = DEFAULT_MIN_BASE_QUALITY_TO_USE; diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java index f8580f271..57d8aa92c 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java @@ -64,7 +64,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa @Test public void testHaplotypeCallerMultiSampleComplex1() { - HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "c0379d32c8c743d84c6da5956d67c004"); + HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "57e13aed8dc483514ac15fb757aee1d1"); } private void HCTestSymbolicVariants(String bam, String args, String md5) { @@ -88,12 +88,12 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa @Test public void testHaplotypeCallerMultiSampleGGAComplex() { HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538", - "2fb56d241baca3658af5811e680bde4c"); + "d89c8a32e9c54f66e0331382cac86b27"); } @Test public void testHaplotypeCallerMultiSampleGGAMultiAllelic() { HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:133041-133161 -L 20:300207-300337", - "bd7d24e87776f939b36742c1fd33b25c"); + "89a28d4290523dd55117bc4e44212d73"); } } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index a77304e57..4e291cb59 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -80,12 +80,12 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerMultiSample() { - HCTest(CEUTRIO_BAM, "", "943302eb9b9798d1ffeb9136612cbc85"); + HCTest(CEUTRIO_BAM, "", "aeab5f0d40852e6332b96481981a0e46"); } @Test public void testHaplotypeCallerSingleSample() { - HCTest(NA12878_BAM, "", "3199bebe4e34b5df7558f74b05fb3a4e"); + HCTest(NA12878_BAM, "", "c1530f2158cb41d50e830ca5be0f97a0"); } @Test(enabled = false) // can't annotate the rsID's yet @@ -96,7 +96,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerMultiSampleGGA() { HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", - "aef51f79d58634e4b35a1a98caba329c"); + "3e2e4a62c6c60d432fa1ca32aee2635b"); } @Test @@ -112,7 +112,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerSingleSampleIndelQualityScores() { - HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "5ac0d4b30a0c9a97a71ad014e63f11cf"); + HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "bac6f98e910290722df28da44b41f06f"); } private void HCTestNearbySmallIntervals(String bam, String args, String md5) { @@ -149,7 +149,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerNearbySmallIntervals() { - HCTestNearbySmallIntervals(NA12878_BAM, "", "a7e3b05fdc9866965e3ab71dbbd288ff"); + HCTestNearbySmallIntervals(NA12878_BAM, "", "65e7b1b72a2411d6360138049914aa3a"); } // This problem bam came from a user on the forum and it spotted a problem where the ReadClipper @@ -166,7 +166,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void HCTestStructuralIndels() { final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "AFR.structural.indels.bam") + " --no_cmdline_in_header -o %s -minPruning 6 -L 20:8187565-8187800 -L 20:18670537-18670730"; - final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("8252f956e94cb8538b18210e9350f0e3")); + final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("ab518ae32535714604a4ffc71fe42511")); executeTest("HCTestStructuralIndels: ", spec); } @@ -188,7 +188,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { public void HCTestReducedBam() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T HaplotypeCaller -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1, - Arrays.asList("7d4da215e86658e8da70fa0ade7f3eca")); + Arrays.asList("3c87eb93ffe3a0166aca753050b981e1")); executeTest("HC calling on a ReducedRead BAM", spec); } @@ -196,7 +196,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { public void testReducedBamWithReadsNotFullySpanningDeletion() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T HaplotypeCaller -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "reduced.readNotFullySpanningDeletion.bam -o %s -L 1:167871297", 1, - Arrays.asList("b0f0467dd4bfc4cdc85fff85ffa6f0c1")); + Arrays.asList("8adfa8a27a312760dab50787da595c57")); executeTest("test calling on a ReducedRead BAM where the reads do not fully span a deletion", spec); } }