Two sensitivity / specificity improvements to the haplotype caller

-- Reduce the min read length to 10 bp in the filterNonPassingReads in the HC.  Now that we filter out reads before genotyping, we have to be more tolerant of shorter, but informative, reads, in order to avoid a few FNs in shallow read data
-- Reduce the min usable base qual to 8 by default in the HC.  In regions with low coverage we sometimes throw out our only informative kmers because we required a contiguous run of bases with >= 16 QUAL.  This is a bit too aggressive of a requirement, so I lowered it to 8.
-- Together with the previous commit this results in a significant improvement in the sensitivity and specificity of the caller

 NA12878 MEM chr20:10-11
 Name    VariantType  TRUE_POSITIVE  FALSE_POSITIVE  FALSE_NEGATIVE  TRUE_NEGATIVE  CALLED_NOT_IN_DB_AT_ALL
 branch  SNPS                  1216               0               2            194                        0
 branch  INDELS                 312               2              13             71                        7
 master  SNPS                  1214               0               4            194                        1
 master  INDELS                 309               2              16             71                       10

-- Update MD5s in the integration tests to reflect these two new changes
This commit is contained in:
Mark DePristo 2013-04-17 10:30:37 -04:00
parent b27706859a
commit f0e64850da
4 changed files with 13 additions and 13 deletions

View File

@ -845,7 +845,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
private List<GATKSAMRecord> filterNonPassingReads( final org.broadinstitute.sting.utils.activeregion.ActiveRegion activeRegion ) {
final List<GATKSAMRecord> readsToRemove = new ArrayList<GATKSAMRecord>();
for( final GATKSAMRecord rec : activeRegion.getReads() ) {
if( rec.getReadLength() < 24 || rec.getMappingQuality() < 20 || BadMateFilter.hasBadMate(rec) || (keepRG != null && !rec.getReadGroup().getId().equals(keepRG)) ) {
if( rec.getReadLength() < 10 || rec.getMappingQuality() < 20 || BadMateFilter.hasBadMate(rec) || (keepRG != null && !rec.getReadGroup().getId().equals(keepRG)) ) {
readsToRemove.add(rec);
}
}

View File

@ -60,7 +60,7 @@ import java.util.List;
* Date: Mar 14, 2011
*/
public abstract class LocalAssemblyEngine {
public static final byte DEFAULT_MIN_BASE_QUALITY_TO_USE = (byte) 16;
public static final byte DEFAULT_MIN_BASE_QUALITY_TO_USE = (byte) 8;
protected PrintStream graphWriter = null;
protected byte minBaseQualityToUseInAssembly = DEFAULT_MIN_BASE_QUALITY_TO_USE;

View File

@ -64,7 +64,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
@Test
public void testHaplotypeCallerMultiSampleComplex1() {
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "c0379d32c8c743d84c6da5956d67c004");
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "57e13aed8dc483514ac15fb757aee1d1");
}
private void HCTestSymbolicVariants(String bam, String args, String md5) {
@ -88,12 +88,12 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
@Test
public void testHaplotypeCallerMultiSampleGGAComplex() {
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538",
"2fb56d241baca3658af5811e680bde4c");
"d89c8a32e9c54f66e0331382cac86b27");
}
@Test
public void testHaplotypeCallerMultiSampleGGAMultiAllelic() {
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:133041-133161 -L 20:300207-300337",
"bd7d24e87776f939b36742c1fd33b25c");
"89a28d4290523dd55117bc4e44212d73");
}
}

View File

@ -80,12 +80,12 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerMultiSample() {
HCTest(CEUTRIO_BAM, "", "943302eb9b9798d1ffeb9136612cbc85");
HCTest(CEUTRIO_BAM, "", "aeab5f0d40852e6332b96481981a0e46");
}
@Test
public void testHaplotypeCallerSingleSample() {
HCTest(NA12878_BAM, "", "3199bebe4e34b5df7558f74b05fb3a4e");
HCTest(NA12878_BAM, "", "c1530f2158cb41d50e830ca5be0f97a0");
}
@Test(enabled = false) // can't annotate the rsID's yet
@ -96,7 +96,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerMultiSampleGGA() {
HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf",
"aef51f79d58634e4b35a1a98caba329c");
"3e2e4a62c6c60d432fa1ca32aee2635b");
}
@Test
@ -112,7 +112,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerSingleSampleIndelQualityScores() {
HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "5ac0d4b30a0c9a97a71ad014e63f11cf");
HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "bac6f98e910290722df28da44b41f06f");
}
private void HCTestNearbySmallIntervals(String bam, String args, String md5) {
@ -149,7 +149,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerNearbySmallIntervals() {
HCTestNearbySmallIntervals(NA12878_BAM, "", "a7e3b05fdc9866965e3ab71dbbd288ff");
HCTestNearbySmallIntervals(NA12878_BAM, "", "65e7b1b72a2411d6360138049914aa3a");
}
// This problem bam came from a user on the forum and it spotted a problem where the ReadClipper
@ -166,7 +166,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void HCTestStructuralIndels() {
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "AFR.structural.indels.bam") + " --no_cmdline_in_header -o %s -minPruning 6 -L 20:8187565-8187800 -L 20:18670537-18670730";
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("8252f956e94cb8538b18210e9350f0e3"));
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("ab518ae32535714604a4ffc71fe42511"));
executeTest("HCTestStructuralIndels: ", spec);
}
@ -188,7 +188,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
public void HCTestReducedBam() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T HaplotypeCaller -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1,
Arrays.asList("7d4da215e86658e8da70fa0ade7f3eca"));
Arrays.asList("3c87eb93ffe3a0166aca753050b981e1"));
executeTest("HC calling on a ReducedRead BAM", spec);
}
@ -196,7 +196,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
public void testReducedBamWithReadsNotFullySpanningDeletion() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T HaplotypeCaller -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "reduced.readNotFullySpanningDeletion.bam -o %s -L 1:167871297", 1,
Arrays.asList("b0f0467dd4bfc4cdc85fff85ffa6f0c1"));
Arrays.asList("8adfa8a27a312760dab50787da595c57"));
executeTest("test calling on a ReducedRead BAM where the reads do not fully span a deletion", spec);
}
}