Two sensitivity / specificity improvements to the haplotype caller
-- Reduce the min read length to 10 bp in the filterNonPassingReads in the HC. Now that we filter out reads before genotyping, we have to be more tolerant of shorter, but informative, reads, in order to avoid a few FNs in shallow read data -- Reduce the min usable base qual to 8 by default in the HC. In regions with low coverage we sometimes throw out our only informative kmers because we required a contiguous run of bases with >= 16 QUAL. This is a bit too aggressive of a requirement, so I lowered it to 8. -- Together with the previous commit this results in a significant improvement in the sensitivity and specificity of the caller NA12878 MEM chr20:10-11 Name VariantType TRUE_POSITIVE FALSE_POSITIVE FALSE_NEGATIVE TRUE_NEGATIVE CALLED_NOT_IN_DB_AT_ALL branch SNPS 1216 0 2 194 0 branch INDELS 312 2 13 71 7 master SNPS 1214 0 4 194 1 master INDELS 309 2 16 71 10 -- Update MD5s in the integration tests to reflect these two new changes
This commit is contained in:
parent
b27706859a
commit
f0e64850da
|
|
@ -845,7 +845,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
|||
private List<GATKSAMRecord> filterNonPassingReads( final org.broadinstitute.sting.utils.activeregion.ActiveRegion activeRegion ) {
|
||||
final List<GATKSAMRecord> readsToRemove = new ArrayList<GATKSAMRecord>();
|
||||
for( final GATKSAMRecord rec : activeRegion.getReads() ) {
|
||||
if( rec.getReadLength() < 24 || rec.getMappingQuality() < 20 || BadMateFilter.hasBadMate(rec) || (keepRG != null && !rec.getReadGroup().getId().equals(keepRG)) ) {
|
||||
if( rec.getReadLength() < 10 || rec.getMappingQuality() < 20 || BadMateFilter.hasBadMate(rec) || (keepRG != null && !rec.getReadGroup().getId().equals(keepRG)) ) {
|
||||
readsToRemove.add(rec);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -60,7 +60,7 @@ import java.util.List;
|
|||
* Date: Mar 14, 2011
|
||||
*/
|
||||
public abstract class LocalAssemblyEngine {
|
||||
public static final byte DEFAULT_MIN_BASE_QUALITY_TO_USE = (byte) 16;
|
||||
public static final byte DEFAULT_MIN_BASE_QUALITY_TO_USE = (byte) 8;
|
||||
|
||||
protected PrintStream graphWriter = null;
|
||||
protected byte minBaseQualityToUseInAssembly = DEFAULT_MIN_BASE_QUALITY_TO_USE;
|
||||
|
|
|
|||
|
|
@ -64,7 +64,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
|
|||
|
||||
@Test
|
||||
public void testHaplotypeCallerMultiSampleComplex1() {
|
||||
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "c0379d32c8c743d84c6da5956d67c004");
|
||||
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "57e13aed8dc483514ac15fb757aee1d1");
|
||||
}
|
||||
|
||||
private void HCTestSymbolicVariants(String bam, String args, String md5) {
|
||||
|
|
@ -88,12 +88,12 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
|
|||
@Test
|
||||
public void testHaplotypeCallerMultiSampleGGAComplex() {
|
||||
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538",
|
||||
"2fb56d241baca3658af5811e680bde4c");
|
||||
"d89c8a32e9c54f66e0331382cac86b27");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHaplotypeCallerMultiSampleGGAMultiAllelic() {
|
||||
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:133041-133161 -L 20:300207-300337",
|
||||
"bd7d24e87776f939b36742c1fd33b25c");
|
||||
"89a28d4290523dd55117bc4e44212d73");
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -80,12 +80,12 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testHaplotypeCallerMultiSample() {
|
||||
HCTest(CEUTRIO_BAM, "", "943302eb9b9798d1ffeb9136612cbc85");
|
||||
HCTest(CEUTRIO_BAM, "", "aeab5f0d40852e6332b96481981a0e46");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHaplotypeCallerSingleSample() {
|
||||
HCTest(NA12878_BAM, "", "3199bebe4e34b5df7558f74b05fb3a4e");
|
||||
HCTest(NA12878_BAM, "", "c1530f2158cb41d50e830ca5be0f97a0");
|
||||
}
|
||||
|
||||
@Test(enabled = false) // can't annotate the rsID's yet
|
||||
|
|
@ -96,7 +96,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testHaplotypeCallerMultiSampleGGA() {
|
||||
HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf",
|
||||
"aef51f79d58634e4b35a1a98caba329c");
|
||||
"3e2e4a62c6c60d432fa1ca32aee2635b");
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
@ -112,7 +112,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testHaplotypeCallerSingleSampleIndelQualityScores() {
|
||||
HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "5ac0d4b30a0c9a97a71ad014e63f11cf");
|
||||
HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "bac6f98e910290722df28da44b41f06f");
|
||||
}
|
||||
|
||||
private void HCTestNearbySmallIntervals(String bam, String args, String md5) {
|
||||
|
|
@ -149,7 +149,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testHaplotypeCallerNearbySmallIntervals() {
|
||||
HCTestNearbySmallIntervals(NA12878_BAM, "", "a7e3b05fdc9866965e3ab71dbbd288ff");
|
||||
HCTestNearbySmallIntervals(NA12878_BAM, "", "65e7b1b72a2411d6360138049914aa3a");
|
||||
}
|
||||
|
||||
// This problem bam came from a user on the forum and it spotted a problem where the ReadClipper
|
||||
|
|
@ -166,7 +166,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void HCTestStructuralIndels() {
|
||||
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "AFR.structural.indels.bam") + " --no_cmdline_in_header -o %s -minPruning 6 -L 20:8187565-8187800 -L 20:18670537-18670730";
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("8252f956e94cb8538b18210e9350f0e3"));
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("ab518ae32535714604a4ffc71fe42511"));
|
||||
executeTest("HCTestStructuralIndels: ", spec);
|
||||
}
|
||||
|
||||
|
|
@ -188,7 +188,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
public void HCTestReducedBam() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T HaplotypeCaller -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1,
|
||||
Arrays.asList("7d4da215e86658e8da70fa0ade7f3eca"));
|
||||
Arrays.asList("3c87eb93ffe3a0166aca753050b981e1"));
|
||||
executeTest("HC calling on a ReducedRead BAM", spec);
|
||||
}
|
||||
|
||||
|
|
@ -196,7 +196,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
public void testReducedBamWithReadsNotFullySpanningDeletion() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T HaplotypeCaller -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "reduced.readNotFullySpanningDeletion.bam -o %s -L 1:167871297", 1,
|
||||
Arrays.asList("b0f0467dd4bfc4cdc85fff85ffa6f0c1"));
|
||||
Arrays.asList("8adfa8a27a312760dab50787da595c57"));
|
||||
executeTest("test calling on a ReducedRead BAM where the reads do not fully span a deletion", spec);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue