Merge pull request #273 from broadinstitute/rp_readIsPoorlyModelled

Relaxing the constraints on the readIsPoorlyModelled function.
This commit is contained in:
Mark DePristo 2013-06-13 08:40:24 -07:00
commit c837d67b2f
6 changed files with 20 additions and 20 deletions

View File

@@ -459,7 +459,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
// the minimum length of a read we'd consider using for genotyping // the minimum length of a read we'd consider using for genotyping
private final static int MIN_READ_LENGTH = 10; private final static int MIN_READ_LENGTH = 10;
private List<String> samplesList = new ArrayList<String>(); private List<String> samplesList = new ArrayList<>();
private final static Allele FAKE_REF_ALLELE = Allele.create("N", true); // used in isActive function to call into UG Engine. Should never appear anywhere in a VCF file private final static Allele FAKE_REF_ALLELE = Allele.create("N", true); // used in isActive function to call into UG Engine. Should never appear anywhere in a VCF file
private final static Allele FAKE_ALT_ALLELE = Allele.create("<FAKE_ALT>", false); // used in isActive function to call into UG Engine. Should never appear anywhere in a VCF file private final static Allele FAKE_ALT_ALLELE = Allele.create("<FAKE_ALT>", false); // used in isActive function to call into UG Engine. Should never appear anywhere in a VCF file

View File

@@ -64,7 +64,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
@Test @Test
public void testHaplotypeCallerMultiSampleComplex1() { public void testHaplotypeCallerMultiSampleComplex1() {
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "d21f15a5809fe5259af41ae6774af6f1"); HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "e7b28ea087e8624f1e596c9d65381fea");
} }
private void HCTestSymbolicVariants(String bam, String args, String md5) { private void HCTestSymbolicVariants(String bam, String args, String md5) {
@@ -88,12 +88,12 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
@Test @Test
public void testHaplotypeCallerMultiSampleGGAComplex() { public void testHaplotypeCallerMultiSampleGGAComplex() {
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538", HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538",
"d4a0797c2fd4c103bf9a137633376156"); "321dc9f3d330790bac7981ffae00cb0c");
} }
@Test @Test
public void testHaplotypeCallerMultiSampleGGAMultiAllelic() { public void testHaplotypeCallerMultiSampleGGAMultiAllelic() {
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:133041-133161 -L 20:300207-300337", HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:133041-133161 -L 20:300207-300337",
"a9872228d0275a30f5a1f7e070a9c9f4"); "2a72a9b5c6778b99bf155a7c5e90d11e");
} }
} }

View File

@@ -78,12 +78,12 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test @Test
public void testHaplotypeCallerMultiSample() { public void testHaplotypeCallerMultiSample() {
HCTest(CEUTRIO_BAM, "", "e9167a1bfc0fc276586788d1ce1be408"); HCTest(CEUTRIO_BAM, "", "f25b9cfc85995cbe8eb6ba5a126d713d");
} }
@Test @Test
public void testHaplotypeCallerSingleSample() { public void testHaplotypeCallerSingleSample() {
HCTest(NA12878_BAM, "", "b1d46afb9659ac3b92a3d131b58924ef"); HCTest(NA12878_BAM, "", "19d685727ec60b3568f313bc44f79b49");
} }
@Test(enabled = false) // can't annotate the rsID's yet @Test(enabled = false) // can't annotate the rsID's yet
@@ -94,7 +94,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test @Test
public void testHaplotypeCallerMultiSampleGGA() { public void testHaplotypeCallerMultiSampleGGA() {
HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf",
"d83856b8136776bd731a8037c16b71fa"); "6da65f1d396b9c709eb6246cf3f615c1");
} }
@Test @Test
@@ -110,7 +110,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test @Test
public void testHaplotypeCallerSingleSampleIndelQualityScores() { public void testHaplotypeCallerSingleSampleIndelQualityScores() {
HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "70c4476816f5d35c9978c378dbeac09b"); HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "e3db7d56154e36eeb887259bea4b241d");
} }
private void HCTestNearbySmallIntervals(String bam, String args, String md5) { private void HCTestNearbySmallIntervals(String bam, String args, String md5) {
@@ -147,7 +147,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test @Test
public void testHaplotypeCallerNearbySmallIntervals() { public void testHaplotypeCallerNearbySmallIntervals() {
HCTestNearbySmallIntervals(NA12878_BAM, "", "947aae309ecab7cd3f17ff9810884924"); HCTestNearbySmallIntervals(NA12878_BAM, "", "6e170d03047caefc2fba3f1c1f8de132");
} }
// This problem bam came from a user on the forum and it spotted a problem where the ReadClipper // This problem bam came from a user on the forum and it spotted a problem where the ReadClipper
@@ -186,7 +186,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
public void HCTestReducedBam() { public void HCTestReducedBam() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T HaplotypeCaller --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1, "-T HaplotypeCaller --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1,
Arrays.asList("0124c4923d96ec0f8222b596dd4ef534")); Arrays.asList("40416433baf96f4e84a058459717060b"));
executeTest("HC calling on a ReducedRead BAM", spec); executeTest("HC calling on a ReducedRead BAM", spec);
} }
@@ -194,7 +194,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
public void testReducedBamWithReadsNotFullySpanningDeletion() { public void testReducedBamWithReadsNotFullySpanningDeletion() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T HaplotypeCaller --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "reduced.readNotFullySpanningDeletion.bam -o %s -L 1:167871297", 1, "-T HaplotypeCaller --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "reduced.readNotFullySpanningDeletion.bam -o %s -L 1:167871297", 1,
Arrays.asList("0e020dcfdf249225714f5cd86ed3869f")); Arrays.asList("cf1461ce829023ea9920fbfeb534eb97"));
executeTest("test calling on a ReducedRead BAM where the reads do not fully span a deletion", spec); executeTest("test calling on a ReducedRead BAM where the reads do not fully span a deletion", spec);
} }
@@ -208,7 +208,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
public void HCTestDBSNPAnnotationWGS() { public void HCTestDBSNPAnnotationWGS() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T HaplotypeCaller --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,000,000-10,100,000 -D " + b37dbSNP132, 1, "-T HaplotypeCaller --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,000,000-10,100,000 -D " + b37dbSNP132, 1,
Arrays.asList("446a786bb539f3ec2084dd75167568aa")); Arrays.asList("45ca324be3917655e645d6c290c9280f"));
executeTest("HC calling with dbSNP ID annotation on WGS intervals", spec); executeTest("HC calling with dbSNP ID annotation on WGS intervals", spec);
} }
@@ -217,7 +217,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T HaplotypeCaller --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,000,000-11,000,000 -D " + b37dbSNP132 "-T HaplotypeCaller --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,000,000-11,000,000 -D " + b37dbSNP132
+ " -L " + hg19Intervals + " -isr INTERSECTION", 1, + " -L " + hg19Intervals + " -isr INTERSECTION", 1,
Arrays.asList("9587029b702bb59bd4dfec69eac4c210")); Arrays.asList("b7037770b7953cdf858764b99fa243ed"));
executeTest("HC calling with dbSNP ID annotation on WEx intervals", spec); executeTest("HC calling with dbSNP ID annotation on WEx intervals", spec);
} }
} }

View File

@@ -61,7 +61,7 @@ public class HaplotypeCallerParallelIntegrationTest extends WalkerTest {
List<Object[]> tests = new ArrayList<Object[]>(); List<Object[]> tests = new ArrayList<Object[]>();
for ( final int nct : Arrays.asList(1, 2, 4) ) { for ( final int nct : Arrays.asList(1, 2, 4) ) {
tests.add(new Object[]{nct, "ef42a438b82681d1c0f921c57e16ff12"}); tests.add(new Object[]{nct, "bd2a57e6b0cffb4cbdba609a6c1683dc"});
} }
return tests.toArray(new Object[][]{}); return tests.toArray(new Object[][]{});

View File

@@ -233,7 +233,7 @@ public class PerReadAlleleLikelihoodMapUnitTest extends BaseTest {
tests.add(new Object[]{100, 0.01, true, Arrays.asList(-5.0, -10.0)}); tests.add(new Object[]{100, 0.01, true, Arrays.asList(-5.0, -10.0)});
tests.add(new Object[]{100, 0.01, false, Arrays.asList(-5.0, -10.0, -3.0)}); tests.add(new Object[]{100, 0.01, false, Arrays.asList(-5.0, -10.0, -3.0)});
tests.add(new Object[]{100, 0.01, false, Arrays.asList(-5.0, -10.0, -2.0)}); tests.add(new Object[]{100, 0.01, false, Arrays.asList(-5.0, -10.0, -2.0)});
tests.add(new Object[]{100, 0.01, true, Arrays.asList(-5.0, -10.0, -4.0)}); tests.add(new Object[]{100, 0.01, true, Arrays.asList(-5.0, -10.0, -4.2)});
tests.add(new Object[]{100, 0.001, true, Arrays.asList(-5.0, -10.0)}); tests.add(new Object[]{100, 0.001, true, Arrays.asList(-5.0, -10.0)});
tests.add(new Object[]{100, 0.001, false, Arrays.asList(-5.0, -10.0, 0.0)}); tests.add(new Object[]{100, 0.001, false, Arrays.asList(-5.0, -10.0, 0.0)});
@@ -243,7 +243,7 @@ public class PerReadAlleleLikelihoodMapUnitTest extends BaseTest {
@Test(dataProvider = "PoorlyModelledReadData") @Test(dataProvider = "PoorlyModelledReadData")
public void testPoorlyModelledRead(final int readLen, final double maxErrorRatePerBase, final boolean expected, final List<Double> log10likelihoods) { public void testPoorlyModelledRead(final int readLen, final double maxErrorRatePerBase, final boolean expected, final List<Double> log10likelihoods) {
final byte[] bases = Utils.dupBytes((byte)'A', readLen); final byte[] bases = Utils.dupBytes((byte)'A', readLen);
final byte[] quals = Utils.dupBytes((byte) 30, readLen); final byte[] quals = Utils.dupBytes((byte) 40, readLen);
final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(bases, quals, readLen + "M"); final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(bases, quals, readLen + "M");
@@ -279,7 +279,7 @@ public class PerReadAlleleLikelihoodMapUnitTest extends BaseTest {
final double likelihood = bad ? -100.0 : 0.0; final double likelihood = bad ? -100.0 : 0.0;
final byte[] bases = Utils.dupBytes((byte)'A', readLen); final byte[] bases = Utils.dupBytes((byte)'A', readLen);
final byte[] quals = Utils.dupBytes((byte) 30, readLen); final byte[] quals = Utils.dupBytes((byte) 40, readLen);
final Allele allele = Allele.create(Utils.dupString("A", readI+1)); final Allele allele = Allele.create(Utils.dupString("A", readI+1));

View File

@@ -321,7 +321,7 @@ public class PerReadAlleleLikelihoodMap {
* @return the list of reads removed from this map because they are poorly modelled * @return the list of reads removed from this map because they are poorly modelled
*/ */
public List<GATKSAMRecord> filterPoorlyModelledReads(final double maxErrorRatePerBase) { public List<GATKSAMRecord> filterPoorlyModelledReads(final double maxErrorRatePerBase) {
final List<GATKSAMRecord> removedReads = new LinkedList<GATKSAMRecord>(); final List<GATKSAMRecord> removedReads = new LinkedList<>();
final Iterator<Map.Entry<GATKSAMRecord, Map<Allele, Double>>> it = likelihoodReadMap.entrySet().iterator(); final Iterator<Map.Entry<GATKSAMRecord, Map<Allele, Double>>> it = likelihoodReadMap.entrySet().iterator();
while ( it.hasNext() ) { while ( it.hasNext() ) {
final Map.Entry<GATKSAMRecord, Map<Allele, Double>> record = it.next(); final Map.Entry<GATKSAMRecord, Map<Allele, Double>> record = it.next();
@@ -356,8 +356,8 @@ public class PerReadAlleleLikelihoodMap {
* @return true if none of the log10 likelihoods imply that the read truly originated from one of the haplotypes * @return true if none of the log10 likelihoods imply that the read truly originated from one of the haplotypes
*/ */
protected boolean readIsPoorlyModelled(final GATKSAMRecord read, final Collection<Double> log10Likelihoods, final double maxErrorRatePerBase) { protected boolean readIsPoorlyModelled(final GATKSAMRecord read, final Collection<Double> log10Likelihoods, final double maxErrorRatePerBase) {
final double maxErrorsForRead = Math.ceil(read.getReadLength() * maxErrorRatePerBase); final double maxErrorsForRead = Math.min(2.0, Math.ceil(read.getReadLength() * maxErrorRatePerBase));
final double log10QualPerBase = -3.0; final double log10QualPerBase = -4.0;
final double log10MaxLikelihoodForTrueAllele = maxErrorsForRead * log10QualPerBase; final double log10MaxLikelihoodForTrueAllele = maxErrorsForRead * log10QualPerBase;
for ( final double log10Likelihood : log10Likelihoods ) for ( final double log10Likelihood : log10Likelihoods )