Merge pull request #273 from broadinstitute/rp_readIsPoorlyModelled
Relaxing the constraints on the readIsPoorlyModelled function.
This commit is contained in:
commit
c837d67b2f
|
|
@ -459,7 +459,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
|||
// the minimum length of a read we'd consider using for genotyping
|
||||
private final static int MIN_READ_LENGTH = 10;
|
||||
|
||||
private List<String> samplesList = new ArrayList<String>();
|
||||
private List<String> samplesList = new ArrayList<>();
|
||||
|
||||
private final static Allele FAKE_REF_ALLELE = Allele.create("N", true); // used in isActive function to call into UG Engine. Should never appear anywhere in a VCF file
|
||||
private final static Allele FAKE_ALT_ALLELE = Allele.create("<FAKE_ALT>", false); // used in isActive function to call into UG Engine. Should never appear anywhere in a VCF file
|
||||
|
|
|
|||
|
|
@ -64,7 +64,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
|
|||
|
||||
@Test
|
||||
public void testHaplotypeCallerMultiSampleComplex1() {
|
||||
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "d21f15a5809fe5259af41ae6774af6f1");
|
||||
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "e7b28ea087e8624f1e596c9d65381fea");
|
||||
}
|
||||
|
||||
private void HCTestSymbolicVariants(String bam, String args, String md5) {
|
||||
|
|
@ -88,12 +88,12 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
|
|||
@Test
|
||||
public void testHaplotypeCallerMultiSampleGGAComplex() {
|
||||
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538",
|
||||
"d4a0797c2fd4c103bf9a137633376156");
|
||||
"321dc9f3d330790bac7981ffae00cb0c");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHaplotypeCallerMultiSampleGGAMultiAllelic() {
|
||||
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:133041-133161 -L 20:300207-300337",
|
||||
"a9872228d0275a30f5a1f7e070a9c9f4");
|
||||
"2a72a9b5c6778b99bf155a7c5e90d11e");
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -78,12 +78,12 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testHaplotypeCallerMultiSample() {
|
||||
HCTest(CEUTRIO_BAM, "", "e9167a1bfc0fc276586788d1ce1be408");
|
||||
HCTest(CEUTRIO_BAM, "", "f25b9cfc85995cbe8eb6ba5a126d713d");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHaplotypeCallerSingleSample() {
|
||||
HCTest(NA12878_BAM, "", "b1d46afb9659ac3b92a3d131b58924ef");
|
||||
HCTest(NA12878_BAM, "", "19d685727ec60b3568f313bc44f79b49");
|
||||
}
|
||||
|
||||
@Test(enabled = false) // can't annotate the rsID's yet
|
||||
|
|
@ -94,7 +94,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testHaplotypeCallerMultiSampleGGA() {
|
||||
HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf",
|
||||
"d83856b8136776bd731a8037c16b71fa");
|
||||
"6da65f1d396b9c709eb6246cf3f615c1");
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
@ -110,7 +110,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testHaplotypeCallerSingleSampleIndelQualityScores() {
|
||||
HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "70c4476816f5d35c9978c378dbeac09b");
|
||||
HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "e3db7d56154e36eeb887259bea4b241d");
|
||||
}
|
||||
|
||||
private void HCTestNearbySmallIntervals(String bam, String args, String md5) {
|
||||
|
|
@ -147,7 +147,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testHaplotypeCallerNearbySmallIntervals() {
|
||||
HCTestNearbySmallIntervals(NA12878_BAM, "", "947aae309ecab7cd3f17ff9810884924");
|
||||
HCTestNearbySmallIntervals(NA12878_BAM, "", "6e170d03047caefc2fba3f1c1f8de132");
|
||||
}
|
||||
|
||||
// This problem bam came from a user on the forum and it spotted a problem where the ReadClipper
|
||||
|
|
@ -186,7 +186,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
public void HCTestReducedBam() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T HaplotypeCaller --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1,
|
||||
Arrays.asList("0124c4923d96ec0f8222b596dd4ef534"));
|
||||
Arrays.asList("40416433baf96f4e84a058459717060b"));
|
||||
executeTest("HC calling on a ReducedRead BAM", spec);
|
||||
}
|
||||
|
||||
|
|
@ -194,7 +194,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
public void testReducedBamWithReadsNotFullySpanningDeletion() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T HaplotypeCaller --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "reduced.readNotFullySpanningDeletion.bam -o %s -L 1:167871297", 1,
|
||||
Arrays.asList("0e020dcfdf249225714f5cd86ed3869f"));
|
||||
Arrays.asList("cf1461ce829023ea9920fbfeb534eb97"));
|
||||
executeTest("test calling on a ReducedRead BAM where the reads do not fully span a deletion", spec);
|
||||
}
|
||||
|
||||
|
|
@ -208,7 +208,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
public void HCTestDBSNPAnnotationWGS() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T HaplotypeCaller --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,000,000-10,100,000 -D " + b37dbSNP132, 1,
|
||||
Arrays.asList("446a786bb539f3ec2084dd75167568aa"));
|
||||
Arrays.asList("45ca324be3917655e645d6c290c9280f"));
|
||||
executeTest("HC calling with dbSNP ID annotation on WGS intervals", spec);
|
||||
}
|
||||
|
||||
|
|
@ -217,7 +217,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T HaplotypeCaller --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,000,000-11,000,000 -D " + b37dbSNP132
|
||||
+ " -L " + hg19Intervals + " -isr INTERSECTION", 1,
|
||||
Arrays.asList("9587029b702bb59bd4dfec69eac4c210"));
|
||||
Arrays.asList("b7037770b7953cdf858764b99fa243ed"));
|
||||
executeTest("HC calling with dbSNP ID annotation on WEx intervals", spec);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -61,7 +61,7 @@ public class HaplotypeCallerParallelIntegrationTest extends WalkerTest {
|
|||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
|
||||
for ( final int nct : Arrays.asList(1, 2, 4) ) {
|
||||
tests.add(new Object[]{nct, "ef42a438b82681d1c0f921c57e16ff12"});
|
||||
tests.add(new Object[]{nct, "bd2a57e6b0cffb4cbdba609a6c1683dc"});
|
||||
}
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
|
|
|
|||
|
|
@ -233,7 +233,7 @@ public class PerReadAlleleLikelihoodMapUnitTest extends BaseTest {
|
|||
tests.add(new Object[]{100, 0.01, true, Arrays.asList(-5.0, -10.0)});
|
||||
tests.add(new Object[]{100, 0.01, false, Arrays.asList(-5.0, -10.0, -3.0)});
|
||||
tests.add(new Object[]{100, 0.01, false, Arrays.asList(-5.0, -10.0, -2.0)});
|
||||
tests.add(new Object[]{100, 0.01, true, Arrays.asList(-5.0, -10.0, -4.0)});
|
||||
tests.add(new Object[]{100, 0.01, true, Arrays.asList(-5.0, -10.0, -4.2)});
|
||||
tests.add(new Object[]{100, 0.001, true, Arrays.asList(-5.0, -10.0)});
|
||||
tests.add(new Object[]{100, 0.001, false, Arrays.asList(-5.0, -10.0, 0.0)});
|
||||
|
||||
|
|
@ -243,7 +243,7 @@ public class PerReadAlleleLikelihoodMapUnitTest extends BaseTest {
|
|||
@Test(dataProvider = "PoorlyModelledReadData")
|
||||
public void testPoorlyModelledRead(final int readLen, final double maxErrorRatePerBase, final boolean expected, final List<Double> log10likelihoods) {
|
||||
final byte[] bases = Utils.dupBytes((byte)'A', readLen);
|
||||
final byte[] quals = Utils.dupBytes((byte) 30, readLen);
|
||||
final byte[] quals = Utils.dupBytes((byte) 40, readLen);
|
||||
|
||||
final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(bases, quals, readLen + "M");
|
||||
|
||||
|
|
@ -279,7 +279,7 @@ public class PerReadAlleleLikelihoodMapUnitTest extends BaseTest {
|
|||
final double likelihood = bad ? -100.0 : 0.0;
|
||||
|
||||
final byte[] bases = Utils.dupBytes((byte)'A', readLen);
|
||||
final byte[] quals = Utils.dupBytes((byte) 30, readLen);
|
||||
final byte[] quals = Utils.dupBytes((byte) 40, readLen);
|
||||
|
||||
final Allele allele = Allele.create(Utils.dupString("A", readI+1));
|
||||
|
||||
|
|
|
|||
|
|
@ -321,7 +321,7 @@ public class PerReadAlleleLikelihoodMap {
|
|||
* @return the list of reads removed from this map because they are poorly modelled
|
||||
*/
|
||||
public List<GATKSAMRecord> filterPoorlyModelledReads(final double maxErrorRatePerBase) {
|
||||
final List<GATKSAMRecord> removedReads = new LinkedList<GATKSAMRecord>();
|
||||
final List<GATKSAMRecord> removedReads = new LinkedList<>();
|
||||
final Iterator<Map.Entry<GATKSAMRecord, Map<Allele, Double>>> it = likelihoodReadMap.entrySet().iterator();
|
||||
while ( it.hasNext() ) {
|
||||
final Map.Entry<GATKSAMRecord, Map<Allele, Double>> record = it.next();
|
||||
|
|
@ -356,8 +356,8 @@ public class PerReadAlleleLikelihoodMap {
|
|||
* @return true if none of the log10 likelihoods imply that the read truly originated from one of the haplotypes
|
||||
*/
|
||||
protected boolean readIsPoorlyModelled(final GATKSAMRecord read, final Collection<Double> log10Likelihoods, final double maxErrorRatePerBase) {
|
||||
final double maxErrorsForRead = Math.ceil(read.getReadLength() * maxErrorRatePerBase);
|
||||
final double log10QualPerBase = -3.0;
|
||||
final double maxErrorsForRead = Math.min(2.0, Math.ceil(read.getReadLength() * maxErrorRatePerBase));
|
||||
final double log10QualPerBase = -4.0;
|
||||
final double log10MaxLikelihoodForTrueAllele = maxErrorsForRead * log10QualPerBase;
|
||||
|
||||
for ( final double log10Likelihood : log10Likelihoods )
|
||||
|
|
|
|||
Loading…
Reference in New Issue