Bugfixes for BCF with VQSR

-- Old version converted doubles directly from strings.  New version uses VariantContext getAttributeAsDouble() that looks at the values directly to determine how to convert from Object to Double (via Double.valueOf, (Double), or (Double)(Integer)).
-- getAttributeAsDouble() is now smart in converting integers to doubles as needed
-- Removed unnecessary logging info in BCF2Codec
-- Added integration tests to ensure that VQSR works end-to-end with BCF2 using sites version of the file khalid sent to me
-- Added vqsr.bcf_test.snps.unfiltered.bcf file for this integration test
This commit is contained in:
Mark DePristo 2012-08-07 17:22:27 -04:00
parent 80b94a4f9a
commit cda8d944b7
4 changed files with 57 additions and 6 deletions

View File

@ -235,7 +235,7 @@ public class VariantDataManager {
double value;
try {
value = Double.parseDouble( (String)vc.getAttribute( annotationKey ) );
value = vc.getAttributeAsDouble( annotationKey, Double.NaN );
if( Double.isInfinite(value) ) { value = Double.NaN; }
if( jitter && annotationKey.equalsIgnoreCase("HRUN") ) { // Integer valued annotations must be jittered a bit to work in this GMM
value += -0.25 + 0.5 * GenomeAnalysisEngine.getRandomGenerator().nextDouble();

View File

@ -167,7 +167,6 @@ public final class BCF2Codec implements FeatureCodec<VariantContext> {
// create the config offsets
if ( ! header.getContigLines().isEmpty() ) {
logger.info("Found contig lines in BCF2 file, using those");
contigNames.clear();
for ( final VCFContigHeaderLine contig : header.getContigLines()) {
if ( contig.getID() == null || contig.getID().equals("") )

View File

@ -216,6 +216,7 @@ final class CommonInfo {
Object x = getAttribute(key);
if ( x == null ) return defaultValue;
if ( x instanceof Double ) return (Double)x;
if ( x instanceof Integer ) return (Integer)x;
return Double.valueOf((String)x); // throws an exception if this isn't a string
}

View File

@ -13,7 +13,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
String recalMD5;
String cutVCFMD5;
public VRTest(String inVCF, String tranchesMD5, String recalMD5, String cutVCFMD5) {
this.inVCF = validationDataLocation + inVCF;
this.inVCF = inVCF;
this.tranchesMD5 = tranchesMD5;
this.recalMD5 = recalMD5;
this.cutVCFMD5 = cutVCFMD5;
@ -25,7 +25,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
}
}
VRTest lowPass = new VRTest("phase1.projectConsensus.chr20.raw.snps.vcf",
VRTest lowPass = new VRTest(validationDataLocation + "phase1.projectConsensus.chr20.raw.snps.vcf",
"f360ce3eb2b0b887301be917a9843e2b", // tranches
"287fea5ea066bf3fdd71f5ce9b58eab3", // recal file
"356b9570817b9389da71fbe991d8b2f5"); // cut VCF
@ -74,14 +74,65 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
executeTest("testApplyRecalibration-"+params.inVCF, spec);
}
VRTest bcfTest = new VRTest(privateTestDir + "vqsr.bcf_test.snps.unfiltered.bcf",
"a8ce3cd3dccafdf7d580bcce7d660a9a", // tranches
"1cdf8c9ee77d91d1ba7f002573108bad", // recal file
"62fda105e14b619a1c263855cf56af1d"); // cut VCF
@DataProvider(name = "VRBCFTest")
public Object[][] createVRBCFTest() {
return new Object[][]{ {bcfTest} };
//return new Object[][]{ {yriTrio}, {lowPass} }; // Add hg19 chr20 trio calls here
}
@Test(dataProvider = "VRBCFTest")
public void testVariantRecalibratorWithBCF(VRTest params) {
//System.out.printf("PARAMS FOR %s is %s%n", vcf, clusterFile);
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-R " + b37KGReference +
" -resource:known=true,prior=10.0 " + GATKDataLocation + "dbsnp_132_b37.leftAligned.vcf" +
" -resource:truth=true,training=true,prior=15.0 " + comparisonDataLocation + "Validated/HapMap/3.3/sites_r27_nr.b37_fwd.vcf" +
" -resource:training=true,truth=true,prior=12.0 " + comparisonDataLocation + "Validated/Omni2.5_chip/Omni25_sites_1525_samples.b37.vcf" +
" -T VariantRecalibrator" +
" -input " + params.inVCF +
" -L 20:10,000,000-20,000,000" +
" --no_cmdline_in_header" +
" -an AC " + // integer value
" -an QD -an ReadPosRankSum -an FS -an InbreedingCoeff " + // floats value
" -mG 2 "+
" -recalFile %s" +
" -tranchesFile %s",
2,
Arrays.asList("bcf", "txt"),
Arrays.asList(params.recalMD5, params.tranchesMD5));
executeTest("testVariantRecalibrator-"+params.inVCF, spec).getFirst();
}
@Test(dataProvider = "VRBCFTest", dependsOnMethods="testVariantRecalibratorWithBCF")
public void testApplyRecalibrationWithBCF(VRTest params) {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-R " + b37KGReference +
" -T ApplyRecalibration" +
" -L 20:10,000,000-20,000,000" +
" --no_cmdline_in_header" +
" -input " + params.inVCF +
" -U LENIENT_VCF_PROCESSING -o %s" +
" -tranchesFile " + getMd5DB().getMD5FilePath(params.tranchesMD5, null) +
" -recalFile " + getMd5DB().getMD5FilePath(params.recalMD5, null),
Arrays.asList(params.cutVCFMD5));
spec.disableShadowBCF();
executeTest("testApplyRecalibration-"+params.inVCF, spec);
}
VRTest indelUnfiltered = new VRTest(
"combined.phase1.chr20.raw.indels.unfiltered.sites.vcf", // all FILTERs as .
validationDataLocation + "combined.phase1.chr20.raw.indels.unfiltered.sites.vcf", // all FILTERs as .
"b7589cd098dc153ec64c02dcff2838e4", // tranches
"a04a9001f62eff43d363f4d63769f3ee", // recal file
"64f576881e21323dd4078262604717a2"); // cut VCF
VRTest indelFiltered = new VRTest(
"combined.phase1.chr20.raw.indels.filtered.sites.vcf", // all FILTERs as PASS
validationDataLocation + "combined.phase1.chr20.raw.indels.filtered.sites.vcf", // all FILTERs as PASS
"b7589cd098dc153ec64c02dcff2838e4", // tranches
"a04a9001f62eff43d363f4d63769f3ee", // recal file
"af22c55d91394c56a222fd40d6d54781"); // cut VCF