From cda8d944b741f5d9ee0cabb9b5bc3acdd4ec83aa Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Tue, 7 Aug 2012 17:22:27 -0400 Subject: [PATCH] Bugfixes for BCF with VQSR -- Old version converted doubles directly from strings. New version uses VariantContext getAttributeAsDouble() that looks at the values directly to determine how to convert from Object to Double (via Double.valueOf, (Double), or (Double)(Integer)). -- getAttributeAsDouble() is now smart in converting integers to doubles as needed -- Removed unnecessary logging info in BCF2Codec -- Added integration tests to ensure that VQSR works end-to-end with BCF2 using sites version of the file khalid sent to me -- Added vqsr.bcf_test.snps.unfiltered.bcf file for this integration test --- .../VariantDataManager.java | 2 +- .../sting/utils/codecs/bcf2/BCF2Codec.java | 1 - .../utils/variantcontext/CommonInfo.java | 1 + ...ntRecalibrationWalkersIntegrationTest.java | 59 +++++++++++++++++-- 4 files changed, 57 insertions(+), 6 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java index 45fdad4f8..e88505f99 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java @@ -235,7 +235,7 @@ public class VariantDataManager { double value; try { - value = Double.parseDouble( (String)vc.getAttribute( annotationKey ) ); + value = vc.getAttributeAsDouble( annotationKey, Double.NaN ); if( Double.isInfinite(value) ) { value = Double.NaN; } if( jitter && annotationKey.equalsIgnoreCase("HRUN") ) { // Integer valued annotations must be jittered a bit to work in this GMM value += -0.25 + 0.5 * GenomeAnalysisEngine.getRandomGenerator().nextDouble(); diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java index 0776b3aa8..244af8517 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java @@ -167,7 +167,6 @@ public final class BCF2Codec implements FeatureCodec { // create the config offsets if ( ! header.getContigLines().isEmpty() ) { - logger.info("Found contig lines in BCF2 file, using those"); contigNames.clear(); for ( final VCFContigHeaderLine contig : header.getContigLines()) { if ( contig.getID() == null || contig.getID().equals("") ) diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/CommonInfo.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/CommonInfo.java index fb0d7140d..127f91677 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/CommonInfo.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/CommonInfo.java @@ -216,6 +216,7 @@ final class CommonInfo { Object x = getAttribute(key); if ( x == null ) return defaultValue; if ( x instanceof Double ) return (Double)x; + if ( x instanceof Integer ) return (Integer)x; return Double.valueOf((String)x); // throws an exception if this isn't a string } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java index 74d071a90..d1ecbb0bf 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java @@ -13,7 +13,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest { String recalMD5; String cutVCFMD5; public VRTest(String inVCF, String tranchesMD5, String recalMD5, String cutVCFMD5) { - this.inVCF = validationDataLocation + inVCF; + this.inVCF = inVCF; this.tranchesMD5 = tranchesMD5; this.recalMD5 = recalMD5; this.cutVCFMD5 = cutVCFMD5; @@ -25,7 +25,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest { } } - VRTest lowPass = new VRTest("phase1.projectConsensus.chr20.raw.snps.vcf", + VRTest lowPass = new VRTest(validationDataLocation + "phase1.projectConsensus.chr20.raw.snps.vcf", "f360ce3eb2b0b887301be917a9843e2b", // tranches "287fea5ea066bf3fdd71f5ce9b58eab3", // recal file "356b9570817b9389da71fbe991d8b2f5"); // cut VCF @@ -74,14 +74,65 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest { executeTest("testApplyRecalibration-"+params.inVCF, spec); } + VRTest bcfTest = new VRTest(privateTestDir + "vqsr.bcf_test.snps.unfiltered.bcf", + "a8ce3cd3dccafdf7d580bcce7d660a9a", // tranches + "1cdf8c9ee77d91d1ba7f002573108bad", // recal file + "62fda105e14b619a1c263855cf56af1d"); // cut VCF + + @DataProvider(name = "VRBCFTest") + public Object[][] createVRBCFTest() { + return new Object[][]{ {bcfTest} }; + //return new Object[][]{ {yriTrio}, {lowPass} }; // Add hg19 chr20 trio calls here + } + + @Test(dataProvider = "VRBCFTest") + public void testVariantRecalibratorWithBCF(VRTest params) { + //System.out.printf("PARAMS FOR %s is %s%n", vcf, clusterFile); + WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( + "-R " + b37KGReference + + " -resource:known=true,prior=10.0 " + GATKDataLocation + "dbsnp_132_b37.leftAligned.vcf" + + " -resource:truth=true,training=true,prior=15.0 " + comparisonDataLocation + "Validated/HapMap/3.3/sites_r27_nr.b37_fwd.vcf" + + " -resource:training=true,truth=true,prior=12.0 " + comparisonDataLocation + "Validated/Omni2.5_chip/Omni25_sites_1525_samples.b37.vcf" + + " -T VariantRecalibrator" + + " -input " + params.inVCF + + " -L 20:10,000,000-20,000,000" + + " --no_cmdline_in_header" + + " -an AC " + // integer value + " -an QD -an ReadPosRankSum -an FS -an InbreedingCoeff " + // floats value + " -mG 2 "+ + " -recalFile %s" + + " -tranchesFile %s", + 2, + Arrays.asList("bcf", "txt"), + Arrays.asList(params.recalMD5, params.tranchesMD5)); + executeTest("testVariantRecalibrator-"+params.inVCF, spec).getFirst(); + } + + @Test(dataProvider = "VRBCFTest", dependsOnMethods="testVariantRecalibratorWithBCF") + public void testApplyRecalibrationWithBCF(VRTest params) { + WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( + "-R " + b37KGReference + + " -T ApplyRecalibration" + + " -L 20:10,000,000-20,000,000" + + " --no_cmdline_in_header" + + " -input " + params.inVCF + + " -U LENIENT_VCF_PROCESSING -o %s" + + " -tranchesFile " + getMd5DB().getMD5FilePath(params.tranchesMD5, null) + + " -recalFile " + getMd5DB().getMD5FilePath(params.recalMD5, null), + Arrays.asList(params.cutVCFMD5)); + spec.disableShadowBCF(); + executeTest("testApplyRecalibration-"+params.inVCF, spec); + } + + VRTest indelUnfiltered = new VRTest( - "combined.phase1.chr20.raw.indels.unfiltered.sites.vcf", // all FILTERs as . + validationDataLocation + "combined.phase1.chr20.raw.indels.unfiltered.sites.vcf", // all FILTERs as . "b7589cd098dc153ec64c02dcff2838e4", // tranches "a04a9001f62eff43d363f4d63769f3ee", // recal file "64f576881e21323dd4078262604717a2"); // cut VCF VRTest indelFiltered = new VRTest( - "combined.phase1.chr20.raw.indels.filtered.sites.vcf", // all FILTERs as PASS + validationDataLocation + "combined.phase1.chr20.raw.indels.filtered.sites.vcf", // all FILTERs as PASS "b7589cd098dc153ec64c02dcff2838e4", // tranches "a04a9001f62eff43d363f4d63769f3ee", // recal file "af22c55d91394c56a222fd40d6d54781"); // cut VCF