From 0adf505b53320521b195b2cbf74325382cd7ffe4 Mon Sep 17 00:00:00 2001 From: rpoplin Date: Thu, 2 Dec 2010 15:38:45 +0000 Subject: [PATCH] We no longer look at by-hapmap validation status in the VQSR because using the HapMap VCF file is higher quality. As a side effect we now support the dbsnp 132 vcf file. ApplyVariantCuts now requires that the input VCF rod bindings begin with input, matching the other VQSR walkers. Wiki updated with information about how to obtain the hapmap and 1kg truth sets. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4772 348d0f76-0448-11de-a6fe-93d51630548a --- .../ApplyVariantCuts.java | 31 ++++++++++---- .../GenerateVariantClustersWalker.java | 42 +++++++++++-------- .../VariantRecalibrator.java | 24 +++++------ ...ntRecalibrationWalkersIntegrationTest.java | 16 +++---- 4 files changed, 66 insertions(+), 47 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyVariantCuts.java b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyVariantCuts.java index 1da542f5e..877013f87 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyVariantCuts.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyVariantCuts.java @@ -30,8 +30,8 @@ import org.broad.tribble.vcf.*; import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.exceptions.UserException; @@ -73,7 +73,9 @@ public class ApplyVariantCuts extends RodWalker { ///////////////////////////// // Private Member Variables ///////////////////////////// - final List tranches = new ArrayList(); + final private List tranches = new ArrayList(); + final private Set inputNames = new HashSet(); + //--------------------------------------------------------------------------------------------------------------- // @@ -91,11 +93,24 @@ public class ApplyVariantCuts extends RodWalker { } Collections.reverse(tranches); // this algorithm wants the tranches ordered from worst to best + for( ReferenceOrderedDataSource d : this.getToolkit().getRodDataSources() ) { + if( d.getName().startsWith("input") ) { + inputNames.add(d.getName()); + logger.info("Found input variant track with name " + d.getName()); + } else { + logger.info("Not evaluating ROD binding " + d.getName()); + } + } + + if( inputNames.size() == 0 ) { + throw new UserException.BadInput( "No input variant tracks found. Input variant binding names must begin with 'input'." ); + } + // setup the header fields final Set hInfo = new HashSet(); - hInfo.addAll(VCFUtils.getHeaderFields(getToolkit())); + hInfo.addAll(VCFUtils.getHeaderFields(getToolkit(), inputNames)); final TreeSet samples = new TreeSet(); - samples.addAll(SampleUtils.getUniqueSamplesFromRods(getToolkit())); + samples.addAll(SampleUtils.getUniqueSamplesFromRods(getToolkit(), inputNames)); if( tranches.size() >= 2 ) { for( int iii = 0; iii < tranches.size() - 1; iii++ ) { @@ -127,8 +142,8 @@ public class ApplyVariantCuts extends RodWalker { return 1; } - for( VariantContext vc : tracker.getAllVariantContexts(ref, null, context.getLocation(), false, false) ) { - if( vc != null && !vc.getSource().equals(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME) && vc.isSNP() ) { + for( VariantContext vc : tracker.getVariantContexts(ref, inputNames, null, context.getLocation(), false, false) ) { + if( vc != null && vc.isSNP() ) { String filterString = null; if( !vc.isFiltered() ) { try { @@ -174,11 +189,11 @@ public class ApplyVariantCuts extends RodWalker { //--------------------------------------------------------------------------------------------------------------- public Integer reduceInit() { - return 1; + return 1; // This value isn't used for anything } public Integer reduce( final Integer mapValue, final Integer reduceSum ) { - return 1; + return 1; // This value isn't used for anything } public void onTraversalDone( Integer reduceSum ) { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/GenerateVariantClustersWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/GenerateVariantClustersWalker.java index b2907e3cf..81995f5ac 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/GenerateVariantClustersWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/GenerateVariantClustersWalker.java @@ -25,16 +25,13 @@ package org.broadinstitute.sting.gatk.walkers.variantrecalibration; -import org.broad.tribble.dbsnp.DbSNPFeature; import org.broad.tribble.util.variantcontext.VariantContext; -import org.broadinstitute.sting.commandline.CommandLineUtils; import org.broadinstitute.sting.commandline.Hidden; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.utils.collections.ExpandingArrayList; import org.broadinstitute.sting.commandline.Argument; @@ -99,9 +96,6 @@ public class GenerateVariantClustersWalker extends RodWalker vcsDbsnp = tracker.getVariantContexts(ref, "dbsnp", null, context.getLocation(), false, true); final Collection vcsHapMap = tracker.getVariantContexts(ref, "hapmap", null, context.getLocation(), false, true); final Collection vcs1KG = tracker.getVariantContexts(ref, "1kg", null, context.getLocation(), false, true); + final VariantContext vcDbsnp = ( vcsDbsnp.size() != 0 ? vcsDbsnp.iterator().next() : null ); final VariantContext vcHapMap = ( vcsHapMap.size() != 0 ? vcsHapMap.iterator().next() : null ); final VariantContext vc1KG = ( vcs1KG.size() != 0 ? vcs1KG.iterator().next() : null ); - variantDatum.isKnown = ( dbsnp != null ); - variantDatum.weight = WEIGHT_NOVELS; - if( vcHapMap != null || ( !NO_BY_HAPMAP_VALIDATION_STATUS && dbsnp != null && DbSNPHelper.isHapmap(dbsnp) ) ) { + variantDatum.isKnown = ( vcDbsnp != null ); + variantDatum.weight = WEIGHT_NOVELS; + if( vcHapMap != null ) { variantDatum.weight = WEIGHT_HAPMAP; } else if( vc1KG != null ) { variantDatum.weight = WEIGHT_1KG; - } else if( dbsnp != null ) { + } else if( vcDbsnp != null ) { variantDatum.weight = WEIGHT_DBSNP; } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java index 0f9125690..3748ed7d2 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java @@ -25,7 +25,6 @@ package org.broadinstitute.sting.gatk.walkers.variantrecalibration; -import org.broad.tribble.dbsnp.DbSNPFeature; import org.broad.tribble.util.variantcontext.VariantContext; import org.broad.tribble.vcf.*; import org.broadinstitute.sting.commandline.*; @@ -82,8 +81,6 @@ public class VariantRecalibrator extends RodWalker hInfo = new HashSet(); final TreeSet samples = new TreeSet(); @@ -227,19 +224,20 @@ public class VariantRecalibrator extends RodWalker vcsDbsnp = tracker.getVariantContexts(ref, "dbsnp", null, context.getLocation(), false, true); final Collection vcsHapMap = tracker.getVariantContexts(ref, "hapmap", null, context.getLocation(), false, true); final Collection vcs1KG = tracker.getVariantContexts(ref, "1kg", null, context.getLocation(), false, true); + final VariantContext vcDbsnp = ( vcsDbsnp.size() != 0 ? vcsDbsnp.iterator().next() : null ); final VariantContext vcHapMap = ( vcsHapMap.size() != 0 ? vcsHapMap.iterator().next() : null ); final VariantContext vc1KG = ( vcs1KG.size() != 0 ? vcs1KG.iterator().next() : null ); - variantDatum.isKnown = ( dbsnp != null ); + variantDatum.isKnown = ( vcDbsnp != null ); double knownPrior_qScore = PRIOR_NOVELS; - if( vcHapMap != null || ( !NO_BY_HAPMAP_VALIDATION_STATUS && dbsnp != null && DbSNPHelper.isHapmap(dbsnp) ) ) { + if( vcHapMap != null ) { knownPrior_qScore = PRIOR_HAPMAP; } else if( vc1KG != null ) { knownPrior_qScore = PRIOR_1KG; - } else if( dbsnp != null ) { + } else if( vcDbsnp != null ) { knownPrior_qScore = PRIOR_DBSNP; } diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java index f37a664f4..433090e9e 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java @@ -29,16 +29,16 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest { } VRTest yriTrio = new VRTest("yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", - "ab2629d67e378fd3aceb8318f0fbfe04", // in vcf - "d8f0a92aa42989d332d3c65f61352978", // tranches - "3370e51f6e5035f2472c66762d13c821", // recalVCF - "4801e6cfaeadc81c5c1f9bf5948e2a1c"); // cut VCF + "4eeffa7a1965ce0c25c5edd0bae76290", // in vcf + "7407987a0148284ed910e1858116dd8d", // tranches + "15ab55be5b2f62627aea8546a4728d77", // recalVCF + "9435f1aed7313fbfff540a4d6d19d0c4"); // cut VCF VRTest lowPass = new VRTest("lowpass.N3.chr1.raw.vcf", - "725489156426e4ddd8d623ab3d4b1023", // in vcf - "3a7067247146f4a77bb4fd7bc36f94c4", // tranches - "9a525c6838bff695321fc7ac0e458f9c", // recalVCF - "41e0a16af150244454ee68948ced00fb"); // cut VCF + "8937a3ae7f176dacf47b8ee6c0023416", // in vcf + "2896657b5c30bfd8e82e62e58d94ef4e", // tranches + "a5fe2ee50144ef61121c42daf430381c", // recalVCF + "9a35b69bed93894306c87bc9a0bcc116"); // cut VCF @DataProvider(name = "VRTest") public Object[][] createData1() {