From a45011d7e70149574e55da8c9838fa8a0582f2d7 Mon Sep 17 00:00:00 2001
From: Ryan Poplin
Date: Thu, 8 Aug 2013 15:39:30 -0400
Subject: [PATCH] Adding mode to VQSR to not output variant records that are
 filtered out after applying the recalibration. Necessary for 1000G calling.

---
 .../ApplyRecalibration.java                   | 17 ++++++----
 ...ntRecalibrationWalkersIntegrationTest.java | 34 ++++++++++++++++++-
 2 files changed, 44 insertions(+), 7 deletions(-)

diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java
index e15b99824..3ae68edab 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java
@@ -138,15 +138,17 @@ public class ApplyRecalibration extends RodWalker implements T
     protected double TS_FILTER_LEVEL = 99.0;
     @Argument(fullName="ignore_filter", shortName="ignoreFilter", doc="If specified the variant recalibrator will use variants even if the specified filter name is marked in the input VCF file", required=false)
     private String[] IGNORE_INPUT_FILTERS = null;
+    @Argument(fullName="excludeFiltered", shortName="ef", doc="Don't output filtered loci after applying the recalibration", required=false)
+    protected boolean EXCLUDE_FILTERED = false;
     @Argument(fullName = "mode", shortName = "mode", doc = "Recalibration mode to employ: 1.) SNP for recalibrating only SNPs (emitting indels untouched in the output VCF); 2.) INDEL for indels; and 3.) BOTH for recalibrating both SNPs and indels simultaneously.", required = false)
     public VariantRecalibratorArgumentCollection.Mode MODE = VariantRecalibratorArgumentCollection.Mode.SNP;
 
     /////////////////////////////
     // Private Member Variables
     /////////////////////////////
-    final private List tranches = new ArrayList();
-    final private Set inputNames = new HashSet();
-    final private Set ignoreInputFilterSet = new TreeSet();
+    final private List tranches = new ArrayList<>();
+    final private Set inputNames = new HashSet<>();
+    final private Set ignoreInputFilterSet = new TreeSet<>();
 
     //---------------------------------------------------------------------------------------------------------------
     //
@@ -172,10 +174,10 @@ public class ApplyRecalibration extends RodWalker implements T
         }
 
         // setup the header fields
-        final Set hInfo = new HashSet();
+        final Set hInfo = new HashSet<>();
         hInfo.addAll(GATKVCFUtils.getHeaderFields(getToolkit(), inputNames));
         addVQSRStandardHeaderLines(hInfo);
-        final TreeSet samples = new TreeSet();
+        final TreeSet samples = new TreeSet<>();
         samples.addAll(SampleUtils.getUniqueSamplesFromRods(getToolkit(), inputNames));
 
         if( tranches.size() >= 2 ) {
@@ -272,7 +274,10 @@ public class ApplyRecalibration extends RodWalker implements T
                     builder.filters(filterString);
                 }
 
-                vcfWriter.add( builder.make() );
+                final VariantContext outputVC = builder.make();
+                if( !EXCLUDE_FILTERED || outputVC.isNotFiltered() ) {
+                    vcfWriter.add( outputVC );
+                }
             } else { // valid VC but not compatible with this mode, so just emit the variant untouched
                 vcfWriter.add( vc );
             }
diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java
index 3a6981bab..e4cd017b0 100644
--- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java
+++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java
@@ -46,11 +46,19 @@
 
 package org.broadinstitute.sting.gatk.walkers.variantrecalibration;
 
+import org.apache.commons.collections.IteratorUtils;
 import org.broadinstitute.sting.WalkerTest;
+import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
+import org.broadinstitute.variant.variantcontext.Genotype;
+import org.broadinstitute.variant.variantcontext.VariantContext;
+import org.broadinstitute.variant.vcf.VCFCodec;
+import org.testng.Assert;
 import org.testng.annotations.DataProvider;
 import org.testng.annotations.Test;
 
+import java.io.File;
 import java.util.Arrays;
+import java.util.List;
 
 public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
     private static class VRTest {
@@ -220,7 +228,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
                         " -recalFile " + getMd5DB().getMD5FilePath(params.recalMD5, null),
                 Arrays.asList(params.cutVCFMD5));
         spec.disableShadowBCF(); // has to be disabled because the input VCF is missing LowQual annotation
-        executeTest("testApplyRecalibrationIndel-"+params.inVCF, spec);
+        executeTest("testApplyRecalibrationIndel-" + params.inVCF, spec);
     }
 
     @Test
@@ -238,5 +246,29 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
                 Arrays.asList("20c23643a78c5b95abd1526fdab8960d"));
         executeTest("testApplyRecalibrationSnpAndIndelTogether", spec);
     }
+
+    @Test(enabled = true)
+    public void testApplyRecalibrationSnpAndIndelTogetherExcludeFiltered() throws Exception {
+        final String base = "-R " + b37KGReference +
+                " -T ApplyRecalibration" +
+                " -L 20:1000100-1000500" +
+                " -mode BOTH" +
+                " --excludeFiltered -ts_filter_level 90.0" +
+                " --no_cmdline_in_header" +
+                " -input " + privateTestDir + "VQSR.mixedTest.input" +
+                " -o %s" +
+                " -tranchesFile " + privateTestDir + "VQSR.mixedTest.tranches" +
+                " -recalFile " + privateTestDir + "VQSR.mixedTest.recal";
+
+        final WalkerTestSpec spec = new WalkerTestSpec(base, 1, Arrays.asList("")); // NOTE(review): blank MD5 -- presumably skips the checksum comparison so the records are checked below instead; confirm against WalkerTest
+        spec.disableShadowBCF();
+        final File outputVcf = executeTest("testApplyRecalibrationSnpAndIndelTogetherExcludeFiltered", spec).first.get(0);
+
+        for( final VariantContext vc : GATKVCFUtils.readAllVCs(outputVcf, new VCFCodec()).getSecond() ) {
+            if( vc != null ) {
+                Assert.assertTrue(vc.isNotFiltered()); // there should only be unfiltered records in the output VCF file
+            }
+        }
+    }
 
 }