Merge pull request #370 from broadinstitute/rp_dont_output_filtered_variants_in_VQSR

Adds an option to VQSR (the excludeFiltered argument of ApplyRecalibration) to not output variant records that are filtered out after recalibration.
This commit is contained in:
Eric Banks 2013-08-12 12:01:50 -07:00
commit bcf9a1cda5
2 changed files with 44 additions and 7 deletions

View File

@ -138,15 +138,17 @@ public class ApplyRecalibration extends RodWalker<Integer, Integer> implements T
protected double TS_FILTER_LEVEL = 99.0;
@Argument(fullName="ignore_filter", shortName="ignoreFilter", doc="If specified the variant recalibrator will use variants even if the specified filter name is marked in the input VCF file", required=false)
private String[] IGNORE_INPUT_FILTERS = null;
@Argument(fullName="excludeFiltered", shortName="ef", doc="Don't output filtered loci after applying the recalibration", required=false)
protected boolean EXCLUDE_FILTERED = false;
@Argument(fullName = "mode", shortName = "mode", doc = "Recalibration mode to employ: 1.) SNP for recalibrating only SNPs (emitting indels untouched in the output VCF); 2.) INDEL for indels; and 3.) BOTH for recalibrating both SNPs and indels simultaneously.", required = false)
public VariantRecalibratorArgumentCollection.Mode MODE = VariantRecalibratorArgumentCollection.Mode.SNP;
/////////////////////////////
// Private Member Variables
/////////////////////////////
final private List<Tranche> tranches = new ArrayList<Tranche>();
final private Set<String> inputNames = new HashSet<String>();
final private Set<String> ignoreInputFilterSet = new TreeSet<String>();
final private List<Tranche> tranches = new ArrayList<>();
final private Set<String> inputNames = new HashSet<>();
final private Set<String> ignoreInputFilterSet = new TreeSet<>();
//---------------------------------------------------------------------------------------------------------------
//
@ -172,10 +174,10 @@ public class ApplyRecalibration extends RodWalker<Integer, Integer> implements T
}
// setup the header fields
final Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
final Set<VCFHeaderLine> hInfo = new HashSet<>();
hInfo.addAll(GATKVCFUtils.getHeaderFields(getToolkit(), inputNames));
addVQSRStandardHeaderLines(hInfo);
final TreeSet<String> samples = new TreeSet<String>();
final TreeSet<String> samples = new TreeSet<>();
samples.addAll(SampleUtils.getUniqueSamplesFromRods(getToolkit(), inputNames));
if( tranches.size() >= 2 ) {
@ -272,7 +274,10 @@ public class ApplyRecalibration extends RodWalker<Integer, Integer> implements T
builder.filters(filterString);
}
vcfWriter.add( builder.make() );
final VariantContext outputVC = builder.make();
if( !EXCLUDE_FILTERED || outputVC.isNotFiltered() ) {
vcfWriter.add( outputVC );
}
} else { // valid VC but not compatible with this mode, so just emit the variant untouched
vcfWriter.add( vc );
}

View File

@ -46,11 +46,19 @@
package org.broadinstitute.sting.gatk.walkers.variantrecalibration;
import org.apache.commons.collections.IteratorUtils;
import org.broadinstitute.sting.WalkerTest;
import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
import org.broadinstitute.variant.variantcontext.Genotype;
import org.broadinstitute.variant.variantcontext.VariantContext;
import org.broadinstitute.variant.vcf.VCFCodec;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.io.File;
import java.util.Arrays;
import java.util.List;
public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
private static class VRTest {
@ -220,7 +228,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
" -recalFile " + getMd5DB().getMD5FilePath(params.recalMD5, null),
Arrays.asList(params.cutVCFMD5));
spec.disableShadowBCF(); // has to be disabled because the input VCF is missing LowQual annotation
executeTest("testApplyRecalibrationIndel-"+params.inVCF, spec);
executeTest("testApplyRecalibrationIndel-" + params.inVCF, spec);
}
@Test
@ -238,5 +246,29 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
Arrays.asList("20c23643a78c5b95abd1526fdab8960d"));
executeTest("testApplyRecalibrationSnpAndIndelTogether", spec);
}
/**
 * Runs ApplyRecalibration in BOTH mode with --excludeFiltered on the mixed SNP/indel
 * test input and checks that every record read back from the output VCF is unfiltered.
 *
 * Unlike the sibling tests, the output is validated by inspecting the records rather
 * than by comparing against a fixed MD5.
 *
 * @throws Exception if the walker run or reading the output VCF fails
 */
@Test(enabled = true)
public void testApplyRecalibrationSnpAndIndelTogetherExcludeFiltered() throws Exception {
    final String base = "-R " + b37KGReference +
            " -T ApplyRecalibration" +
            " -L 20:1000100-1000500" +
            " -mode BOTH" +
            " --excludeFiltered -ts_filter_level 90.0" +
            " --no_cmdline_in_header" +
            " -input " + privateTestDir + "VQSR.mixedTest.input" +
            " -o %s" +
            " -tranchesFile " + privateTestDir + "VQSR.mixedTest.tranches" +
            " -recalFile " + privateTestDir + "VQSR.mixedTest.recal";

    // NOTE(review): the empty MD5 string presumably tells the harness not to enforce a checksum -- confirm against WalkerTest
    final WalkerTestSpec spec = new WalkerTestSpec(base, 1, Arrays.asList(""));
    spec.disableShadowBCF();

    // Use this test's own name as the label; it was previously copy-pasted from testApplyRecalibrationSnpAndIndelTogether
    final File outputVCF = executeTest("testApplyRecalibrationSnpAndIndelTogetherExcludeFiltered", spec).first.get(0);

    for( final VariantContext vc : GATKVCFUtils.readAllVCs(outputVCF, new VCFCodec()).getSecond() ) {
        if( vc != null ) {
            // there should only be unfiltered records in the output VCF file
            Assert.assertTrue(vc.isNotFiltered(), "Found a filtered record in the output despite --excludeFiltered: " + vc);
        }
    }
}
}