Merge pull request #370 from broadinstitute/rp_dont_output_filtered_variants_in_VQSR
Adding mode to VQSR to not output variant records that are filtered out ...
This commit is contained in:
commit
bcf9a1cda5
|
|
@ -138,15 +138,17 @@ public class ApplyRecalibration extends RodWalker<Integer, Integer> implements T
|
|||
protected double TS_FILTER_LEVEL = 99.0;
|
||||
@Argument(fullName="ignore_filter", shortName="ignoreFilter", doc="If specified the variant recalibrator will use variants even if the specified filter name is marked in the input VCF file", required=false)
|
||||
private String[] IGNORE_INPUT_FILTERS = null;
|
||||
@Argument(fullName="excludeFiltered", shortName="ef", doc="Don't output filtered loci after applying the recalibration", required=false)
|
||||
protected boolean EXCLUDE_FILTERED = false;
|
||||
@Argument(fullName = "mode", shortName = "mode", doc = "Recalibration mode to employ: 1.) SNP for recalibrating only SNPs (emitting indels untouched in the output VCF); 2.) INDEL for indels; and 3.) BOTH for recalibrating both SNPs and indels simultaneously.", required = false)
|
||||
public VariantRecalibratorArgumentCollection.Mode MODE = VariantRecalibratorArgumentCollection.Mode.SNP;
|
||||
|
||||
/////////////////////////////
|
||||
// Private Member Variables
|
||||
/////////////////////////////
|
||||
final private List<Tranche> tranches = new ArrayList<Tranche>();
|
||||
final private Set<String> inputNames = new HashSet<String>();
|
||||
final private Set<String> ignoreInputFilterSet = new TreeSet<String>();
|
||||
final private List<Tranche> tranches = new ArrayList<>();
|
||||
final private Set<String> inputNames = new HashSet<>();
|
||||
final private Set<String> ignoreInputFilterSet = new TreeSet<>();
|
||||
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
|
|
@ -172,10 +174,10 @@ public class ApplyRecalibration extends RodWalker<Integer, Integer> implements T
|
|||
}
|
||||
|
||||
// setup the header fields
|
||||
final Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
|
||||
final Set<VCFHeaderLine> hInfo = new HashSet<>();
|
||||
hInfo.addAll(GATKVCFUtils.getHeaderFields(getToolkit(), inputNames));
|
||||
addVQSRStandardHeaderLines(hInfo);
|
||||
final TreeSet<String> samples = new TreeSet<String>();
|
||||
final TreeSet<String> samples = new TreeSet<>();
|
||||
samples.addAll(SampleUtils.getUniqueSamplesFromRods(getToolkit(), inputNames));
|
||||
|
||||
if( tranches.size() >= 2 ) {
|
||||
|
|
@ -272,7 +274,10 @@ public class ApplyRecalibration extends RodWalker<Integer, Integer> implements T
|
|||
builder.filters(filterString);
|
||||
}
|
||||
|
||||
vcfWriter.add( builder.make() );
|
||||
final VariantContext outputVC = builder.make();
|
||||
if( !EXCLUDE_FILTERED || outputVC.isNotFiltered() ) {
|
||||
vcfWriter.add( outputVC );
|
||||
}
|
||||
} else { // valid VC but not compatible with this mode, so just emit the variant untouched
|
||||
vcfWriter.add( vc );
|
||||
}
|
||||
|
|
|
|||
|
|
@ -46,11 +46,19 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.variantrecalibration;
|
||||
|
||||
import org.apache.commons.collections.IteratorUtils;
|
||||
import org.broadinstitute.sting.WalkerTest;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
|
||||
import org.broadinstitute.variant.variantcontext.Genotype;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContext;
|
||||
import org.broadinstitute.variant.vcf.VCFCodec;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
|
||||
private static class VRTest {
|
||||
|
|
@ -220,7 +228,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
" -recalFile " + getMd5DB().getMD5FilePath(params.recalMD5, null),
|
||||
Arrays.asList(params.cutVCFMD5));
|
||||
spec.disableShadowBCF(); // has to be disabled because the input VCF is missing LowQual annotation
|
||||
executeTest("testApplyRecalibrationIndel-"+params.inVCF, spec);
|
||||
executeTest("testApplyRecalibrationIndel-" + params.inVCF, spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
@ -238,5 +246,29 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
Arrays.asList("20c23643a78c5b95abd1526fdab8960d"));
|
||||
executeTest("testApplyRecalibrationSnpAndIndelTogether", spec);
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testApplyRecalibrationSnpAndIndelTogetherExcludeFiltered() throws Exception {
|
||||
final String base = "-R " + b37KGReference +
|
||||
" -T ApplyRecalibration" +
|
||||
" -L 20:1000100-1000500" +
|
||||
" -mode BOTH" +
|
||||
" --excludeFiltered -ts_filter_level 90.0" +
|
||||
" --no_cmdline_in_header" +
|
||||
" -input " + privateTestDir + "VQSR.mixedTest.input" +
|
||||
" -o %s" +
|
||||
" -tranchesFile " + privateTestDir + "VQSR.mixedTest.tranches" +
|
||||
" -recalFile " + privateTestDir + "VQSR.mixedTest.recal";
|
||||
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base, 1, Arrays.asList(""));
|
||||
spec.disableShadowBCF();
|
||||
final File VCF = executeTest("testApplyRecalibrationSnpAndIndelTogether", spec).first.get(0);
|
||||
|
||||
for( final VariantContext VC : GATKVCFUtils.readAllVCs(VCF, new VCFCodec()).getSecond() ) {
|
||||
if( VC != null ) {
|
||||
Assert.assertTrue(VC.isNotFiltered()); // there should only be unfiltered records in the output VCF file
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue