From 9686e91a51819960a3c59a29560b3b8ad3b9c19c Mon Sep 17 00:00:00 2001 From: Guillermo del Angel Date: Mon, 25 Mar 2013 17:13:40 -0400 Subject: [PATCH] Added small feature to VariantFiltration to filter sites outside of a given mask: -- Sometimes it's desireable to specify a set of "good" regions and filter out other stuff (like say an alignability mask or a "good regions" mask). But by default, the -mask argument in VF will only filter sites inside a particular mask. New argument -filterNotInMask will reverse default logic and filter outside of a given mask. -- Added integration test, and made sure we also test with a BED rod. --- .../VariantFiltrationIntegrationTest.java | 9 ++++++++ .../walkers/filters/VariantFiltration.java | 21 +++++++++++++++---- 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java index 84729647a..9de190f5f 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java @@ -98,6 +98,15 @@ public class VariantFiltrationIntegrationTest extends WalkerTest { executeTest("test mask extend", spec3); } + @Test + public void testMaskReversed() { + WalkerTestSpec spec3 = new WalkerTestSpec( + baseTestString() + " -maskName outsideGoodSites -filterNotInMask --mask:BED " + privateTestDir + "goodMask.bed --variant " + privateTestDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, + Arrays.asList("e65d27c13953fc3a77dcad27a4357786")); + executeTest("test filter sites not in mask", spec3); + } + + @Test public void testFilter1() { WalkerTestSpec spec = new WalkerTestSpec( diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltration.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltration.java index 8feb9101c..362b49f68 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltration.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltration.java @@ -87,9 +87,10 @@ public class VariantFiltration extends RodWalker { protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection(); /** - * Any variant which overlaps entries from the provided mask rod will be filtered. + * Any variant which overlaps entries from the provided mask rod will be filtered. If the user wants logic to be reversed, + * i.e. filter variants that do not overlap with provided mask, then argument -filterNotInMask can be used. */ - @Input(fullName="mask", doc="Input ROD mask", required=false) + @Input(fullName="mask", shortName="mask", doc="Input ROD mask", required=false) public RodBinding mask; @Output(doc="File to which variants should be written") @@ -140,6 +141,14 @@ public class VariantFiltration extends RodWalker { @Argument(fullName="maskName", shortName="maskName", doc="The text to put in the FILTER field if a 'mask' rod is provided and overlaps with a variant call", required=false) protected String MASK_NAME = "Mask"; + /** + * By default, if the -mask argument is used, any variant falling in a mask will be filtered. + * If this argument is used, logic is reversed, and variants falling outside a given mask will be filtered. + * Use case is, for example, if we have an interval list or BED file with "good" sites. + */ + @Argument(fullName="filterNotInMask", shortName="filterNotInMask", doc="Filter records NOT in given input mask.", required=false) + protected boolean filterRecordsNotInMask = false; + /** * By default, if JEXL cannot evaluate your expression for a particular record because one of the annotations is not present, the whole expression evaluates as PASSing. * Use this argument to have it evaluate as failing filters instead for these cases. @@ -186,7 +195,9 @@ public class VariantFiltration extends RodWalker { hInfo.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_FILTER_KEY)); if ( mask.isBound() ) { - hInfo.add(new VCFFilterHeaderLine(MASK_NAME, "Overlaps a user-input mask")); + if (filterRecordsNotInMask) + hInfo.add(new VCFFilterHeaderLine(MASK_NAME, "Doesn't overlap a user-input mask")); + else hInfo.add(new VCFFilterHeaderLine(MASK_NAME, "Overlaps a user-input mask")); } writer.writeHeader(new VCFHeader(hInfo, SampleUtils.getUniqueSamplesFromRods(getToolkit(), inputNames))); @@ -199,6 +210,8 @@ public class VariantFiltration extends RodWalker { if ( MASK_EXTEND < 0 ) throw new UserException.BadArgumentValue("maskExtension", "negative values are not allowed"); + if (filterRecordsNotInMask && !mask.isBound()) + throw new UserException.BadArgumentValue("filterNotInMask","argument not allowed if mask argument is not provided"); filterExps = VariantContextUtils.initializeMatchExps(FILTER_NAMES, FILTER_EXPS); genotypeFilterExps = VariantContextUtils.initializeMatchExps(GENOTYPE_FILTER_NAMES, GENOTYPE_FILTER_EXPS); @@ -223,7 +236,7 @@ public class VariantFiltration extends RodWalker { Collection VCs = tracker.getValues(variantCollection.variants, context.getLocation()); // is there a SNP mask present? - boolean hasMask = tracker.hasValues(mask); + boolean hasMask = (tracker.hasValues(mask) && !filterRecordsNotInMask) || (filterRecordsNotInMask && !tracker.hasValues(mask)); if ( hasMask ) previousMaskPosition = ref.getLocus(); // multi-base masks will get triggered over all bases of the mask