diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/filters/VariantFiltrationIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/filters/VariantFiltrationIntegrationTest.java index 89ffe527f..e58381991 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/filters/VariantFiltrationIntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/filters/VariantFiltrationIntegrationTest.java @@ -211,21 +211,26 @@ public class VariantFiltrationIntegrationTest extends WalkerTest { executeTest("testFilteringDPfromFORMAT", spec); } + // The current htsjdk implementation of JEXL matching on genotype fields is buggy. When the filter uses an + // annotation that is present in both FORMAT and INFO, and the FORMAT value is missing, the current code (Dec 2016) + // will look up the INFO value. Here we use a made-up annotation Z instead of DP to avoid having to rig the test + // so that the INFO value will give the same matching results as the FORMAT value. @Test - public void testFilteringDPfromFORMATWithMissing() { + public void testFilteringZfromFORMATWithMissing() { WalkerTestSpec spec = new WalkerTestSpec( "-T VariantFiltration -o %s --no_cmdline_in_header -R " + b37KGReference - + " --genotypeFilterExpression 'DP < 10' --genotypeFilterName lowDP -V " + privateTestDir + "filteringDepthInFormatWithMissing.vcf", 1, - Arrays.asList("cc55e6a7bae2ab3503ecefc973ec1c2d")); + + " --genotypeFilterExpression 'Z < 10' --genotypeFilterName lowZ -V " + privateTestDir + "filteringDepthInFormatWithMissing.vcf", 1, + Arrays.asList("47607708dee31b6033f14a3613c8acb8")); executeTest("testFilteringDPfromFORMATWithMissing", spec); } + // Same comment as above. @Test - public void testFilteringDPfromFORMATAndFailMissing() { + public void testFilteringZfromFORMATAndFailMissing() { WalkerTestSpec spec = new WalkerTestSpec( "-T VariantFiltration -o %s --no_cmdline_in_header -R " + b37KGReference - + " --missingValuesInExpressionsShouldEvaluateAsFailing --genotypeFilterExpression 'DP < 10' --genotypeFilterName lowDP -V " + privateTestDir + "filteringDepthInFormatWithMissing.vcf", 1, - Arrays.asList("521e6f33325a051ced28152a1e7c273d")); + + " --missingValuesInExpressionsShouldEvaluateAsFailing --genotypeFilterExpression 'Z < 10' --genotypeFilterName lowZ -V " + privateTestDir + "filteringDepthInFormatWithMissing.vcf", 1, + Arrays.asList("4f519e725203931841940707c50ab6a3")); executeTest("testFilteringDPfromFORMATAndFailMissing", spec); } diff --git a/public/gatk-root/pom.xml b/public/gatk-root/pom.xml index 1ece58126..3ab6c68ad 100644 --- a/public/gatk-root/pom.xml +++ b/public/gatk-root/pom.xml @@ -44,8 +44,8 @@ org.testng.reporters.FailedReporter,org.testng.reporters.JUnitXMLReporter,org.broadinstitute.gatk.utils.TestNGTestTransformer,org.broadinstitute.gatk.utils.GATKTextReporter,org.uncommons.reportng.HTMLReporter - 2.6.1 - 2.6.0 + 2.8.1 + 2.7.2 diff --git a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/filters/VariantFiltration.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/filters/VariantFiltration.java index 9618497b2..de2b349a6 100644 --- a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/filters/VariantFiltration.java +++ b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/filters/VariantFiltration.java @@ -424,17 +424,8 @@ public class VariantFiltration extends RodWalker implements Tr // Add if expression filters the variant context for ( final VariantContextUtils.JexlVCMatchExp exp : genotypeFilterExpressions ) { - try { - if (Utils.invertLogic(VariantContextUtils.match(vc, g, exp), invertGenotypeFilterExpression)) { - filters.add(exp.name); - } - } catch (final IllegalArgumentException e) { - // logic: right now (2016/08/18) if a filter is applied based on specific annotation and some sample contains missing value for such annotation, - // lower level code will throw IllegalArgumentException, therefore we specifically catch this type of exception - // do nothing unless specifically asked to; it just means that the expression isn't defined for this context - if ( failIfMissingValues ) { - filters.add(exp.name); - } + if (matchesFilter(vc, g, exp, invertGenotypeFilterExpression, failIfMissingValues)){ + filters.add(exp.name); } } @@ -480,20 +471,18 @@ public class VariantFiltration extends RodWalker implements Tr final Set filters = new LinkedHashSet<>(vc.getFilters()); for ( final VariantContextUtils.JexlVCMatchExp exp : vcFilterExpressions ) { - try { - if ( Utils.invertLogic(VariantContextUtils.match(vc, exp), invertVCfilterExpression) ) { - filters.add(exp.name); - } - } catch (final Exception e) { - // do nothing unless specifically asked to; it just means that the expression isn't defined for this context - if ( failIfMissingValues ) { - filters.add(exp.name); - } + if (matchesFilter(vc, null, exp, invertVCfilterExpression, failIfMissingValues)) { + filters.add(exp.name); } } return filters; } + private static boolean matchesFilter(final VariantContext vc, final Genotype g, final VariantContextUtils.JexlVCMatchExp exp, final boolean invertVCfilterExpression, final boolean failIfMissingValues) { + final JexlMissingValueTreatment howToTreatMissingValues = failIfMissingValues ? JexlMissingValueTreatment.TREAT_AS_MATCH : JexlMissingValueTreatment.TREAT_AS_MISMATCH; + return Utils.invertLogic(VariantContextUtils.match(vc, g, exp, howToTreatMissingValues), invertVCfilterExpression); + } + // ----------------------------------------------------------------------------------------------- // some other complications besides main stuff // ----------------------------------------------------------------------------------------------- diff --git a/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/tools/walkers/filters/VariantFiltrationUnitTest.java b/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/tools/walkers/filters/VariantFiltrationUnitTest.java index 5262345fc..f06f75733 100644 --- a/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/tools/walkers/filters/VariantFiltrationUnitTest.java +++ b/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/tools/walkers/filters/VariantFiltrationUnitTest.java @@ -113,8 +113,8 @@ public class VariantFiltrationUnitTest extends BaseTest { final VariantContext vc = buildDataForFilters().make(); - final String filterName = "LowDP"; - final String filterExpr = "DP < 10"; + final String filterName = "LowZ"; //an attribute that doesn't appear in the VariantContext, so there isn't any chance of confusion like with the INFO DP + final String filterExpr = "Z < 10"; final List genotypeFilterExps = VariantContextUtils.initializeMatchExps(Arrays.asList(filterName), Arrays.asList(filterExpr)); @@ -197,33 +197,34 @@ public class VariantFiltrationUnitTest extends BaseTest { vcBuilder.attribute("SOR", 0.693); GenotypeBuilder gtBuilder = new GenotypeBuilder("one", Arrays.asList(altC,altC)); + final String ATTRIBUTE_NOT_IN_VC = "Z"; final Genotype firstSample = gtBuilder.attribute(VCFConstants.GENOTYPE_KEY, GenotypeType.HOM_VAR) - .DP(9) // edge case not passing "DP < 10" + .attribute(ATTRIBUTE_NOT_IN_VC, 9) // edge case not passing "Z < 10" .make(); gtBuilder = new GenotypeBuilder("two", Arrays.asList(altC,altC)); final Genotype secondSample = gtBuilder.attribute(VCFConstants.GENOTYPE_KEY, GenotypeType.HOM_VAR) - .DP(10) // edge case passing "DP < 10" + .attribute(ATTRIBUTE_NOT_IN_VC, 10) // edge case passing "Z = 10" .make(); gtBuilder = new GenotypeBuilder("three", Arrays.asList(nocall,nocall)); final Genotype thirdSample = gtBuilder.attribute(VCFConstants.GENOTYPE_KEY, GenotypeType.NO_CALL) - .DP(3) + .attribute(ATTRIBUTE_NOT_IN_VC, 3) .make(); gtBuilder = new GenotypeBuilder("four", Arrays.asList(nocall,nocall)); final Genotype fourthSample = gtBuilder.attribute(VCFConstants.GENOTYPE_KEY, GenotypeType.NO_CALL) - .DP(0) + .attribute(ATTRIBUTE_NOT_IN_VC, (0)) .make(); gtBuilder = new GenotypeBuilder("five", Arrays.asList(nocall,nocall)); final Genotype fifthSample = gtBuilder.attribute(VCFConstants.GENOTYPE_KEY, GenotypeType.NO_CALL) - .DP(0) + .attribute(ATTRIBUTE_NOT_IN_VC, 0) .make(); gtBuilder = new GenotypeBuilder("six", Arrays.asList(altC,altC)); final Genotype sixthSample = gtBuilder.attribute(VCFConstants.GENOTYPE_KEY, GenotypeType.HOM_VAR) - .DP(-1) + //no Z .make(); vcBuilder.genotypes(firstSample, secondSample, thirdSample, fourthSample, fifthSample, sixthSample);