Merge pull request #1533 from broadinstitute/lb_update_jexl_behavior

Rev htsjdk + update jexl behavior
This commit is contained in:
Geraldine Van der Auwera 2016-12-10 13:33:06 -05:00 committed by GitHub
commit dca24a9fa4
4 changed files with 31 additions and 36 deletions

View File

@ -211,21 +211,26 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
executeTest("testFilteringDPfromFORMAT", spec); executeTest("testFilteringDPfromFORMAT", spec);
} }
// The current htsjdk implementation of JEXL matching on genotype fields is buggy. When the filter uses an
// annotation that is present in both FORMAT and INFO, and the FORMAT value is missing, the current code (Dec 2016)
// will look up the INFO value. Here we use a made-up annotation Z instead of DP to avoid having to rig the test
// so that the INFO value will give the same matching results as the FORMAT value.
@Test @Test
public void testFilteringDPfromFORMATWithMissing() { public void testFilteringZfromFORMATWithMissing() {
WalkerTestSpec spec = new WalkerTestSpec( WalkerTestSpec spec = new WalkerTestSpec(
"-T VariantFiltration -o %s --no_cmdline_in_header -R " + b37KGReference "-T VariantFiltration -o %s --no_cmdline_in_header -R " + b37KGReference
+ " --genotypeFilterExpression 'DP < 10' --genotypeFilterName lowDP -V " + privateTestDir + "filteringDepthInFormatWithMissing.vcf", 1, + " --genotypeFilterExpression 'Z < 10' --genotypeFilterName lowZ -V " + privateTestDir + "filteringDepthInFormatWithMissing.vcf", 1,
Arrays.asList("cc55e6a7bae2ab3503ecefc973ec1c2d")); Arrays.asList("47607708dee31b6033f14a3613c8acb8"));
executeTest("testFilteringDPfromFORMATWithMissing", spec); executeTest("testFilteringDPfromFORMATWithMissing", spec);
} }
// Same comment as above.
@Test @Test
public void testFilteringDPfromFORMATAndFailMissing() { public void testFilteringZfromFORMATAndFailMissing() {
WalkerTestSpec spec = new WalkerTestSpec( WalkerTestSpec spec = new WalkerTestSpec(
"-T VariantFiltration -o %s --no_cmdline_in_header -R " + b37KGReference "-T VariantFiltration -o %s --no_cmdline_in_header -R " + b37KGReference
+ " --missingValuesInExpressionsShouldEvaluateAsFailing --genotypeFilterExpression 'DP < 10' --genotypeFilterName lowDP -V " + privateTestDir + "filteringDepthInFormatWithMissing.vcf", 1, + " --missingValuesInExpressionsShouldEvaluateAsFailing --genotypeFilterExpression 'Z < 10' --genotypeFilterName lowZ -V " + privateTestDir + "filteringDepthInFormatWithMissing.vcf", 1,
Arrays.asList("521e6f33325a051ced28152a1e7c273d")); Arrays.asList("4f519e725203931841940707c50ab6a3"));
executeTest("testFilteringDPfromFORMATAndFailMissing", spec); executeTest("testFilteringDPfromFORMATAndFailMissing", spec);
} }

View File

@ -44,8 +44,8 @@
<test.listeners>org.testng.reporters.FailedReporter,org.testng.reporters.JUnitXMLReporter,org.broadinstitute.gatk.utils.TestNGTestTransformer,org.broadinstitute.gatk.utils.GATKTextReporter,org.uncommons.reportng.HTMLReporter</test.listeners> <test.listeners>org.testng.reporters.FailedReporter,org.testng.reporters.JUnitXMLReporter,org.broadinstitute.gatk.utils.TestNGTestTransformer,org.broadinstitute.gatk.utils.GATKTextReporter,org.uncommons.reportng.HTMLReporter</test.listeners>
<!-- Version numbers for picard and htsjdk --> <!-- Version numbers for picard and htsjdk -->
<htsjdk.version>2.6.1</htsjdk.version> <htsjdk.version>2.8.1</htsjdk.version>
<picard.version>2.6.0</picard.version> <picard.version>2.7.2</picard.version>
</properties> </properties>
<!-- Dependency configuration (versions, etc.) --> <!-- Dependency configuration (versions, etc.) -->

View File

@ -424,18 +424,9 @@ public class VariantFiltration extends RodWalker<Integer, Integer> implements Tr
// Add if expression filters the variant context // Add if expression filters the variant context
for ( final VariantContextUtils.JexlVCMatchExp exp : genotypeFilterExpressions ) { for ( final VariantContextUtils.JexlVCMatchExp exp : genotypeFilterExpressions ) {
try { if (matchesFilter(vc, g, exp, invertGenotypeFilterExpression, failIfMissingValues)){
if (Utils.invertLogic(VariantContextUtils.match(vc, g, exp), invertGenotypeFilterExpression)) {
filters.add(exp.name); filters.add(exp.name);
} }
} catch (final IllegalArgumentException e) {
// logic: right now (2016/08/18) if a filter is applied based on specific annotation and some sample contains missing value for such annotation,
// lower level code will throw IllegalArgumentException, therefore we specifically catch this type of exception
// do nothing unless specifically asked to; it just means that the expression isn't defined for this context
if ( failIfMissingValues ) {
filters.add(exp.name);
}
}
} }
// if sample is filtered and --setFilteredGtToNocall, set genotype to non-call // if sample is filtered and --setFilteredGtToNocall, set genotype to non-call
@ -480,20 +471,18 @@ public class VariantFiltration extends RodWalker<Integer, Integer> implements Tr
final Set<String> filters = new LinkedHashSet<>(vc.getFilters()); final Set<String> filters = new LinkedHashSet<>(vc.getFilters());
for ( final VariantContextUtils.JexlVCMatchExp exp : vcFilterExpressions ) { for ( final VariantContextUtils.JexlVCMatchExp exp : vcFilterExpressions ) {
try { if (matchesFilter(vc, null, exp, invertVCfilterExpression, failIfMissingValues)) {
if ( Utils.invertLogic(VariantContextUtils.match(vc, exp), invertVCfilterExpression) ) {
filters.add(exp.name); filters.add(exp.name);
} }
} catch (final Exception e) {
// do nothing unless specifically asked to; it just means that the expression isn't defined for this context
if ( failIfMissingValues ) {
filters.add(exp.name);
}
}
} }
return filters; return filters;
} }
private static boolean matchesFilter(final VariantContext vc, final Genotype g, final VariantContextUtils.JexlVCMatchExp exp, final boolean invertVCfilterExpression, final boolean failIfMissingValues) {
final JexlMissingValueTreatment howToTreatMissingValues = failIfMissingValues ? JexlMissingValueTreatment.TREAT_AS_MATCH : JexlMissingValueTreatment.TREAT_AS_MISMATCH;
return Utils.invertLogic(VariantContextUtils.match(vc, g, exp, howToTreatMissingValues), invertVCfilterExpression);
}
// ----------------------------------------------------------------------------------------------- // -----------------------------------------------------------------------------------------------
// some other complications besides main stuff // some other complications besides main stuff
// ----------------------------------------------------------------------------------------------- // -----------------------------------------------------------------------------------------------

View File

@ -113,8 +113,8 @@ public class VariantFiltrationUnitTest extends BaseTest {
final VariantContext vc = buildDataForFilters().make(); final VariantContext vc = buildDataForFilters().make();
final String filterName = "LowDP"; final String filterName = "LowZ"; //an attribute that doesn't appear in the VariantContext, so there isn't any chance of confusion like with the INFO DP
final String filterExpr = "DP < 10"; final String filterExpr = "Z < 10";
final List<VariantContextUtils.JexlVCMatchExp> genotypeFilterExps = VariantContextUtils.initializeMatchExps(Arrays.asList(filterName), Arrays.asList(filterExpr)); final List<VariantContextUtils.JexlVCMatchExp> genotypeFilterExps = VariantContextUtils.initializeMatchExps(Arrays.asList(filterName), Arrays.asList(filterExpr));
@ -197,33 +197,34 @@ public class VariantFiltrationUnitTest extends BaseTest {
vcBuilder.attribute("SOR", 0.693); vcBuilder.attribute("SOR", 0.693);
GenotypeBuilder gtBuilder = new GenotypeBuilder("one", Arrays.asList(altC,altC)); GenotypeBuilder gtBuilder = new GenotypeBuilder("one", Arrays.asList(altC,altC));
final String ATTRIBUTE_NOT_IN_VC = "Z";
final Genotype firstSample = gtBuilder.attribute(VCFConstants.GENOTYPE_KEY, GenotypeType.HOM_VAR) final Genotype firstSample = gtBuilder.attribute(VCFConstants.GENOTYPE_KEY, GenotypeType.HOM_VAR)
.DP(9) // edge case not passing "DP < 10" .attribute(ATTRIBUTE_NOT_IN_VC, 9) // edge case not passing "Z < 10"
.make(); .make();
gtBuilder = new GenotypeBuilder("two", Arrays.asList(altC,altC)); gtBuilder = new GenotypeBuilder("two", Arrays.asList(altC,altC));
final Genotype secondSample = gtBuilder.attribute(VCFConstants.GENOTYPE_KEY, GenotypeType.HOM_VAR) final Genotype secondSample = gtBuilder.attribute(VCFConstants.GENOTYPE_KEY, GenotypeType.HOM_VAR)
.DP(10) // edge case passing "DP < 10" .attribute(ATTRIBUTE_NOT_IN_VC, 10) // edge case passing "Z = 10"
.make(); .make();
gtBuilder = new GenotypeBuilder("three", Arrays.asList(nocall,nocall)); gtBuilder = new GenotypeBuilder("three", Arrays.asList(nocall,nocall));
final Genotype thirdSample = gtBuilder.attribute(VCFConstants.GENOTYPE_KEY, GenotypeType.NO_CALL) final Genotype thirdSample = gtBuilder.attribute(VCFConstants.GENOTYPE_KEY, GenotypeType.NO_CALL)
.DP(3) .attribute(ATTRIBUTE_NOT_IN_VC, 3)
.make(); .make();
gtBuilder = new GenotypeBuilder("four", Arrays.asList(nocall,nocall)); gtBuilder = new GenotypeBuilder("four", Arrays.asList(nocall,nocall));
final Genotype fourthSample = gtBuilder.attribute(VCFConstants.GENOTYPE_KEY, GenotypeType.NO_CALL) final Genotype fourthSample = gtBuilder.attribute(VCFConstants.GENOTYPE_KEY, GenotypeType.NO_CALL)
.DP(0) .attribute(ATTRIBUTE_NOT_IN_VC, (0))
.make(); .make();
gtBuilder = new GenotypeBuilder("five", Arrays.asList(nocall,nocall)); gtBuilder = new GenotypeBuilder("five", Arrays.asList(nocall,nocall));
final Genotype fifthSample = gtBuilder.attribute(VCFConstants.GENOTYPE_KEY, GenotypeType.NO_CALL) final Genotype fifthSample = gtBuilder.attribute(VCFConstants.GENOTYPE_KEY, GenotypeType.NO_CALL)
.DP(0) .attribute(ATTRIBUTE_NOT_IN_VC, 0)
.make(); .make();
gtBuilder = new GenotypeBuilder("six", Arrays.asList(altC,altC)); gtBuilder = new GenotypeBuilder("six", Arrays.asList(altC,altC));
final Genotype sixthSample = gtBuilder.attribute(VCFConstants.GENOTYPE_KEY, GenotypeType.HOM_VAR) final Genotype sixthSample = gtBuilder.attribute(VCFConstants.GENOTYPE_KEY, GenotypeType.HOM_VAR)
.DP(-1) //no Z
.make(); .make();
vcBuilder.genotypes(firstSample, secondSample, thirdSample, fourthSample, fifthSample, sixthSample); vcBuilder.genotypes(firstSample, secondSample, thirdSample, fourthSample, fifthSample, sixthSample);