Merge pull request #1533 from broadinstitute/lb_update_jexl_behavior

Rev htsjdk + update jexl behavior
This commit is contained in:
Geraldine Van der Auwera 2016-12-10 13:33:06 -05:00 committed by GitHub
commit dca24a9fa4
4 changed files with 31 additions and 36 deletions

View File

@ -211,21 +211,26 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
executeTest("testFilteringDPfromFORMAT", spec);
}
// The current htsjdk implementation of JEXL matching on genotype fields is buggy. When the filter uses an
// annotation that is present in both FORMAT and INFO, and the FORMAT value is missing, the current code (Dec 2016)
// will look up the INFO value. Here we use a made-up annotation Z instead of DP to avoid having to rig the test
// so that the INFO value will give the same matching results as the FORMAT value.
@Test
public void testFilteringDPfromFORMATWithMissing() {
public void testFilteringZfromFORMATWithMissing() {
WalkerTestSpec spec = new WalkerTestSpec(
"-T VariantFiltration -o %s --no_cmdline_in_header -R " + b37KGReference
+ " --genotypeFilterExpression 'DP < 10' --genotypeFilterName lowDP -V " + privateTestDir + "filteringDepthInFormatWithMissing.vcf", 1,
Arrays.asList("cc55e6a7bae2ab3503ecefc973ec1c2d"));
+ " --genotypeFilterExpression 'Z < 10' --genotypeFilterName lowZ -V " + privateTestDir + "filteringDepthInFormatWithMissing.vcf", 1,
Arrays.asList("47607708dee31b6033f14a3613c8acb8"));
executeTest("testFilteringDPfromFORMATWithMissing", spec);
}
// Same comment as above.
@Test
public void testFilteringDPfromFORMATAndFailMissing() {
public void testFilteringZfromFORMATAndFailMissing() {
WalkerTestSpec spec = new WalkerTestSpec(
"-T VariantFiltration -o %s --no_cmdline_in_header -R " + b37KGReference
+ " --missingValuesInExpressionsShouldEvaluateAsFailing --genotypeFilterExpression 'DP < 10' --genotypeFilterName lowDP -V " + privateTestDir + "filteringDepthInFormatWithMissing.vcf", 1,
Arrays.asList("521e6f33325a051ced28152a1e7c273d"));
+ " --missingValuesInExpressionsShouldEvaluateAsFailing --genotypeFilterExpression 'Z < 10' --genotypeFilterName lowZ -V " + privateTestDir + "filteringDepthInFormatWithMissing.vcf", 1,
Arrays.asList("4f519e725203931841940707c50ab6a3"));
executeTest("testFilteringDPfromFORMATAndFailMissing", spec);
}

View File

@ -44,8 +44,8 @@
<test.listeners>org.testng.reporters.FailedReporter,org.testng.reporters.JUnitXMLReporter,org.broadinstitute.gatk.utils.TestNGTestTransformer,org.broadinstitute.gatk.utils.GATKTextReporter,org.uncommons.reportng.HTMLReporter</test.listeners>
<!-- Version numbers for picard and htsjdk -->
<htsjdk.version>2.6.1</htsjdk.version>
<picard.version>2.6.0</picard.version>
<htsjdk.version>2.8.1</htsjdk.version>
<picard.version>2.7.2</picard.version>
</properties>
<!-- Dependency configuration (versions, etc.) -->

View File

@ -424,18 +424,9 @@ public class VariantFiltration extends RodWalker<Integer, Integer> implements Tr
// Add if expression filters the variant context
for ( final VariantContextUtils.JexlVCMatchExp exp : genotypeFilterExpressions ) {
try {
if (Utils.invertLogic(VariantContextUtils.match(vc, g, exp), invertGenotypeFilterExpression)) {
if (matchesFilter(vc, g, exp, invertGenotypeFilterExpression, failIfMissingValues)){
filters.add(exp.name);
}
} catch (final IllegalArgumentException e) {
// logic: right now (2016/08/18) if a filter is applied based on specific annotation and some sample contains missing value for such annotation,
// lower level code will throw IllegalArgumentException, therefore we specifically catch this type of exception
// do nothing unless specifically asked to; it just means that the expression isn't defined for this context
if ( failIfMissingValues ) {
filters.add(exp.name);
}
}
}
// if sample is filtered and --setFilteredGtToNocall, set genotype to non-call
@ -480,20 +471,18 @@ public class VariantFiltration extends RodWalker<Integer, Integer> implements Tr
final Set<String> filters = new LinkedHashSet<>(vc.getFilters());
for ( final VariantContextUtils.JexlVCMatchExp exp : vcFilterExpressions ) {
try {
if ( Utils.invertLogic(VariantContextUtils.match(vc, exp), invertVCfilterExpression) ) {
if (matchesFilter(vc, null, exp, invertVCfilterExpression, failIfMissingValues)) {
filters.add(exp.name);
}
} catch (final Exception e) {
// do nothing unless specifically asked to; it just means that the expression isn't defined for this context
if ( failIfMissingValues ) {
filters.add(exp.name);
}
}
}
return filters;
}
private static boolean matchesFilter(final VariantContext vc, final Genotype g, final VariantContextUtils.JexlVCMatchExp exp, final boolean invertVCfilterExpression, final boolean failIfMissingValues) {
final JexlMissingValueTreatment howToTreatMissingValues = failIfMissingValues ? JexlMissingValueTreatment.TREAT_AS_MATCH : JexlMissingValueTreatment.TREAT_AS_MISMATCH;
return Utils.invertLogic(VariantContextUtils.match(vc, g, exp, howToTreatMissingValues), invertVCfilterExpression);
}
// -----------------------------------------------------------------------------------------------
// some other complications besides main stuff
// -----------------------------------------------------------------------------------------------

View File

@ -113,8 +113,8 @@ public class VariantFiltrationUnitTest extends BaseTest {
final VariantContext vc = buildDataForFilters().make();
final String filterName = "LowDP";
final String filterExpr = "DP < 10";
final String filterName = "LowZ"; //an attribute that doesn't appear in the VariantContext, so there isn't any chance of confusion like with the INFO DP
final String filterExpr = "Z < 10";
final List<VariantContextUtils.JexlVCMatchExp> genotypeFilterExps = VariantContextUtils.initializeMatchExps(Arrays.asList(filterName), Arrays.asList(filterExpr));
@ -197,33 +197,34 @@ public class VariantFiltrationUnitTest extends BaseTest {
vcBuilder.attribute("SOR", 0.693);
GenotypeBuilder gtBuilder = new GenotypeBuilder("one", Arrays.asList(altC,altC));
final String ATTRIBUTE_NOT_IN_VC = "Z";
final Genotype firstSample = gtBuilder.attribute(VCFConstants.GENOTYPE_KEY, GenotypeType.HOM_VAR)
.DP(9) // edge case not passing "DP < 10"
.attribute(ATTRIBUTE_NOT_IN_VC, 9) // edge case not passing "Z < 10"
.make();
gtBuilder = new GenotypeBuilder("two", Arrays.asList(altC,altC));
final Genotype secondSample = gtBuilder.attribute(VCFConstants.GENOTYPE_KEY, GenotypeType.HOM_VAR)
.DP(10) // edge case passing "DP < 10"
.attribute(ATTRIBUTE_NOT_IN_VC, 10) // edge case passing "Z = 10"
.make();
gtBuilder = new GenotypeBuilder("three", Arrays.asList(nocall,nocall));
final Genotype thirdSample = gtBuilder.attribute(VCFConstants.GENOTYPE_KEY, GenotypeType.NO_CALL)
.DP(3)
.attribute(ATTRIBUTE_NOT_IN_VC, 3)
.make();
gtBuilder = new GenotypeBuilder("four", Arrays.asList(nocall,nocall));
final Genotype fourthSample = gtBuilder.attribute(VCFConstants.GENOTYPE_KEY, GenotypeType.NO_CALL)
.DP(0)
.attribute(ATTRIBUTE_NOT_IN_VC, (0))
.make();
gtBuilder = new GenotypeBuilder("five", Arrays.asList(nocall,nocall));
final Genotype fifthSample = gtBuilder.attribute(VCFConstants.GENOTYPE_KEY, GenotypeType.NO_CALL)
.DP(0)
.attribute(ATTRIBUTE_NOT_IN_VC, 0)
.make();
gtBuilder = new GenotypeBuilder("six", Arrays.asList(altC,altC));
final Genotype sixthSample = gtBuilder.attribute(VCFConstants.GENOTYPE_KEY, GenotypeType.HOM_VAR)
.DP(-1)
//no Z
.make();
vcBuilder.genotypes(firstSample, secondSample, thirdSample, fourthSample, fifthSample, sixthSample);