From 5aaf4e64349e0021fcaaf0f8191b317bc9686e1f Mon Sep 17 00:00:00 2001 From: depristo Date: Fri, 29 Jan 2010 12:13:08 +0000 Subject: [PATCH] VariantFiltration now accepts any number of --filterName / --filterExpression (-filter) pairs, and annotates the VCF file with the name of each filter expression that matches. Very useful. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2732 348d0f76-0448-11de-a6fe-93d51630548a --- .../filters/VariantFiltrationWalker.java | 49 +++++++++++++------ .../VariantFiltrationIntegrationTest.java | 8 +++ 2 files changed, 43 insertions(+), 14 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java index 21679e825..8fb655d30 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java @@ -18,9 +18,9 @@ import org.apache.commons.jexl.*; @Requires(value={},referenceMetaData=@RMD(name="variant",type= RodVCF.class)) public class VariantFiltrationWalker extends RodWalker { @Argument(fullName="filterExpression", shortName="filter", doc="Expression used with INFO fields to filter (see wiki docs for more info)", required=false) - protected String FILTER_STRING = null; + protected String[] FILTER_STRINGS = new String[]{null}; @Argument(fullName="filterName", shortName="filterName", doc="The text to put in the FILTER field if a filter expression is provided and a variant call matches", required=false) - protected String FILTER_NAME = "GATK_filter"; + protected String[] FILTER_NAMES = new String[]{"GATK_filter"}; @Argument(fullName="clusterSize", shortName="cluster", doc="The number of SNPs which make up a cluster (see also --clusterWindowSize)", required=false) protected Integer clusterSize = 3; @@ -32,12 +32,23 @@ public class VariantFiltrationWalker extends RodWalker { public static final String 
CLUSTERED_SNP_FILTER_NAME = "SnpCluster"; - private VCFWriter writer = null; private ClusteredSnps clusteredSNPs = null; - private Expression filterExpression = null; + class FilterExp { + String name; + String expStr; + Expression exp; + + public FilterExp(String name, String str, Expression exp) { + this.name = name; + this.expStr = str; + this.exp = exp; + } + } + + private List filterExpressions = new ArrayList(); // the structures necessary to initialize and maintain a windowed context private VariantContextWindow variantContextWindow; @@ -54,8 +65,11 @@ public class VariantFiltrationWalker extends RodWalker { if ( clusterWindow > 0 ) hInfo.add(new VCFFilterHeaderLine(CLUSTERED_SNP_FILTER_NAME, "SNPs found in clusters")); - if ( filterExpression != null ) - hInfo.add(new VCFFilterHeaderLine(FILTER_NAME, FILTER_STRING)); + + for ( FilterExp exp : filterExpressions ) { + hInfo.add(new VCFFilterHeaderLine(exp.name, exp.expStr)); + } + List dataSources = getToolkit().getRodDataSources(); for ( ReferenceOrderedDataSource source : dataSources ) { if ( source.getReferenceOrderedData().getName().equals("mask") ) { @@ -72,11 +86,18 @@ public class VariantFiltrationWalker extends RodWalker { if ( clusterWindow > 0 ) clusteredSNPs = new ClusteredSnps(clusterSize, clusterWindow); - try { - if ( FILTER_STRING != null ) - filterExpression = ExpressionFactory.createExpression(FILTER_STRING); - } catch (Exception e) { - throw new StingException("Invalid expression used (" + FILTER_STRING + "). 
Please see the JEXL docs for correct syntax."); + if ( FILTER_NAMES.length != FILTER_STRINGS.length ) + throw new StingException("Inconsistent number of provided filter names and expressions."); + + for ( int i = 0; i < FILTER_NAMES.length; i++ ) { + if ( FILTER_STRINGS[i] != null ) { + try { + Expression filterExpression = ExpressionFactory.createExpression(FILTER_STRINGS[i]); + filterExpressions.add(new FilterExp(FILTER_NAMES[i], FILTER_STRINGS[i], filterExpression)); + } catch (Exception e) { + throw new StingException("Invalid expression used (" + FILTER_STRINGS[i] + "). Please see the JEXL docs for correct syntax."); + } + } } } @@ -134,7 +155,7 @@ public class VariantFiltrationWalker extends RodWalker { if ( clusteredSNPs != null && clusteredSNPs.filter(variantContextWindow) ) addFilter(filterString, CLUSTERED_SNP_FILTER_NAME); - if ( filterExpression != null ) { + for ( FilterExp exp : filterExpressions ) { Map infoMap = new HashMap(context.second.mCurrentRecord.getInfoValues()); infoMap.put("QUAL", String.valueOf(context.second.mCurrentRecord.getQual())); @@ -142,8 +163,8 @@ public class VariantFiltrationWalker extends RodWalker { jContext.setVars(infoMap); try { - if ( (Boolean)filterExpression.evaluate(jContext) ) - addFilter(filterString, FILTER_NAME); + if ( (Boolean)exp.exp.evaluate(jContext) ) + addFilter(filterString, exp.name); } catch (Exception e) { throw new StingException(e.getMessage()); } diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java index da75a7b70..419cd176e 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java @@ -51,4 +51,12 @@ public class VariantFiltrationIntegrationTest extends WalkerTest { 
Arrays.asList("18ef67575a76c2be74af8bd8b9fdf86e")); executeTest("test filter #2", spec); } + + @Test + public void testFilterWithSeparateNames() { + WalkerTestSpec spec = new WalkerTestSpec( + baseTestString() + " --filterName ABF -filter 'AlleleBalance < 70.0' --filterName FSF -filter 'FisherStrand == 1.4' -B variant,VCF," + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, + Arrays.asList("7de66cac85cfe8a70219e51a5f6c251c")); + executeTest("test filter with separate names #2", spec); + } } \ No newline at end of file