From 4c4d048f1417c374e17569845996ec0de91b0d1c Mon Sep 17 00:00:00 2001 From: ebanks Date: Fri, 19 Mar 2010 18:35:23 +0000 Subject: [PATCH] Moving VariantFiltration over to use VariantContext. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3048 348d0f76-0448-11de-a6fe-93d51630548a --- .../gatk/walkers/filters/ClusteredSnps.java | 10 +-- .../walkers/filters/FiltrationContext.java | 50 +++++++++++++ .../walkers/filters/VariantContextWindow.java | 18 ++--- .../filters/VariantFiltrationWalker.java | 75 +++++++------------ .../VariantFiltrationIntegrationTest.java | 12 +-- 5 files changed, 96 insertions(+), 69 deletions(-) create mode 100755 java/src/org/broadinstitute/sting/gatk/walkers/filters/FiltrationContext.java diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/filters/ClusteredSnps.java b/java/src/org/broadinstitute/sting/gatk/walkers/filters/ClusteredSnps.java index 730e45b1e..5b828385c 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/filters/ClusteredSnps.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/filters/ClusteredSnps.java @@ -1,10 +1,6 @@ package org.broadinstitute.sting.gatk.walkers.filters; import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.Pair; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.RodVCF; - public class ClusteredSnps { private int window = 10; @@ -19,13 +15,13 @@ public class ClusteredSnps { public boolean filter(VariantContextWindow contextWindow) { - Pair[] variants = contextWindow.getWindow(snpThreshold-1, snpThreshold-1); + FiltrationContext[] variants = contextWindow.getWindow(snpThreshold-1, snpThreshold-1); for (int i = 0; i < snpThreshold; i++) { if ( variants[i] == null || variants[i+snpThreshold-1] == null ) continue; - GenomeLoc left = variants[i].second.getLocation(); - GenomeLoc right = variants[i+snpThreshold-1].second.getLocation(); + GenomeLoc left = variants[i].getVariantContext().getLocation(); + GenomeLoc right = variants[i+snpThreshold-1].getVariantContext().getLocation(); if ( left.getContigIndex() == right.getContigIndex() && Math.abs(right.getStart() - left.getStart()) <= window ) return true; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/filters/FiltrationContext.java b/java/src/org/broadinstitute/sting/gatk/walkers/filters/FiltrationContext.java new file mode 100755 index 000000000..6d0afd594 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/walkers/filters/FiltrationContext.java @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2009 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.walkers.filters; + +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; + + +public class FiltrationContext { + + private RefMetaDataTracker tracker; + private ReferenceContext ref; + private VariantContext vc; + + public FiltrationContext(RefMetaDataTracker tracker, ReferenceContext ref, VariantContext vc) { + this.tracker = tracker; + this.ref = ref; + this.vc = vc; + } + + public RefMetaDataTracker getTracker() { return tracker; } + + public ReferenceContext getReferenceContext() { return ref; } + + public VariantContext getVariantContext() { return vc; } +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantContextWindow.java b/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantContextWindow.java index 52f14137a..d5e1daf8e 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantContextWindow.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantContextWindow.java @@ -25,8 +25,7 @@ package org.broadinstitute.sting.gatk.walkers.filters; -import org.broadinstitute.sting.utils.*; -import org.broadinstitute.sting.gatk.refdata.*; +import org.broadinstitute.sting.utils.StingException; import java.util.*; @@ -38,17 +37,18 @@ import java.util.*; */ public class VariantContextWindow { + /** * The variants. */ - private LinkedList> window = new LinkedList>(); + private LinkedList window = new LinkedList(); private int currentContext; /** * Contructor for a variant context. * @param firstVariants the first set of variants, comprising the right half of the window */ - public VariantContextWindow(List> firstVariants) { + public VariantContextWindow(List firstVariants) { int windowSize = (firstVariants == null ? 1 : 2 * firstVariants.size() + 1); currentContext = (firstVariants == null ? 0 : firstVariants.size()); window.addAll(firstVariants); @@ -60,7 +60,7 @@ public class VariantContextWindow { * The context currently being examined. * @return The current context. */ - public Pair getContext() { + public FiltrationContext getContext() { return window.get(currentContext); } @@ -78,14 +78,14 @@ public class VariantContextWindow { * @param elementsToRight number of later contexts to return () * @return The current context window. */ - public Pair[] getWindow(int elementsToLeft, int elementsToRight) { + public FiltrationContext[] getWindow(int elementsToLeft, int elementsToRight) { if ( elementsToLeft > maxWindowElements() || elementsToRight > maxWindowElements() ) throw new StingException("Too large a window requested"); if ( elementsToLeft < 0 || elementsToRight < 0 ) throw new StingException("Window size cannot be negative"); - Pair[] array = new Pair[elementsToLeft + elementsToRight + 1]; - ListIterator> iter = window.listIterator(currentContext - elementsToLeft); + FiltrationContext[] array = new FiltrationContext[elementsToLeft + elementsToRight + 1]; + ListIterator iter = window.listIterator(currentContext - elementsToLeft); for (int i = 0; i < elementsToLeft + elementsToRight + 1; i++) array[i] = iter.next(); return array; @@ -95,7 +95,7 @@ public class VariantContextWindow { * Move the window along to the next context * @param context The new rightmost context */ - public void moveWindow(Pair context) { + public void moveWindow(FiltrationContext context) { window.removeFirst(); window.addLast(context); } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java index 8086033a8..6afa6da0c 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java @@ -1,11 +1,12 @@ package org.broadinstitute.sting.gatk.walkers.filters; import org.broadinstitute.sting.gatk.contexts.*; +import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; +import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.*; import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; -import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.genotype.vcf.*; import org.broadinstitute.sting.utils.cmdLine.Argument; @@ -16,12 +17,13 @@ import org.apache.commons.jexl.*; /** * Filters variant calls using a number of user-selectable, parameterizable criteria. */ -@Requires(value={},referenceMetaData=@RMD(name="variant",type= RodVCF.class)) +@Requires(value={},referenceMetaData=@RMD(name="variant",type= ReferenceOrderedDatum.class)) public class VariantFiltrationWalker extends RodWalker { - @Argument(fullName="filterExpression", shortName="filter", doc="Expression used with INFO fields to filter (see wiki docs for more info)", required=false) - protected String[] FILTER_STRINGS = new String[]{null}; - @Argument(fullName="filterName", shortName="filterName", doc="The text to put in the FILTER field if a filter expression is provided and a variant call matches", required=false) - protected String[] FILTER_NAMES = new String[]{"GATK_filter"}; + + @Argument(fullName="filterExpression", shortName="filter", doc="One or more expression used with INFO fields to filter (see wiki docs for more info)", required=false) + protected String[] FILTER_EXPS = new String[]{}; + @Argument(fullName="filterName", shortName="filterName", doc="Names to use for the list of filters (must be a 1-to-1 mapping); this name is put in the FILTER field for variants that get filtered", required=false) + protected String[] FILTER_NAMES = new String[]{}; @Argument(fullName="clusterSize", shortName="cluster", doc="The number of SNPs which make up a cluster (see also --clusterWindowSize)", required=false) protected Integer clusterSize = 3; @@ -31,6 +33,9 @@ public class VariantFiltrationWalker extends RodWalker { @Argument(fullName="maskName", shortName="mask", doc="The text to put in the FILTER field if a 'mask' rod is provided and overlaps with a variant call", required=false) protected String MASK_NAME = "Mask"; + // JEXL expressions for the filters + List filterExps; + public static final String CLUSTERED_SNP_FILTER_NAME = "SnpCluster"; private VCFWriter writer = null; @@ -49,15 +54,13 @@ public class VariantFiltrationWalker extends RodWalker { } } - private List filterExpressions = new ArrayList(); - // the structures necessary to initialize and maintain a windowed context private VariantContextWindow variantContextWindow; private static final int windowSize = 10; // 10 variants on either end of the current one - private ArrayList> windowInitializer = new ArrayList>(); + private ArrayList windowInitializer = new ArrayList(); - private void initializeVcfWriter(RodVCF rod) { + private void initializeVcfWriter(VCFRecord rec) { // setup the header fields Set hInfo = new HashSet(); hInfo.addAll(VCFUtils.getHeaderFields(getToolkit())); @@ -67,8 +70,8 @@ public class VariantFiltrationWalker extends RodWalker { if ( clusterWindow > 0 ) hInfo.add(new VCFFilterHeaderLine(CLUSTERED_SNP_FILTER_NAME, "SNPs found in clusters")); - for ( FilterExp exp : filterExpressions ) { - hInfo.add(new VCFFilterHeaderLine(exp.name, exp.expStr)); + for ( VariantContextUtils.JexlVCMatchExp exp : filterExps ) { + hInfo.add(new VCFFilterHeaderLine(exp.name, exp.exp.toString())); } List dataSources = getToolkit().getRodDataSources(); @@ -80,26 +83,14 @@ public class VariantFiltrationWalker extends RodWalker { } writer = new VCFWriter(out); - writer.writeHeader(new VCFHeader(hInfo, rod.getHeader().getGenotypeSamples())); + writer.writeHeader(new VCFHeader(hInfo, new TreeSet(Arrays.asList(rec.getSampleNames())))); } public void initialize() { if ( clusterWindow > 0 ) clusteredSNPs = new ClusteredSnps(clusterSize, clusterWindow); - if ( FILTER_NAMES.length != FILTER_STRINGS.length ) - throw new StingException("Inconsistent number of provided filter names and expressions."); - - for ( int i = 0; i < FILTER_NAMES.length; i++ ) { - if ( FILTER_STRINGS[i] != null ) { - try { - Expression filterExpression = ExpressionFactory.createExpression(FILTER_STRINGS[i]); - filterExpressions.add(new FilterExp(FILTER_NAMES[i], FILTER_STRINGS[i], filterExpression)); - } catch (Exception e) { - throw new StingException("Invalid expression used (" + FILTER_STRINGS[i] + "). Please see the JEXL docs for correct syntax."); - } - } - } + filterExps = VariantContextUtils.initializeMatchExps(FILTER_NAMES, FILTER_EXPS); } public Integer reduceInit() { return 0; } @@ -121,8 +112,8 @@ public class VariantFiltrationWalker extends RodWalker { if ( rods == null || rods.size() == 0 ) return 0; - RodVCF variant = (RodVCF)rods.get(0); - Pair varContext = new Pair(tracker, variant); + VariantContext vc = VariantContextAdaptors.toVariantContext("variant", rods.get(0)); + FiltrationContext varContext = new FiltrationContext(tracker, ref, vc); // if we're still initializing the context, do so if ( windowInitializer != null ) { @@ -141,14 +132,14 @@ public class VariantFiltrationWalker extends RodWalker { private void filter() { // get the current context - Pair context = variantContextWindow.getContext(); + FiltrationContext context = variantContextWindow.getContext(); if ( context == null ) return; StringBuilder filterString = new StringBuilder(); // test for SNP mask, if present - RODRecordList mask = context.first.getTrackData("mask", null); + RODRecordList mask = context.getTracker().getTrackData("mask", null); if ( mask != null && mask.size() > 0 ) addFilter(filterString, MASK_NAME); @@ -156,22 +147,13 @@ public class VariantFiltrationWalker extends RodWalker { if ( clusteredSNPs != null && clusteredSNPs.filter(variantContextWindow) ) addFilter(filterString, CLUSTERED_SNP_FILTER_NAME); - for ( FilterExp exp : filterExpressions ) { - Map infoMap = new HashMap(context.second.mCurrentRecord.getInfoValues()); - infoMap.put("QUAL", String.valueOf(context.second.mCurrentRecord.getQual())); - - JexlContext jContext = JexlHelper.createContext(); - jContext.setVars(infoMap); - - try { - if ( (Boolean)exp.exp.evaluate(jContext) ) - addFilter(filterString, exp.name); - } catch (Exception e) { - throw new StingException("Error evaluating filter expression for given input. Most often this is caused by a malformatted expression string, such as using an integral comparison for floating-point values. Observed value was: "+e.getMessage()); - } + VariantContext vc = context.getVariantContext(); + for ( VariantContextUtils.JexlVCMatchExp exp : filterExps ) { + if ( VariantContextUtils.match(vc, exp) ) + addFilter(filterString, exp.name); } - writeVCF(context.second, filterString); + writeVCF(VariantContextAdaptors.toVCF(vc, context.getReferenceContext().getBase(), null, true), filterString); } private static void addFilter(StringBuilder sb, String filter) { @@ -180,8 +162,7 @@ public class VariantFiltrationWalker extends RodWalker { sb.append(filter); } - private void writeVCF(RodVCF variant, StringBuilder filterString) { - VCFRecord rec = variant.mCurrentRecord; + private void writeVCF(VCFRecord rec, StringBuilder filterString) { if ( filterString.length() != 0 ) { // if the record is already filtered, don't destroy those filters @@ -195,7 +176,7 @@ public class VariantFiltrationWalker extends RodWalker { } if ( writer == null ) - initializeVcfWriter(variant); + initializeVcfWriter(rec); writer.addRecord(rec); } diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java index 2dfa478b4..b98648a31 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java @@ -16,7 +16,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest { public void testNoAction() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -B variant,VCF," + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, - Arrays.asList("e8e1898d65eb77ec32b7eca6764864f3")); + Arrays.asList("af14c7d03e3516b61f2702c9e4a7780f")); executeTest("test no action", spec); } @@ -24,7 +24,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest { public void testClusteredSnps() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -window 10 -B variant,VCF," + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, - Arrays.asList("25da669bc6411d31f585b01be85e8429")); + Arrays.asList("b2b18929289bb47f07bcc23d4cec94c4")); executeTest("test clustered SNPs", spec); } @@ -32,7 +32,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest { public void testMask() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -mask foo -B mask,VCF," + validationDataLocation + "vcfexample2.vcf -B variant,VCF," + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, - Arrays.asList("bd08169351c4cc9a2d95f33a2a73c7fa")); + Arrays.asList("af048053eaef84fc4d61c51c50be1e0a")); executeTest("test mask", spec); } @@ -40,7 +40,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest { public void testFilter1() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -filter 'DoC < 20 || FisherStrand > 20.0' -filterName foo -B variant,VCF," + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, - Arrays.asList("9e7631aeda1c174f5aac6712279ed861")); + Arrays.asList("43a580d9c684a496f21d0d42939dd910")); executeTest("test filter #1", spec); } @@ -48,7 +48,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest { public void testFilter2() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -filter 'AlleleBalance < 70.0 && FisherStrand == 1.4' -filterName bar -B variant,VCF," + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, - Arrays.asList("9009742f5697c5a85f9e41e1d7d6f260")); + Arrays.asList("2bd1e8975d8105dec2fb6055fbf00569")); executeTest("test filter #2", spec); } @@ -56,7 +56,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest { public void testFilterWithSeparateNames() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " --filterName ABF -filter 'AlleleBalance < 70.0' --filterName FSF -filter 'FisherStrand == 1.4' -B variant,VCF," + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, - Arrays.asList("6b280c5abd6e911df8e4fca58c7b216b")); + Arrays.asList("3479158ffd02a45371b5103277a30a53")); executeTest("test filter with separate names #2", spec); } } \ No newline at end of file