From e365d308d4198cb78f40dd58948f45bb38e5cafb Mon Sep 17 00:00:00 2001 From: aaron Date: Mon, 15 Mar 2010 16:00:55 +0000 Subject: [PATCH] add a new JEXLContext that lazy-evaluates JEXL expressions given the VariantContext. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3003 348d0f76-0448-11de-a6fe-93d51630548a --- .../variantcontext/VariantContextUtils.java | 69 ++--- .../variantcontext/VariantJEXLContext.java | 255 ++++++++++++++++++ .../VariantJEXLContextTest.java | 159 +++++++++++ 3 files changed, 438 insertions(+), 45 deletions(-) create mode 100644 java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantJEXLContext.java create mode 100644 java/test/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantJEXLContextTest.java diff --git a/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextUtils.java b/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextUtils.java index 881ebabae..3be9fa682 100755 --- a/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextUtils.java +++ b/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextUtils.java @@ -1,9 +1,31 @@ +/* + * Copyright (c) 2010. The Broad Institute + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + package org.broadinstitute.sting.gatk.contexts.variantcontext; import java.util.*; import org.apache.commons.jexl.*; import org.broadinstitute.sting.utils.StingException; -import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.genotype.HardyWeinbergCalculation; public class VariantContextUtils { @@ -97,50 +119,7 @@ public class VariantContextUtils { * @return true if there is a match */ public static Map match(VariantContext vc, Collection exps) { - // todo -- actually, we should implement a JEXL context interface to VariantContext, - // todo -- which just looks up the values assigned statically here. Much better approach - - Map infoMap = new HashMap(); - - infoMap.put("CHROM", vc.getLocation().getContig()); - infoMap.put("POS", String.valueOf(vc.getLocation().getStart())); - infoMap.put("TYPE", vc.getType().toString()); - infoMap.put("QUAL", String.valueOf(10 * vc.getNegLog10PError())); - - // add alleles - infoMap.put("ALLELES", Utils.join(";", vc.getAlleles())); - infoMap.put("N_ALLELES", String.valueOf(vc.getNAlleles())); - - // add attributes - addAttributesToMap(infoMap, vc.getAttributes(), ""); - - // add filter fields - infoMap.put("FILTER", String.valueOf(vc.isFiltered() ? 
"1" : "0")); - for ( Object filterCode : vc.getFilters() ) { - infoMap.put(String.valueOf(filterCode), "1"); - } - - // add genotypes - // todo -- comment this back in when we figure out how to represent it nicely -// for ( Genotype g : vc.getGenotypes().values() ) { -// String prefix = g.getSampleName() + "."; -// addAttributesToMap(infoMap, g.getAttributes(), prefix); -// infoMap.put(prefix + "GT", g.getGenotypeString()); -// } - - JexlContext jContext = JexlHelper.createContext(); - //System.out.printf(infoMap.toString()); - jContext.setVars(infoMap); - - try { - Map resultMap = new HashMap(); - for ( JexlVCMatchExp e : exps ) { - resultMap.put(e, (Boolean)e.exp.evaluate(jContext)); - } - return resultMap; - } catch (Exception e) { - throw new StingException(e.getMessage()); - } + return new VariantJEXLContext(exps,vc).getVars(); } diff --git a/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantJEXLContext.java b/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantJEXLContext.java new file mode 100644 index 000000000..501fd4777 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantJEXLContext.java @@ -0,0 +1,255 @@ +/* + * Copyright (c) 2010. The Broad Institute + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.contexts.variantcontext; + +import org.apache.commons.jexl.JexlContext; +import org.apache.commons.jexl.JexlHelper; +import org.broadinstitute.sting.utils.StingException; +import org.broadinstitute.sting.utils.Utils; + +import java.util.*; + +/** + * + * @author aaron + * + * Class VariantJEXLContext + * + * implements the JEXL context for VariantContext; this saves us from + * having to generate a JEXL context lookup map every time we want to evaluate an expression. + * + * This is package-private; only classes in variantcontext should have access to it. + */ + +class VariantJEXLContext implements JexlContext { + // lazily evaluated map from each JEXL match expression to its result; handed out by getVars() + private final JEXLMap map; + + public VariantJEXLContext(Collection jexl, VariantContext vc) { + map = new JEXLMap(jexl, vc); + } + + @Override + public void setVars(Map map) { + throw new UnsupportedOperationException("this operation is unsupported"); + } + + @Override + public Map getVars() { + return map; + } +} + + +/** + * this is an implementation of a Map of JexlVCMatchExp to true or false values. It lazily initializes each value + * as requested to save as much processing time as possible. 
+ * + * Compatible with JEXL 1.1 (this code will be easier if we move to 2.0, all of the functionality can go into the + * JexlContext's get()) + * + */ + +class JEXLMap implements Map<VariantContextUtils.JexlVCMatchExp, Boolean> { + // our variant context + private final VariantContext vc; + + // our context; stays null until createContext() builds it for the first evaluation + private JexlContext jContext = null; + + // our mapping from JexlVCMatchExp to Booleans, which will be set to NULL for previously uncached JexlVCMatchExp + private final Map<VariantContextUtils.JexlVCMatchExp, Boolean> jexl; + + + public JEXLMap(Collection<VariantContextUtils.JexlVCMatchExp> jexlCollection, VariantContext vc) { + this.vc = vc; + jexl = new HashMap<VariantContextUtils.JexlVCMatchExp, Boolean>(); + for (VariantContextUtils.JexlVCMatchExp exp: jexlCollection) { + jexl.put(exp, null); + } + } + + /** + * create the internal JexlContext, only when required. This code is where new JEXL context variables + * should get added. + * + * @param vc the VariantContext + * + */ + private void createContext(VariantContext vc) { + // create a mapping of what we know about the variant context, its Chromosome, positions, etc. + Map<String, String> infoMap = new HashMap<String, String>(); + infoMap.put("CHROM", vc.getLocation().getContig()); + infoMap.put("POS", String.valueOf(vc.getLocation().getStart())); + infoMap.put("TYPE", vc.getType().toString()); + infoMap.put("QUAL", String.valueOf(10 * vc.getNegLog10PError())); + + // add alleles + infoMap.put("ALLELES", Utils.join(";", vc.getAlleles())); + infoMap.put("N_ALLELES", String.valueOf(vc.getNAlleles())); + + // add attributes + addAttributesToMap(infoMap, vc.getAttributes()); + + // add filter fields + infoMap.put("FILTER", String.valueOf(vc.isFiltered() ? 
"1" : "0")); + for ( Object filterCode : vc.getFilters() ) { + infoMap.put(String.valueOf(filterCode), "1"); + } + + // add genotypes + // todo -- comment this back in when we figure out how to represent it nicely +// for ( Genotype g : vc.getGenotypes().values() ) { +// String prefix = g.getSampleName() + "."; +// addAttributesToMap(infoMap, g.getAttributes(), prefix); +// infoMap.put(prefix + "GT", g.getGenotypeString()); +// } + + + // create the internal context that we can evaluate expressions against + jContext = JexlHelper.createContext(); + jContext.setVars(infoMap); + } + + /** + * @return the size of the internal data structure + */ + @Override + public int size() { + return jexl.size(); + } + + /** + * @return true if we're empty + */ + @Override + public boolean isEmpty() { return this.jexl.isEmpty(); } + + /** + * do we contain the specified key + * @param o the key + * @return true if the key is present, whether or not its value has been evaluated yet + */ + @Override + public boolean containsKey(Object o) { return jexl.containsKey(o); } + + @Override + public Boolean get(Object o) { + // if we've already determined the value, return it + if (jexl.containsKey(o) && jexl.get(o) != null) return jexl.get(o); + + // not cached yet: cast the key (ClassCastException if it is not a JexlVCMatchExp), evaluate it and cache the result + VariantContextUtils.JexlVCMatchExp e = (VariantContextUtils.JexlVCMatchExp) o; + evaulateExpression(e); + return jexl.get(e); + } + + /** + * get the keyset of map + * @return a set of keys of type JexlVCMatchExp + */ + @Override + public Set<VariantContextUtils.JexlVCMatchExp> keySet() { + return jexl.keySet(); + } + + /** + * get all the values of the map. This is an expensive call, since it evaluates all keys that haven't + * been evaluated yet. This is fine if you truly want all the values, but if you only want a portion, or know + * the keys you want, you would be better off using get() to get them by name. 
+ * @return a collection of boolean values, representing the results of all the expressions evaluated + */ + @Override + public Collection<Boolean> values() { + // this is an expensive call + for (VariantContextUtils.JexlVCMatchExp exp : jexl.keySet()) + if (jexl.get(exp) == null) + evaulateExpression(exp); + return jexl.values(); + } + + /** + * evaluate a JexlVCMatchExp's expression, given the current context (and setup the context if it's null); any failure is rethrown as a StingException that keeps the original exception as its cause + * @param exp the JexlVCMatchExp to evaluate + */ + private void evaulateExpression(VariantContextUtils.JexlVCMatchExp exp) { + // if the context is null, we need to create it to evaluate the JEXL expression + if (this.jContext == null) createContext(vc); + try { + jexl.put (exp, (Boolean) exp.exp.evaluate(jContext)); + } catch (Exception e) { + throw (StingException) new StingException(e.getMessage()).initCause(e); + } + } + + /** + * helper function: adds the list of attributes to the information map we're building + * @param infoMap the map + * @param attributes the attributes + */ + private static void addAttributesToMap(Map<String, String> infoMap, Map<String, ?> attributes ) { + for (Map.Entry<String, ?> e : attributes.entrySet()) { + infoMap.put(String.valueOf(e.getKey()), String.valueOf(e.getValue())); + } + } + + @Override + public Boolean put(VariantContextUtils.JexlVCMatchExp jexlVCMatchExp, Boolean aBoolean) { + return jexl.put(jexlVCMatchExp,aBoolean); + } + + @Override + public void putAll(Map<? extends VariantContextUtils.JexlVCMatchExp, ? extends Boolean> map) { + jexl.putAll(map); + } + + // ////////////////////////////////////////////////////////////////////////////////////// + // The Following are unsupported at the moment + // ////////////////////////////////////////////////////////////////////////////////////// + + // this doesn't make much sense to implement, boolean doesn't offer too much variety to deal + // with evaluating every key in the internal map. 
+ @Override + public boolean containsValue(Object o) { + throw new UnsupportedOperationException("containsValue() not supported on a JEXLMap"); + } + + // this doesn't make much sense + @Override + public Boolean remove(Object o) { + throw new UnsupportedOperationException("remove() not supported on a JEXLMap"); + } + + + @Override + public Set<Map.Entry<VariantContextUtils.JexlVCMatchExp, Boolean>> entrySet() { + throw new UnsupportedOperationException("entrySet() not supported on a JEXLMap"); + } + + // nope + @Override + public void clear() { + throw new UnsupportedOperationException("clear() not supported on a JEXLMap"); + } +} diff --git a/java/test/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantJEXLContextTest.java b/java/test/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantJEXLContextTest.java new file mode 100644 index 000000000..cbccc70a5 --- /dev/null +++ b/java/test/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantJEXLContextTest.java @@ -0,0 +1,159 @@ +/* + * Copyright (c) 2010. The Broad Institute + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.contexts.variantcontext; + +import net.sf.samtools.SAMFileHeader; +import org.apache.commons.jexl.ExpressionFactory; +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; +import org.junit.Assert; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; + +import java.util.Arrays; +import java.util.List; +import java.util.Map; + + +/** + * + * @author aaron + * + * Class VariantJEXLContextTest + * + * Test out parts of the VariantJEXLContext + */ +public class VariantJEXLContextTest extends BaseTest { + + + private static String expression = "QUAL > 500.0"; + private static VariantContextUtils.JexlVCMatchExp exp; + + Allele A, Aref, T, Tref; + + Allele del, delRef, ATC, ATCref; + // A [ref] / T at 10 + + GenomeLoc snpLoc = GenomeLocParser.createGenomeLoc("chr1", 10, 10); + // - / ATC [ref] from 20-23 + + private static int startingChr = 1; + private static int endingChr = 2; + private static int readCount = 100; + private static int DEFAULT_READ_LENGTH = ArtificialSAMUtils.DEFAULT_READ_LENGTH; + static SAMFileHeader header; + + @BeforeClass + public static void beforeClass() { + header = ArtificialSAMUtils.createArtificialSamHeader(( endingChr - startingChr ) + 1, startingChr, readCount + DEFAULT_READ_LENGTH); + GenomeLocParser.setupRefContigOrdering(header.getSequenceDictionary()); + try { + exp = new VariantContextUtils.JexlVCMatchExp("name", ExpressionFactory.createExpression(expression)); + } catch (Exception e) { + Assert.fail("Unable to create expression" + 
e.getMessage()); + } + } + + @Before + public void before() { + del = new Allele("-"); + delRef = new Allele("-", true); + + A = new Allele("A"); + Aref = new Allele("A", true); + T = new Allele("T"); + Tref = new Allele("T", true); + + ATC = new Allele("ATC"); + ATCref = new Allele("ATC", true); + } + + + @Test + public void testGetValue() { + VariantJEXLContext context = getVarContext(); + + Map<VariantContextUtils.JexlVCMatchExp, Boolean> map = context.getVars(); + + // make sure the context has a value + Assert.assertTrue(!map.isEmpty()); + Assert.assertEquals(1,map.size()); + + // eval our known expression; this test expects it to come back false + Assert.assertTrue(!map.get(exp)); + } + + @Test(expected=UnsupportedOperationException.class) + public void testContainsValue() { + VariantJEXLContext context = getVarContext(); + + Map<VariantContextUtils.JexlVCMatchExp, Boolean> map = context.getVars(); + + map.containsValue(exp); + } + + @Test(expected=UnsupportedOperationException.class) + public void testRemove() { + VariantJEXLContext context = getVarContext(); + + Map<VariantContextUtils.JexlVCMatchExp, Boolean> map = context.getVars(); + + map.remove(exp); + } + + @Test(expected=UnsupportedOperationException.class) + public void testEntrySet() { + VariantJEXLContext context = getVarContext(); + + Map<VariantContextUtils.JexlVCMatchExp, Boolean> map = context.getVars(); + + map.entrySet(); + } + + @Test(expected=UnsupportedOperationException.class) + public void testClear() { + VariantJEXLContext context = getVarContext(); + + Map<VariantContextUtils.JexlVCMatchExp, Boolean> map = context.getVars(); + + map.clear(); + } + + /** + * helper method: wraps the single test expression and an A(ref)/T variant located at snpLoc + * @return a VariantJEXLContext + */ + private VariantJEXLContext getVarContext() { + List<Allele> alleles = Arrays.asList(Aref, T); + + VariantContext vc = new VariantContext("test", snpLoc, alleles); + VariantJEXLContext context = new VariantJEXLContext(Arrays.asList(exp),vc); + return context; + } + + +}