JEXL2 upgrade. Improvements to JEXL processing, including resolving variable -> value bindings dynamically instead of adding them to a map up front. Performance improvements and code cleanup throughout.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3494 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
depristo 2010-06-07 00:33:02 +00:00
parent c1ecf75dd5
commit 6eeb1693ca
6 changed files with 148 additions and 92 deletions

View File

@ -36,7 +36,8 @@
<dependency org="org.jgrapht" name="jgrapht-jdk1.5" rev="0.7.3" conf="default"/>
<!-- Dependencies for VariantFiltration -->
<dependency org="commons-jexl" name="commons-jexl" rev="1.1" conf="default"/>
<!-- <dependency org="commons-jexl" name="commons-jexl" rev="1.1" conf="default"/> -->
<dependency org="org.apache.commons" name="commons-jexl" rev="2.0" conf="default"/>
<dependency org="commons-logging" name="commons-logging" rev="1.1.1" conf="default"/>
<dependency org="commons-io" name="commons-io" rev="1.3.2" conf="default"/>

View File

@ -24,13 +24,15 @@
package org.broadinstitute.sting.gatk.contexts.variantcontext;
import java.util.*;
import org.apache.commons.jexl.*;
import org.apache.commons.jexl2.*;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.genotype.HardyWeinbergCalculation;
import org.broad.tribble.vcf.VCFRecord;
public class VariantContextUtils {
public static JexlEngine engine = new JexlEngine();
/**
* A simple but common wrapper for matching VariantContext objects using JEXL expressions
*/
@ -90,7 +92,7 @@ public class VariantContextUtils {
if ( name == null || expStr == null ) throw new IllegalArgumentException("Cannot create null expressions : " + name + " " + expStr);
try {
Expression exp = ExpressionFactory.createExpression(expStr);
Expression exp = engine.createExpression(expStr);
exps.add(new JexlVCMatchExp(name, exp));
} catch (Exception e) {
throw new StingException("Invalid expression used (" + expStr + "). Please see the JEXL docs for correct syntax.");
@ -121,7 +123,7 @@ public class VariantContextUtils {
* @return true if there is a match
*/
public static Map<JexlVCMatchExp, Boolean> match(VariantContext vc, Collection<JexlVCMatchExp> exps) {
return new VariantJEXLContext(exps,vc).getVars();
return new JEXLMap(exps,vc);
}
@ -146,16 +148,10 @@ public class VariantContextUtils {
* @return true if there is a match
*/
public static Map<JexlVCMatchExp, Boolean> match(Genotype g, Collection<JexlVCMatchExp> exps) {
return new VariantJEXLContext(exps,g).getVars();
return new JEXLMap(exps,g);
}
private static void addAttributesToMap(Map<String, String> infoMap, Map<String, ?> attributes, String prefix ) {
for (Map.Entry<String, ?> e : attributes.entrySet()) {
infoMap.put(prefix + String.valueOf(e.getKey()), String.valueOf(e.getValue()));
}
}
public static double computeHardyWeinbergPvalue(VariantContext vc) {
if ( vc.getChromosomeCount() == 0 )
return 0.0;

View File

@ -23,8 +23,9 @@
package org.broadinstitute.sting.gatk.contexts.variantcontext;
import org.apache.commons.jexl.JexlContext;
import org.apache.commons.jexl.JexlHelper;
import org.apache.commons.jexl2.JexlContext;
import org.apache.commons.jexl2.MapContext;
//import org.apache.commons.jexl2.JexlHelper;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.Utils;
@ -44,26 +45,68 @@ import java.util.*;
class VariantJEXLContext implements JexlContext {
// our stored variant context
private final JEXLMap map;
private VariantContext vc;
private Genotype g;
public VariantJEXLContext(Collection<VariantContextUtils.JexlVCMatchExp> jexl, VariantContext vc) {
map = new JEXLMap(jexl, vc);
/**
 * Callback that derives the value of a single named JEXL variable from a VariantContext.
 */
private interface AttributeGetter {
    /**
     * @param vc the variant context to read from
     * @return the value bound to this getter's variable name for the given context
     */
    Object get(VariantContext vc);
}
public VariantJEXLContext(Collection<VariantContextUtils.JexlVCMatchExp> jexl, Genotype g) {
map = new JEXLMap(jexl, g);
/**
 * Table of built-in JEXL variable names, each mapped to the getter that computes its value
 * from a VariantContext at lookup time.
 */
private static Map<String, AttributeGetter> x = new HashMap<String, AttributeGetter>();

static {
    // where the variant is
    x.put("CHROM", new AttributeGetter() {
        public Object get(VariantContext vc) { return vc.getLocation().getContig(); }
    });
    x.put("POS", new AttributeGetter() {
        public Object get(VariantContext vc) { return vc.getLocation().getStart(); }
    });

    // what the variant is, and its quality (negLog10PError scaled by 10)
    x.put("TYPE", new AttributeGetter() {
        public Object get(VariantContext vc) { return vc.getType().toString(); }
    });
    x.put("QUAL", new AttributeGetter() {
        public Object get(VariantContext vc) { return 10 * vc.getNegLog10PError(); }
    });

    // alleles
    x.put("ALLELES", new AttributeGetter() {
        public Object get(VariantContext vc) { return vc.getAlleles(); }
    });
    x.put("N_ALLELES", new AttributeGetter() {
        public Object get(VariantContext vc) { return vc.getNAlleles(); }
    });

    // filter status, encoded as the strings "1" (filtered) and "0" (not filtered)
    x.put("FILTER", new AttributeGetter() {
        public Object get(VariantContext vc) { return vc.isFiltered() ? "1" : "0"; }
    });

    // Genotype-level names (GT, isHomRef, isHet, isHomVar) are deliberately not registered here:
    // a getter only receives the VariantContext, so it has no Genotype to read them from.
}
public void setVars(Map map) {
throw new UnsupportedOperationException("this operation is unsupported");
/**
 * Creates a context backed by a variant context only; the genotype is left as null.
 *
 * @param vc the variant context that variable lookups are resolved against
 */
public VariantJEXLContext(final VariantContext vc) {
    this(vc, null);
}
public Map getVars() {
return map;
/**
 * Creates a context that remembers the given variant context and genotype.
 *
 * @param vc the variant context that variable lookups are resolved against
 * @param g  the genotype to associate with this context; may be null
 */
public VariantJEXLContext(VariantContext vc, Genotype g) {
    this.g = g;
    this.vc = vc;
}
/**
 * Resolves a JEXL variable name to its value on demand.
 *
 * Lookup order: the built-in getter table first, then the variant context's own attributes,
 * then its filter names (a matching filter name resolves to the string "1").
 *
 * @param name the variable name used in the JEXL expression
 * @return the resolved value, or null when the name is not known to this context
 */
public Object get(String name) {
    // built-in names are computed by their registered getter
    final AttributeGetter builtin = x.get(name);
    if ( builtin != null )
        return builtin.get(vc);

    // otherwise fall back to an attribute stored on the variant context
    if ( vc.hasAttribute(name) )
        return vc.getAttribute(name);

    // finally, a filter applied to this variant is exposed as "1"
    if ( vc.getFilters().contains(name) )
        return "1";

    return null;
}
/**
 * Reports whether a variable name resolves to a value in this context.
 *
 * @param name the variable name to test
 * @return true when get(name) yields a non-null value
 */
public boolean has(String name) {
    final Object resolved = get(name);
    return resolved != null;
}
/**
 * Always rejects the call: this context does not accept variable assignments.
 *
 * @param name  ignored
 * @param value ignored
 * @throws UnsupportedOperationException on every call
 */
public void set(String name, Object value) {
    // the message names set(), the operation actually attempted (it previously said remove())
    throw new UnsupportedOperationException("set() not supported on a VariantJEXLContext");
}
}
/**
* This is an implementation of a Map of JexlVCMatchExp to true or false values. It lazily initializes each value
* as requested to save as much processing time as possible.
@ -111,52 +154,60 @@ class JEXLMap implements Map<VariantContextUtils.JexlVCMatchExp, Boolean> {
* should get added.
*
*/
private static final boolean USE_VCONTEXT = true;
private void createContext() {
if ( USE_VCONTEXT && g == null ) {
jContext = new VariantJEXLContext(vc, g);
} else {
Map<String, String> infoMap = new HashMap<String, String>();
Map<String, Object> infoMap = new HashMap<String, Object>();
if ( vc != null ) {
// create a mapping of what we know about the variant context, its Chromosome, positions, etc.
infoMap.put("CHROM", vc.getLocation().getContig());
infoMap.put("POS", String.valueOf(vc.getLocation().getStart()));
infoMap.put("TYPE", vc.getType().toString());
infoMap.put("QUAL", String.valueOf(10 * vc.getNegLog10PError()));
if ( vc != null ) {
// create a mapping of what we know about the variant context, its Chromosome, positions, etc.
infoMap.put("CHROM", vc.getLocation().getContig());
infoMap.put("POS", String.valueOf(vc.getLocation().getStart()));
infoMap.put("TYPE", vc.getType().toString());
infoMap.put("QUAL", String.valueOf(10 * vc.getNegLog10PError()));
// add alleles
infoMap.put("ALLELES", Utils.join(";", vc.getAlleles()));
infoMap.put("N_ALLELES", String.valueOf(vc.getNAlleles()));
// add alleles
infoMap.put("ALLELES", Utils.join(";", vc.getAlleles()));
infoMap.put("N_ALLELES", String.valueOf(vc.getNAlleles()));
// add attributes
addAttributesToMap(infoMap, vc.getAttributes());
// add attributes
addAttributesToMap(infoMap, vc.getAttributes());
// add filter fields
infoMap.put("FILTER", vc.isFiltered() ? "1" : "0");
for ( Object filterCode : vc.getFilters() ) {
infoMap.put(String.valueOf(filterCode), "1");
// add filter fields
infoMap.put("FILTER", vc.isFiltered() ? "1" : "0");
for ( Object filterCode : vc.getFilters() ) {
infoMap.put(String.valueOf(filterCode), "1");
}
// add genotype-specific fields
// TODO -- implement me when we figure out a good way to represent this
// for ( Genotype g : vc.getGenotypes().values() ) {
// String prefix = g.getSampleName() + ".";
// addAttributesToMap(infoMap, g.getAttributes(), prefix);
// infoMap.put(prefix + "GT", g.getGenotypeString());
// }
}
// add genotype-specific fields
// TODO -- implement me when we figure out a good way to represent this
// for ( Genotype g : vc.getGenotypes().values() ) {
// String prefix = g.getSampleName() + ".";
// addAttributesToMap(infoMap, g.getAttributes(), prefix);
// infoMap.put(prefix + "GT", g.getGenotypeString());
// }
}
// add specific genotype if one is provided
if ( g != null ) {
infoMap.put("GT", g.getGenotypeString());
infoMap.put("isHomRef", g.isHomRef() ? "1" : "0");
infoMap.put("isHet", g.isHet() ? "1" : "0");
infoMap.put("isHomVar", g.isHomVar() ? "1" : "0");
for ( Map.Entry<String, Object> e : g.getAttributes().entrySet() )
infoMap.put(e.getKey(), String.valueOf(e.getValue()));
}
// add specific genotype if one is provided
if ( g != null ) {
infoMap.put("GT", g.getGenotypeString());
infoMap.put("isHomRef", g.isHomRef() ? "1" : "0");
infoMap.put("isHet", g.isHet() ? "1" : "0");
infoMap.put("isHomVar", g.isHomVar() ? "1" : "0");
for ( Map.Entry<String, Object> e : g.getAttributes().entrySet() )
infoMap.put(e.getKey(), String.valueOf(e.getValue()));
}
// create the internal context that we can evaluate expressions against
jContext = JexlHelper.createContext();
jContext.setVars(infoMap);
// create the internal context that we can evaluate expressions against
jContext = new MapContext(infoMap);
// jContext = JexlHelper.createContext();
// jContext.setVars(infoMap);
}
}
/**
@ -184,7 +235,7 @@ class JEXLMap implements Map<VariantContextUtils.JexlVCMatchExp, Boolean> {
// try and cast the expression
VariantContextUtils.JexlVCMatchExp e = (VariantContextUtils.JexlVCMatchExp) o;
evaulateExpression(e);
evaluateExpression(e);
return jexl.get(e);
}
@ -206,7 +257,7 @@ class JEXLMap implements Map<VariantContextUtils.JexlVCMatchExp, Boolean> {
// this is an expensive call
for (VariantContextUtils.JexlVCMatchExp exp : jexl.keySet())
if (jexl.get(exp) == null)
evaulateExpression(exp);
evaluateExpression(exp);
return jexl.values();
}
@ -214,7 +265,7 @@ class JEXLMap implements Map<VariantContextUtils.JexlVCMatchExp, Boolean> {
* evaluate a JexlVCMatchExp's expression, given the current context (and set up the context if it's null)
* @param exp the JexlVCMatchExp to evaluate
*/
private void evaulateExpression(VariantContextUtils.JexlVCMatchExp exp) {
private void evaluateExpression(VariantContextUtils.JexlVCMatchExp exp) {
// if the context is null, we need to create it to evaluate the JEXL expression
if (this.jContext == null) createContext();
try {
@ -229,7 +280,7 @@ class JEXLMap implements Map<VariantContextUtils.JexlVCMatchExp, Boolean> {
* @param infoMap the map
* @param attributes the attributes
*/
private static void addAttributesToMap(Map<String, String> infoMap, Map<String, ?> attributes ) {
private static void addAttributesToMap(Map<String, Object> infoMap, Map<String, ?> attributes ) {
for (Map.Entry<String, ?> e : attributes.entrySet()) {
infoMap.put(e.getKey(), String.valueOf(e.getValue()));
}

View File

@ -25,13 +25,13 @@
package org.broadinstitute.sting.playground.gatk.walkers.vcftools;
import org.apache.commons.jexl.Expression;
import org.apache.commons.jexl.ExpressionFactory;
import org.apache.commons.jexl.JexlContext;
import org.apache.commons.jexl.JexlHelper;
import org.apache.commons.jexl2.Expression;
import org.apache.commons.jexl2.JexlContext;
import org.apache.commons.jexl2.MapContext;
import org.broad.tribble.vcf.*;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.RMD;
import org.broadinstitute.sting.gatk.walkers.Requires;
@ -43,7 +43,7 @@ import org.broadinstitute.sting.utils.genotype.vcf.*;
import java.util.*;
/**
* Selects variant calls for output from a user-supplied VCF file using a number of user-selectable, parameterizable criteria.
* Selects variant calls for output from a user-supplied VCF file using a number of user-selectable, parameterizable criteria. [TODO -- update to new walker style]
*/
@Requires(value={},referenceMetaData=@RMD(name="variant",type= VCFRecord.class))
public class VCFSelectWalker extends RodWalker<Integer, Integer> {
@ -85,7 +85,7 @@ public class VCFSelectWalker extends RodWalker<Integer, Integer> {
for ( int i = 0; i < MATCH_STRINGS.length; i++ ) {
if ( MATCH_STRINGS[i] != null ) {
try {
Expression filterExpression = ExpressionFactory.createExpression(MATCH_STRINGS[i]);
Expression filterExpression = VariantContextUtils.engine.createExpression(MATCH_STRINGS[i]);
matchExpressions.add(new MatchExp(String.format("match-%d", i), MATCH_STRINGS[i], filterExpression));
} catch (Exception e) {
throw new StingException("Invalid expression used (" + MATCH_STRINGS[i] + "). Please see the JEXL docs for correct syntax.");
@ -115,11 +115,10 @@ public class VCFSelectWalker extends RodWalker<Integer, Integer> {
boolean someoneMatched = false;
for ( MatchExp exp : matchExpressions ) {
Map<String, String> infoMap = new HashMap<String, String>(variant.getInfoValues());
Map<String, Object> infoMap = new HashMap<String, Object>(variant.getInfoValues());
infoMap.put("QUAL", String.valueOf(variant.getQual()));
JexlContext jContext = JexlHelper.createContext();
jContext.setVars(infoMap);
JexlContext jContext = new MapContext(infoMap);
try {
//System.out.printf("Matching %s vs. %s%n", infoMap, exp.expStr);

View File

@ -24,7 +24,6 @@
package org.broadinstitute.sting.gatk.contexts.variantcontext;
import net.sf.samtools.SAMFileHeader;
import org.apache.commons.jexl.ExpressionFactory;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
@ -72,7 +71,7 @@ public class VariantJEXLContextUnitTest extends BaseTest {
header = ArtificialSAMUtils.createArtificialSamHeader(( endingChr - startingChr ) + 1, startingChr, readCount + DEFAULT_READ_LENGTH);
GenomeLocParser.setupRefContigOrdering(header.getSequenceDictionary());
try {
exp = new VariantContextUtils.JexlVCMatchExp("name", ExpressionFactory.createExpression(expression));
exp = new VariantContextUtils.JexlVCMatchExp("name", VariantContextUtils.engine.createExpression(expression));
} catch (Exception e) {
Assert.fail("Unable to create expression" + e.getMessage());
}
@ -95,9 +94,7 @@ public class VariantJEXLContextUnitTest extends BaseTest {
@Test
public void testGetValue() {
VariantJEXLContext context = getVarContext();
Map<VariantContextUtils.JexlVCMatchExp, Boolean> map = context.getVars();
Map<VariantContextUtils.JexlVCMatchExp, Boolean> map = getVarContext();
// make sure the context has a value
Assert.assertTrue(!map.isEmpty());
@ -109,36 +106,28 @@ public class VariantJEXLContextUnitTest extends BaseTest {
@Test(expected=UnsupportedOperationException.class)
public void testContainsValue() {
VariantJEXLContext context = getVarContext();
Map<VariantContextUtils.JexlVCMatchExp, Boolean> map = context.getVars();
Map<VariantContextUtils.JexlVCMatchExp, Boolean> map = getVarContext();
map.containsValue(exp);
}
@Test(expected=UnsupportedOperationException.class)
public void testRemove() {
VariantJEXLContext context = getVarContext();
Map<VariantContextUtils.JexlVCMatchExp, Boolean> map = context.getVars();
Map<VariantContextUtils.JexlVCMatchExp, Boolean> map = getVarContext();
map.remove(exp);
}
@Test(expected=UnsupportedOperationException.class)
public void testEntrySet() {
VariantJEXLContext context = getVarContext();
Map<VariantContextUtils.JexlVCMatchExp, Boolean> map = context.getVars();
Map<VariantContextUtils.JexlVCMatchExp, Boolean> map = getVarContext();
map.entrySet();
}
@Test(expected=UnsupportedOperationException.class)
public void testClear() {
VariantJEXLContext context = getVarContext();
Map<VariantContextUtils.JexlVCMatchExp, Boolean> map = context.getVars();
Map<VariantContextUtils.JexlVCMatchExp, Boolean> map = getVarContext();
map.clear();
}
@ -147,12 +136,11 @@ public class VariantJEXLContextUnitTest extends BaseTest {
* helper method
* @return a VariantJEXLContext
*/
private VariantJEXLContext getVarContext() {
private JEXLMap getVarContext() {
List<Allele> alleles = Arrays.asList(Aref, T);
VariantContext vc = new VariantContext("test", snpLoc, alleles);
VariantJEXLContext context = new VariantJEXLContext(Arrays.asList(exp),vc);
return context;
return new JEXLMap(Arrays.asList(exp),vc);
}

View File

@ -17,6 +17,23 @@ public class
" -B eval,VCF," + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf" +
" -B comp_genotypes,VCF," + validationDataLocation + "yri.trio.gatk.ug.head.vcf -reportType Grep";
/**
 * Runs the walker over 1:1-10,000,000 with one select expression ("DP < 50", named DP50)
 * and compares the single output against the expected digest.
 */
@Test
public void testSelect1() {
    final String intervals = "-L 1:1-10,000,000";
    final String command = withSelect(root, "DP < 50", "DP50") + " " + intervals + " -o %s";
    // one output file, one expected digest (presumably an MD5 -- confirm against WalkerTestSpec)
    WalkerTestSpec spec = new WalkerTestSpec(command, 1, Arrays.asList("5a330d359b5c7ea0dfa6698b4830db82"));
    executeTest("testSelect1", spec);
}
/**
 * Runs the walker over 1:1-10,000,000 with two select expressions ("DP < 50" named DP50, and
 * set=="Intersection" named intersection) and compares the single output against the expected digest.
 */
@Test
public void testSelect2() {
    final String intervals = "-L 1:1-10,000,000";
    final String firstSelect = withSelect(root, "DP < 50", "DP50");
    final String bothSelects = withSelect(firstSelect, "set==\"Intersection\"", "intersection");
    final String command = bothSelects + " " + intervals + " -o %s";
    // one output file, one expected digest (presumably an MD5 -- confirm against WalkerTestSpec)
    WalkerTestSpec spec = new WalkerTestSpec(command, 1, Arrays.asList("e39d6790e4ee8709dfa2eab8598b168e"));
    executeTest("testSelect2", spec);
}
@Test
public void testVEGenotypeConcordance() {
WalkerTestSpec spec = new WalkerTestSpec( cmdRoot + " -B eval,VCF," + validationDataLocation + "GenotypeConcordanceEval.vcf -B comp,VCF," + validationDataLocation + "GenotypeConcordanceComp.vcf -E GenotypeConcordance -reportType CSV -o %s",
@ -74,4 +91,8 @@ public class
Arrays.asList("521837758da151b84fca57fd1bb7dad1", "b4a42c90318adc88361691ece50426f2"));
executeTest("testVEWriteVCF", spec);
}
/**
 * Appends a select expression and its name to a command line.
 *
 * @param cmd    the command line built so far
 * @param select the select expression; it is wrapped in single quotes
 * @param name   the value passed to -selectName
 * @return cmd followed by " -select '<select>' -selectName <name>"
 */
private static String withSelect(String cmd, String select, String name) {
    final String selectArg = " -select '" + select + "'";
    final String nameArg = " -selectName " + name;
    return cmd + selectArg + nameArg;
}
}