diff --git a/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContext.java b/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContext.java index 2174f3392..598eff4f6 100755 --- a/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContext.java +++ b/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContext.java @@ -496,7 +496,7 @@ public class VariantContext { return ref; } - /** Private helper routine that grabs the reference allele but doesn't through an error if there's no such allele */ + /** Private helper routine that grabs the reference allele but doesn't throw an error if there's no such allele */ private Allele getReferenceWithoutError() { for ( Allele allele : getAlleles() ) if ( allele.isReference() ) diff --git a/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextUtils.java b/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextUtils.java index 3be9fa682..42dcee7cf 100755 --- a/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextUtils.java +++ b/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextUtils.java @@ -123,6 +123,31 @@ public class VariantContextUtils { } + /** + * Returns true if exp match VC. See collection<> version for full docs. + * @param g genotype + * @param exp expression + * @return true if there is a match + */ + public static boolean match(Genotype g, JexlVCMatchExp exp) { + return match(g,Arrays.asList(exp)).get(exp); + } + + /** + * Matches each JexlVCMatchExp exp against the data contained in vc, and returns a map from these + * expressions to true (if they matched) or false (if they didn't). This the best way to apply JEXL + * expressions to VariantContext records. Use initializeMatchExps() to create the list of JexlVCMatchExp + * expressions. + * + * @param g genotype + * @param exps expressions + * @return true if there is a match + */ + public static Map match(Genotype g, Collection exps) { + return new VariantJEXLContext(exps,g).getVars(); + + } + private static void addAttributesToMap(Map infoMap, Map attributes, String prefix ) { for (Map.Entry e : attributes.entrySet()) { infoMap.put(prefix + String.valueOf(e.getKey()), String.valueOf(e.getValue())); diff --git a/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantJEXLContext.java b/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantJEXLContext.java index 501fd4777..92e2d7078 100644 --- a/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantJEXLContext.java +++ b/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantJEXLContext.java @@ -50,12 +50,14 @@ class VariantJEXLContext implements JexlContext { map = new JEXLMap(jexl, vc); } - @Override + public VariantJEXLContext(Collection jexl, Genotype g) { + map = new JEXLMap(jexl, g); + } + public void setVars(Map map) { throw new UnsupportedOperationException("this operation is unsupported"); } - @Override public Map getVars() { return map; } @@ -72,18 +74,32 @@ class VariantJEXLContext implements JexlContext { */ class JEXLMap implements Map { - // our variant context + // our variant context and/or Genotype private final VariantContext vc; + private final Genotype g; // our context private JexlContext jContext = null; // our mapping from JEXLVCMatchExp to Booleans, which will be set to NULL for previously uncached JexlVCMatchExp - private final Map jexl; + private Map jexl; + public JEXLMap(Collection jexlCollection, VariantContext vc, Genotype g) { + this.vc = vc; + this.g = g; + initialize(jexlCollection); + } + public JEXLMap(Collection jexlCollection, VariantContext vc) { - this.vc = vc; + this(jexlCollection, vc, null); + } + + public JEXLMap(Collection jexlCollection, Genotype g) { + this(jexlCollection, null, g); + } + + private void initialize(Collection jexlCollection) { jexl = new HashMap(); for (VariantContextUtils.JexlVCMatchExp exp: jexlCollection) { jexl.put(exp, null); @@ -94,38 +110,49 @@ class JEXLMap implements Map { * create the internal JexlContext, only when required. This code is where new JEXL context variables * should get added. * - * @param vc the VariantContext - * */ - private void createContext(VariantContext vc) { - // create a mapping of what we know about the variant context, its Chromosome, positions, etc. + private void createContext() { + Map infoMap = new HashMap(); - infoMap.put("CHROM", vc.getLocation().getContig()); - infoMap.put("POS", String.valueOf(vc.getLocation().getStart())); - infoMap.put("TYPE", vc.getType().toString()); - infoMap.put("QUAL", String.valueOf(10 * vc.getNegLog10PError())); - // add alleles - infoMap.put("ALLELES", Utils.join(";", vc.getAlleles())); - infoMap.put("N_ALLELES", String.valueOf(vc.getNAlleles())); + if ( vc != null ) { + // create a mapping of what we know about the variant context, its Chromosome, positions, etc. + infoMap.put("CHROM", vc.getLocation().getContig()); + infoMap.put("POS", String.valueOf(vc.getLocation().getStart())); + infoMap.put("TYPE", vc.getType().toString()); + infoMap.put("QUAL", String.valueOf(10 * vc.getNegLog10PError())); - // add attributes - addAttributesToMap(infoMap, vc.getAttributes()); + // add alleles + infoMap.put("ALLELES", Utils.join(";", vc.getAlleles())); + infoMap.put("N_ALLELES", String.valueOf(vc.getNAlleles())); - // add filter fields - infoMap.put("FILTER", String.valueOf(vc.isFiltered() ? "1" : "0")); - for ( Object filterCode : vc.getFilters() ) { - infoMap.put(String.valueOf(filterCode), "1"); + // add attributes + addAttributesToMap(infoMap, vc.getAttributes()); + + // add filter fields + infoMap.put("FILTER", vc.isFiltered() ? "1" : "0"); + for ( Object filterCode : vc.getFilters() ) { + infoMap.put(String.valueOf(filterCode), "1"); + } + + // add genotype-specific fields + // TODO -- implement me when we figure out a good way to represent this + // for ( Genotype g : vc.getGenotypes().values() ) { + // String prefix = g.getSampleName() + "."; + // addAttributesToMap(infoMap, g.getAttributes(), prefix); + // infoMap.put(prefix + "GT", g.getGenotypeString()); + // } + } + + // add specific genotype if one is provided + if ( g != null ) { + infoMap.put("GT", g.getGenotypeString()); + infoMap.put("isHomRef", g.isHomRef() ? "1" : "0"); + infoMap.put("isHet", g.isHet() ? "1" : "0"); + infoMap.put("isHomVar", g.isHomVar() ? "1" : "0"); + for ( Map.Entry e : g.getAttributes().entrySet() ) + infoMap.put(e.getKey(), String.valueOf(e.getValue())); } - - // add genotypes - // todo -- comment this back in when we figure out how to represent it nicely -// for ( Genotype g : vc.getGenotypes().values() ) { -// String prefix = g.getSampleName() + "."; -// addAttributesToMap(infoMap, g.getAttributes(), prefix); -// infoMap.put(prefix + "GT", g.getGenotypeString()); -// } - // create the internal context that we can evaluate expressions against jContext = JexlHelper.createContext(); @@ -135,7 +162,6 @@ class JEXLMap implements Map { /** * @return the size of the internal data structure */ - @Override public int size() { return jexl.size(); } @@ -143,7 +169,6 @@ class JEXLMap implements Map { /** * @return true if we're empty */ - @Override public boolean isEmpty() { return this.jexl.isEmpty(); } /** @@ -151,10 +176,8 @@ class JEXLMap implements Map { * @param o the key * @return true if we have a value for that key */ - @Override public boolean containsKey(Object o) { return jexl.containsKey(o); } - @Override public Boolean get(Object o) { // if we've already determined the value, return it if (jexl.containsKey(o) && jexl.get(o) != null) return jexl.get(o); @@ -169,7 +192,6 @@ class JEXLMap implements Map { * get the keyset of map * @return a set of keys of type JexlVCMatchExp */ - @Override public Set keySet() { return jexl.keySet(); } @@ -180,7 +202,6 @@ class JEXLMap implements Map { * the keys you want, you would be better off using get() to get them by name. * @return a collection of boolean values, representing the results of all the variants evaluated */ - @Override public Collection values() { // this is an expensive call for (VariantContextUtils.JexlVCMatchExp exp : jexl.keySet()) @@ -195,7 +216,7 @@ class JEXLMap implements Map { */ private void evaulateExpression(VariantContextUtils.JexlVCMatchExp exp) { // if the context is null, we need to create it to evaluate the JEXL expression - if (this.jContext == null) createContext(vc); + if (this.jContext == null) createContext(); try { jexl.put (exp, (Boolean) exp.exp.evaluate(jContext)); } catch (Exception e) { @@ -210,16 +231,14 @@ class JEXLMap implements Map { */ private static void addAttributesToMap(Map infoMap, Map attributes ) { for (Map.Entry e : attributes.entrySet()) { - infoMap.put(String.valueOf(e.getKey()), String.valueOf(e.getValue())); + infoMap.put(e.getKey(), String.valueOf(e.getValue())); } } - @Override public Boolean put(VariantContextUtils.JexlVCMatchExp jexlVCMatchExp, Boolean aBoolean) { return jexl.put(jexlVCMatchExp,aBoolean); } - @Override public void putAll(Map map) { jexl.putAll(map); } @@ -230,25 +249,21 @@ class JEXLMap implements Map { // this doesn't make much sense to implement, boolean doesn't offer too much variety to deal // with evaluating every key in the internal map. - @Override public boolean containsValue(Object o) { throw new UnsupportedOperationException("containsValue() not supported on a JEXLMap"); } // this doesn't make much sense - @Override public Boolean remove(Object o) { throw new UnsupportedOperationException("remove() not supported on a JEXLMap"); } - @Override public Set> entrySet() { throw new UnsupportedOperationException("clear() not supported on a JEXLMap"); } // nope - @Override public void clear() { throw new UnsupportedOperationException("clear() not supported on a JEXLMap"); } diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java b/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java index d489e3d77..31f7d3ee1 100755 --- a/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java @@ -217,10 +217,10 @@ public class VariantContextAdaptors { } public static VCFRecord toVCF(VariantContext vc, char vcfRefBase) { - return toVCF(vc, vcfRefBase, null, true); + return toVCF(vc, vcfRefBase, null, true, false); } - public static VCFRecord toVCF(VariantContext vc, char vcfRefBase, List allowedGenotypeAttributeKeys, boolean filtersWereAppliedToContext) { + public static VCFRecord toVCF(VariantContext vc, char vcfRefBase, List allowedGenotypeAttributeKeys, boolean filtersWereAppliedToContext, boolean filtersWereAppliedToGenotypes) { // deal with the reference String referenceBases = new String(vc.getReference().getBases()); @@ -284,6 +284,8 @@ public class VariantContextAdaptors { if ( allowedGenotypeAttributeKeys == null || allowedGenotypeAttributeKeys.contains(key) ) vcfGenotypeAttributeKeys.add(key); } + if ( filtersWereAppliedToGenotypes ) + vcfGenotypeAttributeKeys.add(VCFGenotypeRecord.GENOTYPE_FILTER_KEY); } String genotypeFormatString = Utils.join(VCFRecord.GENOTYPE_FIELD_SEPERATOR, vcfGenotypeAttributeKeys); @@ -313,6 +315,8 @@ public class VariantContextAdaptors { ReadBackedPileup pileup = (ReadBackedPileup)g.getAttribute(CalledGenotype.READBACKEDPILEUP_ATTRIBUTE_KEY); if ( pileup != null ) val = pileup.size(); + } else if ( key.equals(VCFGenotypeRecord.GENOTYPE_FILTER_KEY) ) { + val = g.isFiltered() ? Utils.join(";", Utils.sorted(g.getFilters())) : VCFRecord.PASSES_FILTERS; } String outputValue = formatVCFField(key, val); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java index f2fe3ae64..2e0643eb9 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java @@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.filters; import org.broadinstitute.sting.gatk.contexts.*; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; +import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype; import org.broadinstitute.sting.gatk.refdata.*; import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; import org.broadinstitute.sting.gatk.walkers.*; @@ -25,16 +26,22 @@ public class VariantFiltrationWalker extends RodWalker { @Argument(fullName="filterName", shortName="filterName", doc="Names to use for the list of filters (must be a 1-to-1 mapping); this name is put in the FILTER field for variants that get filtered", required=false) protected String[] FILTER_NAMES = new String[]{}; - @Argument(fullName="clusterSize", shortName="cluster", doc="The number of SNPs which make up a cluster (see also --clusterWindowSize)", required=false) + @Argument(fullName="genotypeFilterExpression", shortName="G_filter", doc="One or more expression used with FORMAT (sample/genotype-level) fields to filter (see wiki docs for more info)", required=false) + protected String[] GENOTYPE_FILTER_EXPS = new String[]{}; + @Argument(fullName="genotypeFilterName", shortName="G_filterName", doc="Names to use for the list of sample/genotype filters (must be a 1-to-1 mapping); this name is put in the FILTER field for variants that get filtered", required=false) + protected String[] GENOTYPE_FILTER_NAMES = new String[]{}; + + @Argument(fullName="clusterSize", shortName="cluster", doc="The number of SNPs which make up a cluster (see also --clusterWindowSize); [default:3]", required=false) protected Integer clusterSize = 3; - @Argument(fullName="clusterWindowSize", shortName="window", doc="The window size (in bases) in which to evaluate clustered SNPs (to disable the clustered SNP filter, set this value to less than 1)", required=false) + @Argument(fullName="clusterWindowSize", shortName="window", doc="The window size (in bases) in which to evaluate clustered SNPs (to disable the clustered SNP filter, set this value to less than 1); [default:0]", required=false) protected Integer clusterWindow = 0; - @Argument(fullName="maskName", shortName="mask", doc="The text to put in the FILTER field if a 'mask' rod is provided and overlaps with a variant call", required=false) + @Argument(fullName="maskName", shortName="mask", doc="The text to put in the FILTER field if a 'mask' rod is provided and overlaps with a variant call; [default:'Mask']", required=false) protected String MASK_NAME = "Mask"; // JEXL expressions for the filters List filterExps; + List genotypeFilterExps; public static final String CLUSTERED_SNP_FILTER_NAME = "SnpCluster"; @@ -70,9 +77,13 @@ public class VariantFiltrationWalker extends RodWalker { if ( clusterWindow > 0 ) hInfo.add(new VCFFilterHeaderLine(CLUSTERED_SNP_FILTER_NAME, "SNPs found in clusters")); - for ( VariantContextUtils.JexlVCMatchExp exp : filterExps ) { + for ( VariantContextUtils.JexlVCMatchExp exp : filterExps ) hInfo.add(new VCFFilterHeaderLine(exp.name, exp.exp.toString())); - } + for ( VariantContextUtils.JexlVCMatchExp exp : genotypeFilterExps ) + hInfo.add(new VCFFilterHeaderLine(exp.name, exp.exp.toString())); + + if ( genotypeFilterExps.size() > 0 ) + hInfo.add(new VCFFormatHeaderLine(VCFGenotypeRecord.GENOTYPE_FILTER_KEY, 1, VCFFormatHeaderLine.FORMAT_TYPE.String, "Genotype-level filter")); List dataSources = getToolkit().getRodDataSources(); for ( ReferenceOrderedDataSource source : dataSources ) { @@ -91,6 +102,7 @@ public class VariantFiltrationWalker extends RodWalker { clusteredSNPs = new ClusteredSnps(clusterSize, clusterWindow); filterExps = VariantContextUtils.initializeMatchExps(FILTER_NAMES, FILTER_EXPS); + genotypeFilterExps = VariantContextUtils.initializeMatchExps(GENOTYPE_FILTER_NAMES, GENOTYPE_FILTER_EXPS); } public Integer reduceInit() { return 0; } @@ -136,45 +148,53 @@ public class VariantFiltrationWalker extends RodWalker { if ( context == null ) return; - StringBuilder filterString = new StringBuilder(); + VariantContext vc = context.getVariantContext(); + + // make new Genotypes based on filters + Map genotypes; + if ( genotypeFilterExps.size() == 0 ) { + genotypes = vc.getGenotypes(); + } else { + genotypes = new HashMap(vc.getGenotypes().size()); + + // for each genotype, check filters then create a new object + for ( Map.Entry genotype : vc.getGenotypes().entrySet() ) { + + Genotype g = genotype.getValue(); + Set filters = new LinkedHashSet(g.getFilters()); + + for ( VariantContextUtils.JexlVCMatchExp exp : genotypeFilterExps ) { + if ( VariantContextUtils.match(g, exp) ) + filters.add(exp.name); + } + + genotypes.put(genotype.getKey(), new Genotype(genotype.getKey(), g.getAlleles(), g.getNegLog10PError(), filters, g.getAttributes(), g.genotypesArePhased())); + } + } + + // make a new variant context based on filters + Set filters = new LinkedHashSet(vc.getFilters()); // test for SNP mask, if present RODRecordList mask = context.getTracker().getTrackData("mask", null); if ( mask != null && mask.size() > 0 ) - addFilter(filterString, MASK_NAME); + filters.add(MASK_NAME); // test for clustered SNPs if requested if ( clusteredSNPs != null && clusteredSNPs.filter(variantContextWindow) ) - addFilter(filterString, CLUSTERED_SNP_FILTER_NAME); + filters.add(CLUSTERED_SNP_FILTER_NAME); - VariantContext vc = context.getVariantContext(); for ( VariantContextUtils.JexlVCMatchExp exp : filterExps ) { if ( VariantContextUtils.match(vc, exp) ) - addFilter(filterString, exp.name); + filters.add(exp.name); } - writeVCF(VariantContextAdaptors.toVCF(vc, context.getReferenceContext().getBase(), null, true), filterString); + VariantContext filteredVC = new VariantContext(vc.getName(), vc.getLocation(), vc.getAlleles(), genotypes, vc.getNegLog10PError(), filters, vc.getAttributes()); + + writeVCF(VariantContextAdaptors.toVCF(filteredVC, context.getReferenceContext().getBase(), null, true, genotypeFilterExps.size() > 0)); } - private static void addFilter(StringBuilder sb, String filter) { - if ( sb.length() > 0 ) - sb.append(";"); - sb.append(filter); - } - - private void writeVCF(VCFRecord rec, StringBuilder filterString) { - - if ( filterString.length() != 0 ) { - // if the record is already filtered, don't destroy those filters - if ( rec.isFiltered() ) - filterString.append(";" + rec.getFilterString()); - rec.setFilterString(filterString.toString()); - } - // otherwise, if it's not already filtered, set it to "passing filters" - else if ( !rec.isFiltered() ) { - rec.setFilterString(VCFRecord.PASSES_FILTERS); - } - + private void writeVCF(VCFRecord rec) { if ( writer == null ) initializeVcfWriter(rec); writer.addRecord(rec); diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeRecord.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeRecord.java index 1631c26f3..cb4681e65 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeRecord.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeRecord.java @@ -28,6 +28,7 @@ public class VCFGenotypeRecord { public static final int MISSING_GENOTYPE_QUALITY = -1; public static final int MISSING_DEPTH = -1; public static final int MISSING_HAPLOTYPE_QUALITY = -1; + public static final String PASSES_FILTERS = "0"; public static final String UNFILTERED = "."; public static final double MAX_QUAL_VALUE = 99.0; @@ -199,7 +200,9 @@ public class VCFGenotypeRecord { } public boolean isFiltered() { - return ( mFields.get(GENOTYPE_FILTER_KEY) != null && ! mFields.get(GENOTYPE_FILTER_KEY).equals("0")); + return ( mFields.get(GENOTYPE_FILTER_KEY) != null && + !mFields.get(GENOTYPE_FILTER_KEY).equals(UNFILTERED) && + !mFields.get(GENOTYPE_FILTER_KEY).equals(PASSES_FILTERS)); } public int getPloidy() { diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java index 34d2462e9..4c8981fb6 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java @@ -83,7 +83,7 @@ public class VCFGenotypeWriterAdapter implements VCFGenotypeWriter { if ( mHeader == null ) throw new IllegalStateException("The VCF Header must be written before records can be added"); - VCFRecord call = VariantContextAdaptors.toVCF(vc, vc.getReference().toString().charAt(0), allowedGenotypeFormatStrings, false); + VCFRecord call = VariantContextAdaptors.toVCF(vc, vc.getReference().toString().charAt(0), allowedGenotypeFormatStrings, false, false); Set altAlleles = vc.getAlternateAlleles(); StringBuffer altAlleleCountString = new StringBuffer();