From 4c6d0e6143c8eea58fd6c0dca05a8c2b50cbb31d Mon Sep 17 00:00:00 2001 From: depristo Date: Sun, 19 Jun 2011 03:11:00 +0000 Subject: [PATCH] Added stratification by discrete allele count, just like AF, but requiring genotypes so it can be exact. Added docs on wiki, and integrationtest using Kiran's very nice fundamental VCF VariantEvalWalker now passes a pointer to itself to the Stratefication setVariantEvalWalker (and assoc. get method) so that stratefications can look at VEWalker variables to obtain information necessary for their calculations, like the list of eval samples. This is a better interface, in my opinion, than the current approach of extending the base abstract Stratefication to include an initialize function that has all arguments necessarily for any Strat. JEXL expressions now provide access to the VariantContext vc object itself, so you can write JEXL's that directly use VariantContext and associated functions from the command line. ExomePostQC Queue script now creates a byAC eval using the new strat, and no longer produces a byAF file (as this was not exact, and lead to strange punctile behavior when actual AF quantization was out of sync with fix quantization of AF strat. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@6015 348d0f76-0448-11de-a6fe-93d51630548a --- .../variantcontext/VariantJEXLContext.java | 5 +- .../varianteval/VariantEvalWalker.java | 2 +- .../stratifications/AlleleCount.java | 59 +++++++++++++++++++ .../stratifications/VariantStratifier.java | 18 ++++++ .../varianteval/util/VariantEvalUtils.java | 4 +- .../VariantEvalIntegrationTest.java | 22 +++++++ .../oneoffs/depristo/ExomePostQCEval.scala | 9 ++- 7 files changed, 108 insertions(+), 11 deletions(-) create mode 100755 java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java diff --git a/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantJEXLContext.java b/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantJEXLContext.java index fce94b758..e29054893 100644 --- a/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantJEXLContext.java +++ b/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantJEXLContext.java @@ -59,6 +59,7 @@ class VariantJEXLContext implements JexlContext { private static Map x = new HashMap(); static { + x.put("vc", new AttributeGetter() { public Object get(VariantContext vc) { return vc; }}); x.put("CHROM", new AttributeGetter() { public Object get(VariantContext vc) { return vc.getChr(); }}); x.put("POS", new AttributeGetter() { public Object get(VariantContext vc) { return vc.getStart(); }}); x.put("TYPE", new AttributeGetter() { public Object get(VariantContext vc) { return vc.getType().toString(); }}); @@ -135,10 +136,6 @@ class JEXLMap implements Map { this(jexlCollection, vc, null); } - public JEXLMap(Collection jexlCollection, Genotype g) { - this(jexlCollection, null, g); - } - private void initialize(Collection jexlCollection) { jexl = new HashMap(); for (VariantContextUtils.JexlVCMatchExp exp: jexlCollection) { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java index 3ac363bab..2a21bfe41 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java @@ -194,7 +194,7 @@ public class VariantEvalWalker extends RodWalker implements Tr } // Initialize the set of stratifications and evaluations to use - stratificationObjects = variantEvalUtils.initializeStratificationObjects(NO_STANDARD_STRATIFICATIONS, STRATIFICATIONS_TO_USE); + stratificationObjects = variantEvalUtils.initializeStratificationObjects(this, NO_STANDARD_STRATIFICATIONS, STRATIFICATIONS_TO_USE); Set> evaluationObjects = variantEvalUtils.initializeEvaluationObjects(NO_STANDARD_MODULES, MODULES_TO_USE); // Initialize the evaluation contexts diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java new file mode 100755 index 000000000..19f207df1 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java @@ -0,0 +1,59 @@ +package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; + +import org.broad.tribble.util.variantcontext.Allele; +import org.broad.tribble.util.variantcontext.VariantContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp; +import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.exceptions.UserException; + +import java.util.ArrayList; +import java.util.Set; + +public class AlleleCount extends VariantStratifier { + // needs to know the variant context + private ArrayList states = new ArrayList(); + + @Override + public void initialize(Set jexlExpressions, Set compNames, Set knownNames, Set evalNames, Set sampleNames, Set contigNames) { + // we can only work with a single eval VCF, and it must have genotypes + if ( evalNames.size() != 1 ) + throw new UserException.BadArgumentValue("AlleleCount", "AlleleCount stratification only works with a single eval vcf"); + + // There are 2 x n sample chromosomes for diploids + int nchrom = getVariantEvalWalker().getSampleNamesForEvaluation().size() * 2; + if ( nchrom < 2 ) + throw new UserException.BadArgumentValue("AlleleCount", "AlleleCount stratification requires an eval vcf with at least one sample"); + + // create an array containing each of the allele counts + for( int ac = 0; ac <= nchrom; ac++ ) { + states.add(String.format("%d", ac)); + } + + getVariantEvalWalker().getLogger().info("AlleleCount using " + nchrom + " chromosomes"); + } + + public ArrayList getAllStates() { + return states; + } + + public ArrayList getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { + ArrayList relevantStates = new ArrayList(1); + + if (eval != null) { + int AC = -1; + if ( eval.hasAttribute("AC") ) + AC = eval.getAttributeAsInt("AC"); + else if ( eval.isVariant() ) { + for (Allele allele : eval.getAlternateAlleles()) + AC = Math.max(AC, eval.getChromosomeCount(allele)); + } else + // by default, the site is considered monomorphic + AC = 0; + relevantStates.add(String.format("%d", AC)); + } + + return relevantStates; + } +} diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/VariantStratifier.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/VariantStratifier.java index b2043c21a..96603edaf 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/VariantStratifier.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/VariantStratifier.java @@ -4,12 +4,30 @@ import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker; import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp; import java.util.ArrayList; import java.util.Set; public abstract class VariantStratifier implements Comparable { + private VariantEvalWalker variantEvalWalker; + + /** + * @return a reference to the parent VariantEvalWalker running this stratification + */ + public VariantEvalWalker getVariantEvalWalker() { + return variantEvalWalker; + } + + /** + * Should only be called by VariantEvalWalker itself + * @param variantEvalWalker + */ + public void setVariantEvalWalker(VariantEvalWalker variantEvalWalker) { + this.variantEvalWalker = variantEvalWalker; + } + public abstract void initialize(Set jexlExpressions, Set compNames, Set knownNames, Set evalNames, Set sampleNames, Set contigNames); public ArrayList getAllStates() { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java index dc587b17e..32996a0bc 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java @@ -7,6 +7,7 @@ import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.report.GATKReport; import org.broadinstitute.sting.gatk.report.GATKReportTable; +import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker; import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.StandardEval; import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.VariantEvaluator; @@ -62,7 +63,7 @@ public class VariantEvalUtils { * @param modulesToUse the list of stratification modules to use * @return set of stratifications to use */ - public TreeSet initializeStratificationObjects(boolean noStandardStrats, String[] modulesToUse) { + public TreeSet initializeStratificationObjects(VariantEvalWalker variantEvalWalker, boolean noStandardStrats, String[] modulesToUse) { TreeSet strats = new TreeSet(); Set stratsToUse = new HashSet(); @@ -102,6 +103,7 @@ public class VariantEvalUtils { try { VariantStratifier vs = c.newInstance(); + vs.setVariantEvalWalker(variantEvalWalker); vs.initialize(variantEvalWalker.getJexlExpressions(), variantEvalWalker.getCompNames(), variantEvalWalker.getKnownNames(), variantEvalWalker.getEvalNames(), variantEvalWalker.getSampleNamesForStratification(), variantEvalWalker.getContigNames()); strats.add(vs); diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java index 38efd50ef..23c606ad0 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java @@ -373,4 +373,26 @@ public class VariantEvalIntegrationTest extends WalkerTest { ); executeTestParallel("testPerSampleAndSubsettedSampleHaveSameResults-onesample", spec2); } + + + @Test + public void testAlleleCountStrat() { + WalkerTestSpec spec = new WalkerTestSpec( + buildCommandLine( + "-T VariantEval", + "-R " + b37KGReference, + "-D " + b37dbSNP129, + "-B:eval,VCF " + fundamentalTestSNPsVCF, + "-noEV", + "-EV CountVariants", + "-noST", + "-ST AlleleCount", + "-BTI eval", + "-o %s" + ), + 1, + Arrays.asList("bf324e4c87fe0d21170fcd2a67a20371") + ); + executeTest("testAlleleCountStrat", spec); + } } diff --git a/scala/qscript/oneoffs/depristo/ExomePostQCEval.scala b/scala/qscript/oneoffs/depristo/ExomePostQCEval.scala index 2600dcf56..919cb105a 100755 --- a/scala/qscript/oneoffs/depristo/ExomePostQCEval.scala +++ b/scala/qscript/oneoffs/depristo/ExomePostQCEval.scala @@ -39,18 +39,17 @@ class ExomePostQCEval extends QScript { // The basic summary eval createEval(evalVCF, ".summary", List("TiTvVariantEvaluator", "CountVariants", "CompOverlap"), - List("EvalRod", "CompRod", "Novelty", "FunctionalClass")) + List("FunctionalClass")) // The basic summary eval, by AF - createEval(evalVCF, ".byAF", + createEval(evalVCF, ".byAC", List("TiTvVariantEvaluator", "CountVariants", "CompOverlap"), - List("EvalRod", "CompRod", "Novelty", "AlleleFrequency", "FunctionalClass")) + List("AlleleCount")) // By sample createEval(evalVCF, ".bySample", List("TiTvVariantEvaluator", "CountVariants", "CompOverlap"), - List("EvalRod", "CompRod", "Novelty", "Sample")) - + List("Sample")) add(new ExomeQCRScript(evalVCF)) } }