Added stratification by discrete allele count, just like AF, but requiring genotypes so it can be exact. Added docs on wiki, and integrationtest using Kiran's very nice fundamental VCF

VariantEvalWalker now passes a pointer to itself to the Stratefication setVariantEvalWalker (and assoc. get method) so that stratefications can look at VEWalker variables to obtain information necessary for their calculations, like the list of eval samples.  This is a better interface, in my opinion, than the current approach of extending the base abstract Stratefication to include an initialize function that has all arguments necessarily for any Strat.  
JEXL expressions now provide access to the VariantContext vc object itself, so you can write JEXL's that directly use VariantContext and associated functions from the command line.
ExomePostQC Queue script now creates a byAC eval using the new strat, and no longer produces a byAF file (as this was not exact, and lead to strange punctile behavior when actual AF quantization was out of sync with fix quantization of AF strat.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@6015 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
depristo 2011-06-19 03:11:00 +00:00
parent 907018768c
commit 4c6d0e6143
7 changed files with 108 additions and 11 deletions

View File

@ -59,6 +59,7 @@ class VariantJEXLContext implements JexlContext {
private static Map<String, AttributeGetter> x = new HashMap<String, AttributeGetter>();
static {
x.put("vc", new AttributeGetter() { public Object get(VariantContext vc) { return vc; }});
x.put("CHROM", new AttributeGetter() { public Object get(VariantContext vc) { return vc.getChr(); }});
x.put("POS", new AttributeGetter() { public Object get(VariantContext vc) { return vc.getStart(); }});
x.put("TYPE", new AttributeGetter() { public Object get(VariantContext vc) { return vc.getType().toString(); }});
@ -135,10 +136,6 @@ class JEXLMap implements Map<VariantContextUtils.JexlVCMatchExp, Boolean> {
this(jexlCollection, vc, null);
}
public JEXLMap(Collection<VariantContextUtils.JexlVCMatchExp> jexlCollection, Genotype g) {
this(jexlCollection, null, g);
}
private void initialize(Collection<VariantContextUtils.JexlVCMatchExp> jexlCollection) {
jexl = new HashMap<VariantContextUtils.JexlVCMatchExp,Boolean>();
for (VariantContextUtils.JexlVCMatchExp exp: jexlCollection) {

View File

@ -194,7 +194,7 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
}
// Initialize the set of stratifications and evaluations to use
stratificationObjects = variantEvalUtils.initializeStratificationObjects(NO_STANDARD_STRATIFICATIONS, STRATIFICATIONS_TO_USE);
stratificationObjects = variantEvalUtils.initializeStratificationObjects(this, NO_STANDARD_STRATIFICATIONS, STRATIFICATIONS_TO_USE);
Set<Class<? extends VariantEvaluator>> evaluationObjects = variantEvalUtils.initializeEvaluationObjects(NO_STANDARD_MODULES, MODULES_TO_USE);
// Initialize the evaluation contexts

View File

@ -0,0 +1,59 @@
package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications;
import org.broad.tribble.util.variantcontext.Allele;
import org.broad.tribble.util.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.exceptions.UserException;
import java.util.ArrayList;
import java.util.Set;
public class AlleleCount extends VariantStratifier {
// needs to know the variant context
private ArrayList<String> states = new ArrayList<String>();
@Override
public void initialize(Set<SortableJexlVCMatchExp> jexlExpressions, Set<String> compNames, Set<String> knownNames, Set<String> evalNames, Set<String> sampleNames, Set<String> contigNames) {
// we can only work with a single eval VCF, and it must have genotypes
if ( evalNames.size() != 1 )
throw new UserException.BadArgumentValue("AlleleCount", "AlleleCount stratification only works with a single eval vcf");
// There are 2 x n sample chromosomes for diploids
int nchrom = getVariantEvalWalker().getSampleNamesForEvaluation().size() * 2;
if ( nchrom < 2 )
throw new UserException.BadArgumentValue("AlleleCount", "AlleleCount stratification requires an eval vcf with at least one sample");
// create an array containing each of the allele counts
for( int ac = 0; ac <= nchrom; ac++ ) {
states.add(String.format("%d", ac));
}
getVariantEvalWalker().getLogger().info("AlleleCount using " + nchrom + " chromosomes");
}
public ArrayList<String> getAllStates() {
return states;
}
public ArrayList<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
ArrayList<String> relevantStates = new ArrayList<String>(1);
if (eval != null) {
int AC = -1;
if ( eval.hasAttribute("AC") )
AC = eval.getAttributeAsInt("AC");
else if ( eval.isVariant() ) {
for (Allele allele : eval.getAlternateAlleles())
AC = Math.max(AC, eval.getChromosomeCount(allele));
} else
// by default, the site is considered monomorphic
AC = 0;
relevantStates.add(String.format("%d", AC));
}
return relevantStates;
}
}

View File

@ -4,12 +4,30 @@ import org.broad.tribble.util.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker;
import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp;
import java.util.ArrayList;
import java.util.Set;
public abstract class VariantStratifier implements Comparable {
private VariantEvalWalker variantEvalWalker;
/**
* @return a reference to the parent VariantEvalWalker running this stratification
*/
public VariantEvalWalker getVariantEvalWalker() {
return variantEvalWalker;
}
/**
* Should only be called by VariantEvalWalker itself
* @param variantEvalWalker
*/
public void setVariantEvalWalker(VariantEvalWalker variantEvalWalker) {
this.variantEvalWalker = variantEvalWalker;
}
public abstract void initialize(Set<SortableJexlVCMatchExp> jexlExpressions, Set<String> compNames, Set<String> knownNames, Set<String> evalNames, Set<String> sampleNames, Set<String> contigNames);
public ArrayList<String> getAllStates() {

View File

@ -7,6 +7,7 @@ import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.report.GATKReport;
import org.broadinstitute.sting.gatk.report.GATKReportTable;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker;
import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.StandardEval;
import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.VariantEvaluator;
@ -62,7 +63,7 @@ public class VariantEvalUtils {
* @param modulesToUse the list of stratification modules to use
* @return set of stratifications to use
*/
public TreeSet<VariantStratifier> initializeStratificationObjects(boolean noStandardStrats, String[] modulesToUse) {
public TreeSet<VariantStratifier> initializeStratificationObjects(VariantEvalWalker variantEvalWalker, boolean noStandardStrats, String[] modulesToUse) {
TreeSet<VariantStratifier> strats = new TreeSet<VariantStratifier>();
Set<String> stratsToUse = new HashSet<String>();
@ -102,6 +103,7 @@ public class VariantEvalUtils {
try {
VariantStratifier vs = c.newInstance();
vs.setVariantEvalWalker(variantEvalWalker);
vs.initialize(variantEvalWalker.getJexlExpressions(), variantEvalWalker.getCompNames(), variantEvalWalker.getKnownNames(), variantEvalWalker.getEvalNames(), variantEvalWalker.getSampleNamesForStratification(), variantEvalWalker.getContigNames());
strats.add(vs);

View File

@ -373,4 +373,26 @@ public class VariantEvalIntegrationTest extends WalkerTest {
);
executeTestParallel("testPerSampleAndSubsettedSampleHaveSameResults-onesample", spec2);
}
@Test
public void testAlleleCountStrat() {
WalkerTestSpec spec = new WalkerTestSpec(
buildCommandLine(
"-T VariantEval",
"-R " + b37KGReference,
"-D " + b37dbSNP129,
"-B:eval,VCF " + fundamentalTestSNPsVCF,
"-noEV",
"-EV CountVariants",
"-noST",
"-ST AlleleCount",
"-BTI eval",
"-o %s"
),
1,
Arrays.asList("bf324e4c87fe0d21170fcd2a67a20371")
);
executeTest("testAlleleCountStrat", spec);
}
}

View File

@ -39,18 +39,17 @@ class ExomePostQCEval extends QScript {
// The basic summary eval
createEval(evalVCF, ".summary",
List("TiTvVariantEvaluator", "CountVariants", "CompOverlap"),
List("EvalRod", "CompRod", "Novelty", "FunctionalClass"))
List("FunctionalClass"))
// The basic summary eval, by AF
createEval(evalVCF, ".byAF",
createEval(evalVCF, ".byAC",
List("TiTvVariantEvaluator", "CountVariants", "CompOverlap"),
List("EvalRod", "CompRod", "Novelty", "AlleleFrequency", "FunctionalClass"))
List("AlleleCount"))
// By sample
createEval(evalVCF, ".bySample",
List("TiTvVariantEvaluator", "CountVariants", "CompOverlap"),
List("EvalRod", "CompRod", "Novelty", "Sample"))
List("Sample"))
add(new ExomeQCRScript(evalVCF))
}
}