Fixed an issue where a no-call in the eval track would prevent a site from a comparison track from being loaded. Added a new test to cover the use case.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5169 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
kiran 2011-02-02 01:47:53 +00:00
parent aab0ec209b
commit fd8dd8fb9b
5 changed files with 87 additions and 75 deletions

View File

@ -100,7 +100,9 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
private Set<String> compNames = new TreeSet<String>();
private Set<String> knownNames = new TreeSet<String>();
private Set<String> evalNames = new TreeSet<String>();
private Set<String> sampleNames = new TreeSet<String>();
private Set<String> sampleNamesForEvaluation = new TreeSet<String>();
private Set<String> sampleNamesForStratification = new TreeSet<String>();
private int numSamples = 0;
// The list of stratifiers and evaluators to use
@ -186,7 +188,7 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
try {
VariantStratifier vs = c.newInstance();
vs.initialize(jexlExpressions, compNames, knownNames, evalNames, sampleNames);
vs.initialize(jexlExpressions, compNames, knownNames, evalNames, sampleNamesForStratification);
strats.add(vs);
} catch (InstantiationException e) {
@ -372,15 +374,14 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
Map<String, VCFHeader> vcfRods = VCFUtils.getVCFHeadersFromRods(getToolkit(), evalNames);
Set<String> vcfSamples = SampleUtils.getSampleList(vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE);
// If we're not using the per-sample stratification, don't bother loading the sample list
if (Arrays.asList(STRATIFICATIONS_TO_USE).contains("Sample")) {
sampleNames.addAll(SampleUtils.getSamplesFromCommandLineInput(vcfSamples, SAMPLE_EXPRESSIONS));
numSamples = sampleNames.size();
} else {
numSamples = vcfSamples.size();
}
// Load the sample list
sampleNamesForEvaluation.addAll(SampleUtils.getSamplesFromCommandLineInput(vcfSamples, SAMPLE_EXPRESSIONS));
numSamples = sampleNamesForEvaluation.size();
sampleNames.add(ALL_SAMPLE_NAME);
if (Arrays.asList(STRATIFICATIONS_TO_USE).contains("Sample")) {
sampleNamesForStratification.addAll(sampleNamesForEvaluation);
}
sampleNamesForStratification.add(ALL_SAMPLE_NAME);
// Initialize select expressions
jexlExpressions.addAll(VariantContextUtils.initializeMatchExps(SELECT_NAMES, SELECT_EXPS));
@ -411,21 +412,30 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
*
* @param tracker the reference metadata tracker
* @param ref the reference context
* @param compNames the comp track names
* @param evalNames the evaluation track names
* @return the set of allowable variation types
*/
private EnumSet<VariantContext.Type> getAllowableVariationTypes(RefMetaDataTracker tracker, ReferenceContext ref, Set<String> evalNames) {
private EnumSet<VariantContext.Type> getAllowableVariationTypes(RefMetaDataTracker tracker, ReferenceContext ref, Set<String> compNames, Set<String> evalNames) {
EnumSet<VariantContext.Type> allowableTypes = EnumSet.of(VariantContext.Type.NO_VARIATION);
if (tracker != null) {
Collection<VariantContext> vcs = tracker.getVariantContexts(ref, evalNames, null, ref.getLocus(), true, false);
Collection<VariantContext> evalvcs = tracker.getVariantContexts(ref, evalNames, null, ref.getLocus(), true, false);
for ( VariantContext vc : vcs ) {
for ( VariantContext vc : evalvcs ) {
allowableTypes.add(vc.getType());
}
} else {
allowableTypes.add(VariantContext.Type.SNP);
if (allowableTypes.size() == 1) {
// We didn't find any variation in the eval track, so now let's look at the comp track for allowable types
Collection<VariantContext> compvcs = tracker.getVariantContexts(ref, compNames, null, ref.getLocus(), true, false);
for ( VariantContext vc : compvcs ) {
allowableTypes.add(vc.getType());
}
}
}
return allowableTypes;
}
@ -478,42 +488,41 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
* @param tracker the metadata tracker
* @param ref the reference context
* @param trackNames the list of track names to process
* @param sampleNames the list of samples to include
* @param allowableTypes a set of allowable variation types
* @param byFilter if false, only accept PASSing VariantContexts. Otherwise, accept both PASSing and filtered sites
* @param trackPerSample if false, don't stratify per sample (and don't cut up the VariantContext like we would need to do this)
* @param allowNoCalls if false, don't accept no-call loci from a variant track
* @return a mapping of track names to a list of VariantContext objects
*/
private HashMap<String, HashMap<String, VariantContext>> bindVariantContexts(RefMetaDataTracker tracker, ReferenceContext ref, Set<String> trackNames, Set<String> sampleNames, EnumSet<VariantContext.Type> allowableTypes, boolean byFilter) {
private HashMap<String, HashMap<String, VariantContext>> bindVariantContexts(RefMetaDataTracker tracker, ReferenceContext ref, Set<String> trackNames, EnumSet<VariantContext.Type> allowableTypes, boolean byFilter, boolean trackPerSample, boolean allowNoCalls) {
HashMap<String, HashMap<String, VariantContext>> bindings = new HashMap<String, HashMap<String, VariantContext>>();
for ( String trackName : trackNames ) {
Collection<VariantContext> contexts = tracker == null ? null : tracker.getVariantContexts(ref, trackName, allowableTypes, ref.getLocus(), true, true);
VariantContext vc = contexts != null && contexts.size() == 1 ? contexts.iterator().next() : null;
HashMap<String, VariantContext> vcs = new HashMap<String, VariantContext>();
Collection<VariantContext> contexts = tracker == null ? null : tracker.getVariantContexts(ref, trackName, allowableTypes, ref.getLocus(), true, true);
VariantContext vc = contexts != null && contexts.size() == 1 ? contexts.iterator().next() : null;
// First, filter the VariantContext to represent only the samples for evaluation
if ( vc != null ) {
ArrayList<String> sampleNamesMinusAll = new ArrayList<String>();
VariantContext vcsub = vc;
for ( String sampleName : sampleNames ) {
VariantContext vcsub = vc;
if (!sampleName.equals(ALL_SAMPLE_NAME)) {
vcsub = getSubsetOfVariantContext(vc, sampleName);
sampleNamesMinusAll.add(sampleName);
}
if (byFilter || !vcsub.isFiltered()) {
vcs.put(sampleName, vcsub);
}
if (vc.hasGenotypes(sampleNamesForEvaluation)) {
vcsub = getSubsetOfVariantContext(vc, sampleNamesForEvaluation);
}
if ( trackName.contains("eval") ) {
VariantContext vcsub = (sampleNamesMinusAll.size() > 0) ? getSubsetOfVariantContext(vc, sampleNamesMinusAll) : vc;
if ((byFilter || !vcsub.isFiltered()) && (allowNoCalls || vcsub.getType() != VariantContext.Type.NO_VARIATION)) {
vcs.put(ALL_SAMPLE_NAME, vcsub);
}
if (byFilter || !vcsub.isFiltered()) {
vcs.put(ALL_SAMPLE_NAME, vcsub);
// Now, if stratifying, split the subsetted vc per sample and add each as a new context
if ( trackPerSample ) {
for ( String sampleName : sampleNamesForEvaluation ) {
VariantContext samplevc = getSubsetOfVariantContext(vc, sampleName);
if ((byFilter || !samplevc.isFiltered()) && (allowNoCalls || samplevc.getType() != VariantContext.Type.NO_VARIATION)) {
vcs.put(sampleName, samplevc);
}
}
}
@ -532,35 +541,25 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
* @param ref the reference context
* @param compNames the list of comp names to process
* @param evalNames the list of eval names to process
* @param sampleNames the list of samples to include
* @return a mapping of track names to a list of VariantContext objects
*/
private HashMap<String, HashMap<String, VariantContext>> getVariantContexts(RefMetaDataTracker tracker, ReferenceContext ref, Set<String> compNames, Set<String> evalNames, Set<String> sampleNames) {
private HashMap<String, HashMap<String, VariantContext>> getVariantContexts(RefMetaDataTracker tracker, ReferenceContext ref, Set<String> compNames, Set<String> evalNames) {
HashMap<String, HashMap<String, VariantContext>> vcs = new HashMap<String, HashMap<String, VariantContext>>();
Set<String> allSamplesList = new HashSet<String>();
allSamplesList.add(ALL_SAMPLE_NAME);
EnumSet<VariantContext.Type> allowableTypes = getAllowableVariationTypes(tracker, ref, compNames, evalNames);
EnumSet<VariantContext.Type> allowableTypes = getAllowableVariationTypes(tracker, ref, evalNames);
boolean perSampleIsEnabled = false;
boolean byFilter = false;
boolean perSampleIsEnabled = false;
for (VariantStratifier vs : stratificationObjects) {
if (vs.getClass().getSimpleName().equals("Sample")) {
perSampleIsEnabled = true;
} else if (vs.getClass().getSimpleName().equals("Filter")) {
if (vs.getClass().getSimpleName().equals("Filter")) {
byFilter = true;
} else if (vs.getClass().getSimpleName().equals("Sample")) {
perSampleIsEnabled = true;
}
}
HashMap<String, HashMap<String, VariantContext>> evalBindings;
if (perSampleIsEnabled) {
evalBindings = bindVariantContexts(tracker, ref, evalNames, sampleNames, allowableTypes, byFilter);
} else {
evalBindings = bindVariantContexts(tracker, ref, evalNames, allSamplesList, allowableTypes, byFilter);
}
HashMap<String, HashMap<String, VariantContext>> compBindings = bindVariantContexts(tracker, ref, compNames, allSamplesList, allowableTypes, byFilter);
HashMap<String, HashMap<String, VariantContext>> evalBindings = bindVariantContexts(tracker, ref, evalNames, allowableTypes, byFilter, perSampleIsEnabled, true);
HashMap<String, HashMap<String, VariantContext>> compBindings = bindVariantContexts(tracker, ref, compNames, allowableTypes, byFilter, false, false);
vcs.putAll(compBindings);
vcs.putAll(evalBindings);
@ -657,13 +656,13 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
}
// track sample vc
HashMap<String, HashMap<String, VariantContext>> vcs = getVariantContexts(tracker, ref, compNames, evalNames, sampleNames);
HashMap<String, HashMap<String, VariantContext>> vcs = getVariantContexts(tracker, ref, compNames, evalNames);
for ( String compName : compNames ) {
VariantContext comp = vcs.containsKey(compName) && vcs.get(compName) != null && vcs.get(compName).containsKey(ALL_SAMPLE_NAME) ? vcs.get(compName).get(ALL_SAMPLE_NAME) : null;
for ( String evalName : evalNames ) {
for ( String sampleName : sampleNames ) {
for ( String sampleName : sampleNamesForStratification ) {
VariantContext eval = vcs.containsKey(evalName) && vcs.get(evalName) != null ? vcs.get(evalName).get(sampleName) : null;
HashMap<VariantStratifier, ArrayList<String>> stateMap = new HashMap<VariantStratifier, ArrayList<String>>();

View File

@ -76,10 +76,13 @@ public class CompOverlap extends VariantEvaluator implements StandardEval {
}
public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
boolean expectingIndels = false;
//boolean expectingIndels = false;
boolean compIsGood = expectingIndels ? comp != null && comp.isNotFiltered() && comp.isIndel() : comp != null && comp.isNotFiltered() && comp.isSNP() ;
boolean evalIsGood = expectingIndels ? eval != null && eval.isIndel() : eval != null && eval.isSNP() ;
//boolean compIsGood = expectingIndels ? comp != null && comp.isNotFiltered() && comp.isIndel() : comp != null && comp.isNotFiltered() && comp.isSNP() ;
//boolean evalIsGood = expectingIndels ? eval != null && eval.isIndel() : eval != null && eval.isSNP() ;
boolean compIsGood = comp != null && comp.isNotFiltered() && comp.isSNP() ;
boolean evalIsGood = eval != null && eval.isSNP() ;
if (compIsGood) nCompSNPs++; // count the number of comp events
if (evalIsGood) nEvalSNPs++; // count the number of eval events
@ -87,8 +90,9 @@ public class CompOverlap extends VariantEvaluator implements StandardEval {
if (compIsGood && evalIsGood) {
nSNPsAtComp++;
if (!discordantP(eval, comp)) // count whether we're concordant or not with the comp value
if (!discordantP(eval, comp)) { // count whether we're concordant or not with the comp value
nConcordant++;
}
}
return null; // we don't capture any interesting sites

View File

@ -15,7 +15,6 @@ public class Sample extends VariantStratifier {
@Override
public void initialize(Set<VariantContextUtils.JexlVCMatchExp> jexlExpressions, Set<String> compNames, Set<String> knownNames, Set<String> evalNames, Set<String> sampleNames) {
samples = new ArrayList<String>();
samples.add(VariantEvalWalker.ALL_SAMPLE_NAME);
samples.addAll(sampleNames);
}

View File

@ -52,7 +52,7 @@ public class NewEvaluationContext extends HashMap<VariantStratifier, String> {
public void apply(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context, VariantContext comp, VariantContext eval) {
for ( VariantEvaluator evaluation : evaluationInstances.values() ) {
//synchronized ( evaluation ) {
//synchronized ( this ) {
// we always call update0 in case the evaluation tracks things like number of bases covered
//evaluation.update0(tracker, ref, context);
@ -69,9 +69,9 @@ public class NewEvaluationContext extends HashMap<VariantStratifier, String> {
break;
case 2:
if (eval != null) {
//if (eval != null) {
evaluation.update2(eval, comp, tracker, ref, context);
}
//}
break;
default:
@ -82,8 +82,10 @@ public class NewEvaluationContext extends HashMap<VariantStratifier, String> {
}
public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
for ( VariantEvaluator evaluation : evaluationInstances.values() ) {
evaluation.update0(tracker, ref, context);
}
//synchronized (this) {
for ( VariantEvaluator evaluation : evaluationInstances.values() ) {
evaluation.update0(tracker, ref, context);
}
//}
}
}

View File

@ -29,7 +29,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
String extraArgs = "-L 1:1-10,000,000";
for (String tests : testsEnumerations) {
WalkerTestSpec spec = new WalkerTestSpec(withSelect(tests, "DP < 50", "DP50") + " " + extraArgs + " -o %s",
1, Arrays.asList("8ddc1c4a86cb3f4c22346497785b23e3"));
1, Arrays.asList("0087b2a096aa99135b065aa9a0fff34c"));
//executeTestParallel("testSelect1", spec);
executeTest("testSelect1", spec);
}
@ -50,7 +50,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
for (String vcfFile : vcfFiles) {
WalkerTestSpec spec = new WalkerTestSpec(cmdRoot + " -B:eval,VCF " + validationDataLocation + vcfFile + " -B:comp,VCF " + validationDataLocation + "GenotypeConcordanceComp.vcf -noEV -EV GenotypeConcordance -o %s",
1,
Arrays.asList("7a6754176b573d14b6be7c808a04929d"));
Arrays.asList("bb16335f9510bcab2bd14a4299afd879"));
//executeTestParallel("testVEGenotypeConcordance" + vcfFile, spec);
executeTest("testVEGenotypeConcordance" + vcfFile, spec);
}
@ -60,8 +60,8 @@ public class VariantEvalIntegrationTest extends WalkerTest {
@Test
public void testVESimple() {
HashMap<String, String> expectations = new HashMap<String, String>();
expectations.put("-L 1:1-10,000,000", "ffd1abed44faf1590d9026e478b2f8ee");
expectations.put("-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -mvq 0 -EV MendelianViolationEvaluator", "32c2411fbf58bae5750c8229d15b98eb");
expectations.put("-L 1:1-10,000,000", "e46e8e7457b338c4cfec62ee7aa51ffe");
expectations.put("-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -mvq 0 -EV MendelianViolationEvaluator", "a0554ca0baa097a1761da3f7e8487833");
for ( Map.Entry<String, String> entry : expectations.entrySet() ) {
String extraArgs = entry.getKey();
@ -84,10 +84,10 @@ public class VariantEvalIntegrationTest extends WalkerTest {
" -B:comp_hapmap,VCF " + validationDataLocation + "CEU_hapmap_nogt_23.vcf";
String matchingMD5 = "6c2fa6573cc57ef8795e9cce2b654d0b";
String matchingMD5 = "5050011ad00b859faf2be679830bec90";
expectations.put("", matchingMD5);
expectations.put(" -knownName comp_hapmap -knownName dbsnp", matchingMD5);
expectations.put(" -knownName comp_hapmap", "6c2fa6573cc57ef8795e9cce2b654d0b");
expectations.put(" -knownName comp_hapmap", "5050011ad00b859faf2be679830bec90");
for (String tests : testsEnumerations) {
for (Map.Entry<String, String> entry : expectations.entrySet()) {
String extraArgs2 = entry.getKey();
@ -133,7 +133,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
@Test
public void testCompVsEvalAC() {
String extraArgs = "-T VariantEval -R "+b36KGReference+" -o %s -EV GenotypeConcordance -B:evalYRI,VCF /humgen/gsa-hpprojects/GATK/data/Validation_Data/yri.trio.gatk.ug.very.few.lines.vcf -B:compYRI,VCF /humgen/gsa-hpprojects/GATK/data/Validation_Data/yri.trio.gatk.fake.genotypes.ac.test.vcf";
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("929e4ec46fb6957c29803531322bb35e"));
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("113228ffa35e0f67b8e067860c04171f"));
//executeTestParallel("testACDiscordanceAtAC1EvalAC2Comp",spec);
executeTest("testCompVsEvalAC",spec);
}
@ -150,6 +150,14 @@ public class VariantEvalIntegrationTest extends WalkerTest {
executeTest("testTranches",spec);
}
@Test
public void testCompOverlap() {
String extraArgs = "-T VariantEval -R " + b37KGReference + " -L " + validationDataLocation + "VariantEval/pacbio.hg19.intervals -B:comphapmap,vcf " + comparisonDataLocation + "Validated/HapMap/3.3/genotypes_r27_nr.b37_fwd.vcf -B:eval,vcf " + validationDataLocation + "VariantEval/pacbio.ts.recalibrated.vcf -noEV -EV CompOverlap -sn NA12878 -noST -ST Novelty -o %s";
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("81377be26bf8fa32339d01c173428f7d"));
//executeTestParallel("testTranches",spec);
executeTest("testCompOverlap",spec);
}
// @Test
// public void testVEValidatePass() {
// String extraArgs = "-L 1:1-10,000,000";