diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java index e285862d1..61d61581a 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java @@ -100,7 +100,9 @@ public class VariantEvalWalker extends RodWalker implements Tr private Set compNames = new TreeSet(); private Set knownNames = new TreeSet(); private Set evalNames = new TreeSet(); - private Set sampleNames = new TreeSet(); + + private Set sampleNamesForEvaluation = new TreeSet(); + private Set sampleNamesForStratification = new TreeSet(); private int numSamples = 0; // The list of stratifiers and evaluators to use @@ -186,7 +188,7 @@ public class VariantEvalWalker extends RodWalker implements Tr try { VariantStratifier vs = c.newInstance(); - vs.initialize(jexlExpressions, compNames, knownNames, evalNames, sampleNames); + vs.initialize(jexlExpressions, compNames, knownNames, evalNames, sampleNamesForStratification); strats.add(vs); } catch (InstantiationException e) { @@ -372,15 +374,14 @@ public class VariantEvalWalker extends RodWalker implements Tr Map vcfRods = VCFUtils.getVCFHeadersFromRods(getToolkit(), evalNames); Set vcfSamples = SampleUtils.getSampleList(vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE); - // If we're not using the per-sample stratification, don't bother loading the sample list - if (Arrays.asList(STRATIFICATIONS_TO_USE).contains("Sample")) { - sampleNames.addAll(SampleUtils.getSamplesFromCommandLineInput(vcfSamples, SAMPLE_EXPRESSIONS)); - numSamples = sampleNames.size(); - } else { - numSamples = vcfSamples.size(); - } + // Load the sample list + sampleNamesForEvaluation.addAll(SampleUtils.getSamplesFromCommandLineInput(vcfSamples, SAMPLE_EXPRESSIONS)); + numSamples = sampleNamesForEvaluation.size(); - sampleNames.add(ALL_SAMPLE_NAME); + if (Arrays.asList(STRATIFICATIONS_TO_USE).contains("Sample")) { + sampleNamesForStratification.addAll(sampleNamesForEvaluation); + } + sampleNamesForStratification.add(ALL_SAMPLE_NAME); // Initialize select expressions jexlExpressions.addAll(VariantContextUtils.initializeMatchExps(SELECT_NAMES, SELECT_EXPS)); @@ -411,21 +412,30 @@ public class VariantEvalWalker extends RodWalker implements Tr * * @param tracker the reference metadata tracker * @param ref the reference context + * @param compNames the comp track names * @param evalNames the evaluation track names * @return the set of allowable variation types */ - private EnumSet getAllowableVariationTypes(RefMetaDataTracker tracker, ReferenceContext ref, Set evalNames) { + private EnumSet getAllowableVariationTypes(RefMetaDataTracker tracker, ReferenceContext ref, Set compNames, Set evalNames) { EnumSet allowableTypes = EnumSet.of(VariantContext.Type.NO_VARIATION); if (tracker != null) { - Collection vcs = tracker.getVariantContexts(ref, evalNames, null, ref.getLocus(), true, false); + Collection evalvcs = tracker.getVariantContexts(ref, evalNames, null, ref.getLocus(), true, false); - for ( VariantContext vc : vcs ) { + for ( VariantContext vc : evalvcs ) { allowableTypes.add(vc.getType()); } - } else { - allowableTypes.add(VariantContext.Type.SNP); + + if (allowableTypes.size() == 1) { + // We didn't find any variation in the eval track, so now let's look at the comp track for allowable types + Collection compvcs = tracker.getVariantContexts(ref, compNames, null, ref.getLocus(), true, false); + + for ( VariantContext vc : compvcs ) { + allowableTypes.add(vc.getType()); + } + } } + return allowableTypes; } @@ -478,42 +488,41 @@ public class VariantEvalWalker extends RodWalker implements Tr * @param tracker the metadata tracker * @param ref the reference context * @param trackNames the list of track names to process - * @param sampleNames the list of samples to include * @param allowableTypes a set of allowable variation types * @param byFilter if false, only accept PASSing VariantContexts. Otherwise, accept both PASSing and filtered sites + * @param trackPerSample if false, don't stratify per sample (and don't cut up the VariantContext like we would need to do this) + * @param allowNoCalls if false, don't accept no-call loci from a variant track * @return a mapping of track names to a list of VariantContext objects */ - private HashMap> bindVariantContexts(RefMetaDataTracker tracker, ReferenceContext ref, Set trackNames, Set sampleNames, EnumSet allowableTypes, boolean byFilter) { + private HashMap> bindVariantContexts(RefMetaDataTracker tracker, ReferenceContext ref, Set trackNames, EnumSet allowableTypes, boolean byFilter, boolean trackPerSample, boolean allowNoCalls) { HashMap> bindings = new HashMap>(); for ( String trackName : trackNames ) { - Collection contexts = tracker == null ? null : tracker.getVariantContexts(ref, trackName, allowableTypes, ref.getLocus(), true, true); - - VariantContext vc = contexts != null && contexts.size() == 1 ? contexts.iterator().next() : null; - HashMap vcs = new HashMap(); + Collection contexts = tracker == null ? null : tracker.getVariantContexts(ref, trackName, allowableTypes, ref.getLocus(), true, true); + VariantContext vc = contexts != null && contexts.size() == 1 ? contexts.iterator().next() : null; + + // First, filter the VariantContext to represent only the samples for evaluation if ( vc != null ) { - ArrayList sampleNamesMinusAll = new ArrayList(); + VariantContext vcsub = vc; - for ( String sampleName : sampleNames ) { - VariantContext vcsub = vc; - - if (!sampleName.equals(ALL_SAMPLE_NAME)) { - vcsub = getSubsetOfVariantContext(vc, sampleName); - sampleNamesMinusAll.add(sampleName); - } - - if (byFilter || !vcsub.isFiltered()) { - vcs.put(sampleName, vcsub); - } + if (vc.hasGenotypes(sampleNamesForEvaluation)) { + vcsub = getSubsetOfVariantContext(vc, sampleNamesForEvaluation); } - if ( trackName.contains("eval") ) { - VariantContext vcsub = (sampleNamesMinusAll.size() > 0) ? getSubsetOfVariantContext(vc, sampleNamesMinusAll) : vc; + if ((byFilter || !vcsub.isFiltered()) && (allowNoCalls || vcsub.getType() != VariantContext.Type.NO_VARIATION)) { + vcs.put(ALL_SAMPLE_NAME, vcsub); + } - if (byFilter || !vcsub.isFiltered()) { - vcs.put(ALL_SAMPLE_NAME, vcsub); + // Now, if stratifying, split the subsetted vc per sample and add each as a new context + if ( trackPerSample ) { + for ( String sampleName : sampleNamesForEvaluation ) { + VariantContext samplevc = getSubsetOfVariantContext(vc, sampleName); + + if ((byFilter || !samplevc.isFiltered()) && (allowNoCalls || samplevc.getType() != VariantContext.Type.NO_VARIATION)) { + vcs.put(sampleName, samplevc); + } } } @@ -532,35 +541,25 @@ public class VariantEvalWalker extends RodWalker implements Tr * @param ref the reference context * @param compNames the list of comp names to process * @param evalNames the list of eval names to process - * @param sampleNames the list of samples to include * @return a mapping of track names to a list of VariantContext objects */ - private HashMap> getVariantContexts(RefMetaDataTracker tracker, ReferenceContext ref, Set compNames, Set evalNames, Set sampleNames) { + private HashMap> getVariantContexts(RefMetaDataTracker tracker, ReferenceContext ref, Set compNames, Set evalNames) { HashMap> vcs = new HashMap>(); - Set allSamplesList = new HashSet(); - allSamplesList.add(ALL_SAMPLE_NAME); + EnumSet allowableTypes = getAllowableVariationTypes(tracker, ref, compNames, evalNames); - EnumSet allowableTypes = getAllowableVariationTypes(tracker, ref, evalNames); - - boolean perSampleIsEnabled = false; boolean byFilter = false; + boolean perSampleIsEnabled = false; for (VariantStratifier vs : stratificationObjects) { - if (vs.getClass().getSimpleName().equals("Sample")) { - perSampleIsEnabled = true; - } else if (vs.getClass().getSimpleName().equals("Filter")) { + if (vs.getClass().getSimpleName().equals("Filter")) { byFilter = true; + } else if (vs.getClass().getSimpleName().equals("Sample")) { + perSampleIsEnabled = true; } } - HashMap> evalBindings; - if (perSampleIsEnabled) { - evalBindings = bindVariantContexts(tracker, ref, evalNames, sampleNames, allowableTypes, byFilter); - } else { - evalBindings = bindVariantContexts(tracker, ref, evalNames, allSamplesList, allowableTypes, byFilter); - } - - HashMap> compBindings = bindVariantContexts(tracker, ref, compNames, allSamplesList, allowableTypes, byFilter); + HashMap> evalBindings = bindVariantContexts(tracker, ref, evalNames, allowableTypes, byFilter, perSampleIsEnabled, true); + HashMap> compBindings = bindVariantContexts(tracker, ref, compNames, allowableTypes, byFilter, false, false); vcs.putAll(compBindings); vcs.putAll(evalBindings); @@ -657,13 +656,13 @@ public class VariantEvalWalker extends RodWalker implements Tr } // track sample vc - HashMap> vcs = getVariantContexts(tracker, ref, compNames, evalNames, sampleNames); + HashMap> vcs = getVariantContexts(tracker, ref, compNames, evalNames); for ( String compName : compNames ) { VariantContext comp = vcs.containsKey(compName) && vcs.get(compName) != null && vcs.get(compName).containsKey(ALL_SAMPLE_NAME) ? vcs.get(compName).get(ALL_SAMPLE_NAME) : null; for ( String evalName : evalNames ) { - for ( String sampleName : sampleNames ) { + for ( String sampleName : sampleNamesForStratification ) { VariantContext eval = vcs.containsKey(evalName) && vcs.get(evalName) != null ? vcs.get(evalName).get(sampleName) : null; HashMap> stateMap = new HashMap>(); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CompOverlap.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CompOverlap.java index 0097395ff..97e721a4b 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CompOverlap.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CompOverlap.java @@ -76,10 +76,13 @@ public class CompOverlap extends VariantEvaluator implements StandardEval { } public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - boolean expectingIndels = false; + //boolean expectingIndels = false; - boolean compIsGood = expectingIndels ? comp != null && comp.isNotFiltered() && comp.isIndel() : comp != null && comp.isNotFiltered() && comp.isSNP() ; - boolean evalIsGood = expectingIndels ? eval != null && eval.isIndel() : eval != null && eval.isSNP() ; + //boolean compIsGood = expectingIndels ? comp != null && comp.isNotFiltered() && comp.isIndel() : comp != null && comp.isNotFiltered() && comp.isSNP() ; + //boolean evalIsGood = expectingIndels ? eval != null && eval.isIndel() : eval != null && eval.isSNP() ; + + boolean compIsGood = comp != null && comp.isNotFiltered() && comp.isSNP() ; + boolean evalIsGood = eval != null && eval.isSNP() ; if (compIsGood) nCompSNPs++; // count the number of comp events if (evalIsGood) nEvalSNPs++; // count the number of eval events @@ -87,8 +90,9 @@ public class CompOverlap extends VariantEvaluator implements StandardEval { if (compIsGood && evalIsGood) { nSNPsAtComp++; - if (!discordantP(eval, comp)) // count whether we're concordant or not with the comp value + if (!discordantP(eval, comp)) { // count whether we're concordant or not with the comp value nConcordant++; + } } return null; // we don't capture any interesting sites diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Sample.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Sample.java index d30484f6d..6820eddb8 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Sample.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Sample.java @@ -15,7 +15,6 @@ public class Sample extends VariantStratifier { @Override public void initialize(Set jexlExpressions, Set compNames, Set knownNames, Set evalNames, Set sampleNames) { samples = new ArrayList(); - samples.add(VariantEvalWalker.ALL_SAMPLE_NAME); samples.addAll(sampleNames); } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/NewEvaluationContext.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/NewEvaluationContext.java index 3d76ffb4a..4faa9c42f 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/NewEvaluationContext.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/NewEvaluationContext.java @@ -52,7 +52,7 @@ public class NewEvaluationContext extends HashMap { public void apply(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context, VariantContext comp, VariantContext eval) { for ( VariantEvaluator evaluation : evaluationInstances.values() ) { - //synchronized ( evaluation ) { + //synchronized ( this ) { // we always call update0 in case the evaluation tracks things like number of bases covered //evaluation.update0(tracker, ref, context); @@ -69,9 +69,9 @@ public class NewEvaluationContext extends HashMap { break; case 2: - if (eval != null) { + //if (eval != null) { evaluation.update2(eval, comp, tracker, ref, context); - } + //} break; default: @@ -82,8 +82,10 @@ public class NewEvaluationContext extends HashMap { } public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - for ( VariantEvaluator evaluation : evaluationInstances.values() ) { - evaluation.update0(tracker, ref, context); - } + //synchronized (this) { + for ( VariantEvaluator evaluation : evaluationInstances.values() ) { + evaluation.update0(tracker, ref, context); + } + //} } } diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java index bad14a1b7..840accea8 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java @@ -29,7 +29,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { String extraArgs = "-L 1:1-10,000,000"; for (String tests : testsEnumerations) { WalkerTestSpec spec = new WalkerTestSpec(withSelect(tests, "DP < 50", "DP50") + " " + extraArgs + " -o %s", - 1, Arrays.asList("8ddc1c4a86cb3f4c22346497785b23e3")); + 1, Arrays.asList("0087b2a096aa99135b065aa9a0fff34c")); //executeTestParallel("testSelect1", spec); executeTest("testSelect1", spec); } @@ -50,7 +50,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { for (String vcfFile : vcfFiles) { WalkerTestSpec spec = new WalkerTestSpec(cmdRoot + " -B:eval,VCF " + validationDataLocation + vcfFile + " -B:comp,VCF " + validationDataLocation + "GenotypeConcordanceComp.vcf -noEV -EV GenotypeConcordance -o %s", 1, - Arrays.asList("7a6754176b573d14b6be7c808a04929d")); + Arrays.asList("bb16335f9510bcab2bd14a4299afd879")); //executeTestParallel("testVEGenotypeConcordance" + vcfFile, spec); executeTest("testVEGenotypeConcordance" + vcfFile, spec); } @@ -60,8 +60,8 @@ public class VariantEvalIntegrationTest extends WalkerTest { @Test public void testVESimple() { HashMap expectations = new HashMap(); - expectations.put("-L 1:1-10,000,000", "ffd1abed44faf1590d9026e478b2f8ee"); - expectations.put("-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -mvq 0 -EV MendelianViolationEvaluator", "32c2411fbf58bae5750c8229d15b98eb"); + expectations.put("-L 1:1-10,000,000", "e46e8e7457b338c4cfec62ee7aa51ffe"); + expectations.put("-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -mvq 0 -EV MendelianViolationEvaluator", "a0554ca0baa097a1761da3f7e8487833"); for ( Map.Entry entry : expectations.entrySet() ) { String extraArgs = entry.getKey(); @@ -84,10 +84,10 @@ public class VariantEvalIntegrationTest extends WalkerTest { " -B:comp_hapmap,VCF " + validationDataLocation + "CEU_hapmap_nogt_23.vcf"; - String matchingMD5 = "6c2fa6573cc57ef8795e9cce2b654d0b"; + String matchingMD5 = "5050011ad00b859faf2be679830bec90"; expectations.put("", matchingMD5); expectations.put(" -knownName comp_hapmap -knownName dbsnp", matchingMD5); - expectations.put(" -knownName comp_hapmap", "6c2fa6573cc57ef8795e9cce2b654d0b"); + expectations.put(" -knownName comp_hapmap", "5050011ad00b859faf2be679830bec90"); for (String tests : testsEnumerations) { for (Map.Entry entry : expectations.entrySet()) { String extraArgs2 = entry.getKey(); @@ -133,7 +133,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { @Test public void testCompVsEvalAC() { String extraArgs = "-T VariantEval -R "+b36KGReference+" -o %s -EV GenotypeConcordance -B:evalYRI,VCF /humgen/gsa-hpprojects/GATK/data/Validation_Data/yri.trio.gatk.ug.very.few.lines.vcf -B:compYRI,VCF /humgen/gsa-hpprojects/GATK/data/Validation_Data/yri.trio.gatk.fake.genotypes.ac.test.vcf"; - WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("929e4ec46fb6957c29803531322bb35e")); + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("113228ffa35e0f67b8e067860c04171f")); //executeTestParallel("testACDiscordanceAtAC1EvalAC2Comp",spec); executeTest("testCompVsEvalAC",spec); } @@ -150,6 +150,14 @@ public class VariantEvalIntegrationTest extends WalkerTest { executeTest("testTranches",spec); } + @Test + public void testCompOverlap() { + String extraArgs = "-T VariantEval -R " + b37KGReference + " -L " + validationDataLocation + "VariantEval/pacbio.hg19.intervals -B:comphapmap,vcf " + comparisonDataLocation + "Validated/HapMap/3.3/genotypes_r27_nr.b37_fwd.vcf -B:eval,vcf " + validationDataLocation + "VariantEval/pacbio.ts.recalibrated.vcf -noEV -EV CompOverlap -sn NA12878 -noST -ST Novelty -o %s"; + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("81377be26bf8fa32339d01c173428f7d")); + //executeTestParallel("testTranches",spec); + executeTest("testCompOverlap",spec); + } + // @Test // public void testVEValidatePass() { // String extraArgs = "-L 1:1-10,000,000";