From 7c5c656b46a56cd7d968c830cb0d412c74111322 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 28 Jul 2011 14:19:27 -0400 Subject: [PATCH] Uncovered fundamental accounting bug in VariantEval. Will be fixed by dev. team Problem is that Novelty sees multiple records at a site (SNP, INDEL) to calculate whether a site is novel, but VariantEvalWalker makes an arbitrary decision which to use for analysis and CompOverlap may not see a comp record of the same type as eval. So you get lines where the stratification is known but there are 10 novel sites! --- .../commandline/ArgumentTypeDescriptor.java | 5 +- .../sting/commandline/RodBinding.java | 22 +-- .../commandline/VariantContextRodBinding.java | 164 +++++++++--------- .../varianteval/evaluators/CompOverlap.java | 4 +- .../varianteval/stratifications/Novelty.java | 43 ++--- .../VariantEvalIntegrationTest.java | 10 +- 6 files changed, 114 insertions(+), 134 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java index 9b751cc3a..2e5cb4d62 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java @@ -299,8 +299,9 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor { ArgumentDefinition defaultDefinition = createDefaultArgumentDefinition(source); String value = getArgumentValue( defaultDefinition, matches ); try { - Constructor ctor = type.getConstructor(String.class, String.class, ParsingEngine.class); - RodBinding result = (RodBinding)ctor.newInstance(source.field.getName(), value, parsingEngine); + // TODO: determine type of internal value via Parameter + Constructor ctor = type.getConstructor(Class.class, String.class, String.class, ParsingEngine.class); + RodBinding result = (RodBinding)ctor.newInstance(null, source.field.getName(), value, parsingEngine); Tags tags = getArgumentTags(matches); parsingEngine.addTags(result,tags); return result; diff --git a/public/java/src/org/broadinstitute/sting/commandline/RodBinding.java b/public/java/src/org/broadinstitute/sting/commandline/RodBinding.java index ec2117127..d7d086824 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/RodBinding.java +++ b/public/java/src/org/broadinstitute/sting/commandline/RodBinding.java @@ -24,6 +24,7 @@ package org.broadinstitute.sting.commandline; +import org.broad.tribble.Feature; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; @@ -34,12 +35,14 @@ import java.util.List; * * There is no constraint on the type of the ROD bound. */ -public class RodBinding { +public class RodBinding { final String variableName; final String source; final ParsingEngine parser; + final Class type; - protected RodBinding(final String variableName, final String source, final ParsingEngine parser) { + protected RodBinding(Class type, final String variableName, final String source, final ParsingEngine parser) { + this.type = type; this.variableName = variableName; this.source = source; this.parser = parser; @@ -53,16 +56,16 @@ public class RodBinding { return source; } - public List getValues(final RefMetaDataTracker tracker) { - return tracker.getValues(variableName); + public List getValues(final RefMetaDataTracker tracker) { + return tracker.getValues(variableName, type); } - public List getValues(final RefMetaDataTracker tracker, final Class clazz) { - return tracker.getValues(variableName, clazz); - } +// public List getValues(final RefMetaDataTracker tracker, final Class clazz) { +// return tracker.getValues(variableName, clazz); +// } - public T getFirstValue(final RefMetaDataTracker tracker, final Class clazz) { - return tracker.getFirstValue(variableName, clazz); + public T getFirstValue(final RefMetaDataTracker tracker) { + return tracker.getFirstValue(variableName, type); } public boolean hasValues(final RefMetaDataTracker tracker) { @@ -80,5 +83,4 @@ public class RodBinding { public String toString() { return String.format("(RodBinding name=%s source=%s)", getVariableName(), getSource()); } - } diff --git a/public/java/src/org/broadinstitute/sting/commandline/VariantContextRodBinding.java b/public/java/src/org/broadinstitute/sting/commandline/VariantContextRodBinding.java index f5e29986e..a01149cb0 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/VariantContextRodBinding.java +++ b/public/java/src/org/broadinstitute/sting/commandline/VariantContextRodBinding.java @@ -1,82 +1,82 @@ -/* - * Copyright (c) 2011, The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.commandline; - -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; - -import java.util.Collection; - -/** - * A RodBinding representing a walker argument that gets bound to a ROD track containing VariantContexts - */ -public class VariantContextRodBinding extends RodBinding { - /** - * Create a new RodBinding specialized to provide VariantContexts. - * @param variableName the name of the field in the walker that we will bind the ROD track too - * @param sourceFile the data source from which we will read the VCs - * @param parser the Engine parser used to obtain information about this argument, such as its underlying file type - */ - protected VariantContextRodBinding(final String variableName, final String sourceFile, final ParsingEngine parser) { - super(variableName, sourceFile, parser); - } - - /** - * Forwarding method to identical tracker method - */ - public Collection getVariantContexts(final RefMetaDataTracker tracker, - final ReferenceContext ref, - final GenomeLoc curLocation, - final boolean requireStartHere, - final boolean takeFirstOnly ) { - return tracker.getVariantContexts(ref, variableName, curLocation, requireStartHere, takeFirstOnly); - } - - /** - * Forwarding method to identical tracker method - * @param tracker - * @param ref - * @param curLocation - * @param requireStartHere - * @return - */ - public VariantContext getVariantContext(final RefMetaDataTracker tracker, - final ReferenceContext ref, - final GenomeLoc curLocation, - final boolean requireStartHere ) { - return tracker.getVariantContext(ref, variableName, curLocation, requireStartHere); - } - - /** - * Forwarding method to identical tracker method - */ - public VariantContext getVariantContext(final RefMetaDataTracker tracker, - final ReferenceContext ref, - final GenomeLoc curLocation) { - return tracker.getVariantContext(ref, variableName, curLocation); - } -} +///* +// * Copyright (c) 2011, The Broad Institute +// * +// * Permission is hereby granted, free of charge, to any person +// * obtaining a copy of this software and associated documentation +// * files (the "Software"), to deal in the Software without +// * restriction, including without limitation the rights to use, +// * copy, modify, merge, publish, distribute, sublicense, and/or sell +// * copies of the Software, and to permit persons to whom the +// * Software is furnished to do so, subject to the following +// * conditions: +// * +// * The above copyright notice and this permission notice shall be +// * included in all copies or substantial portions of the Software. +// * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// * OTHER DEALINGS IN THE SOFTWARE. +// */ +// +//package org.broadinstitute.sting.commandline; +// +//import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +//import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +//import org.broadinstitute.sting.utils.GenomeLoc; +//import org.broadinstitute.sting.utils.variantcontext.VariantContext; +// +//import java.util.Collection; +// +///** +// * A RodBinding representing a walker argument that gets bound to a ROD track containing VariantContexts +// */ +//public class VariantContextRodBinding extends RodBinding { +// /** +// * Create a new RodBinding specialized to provide VariantContexts. +// * @param variableName the name of the field in the walker that we will bind the ROD track too +// * @param sourceFile the data source from which we will read the VCs +// * @param parser the Engine parser used to obtain information about this argument, such as its underlying file type +// */ +// protected VariantContextRodBinding(final String variableName, final String sourceFile, final ParsingEngine parser) { +// super(variableName, sourceFile, parser); +// } +// +// /** +// * Forwarding method to identical tracker method +// */ +// public Collection getVariantContexts(final RefMetaDataTracker tracker, +// final ReferenceContext ref, +// final GenomeLoc curLocation, +// final boolean requireStartHere, +// final boolean takeFirstOnly ) { +// return tracker.getVariantContexts(ref, variableName, curLocation, requireStartHere, takeFirstOnly); +// } +// +// /** +// * Forwarding method to identical tracker method +// * @param tracker +// * @param ref +// * @param curLocation +// * @param requireStartHere +// * @return +// */ +// public VariantContext getVariantContext(final RefMetaDataTracker tracker, +// final ReferenceContext ref, +// final GenomeLoc curLocation, +// final boolean requireStartHere ) { +// return tracker.getVariantContext(ref, variableName, curLocation, requireStartHere); +// } +// +// /** +// * Forwarding method to identical tracker method +// */ +// public VariantContext getVariantContext(final RefMetaDataTracker tracker, +// final ReferenceContext ref, +// final GenomeLoc curLocation) { +// return tracker.getVariantContext(ref, variableName, curLocation); +// } +//} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CompOverlap.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CompOverlap.java index 255a54737..2ea64c49c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CompOverlap.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CompOverlap.java @@ -76,9 +76,7 @@ public class CompOverlap extends VariantEvaluator implements StandardEval { public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { boolean evalIsGood = eval != null && eval.isVariant(); - boolean expectingIndels = eval != null && eval.isIndel(); - - boolean compIsGood = expectingIndels ? comp != null && comp.isNotFiltered() && comp.isIndel() : comp != null && comp.isNotFiltered() && comp.isSNP() ; + boolean compIsGood = comp != null && comp.isNotFiltered() && (eval == null || comp.getType() == eval.getType()); if (compIsGood) nCompVariants++; // count the number of comp events if (evalIsGood) nEvalVariants++; // count the number of eval events diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java index c1611649f..5bdec837e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java @@ -5,24 +5,17 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp; import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.ArrayList; -import java.util.Collection; -import java.util.EnumSet; -import java.util.Set; +import java.util.*; public class Novelty extends VariantStratifier implements StandardStratification { // needs the variant contexts and known names private Set knownNames; - private ArrayList states; + final private ArrayList states = new ArrayList(Arrays.asList("all", "known", "novel")); + @Override public void initialize(Set jexlExpressions, Set compNames, Set knownNames, Set evalNames, Set sampleNames, Set contigNames) { this.knownNames = knownNames; - - states = new ArrayList(); - states.add("all"); - states.add("known"); - states.add("novel"); } public ArrayList getAllStates() { @@ -30,32 +23,18 @@ public class Novelty extends VariantStratifier implements StandardStratification } public ArrayList getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { - boolean isNovel = true; - - if (tracker != null) { - for (String knownName : knownNames) { - if (tracker.hasValues(knownName)) { - EnumSet allowableTypes = EnumSet.of(VariantContext.Type.NO_VARIATION); - if (eval != null) { - allowableTypes.add(eval.getType()); + if (tracker != null && eval != null) { + for (final String knownName : knownNames) { + final Collection knownComps = tracker.getVariantContexts(ref, knownName, ref.getLocus(), true, false); + for ( final VariantContext c : knownComps ) { + // loop over sites, looking for something that matches the type eval + if ( eval.getType() == c.getType() ) { + return new ArrayList(Arrays.asList("all", "known")); } - - Collection knownComps = tracker.getVariantContexts(ref, knownName, ref.getLocus(), true, true); - for ( VariantContext c : knownComps ) - if ( allowableTypes.contains(c.getType()) ) { - isNovel = false; - break; - } - - break; } } } - ArrayList relevantStates = new ArrayList(); - relevantStates.add("all"); - relevantStates.add(isNovel ? "novel" : "known"); - - return relevantStates; + return new ArrayList(Arrays.asList("all", "novel")); } } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java index 23c606ad0..38663ad42 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java @@ -249,7 +249,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { String extraArgs = "-L 1:1-10,000,000"; for (String tests : testsEnumerations) { WalkerTestSpec spec = new WalkerTestSpec(withSelect(tests, "DP < 50", "DP50") + " " + extraArgs + " -ST CpG -o %s", - 1, Arrays.asList("cdbe47ea01b9dd79ff1c5ce6f5fa8bec")); + 1, Arrays.asList("96860dedea0fa6b46c07f46b847fea42")); executeTestParallel("testSelect1", spec); } } @@ -299,7 +299,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { " -D " + dbsnp + " -B:evalBI,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" + " -noST -ST Novelty -o %s"; - WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("5b1fc9a4066aca61f1b5f7b933ad37d9")); + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("58fdc6c42fade3007537bb99fb3ce738")); executeTestParallel("testEvalTrackWithoutGenotypes",spec); } @@ -313,7 +313,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { " -B:evalBI,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" + " -B:evalBC,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bc.sites.vcf" + " -noST -ST Novelty -o %s"; - WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("6d902d9d4d8fef5219a43e416a51cee6")); + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("34df2815d27e5e62f1694731a7e7953c")); executeTestParallel("testMultipleEvalTracksWithoutGenotypes",spec); } @@ -330,13 +330,13 @@ public class VariantEvalIntegrationTest extends WalkerTest { " -noST -noEV -ST Novelty -EV CompOverlap" + " -o %s"; - WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("55a1c53bced20701c56accfc3eb782a7")); + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("20332902ae36a84b2fd80405410815f1")); executeTestParallel("testMultipleCompTracks",spec); } @Test public void testPerSampleAndSubsettedSampleHaveSameResults() { - String md5 = "454a1750fd36525f24172b21af5f49de"; + String md5 = "9d61f6e2c8592dcf616712a2c587b2af"; WalkerTestSpec spec = new WalkerTestSpec( buildCommandLine(