Uncovered fundamental accounting bug in VariantEval. Will be fixed by dev. team

Problem is that Novelty sees multiple records at a site (SNP, INDEL) to calculate whether a site is novel, but VariantEvalWalker makes an arbitrary decision which to use for analysis and CompOverlap may not see a comp record of the same type as eval.  So you get lines where the stratification is known but there are 10 novel sites!
This commit is contained in:
Mark DePristo 2011-07-28 14:19:27 -04:00
parent f7a126722b
commit 7c5c656b46
6 changed files with 114 additions and 134 deletions

View File

@ -299,8 +299,9 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
ArgumentDefinition defaultDefinition = createDefaultArgumentDefinition(source); ArgumentDefinition defaultDefinition = createDefaultArgumentDefinition(source);
String value = getArgumentValue( defaultDefinition, matches ); String value = getArgumentValue( defaultDefinition, matches );
try { try {
Constructor ctor = type.getConstructor(String.class, String.class, ParsingEngine.class); // TODO: determine type of internal value via Parameter
RodBinding result = (RodBinding)ctor.newInstance(source.field.getName(), value, parsingEngine); Constructor ctor = type.getConstructor(Class.class, String.class, String.class, ParsingEngine.class);
RodBinding result = (RodBinding)ctor.newInstance(null, source.field.getName(), value, parsingEngine);
Tags tags = getArgumentTags(matches); Tags tags = getArgumentTags(matches);
parsingEngine.addTags(result,tags); parsingEngine.addTags(result,tags);
return result; return result;

View File

@ -24,6 +24,7 @@
package org.broadinstitute.sting.commandline; package org.broadinstitute.sting.commandline;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
@ -34,12 +35,14 @@ import java.util.List;
* *
* There is no constraint on the type of the ROD bound. * There is no constraint on the type of the ROD bound.
*/ */
public class RodBinding { public class RodBinding<T extends Feature> {
final String variableName; final String variableName;
final String source; final String source;
final ParsingEngine parser; final ParsingEngine parser;
final Class<T> type;
protected RodBinding(final String variableName, final String source, final ParsingEngine parser) { protected RodBinding(Class<T> type, final String variableName, final String source, final ParsingEngine parser) {
this.type = type;
this.variableName = variableName; this.variableName = variableName;
this.source = source; this.source = source;
this.parser = parser; this.parser = parser;
@ -53,16 +56,16 @@ public class RodBinding {
return source; return source;
} }
public List<Object> getValues(final RefMetaDataTracker tracker) { public List<T> getValues(final RefMetaDataTracker tracker) {
return tracker.getValues(variableName); return tracker.getValues(variableName, type);
} }
public <T> List<T> getValues(final RefMetaDataTracker tracker, final Class<T> clazz) { // public <T> List<T> getValues(final RefMetaDataTracker tracker, final Class<T> clazz) {
return tracker.getValues(variableName, clazz); // return tracker.getValues(variableName, clazz);
} // }
public <T> T getFirstValue(final RefMetaDataTracker tracker, final Class<T> clazz) { public T getFirstValue(final RefMetaDataTracker tracker) {
return tracker.getFirstValue(variableName, clazz); return tracker.getFirstValue(variableName, type);
} }
public boolean hasValues(final RefMetaDataTracker tracker) { public boolean hasValues(final RefMetaDataTracker tracker) {
@ -80,5 +83,4 @@ public class RodBinding {
public String toString() { public String toString() {
return String.format("(RodBinding name=%s source=%s)", getVariableName(), getSource()); return String.format("(RodBinding name=%s source=%s)", getVariableName(), getSource());
} }
} }

View File

@ -1,82 +1,82 @@
/* ///*
* Copyright (c) 2011, The Broad Institute // * Copyright (c) 2011, The Broad Institute
* // *
* Permission is hereby granted, free of charge, to any person // * Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation // * obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without // * files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, // * restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell // * copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the // * copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following // * Software is furnished to do so, subject to the following
* conditions: // * conditions:
* // *
* The above copyright notice and this permission notice shall be // * The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software. // * included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, // * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES // * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND // * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT // * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, // * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING // * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR // * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE. // * OTHER DEALINGS IN THE SOFTWARE.
*/ // */
//
package org.broadinstitute.sting.commandline; //package org.broadinstitute.sting.commandline;
//
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; //import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; //import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.GenomeLoc; //import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.variantcontext.VariantContext; //import org.broadinstitute.sting.utils.variantcontext.VariantContext;
//
import java.util.Collection; //import java.util.Collection;
//
/** ///**
* A RodBinding representing a walker argument that gets bound to a ROD track containing VariantContexts // * A RodBinding representing a walker argument that gets bound to a ROD track containing VariantContexts
*/ // */
public class VariantContextRodBinding extends RodBinding { //public class VariantContextRodBinding extends RodBinding {
/** // /**
* Create a new RodBinding specialized to provide VariantContexts. // * Create a new RodBinding specialized to provide VariantContexts.
* @param variableName the name of the field in the walker that we will bind the ROD track too // * @param variableName the name of the field in the walker that we will bind the ROD track too
* @param sourceFile the data source from which we will read the VCs // * @param sourceFile the data source from which we will read the VCs
* @param parser the Engine parser used to obtain information about this argument, such as its underlying file type // * @param parser the Engine parser used to obtain information about this argument, such as its underlying file type
*/ // */
protected VariantContextRodBinding(final String variableName, final String sourceFile, final ParsingEngine parser) { // protected VariantContextRodBinding(final String variableName, final String sourceFile, final ParsingEngine parser) {
super(variableName, sourceFile, parser); // super(variableName, sourceFile, parser);
} // }
//
/** // /**
* Forwarding method to identical tracker method // * Forwarding method to identical tracker method
*/ // */
public Collection<VariantContext> getVariantContexts(final RefMetaDataTracker tracker, // public Collection<VariantContext> getVariantContexts(final RefMetaDataTracker tracker,
final ReferenceContext ref, // final ReferenceContext ref,
final GenomeLoc curLocation, // final GenomeLoc curLocation,
final boolean requireStartHere, // final boolean requireStartHere,
final boolean takeFirstOnly ) { // final boolean takeFirstOnly ) {
return tracker.getVariantContexts(ref, variableName, curLocation, requireStartHere, takeFirstOnly); // return tracker.getVariantContexts(ref, variableName, curLocation, requireStartHere, takeFirstOnly);
} // }
//
/** // /**
* Forwarding method to identical tracker method // * Forwarding method to identical tracker method
* @param tracker // * @param tracker
* @param ref // * @param ref
* @param curLocation // * @param curLocation
* @param requireStartHere // * @param requireStartHere
* @return // * @return
*/ // */
public VariantContext getVariantContext(final RefMetaDataTracker tracker, // public VariantContext getVariantContext(final RefMetaDataTracker tracker,
final ReferenceContext ref, // final ReferenceContext ref,
final GenomeLoc curLocation, // final GenomeLoc curLocation,
final boolean requireStartHere ) { // final boolean requireStartHere ) {
return tracker.getVariantContext(ref, variableName, curLocation, requireStartHere); // return tracker.getVariantContext(ref, variableName, curLocation, requireStartHere);
} // }
//
/** // /**
* Forwarding method to identical tracker method // * Forwarding method to identical tracker method
*/ // */
public VariantContext getVariantContext(final RefMetaDataTracker tracker, // public VariantContext getVariantContext(final RefMetaDataTracker tracker,
final ReferenceContext ref, // final ReferenceContext ref,
final GenomeLoc curLocation) { // final GenomeLoc curLocation) {
return tracker.getVariantContext(ref, variableName, curLocation); // return tracker.getVariantContext(ref, variableName, curLocation);
} // }
} //}

View File

@ -76,9 +76,7 @@ public class CompOverlap extends VariantEvaluator implements StandardEval {
public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
boolean evalIsGood = eval != null && eval.isVariant(); boolean evalIsGood = eval != null && eval.isVariant();
boolean expectingIndels = eval != null && eval.isIndel(); boolean compIsGood = comp != null && comp.isNotFiltered() && (eval == null || comp.getType() == eval.getType());
boolean compIsGood = expectingIndels ? comp != null && comp.isNotFiltered() && comp.isIndel() : comp != null && comp.isNotFiltered() && comp.isSNP() ;
if (compIsGood) nCompVariants++; // count the number of comp events if (compIsGood) nCompVariants++; // count the number of comp events
if (evalIsGood) nEvalVariants++; // count the number of eval events if (evalIsGood) nEvalVariants++; // count the number of eval events

View File

@ -5,24 +5,17 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp; import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp;
import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.ArrayList; import java.util.*;
import java.util.Collection;
import java.util.EnumSet;
import java.util.Set;
public class Novelty extends VariantStratifier implements StandardStratification { public class Novelty extends VariantStratifier implements StandardStratification {
// needs the variant contexts and known names // needs the variant contexts and known names
private Set<String> knownNames; private Set<String> knownNames;
private ArrayList<String> states; final private ArrayList<String> states = new ArrayList<String>(Arrays.asList("all", "known", "novel"));
@Override @Override
public void initialize(Set<SortableJexlVCMatchExp> jexlExpressions, Set<String> compNames, Set<String> knownNames, Set<String> evalNames, Set<String> sampleNames, Set<String> contigNames) { public void initialize(Set<SortableJexlVCMatchExp> jexlExpressions, Set<String> compNames, Set<String> knownNames, Set<String> evalNames, Set<String> sampleNames, Set<String> contigNames) {
this.knownNames = knownNames; this.knownNames = knownNames;
states = new ArrayList<String>();
states.add("all");
states.add("known");
states.add("novel");
} }
public ArrayList<String> getAllStates() { public ArrayList<String> getAllStates() {
@ -30,32 +23,18 @@ public class Novelty extends VariantStratifier implements StandardStratification
} }
public ArrayList<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { public ArrayList<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
boolean isNovel = true; if (tracker != null && eval != null) {
for (final String knownName : knownNames) {
if (tracker != null) { final Collection<VariantContext> knownComps = tracker.getVariantContexts(ref, knownName, ref.getLocus(), true, false);
for (String knownName : knownNames) { for ( final VariantContext c : knownComps ) {
if (tracker.hasValues(knownName)) { // loop over sites, looking for something that matches the type eval
EnumSet<VariantContext.Type> allowableTypes = EnumSet.of(VariantContext.Type.NO_VARIATION); if ( eval.getType() == c.getType() ) {
if (eval != null) { return new ArrayList<String>(Arrays.asList("all", "known"));
allowableTypes.add(eval.getType());
} }
Collection<VariantContext> knownComps = tracker.getVariantContexts(ref, knownName, ref.getLocus(), true, true);
for ( VariantContext c : knownComps )
if ( allowableTypes.contains(c.getType()) ) {
isNovel = false;
break;
}
break;
} }
} }
} }
ArrayList<String> relevantStates = new ArrayList<String>(); return new ArrayList<String>(Arrays.asList("all", "novel"));
relevantStates.add("all");
relevantStates.add(isNovel ? "novel" : "known");
return relevantStates;
} }
} }

View File

@ -249,7 +249,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
String extraArgs = "-L 1:1-10,000,000"; String extraArgs = "-L 1:1-10,000,000";
for (String tests : testsEnumerations) { for (String tests : testsEnumerations) {
WalkerTestSpec spec = new WalkerTestSpec(withSelect(tests, "DP < 50", "DP50") + " " + extraArgs + " -ST CpG -o %s", WalkerTestSpec spec = new WalkerTestSpec(withSelect(tests, "DP < 50", "DP50") + " " + extraArgs + " -ST CpG -o %s",
1, Arrays.asList("cdbe47ea01b9dd79ff1c5ce6f5fa8bec")); 1, Arrays.asList("96860dedea0fa6b46c07f46b847fea42"));
executeTestParallel("testSelect1", spec); executeTestParallel("testSelect1", spec);
} }
} }
@ -299,7 +299,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
" -D " + dbsnp + " -D " + dbsnp +
" -B:evalBI,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" + " -B:evalBI,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" +
" -noST -ST Novelty -o %s"; " -noST -ST Novelty -o %s";
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("5b1fc9a4066aca61f1b5f7b933ad37d9")); WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("58fdc6c42fade3007537bb99fb3ce738"));
executeTestParallel("testEvalTrackWithoutGenotypes",spec); executeTestParallel("testEvalTrackWithoutGenotypes",spec);
} }
@ -313,7 +313,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
" -B:evalBI,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" + " -B:evalBI,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" +
" -B:evalBC,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bc.sites.vcf" + " -B:evalBC,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bc.sites.vcf" +
" -noST -ST Novelty -o %s"; " -noST -ST Novelty -o %s";
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("6d902d9d4d8fef5219a43e416a51cee6")); WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("34df2815d27e5e62f1694731a7e7953c"));
executeTestParallel("testMultipleEvalTracksWithoutGenotypes",spec); executeTestParallel("testMultipleEvalTracksWithoutGenotypes",spec);
} }
@ -330,13 +330,13 @@ public class VariantEvalIntegrationTest extends WalkerTest {
" -noST -noEV -ST Novelty -EV CompOverlap" + " -noST -noEV -ST Novelty -EV CompOverlap" +
" -o %s"; " -o %s";
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("55a1c53bced20701c56accfc3eb782a7")); WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("20332902ae36a84b2fd80405410815f1"));
executeTestParallel("testMultipleCompTracks",spec); executeTestParallel("testMultipleCompTracks",spec);
} }
@Test @Test
public void testPerSampleAndSubsettedSampleHaveSameResults() { public void testPerSampleAndSubsettedSampleHaveSameResults() {
String md5 = "454a1750fd36525f24172b21af5f49de"; String md5 = "9d61f6e2c8592dcf616712a2c587b2af";
WalkerTestSpec spec = new WalkerTestSpec( WalkerTestSpec spec = new WalkerTestSpec(
buildCommandLine( buildCommandLine(