Uncovered fundamental accounting bug in VariantEval. Will be fixed by dev. team

Problem is that Novelty sees multiple records at a site (SNP, INDEL) to calculate whether a site is novel, but VariantEvalWalker makes an arbitrary decision which to use for analysis and CompOverlap may not see a comp record of the same type as eval.  So you get lines where the stratification is known but there are 10 novel sites!
This commit is contained in:
Mark DePristo 2011-07-28 14:19:27 -04:00
parent f7a126722b
commit 7c5c656b46
6 changed files with 114 additions and 134 deletions

View File

@ -299,8 +299,9 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
ArgumentDefinition defaultDefinition = createDefaultArgumentDefinition(source);
String value = getArgumentValue( defaultDefinition, matches );
try {
Constructor ctor = type.getConstructor(String.class, String.class, ParsingEngine.class);
RodBinding result = (RodBinding)ctor.newInstance(source.field.getName(), value, parsingEngine);
// TODO: determine type of internal value via Parameter
Constructor ctor = type.getConstructor(Class.class, String.class, String.class, ParsingEngine.class);
RodBinding result = (RodBinding)ctor.newInstance(null, source.field.getName(), value, parsingEngine);
Tags tags = getArgumentTags(matches);
parsingEngine.addTags(result,tags);
return result;

View File

@ -24,6 +24,7 @@
package org.broadinstitute.sting.commandline;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
@ -34,12 +35,14 @@ import java.util.List;
*
* There is no constraint on the type of the ROD bound.
*/
public class RodBinding {
public class RodBinding<T extends Feature> {
final String variableName;
final String source;
final ParsingEngine parser;
final Class<T> type;
protected RodBinding(final String variableName, final String source, final ParsingEngine parser) {
protected RodBinding(Class<T> type, final String variableName, final String source, final ParsingEngine parser) {
this.type = type;
this.variableName = variableName;
this.source = source;
this.parser = parser;
@ -53,16 +56,16 @@ public class RodBinding {
return source;
}
public List<Object> getValues(final RefMetaDataTracker tracker) {
return tracker.getValues(variableName);
public List<T> getValues(final RefMetaDataTracker tracker) {
return tracker.getValues(variableName, type);
}
public <T> List<T> getValues(final RefMetaDataTracker tracker, final Class<T> clazz) {
return tracker.getValues(variableName, clazz);
}
// public <T> List<T> getValues(final RefMetaDataTracker tracker, final Class<T> clazz) {
// return tracker.getValues(variableName, clazz);
// }
public <T> T getFirstValue(final RefMetaDataTracker tracker, final Class<T> clazz) {
return tracker.getFirstValue(variableName, clazz);
public T getFirstValue(final RefMetaDataTracker tracker) {
return tracker.getFirstValue(variableName, type);
}
public boolean hasValues(final RefMetaDataTracker tracker) {
@ -80,5 +83,4 @@ public class RodBinding {
public String toString() {
return String.format("(RodBinding name=%s source=%s)", getVariableName(), getSource());
}
}

View File

@ -1,82 +1,82 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.commandline;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.Collection;
/**
* A RodBinding representing a walker argument that gets bound to a ROD track containing VariantContexts
*/
public class VariantContextRodBinding extends RodBinding {
/**
* Create a new RodBinding specialized to provide VariantContexts.
* @param variableName the name of the field in the walker that we will bind the ROD track too
* @param sourceFile the data source from which we will read the VCs
* @param parser the Engine parser used to obtain information about this argument, such as its underlying file type
*/
protected VariantContextRodBinding(final String variableName, final String sourceFile, final ParsingEngine parser) {
super(variableName, sourceFile, parser);
}
/**
* Forwarding method to identical tracker method
*/
public Collection<VariantContext> getVariantContexts(final RefMetaDataTracker tracker,
final ReferenceContext ref,
final GenomeLoc curLocation,
final boolean requireStartHere,
final boolean takeFirstOnly ) {
return tracker.getVariantContexts(ref, variableName, curLocation, requireStartHere, takeFirstOnly);
}
/**
* Forwarding method to identical tracker method
* @param tracker
* @param ref
* @param curLocation
* @param requireStartHere
* @return
*/
public VariantContext getVariantContext(final RefMetaDataTracker tracker,
final ReferenceContext ref,
final GenomeLoc curLocation,
final boolean requireStartHere ) {
return tracker.getVariantContext(ref, variableName, curLocation, requireStartHere);
}
/**
* Forwarding method to identical tracker method
*/
public VariantContext getVariantContext(final RefMetaDataTracker tracker,
final ReferenceContext ref,
final GenomeLoc curLocation) {
return tracker.getVariantContext(ref, variableName, curLocation);
}
}
///*
// * Copyright (c) 2011, The Broad Institute
// *
// * Permission is hereby granted, free of charge, to any person
// * obtaining a copy of this software and associated documentation
// * files (the "Software"), to deal in the Software without
// * restriction, including without limitation the rights to use,
// * copy, modify, merge, publish, distribute, sublicense, and/or sell
// * copies of the Software, and to permit persons to whom the
// * Software is furnished to do so, subject to the following
// * conditions:
// *
// * The above copyright notice and this permission notice shall be
// * included in all copies or substantial portions of the Software.
// * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// * OTHER DEALINGS IN THE SOFTWARE.
// */
//
//package org.broadinstitute.sting.commandline;
//
//import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
//import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
//import org.broadinstitute.sting.utils.GenomeLoc;
//import org.broadinstitute.sting.utils.variantcontext.VariantContext;
//
//import java.util.Collection;
//
///**
// * A RodBinding representing a walker argument that gets bound to a ROD track containing VariantContexts
// */
//public class VariantContextRodBinding extends RodBinding {
// /**
// * Create a new RodBinding specialized to provide VariantContexts.
// * @param variableName the name of the field in the walker that we will bind the ROD track too
// * @param sourceFile the data source from which we will read the VCs
// * @param parser the Engine parser used to obtain information about this argument, such as its underlying file type
// */
// protected VariantContextRodBinding(final String variableName, final String sourceFile, final ParsingEngine parser) {
// super(variableName, sourceFile, parser);
// }
//
// /**
// * Forwarding method to identical tracker method
// */
// public Collection<VariantContext> getVariantContexts(final RefMetaDataTracker tracker,
// final ReferenceContext ref,
// final GenomeLoc curLocation,
// final boolean requireStartHere,
// final boolean takeFirstOnly ) {
// return tracker.getVariantContexts(ref, variableName, curLocation, requireStartHere, takeFirstOnly);
// }
//
// /**
// * Forwarding method to identical tracker method
// * @param tracker
// * @param ref
// * @param curLocation
// * @param requireStartHere
// * @return
// */
// public VariantContext getVariantContext(final RefMetaDataTracker tracker,
// final ReferenceContext ref,
// final GenomeLoc curLocation,
// final boolean requireStartHere ) {
// return tracker.getVariantContext(ref, variableName, curLocation, requireStartHere);
// }
//
// /**
// * Forwarding method to identical tracker method
// */
// public VariantContext getVariantContext(final RefMetaDataTracker tracker,
// final ReferenceContext ref,
// final GenomeLoc curLocation) {
// return tracker.getVariantContext(ref, variableName, curLocation);
// }
//}

View File

@ -76,9 +76,7 @@ public class CompOverlap extends VariantEvaluator implements StandardEval {
public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
boolean evalIsGood = eval != null && eval.isVariant();
boolean expectingIndels = eval != null && eval.isIndel();
boolean compIsGood = expectingIndels ? comp != null && comp.isNotFiltered() && comp.isIndel() : comp != null && comp.isNotFiltered() && comp.isSNP() ;
boolean compIsGood = comp != null && comp.isNotFiltered() && (eval == null || comp.getType() == eval.getType());
if (compIsGood) nCompVariants++; // count the number of comp events
if (evalIsGood) nEvalVariants++; // count the number of eval events

View File

@ -5,24 +5,17 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.ArrayList;
import java.util.Collection;
import java.util.EnumSet;
import java.util.Set;
import java.util.*;
public class Novelty extends VariantStratifier implements StandardStratification {
// needs the variant contexts and known names
private Set<String> knownNames;
private ArrayList<String> states;
final private ArrayList<String> states = new ArrayList<String>(Arrays.asList("all", "known", "novel"));
@Override
public void initialize(Set<SortableJexlVCMatchExp> jexlExpressions, Set<String> compNames, Set<String> knownNames, Set<String> evalNames, Set<String> sampleNames, Set<String> contigNames) {
this.knownNames = knownNames;
states = new ArrayList<String>();
states.add("all");
states.add("known");
states.add("novel");
}
public ArrayList<String> getAllStates() {
@ -30,32 +23,18 @@ public class Novelty extends VariantStratifier implements StandardStratification
}
public ArrayList<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
boolean isNovel = true;
if (tracker != null) {
for (String knownName : knownNames) {
if (tracker.hasValues(knownName)) {
EnumSet<VariantContext.Type> allowableTypes = EnumSet.of(VariantContext.Type.NO_VARIATION);
if (eval != null) {
allowableTypes.add(eval.getType());
if (tracker != null && eval != null) {
for (final String knownName : knownNames) {
final Collection<VariantContext> knownComps = tracker.getVariantContexts(ref, knownName, ref.getLocus(), true, false);
for ( final VariantContext c : knownComps ) {
// loop over sites, looking for something that matches the type eval
if ( eval.getType() == c.getType() ) {
return new ArrayList<String>(Arrays.asList("all", "known"));
}
Collection<VariantContext> knownComps = tracker.getVariantContexts(ref, knownName, ref.getLocus(), true, true);
for ( VariantContext c : knownComps )
if ( allowableTypes.contains(c.getType()) ) {
isNovel = false;
break;
}
break;
}
}
}
ArrayList<String> relevantStates = new ArrayList<String>();
relevantStates.add("all");
relevantStates.add(isNovel ? "novel" : "known");
return relevantStates;
return new ArrayList<String>(Arrays.asList("all", "novel"));
}
}

View File

@ -249,7 +249,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
String extraArgs = "-L 1:1-10,000,000";
for (String tests : testsEnumerations) {
WalkerTestSpec spec = new WalkerTestSpec(withSelect(tests, "DP < 50", "DP50") + " " + extraArgs + " -ST CpG -o %s",
1, Arrays.asList("cdbe47ea01b9dd79ff1c5ce6f5fa8bec"));
1, Arrays.asList("96860dedea0fa6b46c07f46b847fea42"));
executeTestParallel("testSelect1", spec);
}
}
@ -299,7 +299,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
" -D " + dbsnp +
" -B:evalBI,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" +
" -noST -ST Novelty -o %s";
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("5b1fc9a4066aca61f1b5f7b933ad37d9"));
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("58fdc6c42fade3007537bb99fb3ce738"));
executeTestParallel("testEvalTrackWithoutGenotypes",spec);
}
@ -313,7 +313,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
" -B:evalBI,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" +
" -B:evalBC,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bc.sites.vcf" +
" -noST -ST Novelty -o %s";
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("6d902d9d4d8fef5219a43e416a51cee6"));
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("34df2815d27e5e62f1694731a7e7953c"));
executeTestParallel("testMultipleEvalTracksWithoutGenotypes",spec);
}
@ -330,13 +330,13 @@ public class VariantEvalIntegrationTest extends WalkerTest {
" -noST -noEV -ST Novelty -EV CompOverlap" +
" -o %s";
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("55a1c53bced20701c56accfc3eb782a7"));
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("20332902ae36a84b2fd80405410815f1"));
executeTestParallel("testMultipleCompTracks",spec);
}
@Test
public void testPerSampleAndSubsettedSampleHaveSameResults() {
String md5 = "454a1750fd36525f24172b21af5f49de";
String md5 = "9d61f6e2c8592dcf616712a2c587b2af";
WalkerTestSpec spec = new WalkerTestSpec(
buildCommandLine(