Uncovered fundamental accounting bug in VariantEval. Will be fixed by dev. team
Problem is that Novelty sees multiple records at a site (SNP, INDEL) to calculate whether a site is novel, but VariantEvalWalker makes an arbitrary decision which to use for analysis and CompOverlap may not see a comp record of the same type as eval. So you get lines where the stratification is known but there are 10 novel sites!
This commit is contained in:
parent
f7a126722b
commit
7c5c656b46
|
|
@ -299,8 +299,9 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
||||||
ArgumentDefinition defaultDefinition = createDefaultArgumentDefinition(source);
|
ArgumentDefinition defaultDefinition = createDefaultArgumentDefinition(source);
|
||||||
String value = getArgumentValue( defaultDefinition, matches );
|
String value = getArgumentValue( defaultDefinition, matches );
|
||||||
try {
|
try {
|
||||||
Constructor ctor = type.getConstructor(String.class, String.class, ParsingEngine.class);
|
// TODO: determine type of internal value via Parameter
|
||||||
RodBinding result = (RodBinding)ctor.newInstance(source.field.getName(), value, parsingEngine);
|
Constructor ctor = type.getConstructor(Class.class, String.class, String.class, ParsingEngine.class);
|
||||||
|
RodBinding result = (RodBinding)ctor.newInstance(null, source.field.getName(), value, parsingEngine);
|
||||||
Tags tags = getArgumentTags(matches);
|
Tags tags = getArgumentTags(matches);
|
||||||
parsingEngine.addTags(result,tags);
|
parsingEngine.addTags(result,tags);
|
||||||
return result;
|
return result;
|
||||||
|
|
|
||||||
|
|
@ -24,6 +24,7 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.commandline;
|
package org.broadinstitute.sting.commandline;
|
||||||
|
|
||||||
|
import org.broad.tribble.Feature;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
||||||
|
|
||||||
|
|
@ -34,12 +35,14 @@ import java.util.List;
|
||||||
*
|
*
|
||||||
* There is no constraint on the type of the ROD bound.
|
* There is no constraint on the type of the ROD bound.
|
||||||
*/
|
*/
|
||||||
public class RodBinding {
|
public class RodBinding<T extends Feature> {
|
||||||
final String variableName;
|
final String variableName;
|
||||||
final String source;
|
final String source;
|
||||||
final ParsingEngine parser;
|
final ParsingEngine parser;
|
||||||
|
final Class<T> type;
|
||||||
|
|
||||||
protected RodBinding(final String variableName, final String source, final ParsingEngine parser) {
|
protected RodBinding(Class<T> type, final String variableName, final String source, final ParsingEngine parser) {
|
||||||
|
this.type = type;
|
||||||
this.variableName = variableName;
|
this.variableName = variableName;
|
||||||
this.source = source;
|
this.source = source;
|
||||||
this.parser = parser;
|
this.parser = parser;
|
||||||
|
|
@ -53,16 +56,16 @@ public class RodBinding {
|
||||||
return source;
|
return source;
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<Object> getValues(final RefMetaDataTracker tracker) {
|
public List<T> getValues(final RefMetaDataTracker tracker) {
|
||||||
return tracker.getValues(variableName);
|
return tracker.getValues(variableName, type);
|
||||||
}
|
}
|
||||||
|
|
||||||
public <T> List<T> getValues(final RefMetaDataTracker tracker, final Class<T> clazz) {
|
// public <T> List<T> getValues(final RefMetaDataTracker tracker, final Class<T> clazz) {
|
||||||
return tracker.getValues(variableName, clazz);
|
// return tracker.getValues(variableName, clazz);
|
||||||
}
|
// }
|
||||||
|
|
||||||
public <T> T getFirstValue(final RefMetaDataTracker tracker, final Class<T> clazz) {
|
public T getFirstValue(final RefMetaDataTracker tracker) {
|
||||||
return tracker.getFirstValue(variableName, clazz);
|
return tracker.getFirstValue(variableName, type);
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean hasValues(final RefMetaDataTracker tracker) {
|
public boolean hasValues(final RefMetaDataTracker tracker) {
|
||||||
|
|
@ -80,5 +83,4 @@ public class RodBinding {
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return String.format("(RodBinding name=%s source=%s)", getVariableName(), getSource());
|
return String.format("(RodBinding name=%s source=%s)", getVariableName(), getSource());
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,82 +1,82 @@
|
||||||
/*
|
///*
|
||||||
* Copyright (c) 2011, The Broad Institute
|
// * Copyright (c) 2011, The Broad Institute
|
||||||
*
|
// *
|
||||||
* Permission is hereby granted, free of charge, to any person
|
// * Permission is hereby granted, free of charge, to any person
|
||||||
* obtaining a copy of this software and associated documentation
|
// * obtaining a copy of this software and associated documentation
|
||||||
* files (the "Software"), to deal in the Software without
|
// * files (the "Software"), to deal in the Software without
|
||||||
* restriction, including without limitation the rights to use,
|
// * restriction, including without limitation the rights to use,
|
||||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
// * copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
* copies of the Software, and to permit persons to whom the
|
// * copies of the Software, and to permit persons to whom the
|
||||||
* Software is furnished to do so, subject to the following
|
// * Software is furnished to do so, subject to the following
|
||||||
* conditions:
|
// * conditions:
|
||||||
*
|
// *
|
||||||
* The above copyright notice and this permission notice shall be
|
// * The above copyright notice and this permission notice shall be
|
||||||
* included in all copies or substantial portions of the Software.
|
// * included in all copies or substantial portions of the Software.
|
||||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
// * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
// * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
// * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
// * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
// * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
// * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
// * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
* OTHER DEALINGS IN THE SOFTWARE.
|
// * OTHER DEALINGS IN THE SOFTWARE.
|
||||||
*/
|
// */
|
||||||
|
//
|
||||||
package org.broadinstitute.sting.commandline;
|
//package org.broadinstitute.sting.commandline;
|
||||||
|
//
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
//import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
//import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
//import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
//import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
|
//
|
||||||
import java.util.Collection;
|
//import java.util.Collection;
|
||||||
|
//
|
||||||
/**
|
///**
|
||||||
* A RodBinding representing a walker argument that gets bound to a ROD track containing VariantContexts
|
// * A RodBinding representing a walker argument that gets bound to a ROD track containing VariantContexts
|
||||||
*/
|
// */
|
||||||
public class VariantContextRodBinding extends RodBinding {
|
//public class VariantContextRodBinding extends RodBinding {
|
||||||
/**
|
// /**
|
||||||
* Create a new RodBinding specialized to provide VariantContexts.
|
// * Create a new RodBinding specialized to provide VariantContexts.
|
||||||
* @param variableName the name of the field in the walker that we will bind the ROD track too
|
// * @param variableName the name of the field in the walker that we will bind the ROD track too
|
||||||
* @param sourceFile the data source from which we will read the VCs
|
// * @param sourceFile the data source from which we will read the VCs
|
||||||
* @param parser the Engine parser used to obtain information about this argument, such as its underlying file type
|
// * @param parser the Engine parser used to obtain information about this argument, such as its underlying file type
|
||||||
*/
|
// */
|
||||||
protected VariantContextRodBinding(final String variableName, final String sourceFile, final ParsingEngine parser) {
|
// protected VariantContextRodBinding(final String variableName, final String sourceFile, final ParsingEngine parser) {
|
||||||
super(variableName, sourceFile, parser);
|
// super(variableName, sourceFile, parser);
|
||||||
}
|
// }
|
||||||
|
//
|
||||||
/**
|
// /**
|
||||||
* Forwarding method to identical tracker method
|
// * Forwarding method to identical tracker method
|
||||||
*/
|
// */
|
||||||
public Collection<VariantContext> getVariantContexts(final RefMetaDataTracker tracker,
|
// public Collection<VariantContext> getVariantContexts(final RefMetaDataTracker tracker,
|
||||||
final ReferenceContext ref,
|
// final ReferenceContext ref,
|
||||||
final GenomeLoc curLocation,
|
// final GenomeLoc curLocation,
|
||||||
final boolean requireStartHere,
|
// final boolean requireStartHere,
|
||||||
final boolean takeFirstOnly ) {
|
// final boolean takeFirstOnly ) {
|
||||||
return tracker.getVariantContexts(ref, variableName, curLocation, requireStartHere, takeFirstOnly);
|
// return tracker.getVariantContexts(ref, variableName, curLocation, requireStartHere, takeFirstOnly);
|
||||||
}
|
// }
|
||||||
|
//
|
||||||
/**
|
// /**
|
||||||
* Forwarding method to identical tracker method
|
// * Forwarding method to identical tracker method
|
||||||
* @param tracker
|
// * @param tracker
|
||||||
* @param ref
|
// * @param ref
|
||||||
* @param curLocation
|
// * @param curLocation
|
||||||
* @param requireStartHere
|
// * @param requireStartHere
|
||||||
* @return
|
// * @return
|
||||||
*/
|
// */
|
||||||
public VariantContext getVariantContext(final RefMetaDataTracker tracker,
|
// public VariantContext getVariantContext(final RefMetaDataTracker tracker,
|
||||||
final ReferenceContext ref,
|
// final ReferenceContext ref,
|
||||||
final GenomeLoc curLocation,
|
// final GenomeLoc curLocation,
|
||||||
final boolean requireStartHere ) {
|
// final boolean requireStartHere ) {
|
||||||
return tracker.getVariantContext(ref, variableName, curLocation, requireStartHere);
|
// return tracker.getVariantContext(ref, variableName, curLocation, requireStartHere);
|
||||||
}
|
// }
|
||||||
|
//
|
||||||
/**
|
// /**
|
||||||
* Forwarding method to identical tracker method
|
// * Forwarding method to identical tracker method
|
||||||
*/
|
// */
|
||||||
public VariantContext getVariantContext(final RefMetaDataTracker tracker,
|
// public VariantContext getVariantContext(final RefMetaDataTracker tracker,
|
||||||
final ReferenceContext ref,
|
// final ReferenceContext ref,
|
||||||
final GenomeLoc curLocation) {
|
// final GenomeLoc curLocation) {
|
||||||
return tracker.getVariantContext(ref, variableName, curLocation);
|
// return tracker.getVariantContext(ref, variableName, curLocation);
|
||||||
}
|
// }
|
||||||
}
|
//}
|
||||||
|
|
|
||||||
|
|
@ -76,9 +76,7 @@ public class CompOverlap extends VariantEvaluator implements StandardEval {
|
||||||
|
|
||||||
public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||||
boolean evalIsGood = eval != null && eval.isVariant();
|
boolean evalIsGood = eval != null && eval.isVariant();
|
||||||
boolean expectingIndels = eval != null && eval.isIndel();
|
boolean compIsGood = comp != null && comp.isNotFiltered() && (eval == null || comp.getType() == eval.getType());
|
||||||
|
|
||||||
boolean compIsGood = expectingIndels ? comp != null && comp.isNotFiltered() && comp.isIndel() : comp != null && comp.isNotFiltered() && comp.isSNP() ;
|
|
||||||
|
|
||||||
if (compIsGood) nCompVariants++; // count the number of comp events
|
if (compIsGood) nCompVariants++; // count the number of comp events
|
||||||
if (evalIsGood) nEvalVariants++; // count the number of eval events
|
if (evalIsGood) nEvalVariants++; // count the number of eval events
|
||||||
|
|
|
||||||
|
|
@ -5,24 +5,17 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp;
|
import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.*;
|
||||||
import java.util.Collection;
|
|
||||||
import java.util.EnumSet;
|
|
||||||
import java.util.Set;
|
|
||||||
|
|
||||||
public class Novelty extends VariantStratifier implements StandardStratification {
|
public class Novelty extends VariantStratifier implements StandardStratification {
|
||||||
// needs the variant contexts and known names
|
// needs the variant contexts and known names
|
||||||
private Set<String> knownNames;
|
private Set<String> knownNames;
|
||||||
private ArrayList<String> states;
|
final private ArrayList<String> states = new ArrayList<String>(Arrays.asList("all", "known", "novel"));
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void initialize(Set<SortableJexlVCMatchExp> jexlExpressions, Set<String> compNames, Set<String> knownNames, Set<String> evalNames, Set<String> sampleNames, Set<String> contigNames) {
|
public void initialize(Set<SortableJexlVCMatchExp> jexlExpressions, Set<String> compNames, Set<String> knownNames, Set<String> evalNames, Set<String> sampleNames, Set<String> contigNames) {
|
||||||
this.knownNames = knownNames;
|
this.knownNames = knownNames;
|
||||||
|
|
||||||
states = new ArrayList<String>();
|
|
||||||
states.add("all");
|
|
||||||
states.add("known");
|
|
||||||
states.add("novel");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public ArrayList<String> getAllStates() {
|
public ArrayList<String> getAllStates() {
|
||||||
|
|
@ -30,32 +23,18 @@ public class Novelty extends VariantStratifier implements StandardStratification
|
||||||
}
|
}
|
||||||
|
|
||||||
public ArrayList<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
|
public ArrayList<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
|
||||||
boolean isNovel = true;
|
if (tracker != null && eval != null) {
|
||||||
|
for (final String knownName : knownNames) {
|
||||||
if (tracker != null) {
|
final Collection<VariantContext> knownComps = tracker.getVariantContexts(ref, knownName, ref.getLocus(), true, false);
|
||||||
for (String knownName : knownNames) {
|
for ( final VariantContext c : knownComps ) {
|
||||||
if (tracker.hasValues(knownName)) {
|
// loop over sites, looking for something that matches the type eval
|
||||||
EnumSet<VariantContext.Type> allowableTypes = EnumSet.of(VariantContext.Type.NO_VARIATION);
|
if ( eval.getType() == c.getType() ) {
|
||||||
if (eval != null) {
|
return new ArrayList<String>(Arrays.asList("all", "known"));
|
||||||
allowableTypes.add(eval.getType());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Collection<VariantContext> knownComps = tracker.getVariantContexts(ref, knownName, ref.getLocus(), true, true);
|
|
||||||
for ( VariantContext c : knownComps )
|
|
||||||
if ( allowableTypes.contains(c.getType()) ) {
|
|
||||||
isNovel = false;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ArrayList<String> relevantStates = new ArrayList<String>();
|
return new ArrayList<String>(Arrays.asList("all", "novel"));
|
||||||
relevantStates.add("all");
|
|
||||||
relevantStates.add(isNovel ? "novel" : "known");
|
|
||||||
|
|
||||||
return relevantStates;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -249,7 +249,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
||||||
String extraArgs = "-L 1:1-10,000,000";
|
String extraArgs = "-L 1:1-10,000,000";
|
||||||
for (String tests : testsEnumerations) {
|
for (String tests : testsEnumerations) {
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(withSelect(tests, "DP < 50", "DP50") + " " + extraArgs + " -ST CpG -o %s",
|
WalkerTestSpec spec = new WalkerTestSpec(withSelect(tests, "DP < 50", "DP50") + " " + extraArgs + " -ST CpG -o %s",
|
||||||
1, Arrays.asList("cdbe47ea01b9dd79ff1c5ce6f5fa8bec"));
|
1, Arrays.asList("96860dedea0fa6b46c07f46b847fea42"));
|
||||||
executeTestParallel("testSelect1", spec);
|
executeTestParallel("testSelect1", spec);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -299,7 +299,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
||||||
" -D " + dbsnp +
|
" -D " + dbsnp +
|
||||||
" -B:evalBI,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" +
|
" -B:evalBI,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" +
|
||||||
" -noST -ST Novelty -o %s";
|
" -noST -ST Novelty -o %s";
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("5b1fc9a4066aca61f1b5f7b933ad37d9"));
|
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("58fdc6c42fade3007537bb99fb3ce738"));
|
||||||
executeTestParallel("testEvalTrackWithoutGenotypes",spec);
|
executeTestParallel("testEvalTrackWithoutGenotypes",spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -313,7 +313,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
||||||
" -B:evalBI,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" +
|
" -B:evalBI,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" +
|
||||||
" -B:evalBC,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bc.sites.vcf" +
|
" -B:evalBC,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bc.sites.vcf" +
|
||||||
" -noST -ST Novelty -o %s";
|
" -noST -ST Novelty -o %s";
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("6d902d9d4d8fef5219a43e416a51cee6"));
|
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("34df2815d27e5e62f1694731a7e7953c"));
|
||||||
executeTestParallel("testMultipleEvalTracksWithoutGenotypes",spec);
|
executeTestParallel("testMultipleEvalTracksWithoutGenotypes",spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -330,13 +330,13 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
||||||
" -noST -noEV -ST Novelty -EV CompOverlap" +
|
" -noST -noEV -ST Novelty -EV CompOverlap" +
|
||||||
" -o %s";
|
" -o %s";
|
||||||
|
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("55a1c53bced20701c56accfc3eb782a7"));
|
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("20332902ae36a84b2fd80405410815f1"));
|
||||||
executeTestParallel("testMultipleCompTracks",spec);
|
executeTestParallel("testMultipleCompTracks",spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testPerSampleAndSubsettedSampleHaveSameResults() {
|
public void testPerSampleAndSubsettedSampleHaveSameResults() {
|
||||||
String md5 = "454a1750fd36525f24172b21af5f49de";
|
String md5 = "9d61f6e2c8592dcf616712a2c587b2af";
|
||||||
|
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
buildCommandLine(
|
buildCommandLine(
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue