Uncovered fundamental accounting bug in VariantEval. Will be fixed by dev. team
Problem is that Novelty sees multiple records at a site (SNP, INDEL) to calculate whether a site is novel, but VariantEvalWalker makes an arbitrary decision which to use for analysis and CompOverlap may not see a comp record of the same type as eval. So you get lines where the stratification is known but there are 10 novel sites!
This commit is contained in:
parent
f7a126722b
commit
7c5c656b46
|
|
@ -299,8 +299,9 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
|||
ArgumentDefinition defaultDefinition = createDefaultArgumentDefinition(source);
|
||||
String value = getArgumentValue( defaultDefinition, matches );
|
||||
try {
|
||||
Constructor ctor = type.getConstructor(String.class, String.class, ParsingEngine.class);
|
||||
RodBinding result = (RodBinding)ctor.newInstance(source.field.getName(), value, parsingEngine);
|
||||
// TODO: determine type of internal value via Parameter
|
||||
Constructor ctor = type.getConstructor(Class.class, String.class, String.class, ParsingEngine.class);
|
||||
RodBinding result = (RodBinding)ctor.newInstance(null, source.field.getName(), value, parsingEngine);
|
||||
Tags tags = getArgumentTags(matches);
|
||||
parsingEngine.addTags(result,tags);
|
||||
return result;
|
||||
|
|
|
|||
|
|
@ -24,6 +24,7 @@
|
|||
|
||||
package org.broadinstitute.sting.commandline;
|
||||
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
||||
|
||||
|
|
@ -34,12 +35,14 @@ import java.util.List;
|
|||
*
|
||||
* There is no constraint on the type of the ROD bound.
|
||||
*/
|
||||
public class RodBinding {
|
||||
public class RodBinding<T extends Feature> {
|
||||
final String variableName;
|
||||
final String source;
|
||||
final ParsingEngine parser;
|
||||
final Class<T> type;
|
||||
|
||||
protected RodBinding(final String variableName, final String source, final ParsingEngine parser) {
|
||||
protected RodBinding(Class<T> type, final String variableName, final String source, final ParsingEngine parser) {
|
||||
this.type = type;
|
||||
this.variableName = variableName;
|
||||
this.source = source;
|
||||
this.parser = parser;
|
||||
|
|
@ -53,16 +56,16 @@ public class RodBinding {
|
|||
return source;
|
||||
}
|
||||
|
||||
public List<Object> getValues(final RefMetaDataTracker tracker) {
|
||||
return tracker.getValues(variableName);
|
||||
public List<T> getValues(final RefMetaDataTracker tracker) {
|
||||
return tracker.getValues(variableName, type);
|
||||
}
|
||||
|
||||
public <T> List<T> getValues(final RefMetaDataTracker tracker, final Class<T> clazz) {
|
||||
return tracker.getValues(variableName, clazz);
|
||||
}
|
||||
// public <T> List<T> getValues(final RefMetaDataTracker tracker, final Class<T> clazz) {
|
||||
// return tracker.getValues(variableName, clazz);
|
||||
// }
|
||||
|
||||
public <T> T getFirstValue(final RefMetaDataTracker tracker, final Class<T> clazz) {
|
||||
return tracker.getFirstValue(variableName, clazz);
|
||||
public T getFirstValue(final RefMetaDataTracker tracker) {
|
||||
return tracker.getFirstValue(variableName, type);
|
||||
}
|
||||
|
||||
public boolean hasValues(final RefMetaDataTracker tracker) {
|
||||
|
|
@ -80,5 +83,4 @@ public class RodBinding {
|
|||
public String toString() {
|
||||
return String.format("(RodBinding name=%s source=%s)", getVariableName(), getSource());
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,82 +1,82 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.commandline;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.util.Collection;
|
||||
|
||||
/**
|
||||
* A RodBinding representing a walker argument that gets bound to a ROD track containing VariantContexts
|
||||
*/
|
||||
public class VariantContextRodBinding extends RodBinding {
|
||||
/**
|
||||
* Create a new RodBinding specialized to provide VariantContexts.
|
||||
* @param variableName the name of the field in the walker that we will bind the ROD track too
|
||||
* @param sourceFile the data source from which we will read the VCs
|
||||
* @param parser the Engine parser used to obtain information about this argument, such as its underlying file type
|
||||
*/
|
||||
protected VariantContextRodBinding(final String variableName, final String sourceFile, final ParsingEngine parser) {
|
||||
super(variableName, sourceFile, parser);
|
||||
}
|
||||
|
||||
/**
|
||||
* Forwarding method to identical tracker method
|
||||
*/
|
||||
public Collection<VariantContext> getVariantContexts(final RefMetaDataTracker tracker,
|
||||
final ReferenceContext ref,
|
||||
final GenomeLoc curLocation,
|
||||
final boolean requireStartHere,
|
||||
final boolean takeFirstOnly ) {
|
||||
return tracker.getVariantContexts(ref, variableName, curLocation, requireStartHere, takeFirstOnly);
|
||||
}
|
||||
|
||||
/**
|
||||
* Forwarding method to identical tracker method
|
||||
* @param tracker
|
||||
* @param ref
|
||||
* @param curLocation
|
||||
* @param requireStartHere
|
||||
* @return
|
||||
*/
|
||||
public VariantContext getVariantContext(final RefMetaDataTracker tracker,
|
||||
final ReferenceContext ref,
|
||||
final GenomeLoc curLocation,
|
||||
final boolean requireStartHere ) {
|
||||
return tracker.getVariantContext(ref, variableName, curLocation, requireStartHere);
|
||||
}
|
||||
|
||||
/**
|
||||
* Forwarding method to identical tracker method
|
||||
*/
|
||||
public VariantContext getVariantContext(final RefMetaDataTracker tracker,
|
||||
final ReferenceContext ref,
|
||||
final GenomeLoc curLocation) {
|
||||
return tracker.getVariantContext(ref, variableName, curLocation);
|
||||
}
|
||||
}
|
||||
///*
|
||||
// * Copyright (c) 2011, The Broad Institute
|
||||
// *
|
||||
// * Permission is hereby granted, free of charge, to any person
|
||||
// * obtaining a copy of this software and associated documentation
|
||||
// * files (the "Software"), to deal in the Software without
|
||||
// * restriction, including without limitation the rights to use,
|
||||
// * copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// * copies of the Software, and to permit persons to whom the
|
||||
// * Software is furnished to do so, subject to the following
|
||||
// * conditions:
|
||||
// *
|
||||
// * The above copyright notice and this permission notice shall be
|
||||
// * included in all copies or substantial portions of the Software.
|
||||
// * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
// * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
// * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
// * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
// * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
// * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
// * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
// * OTHER DEALINGS IN THE SOFTWARE.
|
||||
// */
|
||||
//
|
||||
//package org.broadinstitute.sting.commandline;
|
||||
//
|
||||
//import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
//import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
//import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
//import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
//
|
||||
//import java.util.Collection;
|
||||
//
|
||||
///**
|
||||
// * A RodBinding representing a walker argument that gets bound to a ROD track containing VariantContexts
|
||||
// */
|
||||
//public class VariantContextRodBinding extends RodBinding {
|
||||
// /**
|
||||
// * Create a new RodBinding specialized to provide VariantContexts.
|
||||
// * @param variableName the name of the field in the walker that we will bind the ROD track too
|
||||
// * @param sourceFile the data source from which we will read the VCs
|
||||
// * @param parser the Engine parser used to obtain information about this argument, such as its underlying file type
|
||||
// */
|
||||
// protected VariantContextRodBinding(final String variableName, final String sourceFile, final ParsingEngine parser) {
|
||||
// super(variableName, sourceFile, parser);
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Forwarding method to identical tracker method
|
||||
// */
|
||||
// public Collection<VariantContext> getVariantContexts(final RefMetaDataTracker tracker,
|
||||
// final ReferenceContext ref,
|
||||
// final GenomeLoc curLocation,
|
||||
// final boolean requireStartHere,
|
||||
// final boolean takeFirstOnly ) {
|
||||
// return tracker.getVariantContexts(ref, variableName, curLocation, requireStartHere, takeFirstOnly);
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Forwarding method to identical tracker method
|
||||
// * @param tracker
|
||||
// * @param ref
|
||||
// * @param curLocation
|
||||
// * @param requireStartHere
|
||||
// * @return
|
||||
// */
|
||||
// public VariantContext getVariantContext(final RefMetaDataTracker tracker,
|
||||
// final ReferenceContext ref,
|
||||
// final GenomeLoc curLocation,
|
||||
// final boolean requireStartHere ) {
|
||||
// return tracker.getVariantContext(ref, variableName, curLocation, requireStartHere);
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Forwarding method to identical tracker method
|
||||
// */
|
||||
// public VariantContext getVariantContext(final RefMetaDataTracker tracker,
|
||||
// final ReferenceContext ref,
|
||||
// final GenomeLoc curLocation) {
|
||||
// return tracker.getVariantContext(ref, variableName, curLocation);
|
||||
// }
|
||||
//}
|
||||
|
|
|
|||
|
|
@ -76,9 +76,7 @@ public class CompOverlap extends VariantEvaluator implements StandardEval {
|
|||
|
||||
public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
boolean evalIsGood = eval != null && eval.isVariant();
|
||||
boolean expectingIndels = eval != null && eval.isIndel();
|
||||
|
||||
boolean compIsGood = expectingIndels ? comp != null && comp.isNotFiltered() && comp.isIndel() : comp != null && comp.isNotFiltered() && comp.isSNP() ;
|
||||
boolean compIsGood = comp != null && comp.isNotFiltered() && (eval == null || comp.getType() == eval.getType());
|
||||
|
||||
if (compIsGood) nCompVariants++; // count the number of comp events
|
||||
if (evalIsGood) nEvalVariants++; // count the number of eval events
|
||||
|
|
|
|||
|
|
@ -5,24 +5,17 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.EnumSet;
|
||||
import java.util.Set;
|
||||
import java.util.*;
|
||||
|
||||
public class Novelty extends VariantStratifier implements StandardStratification {
|
||||
// needs the variant contexts and known names
|
||||
private Set<String> knownNames;
|
||||
private ArrayList<String> states;
|
||||
final private ArrayList<String> states = new ArrayList<String>(Arrays.asList("all", "known", "novel"));
|
||||
|
||||
|
||||
@Override
|
||||
public void initialize(Set<SortableJexlVCMatchExp> jexlExpressions, Set<String> compNames, Set<String> knownNames, Set<String> evalNames, Set<String> sampleNames, Set<String> contigNames) {
|
||||
this.knownNames = knownNames;
|
||||
|
||||
states = new ArrayList<String>();
|
||||
states.add("all");
|
||||
states.add("known");
|
||||
states.add("novel");
|
||||
}
|
||||
|
||||
public ArrayList<String> getAllStates() {
|
||||
|
|
@ -30,32 +23,18 @@ public class Novelty extends VariantStratifier implements StandardStratification
|
|||
}
|
||||
|
||||
public ArrayList<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
|
||||
boolean isNovel = true;
|
||||
|
||||
if (tracker != null) {
|
||||
for (String knownName : knownNames) {
|
||||
if (tracker.hasValues(knownName)) {
|
||||
EnumSet<VariantContext.Type> allowableTypes = EnumSet.of(VariantContext.Type.NO_VARIATION);
|
||||
if (eval != null) {
|
||||
allowableTypes.add(eval.getType());
|
||||
if (tracker != null && eval != null) {
|
||||
for (final String knownName : knownNames) {
|
||||
final Collection<VariantContext> knownComps = tracker.getVariantContexts(ref, knownName, ref.getLocus(), true, false);
|
||||
for ( final VariantContext c : knownComps ) {
|
||||
// loop over sites, looking for something that matches the type eval
|
||||
if ( eval.getType() == c.getType() ) {
|
||||
return new ArrayList<String>(Arrays.asList("all", "known"));
|
||||
}
|
||||
|
||||
Collection<VariantContext> knownComps = tracker.getVariantContexts(ref, knownName, ref.getLocus(), true, true);
|
||||
for ( VariantContext c : knownComps )
|
||||
if ( allowableTypes.contains(c.getType()) ) {
|
||||
isNovel = false;
|
||||
break;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ArrayList<String> relevantStates = new ArrayList<String>();
|
||||
relevantStates.add("all");
|
||||
relevantStates.add(isNovel ? "novel" : "known");
|
||||
|
||||
return relevantStates;
|
||||
return new ArrayList<String>(Arrays.asList("all", "novel"));
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -249,7 +249,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
String extraArgs = "-L 1:1-10,000,000";
|
||||
for (String tests : testsEnumerations) {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(withSelect(tests, "DP < 50", "DP50") + " " + extraArgs + " -ST CpG -o %s",
|
||||
1, Arrays.asList("cdbe47ea01b9dd79ff1c5ce6f5fa8bec"));
|
||||
1, Arrays.asList("96860dedea0fa6b46c07f46b847fea42"));
|
||||
executeTestParallel("testSelect1", spec);
|
||||
}
|
||||
}
|
||||
|
|
@ -299,7 +299,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
" -D " + dbsnp +
|
||||
" -B:evalBI,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" +
|
||||
" -noST -ST Novelty -o %s";
|
||||
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("5b1fc9a4066aca61f1b5f7b933ad37d9"));
|
||||
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("58fdc6c42fade3007537bb99fb3ce738"));
|
||||
executeTestParallel("testEvalTrackWithoutGenotypes",spec);
|
||||
}
|
||||
|
||||
|
|
@ -313,7 +313,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
" -B:evalBI,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" +
|
||||
" -B:evalBC,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bc.sites.vcf" +
|
||||
" -noST -ST Novelty -o %s";
|
||||
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("6d902d9d4d8fef5219a43e416a51cee6"));
|
||||
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("34df2815d27e5e62f1694731a7e7953c"));
|
||||
executeTestParallel("testMultipleEvalTracksWithoutGenotypes",spec);
|
||||
}
|
||||
|
||||
|
|
@ -330,13 +330,13 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
" -noST -noEV -ST Novelty -EV CompOverlap" +
|
||||
" -o %s";
|
||||
|
||||
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("55a1c53bced20701c56accfc3eb782a7"));
|
||||
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("20332902ae36a84b2fd80405410815f1"));
|
||||
executeTestParallel("testMultipleCompTracks",spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testPerSampleAndSubsettedSampleHaveSameResults() {
|
||||
String md5 = "454a1750fd36525f24172b21af5f49de";
|
||||
String md5 = "9d61f6e2c8592dcf616712a2c587b2af";
|
||||
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
buildCommandLine(
|
||||
|
|
|
|||
Loading…
Reference in New Issue