Fixed an issue where a no-call in the eval track would prevent a site from a comparison track from being loaded. Added a new test to cover the use case.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5169 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
aab0ec209b
commit
fd8dd8fb9b
|
|
@ -100,7 +100,9 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
|
|||
private Set<String> compNames = new TreeSet<String>();
|
||||
private Set<String> knownNames = new TreeSet<String>();
|
||||
private Set<String> evalNames = new TreeSet<String>();
|
||||
private Set<String> sampleNames = new TreeSet<String>();
|
||||
|
||||
private Set<String> sampleNamesForEvaluation = new TreeSet<String>();
|
||||
private Set<String> sampleNamesForStratification = new TreeSet<String>();
|
||||
private int numSamples = 0;
|
||||
|
||||
// The list of stratifiers and evaluators to use
|
||||
|
|
@ -186,7 +188,7 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
|
|||
|
||||
try {
|
||||
VariantStratifier vs = c.newInstance();
|
||||
vs.initialize(jexlExpressions, compNames, knownNames, evalNames, sampleNames);
|
||||
vs.initialize(jexlExpressions, compNames, knownNames, evalNames, sampleNamesForStratification);
|
||||
|
||||
strats.add(vs);
|
||||
} catch (InstantiationException e) {
|
||||
|
|
@ -372,15 +374,14 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
|
|||
Map<String, VCFHeader> vcfRods = VCFUtils.getVCFHeadersFromRods(getToolkit(), evalNames);
|
||||
Set<String> vcfSamples = SampleUtils.getSampleList(vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE);
|
||||
|
||||
// If we're not using the per-sample stratification, don't bother loading the sample list
|
||||
if (Arrays.asList(STRATIFICATIONS_TO_USE).contains("Sample")) {
|
||||
sampleNames.addAll(SampleUtils.getSamplesFromCommandLineInput(vcfSamples, SAMPLE_EXPRESSIONS));
|
||||
numSamples = sampleNames.size();
|
||||
} else {
|
||||
numSamples = vcfSamples.size();
|
||||
}
|
||||
// Load the sample list
|
||||
sampleNamesForEvaluation.addAll(SampleUtils.getSamplesFromCommandLineInput(vcfSamples, SAMPLE_EXPRESSIONS));
|
||||
numSamples = sampleNamesForEvaluation.size();
|
||||
|
||||
sampleNames.add(ALL_SAMPLE_NAME);
|
||||
if (Arrays.asList(STRATIFICATIONS_TO_USE).contains("Sample")) {
|
||||
sampleNamesForStratification.addAll(sampleNamesForEvaluation);
|
||||
}
|
||||
sampleNamesForStratification.add(ALL_SAMPLE_NAME);
|
||||
|
||||
// Initialize select expressions
|
||||
jexlExpressions.addAll(VariantContextUtils.initializeMatchExps(SELECT_NAMES, SELECT_EXPS));
|
||||
|
|
@ -411,21 +412,30 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
|
|||
*
|
||||
* @param tracker the reference metadata tracker
|
||||
* @param ref the reference context
|
||||
* @param compNames the comp track names
|
||||
* @param evalNames the evaluation track names
|
||||
* @return the set of allowable variation types
|
||||
*/
|
||||
private EnumSet<VariantContext.Type> getAllowableVariationTypes(RefMetaDataTracker tracker, ReferenceContext ref, Set<String> evalNames) {
|
||||
private EnumSet<VariantContext.Type> getAllowableVariationTypes(RefMetaDataTracker tracker, ReferenceContext ref, Set<String> compNames, Set<String> evalNames) {
|
||||
EnumSet<VariantContext.Type> allowableTypes = EnumSet.of(VariantContext.Type.NO_VARIATION);
|
||||
|
||||
if (tracker != null) {
|
||||
Collection<VariantContext> vcs = tracker.getVariantContexts(ref, evalNames, null, ref.getLocus(), true, false);
|
||||
Collection<VariantContext> evalvcs = tracker.getVariantContexts(ref, evalNames, null, ref.getLocus(), true, false);
|
||||
|
||||
for ( VariantContext vc : vcs ) {
|
||||
for ( VariantContext vc : evalvcs ) {
|
||||
allowableTypes.add(vc.getType());
|
||||
}
|
||||
} else {
|
||||
allowableTypes.add(VariantContext.Type.SNP);
|
||||
|
||||
if (allowableTypes.size() == 1) {
|
||||
// We didn't find any variation in the eval track, so now let's look at the comp track for allowable types
|
||||
Collection<VariantContext> compvcs = tracker.getVariantContexts(ref, compNames, null, ref.getLocus(), true, false);
|
||||
|
||||
for ( VariantContext vc : compvcs ) {
|
||||
allowableTypes.add(vc.getType());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return allowableTypes;
|
||||
}
|
||||
|
||||
|
|
@ -478,42 +488,41 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
|
|||
* @param tracker the metadata tracker
|
||||
* @param ref the reference context
|
||||
* @param trackNames the list of track names to process
|
||||
* @param sampleNames the list of samples to include
|
||||
* @param allowableTypes a set of allowable variation types
|
||||
* @param byFilter if false, only accept PASSing VariantContexts. Otherwise, accept both PASSing and filtered sites
|
||||
* @param trackPerSample if false, don't stratify per sample (and don't cut up the VariantContext like we would need to do this)
|
||||
* @param allowNoCalls if false, don't accept no-call loci from a variant track
|
||||
* @return a mapping of track names to a list of VariantContext objects
|
||||
*/
|
||||
private HashMap<String, HashMap<String, VariantContext>> bindVariantContexts(RefMetaDataTracker tracker, ReferenceContext ref, Set<String> trackNames, Set<String> sampleNames, EnumSet<VariantContext.Type> allowableTypes, boolean byFilter) {
|
||||
private HashMap<String, HashMap<String, VariantContext>> bindVariantContexts(RefMetaDataTracker tracker, ReferenceContext ref, Set<String> trackNames, EnumSet<VariantContext.Type> allowableTypes, boolean byFilter, boolean trackPerSample, boolean allowNoCalls) {
|
||||
HashMap<String, HashMap<String, VariantContext>> bindings = new HashMap<String, HashMap<String, VariantContext>>();
|
||||
|
||||
for ( String trackName : trackNames ) {
|
||||
Collection<VariantContext> contexts = tracker == null ? null : tracker.getVariantContexts(ref, trackName, allowableTypes, ref.getLocus(), true, true);
|
||||
|
||||
VariantContext vc = contexts != null && contexts.size() == 1 ? contexts.iterator().next() : null;
|
||||
|
||||
HashMap<String, VariantContext> vcs = new HashMap<String, VariantContext>();
|
||||
|
||||
Collection<VariantContext> contexts = tracker == null ? null : tracker.getVariantContexts(ref, trackName, allowableTypes, ref.getLocus(), true, true);
|
||||
VariantContext vc = contexts != null && contexts.size() == 1 ? contexts.iterator().next() : null;
|
||||
|
||||
// First, filter the VariantContext to represent only the samples for evaluation
|
||||
if ( vc != null ) {
|
||||
ArrayList<String> sampleNamesMinusAll = new ArrayList<String>();
|
||||
VariantContext vcsub = vc;
|
||||
|
||||
for ( String sampleName : sampleNames ) {
|
||||
VariantContext vcsub = vc;
|
||||
|
||||
if (!sampleName.equals(ALL_SAMPLE_NAME)) {
|
||||
vcsub = getSubsetOfVariantContext(vc, sampleName);
|
||||
sampleNamesMinusAll.add(sampleName);
|
||||
}
|
||||
|
||||
if (byFilter || !vcsub.isFiltered()) {
|
||||
vcs.put(sampleName, vcsub);
|
||||
}
|
||||
if (vc.hasGenotypes(sampleNamesForEvaluation)) {
|
||||
vcsub = getSubsetOfVariantContext(vc, sampleNamesForEvaluation);
|
||||
}
|
||||
|
||||
if ( trackName.contains("eval") ) {
|
||||
VariantContext vcsub = (sampleNamesMinusAll.size() > 0) ? getSubsetOfVariantContext(vc, sampleNamesMinusAll) : vc;
|
||||
if ((byFilter || !vcsub.isFiltered()) && (allowNoCalls || vcsub.getType() != VariantContext.Type.NO_VARIATION)) {
|
||||
vcs.put(ALL_SAMPLE_NAME, vcsub);
|
||||
}
|
||||
|
||||
if (byFilter || !vcsub.isFiltered()) {
|
||||
vcs.put(ALL_SAMPLE_NAME, vcsub);
|
||||
// Now, if stratifying, split the subsetted vc per sample and add each as a new context
|
||||
if ( trackPerSample ) {
|
||||
for ( String sampleName : sampleNamesForEvaluation ) {
|
||||
VariantContext samplevc = getSubsetOfVariantContext(vc, sampleName);
|
||||
|
||||
if ((byFilter || !samplevc.isFiltered()) && (allowNoCalls || samplevc.getType() != VariantContext.Type.NO_VARIATION)) {
|
||||
vcs.put(sampleName, samplevc);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -532,35 +541,25 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
|
|||
* @param ref the reference context
|
||||
* @param compNames the list of comp names to process
|
||||
* @param evalNames the list of eval names to process
|
||||
* @param sampleNames the list of samples to include
|
||||
* @return a mapping of track names to a list of VariantContext objects
|
||||
*/
|
||||
private HashMap<String, HashMap<String, VariantContext>> getVariantContexts(RefMetaDataTracker tracker, ReferenceContext ref, Set<String> compNames, Set<String> evalNames, Set<String> sampleNames) {
|
||||
private HashMap<String, HashMap<String, VariantContext>> getVariantContexts(RefMetaDataTracker tracker, ReferenceContext ref, Set<String> compNames, Set<String> evalNames) {
|
||||
HashMap<String, HashMap<String, VariantContext>> vcs = new HashMap<String, HashMap<String, VariantContext>>();
|
||||
|
||||
Set<String> allSamplesList = new HashSet<String>();
|
||||
allSamplesList.add(ALL_SAMPLE_NAME);
|
||||
EnumSet<VariantContext.Type> allowableTypes = getAllowableVariationTypes(tracker, ref, compNames, evalNames);
|
||||
|
||||
EnumSet<VariantContext.Type> allowableTypes = getAllowableVariationTypes(tracker, ref, evalNames);
|
||||
|
||||
boolean perSampleIsEnabled = false;
|
||||
boolean byFilter = false;
|
||||
boolean perSampleIsEnabled = false;
|
||||
for (VariantStratifier vs : stratificationObjects) {
|
||||
if (vs.getClass().getSimpleName().equals("Sample")) {
|
||||
perSampleIsEnabled = true;
|
||||
} else if (vs.getClass().getSimpleName().equals("Filter")) {
|
||||
if (vs.getClass().getSimpleName().equals("Filter")) {
|
||||
byFilter = true;
|
||||
} else if (vs.getClass().getSimpleName().equals("Sample")) {
|
||||
perSampleIsEnabled = true;
|
||||
}
|
||||
}
|
||||
|
||||
HashMap<String, HashMap<String, VariantContext>> evalBindings;
|
||||
if (perSampleIsEnabled) {
|
||||
evalBindings = bindVariantContexts(tracker, ref, evalNames, sampleNames, allowableTypes, byFilter);
|
||||
} else {
|
||||
evalBindings = bindVariantContexts(tracker, ref, evalNames, allSamplesList, allowableTypes, byFilter);
|
||||
}
|
||||
|
||||
HashMap<String, HashMap<String, VariantContext>> compBindings = bindVariantContexts(tracker, ref, compNames, allSamplesList, allowableTypes, byFilter);
|
||||
HashMap<String, HashMap<String, VariantContext>> evalBindings = bindVariantContexts(tracker, ref, evalNames, allowableTypes, byFilter, perSampleIsEnabled, true);
|
||||
HashMap<String, HashMap<String, VariantContext>> compBindings = bindVariantContexts(tracker, ref, compNames, allowableTypes, byFilter, false, false);
|
||||
|
||||
vcs.putAll(compBindings);
|
||||
vcs.putAll(evalBindings);
|
||||
|
|
@ -657,13 +656,13 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
|
|||
}
|
||||
|
||||
// track sample vc
|
||||
HashMap<String, HashMap<String, VariantContext>> vcs = getVariantContexts(tracker, ref, compNames, evalNames, sampleNames);
|
||||
HashMap<String, HashMap<String, VariantContext>> vcs = getVariantContexts(tracker, ref, compNames, evalNames);
|
||||
|
||||
for ( String compName : compNames ) {
|
||||
VariantContext comp = vcs.containsKey(compName) && vcs.get(compName) != null && vcs.get(compName).containsKey(ALL_SAMPLE_NAME) ? vcs.get(compName).get(ALL_SAMPLE_NAME) : null;
|
||||
|
||||
for ( String evalName : evalNames ) {
|
||||
for ( String sampleName : sampleNames ) {
|
||||
for ( String sampleName : sampleNamesForStratification ) {
|
||||
VariantContext eval = vcs.containsKey(evalName) && vcs.get(evalName) != null ? vcs.get(evalName).get(sampleName) : null;
|
||||
|
||||
HashMap<VariantStratifier, ArrayList<String>> stateMap = new HashMap<VariantStratifier, ArrayList<String>>();
|
||||
|
|
|
|||
|
|
@ -76,10 +76,13 @@ public class CompOverlap extends VariantEvaluator implements StandardEval {
|
|||
}
|
||||
|
||||
public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
boolean expectingIndels = false;
|
||||
//boolean expectingIndels = false;
|
||||
|
||||
boolean compIsGood = expectingIndels ? comp != null && comp.isNotFiltered() && comp.isIndel() : comp != null && comp.isNotFiltered() && comp.isSNP() ;
|
||||
boolean evalIsGood = expectingIndels ? eval != null && eval.isIndel() : eval != null && eval.isSNP() ;
|
||||
//boolean compIsGood = expectingIndels ? comp != null && comp.isNotFiltered() && comp.isIndel() : comp != null && comp.isNotFiltered() && comp.isSNP() ;
|
||||
//boolean evalIsGood = expectingIndels ? eval != null && eval.isIndel() : eval != null && eval.isSNP() ;
|
||||
|
||||
boolean compIsGood = comp != null && comp.isNotFiltered() && comp.isSNP() ;
|
||||
boolean evalIsGood = eval != null && eval.isSNP() ;
|
||||
|
||||
if (compIsGood) nCompSNPs++; // count the number of comp events
|
||||
if (evalIsGood) nEvalSNPs++; // count the number of eval events
|
||||
|
|
@ -87,8 +90,9 @@ public class CompOverlap extends VariantEvaluator implements StandardEval {
|
|||
if (compIsGood && evalIsGood) {
|
||||
nSNPsAtComp++;
|
||||
|
||||
if (!discordantP(eval, comp)) // count whether we're concordant or not with the comp value
|
||||
if (!discordantP(eval, comp)) { // count whether we're concordant or not with the comp value
|
||||
nConcordant++;
|
||||
}
|
||||
}
|
||||
|
||||
return null; // we don't capture any interesting sites
|
||||
|
|
|
|||
|
|
@ -15,7 +15,6 @@ public class Sample extends VariantStratifier {
|
|||
@Override
|
||||
public void initialize(Set<VariantContextUtils.JexlVCMatchExp> jexlExpressions, Set<String> compNames, Set<String> knownNames, Set<String> evalNames, Set<String> sampleNames) {
|
||||
samples = new ArrayList<String>();
|
||||
samples.add(VariantEvalWalker.ALL_SAMPLE_NAME);
|
||||
samples.addAll(sampleNames);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -52,7 +52,7 @@ public class NewEvaluationContext extends HashMap<VariantStratifier, String> {
|
|||
|
||||
public void apply(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context, VariantContext comp, VariantContext eval) {
|
||||
for ( VariantEvaluator evaluation : evaluationInstances.values() ) {
|
||||
//synchronized ( evaluation ) {
|
||||
//synchronized ( this ) {
|
||||
// we always call update0 in case the evaluation tracks things like number of bases covered
|
||||
//evaluation.update0(tracker, ref, context);
|
||||
|
||||
|
|
@ -69,9 +69,9 @@ public class NewEvaluationContext extends HashMap<VariantStratifier, String> {
|
|||
|
||||
break;
|
||||
case 2:
|
||||
if (eval != null) {
|
||||
//if (eval != null) {
|
||||
evaluation.update2(eval, comp, tracker, ref, context);
|
||||
}
|
||||
//}
|
||||
|
||||
break;
|
||||
default:
|
||||
|
|
@ -82,8 +82,10 @@ public class NewEvaluationContext extends HashMap<VariantStratifier, String> {
|
|||
}
|
||||
|
||||
public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
for ( VariantEvaluator evaluation : evaluationInstances.values() ) {
|
||||
evaluation.update0(tracker, ref, context);
|
||||
}
|
||||
//synchronized (this) {
|
||||
for ( VariantEvaluator evaluation : evaluationInstances.values() ) {
|
||||
evaluation.update0(tracker, ref, context);
|
||||
}
|
||||
//}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -29,7 +29,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
String extraArgs = "-L 1:1-10,000,000";
|
||||
for (String tests : testsEnumerations) {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(withSelect(tests, "DP < 50", "DP50") + " " + extraArgs + " -o %s",
|
||||
1, Arrays.asList("8ddc1c4a86cb3f4c22346497785b23e3"));
|
||||
1, Arrays.asList("0087b2a096aa99135b065aa9a0fff34c"));
|
||||
//executeTestParallel("testSelect1", spec);
|
||||
executeTest("testSelect1", spec);
|
||||
}
|
||||
|
|
@ -50,7 +50,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
for (String vcfFile : vcfFiles) {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(cmdRoot + " -B:eval,VCF " + validationDataLocation + vcfFile + " -B:comp,VCF " + validationDataLocation + "GenotypeConcordanceComp.vcf -noEV -EV GenotypeConcordance -o %s",
|
||||
1,
|
||||
Arrays.asList("7a6754176b573d14b6be7c808a04929d"));
|
||||
Arrays.asList("bb16335f9510bcab2bd14a4299afd879"));
|
||||
//executeTestParallel("testVEGenotypeConcordance" + vcfFile, spec);
|
||||
executeTest("testVEGenotypeConcordance" + vcfFile, spec);
|
||||
}
|
||||
|
|
@ -60,8 +60,8 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testVESimple() {
|
||||
HashMap<String, String> expectations = new HashMap<String, String>();
|
||||
expectations.put("-L 1:1-10,000,000", "ffd1abed44faf1590d9026e478b2f8ee");
|
||||
expectations.put("-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -mvq 0 -EV MendelianViolationEvaluator", "32c2411fbf58bae5750c8229d15b98eb");
|
||||
expectations.put("-L 1:1-10,000,000", "e46e8e7457b338c4cfec62ee7aa51ffe");
|
||||
expectations.put("-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -mvq 0 -EV MendelianViolationEvaluator", "a0554ca0baa097a1761da3f7e8487833");
|
||||
|
||||
for ( Map.Entry<String, String> entry : expectations.entrySet() ) {
|
||||
String extraArgs = entry.getKey();
|
||||
|
|
@ -84,10 +84,10 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
" -B:comp_hapmap,VCF " + validationDataLocation + "CEU_hapmap_nogt_23.vcf";
|
||||
|
||||
|
||||
String matchingMD5 = "6c2fa6573cc57ef8795e9cce2b654d0b";
|
||||
String matchingMD5 = "5050011ad00b859faf2be679830bec90";
|
||||
expectations.put("", matchingMD5);
|
||||
expectations.put(" -knownName comp_hapmap -knownName dbsnp", matchingMD5);
|
||||
expectations.put(" -knownName comp_hapmap", "6c2fa6573cc57ef8795e9cce2b654d0b");
|
||||
expectations.put(" -knownName comp_hapmap", "5050011ad00b859faf2be679830bec90");
|
||||
for (String tests : testsEnumerations) {
|
||||
for (Map.Entry<String, String> entry : expectations.entrySet()) {
|
||||
String extraArgs2 = entry.getKey();
|
||||
|
|
@ -133,7 +133,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testCompVsEvalAC() {
|
||||
String extraArgs = "-T VariantEval -R "+b36KGReference+" -o %s -EV GenotypeConcordance -B:evalYRI,VCF /humgen/gsa-hpprojects/GATK/data/Validation_Data/yri.trio.gatk.ug.very.few.lines.vcf -B:compYRI,VCF /humgen/gsa-hpprojects/GATK/data/Validation_Data/yri.trio.gatk.fake.genotypes.ac.test.vcf";
|
||||
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("929e4ec46fb6957c29803531322bb35e"));
|
||||
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("113228ffa35e0f67b8e067860c04171f"));
|
||||
//executeTestParallel("testACDiscordanceAtAC1EvalAC2Comp",spec);
|
||||
executeTest("testCompVsEvalAC",spec);
|
||||
}
|
||||
|
|
@ -150,6 +150,14 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
executeTest("testTranches",spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCompOverlap() {
|
||||
String extraArgs = "-T VariantEval -R " + b37KGReference + " -L " + validationDataLocation + "VariantEval/pacbio.hg19.intervals -B:comphapmap,vcf " + comparisonDataLocation + "Validated/HapMap/3.3/genotypes_r27_nr.b37_fwd.vcf -B:eval,vcf " + validationDataLocation + "VariantEval/pacbio.ts.recalibrated.vcf -noEV -EV CompOverlap -sn NA12878 -noST -ST Novelty -o %s";
|
||||
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("81377be26bf8fa32339d01c173428f7d"));
|
||||
//executeTestParallel("testTranches",spec);
|
||||
executeTest("testCompOverlap",spec);
|
||||
}
|
||||
|
||||
// @Test
|
||||
// public void testVEValidatePass() {
|
||||
// String extraArgs = "-L 1:1-10,000,000";
|
||||
|
|
|
|||
Loading…
Reference in New Issue