Bugfix for VariantEval

-- We weren't properly handling the case where a site had both a SNP and indel in both eval and comp.  These would naturally pair off as SNP x SNP and INDEL x INDEL in eval, but we'd still invoke update2 with (null, SNP) and (null, INDEL) resulting most conspicously as incorrect false negatives in the validation report.
-- Updating misc. integrationtests, as the counting of comps (in particular for dbSNP) was inflated because of this effect.
This commit is contained in:
Mark DePristo 2012-03-06 16:54:59 -05:00
parent 5f35f5d338
commit 569be953b9
3 changed files with 50 additions and 31 deletions

View File

@ -1,5 +1,6 @@
package org.broadinstitute.sting.gatk.walkers.varianteval;
import com.google.java.contract.Requires;
import net.sf.picard.reference.IndexedFastaSequenceFile;
import net.sf.picard.util.IntervalTree;
import net.sf.samtools.SAMSequenceRecord;
@ -19,11 +20,8 @@ import org.broadinstitute.sting.gatk.walkers.TreeReducible;
import org.broadinstitute.sting.gatk.walkers.Window;
import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.VariantEvaluator;
import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.IntervalStratification;
import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.JexlExpression;
import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.VariantStratifier;
import org.broadinstitute.sting.gatk.walkers.varianteval.util.*;
import org.broadinstitute.sting.gatk.walkers.variantrecalibration.Tranche;
import org.broadinstitute.sting.gatk.walkers.variantrecalibration.VariantRecalibrator;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.SampleUtils;
@ -32,7 +30,6 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.StingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.interval.IntervalUtils;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
@ -389,9 +386,9 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
nec.apply(tracker, ref, context, comp, eval);
}
// eval=null against all comps of different type
// eval=null against all comps of different type that aren't bound to another eval
for ( VariantContext otherComp : compSet ) {
if ( otherComp != comp ) {
if ( otherComp != comp && ! compHasMatchingEval(otherComp, evalSetBySample) ) {
synchronized (nec) {
nec.apply(tracker, ref, context, otherComp, null);
}
@ -409,6 +406,35 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
return null;
}
@Requires({"comp != null", "evals != null"})
private boolean compHasMatchingEval(final VariantContext comp, final Collection<VariantContext> evals) {
// find all of the matching comps
for ( final VariantContext eval : evals ) {
if ( eval != null && doEvalAndCompMatch(comp, eval, requireStrictAlleleMatch) != EvalCompMatchType.NO_MATCH )
return true;
}
// nothing matched
return false;
}
private enum EvalCompMatchType { NO_MATCH, STRICT, LENIENT }
@Requires({"eval != null", "comp != null"})
private EvalCompMatchType doEvalAndCompMatch(final VariantContext eval, final VariantContext comp, boolean requireStrictAlleleMatch) {
// find all of the matching comps
if ( comp.getType() != eval.getType() )
return EvalCompMatchType.NO_MATCH;
// find the comp which matches both the reference allele and alternate allele from eval
final Allele altEval = eval.getAlternateAlleles().size() == 0 ? null : eval.getAlternateAllele(0);
final Allele altComp = comp.getAlternateAlleles().size() == 0 ? null : comp.getAlternateAllele(0);
if ((altEval == null && altComp == null) || (altEval != null && altEval.equals(altComp) && eval.getReference().equals(comp.getReference())))
return EvalCompMatchType.STRICT;
else
return requireStrictAlleleMatch ? EvalCompMatchType.NO_MATCH : EvalCompMatchType.LENIENT;
}
private VariantContext findMatchingComp(final VariantContext eval, final Collection<VariantContext> comps) {
// if no comps, return null
if ( comps == null || comps.isEmpty() )
@ -419,26 +445,21 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
return comps.iterator().next();
// find all of the matching comps
List<VariantContext> matchingComps = new ArrayList<VariantContext>(comps.size());
for ( VariantContext comp : comps ) {
if ( comp.getType() == eval.getType() )
matchingComps.add(comp);
VariantContext lenientMatch = null;
for ( final VariantContext comp : comps ) {
switch ( doEvalAndCompMatch(comp, eval, requireStrictAlleleMatch) ) {
case STRICT:
return comp;
case LENIENT:
if ( lenientMatch == null ) lenientMatch = comp;
break;
case NO_MATCH:
;
}
}
// if no matching comp, return null
if ( matchingComps.size() == 0 )
return null;
// find the comp which matches both the reference allele and alternate allele from eval
Allele altEval = eval.getAlternateAlleles().size() == 0 ? null : eval.getAlternateAllele(0);
for ( VariantContext comp : matchingComps ) {
Allele altComp = comp.getAlternateAlleles().size() == 0 ? null : comp.getAlternateAllele(0);
if ( (altEval == null && altComp == null) || (altEval != null && altEval.equals(altComp) && eval.getReference().equals(comp.getReference())) )
return comp;
}
// if none match, just return the first one unless we require a strict match
return (requireStrictAlleleMatch ? null : matchingComps.get(0));
// nothing matched, just return lenientMatch, which might be null
return lenientMatch;
}
public Integer treeReduce(Integer lhs, Integer rhs) { return null; }

View File

@ -417,8 +417,6 @@ public class VariantEvalUtils {
}
} else {
stateKeys.add(stateKey);
return stateKeys;
}
return stateKeys;

View File

@ -30,7 +30,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("f909fd8374f663e983b9b3fda4cf5cf1")
Arrays.asList("c8d8bffa5c572df9dec7364f71a1b943")
);
executeTest("testFunctionClassWithSnpeff", spec);
}
@ -277,7 +277,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
" --eval " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf" +
" --comp:comp_genotypes,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.head.vcf";
WalkerTestSpec spec = new WalkerTestSpec(withSelect(tests, "DP < 50", "DP50") + " " + extraArgs + " -ST CpG -o %s",
1, Arrays.asList("4f60acc8a4b21c4b4ccb51ad9071449c"));
1, Arrays.asList("c49e239292704447a36e01ee9a71e729"));
executeTestParallel("testSelect1", spec);
}
@ -335,7 +335,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
" --dbsnp " + b37dbSNP132 +
" --eval:evalBI " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" +
" -noST -ST Novelty -o %s";
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("190e1a171132832bf92fbca56a9c40bb"));
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("e42cda858649a35eaa9d14ea2d70a956"));
executeTestParallel("testEvalTrackWithoutGenotypes",spec);
}
@ -347,7 +347,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
" --eval:evalBI " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" +
" --eval:evalBC " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bc.sites.vcf" +
" -noST -ST Novelty -o %s";
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("08586d443fdcf3b7f63b8f9e3a943c62"));
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("9561cb4c7aa36dcf30ba253385299859"));
executeTestParallel("testMultipleEvalTracksWithoutGenotypes",spec);
}
@ -463,7 +463,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("a6f8b32fa732632da13dfe3ddcc73cef")
Arrays.asList("397b0e77459b9b69d2e0dd1dac320c3c")
);
executeTest("testModernVCFWithLargeIndels", spec);
}