Improvements to the validation report of VariantEval
-- If eval has genotypes and comp has genotypes, then subset the genotypes of comp down to the samples being evaluated when considering TP, FP, FN, TN status. This is important when you want to assess, for example, the quality of calls on NA12878 but your comp VCF contains the CEU trio. The previous version was counting sites polymorphic in the mother against the calls in NA12878. -- Added test data VCFs and integration tests to ensure this behavior continues in the future.
This commit is contained in:
parent
dc292c0317
commit
559a4826be
|
|
@ -102,6 +102,9 @@ public class ValidationReport extends VariantEvaluator implements StandardEval {
|
|||
nDifferentAlleleSites++;
|
||||
else {
|
||||
SiteStatus evalStatus = calcSiteStatus(eval);
|
||||
if ( comp.hasGenotypes() && ! getWalker().getSampleNamesForEvaluation().isEmpty() && comp.hasGenotypes(getWalker().getSampleNamesForEvaluation()) )
|
||||
// if we have genotypes in both eval and comp, subset comp down to just the samples in eval
|
||||
comp = comp.subContextFromSamples(eval.getSampleNames(), false);
|
||||
SiteStatus compStatus = calcSiteStatus(comp);
|
||||
counts[compStatus.ordinal()][evalStatus.ordinal()]++;
|
||||
}
|
||||
|
|
@ -111,7 +114,7 @@ public class ValidationReport extends VariantEvaluator implements StandardEval {
|
|||
//
|
||||
// helper routines
|
||||
//
|
||||
public SiteStatus calcSiteStatus(VariantContext vc) {
|
||||
private SiteStatus calcSiteStatus(VariantContext vc) {
|
||||
if ( vc == null ) return SiteStatus.NO_CALL;
|
||||
if ( vc.isFiltered() ) return SiteStatus.FILTERED;
|
||||
if ( vc.isMonomorphicInSamples() ) return SiteStatus.MONO;
|
||||
|
|
@ -121,24 +124,18 @@ public class ValidationReport extends VariantEvaluator implements StandardEval {
|
|||
int ac = 0;
|
||||
if ( vc.getNAlleles() > 2 ) {
|
||||
return SiteStatus.POLY;
|
||||
//// System.out.printf("multiple alleles %s = %s%n", vc.getAlleles(), vc.getExtendedAttribute(VCFConstants.ALLELE_COUNT_KEY));
|
||||
// // todo -- omg this is painful. We need a better approach to dealing with multi-valued attributes
|
||||
// for ( String v : (List<String>)vc.getExtendedAttribute(VCFConstants.ALLELE_COUNT_KEY) )
|
||||
// ac += Integer.valueOf(v);
|
||||
//// System.out.printf(" ac = %d%n", ac);
|
||||
}
|
||||
else
|
||||
ac = vc.getAttributeAsInt(VCFConstants.ALLELE_COUNT_KEY, 0);
|
||||
return ac > 0 ? SiteStatus.POLY : SiteStatus.MONO;
|
||||
} else {
|
||||
return TREAT_ALL_SITES_IN_EVAL_VCF_AS_CALLED ? SiteStatus.POLY : SiteStatus.NO_CALL; // we can't figure out what to do
|
||||
//return SiteStatus.NO_CALL; // we can't figure out what to do
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
public boolean haveDifferentAltAlleles(VariantContext eval, VariantContext comp) {
|
||||
private boolean haveDifferentAltAlleles(VariantContext eval, VariantContext comp) {
|
||||
Collection<Allele> evalAlts = eval.getAlternateAlleles();
|
||||
Collection<Allele> compAlts = comp.getAlternateAlleles();
|
||||
if ( evalAlts.size() != compAlts.size() ) {
|
||||
|
|
|
|||
|
|
@ -613,4 +613,29 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
/** Delegates to {@code testIncludingAC0} with the flag set to {@code true} and the expected MD5. */
@Test
public void testWithAC0() {
    final boolean includingAC0 = true;
    final String expectedMd5 = "c786128cfe4d3e28cdbc15c5c838ad20";
    testIncludingAC0(includingAC0, expectedMd5);
}
|
||||
/** Delegates to {@code testIncludingAC0} with the flag set to {@code false} and the expected MD5. */
@Test
public void testWithoutAC0() {
    final boolean includingAC0 = false;
    final String expectedMd5 = "7bc505c07d9aee49571ad4b3fc9f7feb";
    testIncludingAC0(includingAC0, expectedMd5);
}
|
||||
|
||||
//
|
||||
// Test validation report is doing the right thing with sites only and genotypes files
|
||||
// where the validation comp has more genotypes than eval
|
||||
//
|
||||
public void testValidationReport(final String comp, final String md5) {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
buildCommandLine(
|
||||
"-T VariantEval",
|
||||
"-R " + b37KGReference,
|
||||
"-eval " + privateTestDir + "/validationReportEval.vcf ",
|
||||
"-L 20:10,000,000-10,000,010 -noST -noEV -EV ValidationReport -o %s"
|
||||
),
|
||||
1,
|
||||
Arrays.asList(md5));
|
||||
executeTest("testValidationReport with comp " + comp, spec);
|
||||
}
|
||||
|
||||
@Test public void testValidationReportSites() {
|
||||
testValidationReport(privateTestDir + "/validationReportComp.noGenotypes.vcf", "f0dbb848a94b451e42765b0cb9d09ee2");
|
||||
}
|
||||
@Test public void testValidationReportSubsetGenotypes() {
|
||||
testValidationReport(privateTestDir + "/validationReportComp.vcf", "73790b530595fcbd467a88475ea9717f");
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue