Improvements to the validation report of VariantEval
-- If eval has genotypes and comp has genotypes, then subset the genotypes of comp down to the samples being evaluated when considering TP, FP, FN, TN status. This is important in the case where you want to use this to assess, for example, the quality of calls on NA12878 but you have a CEU trio comp VCF. The previous version was counting sites polymorphic in mom against the calls in NA12878.
-- Added test data VCF and integration tests to ensure this behavior continues in the future
-- TODO: actually run integration tests when I have an internet connection
This commit is contained in:
parent
559a4826be
commit
dede3a30e9
|
|
@ -11,6 +11,7 @@ import org.broadinstitute.sting.utils.variantcontext.Allele;
|
|||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* The Broad Institute
|
||||
|
|
@ -102,9 +103,10 @@ public class ValidationReport extends VariantEvaluator implements StandardEval {
|
|||
nDifferentAlleleSites++;
|
||||
else {
|
||||
SiteStatus evalStatus = calcSiteStatus(eval);
|
||||
if ( comp.hasGenotypes() && ! getWalker().getSampleNamesForEvaluation().isEmpty() && comp.hasGenotypes(getWalker().getSampleNamesForEvaluation()) )
|
||||
final Set<String> evalSamples = getWalker().getSampleNamesForEvaluation();
|
||||
if ( comp.hasGenotypes() && ! evalSamples.isEmpty() && comp.hasGenotypes(evalSamples) )
|
||||
// if we have genotypes in both eval and comp, subset comp down to just the samples in eval
|
||||
comp = comp.subContextFromSamples(eval.getSampleNames(), false);
|
||||
comp = comp.subContextFromSamples(evalSamples, false);
|
||||
SiteStatus compStatus = calcSiteStatus(comp);
|
||||
counts[compStatus.ordinal()][evalStatus.ordinal()]++;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -27,8 +27,11 @@ package org.broadinstitute.sting.gatk.walkers.varianteval;
|
|||
import org.broadinstitute.sting.WalkerTest;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.testng.annotations.Test;
|
||||
import org.testng.annotations.DataProvider;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
public class VariantEvalIntegrationTest extends WalkerTest {
|
||||
private static String variantEvalTestDataRoot = validationDataLocation + "VariantEval/";
|
||||
|
|
@ -617,25 +620,33 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
// Test that the validation report does the right thing with sites-only and genotypes files
|
||||
// where the validation comp has more genotypes than eval
|
||||
//
|
||||
public void testValidationReport(final String comp, final String md5) {
|
||||
@Test(dataProvider = "testValidationReportData")
|
||||
public void testValidationReport(final String name, final String eval, final String comp, final String md5) {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
buildCommandLine(
|
||||
"-T VariantEval",
|
||||
"-R " + b37KGReference,
|
||||
"-eval " + privateTestDir + "/validationReportEval.vcf ",
|
||||
"-L 20:10,000,000-10,000,010 -noST -noEV -EV ValidationReport -o %s"
|
||||
"-eval " + eval,
|
||||
"-comp " + comp,
|
||||
"-L 20:10,000,000-10,000,010 -noST -noEV -EV ValidationReport -o %s"
|
||||
),
|
||||
1,
|
||||
Arrays.asList(md5));
|
||||
executeTest("testValidationReport with comp " + comp, spec);
|
||||
}
|
||||
|
||||
@Test public void testValidationReportSites() {
|
||||
testValidationReport(privateTestDir + "/validationReportComp.noGenotypes.vcf", "f0dbb848a94b451e42765b0cb9d09ee2");
|
||||
}
|
||||
@Test public void testValidationReportSubsetGenotypes() {
|
||||
testValidationReport(privateTestDir + "/validationReportComp.vcf", "73790b530595fcbd467a88475ea9717f");
|
||||
executeTest("testValidationReport with " + name, spec);
|
||||
}
|
||||
|
||||
@DataProvider(name = "testValidationReportData")
|
||||
public Object[][] testValidationReportData() {
|
||||
final String compGenotypes = privateTestDir + "/validationReportComp.vcf";
|
||||
final String compSites = privateTestDir + "/validationReportComp.noGenotypes.vcf";
|
||||
final String evalGenotypes = privateTestDir + "/validationReportEval.vcf";
|
||||
final String evalSites = privateTestDir + "/validationReportEval.noGenotypes.vcf";
|
||||
|
||||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
tests.add(new Object[]{"sites/sites", evalSites, compSites, ""});
|
||||
tests.add(new Object[]{"sites/genotypes", evalSites, compGenotypes, ""});
|
||||
tests.add(new Object[]{"genotypes/sites", evalGenotypes, compSites, ""});
|
||||
tests.add(new Object[]{"genotypes/genotypes", evalGenotypes, compGenotypes, ""});
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue