Improvements to the validation report of VariantEval

-- If both eval and comp have genotypes, subset the genotypes of comp down to the samples being evaluated when determining TP, FP, FN, and TN status. This matters when, for example, you want to assess the quality of calls on NA12878 but the comp VCF contains the whole CEU trio. The previous version was counting sites polymorphic in the mother against the calls in NA12878.
-- Added test data VCF and integration tests to ensure this behavior is preserved in the future
-- TODO: actually run integration tests when I have an internet connection
This commit is contained in:
Mark DePristo 2012-07-10 18:00:16 -07:00
parent 559a4826be
commit dede3a30e9
2 changed files with 26 additions and 13 deletions

View File

@ -11,6 +11,7 @@ import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.Collection; import java.util.Collection;
import java.util.Set;
/** /**
* The Broad Institute * The Broad Institute
@ -102,9 +103,10 @@ public class ValidationReport extends VariantEvaluator implements StandardEval {
nDifferentAlleleSites++; nDifferentAlleleSites++;
else { else {
SiteStatus evalStatus = calcSiteStatus(eval); SiteStatus evalStatus = calcSiteStatus(eval);
if ( comp.hasGenotypes() && ! getWalker().getSampleNamesForEvaluation().isEmpty() && comp.hasGenotypes(getWalker().getSampleNamesForEvaluation()) ) final Set<String> evalSamples = getWalker().getSampleNamesForEvaluation();
if ( comp.hasGenotypes() && ! evalSamples.isEmpty() && comp.hasGenotypes(evalSamples) )
// if we have genotypes in both eval and comp, subset comp down to just the samples in eval // if we have genotypes in both eval and comp, subset comp down to just the samples in eval
comp = comp.subContextFromSamples(eval.getSampleNames(), false); comp = comp.subContextFromSamples(evalSamples, false);
SiteStatus compStatus = calcSiteStatus(comp); SiteStatus compStatus = calcSiteStatus(comp);
counts[compStatus.ordinal()][evalStatus.ordinal()]++; counts[compStatus.ordinal()][evalStatus.ordinal()]++;
} }

View File

@ -27,8 +27,11 @@ package org.broadinstitute.sting.gatk.walkers.varianteval;
import org.broadinstitute.sting.WalkerTest; import org.broadinstitute.sting.WalkerTest;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
import org.testng.annotations.Test; import org.testng.annotations.Test;
import org.testng.annotations.DataProvider;
import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.List;
public class VariantEvalIntegrationTest extends WalkerTest { public class VariantEvalIntegrationTest extends WalkerTest {
private static String variantEvalTestDataRoot = validationDataLocation + "VariantEval/"; private static String variantEvalTestDataRoot = validationDataLocation + "VariantEval/";
@ -617,25 +620,33 @@ public class VariantEvalIntegrationTest extends WalkerTest {
// Test validation report is doing the right thing with sites only and genotypes files // Test validation report is doing the right thing with sites only and genotypes files
// where the validation comp has more genotypes than eval // where the validation comp has more genotypes than eval
// //
public void testValidationReport(final String comp, final String md5) { @Test(dataProvider = "testValidationReportData")
public void testValidationReport(final String name, final String eval, final String comp, final String md5) {
WalkerTestSpec spec = new WalkerTestSpec( WalkerTestSpec spec = new WalkerTestSpec(
buildCommandLine( buildCommandLine(
"-T VariantEval", "-T VariantEval",
"-R " + b37KGReference, "-R " + b37KGReference,
"-eval " + privateTestDir + "/validationReportEval.vcf ", "-eval " + eval,
"-comp " + comp,
"-L 20:10,000,000-10,000,010 -noST -noEV -EV ValidationReport -o %s" "-L 20:10,000,000-10,000,010 -noST -noEV -EV ValidationReport -o %s"
), ),
1, 1,
Arrays.asList(md5)); Arrays.asList(md5));
executeTest("testValidationReport with comp " + comp, spec); executeTest("testValidationReport with " + name, spec);
}
@Test public void testValidationReportSites() {
testValidationReport(privateTestDir + "/validationReportComp.noGenotypes.vcf", "f0dbb848a94b451e42765b0cb9d09ee2");
}
@Test public void testValidationReportSubsetGenotypes() {
testValidationReport(privateTestDir + "/validationReportComp.vcf", "73790b530595fcbd467a88475ea9717f");
} }
@DataProvider(name = "testValidationReportData")
public Object[][] testValidationReportData() {
final String compGenotypes = privateTestDir + "/validationReportComp.vcf";
final String compSites = privateTestDir + "/validationReportComp.noGenotypes.vcf";
final String evalGenotypes = privateTestDir + "/validationReportEval.vcf";
final String evalSites = privateTestDir + "/validationReportEval.noGenotypes.vcf";
List<Object[]> tests = new ArrayList<Object[]>();
tests.add(new Object[]{"sites/sites", evalSites, compSites, ""});
tests.add(new Object[]{"sites/genotypes", evalSites, compGenotypes, ""});
tests.add(new Object[]{"genotypes/sites", evalGenotypes, compSites, ""});
tests.add(new Object[]{"genotypes/genotypes", evalGenotypes, compGenotypes, ""});
return tests.toArray(new Object[][]{});
}
} }