From 918b7467984ab2842e8024d6c047bd9f812fc50a Mon Sep 17 00:00:00 2001 From: depristo Date: Wed, 7 Apr 2010 16:38:28 +0000 Subject: [PATCH] More detailed validation output. Fixes for genotyping overflow -- these are temporary and need to be properly resolved git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3129 348d0f76-0448-11de-a6fe-93d51630548a --- .../walkers/varianteval2/ValidationRate.java | 22 +++++++++++++------ .../varianteval2/VariantEval2Walker.java | 3 ++- .../VariantEval2IntegrationTest.java | 10 ++++----- 3 files changed, 22 insertions(+), 13 deletions(-) diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/ValidationRate.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/ValidationRate.java index 32368a992..bf44c7c9f 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/ValidationRate.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/ValidationRate.java @@ -29,7 +29,7 @@ public class ValidationRate extends VariantEvaluator { double percent_poly_in_comp; @DataPoint(name="# mono calls at mono sites",description = "Number of mono calls at mono sites") long n_mono_calls_at_mono_sites; - @DataPoint(name="# poly calls at poly sites",description = "Number of poly calls at mono sites") + @DataPoint(name="# poly calls at mono sites",description = "Number of poly calls at mono sites") long n_poly_calls_at_mono_sites; @DataPoint(name="# nocalls at mono sites",description = "Number of no calls at mono sites") long n_nocalls_at_mono_sites; @@ -47,6 +47,8 @@ public class ValidationRate extends VariantEvaluator { double PPV; @DataPoint(description = "The sensitivity") double sensitivity; + @DataPoint(description = "The False Discovery Rate (FDR)") + double falseDiscoveryRate; // todo -- subset validation data by list of samples, if provided class SiteStats { @@ -77,35 +79,41 @@ public class ValidationRate extends VariantEvaluator { @Override public void finalizeEvaluation() { - long TP = evalOverlapAtPoly.nPoly + evalOverlapAtMono.nMono; - long FP = evalOverlapAtMono.nPoly + evalOverlapAtPoly.nMono; + long TP = evalOverlapAtPoly.nPoly; // + evalOverlapAtMono.nMono + evalOverlapAtMono.nNoCall; + long FP = evalOverlapAtMono.nPoly; // + evalOverlapAtPoly.nMono; long FN = evalOverlapAtPoly.nMono + evalOverlapAtPoly.nNoCall; // fill in the output fields n_mono_in_comp = validationStats.nMono; n_poly_in_comp = validationStats.nPoly; percent_poly_in_comp = validationStats.polyPercent(); + n_mono_calls_at_mono_sites = evalOverlapAtMono.nMono; n_poly_calls_at_mono_sites = evalOverlapAtMono.nPoly; n_nocalls_at_mono_sites = evalOverlapAtMono.nNoCall; percent_mono_sites_called_poly = evalOverlapAtMono.polyPercent(); + n_mono_calls_at_poly_sites = evalOverlapAtPoly.nMono; - n_poly_calls_at_poly_sites = evalOverlapAtPoly.nMono; + n_poly_calls_at_poly_sites = evalOverlapAtPoly.nPoly; n_nocalls_at_poly_sites = evalOverlapAtPoly.nNoCall; percent_poly_sites_called_poly = evalOverlapAtPoly.polyPercent(); PPV = 100 * rate(TP, TP + FP); sensitivity = 100 * rate(TP, TP + FN); - + falseDiscoveryRate = 100 * rate(FP, FP + TP); } public boolean enabled() { return true; } - public String update2(VariantContext eval, VariantContext validation, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + public String update2(VariantContext eval, VariantContext rawValidationData, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { String interesting = null; - if (validation != null && validation.hasGenotypes() && validation.isNotFiltered()) { + if (rawValidationData!= null && rawValidationData.hasGenotypes() && rawValidationData.isNotFiltered()) { + VariantContext validation = rawValidationData; + //if ( eval != null ) // todo -- remove me when I can get the header from the VariantEval engine + // validation = rawValidationData.subContextFromGenotypes(eval.getGenotypes().values()); + SiteStats overlap; if (validation.isPolymorphic()) { diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/VariantEval2Walker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/VariantEval2Walker.java index 042b808a6..4b675f1f8 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/VariantEval2Walker.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/VariantEval2Walker.java @@ -410,7 +410,8 @@ public class VariantEval2Walker extends RodWalker { } private void writeInterestingSite(List interestingReasons, VariantContext vc, char ref) { - if ( writer != null && interestingReasons.size() > 0 ) { + if ( vc != null && writer != null && interestingReasons.size() > 0 ) { + // todo -- the vc == null check is because you can be interesting because you are a FN, and so VC == null MutableVariantContext mvc = new MutableVariantContext(vc); for ( String why : interestingReasons ) { diff --git a/java/test/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/VariantEval2IntegrationTest.java b/java/test/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/VariantEval2IntegrationTest.java index a62160a58..93a3949b2 100755 --- a/java/test/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/VariantEval2IntegrationTest.java +++ b/java/test/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/VariantEval2IntegrationTest.java @@ -18,8 +18,8 @@ public class VariantEval2IntegrationTest extends WalkerTest { @Test public void testVE2Simple() { HashMap expectations = new HashMap(); - expectations.put("-L 1:1-10,000,000", "c304b257004f9d11cbcd89725eb89319"); - expectations.put("-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -MVQ 0", "59cce874ed647fce22789b82779c4f13"); + expectations.put("-L 1:1-10,000,000", "d5a8c6550236e971ffe29f7f254ad076"); + expectations.put("-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -MVQ 0", "529b56760da952741e9d843453a8eb73"); for ( Map.Entry entry : expectations.entrySet() ) { String extraArgs = entry.getKey(); @@ -39,10 +39,10 @@ public class VariantEval2IntegrationTest extends WalkerTest { " -B dbsnp_130,dbSNP," + GATKDataLocation + "dbsnp_130_b36.rod" + " -B comp_hapmap,VCF," + validationDataLocation + "CEU_hapmap_nogt_23.vcf"; - String eqMD5s = "cf20ea2b9c6b173311b735f6d4ae72e6"; // next two examples should be the same! + String eqMD5s = "13d2de47f9fe56a4ddfeca10d9c1d5a9"; // next two examples should be the same! expectations.put("", eqMD5s); expectations.put(" -known comp_hapmap -known dbsnp", eqMD5s); - expectations.put(" -known comp_hapmap", "f43630aa98764aecb867b175014879e1"); + expectations.put(" -known comp_hapmap", "edd1f32a7f2f0390fdd0cc4c5ff14bf1"); for ( Map.Entry entry : expectations.entrySet() ) { String extraArgs2 = entry.getKey(); @@ -60,7 +60,7 @@ public class VariantEval2IntegrationTest extends WalkerTest { String extraArgs = "-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -MVQ 30"; WalkerTestSpec spec = new WalkerTestSpec( root + " " + extraArgs + " -o %s -outputVCF %s", 2, - Arrays.asList("abc2a36c0375386f731461da5e7e2104", "a3ce1d70d8ae3874807e9d61994d42af")); + Arrays.asList("31f8cf5010d4b73b1520acef88207214", "a3ce1d70d8ae3874807e9d61994d42af")); executeTest("testVE2WriteVCF", spec); } }