From 7a91dbd490d6d3b84b8f2d6b391e54eb69f69a7b Mon Sep 17 00:00:00 2001 From: ebanks Date: Wed, 16 Jun 2010 15:53:06 +0000 Subject: [PATCH] Renamed some of the column names in Ti/Tv and Concordance modules so that they are clearer. Removed ValidationRate module (it was busted). git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3564 348d0f76-0448-11de-a6fe-93d51630548a --- .../varianteval/GenotypeConcordance.java | 24 +-- .../varianteval/TiTvVariantEvaluator.java | 16 +- .../walkers/varianteval/ValidationRate.java | 138 ------------------ .../VariantEvalIntegrationTest.java | 16 +- 4 files changed, 31 insertions(+), 163 deletions(-) delete mode 100755 java/src/org/broadinstitute/sting/gatk/walkers/varianteval/ValidationRate.java diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/GenotypeConcordance.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/GenotypeConcordance.java index bd6dd5fe9..b5b5d5505 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/GenotypeConcordance.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/GenotypeConcordance.java @@ -230,7 +230,7 @@ public class GenotypeConcordance extends VariantEvaluator { private boolean warnedAboutValidationData = false; public String update2(VariantContext eval, VariantContext validation, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - final String interesting = null; + String interesting = null; // sanity check that we at least have either eval or validation data if (eval == null && !isValidVC(validation)) { @@ -266,12 +266,14 @@ public class GenotypeConcordance extends VariantEvaluator { } } - determineStats(eval, validation); + interesting = determineStats(eval, validation); return interesting; // we don't capture any interesting sites } - private void determineStats(final VariantContext eval, final VariantContext validation) { + private String determineStats(final VariantContext eval, final VariantContext validation) { + + String interesting = null; final boolean validationIsValidVC = isValidVC(validation); @@ -285,6 +287,8 @@ public class GenotypeConcordance extends VariantEvaluator { truth = Genotype.Type.NO_CALL; } else { truth = validation.getGenotype(sample).getType(); + // TODO -- capture "interesting" sites here, for example: + // interesting = "ConcordanceStatus=FP"; } sampleStats.incrValue(sample, truth, called); @@ -327,6 +331,8 @@ public class GenotypeConcordance extends VariantEvaluator { } } + + return interesting; } private static boolean isValidVC(final VariantContext vc) { @@ -429,9 +435,9 @@ class SampleSummaryStats implements TableType { "percent_comp_het_called_var", "percent_comp_hom_called_hom", "percent_comp_hom_called_var", - "percent_variant_sensitivity", - "percent_genotype_concordance", - "percent_genotype_error_rate"}; + "percent_non-reference_sensitivity", + "percent_overall_genotype_concordance", + "percent_non-reference_discrepancy_rate"}; // sample to concordance stats object private final HashMap concordanceSummary = new HashMap(); @@ -547,18 +553,18 @@ class SampleSummaryStats implements TableType { updateSummaries(3, summary, numer, denom); // Summary 4: % non-ref called as non-ref - // MAD: this is known as the variant sensitivity (# non-ref according to comp found in eval / # non-ref in comp) + // MAD: this is known as the non-reference sensitivity (# non-ref according to comp found in eval / # non-ref in comp) numer = sumStatsAllPairs(stats, allVariantGenotypes, allVariantGenotypes); denom = sumStatsAllPairs(stats, allVariantGenotypes, allGenotypes); updateSummaries(4, summary, numer, denom); - // Summary 5: genotype concordance of sites called in eval track + // Summary 5: overall genotype concordance of sites called in eval track // MAD: this is the tradition genotype concordance numer = sumStatsDiag(stats, allCalledGenotypes); denom = sumStatsAllPairs(stats, allCalledGenotypes, allCalledGenotypes); updateSummaries(5, summary, numer, denom); - // Summary 6: genotype concordance of sites called non-ref in eval track + // Summary 6: overall genotype concordance of sites called non-ref in eval track long homrefConcords = stats[Genotype.Type.HOM_REF.ordinal()][Genotype.Type.HOM_REF.ordinal()]; long diag = sumStatsDiag(stats, allVariantGenotypes); long allNoHomRef = sumStatsAllPairs(stats, allCalledGenotypes, allCalledGenotypes) - homrefConcords; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/TiTvVariantEvaluator.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/TiTvVariantEvaluator.java index 570070c1d..992e7770d 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/TiTvVariantEvaluator.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/TiTvVariantEvaluator.java @@ -16,11 +16,11 @@ public class TiTvVariantEvaluator extends VariantEvaluator { long nTv = 0; @DataPoint(name = "ti/tv ratio", description = "the transition to transversion ratio") double tiTvRatio = 0.0; - @DataPoint(name = "ti_count_std", description = "number of standard transition sites") - long nTiInStd = 0; - @DataPoint(name = "tv_count_std", description = "number of standard transversion sites") - long nTvInStd = 0; - @DataPoint(name = "ti/tv ratio standard", description = "the transition to transversion ratio") + @DataPoint(name = "ti_count_comp", description = "number of comp transition sites") + long nTiInComp = 0; + @DataPoint(name = "tv_count_comp", description = "number of comp transversion sites") + long nTvInComp = 0; + @DataPoint(name = "ti/tv ratio standard", description = "the transition to transversion ratio for comp sites") double TiTvRatioStandard = 0.0; public TiTvVariantEvaluator(VariantEvalWalker parent) { @@ -42,10 +42,10 @@ public class TiTvVariantEvaluator extends VariantEvaluator { public void updateTiTv(VariantContext vc, boolean updateStandard) { if (vc != null && vc.isSNP() && vc.isBiallelic()) { if (vc.isTransition()) { - if (updateStandard) nTiInStd++; + if (updateStandard) nTiInComp++; else nTi++; } else { - if (updateStandard) nTvInStd++; + if (updateStandard) nTvInComp++; else nTv++; } } @@ -62,6 +62,6 @@ public class TiTvVariantEvaluator extends VariantEvaluator { public void finalizeEvaluation() { // the ti/tv ratio needs to be set (it's not calculated per-variant). this.tiTvRatio = rate(nTi,nTv); - this.TiTvRatioStandard = rate(nTiInStd,nTvInStd); + this.TiTvRatioStandard = rate(nTiInComp, nTvInComp); } } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/ValidationRate.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/ValidationRate.java deleted file mode 100755 index f07cbf298..000000000 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/ValidationRate.java +++ /dev/null @@ -1,138 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.varianteval; - -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.*; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; -import org.broadinstitute.sting.playground.utils.report.tags.Analysis; -import org.broadinstitute.sting.playground.utils.report.tags.DataPoint; - -/** - * The Broad Institute - * SOFTWARE COPYRIGHT NOTICE AGREEMENT - * This software and its documentation are copyright 2009 by the - * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. - *

- * This software is supplied without any warranty or guaranteed support whatsoever. Neither - * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. - */ -@Analysis(name = "Validation Rate", description = "Validation Rate") -public class ValidationRate extends VariantEvaluator { - @DataPoint(name="# mono in comp",description = "Number of mono calls in the comparison ROD") - long n_mono_in_comp; - @DataPoint(name="# poly in comp",description = "Number of poly calls in the comparison ROD") - long n_poly_in_comp; - @DataPoint(name="% poly in comp",description = "Percent of poly calls in the comparison ROD") - double percent_poly_in_comp; - @DataPoint(name="# mono calls at mono sites",description = "Number of mono calls at mono sites") - long n_mono_calls_at_mono_sites; - @DataPoint(name="# poly calls at mono sites",description = "Number of poly calls at mono sites") - long n_poly_calls_at_mono_sites; - @DataPoint(name="# nocalls at mono sites",description = "Number of no calls at mono sites") - long n_nocalls_at_mono_sites; - @DataPoint(name="# mono sites called poly",description = "Percentage of mono sites called poly") - double percent_mono_sites_called_poly; - @DataPoint(name="# mono calls at poly sites",description = "Number of mono calls at poly sites") - long n_mono_calls_at_poly_sites; - @DataPoint(name="# poly calls at poly sites",description = "Number of poly calls at poly sites") - long n_poly_calls_at_poly_sites; - @DataPoint(name="# nocalls at poly sites",description = "Number of no calls at poly sites") - long n_nocalls_at_poly_sites; - @DataPoint(name="% poly sites called poly",description = "Number of poly sites called poly") - double percent_poly_sites_called_poly; - @DataPoint(description = "The PPV") - double PPV; - @DataPoint(description = "The sensitivity") - double sensitivity; - @DataPoint(description = "The False Discovery Rate (FDR)") - double falseDiscoveryRate; - - // todo -- subset validation data by list of samples, if provided - class SiteStats { - long nPoly = 0; - long nMono = 0; - long nNoCall = 0; - - double polyPercent() { - return 100 * rate(nPoly, nPoly + nMono + nNoCall); - } - } - - private SiteStats validationStats = new SiteStats(); - private SiteStats evalOverlapAtMono = new SiteStats(); - private SiteStats evalOverlapAtPoly = new SiteStats(); - - public ValidationRate(VariantEvalWalker parent) { - super(parent); - } - - public String getName() { - return "validationRate"; - } - - public int getComparisonOrder() { - return 2; // we need to see each eval track and each comp track - } - - @Override - public void finalizeEvaluation() { - long TP = evalOverlapAtPoly.nPoly; // + evalOverlapAtMono.nMono + evalOverlapAtMono.nNoCall; - long FP = evalOverlapAtMono.nPoly; // + evalOverlapAtPoly.nMono; - long FN = evalOverlapAtPoly.nMono + evalOverlapAtPoly.nNoCall; - - // fill in the output fields - n_mono_in_comp = validationStats.nMono; - n_poly_in_comp = validationStats.nPoly; - percent_poly_in_comp = validationStats.polyPercent(); - - n_mono_calls_at_mono_sites = evalOverlapAtMono.nMono; - n_poly_calls_at_mono_sites = evalOverlapAtMono.nPoly; - n_nocalls_at_mono_sites = evalOverlapAtMono.nNoCall; - percent_mono_sites_called_poly = evalOverlapAtMono.polyPercent(); - - n_mono_calls_at_poly_sites = evalOverlapAtPoly.nMono; - n_poly_calls_at_poly_sites = evalOverlapAtPoly.nPoly; - n_nocalls_at_poly_sites = evalOverlapAtPoly.nNoCall; - percent_poly_sites_called_poly = evalOverlapAtPoly.polyPercent(); - PPV = 100 * rate(TP, TP + FP); - sensitivity = 100 * rate(TP, TP + FN); - falseDiscoveryRate = 100 * rate(FP, FP + TP); - } - - public boolean enabled() { - return true; - } - - public String update2(VariantContext eval, VariantContext rawValidationData, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - String interesting = null; - - if (rawValidationData!= null && rawValidationData.hasGenotypes() && rawValidationData.isNotFiltered()) { - VariantContext validation = rawValidationData; - - SiteStats overlap; - - if (validation.isPolymorphic()) { - validationStats.nPoly++; - overlap = evalOverlapAtPoly; - if (eval == null || eval.isMonomorphic()) - interesting = "ValidationStatus=FN"; - } else { - validationStats.nMono++; - overlap = evalOverlapAtMono; - - if (eval != null && eval.isPolymorphic()) - interesting = "ValidationStatus=FP"; - } - - if (eval == null) - overlap.nNoCall++; - //else if (eval.isPolymorphic()) // requires genotypes - else if (eval.isSNP()) - overlap.nPoly++; - else - overlap.nMono++; - } - - return interesting; // we don't capture any interesting sites - } -} \ No newline at end of file diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java index 28fa4b262..3cf9a4843 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java @@ -29,7 +29,7 @@ public class String extraArgs = "-L 1:1-10,000,000"; for (String tests : testsEnumerations) { WalkerTestSpec spec = new WalkerTestSpec(withSelect(tests, "DP < 50", "DP50") + " " + extraArgs + " -o %s", - 1, Arrays.asList("5a330d359b5c7ea0dfa6698b4830db82")); + 1, Arrays.asList("97d2471ed6ee79d70ce5bd9cc0be2239")); executeTest("testSelect1", spec); } } @@ -38,7 +38,7 @@ public class public void testSelect2() { String extraArgs = "-L 1:1-10,000,000"; WalkerTestSpec spec = new WalkerTestSpec( withSelect(withSelect(root, "DP < 50", "DP50"), "set==\"Intersection\"", "intersection") + " " + extraArgs + " -o %s", - 1, Arrays.asList("e39d6790e4ee8709dfa2eab8598b168e")); + 1, Arrays.asList("e3366d73ee7cdf630c29809ce230e32e")); executeTest("testSelect2", spec); } @@ -48,7 +48,7 @@ public class for (String vcfFile : vcfFiles) { WalkerTestSpec spec = new WalkerTestSpec(cmdRoot + " -B eval,VCF," + validationDataLocation + vcfFile + " -B comp,VCF," + validationDataLocation + "GenotypeConcordanceComp.vcf -E GenotypeConcordance -reportType CSV -o %s", 1, - Arrays.asList("608b32189818df7271c546dd5f257033")); + Arrays.asList("51574b4ab0b381c5a01268f91e78b25c")); executeTest("testVEGenotypeConcordance" + vcfFile, spec); } @@ -57,8 +57,8 @@ public class @Test public void testVESimple() { HashMap expectations = new HashMap(); - expectations.put("-L 1:1-10,000,000", "08b93c2561742dc480534ae16d2d0508"); - expectations.put("-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -MVQ 0", "65bf329cf1499544d1eb28de786a4561"); + expectations.put("-L 1:1-10,000,000", "8f76d8c0e3a8a5836bb5bf423e04c268"); + expectations.put("-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -MVQ 0", "6b128bacbd0402471bd6d4e3f9283c47"); for ( Map.Entry entry : expectations.entrySet() ) { String extraArgs = entry.getKey(); @@ -80,10 +80,10 @@ public class " -B comp_hapmap,VCF," + validationDataLocation + "CEU_hapmap_nogt_23.vcf"; - String matchingMD5 = "fc1d4d2aca0667605a6b4d486fcbedf2"; + String matchingMD5 = "aca5a64a0e4850906db2bd820253b784"; expectations.put("", matchingMD5); expectations.put(" -known comp_hapmap -known dbsnp", matchingMD5); - expectations.put(" -known comp_hapmap", "fd9cd9970f7e509ca476502869063929"); + expectations.put(" -known comp_hapmap", "442213609c2866f7a90cbc4b3486441a"); for (String tests : testsEnumerations) { for (Map.Entry entry : expectations.entrySet()) { String extraArgs2 = entry.getKey(); @@ -103,7 +103,7 @@ public class for (String tests : testsEnumerations) { WalkerTestSpec spec = new WalkerTestSpec(tests + " " + extraArgs + " -o %s -outputVCF %s", 2, - Arrays.asList("521837758da151b84fca57fd1bb7dad1", "b4a42c90318adc88361691ece50426f2")); + Arrays.asList("dc53aaf7db9f05e3b0a38bf5efe3fbbe", "cd8616aca14eb77bd90732fbfce038d5")); executeTest("testVEWriteVCF", spec); } }