From 3b5673d967a261f4bf7bf4e3b4949f7b0116a057 Mon Sep 17 00:00:00 2001 From: ebanks Date: Wed, 21 Apr 2010 02:46:42 +0000 Subject: [PATCH] 1. Removed -all; by default all modules are used; use -none for no modules. 2. Don't make dbsnp track be a comp by default (to cut back on output). Please let me know if someone wants this back for some reason. 3. Cleaned up dbsnp module output to print the right numbers. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3220 348d0f76-0448-11de-a6fe-93d51630548a --- .../walkers/varianteval/DbSNPPercentage.java | 44 +++++++++---------- .../varianteval/VariantEvalWalker.java | 15 +++++-- .../VariantEvalIntegrationTest.java | 15 +++---- 3 files changed, 40 insertions(+), 34 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/DbSNPPercentage.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/DbSNPPercentage.java index 8b28f2439..6edf6168c 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/DbSNPPercentage.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/DbSNPPercentage.java @@ -17,22 +17,22 @@ import org.broadinstitute.sting.playground.utils.report.tags.DataPoint; * This software is supplied without any warranty or guaranteed support whatsoever. Neither * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. */ -@Analysis(name = "DbSNP Overlap", description = "the overlap between DbSNP sites and other SNP tracks") +@Analysis(name = "Comp Overlap", description = "the overlap between eval and comp sites") public class DbSNPPercentage extends VariantEvaluator { - @DataPoint(name = "DbSNP count", description = "number of DPSNP sites") - long nDBSNPs = 0; - - @DataPoint(name = "total count", description = "number of total snp sites") + @DataPoint(name = "eval sites", description = "number of eval SNP sites") long nEvalSNPs = 0; - @DataPoint(name = "novel snps", description = "number of total snp sites") + @DataPoint(name = "comp sites", description = "number of comp SNP sites") + long nCompSNPs = 0; + + @DataPoint(name = "evals not at comp", description = "number of eval sites outside of comp sites") long novelSites = 0; - @DataPoint(name = "snps at comp", description = "number of SNP sites at comp sites") + @DataPoint(name = "evals at comp", description = "number of eval sites at comp sites") long nSNPsAtComp = 0; - @DataPoint(name = "% snps at comp", description = "percentage of SNP sites at comp sites") + @DataPoint(name = "% evals at comp", description = "percentage of eval sites at comp sites") double compRate = 0.0; @DataPoint(name = "concordant", description = "number of concordant sites") @@ -46,19 +46,19 @@ public class DbSNPPercentage extends VariantEvaluator { } public String getName() { - return "dbOverlap"; + return "compOverlap"; } public int getComparisonOrder() { return 2; // we need to see each eval track and each comp track } - public long nNovelSites() { return Math.abs(nEvalSNPs - nSNPsAtComp); } - public double dbSNPRate() { return rate(nSNPsAtComp, nEvalSNPs); } + public long nNovelSites() { return nEvalSNPs - nSNPsAtComp; } + public double compRate() { return rate(nSNPsAtComp, nEvalSNPs); } public double concordanceRate() { return rate(nConcordant, nSNPsAtComp); } public void finalizeEvaluation() { - compRate = 100 * dbSNPRate(); + compRate = 100 * compRate(); concordantRate = 100 * concordanceRate(); novelSites = nNovelSites(); } @@ -68,32 +68,32 @@ public class DbSNPPercentage extends VariantEvaluator { } /** - * Returns true if every allele in eval is also in dbsnp + * Returns true if every allele in eval is also in comp * * @param eval eval context - * @param dbsnp db context + * @param comp db context * @return true if eval and db are discordant */ - public boolean discordantP(VariantContext eval, VariantContext dbsnp) { + public boolean discordantP(VariantContext eval, VariantContext comp) { for (Allele a : eval.getAlleles()) { - if (!dbsnp.hasAllele(a, true)) + if (!comp.hasAllele(a, true)) return true; } return false; } - public String update2(VariantContext eval, VariantContext dbsnp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - boolean dbSNPIsGood = dbsnp != null && dbsnp.isSNP() && dbsnp.isNotFiltered(); + public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + boolean compIsGood = comp != null && comp.isSNP() && comp.isNotFiltered(); boolean evalIsGood = eval != null && eval.isSNP(); - if (dbSNPIsGood) nDBSNPs++; // count the number of dbSNP events - if (evalIsGood) nEvalSNPs++; // count the number of dbSNP events + if (compIsGood) nCompSNPs++; // count the number of comp events + if (evalIsGood) nEvalSNPs++; // count the number of eval events - if (dbSNPIsGood && evalIsGood) { + if (compIsGood && evalIsGood) { nSNPsAtComp++; - if (!discordantP(eval, dbsnp)) // count whether we're concordant or not with the dbSNP value + if (!discordantP(eval, comp)) // count whether we're concordant or not with the comp value nConcordant++; } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java index 03dc16523..ec940e139 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java @@ -133,8 +133,8 @@ public class VariantEvalWalker extends RodWalker { @Argument(fullName="evalModule", shortName="E", doc="One or more specific eval modules to apply to the eval track(s)", required=false) protected String[] modulesToUse = {}; - @Argument(fullName="useAllModules", shortName="all", doc="Use all possible eval modules", required=false) - protected Boolean USE_ALL_MODULES = false; + @Argument(fullName="useNoModules", shortName="none", doc="Use no eval modules", required=false) + protected Boolean USE_NO_MODULES = false; @Argument(fullName="list", shortName="ls", doc="List the available eval modules and exit") protected Boolean LIST = false; @@ -264,7 +264,12 @@ public class VariantEvalWalker extends RodWalker { for ( ReferenceOrderedDataSource d : this.getToolkit().getRodDataSources() ) { if ( d.getName().startsWith("eval") ) { evalNames.add(d.getName()); - } else if ( d.getName().startsWith(rodDbSNP.STANDARD_DBSNP_TRACK_NAME) || d.getName().startsWith("hapmap") || d.getName().startsWith("comp") ) { + } else if ( d.getName().startsWith(rodDbSNP.STANDARD_DBSNP_TRACK_NAME) ) { + // it feels like overkill (i.e. too much output) to include the dbsnp track as + // truth given that it's also used in the known/novel stratification. If this + // becomes useful for some reason, uncomment this line... + //compNames.add(d.getName()); + } else if ( d.getName().startsWith("hapmap") || d.getName().startsWith("comp") ) { compNames.add(d.getName()); } else { logger.info("Not evaluating ROD binding " + d.getName()); @@ -340,7 +345,9 @@ public class VariantEvalWalker extends RodWalker { for ( Class c : PackageUtils.getClassesImplementingInterface(VariantEvaluator.class) ) classMap.put(c.getSimpleName(), c); - if ( USE_ALL_MODULES ) { + if ( USE_NO_MODULES ) { + evaluationClasses = new ArrayList>(0); + } else if ( modulesToUse.length == 0 ) { evaluationClasses = new ArrayList>(classMap.values()); } else { // get the specific classes provided diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java index 04a92fcc6..331a48a99 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java @@ -9,7 +9,7 @@ import java.util.Map; public class VariantEvalIntegrationTest extends WalkerTest { private static String cmdRoot = "-T VariantEval" + - " -R " + oneKGLocation + "reference/human_b36_both.fasta -reportType Grep -all"; + " -R " + oneKGLocation + "reference/human_b36_both.fasta -reportType Grep"; private static String root = cmdRoot + " -D " + GATKDataLocation + "dbsnp_129_b36.rod" + @@ -18,8 +18,8 @@ public class VariantEvalIntegrationTest extends WalkerTest { @Test public void testVESimple() { HashMap expectations = new HashMap(); - expectations.put("-L 1:1-10,000,000", "278c9c2798fed510a0cc3e65f3749b26"); - expectations.put("-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -MVQ 0", "98ceb8dab20af47e724a0b47f82ba698"); + expectations.put("-L 1:1-10,000,000", "f44a2c9057a78349e1d3ccec327d3343"); + expectations.put("-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -MVQ 0", "e8c5107015e5926aa647e274cb58bff6"); for ( Map.Entry entry : expectations.entrySet() ) { String extraArgs = entry.getKey(); @@ -39,10 +39,9 @@ public class VariantEvalIntegrationTest extends WalkerTest { " -B dbsnp_130,dbSNP," + GATKDataLocation + "dbsnp_130_b36.rod" + " -B comp_hapmap,VCF," + validationDataLocation + "CEU_hapmap_nogt_23.vcf"; - String eqMD5s = "fe4b6bd3e46d956a829fa08f3594427d"; // next two examples should be the same! - expectations.put("", eqMD5s); - expectations.put(" -known comp_hapmap -known dbsnp", eqMD5s); - expectations.put(" -known comp_hapmap", "cb8ce4b0f15e1b01c7aee5106dad4e95"); + expectations.put("", "49a723e6e77a45a4fb0a46890d66806b"); + expectations.put(" -known comp_hapmap -known dbsnp", "11e2a3ae77b3ce4b659baa832388f920"); + expectations.put(" -known comp_hapmap", "11e2a3ae77b3ce4b659baa832388f920"); for ( Map.Entry entry : expectations.entrySet() ) { String extraArgs2 = entry.getKey(); @@ -60,7 +59,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { String extraArgs = "-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -MVQ 30"; WalkerTestSpec spec = new WalkerTestSpec( root + " " + extraArgs + " -o %s -outputVCF %s", 2, - Arrays.asList("782b569e213b494c19598d3f4dacba49", "a3ce1d70d8ae3874807e9d61994d42af")); + Arrays.asList("d8b14bc334a8c4f3cd5462839f5a3ef2", "a3ce1d70d8ae3874807e9d61994d42af")); executeTest("testVEWriteVCF", spec); } }