diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/DbSNPPercentage.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/DbSNPPercentage.java index 8b28f2439..6edf6168c 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/DbSNPPercentage.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/DbSNPPercentage.java @@ -17,22 +17,22 @@ import org.broadinstitute.sting.playground.utils.report.tags.DataPoint; * This software is supplied without any warranty or guaranteed support whatsoever. Neither * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. */ -@Analysis(name = "DbSNP Overlap", description = "the overlap between DbSNP sites and other SNP tracks") +@Analysis(name = "Comp Overlap", description = "the overlap between eval and comp sites") public class DbSNPPercentage extends VariantEvaluator { - @DataPoint(name = "DbSNP count", description = "number of DPSNP sites") - long nDBSNPs = 0; - - @DataPoint(name = "total count", description = "number of total snp sites") + @DataPoint(name = "eval sites", description = "number of eval SNP sites") long nEvalSNPs = 0; - @DataPoint(name = "novel snps", description = "number of total snp sites") + @DataPoint(name = "comp sites", description = "number of comp SNP sites") + long nCompSNPs = 0; + + @DataPoint(name = "evals not at comp", description = "number of eval sites outside of comp sites") long novelSites = 0; - @DataPoint(name = "snps at comp", description = "number of SNP sites at comp sites") + @DataPoint(name = "evals at comp", description = "number of eval sites at comp sites") long nSNPsAtComp = 0; - @DataPoint(name = "% snps at comp", description = "percentage of SNP sites at comp sites") + @DataPoint(name = "% evals at comp", description = "percentage of eval sites at comp sites") double compRate = 0.0; @DataPoint(name = "concordant", description = "number of concordant sites") @@ -46,19 +46,19 @@ public class DbSNPPercentage extends VariantEvaluator { } public String getName() { - return "dbOverlap"; + return "compOverlap"; } public int getComparisonOrder() { return 2; // we need to see each eval track and each comp track } - public long nNovelSites() { return Math.abs(nEvalSNPs - nSNPsAtComp); } - public double dbSNPRate() { return rate(nSNPsAtComp, nEvalSNPs); } + public long nNovelSites() { return nEvalSNPs - nSNPsAtComp; } + public double compRate() { return rate(nSNPsAtComp, nEvalSNPs); } public double concordanceRate() { return rate(nConcordant, nSNPsAtComp); } public void finalizeEvaluation() { - compRate = 100 * dbSNPRate(); + compRate = 100 * compRate(); concordantRate = 100 * concordanceRate(); novelSites = nNovelSites(); } @@ -68,32 +68,32 @@ public class DbSNPPercentage extends VariantEvaluator { } /** - * Returns true if every allele in eval is also in dbsnp + * Returns true if every allele in eval is also in comp * * @param eval eval context - * @param dbsnp db context + * @param comp db context * @return true if eval and db are discordant */ - public boolean discordantP(VariantContext eval, VariantContext dbsnp) { + public boolean discordantP(VariantContext eval, VariantContext comp) { for (Allele a : eval.getAlleles()) { - if (!dbsnp.hasAllele(a, true)) + if (!comp.hasAllele(a, true)) return true; } return false; } - public String update2(VariantContext eval, VariantContext dbsnp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - boolean dbSNPIsGood = dbsnp != null && dbsnp.isSNP() && dbsnp.isNotFiltered(); + public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + boolean compIsGood = comp != null && comp.isSNP() && comp.isNotFiltered(); boolean evalIsGood = eval != null && eval.isSNP(); - if (dbSNPIsGood) nDBSNPs++; // count the number of dbSNP events - if (evalIsGood) nEvalSNPs++; // count the number of dbSNP events + if (compIsGood) nCompSNPs++; // count the number of comp events + if (evalIsGood) nEvalSNPs++; // count the number of eval events - if (dbSNPIsGood && evalIsGood) { + if (compIsGood && evalIsGood) { nSNPsAtComp++; - if (!discordantP(eval, dbsnp)) // count whether we're concordant or not with the dbSNP value + if (!discordantP(eval, comp)) // count whether we're concordant or not with the comp value nConcordant++; } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java index 03dc16523..ec940e139 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java @@ -133,8 +133,8 @@ public class VariantEvalWalker extends RodWalker { @Argument(fullName="evalModule", shortName="E", doc="One or more specific eval modules to apply to the eval track(s)", required=false) protected String[] modulesToUse = {}; - @Argument(fullName="useAllModules", shortName="all", doc="Use all possible eval modules", required=false) - protected Boolean USE_ALL_MODULES = false; + @Argument(fullName="useNoModules", shortName="none", doc="Use no eval modules", required=false) + protected Boolean USE_NO_MODULES = false; @Argument(fullName="list", shortName="ls", doc="List the available eval modules and exit") protected Boolean LIST = false; @@ -264,7 +264,12 @@ public class VariantEvalWalker extends RodWalker { for ( ReferenceOrderedDataSource d : this.getToolkit().getRodDataSources() ) { if ( d.getName().startsWith("eval") ) { evalNames.add(d.getName()); - } else if ( d.getName().startsWith(rodDbSNP.STANDARD_DBSNP_TRACK_NAME) || d.getName().startsWith("hapmap") || d.getName().startsWith("comp") ) { + } else if ( d.getName().startsWith(rodDbSNP.STANDARD_DBSNP_TRACK_NAME) ) { + // it feels like overkill (i.e. too much output) to include the dbsnp track as + // truth given that it's also used in the known/novel stratification. If this + // becomes useful for some reason, uncomment this line... + //compNames.add(d.getName()); + } else if ( d.getName().startsWith("hapmap") || d.getName().startsWith("comp") ) { compNames.add(d.getName()); } else { logger.info("Not evaluating ROD binding " + d.getName()); @@ -340,7 +345,9 @@ public class VariantEvalWalker extends RodWalker { for ( Class c : PackageUtils.getClassesImplementingInterface(VariantEvaluator.class) ) classMap.put(c.getSimpleName(), c); - if ( USE_ALL_MODULES ) { + if ( USE_NO_MODULES ) { + evaluationClasses = new ArrayList>(0); + } else if ( modulesToUse.length == 0 ) { evaluationClasses = new ArrayList>(classMap.values()); } else { // get the specific classes provided diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java index 04a92fcc6..331a48a99 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java @@ -9,7 +9,7 @@ import java.util.Map; public class VariantEvalIntegrationTest extends WalkerTest { private static String cmdRoot = "-T VariantEval" + - " -R " + oneKGLocation + "reference/human_b36_both.fasta -reportType Grep -all"; + " -R " + oneKGLocation + "reference/human_b36_both.fasta -reportType Grep"; private static String root = cmdRoot + " -D " + GATKDataLocation + "dbsnp_129_b36.rod" + @@ -18,8 +18,8 @@ public class VariantEvalIntegrationTest extends WalkerTest { @Test public void testVESimple() { HashMap expectations = new HashMap(); - expectations.put("-L 1:1-10,000,000", "278c9c2798fed510a0cc3e65f3749b26"); - expectations.put("-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -MVQ 0", "98ceb8dab20af47e724a0b47f82ba698"); + expectations.put("-L 1:1-10,000,000", "f44a2c9057a78349e1d3ccec327d3343"); + expectations.put("-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -MVQ 0", "e8c5107015e5926aa647e274cb58bff6"); for ( Map.Entry entry : expectations.entrySet() ) { String extraArgs = entry.getKey(); @@ -39,10 +39,9 @@ public class VariantEvalIntegrationTest extends WalkerTest { " -B dbsnp_130,dbSNP," + GATKDataLocation + "dbsnp_130_b36.rod" + " -B comp_hapmap,VCF," + validationDataLocation + "CEU_hapmap_nogt_23.vcf"; - String eqMD5s = "fe4b6bd3e46d956a829fa08f3594427d"; // next two examples should be the same! - expectations.put("", eqMD5s); - expectations.put(" -known comp_hapmap -known dbsnp", eqMD5s); - expectations.put(" -known comp_hapmap", "cb8ce4b0f15e1b01c7aee5106dad4e95"); + expectations.put("", "49a723e6e77a45a4fb0a46890d66806b"); + expectations.put(" -known comp_hapmap -known dbsnp", "11e2a3ae77b3ce4b659baa832388f920"); + expectations.put(" -known comp_hapmap", "11e2a3ae77b3ce4b659baa832388f920"); for ( Map.Entry entry : expectations.entrySet() ) { String extraArgs2 = entry.getKey(); @@ -60,7 +59,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { String extraArgs = "-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -MVQ 30"; WalkerTestSpec spec = new WalkerTestSpec( root + " " + extraArgs + " -o %s -outputVCF %s", 2, - Arrays.asList("782b569e213b494c19598d3f4dacba49", "a3ce1d70d8ae3874807e9d61994d42af")); + Arrays.asList("d8b14bc334a8c4f3cd5462839f5a3ef2", "a3ce1d70d8ae3874807e9d61994d42af")); executeTest("testVEWriteVCF", spec); } }