1. Removed -all; by default all modules are used; use -none for no modules.

2. Don't make dbsnp track be a comp by default (to cut back on output). Please let me know if someone wants this back for some reason. 3. Cleaned up dbsnp module output to print the right numbers. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3220 348d0f76-0448-11de-a6fe-93d51630548a
2010-04-21 02:46:42 +00:00 · 2010-04-21 02:46:42 +00:00 · 3b5673d967
parent 4e18c54bb8
commit 3b5673d967
3 changed files with 40 additions and 34 deletions
--- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/DbSNPPercentage.java
+++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/DbSNPPercentage.java
@ -17,22 +17,22 @@ import org.broadinstitute.sting.playground.utils.report.tags.DataPoint;
 * This software is supplied without any warranty or guaranteed support whatsoever. Neither
 * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
 */
-@Analysis(name = "DbSNP Overlap", description = "the overlap between DbSNP sites and other SNP tracks")
+@Analysis(name = "Comp Overlap", description = "the overlap between eval and comp sites")
 public class DbSNPPercentage extends VariantEvaluator {

-    @DataPoint(name = "DbSNP count", description = "number of DPSNP sites")
-    long nDBSNPs = 0;
-
-    @DataPoint(name = "total count", description = "number of total snp sites")
+    @DataPoint(name = "eval sites", description = "number of eval SNP sites")
    long nEvalSNPs = 0;

-    @DataPoint(name = "novel snps", description = "number of total snp sites")
+    @DataPoint(name = "comp sites", description = "number of comp SNP sites")
+    long nCompSNPs = 0;
+
+    @DataPoint(name = "evals not at comp", description = "number of eval sites outside of comp sites")
    long novelSites = 0;

-    @DataPoint(name = "snps at comp", description = "number of SNP sites at comp sites")
+    @DataPoint(name = "evals at comp", description = "number of eval sites at comp sites")
    long nSNPsAtComp = 0;

-    @DataPoint(name = "% snps at comp", description = "percentage of SNP sites at comp sites")
+    @DataPoint(name = "% evals at comp", description = "percentage of eval sites at comp sites")
    double compRate = 0.0;

    @DataPoint(name = "concordant", description = "number of concordant sites")
@ -46,19 +46,19 @@ public class DbSNPPercentage extends VariantEvaluator {
    }

    public String getName() {
-        return "dbOverlap";
+        return "compOverlap";
    }

    public int getComparisonOrder() {
        return 2;   // we need to see each eval track and each comp track
    }

-    public long nNovelSites() { return Math.abs(nEvalSNPs - nSNPsAtComp); }
-    public double dbSNPRate() { return rate(nSNPsAtComp, nEvalSNPs); }
+    public long nNovelSites() { return nEvalSNPs - nSNPsAtComp; }
+    public double compRate() { return rate(nSNPsAtComp, nEvalSNPs); }
    public double concordanceRate() { return rate(nConcordant, nSNPsAtComp); }

    public void finalizeEvaluation() {
-        compRate = 100 * dbSNPRate();
+        compRate = 100 * compRate();
        concordantRate = 100 * concordanceRate();
        novelSites = nNovelSites();
    }
@ -68,32 +68,32 @@ public class DbSNPPercentage extends VariantEvaluator {
    }

    /**
-     * Returns true if every allele in eval is also in dbsnp
+     * Returns true if every allele in eval is also in comp
     *
     * @param eval  eval context
-     * @param dbsnp db context
+     * @param comp db context
     * @return true if eval and db are discordant
     */
-    public boolean discordantP(VariantContext eval, VariantContext dbsnp) {
+    public boolean discordantP(VariantContext eval, VariantContext comp) {
        for (Allele a : eval.getAlleles()) {
-            if (!dbsnp.hasAllele(a, true))
+            if (!comp.hasAllele(a, true))
                return true;
        }

        return false;
    }

-    public String update2(VariantContext eval, VariantContext dbsnp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
-        boolean dbSNPIsGood = dbsnp != null && dbsnp.isSNP() && dbsnp.isNotFiltered();
+    public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
+        boolean compIsGood = comp != null && comp.isSNP() && comp.isNotFiltered();
        boolean evalIsGood = eval != null && eval.isSNP();

-        if (dbSNPIsGood) nDBSNPs++;             // count the number of dbSNP events
-        if (evalIsGood) nEvalSNPs++;           // count the number of dbSNP events
+        if (compIsGood) nCompSNPs++;           // count the number of comp events
+        if (evalIsGood) nEvalSNPs++;           // count the number of eval events

-        if (dbSNPIsGood && evalIsGood) {
+        if (compIsGood && evalIsGood) {
            nSNPsAtComp++;

-            if (!discordantP(eval, dbsnp))     // count whether we're concordant or not with the dbSNP value
+            if (!discordantP(eval, comp))     // count whether we're concordant or not with the comp value
                nConcordant++;
        }

--- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java
+++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java
@ -133,8 +133,8 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> {
    @Argument(fullName="evalModule", shortName="E", doc="One or more specific eval modules to apply to the eval track(s)", required=false)
    protected String[] modulesToUse = {};

-    @Argument(fullName="useAllModules", shortName="all", doc="Use all possible eval modules", required=false)
-    protected Boolean USE_ALL_MODULES = false;
+    @Argument(fullName="useNoModules", shortName="none", doc="Use no eval modules", required=false)
+    protected Boolean USE_NO_MODULES = false;

    @Argument(fullName="list", shortName="ls", doc="List the available eval modules and exit")
    protected Boolean LIST = false;
@ -264,7 +264,12 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> {
        for ( ReferenceOrderedDataSource d : this.getToolkit().getRodDataSources() ) {
            if ( d.getName().startsWith("eval") ) {
                evalNames.add(d.getName());
-            } else if ( d.getName().startsWith(rodDbSNP.STANDARD_DBSNP_TRACK_NAME) || d.getName().startsWith("hapmap") || d.getName().startsWith("comp") ) {
+            } else if ( d.getName().startsWith(rodDbSNP.STANDARD_DBSNP_TRACK_NAME) ) {
+                // it feels like overkill (i.e. too much output) to include the dbsnp track as
+                //  truth given that it's also used in the known/novel stratification.  If this
+                //  becomes useful for some reason, uncomment this line...
+                //compNames.add(d.getName());
+            } else if ( d.getName().startsWith("hapmap") || d.getName().startsWith("comp") ) {
                compNames.add(d.getName());
            } else {
                logger.info("Not evaluating ROD binding " + d.getName());
@ -340,7 +345,9 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> {
        for ( Class<? extends VariantEvaluator> c : PackageUtils.getClassesImplementingInterface(VariantEvaluator.class) )
            classMap.put(c.getSimpleName(), c);

-        if ( USE_ALL_MODULES ) {
+        if ( USE_NO_MODULES ) {
+            evaluationClasses = new ArrayList<Class<? extends VariantEvaluator>>(0);            
+        } else if ( modulesToUse.length == 0 ) {
            evaluationClasses = new ArrayList<Class<? extends VariantEvaluator>>(classMap.values());
        } else {
            // get the specific classes provided
--- a/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java
+++ b/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java
@ -9,7 +9,7 @@ import java.util.Map;

 public class VariantEvalIntegrationTest extends WalkerTest {
    private static String cmdRoot = "-T VariantEval" +
-            " -R " + oneKGLocation + "reference/human_b36_both.fasta -reportType Grep -all";
+            " -R " + oneKGLocation + "reference/human_b36_both.fasta -reportType Grep";

    private static String root = cmdRoot +
            " -D " + GATKDataLocation + "dbsnp_129_b36.rod" +
@ -18,8 +18,8 @@ public class VariantEvalIntegrationTest extends WalkerTest {
    @Test
    public void testVESimple() {
        HashMap<String, String> expectations = new HashMap<String, String>();
-        expectations.put("-L 1:1-10,000,000", "278c9c2798fed510a0cc3e65f3749b26");
-        expectations.put("-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -MVQ 0", "98ceb8dab20af47e724a0b47f82ba698");
+        expectations.put("-L 1:1-10,000,000", "f44a2c9057a78349e1d3ccec327d3343");
+        expectations.put("-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -MVQ 0", "e8c5107015e5926aa647e274cb58bff6");

        for ( Map.Entry<String, String> entry : expectations.entrySet() ) {
            String extraArgs = entry.getKey();
@ -39,10 +39,9 @@ public class VariantEvalIntegrationTest extends WalkerTest {
                " -B dbsnp_130,dbSNP," + GATKDataLocation + "dbsnp_130_b36.rod" +
                " -B comp_hapmap,VCF," + validationDataLocation + "CEU_hapmap_nogt_23.vcf";

-        String eqMD5s = "fe4b6bd3e46d956a829fa08f3594427d"; // next two examples should be the same!
-        expectations.put("", eqMD5s);
-        expectations.put(" -known comp_hapmap -known dbsnp", eqMD5s);
-        expectations.put(" -known comp_hapmap", "cb8ce4b0f15e1b01c7aee5106dad4e95");
+        expectations.put("", "49a723e6e77a45a4fb0a46890d66806b");
+        expectations.put(" -known comp_hapmap -known dbsnp", "11e2a3ae77b3ce4b659baa832388f920");
+        expectations.put(" -known comp_hapmap", "11e2a3ae77b3ce4b659baa832388f920");

        for ( Map.Entry<String, String> entry : expectations.entrySet() ) {
            String extraArgs2 = entry.getKey();
@ -60,7 +59,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
        String extraArgs = "-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -MVQ 30";
        WalkerTestSpec spec = new WalkerTestSpec( root + " " + extraArgs + " -o %s -outputVCF %s",
                2,
-                Arrays.asList("782b569e213b494c19598d3f4dacba49", "a3ce1d70d8ae3874807e9d61994d42af"));
+                Arrays.asList("d8b14bc334a8c4f3cd5462839f5a3ef2", "a3ce1d70d8ae3874807e9d61994d42af"));
        executeTest("testVEWriteVCF", spec);
    }
 }