diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantCounter.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantCounter.java index 220ad94cb..6736f2350 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantCounter.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantCounter.java @@ -62,16 +62,16 @@ public class VariantCounter extends BasicVariantAnalysis implements GenotypeAnal s.add(String.format("n bases covered %d", nBasesCovered)); s.add(String.format("variants %d", nSNPs)); s.add(String.format("variant rate %.5f confident variants per base", variantRate(nSNPs))); - s.add(String.format("variant rate 1 / %d confident variants per base [human single sample genome-wide expectation is ~1 / 666]", variantRateInverse(nSNPs))); + s.add(String.format("variant rate 1 / %d confident variants per base", variantRateInverse(nSNPs))); if ( this.getMaster().evalContainsGenotypes ) { s.add(String.format("heterozygotes %d", nHets)); s.add(String.format("homozygotes %d", nSNPs - nHets)); s.add(String.format("heterozygosity %.5f confident hets per base", variantRate(nHets))); - s.add(String.format("heterozygosity 1 / %d confident hets per base [human single sample expectation is ~1 / 1000]", variantRateInverse(nHets))); + s.add(String.format("heterozygosity 1 / %d confident hets per base", variantRateInverse(nHets))); - s.add(String.format("het to hom ratio %.2f confident hets per confident homozygote non-refs [human single sample genome-wide expectation is 2:1]", + s.add(String.format("het to hom ratio %.2f confident hets per confident homozygote non-refs [single sample genome-wide expectation is 2:1]", ((double)nHets) / (Math.max(nSNPs - nHets, 1)))); } return s; diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantDBCoverage.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantDBCoverage.java index faae1be0b..02ec1c3ca 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantDBCoverage.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantDBCoverage.java @@ -22,8 +22,9 @@ public class VariantDBCoverage extends BasicVariantAnalysis implements GenotypeA private String dbName; private int nDBSNPs = 0; private int nDBIndels = 0; + private int nDBElse = 0; private int nEvalObs = 0; - private int nOverlapping = 0; + private int nSNPsAtdbSNPs = 0; private int nConcordant = 0; private int nSNPsCalledAtIndels = 0; @@ -44,13 +45,14 @@ public class VariantDBCoverage extends BasicVariantAnalysis implements GenotypeA if (inDB) { if (dbSNP.isSNP()) nDBSNPs++; - if (dbSNP.isIndel()) nDBIndels++; + else if (dbSNP.isIndel()) nDBIndels++; + else { nDBElse++; } } if (inEval) nEvalObs++; if (inDB && inEval) { if (dbSNP.isSNP()) { // changes the calculation a bit - nOverlapping++; + nSNPsAtdbSNPs++; if (!discordantP(dbSNP, eval)) nConcordant++; @@ -75,8 +77,8 @@ public class VariantDBCoverage extends BasicVariantAnalysis implements GenotypeA return nEvalObs; } - public int nOverlappingSites() { - return nOverlapping; + public int nSNPsAtdbSNPs() { + return nSNPsAtdbSNPs; } public int nConcordant() { @@ -84,7 +86,7 @@ public class VariantDBCoverage extends BasicVariantAnalysis implements GenotypeA } public int nNovelSites() { - return Math.abs(nEvalSites() - nOverlappingSites()); + return Math.abs(nEvalSites() - nSNPsAtdbSNPs()); } public int nSNPsAtIndels() { @@ -106,11 +108,11 @@ public class VariantDBCoverage extends BasicVariantAnalysis implements GenotypeA * @return */ public double fractionEvalSitesCoveredByDB() { - return nOverlappingSites() / (1.0 * nEvalSites()); + return nSNPsAtdbSNPs() / (1.0 * nEvalSites()); } public double concordanceRate() { - return nConcordant() / (1.0 * nOverlappingSites()); + return nConcordant() / (1.0 * nSNPsAtdbSNPs()); } public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) { @@ -119,7 +121,6 @@ public class VariantDBCoverage extends BasicVariantAnalysis implements GenotypeA //TODO process correctly all the returned dbSNP rods at each location Variation dbsnp = (Variation) BrokenRODSimulator.simulate_lookup(dbName,context.getLocation(),tracker); - boolean isSNP = dbsnp != null && dbsnp.isSNP(); inc(dbsnp, eval); if (dbsnp != null && eval != null) { @@ -142,7 +143,7 @@ public class VariantDBCoverage extends BasicVariantAnalysis implements GenotypeA * @return */ public double fractionDBSitesDiscoveredInEval() { - return nOverlappingSites() / (1.0 * nDBSNPs()); + return nSNPsAtdbSNPs() / (1.0 * nDBSNPs()); } public List done() { @@ -150,15 +151,17 @@ public class VariantDBCoverage extends BasicVariantAnalysis implements GenotypeA //s.add(String.format("%d\t%d\t%d\t%.2f\t%.2f", nDBSNPs(), nEvalSites(), nOverlappingSites(), fractionEvalSitesCoveredByDB(), fractionDBSitesDiscoveredInEval())); s.add(String.format("name %s", dbName)); - s.add(String.format("n_db_sites %d", nDBSNPs() + nDBIndels())); + s.add(String.format("n_db_sites %d", nDBSNPs() + nDBIndels() + nDBElse)); s.add(String.format("n_db_snps %d", nDBSNPs())); s.add(String.format("n_db_indels %d", nDBIndels())); + s.add(String.format("n_db_others %d", nDBElse)); s.add(String.format("n_eval_sites %d", nEvalSites())); - s.add(String.format("n_overlapping_sites %d", nOverlappingSites())); + s.add(String.format("n_overlapping_sites %d", nSNPsAtdbSNPs())); s.add(String.format("n_concordant %d", nConcordant())); s.add(String.format("n_novel_sites %d", nNovelSites())); - s.add(String.format("percent_eval_sites_in_db %.2f", 100 * fractionEvalSitesCoveredByDB())); + + s.add(String.format("dbsnp_rate %.2f # percent eval snps at dbsnp snps", 100 * fractionEvalSitesCoveredByDB())); s.add(String.format("concordance_rate %.2f", 100 * concordanceRate())); s.add(String.format("percent_db_sites_in_eval %.2f", 100 * fractionDBSitesDiscoveredInEval())); diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalker.java index ae6de1fc0..762854a05 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalker.java @@ -72,7 +72,8 @@ public class VariantEvalWalker extends RefWalker { // the types of analysis we support, and the string tags we associate with the enumerated value enum ANALYSIS_TYPE { - ALL_SNPS("all"), SINGLETON_SNPS("singletons"), TWOHIT_SNPS("2plus_hit"), KNOWN_SNPS("known"), NOVEL_SNPS("novel"); + // todo -- differeniate into three classes -- snps at known snp sites, snps not at known snp site but covered by known indel, and novel + ALL_SNPS("all"), SINGLETON_SNPS("singletons"), TWOHIT_SNPS("2plus_hit"), KNOWN_SNPS("known"), SNPS_AT_NON_SNP_SITES("snp_at_known_non_snps"), NOVEL_SNPS("novel"); private final String value; ANALYSIS_TYPE(String value) { this.value = value;} @@ -85,9 +86,11 @@ public class VariantEvalWalker extends RefWalker { ANALYSIS_TYPE.SINGLETON_SNPS, ANALYSIS_TYPE.TWOHIT_SNPS, ANALYSIS_TYPE.KNOWN_SNPS, + ANALYSIS_TYPE.SNPS_AT_NON_SNP_SITES, ANALYSIS_TYPE.NOVEL_SNPS}; final ANALYSIS_TYPE[] GENOTYPE_ANALYSIS_NAMES = {ANALYSIS_TYPE.ALL_SNPS, ANALYSIS_TYPE.KNOWN_SNPS, + ANALYSIS_TYPE.SNPS_AT_NON_SNP_SITES, ANALYSIS_TYPE.NOVEL_SNPS}; final ANALYSIS_TYPE[] SIMPLE_ANALYSIS_NAMES = {ANALYSIS_TYPE.ALL_SNPS}; ANALYSIS_TYPE[] ALL_ANALYSIS_NAMES = null; @@ -258,7 +261,15 @@ public class VariantEvalWalker extends RefWalker { if (eval != null) { Variation dbsnp = (Variation) BrokenRODSimulator.simulate_lookup("dbSNP", ref.getLocus(), tracker); - ANALYSIS_TYPE noveltySet = dbsnp == null ? ANALYSIS_TYPE.NOVEL_SNPS : ANALYSIS_TYPE.KNOWN_SNPS; + ANALYSIS_TYPE noveltySet = null; + if ( dbsnp == null ) noveltySet = ANALYSIS_TYPE.NOVEL_SNPS; + else if ( dbsnp.isSNP() ) noveltySet = ANALYSIS_TYPE.KNOWN_SNPS; + else { // if ( dbsnp.isIndel() ) + noveltySet = ANALYSIS_TYPE.SNPS_AT_NON_SNP_SITES; + } + //else { ; } // lots of wierd things in there } + // // throw new StingException("Unexpected dbSNP record type: " + dbsnp); } + updateAnalysisSet(noveltySet, eval, tracker, ref.getBase(), context); } @@ -312,7 +323,7 @@ public class VariantEvalWalker extends RefWalker { private String getLineHeader(final ANALYSIS_TYPE analysisSetName, final String keyword, final String analysis) { String s = Utils.join(",", Arrays.asList(analysisSetName, keyword, analysis)); - return s + Utils.dupString(' ', 50 - s.length()); + return s + Utils.dupString(' ', Math.max(50 - s.length(), 1)); } private void printAnalysisSet(final ANALYSIS_TYPE analysisSetName) { diff --git a/java/test/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalkerIntegrationTest.java b/java/test/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalkerIntegrationTest.java index 316c5c98d..5d5d1e58c 100644 --- a/java/test/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalkerIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalkerIntegrationTest.java @@ -18,8 +18,8 @@ public class VariantEvalWalkerIntegrationTest extends WalkerTest { @Test public void testEvalVariantROD() { HashMap md5 = new HashMap(); - md5.put("", "86582b57ec27dd9c4a6e0252eb010376"); - md5.put("-A", "ead904e334956435e6b586b697cd1905"); + md5.put("", "b4adb7fed93005b3c28d72c3578ebc33"); + md5.put("-A", "d34c1598a5b35098bd5524a52fbbc0fd"); /** * the above MD5 was calculated from running the following command: @@ -52,7 +52,7 @@ public class VariantEvalWalkerIntegrationTest extends WalkerTest { @Test public void testEvalVariantRODConfSix() { List md5 = new ArrayList(); - md5.add("875dea21073bb5b24a771105bdeb2225"); + md5.add("4421a0b379577ea0fcde2c9866f9ca9f"); /** * the above MD5 was calculated from running the following command: @@ -84,7 +84,7 @@ public class VariantEvalWalkerIntegrationTest extends WalkerTest { @Test public void testEvalVariantRODOutputViolations() { List md5 = new ArrayList(); - md5.add("d84e5b2a23ab1cf028145f09cd1e9f5b"); + md5.add("e1ce229698725aa06c9b08b21fe628ef"); /** * the above MD5 was calculated from running the following command: @@ -116,7 +116,7 @@ public class VariantEvalWalkerIntegrationTest extends WalkerTest { @Test public void testEvalGenotypeROD() { List md5 = new ArrayList(); - md5.add("87ec21995ab4227404da1dc144c1f4c7"); + md5.add("fd95465965116e05e2e0356e2d105321"); /** * the above MD5 was calculated after running the following command: * @@ -150,7 +150,7 @@ public class VariantEvalWalkerIntegrationTest extends WalkerTest { @Test public void testEvalMarksGenotypingExample() { List md5 = new ArrayList(); - md5.add("e9aebff0e3ccba7ba6f64433ba7d3533"); + md5.add("58ef0487c9231aec7f356919e8cbd305"); /** * Run with the following commands: * @@ -177,7 +177,7 @@ public class VariantEvalWalkerIntegrationTest extends WalkerTest { @Test public void testEvalRuntimeWithLotsOfIntervals() { List md5 = new ArrayList(); - md5.add("bfdc82c3fd8a286f5855d3932ede3124"); + md5.add(""); WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T VariantEval -R /broad/1KG/reference/human_b36_both.fasta " + "-B eval,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.pilot_3.all.geli.calls " +