diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantDBCoverage.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantDBCoverage.java index 02ec1c3ca..3aa380205 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantDBCoverage.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantDBCoverage.java @@ -1,8 +1,7 @@ package org.broadinstitute.sting.playground.gatk.walkers.varianteval; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.refdata.BrokenRODSimulator; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.refdata.*; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.genotype.Variation; @@ -21,77 +20,21 @@ import java.util.List; public class VariantDBCoverage extends BasicVariantAnalysis implements GenotypeAnalysis, PopulationAnalysis { private String dbName; private int nDBSNPs = 0; - private int nDBIndels = 0; - private int nDBElse = 0; private int nEvalObs = 0; private int nSNPsAtdbSNPs = 0; private int nConcordant = 0; - private int nSNPsCalledAtIndels = 0; - public VariantDBCoverage(final String name) { super("db_coverage"); dbName = name; - // THIS IS A HACK required in order to reproduce the behavior of old (and imperfect) RODIterator and - // hence to pass the integration test. The new iterator this code is now using does see ALL the SNPs, - // whether masked by overlapping indels/other events or not. - //TODO process correctly all the returned dbSNP rods at each location - BrokenRODSimulator.attach(name); } - public void inc(Variation dbSNP, Variation eval) { - boolean inDB = dbSNP != null; - boolean inEval = eval != null; + public int nDBSNPs() { return nDBSNPs; } + public int nEvalSites() { return nEvalObs; } + public int nSNPsAtdbSNPs() { return nSNPsAtdbSNPs; } + public int nConcordant() { return nConcordant; } + public int nNovelSites() { return Math.abs(nEvalSites() - nSNPsAtdbSNPs()); } - if (inDB) { - if (dbSNP.isSNP()) nDBSNPs++; - else if (dbSNP.isIndel()) nDBIndels++; - else { nDBElse++; } - } - - if (inEval) nEvalObs++; - if (inDB && inEval) { - if (dbSNP.isSNP()) { // changes the calculation a bit - nSNPsAtdbSNPs++; - - if (!discordantP(dbSNP, eval)) - nConcordant++; - } - - if (dbSNP.isIndel() && eval.isSNP()) - nSNPsCalledAtIndels++; - } - } - - public int callCount = 0; - - public int nDBSNPs() { - return nDBSNPs; - } - - public int nDBIndels() { - return nDBIndels; - } - - public int nEvalSites() { - return nEvalObs; - } - - public int nSNPsAtdbSNPs() { - return nSNPsAtdbSNPs; - } - - public int nConcordant() { - return nConcordant; - } - - public int nNovelSites() { - return Math.abs(nEvalSites() - nSNPsAtdbSNPs()); - } - - public int nSNPsAtIndels() { - return nSNPsCalledAtIndels; - } public boolean discordantP(Variation dbSNP, Variation eval) { if (eval != null) { char alt = (eval.isSNP()) ? eval.getAlternativeBaseForSNP() : Utils.stringToChar(eval.getReference()); @@ -107,7 +50,7 @@ public class VariantDBCoverage extends BasicVariantAnalysis implements GenotypeA * * @return */ - public double fractionEvalSitesCoveredByDB() { + public double dbSNPRate() { return nSNPsAtdbSNPs() / (1.0 * nEvalSites()); } @@ -116,59 +59,47 @@ public class VariantDBCoverage extends BasicVariantAnalysis implements GenotypeA } public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) { + rodDbSNP dbSNP = rodDbSNP.getFirstRealSNP(tracker.getTrackData( dbName, null )); + String result = null; - // There are four cases here: - //TODO process correctly all the returned dbSNP rods at each location - Variation dbsnp = (Variation) BrokenRODSimulator.simulate_lookup(dbName,context.getLocation(),tracker); + if (dbSNP != null) nDBSNPs++; // count the number of real dbSNP events + if ( eval != null && eval.isSNP() ) { // ignore indels right now + nEvalObs++; // count the number of eval snps we've seen - inc(dbsnp, eval); + if (dbSNP != null) { // both eval and dbSNP have real snps + nSNPsAtdbSNPs++; - if (dbsnp != null && eval != null) { - - if (dbsnp.isSNP() && eval.isSNP() && discordantP(dbsnp, eval)) { - return String.format("Discordant [DBSNP %s] [EVAL %s]", dbsnp, eval); - } else if (dbsnp.isIndel() && eval.isSNP()) { - return String.format("SNP-at-indel DBSNP=%s %s", Utils.join("",dbsnp.getAlleleList()), eval); - } else { - return null; + if (!discordantP(dbSNP, eval)) // count whether we're concordant or not with the dbSNP value + nConcordant++; + else + result = String.format("Discordant [DBSNP %s] [EVAL %s]", dbSNP, eval); } - } else { - return null; } - } - /** - * What fraction of the DB sites were discovered in the evalution calls? - * - * @return - */ - public double fractionDBSitesDiscoveredInEval() { - return nSNPsAtdbSNPs() / (1.0 * nDBSNPs()); + if ( dbSNP != null && dbSNP.isSNP() ) { + BrokenRODSimulator.attach("dbSNP"); + rodDbSNP dbsnp = (rodDbSNP) BrokenRODSimulator.simulate_lookup("dbSNP", context.getLocation(), tracker); + if ( ! dbSNP.getRS_ID().equals(dbsnp.getRS_ID()) && dbsnp.isSNP() ) { + System.out.printf("Discordant site! %n%s%n vs.%n%s%n", dbSNP, dbsnp); + } + } + + return result; } public List done() { List s = new ArrayList(); - //s.add(String.format("%d\t%d\t%d\t%.2f\t%.2f", nDBSNPs(), nEvalSites(), nOverlappingSites(), fractionEvalSitesCoveredByDB(), fractionDBSitesDiscoveredInEval())); s.add(String.format("name %s", dbName)); - s.add(String.format("n_db_sites %d", nDBSNPs() + nDBIndels() + nDBElse)); s.add(String.format("n_db_snps %d", nDBSNPs())); - s.add(String.format("n_db_indels %d", nDBIndels())); - s.add(String.format("n_db_others %d", nDBElse)); s.add(String.format("n_eval_sites %d", nEvalSites())); s.add(String.format("n_overlapping_sites %d", nSNPsAtdbSNPs())); s.add(String.format("n_concordant %d", nConcordant())); s.add(String.format("n_novel_sites %d", nNovelSites())); - - s.add(String.format("dbsnp_rate %.2f # percent eval snps at dbsnp snps", 100 * fractionEvalSitesCoveredByDB())); + s.add(String.format("dbsnp_rate %.2f # percent eval snps at dbsnp snps", 100 * dbSNPRate())); s.add(String.format("concordance_rate %.2f", 100 * concordanceRate())); - s.add(String.format("percent_db_sites_in_eval %.2f", 100 * fractionDBSitesDiscoveredInEval())); - - s.add(String.format("n_snp_calls_at_indels %d", nSNPsAtIndels())); - s.add(String.format("percent_calls_at_indels %.2f", nSNPsAtIndels() / (0.01 * nEvalSites()))); - return s; } } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalker.java index db5c9235e..7f4bb4689 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalker.java @@ -113,7 +113,7 @@ public class VariantEvalWalker extends RefWalker { // hence to pass the integration test. The new iterator this code is now using does see ALL the SNPs, // whether masked by overlapping indels/other events or not. //TODO process correctly all the returned dbSNP rods at each location - BrokenRODSimulator.attach("dbSNP"); + //BrokenRODSimulator.attach("dbSNP"); } @@ -259,17 +259,7 @@ public class VariantEvalWalker extends RefWalker { // update the known / novel set by checking whether the knownSNPDBName track has an entry here if (eval != null) { - Variation dbsnp = (Variation) BrokenRODSimulator.simulate_lookup("dbSNP", ref.getLocus(), tracker); - - ANALYSIS_TYPE noveltySet = null; - if ( dbsnp == null ) noveltySet = ANALYSIS_TYPE.NOVEL_SNPS; - else if ( dbsnp.isSNP() ) noveltySet = ANALYSIS_TYPE.KNOWN_SNPS; - else { // if ( dbsnp.isIndel() ) - noveltySet = ANALYSIS_TYPE.SNPS_AT_NON_SNP_SITES; - } - //else { ; } // lots of wierd things in there } - // // throw new StingException("Unexpected dbSNP record type: " + dbsnp); } - + ANALYSIS_TYPE noveltySet = getNovelAnalysisType(tracker); updateAnalysisSet(noveltySet, eval, tracker, ref.getBase(), context); } @@ -286,6 +276,30 @@ public class VariantEvalWalker extends RefWalker { return 1; } + private ANALYSIS_TYPE getNovelAnalysisType(RefMetaDataTracker tracker) { + RODRecordList dbsnpList = tracker.getTrackData("dbsnp", null); + + if (dbsnpList == null) + return ANALYSIS_TYPE.NOVEL_SNPS; + + for (ReferenceOrderedDatum d : dbsnpList) { + if (((rodDbSNP) d).isSNP()) { + return ANALYSIS_TYPE.KNOWN_SNPS; + } + } + + return ANALYSIS_TYPE.SNPS_AT_NON_SNP_SITES; + + // old and busted way of doing this +// Variation dbsnp = (Variation) BrokenRODSimulator.simulate_lookup("dbSNP", ref.getLocus(), tracker); +// +// ANALYSIS_TYPE noveltySet = null; +// if ( dbsnp == null ) noveltySet = ANALYSIS_TYPE.NOVEL_SNPS; +// else if ( dbsnp.isSNP() ) noveltySet = ANALYSIS_TYPE.KNOWN_SNPS; +// else { // if ( dbsnp.isIndel() ) +// noveltySet = ANALYSIS_TYPE.SNPS_AT_NON_SNP_SITES; + } + public boolean includeViolations() { return mIncludeViolations; } public void updateAnalysisSet(final ANALYSIS_TYPE analysisSetName, Variation eval, diff --git a/java/test/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalkerIntegrationTest.java b/java/test/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalkerIntegrationTest.java index 82329c72c..ca6b8ddc2 100644 --- a/java/test/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalkerIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalkerIntegrationTest.java @@ -18,8 +18,8 @@ public class VariantEvalWalkerIntegrationTest extends WalkerTest { @Test public void testEvalVariantROD() { HashMap md5 = new HashMap(); - md5.put("", "b4adb7fed93005b3c28d72c3578ebc33"); - md5.put("-A", "d34c1598a5b35098bd5524a52fbbc0fd"); + md5.put("", "d6b8c2d6c37d42d1ca2288799a8bd8e4"); + md5.put("-A", "b868aac194f6d0bd1fd2c0c63ddfaeab"); /** * the above MD5 was calculated from running the following command: @@ -52,7 +52,7 @@ public class VariantEvalWalkerIntegrationTest extends WalkerTest { @Test public void testEvalVariantRODConfSix() { List md5 = new ArrayList(); - md5.add("4421a0b379577ea0fcde2c9866f9ca9f"); + md5.add("85cfefcac2dfb06545792605a3043a52"); /** * the above MD5 was calculated from running the following command: @@ -84,7 +84,7 @@ public class VariantEvalWalkerIntegrationTest extends WalkerTest { @Test public void testEvalVariantRODOutputViolations() { List md5 = new ArrayList(); - md5.add("e1ce229698725aa06c9b08b21fe628ef"); + md5.add("e24732ffd95a78385a2c6986d1d3a359"); /** * the above MD5 was calculated from running the following command: @@ -116,7 +116,7 @@ public class VariantEvalWalkerIntegrationTest extends WalkerTest { @Test public void testEvalGenotypeROD() { List md5 = new ArrayList(); - md5.add("fd95465965116e05e2e0356e2d105321"); + md5.add("8b85148ec20f31481a30ceddd8645ba1"); /** * the above MD5 was calculated after running the following command: * @@ -150,7 +150,7 @@ public class VariantEvalWalkerIntegrationTest extends WalkerTest { @Test public void testEvalMarksGenotypingExample() { List md5 = new ArrayList(); - md5.add("58ef0487c9231aec7f356919e8cbd305"); + md5.add("7d5a98c01051f96a684a383786da3d76"); /** * Run with the following commands: *