Bug fixes for VariantEval. dbCoverage now reports dbSNP rate, not some wierd eval_snps_in_db as before. We now separate non-indel and non-snp db sites in dbcoverage. Some dbSNP records don't fit into these two categories. Also fixed a consistency issue where novel / known sites where being determined solely by whether dbSNP had a record there, rather than the stricter dbcoverage screen for isSNP().

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2180 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
depristo 2009-11-30 01:39:01 +00:00
parent 2ea93385be
commit af22ca1b47
4 changed files with 40 additions and 26 deletions

View File

@ -62,16 +62,16 @@ public class VariantCounter extends BasicVariantAnalysis implements GenotypeAnal
s.add(String.format("n bases covered %d", nBasesCovered));
s.add(String.format("variants %d", nSNPs));
s.add(String.format("variant rate %.5f confident variants per base", variantRate(nSNPs)));
s.add(String.format("variant rate 1 / %d confident variants per base [human single sample genome-wide expectation is ~1 / 666]", variantRateInverse(nSNPs)));
s.add(String.format("variant rate 1 / %d confident variants per base", variantRateInverse(nSNPs)));
if ( this.getMaster().evalContainsGenotypes ) {
s.add(String.format("heterozygotes %d", nHets));
s.add(String.format("homozygotes %d", nSNPs - nHets));
s.add(String.format("heterozygosity %.5f confident hets per base", variantRate(nHets)));
s.add(String.format("heterozygosity 1 / %d confident hets per base [human single sample expectation is ~1 / 1000]", variantRateInverse(nHets)));
s.add(String.format("heterozygosity 1 / %d confident hets per base", variantRateInverse(nHets)));
s.add(String.format("het to hom ratio %.2f confident hets per confident homozygote non-refs [human single sample genome-wide expectation is 2:1]",
s.add(String.format("het to hom ratio %.2f confident hets per confident homozygote non-refs [single sample genome-wide expectation is 2:1]",
((double)nHets) / (Math.max(nSNPs - nHets, 1))));
}
return s;

View File

@ -22,8 +22,9 @@ public class VariantDBCoverage extends BasicVariantAnalysis implements GenotypeA
private String dbName;
private int nDBSNPs = 0;
private int nDBIndels = 0;
private int nDBElse = 0;
private int nEvalObs = 0;
private int nOverlapping = 0;
private int nSNPsAtdbSNPs = 0;
private int nConcordant = 0;
private int nSNPsCalledAtIndels = 0;
@ -44,13 +45,14 @@ public class VariantDBCoverage extends BasicVariantAnalysis implements GenotypeA
if (inDB) {
if (dbSNP.isSNP()) nDBSNPs++;
if (dbSNP.isIndel()) nDBIndels++;
else if (dbSNP.isIndel()) nDBIndels++;
else { nDBElse++; }
}
if (inEval) nEvalObs++;
if (inDB && inEval) {
if (dbSNP.isSNP()) { // changes the calculation a bit
nOverlapping++;
nSNPsAtdbSNPs++;
if (!discordantP(dbSNP, eval))
nConcordant++;
@ -75,8 +77,8 @@ public class VariantDBCoverage extends BasicVariantAnalysis implements GenotypeA
return nEvalObs;
}
public int nOverlappingSites() {
return nOverlapping;
public int nSNPsAtdbSNPs() {
return nSNPsAtdbSNPs;
}
public int nConcordant() {
@ -84,7 +86,7 @@ public class VariantDBCoverage extends BasicVariantAnalysis implements GenotypeA
}
public int nNovelSites() {
return Math.abs(nEvalSites() - nOverlappingSites());
return Math.abs(nEvalSites() - nSNPsAtdbSNPs());
}
public int nSNPsAtIndels() {
@ -106,11 +108,11 @@ public class VariantDBCoverage extends BasicVariantAnalysis implements GenotypeA
* @return
*/
public double fractionEvalSitesCoveredByDB() {
return nOverlappingSites() / (1.0 * nEvalSites());
return nSNPsAtdbSNPs() / (1.0 * nEvalSites());
}
public double concordanceRate() {
return nConcordant() / (1.0 * nOverlappingSites());
return nConcordant() / (1.0 * nSNPsAtdbSNPs());
}
public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
@ -119,7 +121,6 @@ public class VariantDBCoverage extends BasicVariantAnalysis implements GenotypeA
//TODO process correctly all the returned dbSNP rods at each location
Variation dbsnp = (Variation) BrokenRODSimulator.simulate_lookup(dbName,context.getLocation(),tracker);
boolean isSNP = dbsnp != null && dbsnp.isSNP();
inc(dbsnp, eval);
if (dbsnp != null && eval != null) {
@ -142,7 +143,7 @@ public class VariantDBCoverage extends BasicVariantAnalysis implements GenotypeA
* @return
*/
public double fractionDBSitesDiscoveredInEval() {
return nOverlappingSites() / (1.0 * nDBSNPs());
return nSNPsAtdbSNPs() / (1.0 * nDBSNPs());
}
public List<String> done() {
@ -150,15 +151,17 @@ public class VariantDBCoverage extends BasicVariantAnalysis implements GenotypeA
//s.add(String.format("%d\t%d\t%d\t%.2f\t%.2f", nDBSNPs(), nEvalSites(), nOverlappingSites(), fractionEvalSitesCoveredByDB(), fractionDBSitesDiscoveredInEval()));
s.add(String.format("name %s", dbName));
s.add(String.format("n_db_sites %d", nDBSNPs() + nDBIndels()));
s.add(String.format("n_db_sites %d", nDBSNPs() + nDBIndels() + nDBElse));
s.add(String.format("n_db_snps %d", nDBSNPs()));
s.add(String.format("n_db_indels %d", nDBIndels()));
s.add(String.format("n_db_others %d", nDBElse));
s.add(String.format("n_eval_sites %d", nEvalSites()));
s.add(String.format("n_overlapping_sites %d", nOverlappingSites()));
s.add(String.format("n_overlapping_sites %d", nSNPsAtdbSNPs()));
s.add(String.format("n_concordant %d", nConcordant()));
s.add(String.format("n_novel_sites %d", nNovelSites()));
s.add(String.format("percent_eval_sites_in_db %.2f", 100 * fractionEvalSitesCoveredByDB()));
s.add(String.format("dbsnp_rate %.2f # percent eval snps at dbsnp snps", 100 * fractionEvalSitesCoveredByDB()));
s.add(String.format("concordance_rate %.2f", 100 * concordanceRate()));
s.add(String.format("percent_db_sites_in_eval %.2f", 100 * fractionDBSitesDiscoveredInEval()));

View File

@ -72,7 +72,8 @@ public class VariantEvalWalker extends RefWalker<Integer, Integer> {
// the types of analysis we support, and the string tags we associate with the enumerated value
enum ANALYSIS_TYPE {
ALL_SNPS("all"), SINGLETON_SNPS("singletons"), TWOHIT_SNPS("2plus_hit"), KNOWN_SNPS("known"), NOVEL_SNPS("novel");
// todo -- differeniate into three classes -- snps at known snp sites, snps not at known snp site but covered by known indel, and novel
ALL_SNPS("all"), SINGLETON_SNPS("singletons"), TWOHIT_SNPS("2plus_hit"), KNOWN_SNPS("known"), SNPS_AT_NON_SNP_SITES("snp_at_known_non_snps"), NOVEL_SNPS("novel");
private final String value;
ANALYSIS_TYPE(String value) { this.value = value;}
@ -85,9 +86,11 @@ public class VariantEvalWalker extends RefWalker<Integer, Integer> {
ANALYSIS_TYPE.SINGLETON_SNPS,
ANALYSIS_TYPE.TWOHIT_SNPS,
ANALYSIS_TYPE.KNOWN_SNPS,
ANALYSIS_TYPE.SNPS_AT_NON_SNP_SITES,
ANALYSIS_TYPE.NOVEL_SNPS};
final ANALYSIS_TYPE[] GENOTYPE_ANALYSIS_NAMES = {ANALYSIS_TYPE.ALL_SNPS,
ANALYSIS_TYPE.KNOWN_SNPS,
ANALYSIS_TYPE.SNPS_AT_NON_SNP_SITES,
ANALYSIS_TYPE.NOVEL_SNPS};
final ANALYSIS_TYPE[] SIMPLE_ANALYSIS_NAMES = {ANALYSIS_TYPE.ALL_SNPS};
ANALYSIS_TYPE[] ALL_ANALYSIS_NAMES = null;
@ -258,7 +261,15 @@ public class VariantEvalWalker extends RefWalker<Integer, Integer> {
if (eval != null) {
Variation dbsnp = (Variation) BrokenRODSimulator.simulate_lookup("dbSNP", ref.getLocus(), tracker);
ANALYSIS_TYPE noveltySet = dbsnp == null ? ANALYSIS_TYPE.NOVEL_SNPS : ANALYSIS_TYPE.KNOWN_SNPS;
ANALYSIS_TYPE noveltySet = null;
if ( dbsnp == null ) noveltySet = ANALYSIS_TYPE.NOVEL_SNPS;
else if ( dbsnp.isSNP() ) noveltySet = ANALYSIS_TYPE.KNOWN_SNPS;
else { // if ( dbsnp.isIndel() )
noveltySet = ANALYSIS_TYPE.SNPS_AT_NON_SNP_SITES;
}
//else { ; } // lots of wierd things in there }
// // throw new StingException("Unexpected dbSNP record type: " + dbsnp); }
updateAnalysisSet(noveltySet, eval, tracker, ref.getBase(), context);
}
@ -312,7 +323,7 @@ public class VariantEvalWalker extends RefWalker<Integer, Integer> {
private String getLineHeader(final ANALYSIS_TYPE analysisSetName, final String keyword, final String analysis) {
String s = Utils.join(",", Arrays.asList(analysisSetName, keyword, analysis));
return s + Utils.dupString(' ', 50 - s.length());
return s + Utils.dupString(' ', Math.max(50 - s.length(), 1));
}
private void printAnalysisSet(final ANALYSIS_TYPE analysisSetName) {

View File

@ -18,8 +18,8 @@ public class VariantEvalWalkerIntegrationTest extends WalkerTest {
@Test
public void testEvalVariantROD() {
HashMap<String, String> md5 = new HashMap<String, String>();
md5.put("", "86582b57ec27dd9c4a6e0252eb010376");
md5.put("-A", "ead904e334956435e6b586b697cd1905");
md5.put("", "b4adb7fed93005b3c28d72c3578ebc33");
md5.put("-A", "d34c1598a5b35098bd5524a52fbbc0fd");
/**
* the above MD5 was calculated from running the following command:
@ -52,7 +52,7 @@ public class VariantEvalWalkerIntegrationTest extends WalkerTest {
@Test
public void testEvalVariantRODConfSix() {
List<String> md5 = new ArrayList<String>();
md5.add("875dea21073bb5b24a771105bdeb2225");
md5.add("4421a0b379577ea0fcde2c9866f9ca9f");
/**
* the above MD5 was calculated from running the following command:
@ -84,7 +84,7 @@ public class VariantEvalWalkerIntegrationTest extends WalkerTest {
@Test
public void testEvalVariantRODOutputViolations() {
List<String> md5 = new ArrayList<String>();
md5.add("d84e5b2a23ab1cf028145f09cd1e9f5b");
md5.add("e1ce229698725aa06c9b08b21fe628ef");
/**
* the above MD5 was calculated from running the following command:
@ -116,7 +116,7 @@ public class VariantEvalWalkerIntegrationTest extends WalkerTest {
@Test
public void testEvalGenotypeROD() {
List<String> md5 = new ArrayList<String>();
md5.add("87ec21995ab4227404da1dc144c1f4c7");
md5.add("fd95465965116e05e2e0356e2d105321");
/**
* the above MD5 was calculated after running the following command:
*
@ -150,7 +150,7 @@ public class VariantEvalWalkerIntegrationTest extends WalkerTest {
@Test
public void testEvalMarksGenotypingExample() {
List<String> md5 = new ArrayList<String>();
md5.add("e9aebff0e3ccba7ba6f64433ba7d3533");
md5.add("58ef0487c9231aec7f356919e8cbd305");
/**
* Run with the following commands:
*
@ -177,7 +177,7 @@ public class VariantEvalWalkerIntegrationTest extends WalkerTest {
@Test
public void testEvalRuntimeWithLotsOfIntervals() {
List<String> md5 = new ArrayList<String>();
md5.add("bfdc82c3fd8a286f5855d3932ede3124");
md5.add("");
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T VariantEval -R /broad/1KG/reference/human_b36_both.fasta " +
"-B eval,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.pilot_3.all.geli.calls " +