Now with dbSNP rate

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@6010 348d0f76-0448-11de-a6fe-93d51630548a
2011-06-17 13:21:40 +00:00 · 2011-06-17 13:21:40 +00:00 · 285da580f3
parent dd1d9cd76f
commit 285da580f3
1 changed files with 31 additions and 6 deletions
--- a/R/exomeQC.R
+++ b/R/exomeQC.R
@@ -143,11 +143,12 @@ addSection <- function(name) {
 }
 
 createMetricsBySamples <- function(VariantEvalRoot) {
-  byAFEval <- expandVEReport(gsa.read.gatkreport(paste(VariantEvalRoot, ".bySample.eval", sep="")))
-  r = merge(byAFEval$TiTvVariantEvaluator, byAFEval$CountVariants)
+  bySampleEval <- expandVEReport(gsa.read.gatkreport(paste(VariantEvalRoot, ".bySample.eval", sep="")))
+  r = merge(bySampleEval$TiTvVariantEvaluator, bySampleEval$CountVariants)
+  r = merge(r, bySampleEval$CompOverlap)
  if ( ! is.na(preQCFile) ) {
-    preQCMetrics <- read.table(preQCFile, header=T) 
-    r = merge(merge(byAFEval$TiTvVariantEvaluator, byAFEval$CountVariants), preQCMetrics)
+    preQCMetrics <- read.table(preQCFile, header=T)
+    r = merge(r, preQCMetrics)
  }
  # order the samples by nSNPs -- it's the natural ordering.
  x = subset(r, Novelty=="all")
@@ -170,7 +171,7 @@ perSamplePlots <- function(metricsBySamples) {
  sampleTextLabelScale <- scale_size("Highlighted samples", to=c(3,5), breaks=c(1,2), labels=c("regular", "highlighted"))
  xAxis <- scale_x_discrete("Sample (ordered by nSNPs)", formatter=function(x) "")
  myRug <- geom_rug(position="jitter")
-  #myRug <- geom_rug(aes(x = NULL))
+  #myRug <- geom_rug(aes(x=NULL), position="jitter")

  measures = c("nSNPs", "tiTvRatio", "nSingletons", "nIndels", "nInsertions", "nDeletions", "deletionInsertionRatio")
  name = "by sample"
@@ -196,7 +197,31 @@ perSamplePlots <- function(metricsBySamples) {
    p <- p + xAxis
    print(p)
  }
-  
+
+  # known / novel ratio by sample
+  # TODO -- would ideally not conflate SNPs and Indels
+  d = subset(metricsBySamples, Novelty == "all" & CompRod == "dbsnp")
+  title <- opts(title = "Novelty rate by sample")
+
+  # distribution
+  p <- ggplot(data=d, aes(x=compRate))
+  p <- p + title
+  p <- p + geom_density(alpha=0.5)
+  p <- p + geom_rug(aes(y=NULL, position="jitter"))
+  p <- p + scale_x_continuous("Percent of variants in dbSNP")
+  # how do we remove the labels?
+  print(p)
+
+  p <- ggplot(data=d, aes(x=Sample, y=compRate))
+  p <- p + title
+  p <- p + geom_smooth(alpha=0.5, aes(group=Novelty))
+  p <- p + sampleTextLabel + sampleTextLabelScale
+  p <- p + geom_rug(aes(x=NULL, position="jitter"))
+  #p <- p + myRug
+  # how do we remove the labels?
+  p <- p + xAxis
+  print(p)
+
  for ( novelty in c("all", "known", "novel") ) {
    # TODO -- how can I color it as before?
    # TODO -- add marginal distributions?