Now with dbSNP rate
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@6010 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
dd1d9cd76f
commit
285da580f3
37
R/exomeQC.R
37
R/exomeQC.R
|
|
@ -143,11 +143,12 @@ addSection <- function(name) {
|
||||||
}
|
}
|
||||||
|
|
||||||
createMetricsBySamples <- function(VariantEvalRoot) {
|
createMetricsBySamples <- function(VariantEvalRoot) {
|
||||||
byAFEval <- expandVEReport(gsa.read.gatkreport(paste(VariantEvalRoot, ".bySample.eval", sep="")))
|
bySampleEval <- expandVEReport(gsa.read.gatkreport(paste(VariantEvalRoot, ".bySample.eval", sep="")))
|
||||||
r = merge(byAFEval$TiTvVariantEvaluator, byAFEval$CountVariants)
|
r = merge(bySampleEval$TiTvVariantEvaluator, bySampleEval$CountVariants)
|
||||||
|
r = merge(r, bySampleEval$CompOverlap)
|
||||||
if ( ! is.na(preQCFile) ) {
|
if ( ! is.na(preQCFile) ) {
|
||||||
preQCMetrics <- read.table(preQCFile, header=T)
|
preQCMetrics <- read.table(preQCFile, header=T)
|
||||||
r = merge(merge(byAFEval$TiTvVariantEvaluator, byAFEval$CountVariants), preQCMetrics)
|
r = merge(r, preQCMetrics)
|
||||||
}
|
}
|
||||||
# order the samples by nSNPs -- it's the natural ordering.
|
# order the samples by nSNPs -- it's the natural ordering.
|
||||||
x = subset(r, Novelty=="all")
|
x = subset(r, Novelty=="all")
|
||||||
|
|
@ -170,7 +171,7 @@ perSamplePlots <- function(metricsBySamples) {
|
||||||
sampleTextLabelScale <- scale_size("Highlighted samples", to=c(3,5), breaks=c(1,2), labels=c("regular", "highlighted"))
|
sampleTextLabelScale <- scale_size("Highlighted samples", to=c(3,5), breaks=c(1,2), labels=c("regular", "highlighted"))
|
||||||
xAxis <- scale_x_discrete("Sample (ordered by nSNPs)", formatter=function(x) "")
|
xAxis <- scale_x_discrete("Sample (ordered by nSNPs)", formatter=function(x) "")
|
||||||
myRug <- geom_rug(position="jitter")
|
myRug <- geom_rug(position="jitter")
|
||||||
#myRug <- geom_rug(aes(x = NULL))
|
#myRug <- geom_rug(aes(x=NULL), position="jitter")
|
||||||
|
|
||||||
measures = c("nSNPs", "tiTvRatio", "nSingletons", "nIndels", "nInsertions", "nDeletions", "deletionInsertionRatio")
|
measures = c("nSNPs", "tiTvRatio", "nSingletons", "nIndels", "nInsertions", "nDeletions", "deletionInsertionRatio")
|
||||||
name = "by sample"
|
name = "by sample"
|
||||||
|
|
@ -196,7 +197,31 @@ perSamplePlots <- function(metricsBySamples) {
|
||||||
p <- p + xAxis
|
p <- p + xAxis
|
||||||
print(p)
|
print(p)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# known / novel ratio by sample
|
||||||
|
# TODO -- would ideally not conflate SNPs and Indels
|
||||||
|
d = subset(metricsBySamples, Novelty == "all" & CompRod == "dbsnp")
|
||||||
|
title <- opts(title = "Novelty rate by sample")
|
||||||
|
|
||||||
|
# distribution
|
||||||
|
p <- ggplot(data=d, aes(x=compRate))
|
||||||
|
p <- p + title
|
||||||
|
p <- p + geom_density(alpha=0.5)
|
||||||
|
p <- p + geom_rug(aes(y=NULL, position="jitter"))
|
||||||
|
p <- p + scale_x_continuous("Percent of variants in dbSNP")
|
||||||
|
# how do we remove the labels?
|
||||||
|
print(p)
|
||||||
|
|
||||||
|
p <- ggplot(data=d, aes(x=Sample, y=compRate))
|
||||||
|
p <- p + title
|
||||||
|
p <- p + geom_smooth(alpha=0.5, aes(group=Novelty))
|
||||||
|
p <- p + sampleTextLabel + sampleTextLabelScale
|
||||||
|
p <- p + geom_rug(aes(x=NULL, position="jitter"))
|
||||||
|
#p <- p + myRug
|
||||||
|
# how do we remove the labels?
|
||||||
|
p <- p + xAxis
|
||||||
|
print(p)
|
||||||
|
|
||||||
for ( novelty in c("all", "known", "novel") ) {
|
for ( novelty in c("all", "known", "novel") ) {
|
||||||
# TODO -- how can I color it as before?
|
# TODO -- how can I color it as before?
|
||||||
# TODO -- add marginal distributions?
|
# TODO -- add marginal distributions?
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue