Removed nDeletions and nInsertions as independent plots -- just not useful given nIndels and insertions/deletion ratio. Fixed jitter problems with rug plots by removing the rugs entirely. Recovered, and improved, comparative features lost by removing the rug plots by getting viewports to work (trivial) so now per sample metrics and distributions are displayed on the same page!

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@6026 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
depristo 2011-06-22 13:59:46 +00:00
parent a1f8aa90c0
commit 9b54239f37
1 changed files with 51 additions and 43 deletions

View File

@ -50,6 +50,20 @@ expandVEReport <- function(d) {
return(d)
}
# -------------------------------------------------------
# Utilities for displaying multiple plots per page
# -------------------------------------------------------
# Viewport (layout 2 graphs top to bottom)
distributePerSampleGraph <- function(distgraph, perSampleGraph, heights = c(2,1)) {
Layout <- grid.layout(nrow = 2, ncol = 1, heights=heights)
grid.newpage()
pushViewport(viewport(layout = Layout))
subplot <- function(x) viewport(layout.pos.row = x, layout.pos.col = 1)
print(perSampleGraph, vp = subplot(1))
print(distgraph, vp = subplot(2))
}
createMetricsBySites <- function(VariantEvalRoot, PreQCMetrics) {
# Metrics by sites:
# bySite -> counts of SNPs and Indels by novelty, with expectations
@ -105,9 +119,9 @@ summaryPlots <- function(metricsBySites) {
p <- p + geom_line(size=1)
p <- p + facet_grid(variable ~ ., scales="free")
p <- p + scale_x_continuous("Allele count (AC)")
print(p)
p <- p + scale_x_log10("Allele count (AC)")
print(p)
p2 <- p + scale_x_log10("Allele count (AC)")
p2 <- p2 + opts(title = "")
distributePerSampleGraph(p2, p, c(1,1))
# Counts vs. Allele frequency
name = "Variant counts by allele count"
@ -133,9 +147,9 @@ summaryPlots <- function(metricsBySites) {
p <- p + geom_smooth(size=2)
p <- p + geom_point(aes(size=log10(nSNPs), weight=nSNPs), alpha=0.5)
p <- p + scale_x_continuous("Allele count (AC)")
print(p)
p <- p + scale_x_log10("Allele count (AC)")
print(p)
p2 <- p + scale_x_log10("Allele count (AC)")
p2 <- p2 + opts(title = "")
distributePerSampleGraph(p2, p, c(1,1))
# SNPs to indels ratio by allele frequency
name = "SNPs to indels ratio by allele frequency"
@ -164,6 +178,10 @@ addSection <- function(name) {
title(name, cex=2)
}
# -------------------------------------------------------
# read functions
# -------------------------------------------------------
createMetricsBySamples <- function(VariantEvalRoot) {
bySampleEval <- expandVEReport(gsa.read.gatkreport(paste(VariantEvalRoot, ".bySample.eval", sep="")))
r = merge(bySampleEval$TiTvVariantEvaluator, bySampleEval$CountVariants)
@ -192,57 +210,48 @@ perSamplePlots <- function(metricsBySamples) {
sampleTextLabel <- geom_text(aes(label=Sample, size=highlightTextSizes))
sampleTextLabelScale <- scale_size("Highlighted samples", to=c(3,5), breaks=c(1,2), labels=c("regular", "highlighted"))
xAxis <- scale_x_discrete("Sample (ordered by nSNPs)", formatter=function(x) "")
myRug <- geom_rug(position="jitter")
#myRug <- geom_rug(aes(x=NULL), position="jitter")
measures = c("nSNPs", "tiTvRatio", "nSingletons", "nIndels", "nInsertions", "nDeletions", "deletionInsertionRatio")
measures = c("nSNPs", "tiTvRatio", "nSingletons", "nIndels", "deletionInsertionRatio")
name = "by sample"
for ( measure in measures ) {
molten = melt(metricsBySamples, id.vars=c("Novelty", "Sample", "highlightTextSizes"), measure.vars=c(measure))
# distribution
p <- ggplot(data=molten, aes(x=value, group=Novelty, fill=Novelty))
p <- p + opts(title = paste(measure, name))
p <- p + geom_density(alpha=0.5)
p <- p + geom_rug(aes(y=NULL, color=Novelty, position="jitter"))
p <- p + scale_x_continuous(measure)
# how do we remove the labels?
print(p)
p1 <- ggplot(data=molten, aes(x=value, group=Novelty, fill=Novelty))
#p1 <- p1 + opts(title = paste(measure, name))
p1 <- p1 + geom_density(alpha=0.5)
p1 <- p1 + geom_rug(aes(y=NULL, color=Novelty, position="jitter"))
p1 <- p1 + scale_x_continuous(measure)
p <- ggplot(data=molten, aes(x=Sample, y=value, group=Novelty, color=Novelty), y=value)
p <- p + opts(title = paste(measure, name))
p <- p + geom_smooth(alpha=0.5, aes(group=Novelty))
p <- p + sampleTextLabel + sampleTextLabelScale
p <- p + myRug
p <- p + facet_grid(Novelty ~ ., scales="free")
# how do we remove the labels?
p <- p + xAxis
print(p)
p2 <- ggplot(data=molten, aes(x=Sample, y=value, group=Novelty, color=Novelty), y=value)
p2 <- p2 + opts(title = paste(measure, name))
p2 <- p2 + geom_smooth(alpha=0.5, aes(group=Novelty))
p2 <- p2 + sampleTextLabel + sampleTextLabelScale
p2 <- p2 + facet_grid(Novelty ~ ., scales="free")
p2 <- p2 + xAxis
distributePerSampleGraph(p1, p2)
}
# known / novel ratio by sample
# TODO -- would ideally not conflate SNPs and Indels
d = subset(metricsBySamples, Novelty == "all" & CompRod == "dbsnp")
title <- opts(title = "Novelty rate by sample")
# distribution
p <- ggplot(data=d, aes(x=compRate))
p <- p + title
p <- p + geom_density(alpha=0.5)
p <- p + geom_rug(aes(y=NULL, position="jitter"))
p <- p + scale_x_continuous("Percent of variants in dbSNP")
# how do we remove the labels?
print(p)
p1 <- ggplot(data=d, aes(x=compRate))
p1 <- p1 + geom_density(alpha=0.5)
p1 <- p1 + geom_rug(aes(y=NULL, position="jitter"))
p1 <- p1 + scale_x_continuous("Percent of variants in dbSNP")
p <- ggplot(data=d, aes(x=Sample, y=compRate))
p <- p + title
p <- p + geom_smooth(alpha=0.5, aes(group=Novelty))
p <- p + sampleTextLabel + sampleTextLabelScale
p <- p + geom_rug(aes(x=NULL, position="jitter"))
#p <- p + myRug
# how do we remove the labels?
p <- p + xAxis
print(p)
p2 <- ggplot(data=d, aes(x=Sample, y=compRate))
p2 <- p2 + title
p2 <- p2 + geom_smooth(alpha=0.5, aes(group=Novelty))
p2 <- p2 + sampleTextLabel + sampleTextLabelScale
p2 <- p2 + geom_rug(aes(x=NULL, position="jitter"))
p2 <- p2 + xAxis
p2 <- p2 + scale_y_continuous("Percent of variants in dbSNP")
distributePerSampleGraph(p1, p2)
for ( novelty in c("all", "known", "novel") ) {
# TODO -- how can I color it as before?
@ -253,7 +262,6 @@ perSamplePlots <- function(metricsBySamples) {
# p <- p + scale_y_log10("Number of variants")
# p <- p + geom_point(alpha=0.5, size=4)
p <- p + sampleTextLabel + sampleTextLabelScale
p <- p + myRug
p <- p + facet_grid(variable ~ ., scales="free")
# how do we remove the labels?
p <- p + xAxis