Improve variantCallQC.R
-- Refactor plotting utilities into master utility in gsalib. Everyone can use it now -- Better plots for standard variantCallQC
This commit is contained in:
parent
3f6b2423d8
commit
87be63c7e4
|
|
@ -0,0 +1,236 @@
|
|||
library(gplots)
|
||||
library(ggplot2)
|
||||
|
||||
# -------------------------------------------------------
|
||||
# Utilities for displaying multiple plots per page
|
||||
# -------------------------------------------------------
|
||||
|
||||
distributeGraphRows <- function(graphs, heights = c()) {
|
||||
# Viewport layout 2 graphs top to bottom with given relative heights
|
||||
#
|
||||
#
|
||||
if (length(heights) == 0) {
|
||||
heights <- rep.int(1, length(graphs))
|
||||
}
|
||||
heights <- heights[!is.na(graphs)]
|
||||
graphs <- graphs[!is.na(graphs)]
|
||||
numGraphs <- length(graphs)
|
||||
Layout <- grid.layout(nrow = numGraphs, ncol = 1, heights=heights)
|
||||
grid.newpage()
|
||||
pushViewport(viewport(layout = Layout))
|
||||
subplot <- function(x) viewport(layout.pos.row = x, layout.pos.col = 1)
|
||||
for (i in 1:numGraphs) {
|
||||
print(graphs[[i]], vp = subplot(i))
|
||||
}
|
||||
}
|
||||
|
||||
distributeLogGraph <- function(graph, xName) {
|
||||
continuousGraph <- graph + scale_x_continuous(xName)
|
||||
logGraph <- graph + scale_x_log10(xName) + opts(title="")
|
||||
distributeGraphRows(list(continuousGraph, logGraph))
|
||||
}
|
||||
|
||||
distributePerSampleGraph <- function(perSampleGraph, distGraph, ratio=c(2,1)) {
|
||||
distributeGraphRows(list(perSampleGraph, distGraph), ratio)
|
||||
}
|
||||
|
||||
removeExtraStrats <- function(variantEvalDataFrame, moreToRemove=c()) {
|
||||
# Remove the standard extra stratification columns FunctionalClass, Novelty, and others in moreToRemove from the variantEvalDataFrame
|
||||
#
|
||||
# Only keeps the column marked with "all" for each removed column
|
||||
#
|
||||
for ( toRemove in c("FunctionalClass", "Novelty", moreToRemove) ) {
|
||||
if (toRemove %in% colnames(variantEvalDataFrame)) {
|
||||
variantEvalDataFrame <- variantEvalDataFrame[variantEvalDataFrame[[toRemove]] == "all",]
|
||||
}
|
||||
}
|
||||
variantEvalDataFrame
|
||||
}
|
||||
|
||||
openPDF <- function(outputPDF) {
|
||||
# Open the outputPDF file with standard dimensions, if outputPDF is not NA
|
||||
if ( ! is.na(outputPDF) ) {
|
||||
pdf(outputPDF, height=8.5, width=11)
|
||||
}
|
||||
}
|
||||
|
||||
closePDF <- function(outputPDF) {
|
||||
# close the outputPDF file if not NA, and try to compact the PDF if possible
|
||||
if ( ! is.na(outputPDF) ) {
|
||||
dev.off()
|
||||
if (exists("compactPDF")) {
|
||||
compactPDF(outputPDF)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
makeRatioDataFrame <- function(ACs, num, denom, widths = NULL) {
|
||||
if ( is.null(widths) ) widths <- rep(1, length(ACs))
|
||||
|
||||
value = NULL
|
||||
titv <- data.frame(AC=ACs, width = widths, num=num, denom = denom, ratio = num / denom)
|
||||
}
|
||||
|
||||
.reduceACs <- function(binWidthForAC, ACs) {
|
||||
# computes data structures necessary to reduce the full range of ACs
|
||||
#
|
||||
# binWidthForAC returns the number of upcoming bins that should be merged into
|
||||
# that AC bin. ACs is a vector of all AC values from 0 to 2N that should be
|
||||
# merged together
|
||||
#
|
||||
# Returns a list containing the reduced ACs starts, their corresponding widths,
|
||||
# and a map from original ACs to their new ones (1 -> 1, 2 -> 2, 3 -> 2, etc)
|
||||
maxAC <- max(ACs)
|
||||
newACs <- c()
|
||||
widths <- c()
|
||||
newACMap <- c()
|
||||
ac <- 0
|
||||
while ( ac < maxAC ) {
|
||||
newACs <- c(newACs, ac)
|
||||
width <- binWidthForAC(ac)
|
||||
widths <- c(widths, width)
|
||||
newACMap <- c(newACMap, rep(ac, width))
|
||||
ac <- ac + width
|
||||
}
|
||||
list(ACs = newACs, widths=widths, newACMap = newACMap)
|
||||
}
|
||||
|
||||
# geometricACs <- function(k, ACs) {
|
||||
# nBins <- round(k * log10(max(ACs)))
|
||||
#
|
||||
# binWidthForAC <- function(ac) {
|
||||
# max(ceiling(ac / nBins), 1)
|
||||
# }
|
||||
#
|
||||
# return(reduceACs(binWidthForAC, ACs))
|
||||
# }
|
||||
|
||||
reduce.AC.on.LogLinear.intervals <- function(scaleFactor, ACs) {
|
||||
# map the full range of AC values onto a log linear scale
|
||||
#
|
||||
# Reduce the full AC range onto one where the width of each new AC increases at a rate of
|
||||
# 10^scaleFactor in size with growing AC values. This is primarily useful for accurately
|
||||
# computing ratios or other quantities by AC that aren't well determined when the AC
|
||||
# values are very large
|
||||
#
|
||||
# Returns a list containing the reduced ACs starts, their corresponding widths,
|
||||
# and a map from original ACs to their new ones (1 -> 1, 2 -> 2, 3 -> 2, etc)
|
||||
maxAC <- max(ACs)
|
||||
afs <- ACs / maxAC
|
||||
breaks <- 10^(seq(-4, -1, scaleFactor))
|
||||
widths <- c()
|
||||
lastBreak <- 1
|
||||
for ( i in length(breaks):1 ) {
|
||||
b <- breaks[i]
|
||||
width <- sum(afs < lastBreak & afs >= b)
|
||||
widths <- c(widths, width)
|
||||
lastBreak <- b
|
||||
}
|
||||
widths <- rev(widths)
|
||||
|
||||
binWidthForAC <- function(ac) {
|
||||
af <- ac / maxAC
|
||||
value = 1
|
||||
for ( i in length(breaks):1 )
|
||||
if ( af >= breaks[i] ) {
|
||||
value = widths[i]
|
||||
break
|
||||
}
|
||||
|
||||
return(value)
|
||||
}
|
||||
|
||||
return(.reduceACs(binWidthForAC, ACs))
|
||||
}
|
||||
|
||||
.remapACs <- function(remapper, k, df) {
|
||||
newACs <- remapper(k, df$AC)
|
||||
|
||||
n = length(newACs$ACs)
|
||||
num = rep(0, n)
|
||||
denom = rep(0, n)
|
||||
for ( i in 1:dim(df)[1] ) {
|
||||
rowI = df$AC == i
|
||||
row = df[rowI,]
|
||||
newAC = newACs$newACMap[row$AC]
|
||||
newRowI = newACs$ACs == newAC
|
||||
num[newRowI] = num[newRowI] + df$num[rowI]
|
||||
denom[newRowI] = denom[newRowI] + df$denom[rowI]
|
||||
}
|
||||
|
||||
newdf <- makeRatioDataFrame(newACs$ACs, num, denom, newACs$widths )
|
||||
newdf
|
||||
}
|
||||
|
||||
compute.ratio.on.LogLinear.AC.intervals <- function(ACs, num, denom, scaleFactor = 0.1) {
|
||||
df = makeRatioDataFrame(ACs, num, denom, 1)
|
||||
return(.remapACs(reduce.AC.on.LogLinear.intervals, scaleFactor, df))
|
||||
}
|
||||
|
||||
plotVariantQC <- function(metrics, measures, requestedStrat = "Sample",
|
||||
fixHistogramX=F, anotherStrat = NULL, nObsField = "n_indels",
|
||||
onSamePage=F, facetVariableOnXPerSample = F, facetVariableOnXForDist = T, moreTitle="") {
|
||||
metrics$strat = metrics[[requestedStrat]]
|
||||
|
||||
otherFacet = "."
|
||||
id.vars = c("strat", "nobs")
|
||||
metrics$nobs <- metrics[[nObsField]]
|
||||
|
||||
# keep track of the other strat and it's implied facet value
|
||||
if (! is.null(anotherStrat)) {
|
||||
id.vars = c(id.vars, anotherStrat)
|
||||
otherFacet = anotherStrat
|
||||
}
|
||||
|
||||
molten <- melt(metrics, id.vars=id.vars, measure.vars=c(measures))
|
||||
perSampleGraph <- ggplot(data=molten, aes(x=strat, y=value, group=variable, color=variable, fill=variable))
|
||||
title <- opts(title=paste(paste(paste(measures, collapse=", "), "by", requestedStrat), moreTitle))
|
||||
|
||||
determineFacet <- function(onX) {
|
||||
if ( onX ) {
|
||||
paste(otherFacet, "~ variable")
|
||||
} else {
|
||||
paste("variable ~", otherFacet)
|
||||
}
|
||||
}
|
||||
|
||||
sampleFacet = determineFacet(facetVariableOnXPerSample)
|
||||
distFacet = determineFacet(facetVariableOnXForDist)
|
||||
|
||||
if ( requestedStrat == "Sample" ) {
|
||||
perSampleGraph <- perSampleGraph + geom_text(aes(label=strat), size=1.5) + geom_blank() # don't display a scale
|
||||
perSampleGraph <- perSampleGraph + scale_x_discrete("Sample (ordered by nSNPs)", formatter=function(x) "")
|
||||
} else {
|
||||
perSampleGraph <- perSampleGraph + geom_point(aes(size=log10(nobs))) #+ geom_smooth(aes(weight=log10(nobs)))
|
||||
perSampleGraph <- perSampleGraph + scale_x_log10("AlleleCount")
|
||||
}
|
||||
perSampleGraph <- perSampleGraph + ylab("Variable value") + title
|
||||
perSampleGraph <- perSampleGraph + facet_grid(sampleFacet, scales="free")
|
||||
|
||||
nValues = length(unique(molten$value))
|
||||
if (nValues > 2) {
|
||||
if ( requestedStrat == "Sample" ) {
|
||||
distGraph <- ggplot(data=molten, aes(x=value, group=variable, fill=variable))
|
||||
} else {
|
||||
distGraph <- ggplot(data=molten, aes(x=value, group=variable, fill=variable, weight=nobs))
|
||||
}
|
||||
distGraph <- distGraph + geom_histogram(aes(y=..ndensity..))
|
||||
distGraph <- distGraph + geom_density(alpha=0.5, aes(y=..scaled..))
|
||||
distGraph <- distGraph + geom_rug(aes(y=NULL, color=variable, position="jitter"))
|
||||
scale = "free"
|
||||
if ( fixHistogramX ) scale = "fixed"
|
||||
distGraph <- distGraph + facet_grid(distFacet, scales=scale)
|
||||
distGraph <- distGraph + ylab("Relative frequency")
|
||||
distGraph <- distGraph + xlab("Variable value (see facet for variable by color)")
|
||||
distGraph <- distGraph + opts(axis.text.x=theme_text(angle=-45)) # , legend.position="none")
|
||||
} else {
|
||||
distGraph <- NA
|
||||
}
|
||||
|
||||
if ( onSamePage ) {
|
||||
suppressMessages(distributePerSampleGraph(perSampleGraph, distGraph))
|
||||
} else {
|
||||
suppressMessages(print(perSampleGraph))
|
||||
suppressMessages(print(distGraph + title))
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue