Added per-sample density plots for each metric. Changed the command-line argument order: outputPDF is now the third argument and preQCFile is an optional fourth argument. The per-sample.tsv supplementary data is no longer required -- it is loaded only if it is provided.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@6003 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
b4c30bf124
commit
9254faa27e
51
R/exomeQC.R
51
R/exomeQC.R
|
|
@ -16,11 +16,13 @@ parseHighlightSamples <- function(s) {
|
||||||
return(unlist(strsplit(s, ",", fixed=T)))
|
return(unlist(strsplit(s, ",", fixed=T)))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
preQCFile = NA
|
||||||
if ( onCMDLine ) {
|
if ( onCMDLine ) {
|
||||||
ProjectName = args[1]
|
ProjectName = args[1]
|
||||||
VariantEvalRoot = args[2]
|
VariantEvalRoot = args[2]
|
||||||
preQCFile = args[3]
|
outputPDF = args[3]
|
||||||
outputPDF = args[4]
|
if ( ! is.na(args[4]) )
|
||||||
|
preQCFile = args[4]
|
||||||
if ( ! is.na(args[5]) )
|
if ( ! is.na(args[5]) )
|
||||||
highlightSamples = parseHighlightSamples(args[5])
|
highlightSamples = parseHighlightSamples(args[5])
|
||||||
else
|
else
|
||||||
|
|
@ -33,6 +35,13 @@ if ( onCMDLine ) {
|
||||||
highlightSamples = c() # parseHighlightSamples("29029,47243")
|
highlightSamples = c() # parseHighlightSamples("29029,47243")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
print("Report")
|
||||||
|
print(paste("Project :", ProjectName))
|
||||||
|
print(paste("VariantEvalRoot :", VariantEvalRoot))
|
||||||
|
print(paste("outputPDF :", outputPDF))
|
||||||
|
print(paste("preQCFile :", preQCFile))
|
||||||
|
print(paste("highlightSamples :", highlightSamples))
|
||||||
|
|
||||||
expandVEReport <- function(d) {
|
expandVEReport <- function(d) {
|
||||||
d$TiTvVariantEvaluator$tiTvRatio = round(d$TiTvVariantEvaluator$tiTvRatio,2)
|
d$TiTvVariantEvaluator$tiTvRatio = round(d$TiTvVariantEvaluator$tiTvRatio,2)
|
||||||
d$CountVariants$deletionInsertionRatio = round(d$CountVariants$deletionInsertionRatio,2)
|
d$CountVariants$deletionInsertionRatio = round(d$CountVariants$deletionInsertionRatio,2)
|
||||||
|
|
@ -72,7 +81,8 @@ summaryPlots <- function(metricsBySites) {
|
||||||
p <- ggplot(data=molten, aes(x=AC, y=value+1, color=Novelty, fill=Novelty), group=variable)
|
p <- ggplot(data=molten, aes(x=AC, y=value+1, color=Novelty, fill=Novelty), group=variable)
|
||||||
p <- p + opts(title = name)
|
p <- p + opts(title = name)
|
||||||
p <- p + scale_y_log10("Number of variants")
|
p <- p + scale_y_log10("Number of variants")
|
||||||
p <- p + geom_area()
|
p <- p + geom_point(alpha=0.5, size=3)
|
||||||
|
p <- p + geom_line(size=1)
|
||||||
p <- p + facet_grid(variable ~ ., scales="free")
|
p <- p + facet_grid(variable ~ ., scales="free")
|
||||||
p <- p + scale_x_continuous("Allele count (AC)")
|
p <- p + scale_x_continuous("Allele count (AC)")
|
||||||
print(p)
|
print(p)
|
||||||
|
|
@ -134,9 +144,11 @@ addSection <- function(name) {
|
||||||
|
|
||||||
createMetricsBySamples <- function(VariantEvalRoot) {
|
createMetricsBySamples <- function(VariantEvalRoot) {
|
||||||
byAFEval <- expandVEReport(gsa.read.gatkreport(paste(VariantEvalRoot, ".bySample.eval", sep="")))
|
byAFEval <- expandVEReport(gsa.read.gatkreport(paste(VariantEvalRoot, ".bySample.eval", sep="")))
|
||||||
preQCMetrics <- read.table(preQCFile, header=T)
|
r = merge(byAFEval$TiTvVariantEvaluator, byAFEval$CountVariants)
|
||||||
r = merge(merge(byAFEval$TiTvVariantEvaluator, byAFEval$CountVariants), preQCMetrics)
|
if ( ! is.na(preQCFile) ) {
|
||||||
|
preQCMetrics <- read.table(preQCFile, header=T)
|
||||||
|
r = merge(merge(byAFEval$TiTvVariantEvaluator, byAFEval$CountVariants), preQCMetrics)
|
||||||
|
}
|
||||||
# order the samples by nSNPs -- it's the natural ordering.
|
# order the samples by nSNPs -- it's the natural ordering.
|
||||||
x = subset(r, Novelty=="all")
|
x = subset(r, Novelty=="all")
|
||||||
r$Sample <- factor(x$Sample, levels=x$Sample[order(x$nSNPs)])
|
r$Sample <- factor(x$Sample, levels=x$Sample[order(x$nSNPs)])
|
||||||
|
|
@ -148,12 +160,6 @@ createMetricsBySamples <- function(VariantEvalRoot) {
|
||||||
return(subset(r, Sample != "all"))
|
return(subset(r, Sample != "all"))
|
||||||
}
|
}
|
||||||
|
|
||||||
# actually load the data.
|
|
||||||
if ( onCMDLine || LOAD_DATA ) {
|
|
||||||
metricsBySites <- createMetricsBySites(VariantEvalRoot)
|
|
||||||
metricsBySamples <- createMetricsBySamples(VariantEvalRoot)
|
|
||||||
}
|
|
||||||
|
|
||||||
# -------------------------------------------------------
|
# -------------------------------------------------------
|
||||||
# Per sample plots
|
# Per sample plots
|
||||||
# -------------------------------------------------------
|
# -------------------------------------------------------
|
||||||
|
|
@ -167,12 +173,21 @@ perSamplePlots <- function(metricsBySamples) {
|
||||||
#myRug <- geom_rug(aes(x = NULL))
|
#myRug <- geom_rug(aes(x = NULL))
|
||||||
|
|
||||||
measures = c("nSNPs", "tiTvRatio", "nSingletons", "nIndels", "nInsertions", "nDeletions", "deletionInsertionRatio")
|
measures = c("nSNPs", "tiTvRatio", "nSingletons", "nIndels", "nInsertions", "nDeletions", "deletionInsertionRatio")
|
||||||
# Counts vs. Allele frequency
|
name = "by sample"
|
||||||
name = "Variant counts by allele count"
|
|
||||||
for ( measure in measures ) {
|
for ( measure in measures ) {
|
||||||
molten = melt(metricsBySamples, id.vars=c("Novelty", "Sample", "highlightTextSizes"), measure.vars=c(measure))
|
molten = melt(metricsBySamples, id.vars=c("Novelty", "Sample", "highlightTextSizes"), measure.vars=c(measure))
|
||||||
|
|
||||||
|
# distribution
|
||||||
|
p <- ggplot(data=molten, aes(x=value, group=Novelty, fill=Novelty))
|
||||||
|
p <- p + opts(title = paste(measure, name))
|
||||||
|
p <- p + geom_density(alpha=0.5)
|
||||||
|
p <- p + geom_rug(aes(y=NULL, color=Novelty, position="jitter"))
|
||||||
|
p <- p + scale_x_continuous(measure)
|
||||||
|
# how do we remove the labels?
|
||||||
|
print(p)
|
||||||
|
|
||||||
p <- ggplot(data=molten, aes(x=Sample, y=value, group=Novelty, color=Novelty), y=value)
|
p <- ggplot(data=molten, aes(x=Sample, y=value, group=Novelty, color=Novelty), y=value)
|
||||||
p <- p + opts(title = paste(name, ":", measure))
|
p <- p + opts(title = paste(measure, name))
|
||||||
p <- p + geom_smooth(alpha=0.5, aes(group=Novelty))
|
p <- p + geom_smooth(alpha=0.5, aes(group=Novelty))
|
||||||
p <- p + sampleTextLabel + sampleTextLabelScale
|
p <- p + sampleTextLabel + sampleTextLabelScale
|
||||||
p <- p + myRug
|
p <- p + myRug
|
||||||
|
|
@ -203,6 +218,12 @@ perSamplePlots <- function(metricsBySamples) {
|
||||||
# Actually invoke the above plotting functions
|
# Actually invoke the above plotting functions
|
||||||
# -------------------------------------------------------
|
# -------------------------------------------------------
|
||||||
|
|
||||||
|
# load the data.
|
||||||
|
if ( onCMDLine || LOAD_DATA ) {
|
||||||
|
metricsBySites <- createMetricsBySites(VariantEvalRoot)
|
||||||
|
metricsBySamples <- createMetricsBySamples(VariantEvalRoot)
|
||||||
|
}
|
||||||
|
|
||||||
if ( ! is.na(outputPDF) ) {
|
if ( ! is.na(outputPDF) ) {
|
||||||
pdf(outputPDF, height=8.5, width=11)
|
pdf(outputPDF, height=8.5, width=11)
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue