Merge pull request #680 from broadinstitute/ldg_VQSRscript
Update VQSR Rnd BQSR script generation code for compatibility with late...
This commit is contained in:
commit
b8751ad598
|
|
@ -510,17 +510,17 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
|||
stream.println("dummyData <- " + dataFrame + "[1,]");
|
||||
stream.println("dummyData$x <- NaN");
|
||||
stream.println("dummyData$y <- NaN");
|
||||
stream.println("p <- ggplot(data=" + surfaceFrame + ", aes(x=x, y=y)) + opts(panel.background = theme_rect(colour = NA), panel.grid.minor = theme_line(colour = NA), panel.grid.major = theme_line(colour = NA))");
|
||||
stream.println("p1 = p + opts(title=\"model PDF\") + labs(x=\""+ annotationKeys[iii] +"\", y=\""+ annotationKeys[jjj] +"\") + geom_tile(aes(fill = lod)) + scale_fill_gradient(high=\"green\", low=\"red\")");
|
||||
stream.println("p <- qplot(x,y,data=" + dataFrame + ", color=retained, alpha=I(1/7),legend=FALSE) + opts(panel.background = theme_rect(colour = NA), panel.grid.minor = theme_line(colour = NA), panel.grid.major = theme_line(colour = NA))");
|
||||
stream.println("p <- ggplot(data=" + surfaceFrame + ", aes(x=x, y=y)) +theme(panel.background = element_rect(fill = \"white\"), panel.grid.minor = element_line(colour = \"white\"), panel.grid.major = element_line(colour = \"white\"))");
|
||||
stream.println("p1 = p +ggtitle(\"model PDF\") + labs(x=\""+ annotationKeys[iii] +"\", y=\""+ annotationKeys[jjj] +"\") + geom_tile(aes(fill = lod)) + scale_fill_gradient(high=\"green\", low=\"red\", space=\"rgb\")");
|
||||
stream.println("p <- qplot(x,y,data=" + dataFrame + ", color=retained, alpha=I(1/7),legend=FALSE) +theme(panel.background = element_rect(fill = \"white\"), panel.grid.minor = element_line(colour = \"white\"), panel.grid.major = element_line(colour = \"white\"))");
|
||||
stream.println("q <- geom_point(aes(x=x,y=y,color=retained),data=dummyData, alpha=1.0, na.rm=TRUE)");
|
||||
stream.println("p2 = p + q + labs(x=\""+ annotationKeys[iii] +"\", y=\""+ annotationKeys[jjj] +"\") + scale_colour_gradient(name=\"outcome\", high=\"black\", low=\"red\",breaks=c(-1,1),labels=c(\"filtered\",\"retained\"))");
|
||||
stream.println("p <- qplot(x,y,data="+ dataFrame + "["+dataFrame+"$training != 0,], color=training, alpha=I(1/7)) + opts(panel.background = theme_rect(colour = NA), panel.grid.minor = theme_line(colour = NA), panel.grid.major = theme_line(colour = NA))");
|
||||
stream.println("p2 = p + q + labs(x=\""+ annotationKeys[iii] +"\", y=\""+ annotationKeys[jjj] +"\") + scale_colour_gradient(name=\"outcome\", high=\"black\", low=\"red\",breaks=c(-1,1),guide=\"legend\",labels=c(\"filtered\",\"retained\"))");
|
||||
stream.println("p <- qplot(x,y,data="+ dataFrame + "["+dataFrame+"$training != 0,], color=training, alpha=I(1/7)) +theme(panel.background = element_rect(fill = \"white\"), panel.grid.minor = element_line(colour = \"white\"), panel.grid.major = element_line(colour = \"white\"))");
|
||||
stream.println("q <- geom_point(aes(x=x,y=y,color=training),data=dummyData, alpha=1.0, na.rm=TRUE)");
|
||||
stream.println("p3 = p + q + labs(x=\""+ annotationKeys[iii] +"\", y=\""+ annotationKeys[jjj] +"\") + scale_colour_gradient(high=\"green\", low=\"purple\",breaks=c(-1,1), labels=c(\"neg\", \"pos\"))");
|
||||
stream.println("p <- qplot(x,y,data=" + dataFrame + ", color=novelty, alpha=I(1/7)) + opts(panel.background = theme_rect(colour = NA), panel.grid.minor = theme_line(colour = NA), panel.grid.major = theme_line(colour = NA))");
|
||||
stream.println("p3 = p + q + labs(x=\""+ annotationKeys[iii] +"\", y=\""+ annotationKeys[jjj] +"\") + scale_colour_gradient(high=\"green\", low=\"purple\",breaks=c(-1,1),guide=\"legend\", labels=c(\"neg\", \"pos\"))");
|
||||
stream.println("p <- qplot(x,y,data=" + dataFrame + ", color=novelty, alpha=I(1/7)) +theme(panel.background = element_rect(fill = \"white\"), panel.grid.minor = element_line(colour = \"white\"), panel.grid.major = element_line(colour = \"white\"))");
|
||||
stream.println("q <- geom_point(aes(x=x,y=y,color=novelty),data=dummyData, alpha=1.0, na.rm=TRUE)");
|
||||
stream.println("p4 = p + q + labs(x=\""+ annotationKeys[iii] +"\", y=\""+ annotationKeys[jjj] +"\") + scale_colour_gradient(name=\"novelty\", high=\"blue\", low=\"red\",breaks=c(-1,1), labels=c(\"novel\",\"known\"))");
|
||||
stream.println("p4 = p + q + labs(x=\""+ annotationKeys[iii] +"\", y=\""+ annotationKeys[jjj] +"\") + scale_colour_gradient(name=\"novelty\", high=\"blue\", low=\"red\",breaks=c(-1,1),guide=\"legend\", labels=c(\"novel\",\"known\"))");
|
||||
stream.println("arrange(p1, p2, p3, p4, ncol=2)");
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@ if ( onCMDLine ) {
|
|||
inputFileName = args[1]
|
||||
outputPDF = args[2]
|
||||
} else {
|
||||
inputFileName = "~/Desktop/broadLocal/projects/pipelinePerformance/FullProcessingPipeline.jobreport.txt"
|
||||
inputFileName = "~/workspaces/scratch/SingleSample_VQSRwNewSOR.jobreport.txt"
|
||||
#inputFileName = "/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/Q-25718@node1149.jobreport.txt"
|
||||
#inputFileName = "/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/rodPerformanceGoals/history/report.082711.txt"
|
||||
outputPDF = NA
|
||||
|
|
@ -57,7 +57,7 @@ plotJobsGantt <- function(gatkReport, sortOverall, title, includeText) {
|
|||
p <- p + xlim(0, maxRelTime * 1.3)
|
||||
p <- p + xlab(paste("Start time, relative to first job", RUNTIME_UNITS))
|
||||
p <- p + ylab("Job number")
|
||||
p <- p + opts(title = title)
|
||||
p <- p + ggtitle(title)
|
||||
print(p)
|
||||
}
|
||||
|
||||
|
|
@ -95,7 +95,7 @@ plotProgressByTime <- function(gatkReport) {
|
|||
p <- p + facet_grid(variable ~ ., scales="free")
|
||||
p <- p + geom_line(size=2)
|
||||
p <- p + xlab(paste("Time since start of first job", RUNTIME_UNITS))
|
||||
p <- p + opts(title = "Job scheduling")
|
||||
p <- p + ggtitle("Job scheduling")
|
||||
print(p)
|
||||
}
|
||||
|
||||
|
|
@ -123,7 +123,7 @@ plotGroup <- function(groupTable) {
|
|||
# todo -- how do we group by annotations?
|
||||
p <- ggplot(data=sub, aes(x=runtime)) + geom_histogram()
|
||||
p <- p + xlab(paste("runtime", RUNTIME_UNITS)) + ylab("No. of jobs")
|
||||
p <- p + opts(title=paste("Job runtime histogram for", name))
|
||||
p <- p + ggtitle(paste("Job runtime histogram for", name))
|
||||
print(p)
|
||||
}
|
||||
|
||||
|
|
@ -171,9 +171,9 @@ plotTimeByHost <- function(gatkReportData) {
|
|||
p = p + facet_grid(analysisName ~ ., scale="free")
|
||||
p = p + vis()
|
||||
p = p + xlab("Job execution host")
|
||||
p = p + opts(title = paste(name, "of job runtimes by analysis name and execution host"))
|
||||
p = p + ggtitle(paste(name, "of job runtimes by analysis name and execution host"))
|
||||
p = p + ylab(paste("Distribution of runtimes", RUNTIME_UNITS))
|
||||
p = p + opts(axis.text.x=theme_text(angle=45, hjust=1, vjust=1))
|
||||
p = p + theme(axis.text.x=element_text(angle=45, hjust=1, vjust=1))
|
||||
print(p)
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -6,19 +6,21 @@ library("tools") #For compactPDF in R 2.13+
|
|||
library(gsalib)
|
||||
|
||||
|
||||
|
||||
if ( interactive() ) {
|
||||
args <- c("NA12878.6.1.dedup.realign.recal.bqsr.grp.csv", "NA12878.6.1.dedup.realign.recal.bqsr.grp", NA)
|
||||
} else {
|
||||
args <- commandArgs(TRUE)
|
||||
}
|
||||
|
||||
data <- read.csv(args[1])
|
||||
|
||||
data$Recalibration = as.factor(sapply(as.character(data$Recalibration),function(x) {
|
||||
xu = toupper(x);
|
||||
if (xu == "ORIGINAL") "BEFORE" else
|
||||
if (xu == "RECALIBRATED") "AFTER" else
|
||||
if (xu == "RECALIBRATION") "BQSR" else
|
||||
xu }));
|
||||
if (xu == "RECALIBRATED") "AFTER" else
|
||||
if (xu == "RECALIBRATION") "BQSR" else
|
||||
xu }));
|
||||
|
||||
gsa.report <- gsa.read.gatkreport(args[2])
|
||||
|
||||
|
|
@ -31,12 +33,12 @@ gsa.report$Arguments$Value[gsa.report$Argument$Value == "null"] = "None";
|
|||
|
||||
gsa.report.covariate.argnum = gsa.report$Arguments$Argument == "covariate";
|
||||
gsa.report$Arguments$Value[gsa.report.covariate.argnum] = sapply(strsplit(gsa.report$Arguments$Value[gsa.report.covariate.argnum],","),function(x) {
|
||||
y = sub("(^.+)Covariate","\\1",x); paste(y,collapse=",") } );
|
||||
y = sub("(^.+)Covariate","\\1",x); paste(y,collapse=",") } );
|
||||
|
||||
data <- within(data, EventType <- factor(EventType, levels = rev(levels(EventType))))
|
||||
|
||||
numRG = length(unique(data$ReadGroup))
|
||||
blankTheme = opts(panel.grid.major = theme_blank(), panel.grid.minor = theme_blank(), panel.background = theme_blank(), axis.ticks = theme_blank())
|
||||
blankTheme = theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank(), axis.ticks = element_blank())
|
||||
|
||||
# Viewport (layout 2 graphs top to bottom)
|
||||
distributeGraphRows <- function(graphs, heights = c()) {
|
||||
|
|
@ -72,7 +74,7 @@ for(cov in levels(data$CovariateName)) { # for each covariate in turn
|
|||
dIns=dIns[sample.int(length(dIns[,1]),min(length(dIns[,1]),2000)),] # don't plot too many values because it makes the PDFs too massive
|
||||
dDel=dDel[sample.int(length(dDel[,1]),min(length(dDel[,1]),2000)),] # don't plot too many values because it makes the PDFs too massive
|
||||
d=rbind(dSub, dIns, dDel)
|
||||
|
||||
|
||||
if( cov != "QualityScore" ) {
|
||||
p <- ggplot(d, aes(x=CovariateValue,y=Accuracy,alpha=log10(Observations))) + ylim(min(-10,d$Accuracy),max(10,d$Accuracy)) +
|
||||
geom_abline(intercept=0, slope=0, linetype=2) +
|
||||
|
|
@ -81,25 +83,25 @@ for(cov in levels(data$CovariateName)) { # for each covariate in turn
|
|||
blankTheme
|
||||
if(cov == "Cycle") {
|
||||
b <- p + geom_point(aes(color=Recalibration)) + scale_color_manual(values=c("BEFORE"="maroon1","AFTER"="blue","BQSR"="black")) + facet_grid(.~EventType) +
|
||||
opts(axis.text.x=theme_text(angle=90, hjust=0))
|
||||
theme(axis.text.x=element_text(angle=90, hjust=0))
|
||||
|
||||
p <- ggplot(d, aes(x=CovariateValue,y=AverageReportedQuality,alpha=log10(Observations))) +
|
||||
xlab(paste(cov,"Covariate")) +
|
||||
ylab("Mean Quality Score") + ylim(0,max(42,d$AverageReportedQuality)) +
|
||||
blankTheme
|
||||
e <- p + geom_point(aes(color=Recalibration)) + scale_color_manual(values=c("BEFORE"="maroon1","AFTER"="blue","BQSR"="black")) + facet_grid(.~EventType) +
|
||||
opts(axis.text.x=theme_text(angle=90, hjust=0))
|
||||
theme(axis.text.x=element_text(angle=90, hjust=0))
|
||||
|
||||
|
||||
} else {
|
||||
c <- p + geom_point(aes(color=Recalibration)) + scale_color_manual(values=c("BEFORE"="maroon1","AFTER"="blue","BQSR"="black")) + facet_grid(.~EventType) +
|
||||
opts(axis.text.x=theme_text(angle=90, hjust=0)) + xlab(paste(cov,"Covariate (3 base suffix)"))
|
||||
theme(axis.text.x=element_text(angle=90, hjust=0)) + xlab(paste(cov,"Covariate (3 base suffix)"))
|
||||
p <- ggplot(d, aes(x=CovariateValue,y=AverageReportedQuality,alpha=log10(Observations))) +
|
||||
xlab(paste(cov,"Covariate (3 base suffix)")) +
|
||||
ylab("Mean Quality Score") +
|
||||
blankTheme
|
||||
f <- p + geom_point(aes(color=Recalibration)) + scale_color_manual(values=c("BEFORE"="maroon1","AFTER"="blue","BQSR"="black")) + facet_grid(.~EventType) +
|
||||
opts(axis.text.x=theme_text(angle=90, hjust=0))
|
||||
theme(axis.text.x=element_text(angle=90, hjust=0))
|
||||
|
||||
}
|
||||
} else {
|
||||
|
|
@ -117,7 +119,7 @@ for(cov in levels(data$CovariateName)) { # for each covariate in turn
|
|||
d <- p + geom_histogram(aes(fill=Recalibration,weight=Observations,y=..ndensity..),alpha=0.6,binwidth=1,position="identity")
|
||||
d <- d + scale_fill_manual(values=c("BEFORE"="maroon1","AFTER"="blue","BQSR"="black"))
|
||||
d <- d + facet_grid(.~EventType)
|
||||
# d <- d + scale_y_continuous(formatter="comma")
|
||||
# d <- d + scale_y_continuous(formatter="comma")
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -154,3 +156,4 @@ if ( ! is.na(args[3]) ) {
|
|||
compactPDF(args[2])
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -27,7 +27,7 @@ distributeGraphRows <- function(graphs, heights = c()) {
|
|||
|
||||
distributeLogGraph <- function(graph, xName) {
|
||||
continuousGraph <- graph + scale_x_continuous(xName)
|
||||
logGraph <- graph + scale_x_log10(xName) + opts(title="")
|
||||
logGraph <- graph + scale_x_log10(xName) + ggtitle("")
|
||||
distributeGraphRows(list(continuousGraph, logGraph))
|
||||
}
|
||||
|
||||
|
|
@ -194,7 +194,7 @@ plotVariantQC <- function(metrics, measures, requestedStrat = "Sample",
|
|||
titleText=paste(titleText, note, sep="\n")
|
||||
}
|
||||
paste(titleText)
|
||||
title <- opts(title=titleText)
|
||||
title <- ggtitle(titleText)
|
||||
|
||||
determineFacet <- function(onX) {
|
||||
if ( onX ) {
|
||||
|
|
@ -232,7 +232,7 @@ plotVariantQC <- function(metrics, measures, requestedStrat = "Sample",
|
|||
distGraph <- distGraph + facet_grid(distFacet, scales=scale)
|
||||
distGraph <- distGraph + ylab("Relative frequency")
|
||||
distGraph <- distGraph + xlab("Variable value (see facet for variable by color)")
|
||||
distGraph <- distGraph + opts(axis.text.x=theme_text(angle=-45)) # , legend.position="none")
|
||||
distGraph <- distGraph + theme(axis.text.x=element_text(angle=-45)) # , legend.position="none")
|
||||
} else {
|
||||
distGraph <- NA
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue