#!/broad/tools/apps/R-2.6.0/bin/Rscript

# Plots two diagnostics for a single covariate table.
#
# Usage: Rscript <this script> <input table> <covariate name>
#
# The input is read with read.table(header = TRUE) and must provide the
# columns Covariate, Qreported, Qempirical and nBases. Two PDFs are written
# next to the input file:
#   <input>.qual_diff_v_<covariate name>.pdf  empirical minus reported quality
#                                             for each covariate value
#   <input>.<covariate name>_hist.pdf         nBases for each covariate value
#
# Rows with nBases >= 1000 are drawn in the primary colour; rows with fewer
# bases are drawn in cornflowerblue.

args <- commandArgs(TRUE)
if (length(args) < 2) {
  stop("usage: Rscript <script> <input table> <covariate name>",
       call. = FALSE)
}
input <- args[1]
covariateName <- args[2]

# Rows with at least this many bases count as "good" for RMSE_good.
minGoodBases <- 1000
# Residuals are clamped to +/- this value so points stay inside the y axis.
residualLimit <- 10

# Clamp a numeric vector to [-limit, limit]; NA values stay NA.
clampResidual <- function(x, limit) {
  pmax(pmin(x, limit), -limit)
}

# Weighted root-mean-square difference between empirical and reported
# quality, weighting each row by its nBases. Yields NaN for an empty table.
weightedRmse <- function(tbl) {
  sqrt(sum((tbl$Qempirical - tbl$Qreported)^2 * tbl$nBases) / sum(tbl$nBases))
}

# stringsAsFactors is spelled out because the non-numeric plotting branches
# below need the covariate to be a factor; read.table stopped converting
# strings to factors by default in R 4.0.0.
covTable <- read.table(input, header = TRUE, stringsAsFactors = TRUE)

requiredColumns <- c("Covariate", "Qreported", "Qempirical", "nBases")
missingColumns <- setdiff(requiredColumns, names(covTable))
if (length(missingColumns) > 0) {
  stop("input table '", input, "' is missing column(s): ",
       paste(missingColumns, collapse = ", "), call. = FALSE)
}
if (nrow(covTable) == 0) {
  stop("input table '", input, "' contains no rows", call. = FALSE)
}

# Order rows by the first column (presumably the covariate -- the plots
# below always address it by name instead).
covTable <- covTable[sort.list(covTable[, 1]), ]

# Don't let residual error go off the edge of the plot.
covTable$residualError <- clampResidual(
  covTable$Qempirical - covTable$Qreported, residualLimit
)

good <- covTable[covTable$nBases >= minGoodBases, ]
sparse <- covTable[covTable$nBases < minGoodBases, ]

# The RMSE values are computed from the unclamped differences.
rmseGood <- weightedRmse(good)
rmseAll <- weightedRmse(covTable)

if (nrow(good) == nrow(covTable)) {
  # Every row is "good", so a single RMSE is enough.
  theTitle <- paste("RMSE = ", round(rmseAll, digits = 3))
} else {
  theTitle <- paste("RMSE_good = ", round(rmseGood, digits = 3),
                    ", RMSE_all = ", round(rmseAll, digits = 3))
}

isNumericCovariate <- is.numeric(covTable$Covariate)
yLimits <- c(-residualLimit, residualLimit)

#
# Plot residual error as a function of the covariate
#

outfile <- paste(input, ".qual_diff_v_", covariateName, ".pdf", sep = "")
pdf(outfile, height = 7, width = 7)
par(cex = 1.1)

if (isNumericCovariate) {
  # The x range comes from the full table so sparse rows are never cut off.
  xLimits <- c(min(covTable$Covariate), max(covTable$Covariate))
  plot(good$Covariate, good$residualError,
       type = "p", main = theTitle,
       ylab = "Empirical - Reported Quality", xlab = covariateName,
       col = "blue", pch = 20, ylim = yLimits, xlim = xLimits)
  points(sparse$Covariate, sparse$residualError,
         type = "p", col = "cornflowerblue", pch = 20)
} else {
  # Dinuc (and other non-numeric covariates) are different to make their
  # plots look nice
  plot(covTable$Covariate, covTable$residualError,
       type = "l", main = theTitle,
       ylab = "Empirical - Reported Quality", xlab = covariateName,
       col = "blue", ylim = yLimits)
  points(sparse$Covariate, sparse$residualError,
         type = "l", col = "cornflowerblue")
}
dev.off()

#
# Plot histogram of the covariate
#

outfile <- paste(input, ".", covariateName, "_hist.pdf", sep = "")
pdf(outfile, height = 7, width = 7)
histTitle <- paste(covariateName, "histogram")

if (isNumericCovariate) {
  # Rows without any bases contribute nothing to the histogram.
  goodHist <- subset(good, nBases != 0)
  sparseHist <- subset(sparse, nBases != 0)
  plot(goodHist$Covariate, goodHist$nBases,
       type = "h", lwd = 2, main = histTitle,
       xlab = covariateName, ylab = "Count", yaxt = "n",
       xlim = c(min(covTable$Covariate), max(covTable$Covariate)))
  points(sparseHist$Covariate, sparseHist$nBases,
         type = "h", lwd = 2, col = "cornflowerblue")
  # Label the count axis without scientific notation.
  yTicks <- axTicks(2)
  axis(2, yTicks, format(yTicks, scientific = FALSE))
} else {
  # Dinuc (and other non-numeric covariates) are different to make their
  # plots look nice
  allHist <- subset(covTable, nBases != 0)
  nBars <- nrow(allHist)
  plot(seq_len(nBars), allHist$nBases,
       type = "h", lwd = 7, main = histTitle,
       xlab = covariateName, ylab = "Count", yaxt = "n", xaxt = "n")
  # Label every second bar with its covariate value.
  labelAt <- seq(1, nBars, 2)
  axis(1, at = labelAt, labels = allHist$Covariate[labelAt])
  yTicks <- axTicks(2)
  axis(2, yTicks, format(yTicks, scientific = FALSE))
}
dev.off()