diff --git a/build.xml b/build.xml index d3377a763..7389503bb 100644 --- a/build.xml +++ b/build.xml @@ -679,20 +679,6 @@ - - - - - - - - - - - - - - @@ -750,12 +736,6 @@ - - - - - - diff --git a/public/R/scripts/org/broadinstitute/sting/analyzecovariates/plot_residualError_OtherCovariate.R b/public/R/scripts/org/broadinstitute/sting/analyzecovariates/plot_residualError_OtherCovariate.R deleted file mode 100644 index 15c6fc8f0..000000000 --- a/public/R/scripts/org/broadinstitute/sting/analyzecovariates/plot_residualError_OtherCovariate.R +++ /dev/null @@ -1,121 +0,0 @@ -#!/bin/env Rscript - -library(tools) - -args <- commandArgs(TRUE) -verbose = TRUE - -input = args[1] -covariateName = args[2] - -outfile = paste(input, ".qual_diff_v_", covariateName, ".pdf", sep="") -pdf(outfile, height=7, width=7) -par(cex=1.1) -c <- read.table(input, header=T) -c <- c[sort.list(c[,1]),] - -# -# Plot residual error as a function of the covariate -# - -d.good <- c[c$nBases >= 1000,] -d.1000 <- c[c$nBases < 1000,] -rmseGood = sqrt( sum(as.numeric((d.good$Qempirical-d.good$Qreported)^2 * d.good$nBases)) / sum(as.numeric(d.good$nBases)) ) # prevent integer overflow with as.numeric, ugh -rmseAll = sqrt( sum(as.numeric((c$Qempirical-c$Qreported)^2 * c$nBases)) / sum(as.numeric(c$nBases)) ) -theTitle = paste("RMSE_good =", round(rmseGood,digits=3), ", RMSE_all =", round(rmseAll,digits=3)) -if( length(d.good$nBases) == length(c$nBases) ) { - theTitle = paste("RMSE =", round(rmseAll,digits=3)) -} -# Don't let residual error go off the edge of the plot -d.good$residualError = d.good$Qempirical-d.good$Qreported -d.good$residualError[which(d.good$residualError > 10)] = 10 -d.good$residualError[which(d.good$residualError < -10)] = -10 -d.1000$residualError = d.1000$Qempirical-d.1000$Qreported -d.1000$residualError[which(d.1000$residualError > 10)] = 10 -d.1000$residualError[which(d.1000$residualError < -10)] = -10 -c$residualError = c$Qempirical-c$Qreported -c$residualError[which(c$residualError > 10)] = 10 -c$residualError[which(c$residualError < -10)] = -10 -pointType = "p" -if( length(c$Covariate) <= 20 ) { - pointType = "o" -} -if( is.numeric(c$Covariate) ) { - plot(d.good$Covariate, d.good$residualError, type=pointType, main=theTitle, ylab="Empirical - Reported Quality", xlab=covariateName, col="blue", pch=20, ylim=c(-10, 10), xlim=c(min(c$Covariate),max(c$Covariate))) - points(d.1000$Covariate, d.1000$residualError, type=pointType, col="cornflowerblue", pch=20) -} else { # Dinuc (and other non-numeric covariates) are different to make their plots look nice - plot(c$Covariate, c$residualError, type="l", main=theTitle, ylab="Empirical - Reported Quality", xlab=covariateName, col="blue", ylim=c(-10, 10)) - points(d.1000$Covariate, d.1000$residualError, type="l", col="cornflowerblue") -} -dev.off() - -if (exists('compactPDF')) { - compactPDF(outfile) -} - -# -# Plot mean quality versus the covariate -# - -outfile = paste(input, ".reported_qual_v_", covariateName, ".pdf", sep="") -pdf(outfile, height=7, width=7) -par(cex=1.1) -pointType = "p" -if( length(c$Covariate) <= 20 ) { - pointType = "o" -} -theTitle = paste("Quality By", covariateName); -if( is.numeric(c$Covariate) ) { - plot(d.good$Covariate, d.good$Qreported, type=pointType, main=theTitle, ylab="Mean Reported Quality", xlab=covariateName, col="blue", pch=20, ylim=c(0, 40), xlim=c(min(c$Covariate),max(c$Covariate))) - points(d.1000$Covariate, d.1000$Qreported, type=pointType, col="cornflowerblue", pch=20) -} else { # Dinuc (and other non-numeric covariates) are different to make their plots look nice - plot(c$Covariate, c$Qreported, type="l", main=theTitle, ylab="Mean Reported Quality", xlab=covariateName, col="blue", ylim=c(0, 40)) - points(d.1000$Covariate, d.1000$Qreported, type="l", col="cornflowerblue") -} -dev.off() - -if (exists('compactPDF')) { - compactPDF(outfile) -} - -# -# Plot histogram of the covariate -# - -e = d.good -f = d.1000 -outfile = paste(input, ".", covariateName,"_hist.pdf", sep="") -pdf(outfile, height=7, width=7) -hst=subset(data.frame(e$Covariate, e$nBases), e.nBases != 0) -hst2=subset(data.frame(f$Covariate, f$nBases), f.nBases != 0) - -lwdSize=2 -if( length(c$Covariate) <= 20 ) { - lwdSize=7 -} else if( length(c$Covariate) <= 70 ) { - lwdSize=4 -} - -if( is.numeric(c$Covariate) ) { - if( length(hst$e.Covariate) == 0 ) { - plot(hst2$f.Covariate, hst2$f.nBases, type="h", lwd=lwdSize, col="cornflowerblue", main=paste(covariateName,"histogram"), ylim=c(0, max(hst2$f.nBases)), xlab=covariateName, ylab="Count",yaxt="n",xlim=c(min(c$Covariate),max(c$Covariate))) - } else { - plot(hst$e.Covariate, hst$e.nBases, type="h", lwd=lwdSize, main=paste(covariateName,"histogram"), xlab=covariateName, ylim=c(0, max(hst$e.nBases)),ylab="Number of Bases",yaxt="n",xlim=c(min(c$Covariate),max(c$Covariate))) - points(hst2$f.Covariate, hst2$f.nBases, type="h", lwd=lwdSize, col="cornflowerblue") - } - axis(2,axTicks(2), format(axTicks(2), scientific=F)) -} else { # Dinuc (and other non-numeric covariates) are different to make their plots look nice - hst=subset(data.frame(c$Covariate, c$nBases), c.nBases != 0) - plot(1:length(hst$c.Covariate), hst$c.nBases, type="h", lwd=lwdSize, main=paste(covariateName,"histogram"), ylim=c(0, max(hst$c.nBases)),xlab=covariateName, ylab="Number of Bases",yaxt="n",xaxt="n") - if( length(hst$c.Covariate) > 9 ) { - axis(1, at=seq(1,length(hst$c.Covariate),2), labels = hst$c.Covariate[seq(1,length(hst$c.Covariate),2)]) - } else { - axis(1, at=seq(1,length(hst$c.Covariate),1), labels = hst$c.Covariate) - } - axis(2,axTicks(2), format(axTicks(2), scientific=F)) -} -dev.off() - -if (exists('compactPDF')) { - compactPDF(outfile) -} diff --git a/public/R/scripts/org/broadinstitute/sting/analyzecovariates/plot_residualError_QualityScoreCovariate.R b/public/R/scripts/org/broadinstitute/sting/analyzecovariates/plot_residualError_QualityScoreCovariate.R deleted file mode 100644 index 33eeb1f16..000000000 --- a/public/R/scripts/org/broadinstitute/sting/analyzecovariates/plot_residualError_QualityScoreCovariate.R +++ /dev/null @@ -1,84 +0,0 @@ -#!/bin/env Rscript - -library(tools) - -args <- commandArgs(TRUE) - -input = args[1] -Qcutoff = as.numeric(args[2]) -maxQ = as.numeric(args[3]) -maxHist = as.numeric(args[4]) - -t=read.table(input, header=T) - -# -# Plot of reported quality versus empirical quality -# - -outfile = paste(input, ".quality_emp_v_stated.pdf", sep="") -pdf(outfile, height=7, width=7) -d.good <- t[t$nBases >= 10000 & t$Qreported >= Qcutoff,] -d.1000 <- t[t$nBases < 1000 & t$Qreported >= Qcutoff,] -d.10000 <- t[t$nBases < 10000 & t$nBases >= 1000 & t$Qreported >= Qcutoff,] -f <- t[t$Qreported < Qcutoff,] -e <- rbind(d.good, d.1000, d.10000) -rmseGood = sqrt( sum(as.numeric((d.good$Qempirical-d.good$Qreported)^2 * d.good$nBases)) / sum(as.numeric(d.good$nBases)) ) # prevent integer overflow with as.numeric, ugh -rmseAll = sqrt( sum(as.numeric((e$Qempirical-e$Qreported)^2 * e$nBases)) / sum(as.numeric(e$nBases)) ) -theTitle = paste("RMSE_good =", round(rmseGood,digits=3), ", RMSE_all =", round(rmseAll,digits=3)) -if( length(t$nBases) - length(f$nBases) == length(d.good$nBases) ) { - theTitle = paste("RMSE =", round(rmseAll,digits=3)); -} -plot(d.good$Qreported, d.good$Qempirical, type="p", col="blue", main=theTitle, xlim=c(0,maxQ), ylim=c(0,maxQ), pch=16, xlab="Reported quality score", ylab="Empirical quality score") -points(d.1000$Qreported, d.1000$Qempirical, type="p", col="lightblue", pch=16) -points(d.10000$Qreported, d.10000$Qempirical, type="p", col="cornflowerblue", pch=16) -points(f$Qreported, f$Qempirical, type="p", col="maroon1", pch=16) -abline(0,1, lty=2) -dev.off() - -if (exists('compactPDF')) { - compactPDF(outfile) -} - -# -# Plot Q empirical histogram -# - -outfile = paste(input, ".quality_emp_hist.pdf", sep="") -pdf(outfile, height=7, width=7) -hst=subset(data.frame(e$Qempirical, e$nBases), e.nBases != 0) -hst2=subset(data.frame(f$Qempirical, f$nBases), f.nBases != 0) -percentBases=hst$e.nBases / sum(as.numeric(hst$e.nBases)) -entropy = -sum(log2(percentBases)*percentBases) -yMax = max(hst$e.nBases) -if(maxHist != 0) { -yMax = maxHist -} -plot(hst$e.Qempirical, hst$e.nBases, type="h", lwd=4, xlim=c(0,maxQ), ylim=c(0,yMax), main=paste("Empirical quality score histogram, entropy = ",round(entropy,digits=3)), xlab="Empirical quality score", ylab="Number of Bases",yaxt="n") -points(hst2$f.Qempirical, hst2$f.nBases, type="h", lwd=4, col="maroon1") -axis(2,axTicks(2), format(axTicks(2), scientific=F)) -dev.off() - -if (exists('compactPDF')) { - compactPDF(outfile) -} - -# -# Plot Q reported histogram -# - -outfile = paste(input, ".quality_rep_hist.pdf", sep="") -pdf(outfile, height=7, width=7) -hst=subset(data.frame(e$Qreported, e$nBases), e.nBases != 0) -hst2=subset(data.frame(f$Qreported, f$nBases), f.nBases != 0) -yMax = max(hst$e.nBases) -if(maxHist != 0) { -yMax = maxHist -} -plot(hst$e.Qreported, hst$e.nBases, type="h", lwd=4, xlim=c(0,maxQ), ylim=c(0,yMax), main=paste("Reported quality score histogram, entropy = ",round(entropy,digits=3)), xlab="Reported quality score", ylab="Number of Bases",yaxt="n") -points(hst2$f.Qreported, hst2$f.nBases, type="h", lwd=4, col="maroon1") -axis(2,axTicks(2), format(axTicks(2), scientific=F)) -dev.off() - -if (exists('compactPDF')) { - compactPDF(outfile) -} diff --git a/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/GATKExtensionsGenerator.java b/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/GATKExtensionsGenerator.java index dcdef5aab..883436582 100644 --- a/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/GATKExtensionsGenerator.java +++ b/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/GATKExtensionsGenerator.java @@ -195,7 +195,6 @@ public class GATKExtensionsGenerator extends CommandLineProgram { private static final List gatkPackages = Arrays.asList( "org.broadinstitute.sting.gatk", "org.broadinstitute.sting.pipeline", - "org.broadinstitute.sting.analyzecovariates", "org.broadinstitute.sting.gatk.datasources.reads.utilities"); /** @@ -252,7 +251,7 @@ public class GATKExtensionsGenerator extends CommandLineProgram { */ private void writeFilter(String className, List argumentFields, Set> dependents) throws IOException { String content = getContent(TRAIT_TEMPLATE, "org.broadinstitute.sting.queue.function.CommandLineFunction", - className, "", false, String.format(" + \" --read_filter %s\"", className), argumentFields, dependents); + className, "", false, String.format(" + required(\"--read_filter\", \"%s\")", className), argumentFields, dependents); writeFile(GATK_EXTENSIONS_PACKAGE_NAME + "." + className, content); } diff --git a/public/packages/AnalyzeCovariates.xml b/public/packages/AnalyzeCovariates.xml deleted file mode 100644 index 27a72eabd..000000000 --- a/public/packages/AnalyzeCovariates.xml +++ /dev/null @@ -1,14 +0,0 @@ - - - - - - - - - - - - - - diff --git a/public/packages/GATK-Picard.xml b/public/packages/GATK-Picard.xml deleted file mode 100644 index 251f584f7..000000000 --- a/public/packages/GATK-Picard.xml +++ /dev/null @@ -1,51 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/public/packages/GATKEngine.xml b/public/packages/GATKEngine.xml index 78f3f0cea..2de0273f3 100644 --- a/public/packages/GATKEngine.xml +++ b/public/packages/GATKEngine.xml @@ -48,11 +48,11 @@ - - - - - - + + + + + + diff --git a/public/packages/GenomeAnalysisTK.xml b/public/packages/GenomeAnalysisTK.xml index b6c83274c..e95c992b6 100644 --- a/public/packages/GenomeAnalysisTK.xml +++ b/public/packages/GenomeAnalysisTK.xml @@ -33,10 +33,8 @@ - - - - + + diff --git a/public/packages/GenomeAnalysisTKLite.xml b/public/packages/GenomeAnalysisTKLite.xml index fed85271d..b2f73434c 100644 --- a/public/packages/GenomeAnalysisTKLite.xml +++ b/public/packages/GenomeAnalysisTKLite.xml @@ -33,9 +33,6 @@ - - - diff --git a/public/packages/Queue.xml b/public/packages/Queue.xml index b57a5c1c2..621a549d5 100644 --- a/public/packages/Queue.xml +++ b/public/packages/Queue.xml @@ -32,7 +32,6 @@ - diff --git a/public/packages/QueueLite.xml b/public/packages/QueueLite.xml index 73fd1884a..0ead68fb0 100644 --- a/public/packages/QueueLite.xml +++ b/public/packages/QueueLite.xml @@ -32,7 +32,6 @@ -