Added an option to AnalyzeCovariates to set the max value of the histograms to make them easier to directly compare.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2753 348d0f76-0448-11de-a6fe-93d51630548a
2010-01-31 23:13:57 +00:00 · 2010-01-31 23:13:57 +00:00 · 64fc76e4bf
parent f6da57dc79
commit 64fc76e4bf
2 changed files with 14 additions and 3 deletions
--- a/R/plot_residualError_QualityScoreCovariate.R
+++ b/R/plot_residualError_QualityScoreCovariate.R
@ -5,6 +5,7 @@ args <- commandArgs(TRUE)
 input = args[1]
 Qcutoff = as.numeric(args[2])
 maxQ = as.numeric(args[3])
+maxHist = as.numeric(args[4])

 t=read.table(input, header=T)

@ -42,7 +43,11 @@ hst=subset(data.frame(e$Qempirical, e$nBases), e.nBases != 0)
 hst2=subset(data.frame(f$Qempirical, f$nBases), f.nBases != 0)
 percentBases=hst$e.nBases / sum(as.numeric(hst$e.nBases))
 entropy = -sum(log2(percentBases)*percentBases)
-plot(hst$e.Qempirical, hst$e.nBases, type="h", lwd=4, xlim=c(0,maxQ), ylim=c(0,max(hst$e.nBases)), main=paste("Empirical quality score histogram, entropy = ",round(entropy,digits=3)), xlab="Empirical quality score", ylab="Number of Bases",yaxt="n")
+yMax = max(hst$e.nBases)
+if(maxHist != 0) {
+yMax = maxHist
+}
+plot(hst$e.Qempirical, hst$e.nBases, type="h", lwd=4, xlim=c(0,maxQ), ylim=c(0,yMax), main=paste("Empirical quality score histogram, entropy = ",round(entropy,digits=3)), xlab="Empirical quality score", ylab="Number of Bases",yaxt="n")
 points(hst2$f.Qempirical, hst2$f.nBases, type="h", lwd=4, col="maroon1")
 axis(2,axTicks(2), format(axTicks(2), scientific=F))
 dev.off()
@ -55,7 +60,11 @@ outfile = paste(input, ".quality_rep_hist.pdf", sep="")
 pdf(outfile, height=7, width=7)
 hst=subset(data.frame(e$Qreported, e$nBases), e.nBases != 0)
 hst2=subset(data.frame(f$Qreported, f$nBases), f.nBases != 0)
-plot(hst$e.Qreported, hst$e.nBases, type="h", lwd=4, xlim=c(0,maxQ), ylim=c(0,max(hst$e.nBases)), main=paste("Reported quality score histogram, entropy = ",round(entropy,digits=3)), xlab="Reported quality score", ylab="Number of Bases",yaxt="n")
+yMax = max(hst$e.nBases)
+if(maxHist != 0) {
+yMax = maxHist
+}
+plot(hst$e.Qreported, hst$e.nBases, type="h", lwd=4, xlim=c(0,maxQ), ylim=c(0,yMax), main=paste("Reported quality score histogram, entropy = ",round(entropy,digits=3)), xlab="Reported quality score", ylab="Number of Bases",yaxt="n")
 points(hst2$f.Qreported, hst2$f.nBases, type="h", lwd=4, col="maroon1")
 axis(2,axTicks(2), format(axTicks(2), scientific=F))
 dev.off()
--- a/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java
+++ b/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java
@ -65,6 +65,8 @@ class AnalyzeCovariatesCLP extends CommandLineProgram {
    private int NUM_READ_GROUPS_TO_PROCESS = -1; // -1 means process all read groups
    @Argument(fullName="max_quality_score", shortName="maxQ", required = false, doc="The integer value at which to cap the quality scores, default is 40")
    private int MAX_QUALITY_SCORE = 40;
+    @Argument(fullName="max_histogram_value", shortName="maxHist", required = false, doc="If supplied, this value will be the max value of the histogram plots")
+    private int MAX_HISTOGRAM_VALUE = 0;


    /////////////////////////////
@ -274,7 +276,7 @@ class AnalyzeCovariatesCLP extends CommandLineProgram {
                            // Analyze reported quality
                            p = Runtime.getRuntime().exec(PATH_TO_RSCRIPT + " " + PATH_TO_RESOURCES + "plot_residualError_QualityScoreCovariate.R" + " " +
                                        OUTPUT_DIR + readGroup + "." + cov.getClass().getSimpleName()+ ".dat" + " " +
-                                        IGNORE_QSCORES_LESS_THAN + " " + MAX_QUALITY_SCORE); // The third argument is the Q scores that should be turned pink in the plot because they were ignored
+                                        IGNORE_QSCORES_LESS_THAN + " " + MAX_QUALITY_SCORE + " " + MAX_HISTOGRAM_VALUE); // The third argument is the Q scores that should be turned pink in the plot because they were ignored
                            if(numReadGroups % 3 == 0) { // Don't want to spawn all the RScript jobs too quickly. So wait for this one to finish
                                p.waitFor();
                            }