diff --git a/R/plot_residualError_QualityScoreCovariate.R b/R/plot_residualError_QualityScoreCovariate.R index 2678e657d..0df6966a6 100644 --- a/R/plot_residualError_QualityScoreCovariate.R +++ b/R/plot_residualError_QualityScoreCovariate.R @@ -5,6 +5,7 @@ args <- commandArgs(TRUE) input = args[1] Qcutoff = as.numeric(args[2]) maxQ = as.numeric(args[3]) +maxHist = as.numeric(args[4]) t=read.table(input, header=T) @@ -42,7 +43,11 @@ hst=subset(data.frame(e$Qempirical, e$nBases), e.nBases != 0) hst2=subset(data.frame(f$Qempirical, f$nBases), f.nBases != 0) percentBases=hst$e.nBases / sum(as.numeric(hst$e.nBases)) entropy = -sum(log2(percentBases)*percentBases) -plot(hst$e.Qempirical, hst$e.nBases, type="h", lwd=4, xlim=c(0,maxQ), ylim=c(0,max(hst$e.nBases)), main=paste("Empirical quality score histogram, entropy = ",round(entropy,digits=3)), xlab="Empirical quality score", ylab="Number of Bases",yaxt="n") +yMax = max(hst$e.nBases) +if(maxHist != 0) { +yMax = maxHist +} +plot(hst$e.Qempirical, hst$e.nBases, type="h", lwd=4, xlim=c(0,maxQ), ylim=c(0,yMax), main=paste("Empirical quality score histogram, entropy = ",round(entropy,digits=3)), xlab="Empirical quality score", ylab="Number of Bases",yaxt="n") points(hst2$f.Qempirical, hst2$f.nBases, type="h", lwd=4, col="maroon1") axis(2,axTicks(2), format(axTicks(2), scientific=F)) dev.off() @@ -55,7 +60,11 @@ outfile = paste(input, ".quality_rep_hist.pdf", sep="") pdf(outfile, height=7, width=7) hst=subset(data.frame(e$Qreported, e$nBases), e.nBases != 0) hst2=subset(data.frame(f$Qreported, f$nBases), f.nBases != 0) -plot(hst$e.Qreported, hst$e.nBases, type="h", lwd=4, xlim=c(0,maxQ), ylim=c(0,max(hst$e.nBases)), main=paste("Reported quality score histogram, entropy = ",round(entropy,digits=3)), xlab="Reported quality score", ylab="Number of Bases",yaxt="n") +yMax = max(hst$e.nBases) +if(maxHist != 0) { +yMax = maxHist +} +plot(hst$e.Qreported, hst$e.nBases, type="h", lwd=4, xlim=c(0,maxQ), ylim=c(0,yMax), main=paste("Reported quality score histogram, entropy = ",round(entropy,digits=3)), xlab="Reported quality score", ylab="Number of Bases",yaxt="n") points(hst2$f.Qreported, hst2$f.nBases, type="h", lwd=4, col="maroon1") axis(2,axTicks(2), format(axTicks(2), scientific=F)) dev.off() diff --git a/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java b/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java index 26236beda..9158cdda4 100755 --- a/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java +++ b/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java @@ -65,6 +65,8 @@ class AnalyzeCovariatesCLP extends CommandLineProgram { private int NUM_READ_GROUPS_TO_PROCESS = -1; // -1 means process all read groups @Argument(fullName="max_quality_score", shortName="maxQ", required = false, doc="The integer value at which to cap the quality scores, default is 40") private int MAX_QUALITY_SCORE = 40; + @Argument(fullName="max_histogram_value", shortName="maxHist", required = false, doc="If supplied, this value will be the max value of the histogram plots") + private int MAX_HISTOGRAM_VALUE = 0; ///////////////////////////// @@ -274,7 +276,7 @@ class AnalyzeCovariatesCLP extends CommandLineProgram { // Analyze reported quality p = Runtime.getRuntime().exec(PATH_TO_RSCRIPT + " " + PATH_TO_RESOURCES + "plot_residualError_QualityScoreCovariate.R" + " " + OUTPUT_DIR + readGroup + "." + cov.getClass().getSimpleName()+ ".dat" + " " + - IGNORE_QSCORES_LESS_THAN + " " + MAX_QUALITY_SCORE); // The third argument is the Q scores that should be turned pink in the plot because they were ignored + IGNORE_QSCORES_LESS_THAN + " " + MAX_QUALITY_SCORE + " " + MAX_HISTOGRAM_VALUE); // The third argument is the Q scores that should be turned pink in the plot because they were ignored if(numReadGroups % 3 == 0) { // Don't want to spawn all the RScript jobs too quickly. So wait for this one to finish p.waitFor(); }