Added an option to AnalyzeCovariates to set the max value of the histograms to make them easier to directly compare.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2753 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
f6da57dc79
commit
64fc76e4bf
|
|
@ -5,6 +5,7 @@ args <- commandArgs(TRUE)
|
|||
input = args[1]
|
||||
Qcutoff = as.numeric(args[2])
|
||||
maxQ = as.numeric(args[3])
|
||||
maxHist = as.numeric(args[4])
|
||||
|
||||
t=read.table(input, header=T)
|
||||
|
||||
|
|
@ -42,7 +43,11 @@ hst=subset(data.frame(e$Qempirical, e$nBases), e.nBases != 0)
|
|||
hst2=subset(data.frame(f$Qempirical, f$nBases), f.nBases != 0)
|
||||
percentBases=hst$e.nBases / sum(as.numeric(hst$e.nBases))
|
||||
entropy = -sum(log2(percentBases)*percentBases)
|
||||
plot(hst$e.Qempirical, hst$e.nBases, type="h", lwd=4, xlim=c(0,maxQ), ylim=c(0,max(hst$e.nBases)), main=paste("Empirical quality score histogram, entropy = ",round(entropy,digits=3)), xlab="Empirical quality score", ylab="Number of Bases",yaxt="n")
|
||||
yMax = max(hst$e.nBases)
|
||||
if(maxHist != 0) {
|
||||
yMax = maxHist
|
||||
}
|
||||
plot(hst$e.Qempirical, hst$e.nBases, type="h", lwd=4, xlim=c(0,maxQ), ylim=c(0,yMax), main=paste("Empirical quality score histogram, entropy = ",round(entropy,digits=3)), xlab="Empirical quality score", ylab="Number of Bases",yaxt="n")
|
||||
points(hst2$f.Qempirical, hst2$f.nBases, type="h", lwd=4, col="maroon1")
|
||||
axis(2,axTicks(2), format(axTicks(2), scientific=F))
|
||||
dev.off()
|
||||
|
|
@ -55,7 +60,11 @@ outfile = paste(input, ".quality_rep_hist.pdf", sep="")
|
|||
pdf(outfile, height=7, width=7)
|
||||
hst=subset(data.frame(e$Qreported, e$nBases), e.nBases != 0)
|
||||
hst2=subset(data.frame(f$Qreported, f$nBases), f.nBases != 0)
|
||||
plot(hst$e.Qreported, hst$e.nBases, type="h", lwd=4, xlim=c(0,maxQ), ylim=c(0,max(hst$e.nBases)), main=paste("Reported quality score histogram, entropy = ",round(entropy,digits=3)), xlab="Reported quality score", ylab="Number of Bases",yaxt="n")
|
||||
yMax = max(hst$e.nBases)
|
||||
if(maxHist != 0) {
|
||||
yMax = maxHist
|
||||
}
|
||||
plot(hst$e.Qreported, hst$e.nBases, type="h", lwd=4, xlim=c(0,maxQ), ylim=c(0,yMax), main=paste("Reported quality score histogram, entropy = ",round(entropy,digits=3)), xlab="Reported quality score", ylab="Number of Bases",yaxt="n")
|
||||
points(hst2$f.Qreported, hst2$f.nBases, type="h", lwd=4, col="maroon1")
|
||||
axis(2,axTicks(2), format(axTicks(2), scientific=F))
|
||||
dev.off()
|
||||
|
|
|
|||
|
|
@ -65,6 +65,8 @@ class AnalyzeCovariatesCLP extends CommandLineProgram {
|
|||
private int NUM_READ_GROUPS_TO_PROCESS = -1; // -1 means process all read groups
|
||||
@Argument(fullName="max_quality_score", shortName="maxQ", required = false, doc="The integer value at which to cap the quality scores, default is 40")
|
||||
private int MAX_QUALITY_SCORE = 40;
|
||||
@Argument(fullName="max_histogram_value", shortName="maxHist", required = false, doc="If supplied, this value will be the max value of the histogram plots")
|
||||
private int MAX_HISTOGRAM_VALUE = 0;
|
||||
|
||||
|
||||
/////////////////////////////
|
||||
|
|
@ -274,7 +276,7 @@ class AnalyzeCovariatesCLP extends CommandLineProgram {
|
|||
// Analyze reported quality
|
||||
p = Runtime.getRuntime().exec(PATH_TO_RSCRIPT + " " + PATH_TO_RESOURCES + "plot_residualError_QualityScoreCovariate.R" + " " +
|
||||
OUTPUT_DIR + readGroup + "." + cov.getClass().getSimpleName()+ ".dat" + " " +
|
||||
IGNORE_QSCORES_LESS_THAN + " " + MAX_QUALITY_SCORE); // The third argument is the Q scores that should be turned pink in the plot because they were ignored
|
||||
IGNORE_QSCORES_LESS_THAN + " " + MAX_QUALITY_SCORE + " " + MAX_HISTOGRAM_VALUE); // The third argument is the Q scores that should be turned pink in the plot because they were ignored
|
||||
if(numReadGroups % 3 == 0) { // Don't want to spawn all the RScript jobs too quickly. So wait for this one to finish
|
||||
p.waitFor();
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue