From 088363ce421d982fb66ec5aaca4e1783b7056626 Mon Sep 17 00:00:00 2001 From: rpoplin Date: Thu, 10 Dec 2009 21:57:35 +0000 Subject: [PATCH] Added entropy calculation to histogram of quality scores git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2316 348d0f76-0448-11de-a6fe-93d51630548a --- R/plot_residualError_QualityScoreCovariate.R | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/R/plot_residualError_QualityScoreCovariate.R b/R/plot_residualError_QualityScoreCovariate.R index 7b3c166bf..92fe439d0 100644 --- a/R/plot_residualError_QualityScoreCovariate.R +++ b/R/plot_residualError_QualityScoreCovariate.R @@ -39,7 +39,9 @@ outfile = paste(input, ".quality_emp_hist.pdf", sep="") pdf(outfile, height=7, width=7) hst=subset(data.frame(e$Qempirical, e$nBases), e.nBases != 0) hst2=subset(data.frame(f$Qempirical, f$nBases), f.nBases != 0) -plot(hst$e.Qempirical, hst$e.nBases, type="h", lwd=4, xlim=c(0,40), ylim=c(0,max(hst$e.nBases)), main="Empirical quality score histogram", xlab="Empirical quality score", ylab="Number of Bases",yaxt="n") +percentBases=hst$e.nBases / sum(as.numeric(hst$e.nBases)) +entropy = -sum(log2(percentBases)*percentBases) +plot(hst$e.Qempirical, hst$e.nBases, type="h", lwd=4, xlim=c(0,40), ylim=c(0,max(hst$e.nBases)), main=paste("Empirical quality score histogram, entropy = ",round(entropy,digits=3)), xlab="Empirical quality score", ylab="Number of Bases",yaxt="n") points(hst2$f.Qempirical, hst2$f.nBases, type="h", lwd=4, col="maroon1") axis(2,axTicks(2), format(axTicks(2), scientific=F)) dev.off() @@ -52,7 +54,7 @@ outfile = paste(input, ".quality_rep_hist.pdf", sep="") pdf(outfile, height=7, width=7) hst=subset(data.frame(e$Qreported, e$nBases), e.nBases != 0) hst2=subset(data.frame(f$Qreported, f$nBases), f.nBases != 0) -plot(hst$e.Qreported, hst$e.nBases, type="h", lwd=4, xlim=c(0,40), ylim=c(0,max(hst$e.nBases)), main="Reported quality score histogram", xlab="Reported quality score", ylab="Number of Bases",yaxt="n") +plot(hst$e.Qreported, hst$e.nBases, type="h", lwd=4, xlim=c(0,40), ylim=c(0,max(hst$e.nBases)), main=paste("Reported quality score histogram, entropy = ",round(entropy,digits=3)), xlab="Reported quality score", ylab="Number of Bases",yaxt="n") points(hst2$f.Qreported, hst2$f.nBases, type="h", lwd=4, col="maroon1") axis(2,axTicks(2), format(axTicks(2), scientific=F)) dev.off()