From 46cd22761303c1d8ebd801fba3ef830ce9b91bcd Mon Sep 17 00:00:00 2001 From: depristo Date: Wed, 15 Dec 2010 14:56:12 +0000 Subject: [PATCH] Stability improvements to GATK run report system. R code is now robust. XML parser uses the C backend in python, 10x faster. Added shell script that runs the daily reports, and linked the /humgen/ runme.csh to this script. Script now emails the group the daily PDFs to gsamembers git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4845 348d0f76-0448-11de-a6fe-93d51630548a --- R/GATKRunReport.R | 58 +++++++++++++++++++++++++------------ python/analyzeRunReports.py | 2 +- shell/runGATKReport.csh | 46 +++++++++++++++++++++++++++++ 3 files changed, 86 insertions(+), 20 deletions(-) create mode 100755 shell/runGATKReport.csh diff --git a/R/GATKRunReport.R b/R/GATKRunReport.R index 5ab9c0dc3..10088d7e9 100644 --- a/R/GATKRunReport.R +++ b/R/GATKRunReport.R @@ -10,27 +10,47 @@ if ( onCMDLine ) { d$end.time = as.Date(d$end.time) } # only read into d if its' available, otherwise assume the data is already loaded +noRecords <- function(name) { + print(paste("No records", name)) + frame() + title(paste("No records of", name), cex=2) +} + reportCountingPlot <- function(values, name, moreMargin = 0, ...) { - par(las=2) # make label text perpendicular to axis - oldMar <- par("mar") - par(mar=c(5,8+moreMargin,4,2)) # increase y-axis margin. - barplot(sort(table(factor(values))), horiz=TRUE, cex.names = 0.5, main = name, xlab="Counts", log="x", ...) - par("mar" = oldMar) - par("las" = 1) + #print(length(values)) + if ( length(values) > 0 ) { + par(las=2) # make label text perpendicular to axis + oldMar <- par("mar") + par(mar=c(5,8+moreMargin,4,2)) # increase y-axis margin. + t = table(factor(values)) + barplot(sort(t), horiz=TRUE, cex.names = 0.5, main = name, xlab="Counts", log="x", ...) 
+ par("mar" = oldMar) + par("las" = 1) + } else { + noRecords(name) + } } reportConditionalCountingPlot <- function(values, conditions, name, moreMargin = 0, ...) { - par(las=2) # make label text perpendicular to axis - oldMar <- par("mar") - par(mar=c(5,8+moreMargin,4,2)) # increase y-axis margin. - t = table(values, conditions) - t = t[, order(colSums(t))] - #print(list(t = t)) - nconds = dim(t)[2] - cols = rainbow(nconds) - barplot(t, legend.text = T, horiz=TRUE, cex.names = 0.5, main = name, xlab="Counts", col=cols, cex=0.5, ...) - par("mar" = oldMar) - par("las" = 1) + if ( length(values) > 0 ) { + t = table(values, conditions) + t = t[, order(colSums(t))] + #print(list(t = t)) + if ( ! is.null(dim(t)) ) { + par(las=2) # make label text perpendicular to axis + oldMar <- par("mar") + par(mar=c(5,8+moreMargin,4,2)) # increase y-axis margin. + nconds = dim(t)[2] + cols = rainbow(nconds) + barplot(t, legend.text = T, horiz=TRUE, cex.names = 0.5, main = name, xlab="Counts", col=cols, cex=0.5, ...) 
+ par("mar" = oldMar) + par("las" = 1) + } else { + noRecords(name) + } + } else { + noRecords(name) + } } @@ -150,10 +170,10 @@ generateOneReport <- function(d, header, includeByWeek = T) { reportHist(log10(d$run.time / min), head("Run time (log10[min])")) reportCountingPlot(d$user.name, head("user")) - reportCountingPlot(d$host.name, head("host")) + #reportCountingPlot(d$host.name, head("host")) reportCountingPlot(d$java, head("Java version")) - reportCountingPlot(d$machine, head("Machine")) + #reportCountingPlot(d$machine, head("Machine")) #reportCountingPlot(d$working.directory, head("Working directory")) } diff --git a/python/analyzeRunReports.py b/python/analyzeRunReports.py index 6dbe6d34f..91ac835e7 100755 --- a/python/analyzeRunReports.py +++ b/python/analyzeRunReports.py @@ -2,7 +2,7 @@ import os.path import sys from optparse import OptionParser from itertools import * -from xml.etree.ElementTree import * +from xml.etree.cElementTree import * import gzip import datetime import re diff --git a/shell/runGATKReport.csh b/shell/runGATKReport.csh new file mode 100755 index 000000000..ccbf231e0 --- /dev/null +++ b/shell/runGATKReport.csh @@ -0,0 +1,46 @@ +#!/bin/tcsh + +source /broad/tools/scripts/useuse + +# reuse Python-2.5 +# use R-2.11 + +setenv DIR /humgen/gsa-hpprojects/GATK/reports +setenv ARCHIVE_DIR $DIR/archive +setenv SUMMARY_DIR $DIR/summaries +setenv DATE `date +"%m_%d_%Y"` +setenv ARCHIVE $ARCHIVE_DIR/$DATE +setenv SUMMARY $SUMMARY_DIR/$DATE +setenv GATK ~/dev/GenomeAnalysisTK/trunk + +cd $DIR + +# echo "Archiving recently submitted jobs" +# python $GATK/python/analyzeRunReports.py archive $DIR/submitted -o $ARCHIVE.gz -D + +# echo "All runs" +# python $GATK/python/analyzeRunReports.py summary $ARCHIVE.gz --max_days 1 + +# echo "No-dev" +# python $GATK/python/analyzeRunReports.py summary $ARCHIVE.gz --max_days 1 --no-dev +# python $GATK/python/analyzeRunReports.py exceptions $ARCHIVE.gz --max_days 1 -E sting --no-dev + +#echo "Archive directory 
contents" +#ls -ltrh $ARCHIVE_DIR + +foreach maxDays ( 7 30 360 ) + echo "Creating table" + setenv table $ARCHIVE.${maxDays}_days.table + python $GATK/python/analyzeRunReports.py table $ARCHIVE_DIR/*.gz -o $table --max_days $maxDays + + echo "Creating summary" + Rscript $GATK/R/GATKRunReport.R $table $SUMMARY.${maxDays}_days.pdf "of previous $maxDays days" + + echo "Creating exception report" + python $GATK/python/analyzeRunReports.py exceptions $ARCHIVE_DIR/*.gz -o $SUMMARY.${maxDays}_days.sting.exceptions.txt --max_days $maxDays -E sting --no-dev + python $GATK/python/analyzeRunReports.py exceptions $ARCHIVE_DIR/*.gz -o $SUMMARY.${maxDays}_days.user.exceptions.txt --max_days $maxDays -E user --no-dev + + rm $table +end + +echo "GATK daily run report" | mutt -a $SUMMARY.30_days.pdf -a $SUMMARY.360_days.pdf -a $SUMMARY.7_days.pdf -s "GATK Run report PDFs for $DATE" gsamembers