Stability improvements to the GATK run report system. The R code is now robust to empty inputs. The XML parser uses the C backend in Python (cElementTree), which is about 10x faster. Added a shell script that runs the daily reports, and linked the /humgen/ runme.csh to this script. The script now emails the daily PDFs to the group (gsamembers).

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4845 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
depristo 2010-12-15 14:56:12 +00:00
parent 5a27d231fa
commit 46cd227613
3 changed files with 86 additions and 20 deletions

View File

@ -10,27 +10,47 @@ if ( onCMDLine ) {
d$end.time = as.Date(d$end.time)
} # only read into d if it's available; otherwise assume the data is already loaded
# Emit a placeholder page for a plot that has nothing to show.
#
# name: label of the plot that had no data.
#
# Prints "No records <name>" to the console, then starts an empty plot frame
# and titles it "No records of <name>" so the report keeps one page per plot.
noRecords <- function(name) {
  consoleLine <- paste("No records", name)
  print(consoleLine)

  # frame() advances to a fresh, empty page on the current device
  frame()
  pageTitle <- paste("No records of", name)
  title(pageTitle, cex=2)
}
# Horizontal bar chart (log-scaled x axis) of how often each distinct value
# occurs in `values`.
#
# values:     vector to tabulate; one bar per distinct value, sorted by count.
# name:       plot title; also handed to noRecords() when nothing can be drawn.
# moreMargin: extra lines added to the left margin (for long bar labels).
# ...:        forwarded to barplot().
#
# Returns NULL invisibly; called for its plotting side effect.
#
# The plot is drawn exactly once, and only when there is something to count.
# Empty input and input consisting only of NA both tabulate to an empty table,
# which barplot() cannot draw, so those cases produce a noRecords() page.
reportCountingPlot <- function(values, name, moreMargin = 0, ...) {
  counts <- table(factor(values))
  if ( length(counts) == 0 ) {
    noRecords(name)
    return(invisible(NULL))
  }

  # Restore the margins even if barplot() fails. las is reset to 1 (not to its
  # previous value) because that is the state this function has always left
  # behind for the plots that follow it.
  oldMar <- par("mar")
  on.exit(par(mar = oldMar, las = 1), add = TRUE)

  par(las = 2)                              # make label text perpendicular to axis
  par(mar = c(5, 8 + moreMargin, 4, 2))     # increase y-axis margin
  barplot(sort(counts), horiz = TRUE, cex.names = 0.5, main = name, xlab = "Counts", log = "x", ...)
  invisible(NULL)
}
# Horizontal stacked bar chart of `values` cross-tabulated against
# `conditions`: one bar per condition (ordered by total count), with one
# stacked, coloured segment per distinct value and a legend keyed by value.
#
# values:     vector whose distinct entries become the stacked segments.
# conditions: vector (same length as values) whose distinct entries become bars.
# name:       plot title; also handed to noRecords() when nothing can be drawn.
# moreMargin: extra lines added to the left margin (for long bar labels).
# ...:        forwarded to barplot().
#
# Returns NULL invisibly; called for its plotting side effect.
#
# The plot is drawn exactly once, and only after the table has been checked.
# A noRecords() page is produced instead when `values` is empty, when the
# cross-tabulation has a zero extent, or when it collapses to a plain vector
# (a single distinct value or a single condition makes `[` drop a dimension).
reportConditionalCountingPlot <- function(values, conditions, name, moreMargin = 0, ...) {
  if ( length(values) == 0 ) {
    noRecords(name)
    return(invisible(NULL))
  }

  counts <- table(values, conditions)
  counts <- counts[, order(colSums(counts))]
  if ( is.null(dim(counts)) || any(dim(counts) == 0) ) {
    noRecords(name)
    return(invisible(NULL))
  }

  # Restore the margins even if barplot() fails. las is reset to 1 (not to its
  # previous value) because that is the state this function has always left
  # behind for the plots that follow it.
  oldMar <- par("mar")
  on.exit(par(mar = oldMar, las = 1), add = TRUE)

  par(las = 2)                              # make label text perpendicular to axis
  par(mar = c(5, 8 + moreMargin, 4, 2))     # increase y-axis margin

  # For a matrix, barplot() assigns colours to the stacked components, i.e. the
  # rows (values), and the legend is built from the row names. The palette
  # therefore needs one colour per row, not one per column.
  cols <- rainbow(nrow(counts))
  barplot(counts, legend.text = TRUE, horiz = TRUE, cex.names = 0.5, main = name, xlab = "Counts", col = cols, cex = 0.5, ...)
  invisible(NULL)
}
@ -150,10 +170,10 @@ generateOneReport <- function(d, header, includeByWeek = T) {
reportHist(log10(d$run.time / min), head("Run time (log10[min])"))
reportCountingPlot(d$user.name, head("user"))
reportCountingPlot(d$host.name, head("host"))
#reportCountingPlot(d$host.name, head("host"))
reportCountingPlot(d$java, head("Java version"))
reportCountingPlot(d$machine, head("Machine"))
#reportCountingPlot(d$machine, head("Machine"))
#reportCountingPlot(d$working.directory, head("Working directory"))
}

View File

@ -2,7 +2,7 @@ import os.path
import sys
from optparse import OptionParser
from itertools import *
from xml.etree.ElementTree import *
from xml.etree.cElementTree import *
import gzip
import datetime
import re

View File

@ -0,0 +1,46 @@
#!/bin/tcsh
#
# Daily GATK run-report driver.
#
# For each look-back window (7, 30 and 360 days) this script:
#   1. builds a temporary table from every *.gz file in $ARCHIVE_DIR,
#   2. renders that table to a PDF summary with GATKRunReport.R,
#   3. writes two exception reports (-E sting and -E user),
#   4. deletes the temporary table.
# Finally it mails the three PDFs to gsamembers with mutt.
#
# NOTE: the archiving step and the one-day summaries below are commented out,
# so this script only reads the *.gz files that already exist in $ARCHIVE_DIR;
# it does not create them.

# presumably sets up the `use` / `reuse` environment commands referenced in
# the commented lines below -- TODO confirm
source /broad/tools/scripts/useuse
# reuse Python-2.5
# use R-2.11

# Directory layout: everything lives under $DIR.
setenv DIR /humgen/gsa-hpprojects/GATK/reports
setenv ARCHIVE_DIR $DIR/archive
setenv SUMMARY_DIR $DIR/summaries
# Today's date as MM_DD_YYYY; used as the file-name stem for all outputs.
setenv DATE `date +"%m_%d_%Y"`
setenv ARCHIVE $ARCHIVE_DIR/$DATE
setenv SUMMARY $SUMMARY_DIR/$DATE
# Checkout that provides python/analyzeRunReports.py and R/GATKRunReport.R.
# `~` resolves to the home directory of whichever user runs the script.
setenv GATK ~/dev/GenomeAnalysisTK/trunk
cd $DIR

# --- disabled: archive newly submitted reports and print one-day summaries ---
# echo "Archiving recently submitted jobs"
# python $GATK/python/analyzeRunReports.py archive $DIR/submitted -o $ARCHIVE.gz -D
# echo "All runs"
# python $GATK/python/analyzeRunReports.py summary $ARCHIVE.gz --max_days 1
# echo "No-dev"
# python $GATK/python/analyzeRunReports.py summary $ARCHIVE.gz --max_days 1 --no-dev
# python $GATK/python/analyzeRunReports.py exceptions $ARCHIVE.gz --max_days 1 -E sting --no-dev
#echo "Archive directory contents"
#ls -ltrh $ARCHIVE_DIR

# One pass per look-back window, in days.
foreach maxDays ( 7 30 360 )
echo "Creating table"
# Temporary per-window table; removed at the end of this iteration.
setenv table $ARCHIVE.${maxDays}_days.table
python $GATK/python/analyzeRunReports.py table $ARCHIVE_DIR/*.gz -o $table --max_days $maxDays
echo "Creating summary"
# Arguments: input table, output PDF, and a text fragment naming the window
# (presumably used in plot titles -- verify against GATKRunReport.R).
Rscript $GATK/R/GATKRunReport.R $table $SUMMARY.${maxDays}_days.pdf "of previous $maxDays days"
echo "Creating exception report"
# Two text reports per window, differing only in the -E selector (sting vs user).
python $GATK/python/analyzeRunReports.py exceptions $ARCHIVE_DIR/*.gz -o $SUMMARY.${maxDays}_days.sting.exceptions.txt --max_days $maxDays -E sting --no-dev
python $GATK/python/analyzeRunReports.py exceptions $ARCHIVE_DIR/*.gz -o $SUMMARY.${maxDays}_days.user.exceptions.txt --max_days $maxDays -E user --no-dev
rm $table
end

# Mail the three PDFs as attachments (-a) with a dated subject (-s); the
# message body is the echoed line. No exit statuses are checked above, so this
# runs even if a PDF was not produced.
echo "GATK daily run report" | mutt -a $SUMMARY.30_days.pdf -a $SUMMARY.360_days.pdf -a $SUMMARY.7_days.pdf -s "GATK Run report PDFs for $DATE" gsamembers