improvements to the report code

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4280 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
depristo 2010-09-15 00:45:13 +00:00
parent 4e83ba411f
commit b57a0a0310
2 changed files with 130 additions and 57 deletions

View File

@ -6,7 +6,7 @@ if (! is.na(args[3]) ) { name = args[3] } else { name = "" }
if ( onCMDLine ) {
print(paste("Reading data from", args[1]))
d = read.table(args[1], header=T, sep="\t")
d$start.time = as.Date(d$start.time)
#d$start.time = as.Date(d$start.time)
d$end.time = as.Date(d$end.time)
} # only read into d if its' available, otherwise assume the data is already loaded
@ -14,11 +14,26 @@ reportCountingPlot <- function(values, name, moreMargin = 0, ...) {
par(las=2) # make label text perpendicular to axis
oldMar <- par("mar")
par(mar=c(5,8+moreMargin,4,2)) # increase y-axis margin.
barplot(sort(table(values)), horiz=TRUE, cex.names = 0.5, main = name, xlab="Counts", ...)
barplot(sort(table(factor(values))), horiz=TRUE, cex.names = 0.5, main = name, xlab="Counts", ...)
par("mar" = oldMar)
par("las" = 1)
}
# Draw a horizontal stacked bar chart of how often each value occurs within
# each condition.
#
# Args:
#   values:     vector whose distinct entries become the stacked segments of
#               every bar (and the legend entries).
#   conditions: vector of the same length as `values`; each distinct entry
#               gets one bar, and bars are ordered by their total count.
#   name:       plot title.
#   moreMargin: extra lines added to the left margin for long bar labels.
#   ...:        forwarded to barplot().
#
# Returns NULL invisibly; called for its plotting side effect. Nothing is
# drawn when there are no observations to count.
reportConditionalCountingPlot <- function(values, conditions, name, moreMargin = 0, ...) {
  # factor() drops unused levels so that subsets of a larger data frame do not
  # contribute empty rows/columns (reportCountingPlot does the same).
  counts <- table(values = factor(values), conditions = factor(conditions))
  # drop = FALSE keeps a one-condition table two-dimensional; without it the
  # result collapses to a vector and the row/column bookkeeping below breaks.
  counts <- counts[, order(colSums(counts)), drop = FALSE]
  print(list(t = counts))

  if (length(counts) == 0) {
    return(invisible(NULL))
  }

  oldMar <- par("mar")
  # Restore the margins and reset label orientation even if barplot() fails.
  on.exit(par(mar = oldMar, las = 1), add = TRUE)
  # las = 2 makes label text perpendicular to the axis; the wider left margin
  # leaves room for the bar labels.
  par(las = 2, mar = c(5, 8 + moreMargin, 4, 2))

  # In a stacked barplot the coloured components (and the legend built from
  # legend.text = TRUE) correspond to the ROWS of the matrix, so one colour is
  # needed per row, not per column.
  segmentCols <- rainbow(nrow(counts))
  barplot(counts, legend.text = TRUE, horiz = TRUE, cex.names = 0.5, main = name, xlab = "Counts", col = segmentCols, cex = 0.5, ...)
  invisible(NULL)
}
reportHist <- function(values, name, ...) {
if ( ! all(is.na(values) ) )
hist(values, main=name, 20, xlab="", col="cornflowerblue", ...)
@ -37,18 +52,20 @@ myTable <- function(x, y, reqRowNonZero = F) {
# todo -- must be robust to smaller sizes
plotTable <- function(table, name) {
plotTable <- function(table, name, ...) {
ncols = dim(table)[2]
nrows = dim(table)[1]
cols = rainbow(nrows)
tableMin = min(apply(table, 2, min))
tableMax = max(apply(table, 2, max))
plot( as.numeric(apply(table, 2, sum)), ylim=c(tableMin, tableMax), type="n", main = name, ylab="Frequency", xlab="Date", xaxt="n")
axis(1, 1:ncols, labels=colnames(table))
for ( i in 1:nrows )
points(table[i,], type="b", col=cols[i])
legend("topright", row.names(table), fill=cols, cex=0.5)
#return(table)
if ( ! is.null(nrows) ) {
cols = rainbow(nrows)
tableMin = min(apply(table, 2, min))
tableMax = max(apply(table, 2, max))
plot( as.numeric(apply(table, 2, sum)), ylim=c(tableMin, tableMax), type="n", main = name, ylab="Frequency", xlab="Date", xaxt="n", ...)
axis(1, 1:ncols, labels=colnames(table))
for ( i in 1:nrows )
points(table[i,], type="b", col=cols[i])
legend("topright", row.names(table), fill=cols, cex=0.5)
#return(table)
}
}
RUNNING_GATK_RUNTIME <- 60 * 5 # 5 minutes => bad failure
@ -61,38 +78,61 @@ successfulRuns <- function(d) {
return(x)
}
# Start a fresh page in the report that carries only a section title.
#
# Args:
#   name: title text to draw on the otherwise empty page.
addSection <- function(name) {
  # The margins must be passed as a named argument: par("mar", c(...)) merely
  # queries "mar" and warns about the unnamed numeric vector, leaving whatever
  # margins the previous plot set in place.
  par(mar = c(5, 4, 4, 2))
  frame()
  title(name, cex = 2)
}
generateOneReport <- function(d, header, includeByWeek = T) {
head <- function(s) {
return(paste("Section:", header, ":", s))
return(paste("Section:", header, "\n", s))
}
excepted <- subset(d, exception.msg != "NA")
badExcepted <- subset(excepted, run.time > RUNNING_GATK_RUNTIME)
UserExceptions <- subset(excepted, is.user.exception == "true")
StingExceptions <- subset(excepted, is.user.exception == "false" | is.user.exception == "NA" | is.na(is.user.exception))
addSection(paste("GATK run report", name, "for", Sys.Date(), "\nwith", dim(d)[1], "run repository records"))
reportCountingPlot(d$walker.name, head("Walker invocations"))
reportConditionalCountingPlot(d$user.name, d$walker.name, head("Walker invocations by user"))
reportCountingPlot(d$svn.version, head("SVN version"))
reportConditionalCountingPlot(d$svn.version, d$user.name, head("SVN by user"))
par("mar", c(5, 4, 4, 2))
frame()
title(paste("GATK run report", name, "for", Sys.Date(), "\nwith", dim(d)[1], "run repository records"), cex=2)
# cuts by time
if ( includeByWeek ) {
plotTable(table(rep("GATK Invocations", length(d$start.time)), cut(d$start.time, "weeks")), head("GATK Invocations by week"))
plotTable(myTable(successfulRuns(d), cut(d$start.time, "weeks")), head("Successful and failing GATK invocations per week"))
plotTable(table(rep("GATK Invocations", length(d$end.time)), cut(d$end.time, "weeks")), head("GATK Invocations by week"))
plotTable(myTable(successfulRuns(d), cut(d$end.time, "weeks")), head("Successful and failing GATK invocations per week"))
plotTable(myTable(d$svn.version, cut(d$start.time, "weeks")), head("SVN version by week"))
plotTable(myTable(excepted$walker.name, cut(excepted$start.time, "weeks"), reqRowNonZero = T), head("Walkers with exceptions by week"))
plotTable(myTable(d$svn.version, cut(d$end.time, "weeks")), head("SVN version by week"))
}
plotTable(table(rep("GATK Invocations", length(d$start.time)), d$start.time), head("GATK Invocations by day"))
plotTable(myTable(d$svn.version, d$start.time), head("SVN version by day"))
plotTable(table(rep("GATK Invocations", length(d$end.time)), d$end.time), head("GATK Invocations by day"))
plotTable(myTable(d$svn.version, d$end.time), head("SVN version by day"))
reportCountingPlot(d$walker.name, head("Walker invocations"))
reportCountingPlot(d$svn.version, head("GATK SVN version"))
#
# Exception handling
#
addExceptionSection <- function(subd, subname, exceptionColor) {
addSection(paste(subname))
#print(list(subd = length(subd$end.time), name=subname))
reportCountingPlot(subd$walker.name, head(paste("Walkers with", subname)), col=exceptionColor)
reportCountingPlot(subd$exception.at, head(paste(subname, "locations")), 12, col=exceptionColor)
reportCountingPlot(subd$exception.msg, head(paste(subname, "messages")), 12, col=exceptionColor)
reportConditionalCountingPlot(subd$user.name, subd$exception.at, head("Walker invocations by user"), 12)
if ( includeByWeek && length(subd$end.time) > 0 ) {
plotTable(myTable(subd$walker.name, cut(subd$end.time, "weeks"), reqRowNonZero = T), head(paste("Walkers with", subname,"by week")), col=exceptionColor)
}
}
addExceptionSection(excepted, "Exceptions", "grey")
reportCountingPlot(excepted$user.name, head("Usernames generating exceptions"), col="grey")
addExceptionSection(StingExceptions, "StingExceptions", "red")
addExceptionSection(UserExceptions, "UserExceptions", "blue")
# reportCountingPlot(d$java.tmp.directory, head("Java tmp directory"))
reportCountingPlot(d$working.directory, head("Working directory"))
reportCountingPlot(d$user.name, head("user"))
reportCountingPlot(d$host.name, head("host"))
reportCountingPlot(d$java, head("Java version"))
reportCountingPlot(d$machine, head("Machine"))
Gb <- 1024^3
reportHist(d$total.memory / Gb, head("Used memory"))
@ -100,21 +140,20 @@ generateOneReport <- function(d, header, includeByWeek = T) {
min <- 60
reportHist(log10(d$run.time / min), head("Run time (log10[min])"))
exceptionColor = "red"
reportCountingPlot(excepted$walker.name, head("Walker exceptions"), col=exceptionColor)
reportCountingPlot(subset(excepted, run.time > RUNNING_GATK_RUNTIME)$walker.name, paste(head("Long-running walker exceptions (>"),RUNNING_GATK_RUNTIME,"seconds runtime)"), col=exceptionColor)
reportCountingPlot(subset(excepted, run.time < RUNNING_GATK_RUNTIME)$walker.name, paste(head("Start-up walker exceptions (<"),RUNNING_GATK_RUNTIME,"seconds runtime)"), col=exceptionColor)
reportCountingPlot(excepted$user.name, head("Usernames generating exceptions"), col=exceptionColor)
reportCountingPlot(excepted$exception.msg, head("Exception messages"), 12)
reportCountingPlot(excepted$exception.at, head("Exception locations"), 12)
reportCountingPlot(d$user.name, head("user"))
reportCountingPlot(d$host.name, head("host"))
reportCountingPlot(d$java, head("Java version"))
reportCountingPlot(d$machine, head("Machine"))
reportCountingPlot(d$working.directory, head("Working directory"))
}
RUNME = T
if ( RUNME ) {
lastWeek = levels(cut(d$start.time, "weeks"))[-1]
lastWeek = levels(cut(d$end.time, "weeks"))[-1]
generateOneReport(d, "Overall")
#generateOneReport(subset(d, start.time >= lastWeek), "Just last week to date", includeByWeek = F)
#generateOneReport(subset(d, end.time >= lastWeek), "Just last week to date", includeByWeek = F)
}
if ( onCMDLine ) dev.off()

View File

@ -5,6 +5,7 @@ from itertools import *
from xml.etree.ElementTree import *
import gzip
import datetime
import re
MISSING_VALUE = "NA"
RUN_REPORT_LIST = "GATK-run-reports"
@ -101,9 +102,20 @@ def eltIsException(elt):
def parseException(elt):
msgElt = elt.find("message")
msgText = "MISSING"
userException = "NA"
if msgElt != None: msgText = msgElt.text
return msgText, elt.find("stacktrace").find("string").text
stackTrace = elt.find("stacktrace").find("string").text
if elt.find("is-user-exception") != None:
#print elt.find("is-user-exception")
userException = elt.find("is-user-exception").text
return msgText, stackTrace, userException
def javaExceptionFile(javaException):
    """Return the parenthesised '<File>.java:<line>' part of a stack-trace line.

    The input is searched for text of the form '(Something.java:123)'; when it
    is found, the text between the parentheses is returned.  When no such
    location is present the input string is returned unchanged, so callers
    always get a string back.
    """
    m = re.search(r"\((.*\.java:.*)\)", javaException)
    if m is not None:
        return m.group(1)
    # Fall back to the full text; the original expression statement here
    # lacked `return`, so the function silently produced None.
    return javaException
class RecordDecoder:
def __init__(self):
@ -118,6 +130,12 @@ class RecordDecoder:
def formatExceptionAt(elt):
return '%s' % parseException(elt)[1]
def formatExceptionAtBrief(elt):
return '%s' % javaExceptionFile(parseException(elt)[1])
def formatExceptionUser(elt):
return '%s' % parseException(elt)[2]
def add(names, func):
for name in names:
@ -132,7 +150,7 @@ class RecordDecoder:
add(["run-time", "java-tmp-directory", "working-directory", "user-name", "host-name"], id)
add(["java", "machine"], toString)
add(["max-memory", "total-memory", "iterations", "reads"], id)
addComplex("exception", ["exception-msg", "exception-at"], [formatExceptionMsg, formatExceptionAt])
addComplex("exception", ["exception-msg", "exception-at", "exception-at-brief", "is-user-exception"], [formatExceptionMsg, formatExceptionAt, formatExceptionAtBrief, formatExceptionUser])
# add(["command-line"], toString)
def decode(self, report):
@ -234,17 +252,18 @@ class ExceptionReport(StageHandler):
commonExceptions = sorted(commonExceptions, None, lambda x: x.counts)
for common in commonExceptions:
msg, at, svns, walkers, counts, ids, duration, users = common.toStrings()
msg, at, svns, walkers, counts, ids, duration, users, userError = common.toStrings()
print >> self.out, ''.join(['*'] * 80)
print >> self.out, 'Exception :', msg
print >> self.out, ' at :', at
print >> self.out, ' walkers :', walkers
print >> self.out, ' svns :', svns
print >> self.out, ' duration :', duration
print >> self.out, ' occurrences :', counts
print >> self.out, ' users :', users
print >> self.out, ' ids :', ids
print >> self.out, 'Exception :', msg
print >> self.out, ' is-user-exception? :', userError
print >> self.out, ' at :', at
print >> self.out, ' walkers :', walkers
print >> self.out, ' svns :', svns
print >> self.out, ' duration :', duration
print >> self.out, ' occurrences :', counts
print >> self.out, ' users :', users
print >> self.out, ' ids :', ids
class CommonException:
MAX_SET_ITEMS_TO_SHOW = 5
@ -254,6 +273,7 @@ class CommonException:
self.at = ex['exception-at']
self.svns = set([ex['svn-version']])
self.users = set([ex['user-name']])
self.userError = ex['is-user-exception']
self.counts = 1
self.times = set([decodeTime(ex['start-time'])])
self.walkers = set([ex['walker-name']])
@ -285,11 +305,17 @@ class CommonException:
return ','.join(s)
def duration(self):
x = sorted(self.times)
return "-".join(map(lambda x: x.strftime("%m/%d/%y"), [x[0], x[-1]]))
x = sorted(filter(lambda x: x != "ND", self.times))
if len(x) >= 2:
return "-".join(map(lambda x: x.strftime("%m/%d/%y"), [x[0], x[-1]]))
elif len(x) == 1:
return x[0]
else:
return "ND"
def toStrings(self):
return [self.bestExample(self.msgs), self.at, self.setString(self.svns), self.setString(self.walkers), self.counts, self.setString(self.ids), self.duration(), self.setString(self.users)]
return [self.bestExample(self.msgs), self.at, self.setString(self.svns), self.setString(self.walkers), self.counts, self.setString(self.ids), self.duration(), self.setString(self.users), self.userError]
addHandler('exceptions', ExceptionReport)
@ -354,7 +380,10 @@ def resolveFiles(paths):
return allFiles
def decodeTime(time):
return datetime.datetime.strptime(time.split()[0], "%Y/%m/%d")
if time == "ND":
return "ND"
else:
return datetime.datetime.strptime(time.split()[0], "%Y/%m/%d")
#return datetime.datetime.strptime(time, "%Y/%m/%d %H.%M.%S")
def passesFilters(elt):
@ -377,7 +406,12 @@ def readReports(files):
for file in files:
if OPTIONS.verbose: print 'Reading file', file
input = openFile(file)
tree = ElementTree(file=input)
try:
tree = ElementTree(file=input)
except:
print "EXCEPTING FILE", file
raise
elem = tree.getroot()
if elem.tag == RUN_REPORT_LIST:
for sub in elem: