improvements to the report code
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4280 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
4e83ba411f
commit
b57a0a0310
|
|
@ -6,7 +6,7 @@ if (! is.na(args[3]) ) { name = args[3] } else { name = "" }
|
|||
if ( onCMDLine ) {
|
||||
print(paste("Reading data from", args[1]))
|
||||
d = read.table(args[1], header=T, sep="\t")
|
||||
d$start.time = as.Date(d$start.time)
|
||||
#d$start.time = as.Date(d$start.time)
|
||||
d$end.time = as.Date(d$end.time)
|
||||
} # only read into d if it's available; otherwise assume the data is already loaded
|
||||
|
||||
|
|
@ -14,11 +14,26 @@ reportCountingPlot <- function(values, name, moreMargin = 0, ...) {
|
|||
par(las=2) # make label text perpendicular to axis
|
||||
oldMar <- par("mar")
|
||||
par(mar=c(5,8+moreMargin,4,2)) # increase y-axis margin.
|
||||
barplot(sort(table(values)), horiz=TRUE, cex.names = 0.5, main = name, xlab="Counts", ...)
|
||||
barplot(sort(table(factor(values))), horiz=TRUE, cex.names = 0.5, main = name, xlab="Counts", ...)
|
||||
par("mar" = oldMar)
|
||||
par("las" = 1)
|
||||
}
|
||||
|
||||
# Draw a horizontal stacked bar chart of value counts, one bar per condition.
#
# Args:
#   values:     vector whose distinct entries become the stacked segments
#               (rows of the contingency table) and the legend entries.
#   conditions: vector parallel to `values` whose distinct entries become the
#               bars (columns of the table), ordered by their total count.
#   name:       plot title.
#   moreMargin: extra lines added to the left margin for long bar labels.
#   ...:        forwarded to barplot().
#
# Side effects: prints the contingency table, draws on the active device,
# restores the previous `mar` and leaves `las` at 1 (as before), even when
# plotting fails. Returns NULL invisibly.
reportConditionalCountingPlot <- function(values, conditions, name, moreMargin = 0, ...) {
  oldMar <- par("mar")
  # Restore graphics state on every exit path, not only on success.
  on.exit(par(mar = oldMar, las = 1), add = TRUE)
  par(las = 2) # make label text perpendicular to axis
  par(mar = c(5, 8 + moreMargin, 4, 2)) # increase y-axis margin.

  counts <- table(values, conditions)
  if (length(counts) == 0) {
    # Nothing to count: barplot() cannot draw an empty table.
    return(invisible(NULL))
  }

  # drop = FALSE keeps a two-dimensional table when there is only one
  # condition; otherwise the result collapses to a vector and loses its dims.
  counts <- counts[, order(colSums(counts)), drop = FALSE]
  print(list(t = counts))

  # barplot() applies `col` to the stacked components (the rows shown in the
  # legend), so one colour is needed per row rather than per column.
  segmentCols <- rainbow(nrow(counts))
  barplot(counts, legend.text = TRUE, horiz = TRUE, cex.names = 0.5, main = name, xlab = "Counts", col = segmentCols, cex = 0.5, ...)
  invisible(NULL)
}
|
||||
|
||||
|
||||
reportHist <- function(values, name, ...) {
|
||||
if ( ! all(is.na(values) ) )
|
||||
hist(values, main=name, 20, xlab="", col="cornflowerblue", ...)
|
||||
|
|
@ -37,18 +52,20 @@ myTable <- function(x, y, reqRowNonZero = F) {
|
|||
|
||||
# todo -- must be robust to smaller sizes
|
||||
|
||||
plotTable <- function(table, name) {
|
||||
plotTable <- function(table, name, ...) {
|
||||
ncols = dim(table)[2]
|
||||
nrows = dim(table)[1]
|
||||
cols = rainbow(nrows)
|
||||
tableMin = min(apply(table, 2, min))
|
||||
tableMax = max(apply(table, 2, max))
|
||||
plot( as.numeric(apply(table, 2, sum)), ylim=c(tableMin, tableMax), type="n", main = name, ylab="Frequency", xlab="Date", xaxt="n")
|
||||
axis(1, 1:ncols, labels=colnames(table))
|
||||
for ( i in 1:nrows )
|
||||
points(table[i,], type="b", col=cols[i])
|
||||
legend("topright", row.names(table), fill=cols, cex=0.5)
|
||||
#return(table)
|
||||
if ( ! is.null(nrows) ) {
|
||||
cols = rainbow(nrows)
|
||||
tableMin = min(apply(table, 2, min))
|
||||
tableMax = max(apply(table, 2, max))
|
||||
plot( as.numeric(apply(table, 2, sum)), ylim=c(tableMin, tableMax), type="n", main = name, ylab="Frequency", xlab="Date", xaxt="n", ...)
|
||||
axis(1, 1:ncols, labels=colnames(table))
|
||||
for ( i in 1:nrows )
|
||||
points(table[i,], type="b", col=cols[i])
|
||||
legend("topright", row.names(table), fill=cols, cex=0.5)
|
||||
#return(table)
|
||||
}
|
||||
}
|
||||
|
||||
RUNNING_GATK_RUNTIME <- 60 * 5 # 5 minutes => bad failure
|
||||
|
|
@ -61,38 +78,61 @@ successfulRuns <- function(d) {
|
|||
return(x)
|
||||
}
|
||||
|
||||
# Start a new, otherwise empty page that carries only a section title.
#
# Args:
#   name: text drawn as the page's main title.
#
# Side effects: sets the margins of the active device to c(5, 4, 4, 2) and
# advances it to a new frame.
addSection <- function(name) {
  # par() only sets a parameter when it is passed as a named argument;
  # par("mar", c(...)) leaves the margins unchanged and emits a warning.
  par(mar = c(5, 4, 4, 2))
  frame()
  # The main title is scaled by cex.main; a plain `cex` has no effect on it.
  title(name, cex.main = 2)
}
|
||||
|
||||
generateOneReport <- function(d, header, includeByWeek = T) {
|
||||
head <- function(s) {
|
||||
return(paste("Section:", header, ":", s))
|
||||
return(paste("Section:", header, "\n", s))
|
||||
}
|
||||
|
||||
excepted <- subset(d, exception.msg != "NA")
|
||||
badExcepted <- subset(excepted, run.time > RUNNING_GATK_RUNTIME)
|
||||
UserExceptions <- subset(excepted, is.user.exception == "true")
|
||||
StingExceptions <- subset(excepted, is.user.exception == "false" | is.user.exception == "NA" | is.na(is.user.exception))
|
||||
|
||||
addSection(paste("GATK run report", name, "for", Sys.Date(), "\nwith", dim(d)[1], "run repository records"))
|
||||
|
||||
reportCountingPlot(d$walker.name, head("Walker invocations"))
|
||||
reportConditionalCountingPlot(d$user.name, d$walker.name, head("Walker invocations by user"))
|
||||
reportCountingPlot(d$svn.version, head("SVN version"))
|
||||
reportConditionalCountingPlot(d$svn.version, d$user.name, head("SVN by user"))
|
||||
|
||||
par("mar", c(5, 4, 4, 2))
|
||||
frame()
|
||||
title(paste("GATK run report", name, "for", Sys.Date(), "\nwith", dim(d)[1], "run repository records"), cex=2)
|
||||
|
||||
# cuts by time
|
||||
if ( includeByWeek ) {
|
||||
plotTable(table(rep("GATK Invocations", length(d$start.time)), cut(d$start.time, "weeks")), head("GATK Invocations by week"))
|
||||
plotTable(myTable(successfulRuns(d), cut(d$start.time, "weeks")), head("Successful and failing GATK invocations per week"))
|
||||
plotTable(table(rep("GATK Invocations", length(d$end.time)), cut(d$end.time, "weeks")), head("GATK Invocations by week"))
|
||||
plotTable(myTable(successfulRuns(d), cut(d$end.time, "weeks")), head("Successful and failing GATK invocations per week"))
|
||||
|
||||
plotTable(myTable(d$svn.version, cut(d$start.time, "weeks")), head("SVN version by week"))
|
||||
plotTable(myTable(excepted$walker.name, cut(excepted$start.time, "weeks"), reqRowNonZero = T), head("Walkers with exceptions by week"))
|
||||
plotTable(myTable(d$svn.version, cut(d$end.time, "weeks")), head("SVN version by week"))
|
||||
}
|
||||
plotTable(table(rep("GATK Invocations", length(d$start.time)), d$start.time), head("GATK Invocations by day"))
|
||||
plotTable(myTable(d$svn.version, d$start.time), head("SVN version by day"))
|
||||
plotTable(table(rep("GATK Invocations", length(d$end.time)), d$end.time), head("GATK Invocations by day"))
|
||||
plotTable(myTable(d$svn.version, d$end.time), head("SVN version by day"))
|
||||
|
||||
reportCountingPlot(d$walker.name, head("Walker invocations"))
|
||||
reportCountingPlot(d$svn.version, head("GATK SVN version"))
|
||||
#
|
||||
# Exception handling
|
||||
#
|
||||
addExceptionSection <- function(subd, subname, exceptionColor) {
|
||||
addSection(paste(subname))
|
||||
#print(list(subd = length(subd$end.time), name=subname))
|
||||
reportCountingPlot(subd$walker.name, head(paste("Walkers with", subname)), col=exceptionColor)
|
||||
reportCountingPlot(subd$exception.at, head(paste(subname, "locations")), 12, col=exceptionColor)
|
||||
reportCountingPlot(subd$exception.msg, head(paste(subname, "messages")), 12, col=exceptionColor)
|
||||
reportConditionalCountingPlot(subd$user.name, subd$exception.at, head("Walker invocations by user"), 12)
|
||||
|
||||
if ( includeByWeek && length(subd$end.time) > 0 ) {
|
||||
plotTable(myTable(subd$walker.name, cut(subd$end.time, "weeks"), reqRowNonZero = T), head(paste("Walkers with", subname,"by week")), col=exceptionColor)
|
||||
}
|
||||
}
|
||||
|
||||
addExceptionSection(excepted, "Exceptions", "grey")
|
||||
reportCountingPlot(excepted$user.name, head("Usernames generating exceptions"), col="grey")
|
||||
|
||||
addExceptionSection(StingExceptions, "StingExceptions", "red")
|
||||
addExceptionSection(UserExceptions, "UserExceptions", "blue")
|
||||
|
||||
# reportCountingPlot(d$java.tmp.directory, head("Java tmp directory"))
|
||||
reportCountingPlot(d$working.directory, head("Working directory"))
|
||||
reportCountingPlot(d$user.name, head("user"))
|
||||
reportCountingPlot(d$host.name, head("host"))
|
||||
reportCountingPlot(d$java, head("Java version"))
|
||||
reportCountingPlot(d$machine, head("Machine"))
|
||||
|
||||
Gb <- 1024^3
|
||||
reportHist(d$total.memory / Gb, head("Used memory"))
|
||||
|
|
@ -100,21 +140,20 @@ generateOneReport <- function(d, header, includeByWeek = T) {
|
|||
|
||||
min <- 60
|
||||
reportHist(log10(d$run.time / min), head("Run time (log10[min])"))
|
||||
|
||||
exceptionColor = "red"
|
||||
reportCountingPlot(excepted$walker.name, head("Walker exceptions"), col=exceptionColor)
|
||||
reportCountingPlot(subset(excepted, run.time > RUNNING_GATK_RUNTIME)$walker.name, paste(head("Long-running walker exceptions (>"),RUNNING_GATK_RUNTIME,"seconds runtime)"), col=exceptionColor)
|
||||
reportCountingPlot(subset(excepted, run.time < RUNNING_GATK_RUNTIME)$walker.name, paste(head("Start-up walker exceptions (<"),RUNNING_GATK_RUNTIME,"seconds runtime)"), col=exceptionColor)
|
||||
reportCountingPlot(excepted$user.name, head("Usernames generating exceptions"), col=exceptionColor)
|
||||
reportCountingPlot(excepted$exception.msg, head("Exception messages"), 12)
|
||||
reportCountingPlot(excepted$exception.at, head("Exception locations"), 12)
|
||||
|
||||
reportCountingPlot(d$user.name, head("user"))
|
||||
reportCountingPlot(d$host.name, head("host"))
|
||||
|
||||
reportCountingPlot(d$java, head("Java version"))
|
||||
reportCountingPlot(d$machine, head("Machine"))
|
||||
reportCountingPlot(d$working.directory, head("Working directory"))
|
||||
}
|
||||
|
||||
RUNME = T
|
||||
if ( RUNME ) {
|
||||
lastWeek = levels(cut(d$start.time, "weeks"))[-1]
|
||||
lastWeek = levels(cut(d$end.time, "weeks"))[-1]
|
||||
generateOneReport(d, "Overall")
|
||||
#generateOneReport(subset(d, start.time >= lastWeek), "Just last week to date", includeByWeek = F)
|
||||
#generateOneReport(subset(d, end.time >= lastWeek), "Just last week to date", includeByWeek = F)
|
||||
}
|
||||
|
||||
if ( onCMDLine ) dev.off()
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@ from itertools import *
|
|||
from xml.etree.ElementTree import *
|
||||
import gzip
|
||||
import datetime
|
||||
import re
|
||||
|
||||
MISSING_VALUE = "NA"
|
||||
RUN_REPORT_LIST = "GATK-run-reports"
|
||||
|
|
@ -101,9 +102,20 @@ def eltIsException(elt):
|
|||
def parseException(elt):
|
||||
msgElt = elt.find("message")
|
||||
msgText = "MISSING"
|
||||
userException = "NA"
|
||||
if msgElt != None: msgText = msgElt.text
|
||||
return msgText, elt.find("stacktrace").find("string").text
|
||||
stackTrace = elt.find("stacktrace").find("string").text
|
||||
if elt.find("is-user-exception") != None:
|
||||
#print elt.find("is-user-exception")
|
||||
userException = elt.find("is-user-exception").text
|
||||
return msgText, stackTrace, userException
|
||||
|
||||
def javaExceptionFile(javaException):
    """Return the "(File.java:line)" location embedded in a Java stack-trace line.

    javaException -- stack-trace text, e.g. "at a.b.C.run(C.java:42)".

    Returns the text between the parentheses (without them) when a
    "*.java:*" location is present; otherwise returns the input unchanged so
    that callers always get something printable rather than None.
    """
    if not javaException:
        # Missing or empty stack-trace text: nothing to search.
        return javaException
    # Raw string so the backslashes reach the regex engine untouched.
    m = re.search(r"\((.*\.java:.*)\)", javaException)
    if m is not None:
        return m.group(1)
    # The original fell through here without a return and yielded None.
    return javaException
|
||||
|
||||
class RecordDecoder:
|
||||
def __init__(self):
|
||||
|
|
@ -118,6 +130,12 @@ class RecordDecoder:
|
|||
|
||||
def formatExceptionAt(elt):
|
||||
return '%s' % parseException(elt)[1]
|
||||
|
||||
def formatExceptionAtBrief(elt):
|
||||
return '%s' % javaExceptionFile(parseException(elt)[1])
|
||||
|
||||
def formatExceptionUser(elt):
|
||||
return '%s' % parseException(elt)[2]
|
||||
|
||||
def add(names, func):
|
||||
for name in names:
|
||||
|
|
@ -132,7 +150,7 @@ class RecordDecoder:
|
|||
add(["run-time", "java-tmp-directory", "working-directory", "user-name", "host-name"], id)
|
||||
add(["java", "machine"], toString)
|
||||
add(["max-memory", "total-memory", "iterations", "reads"], id)
|
||||
addComplex("exception", ["exception-msg", "exception-at"], [formatExceptionMsg, formatExceptionAt])
|
||||
addComplex("exception", ["exception-msg", "exception-at", "exception-at-brief", "is-user-exception"], [formatExceptionMsg, formatExceptionAt, formatExceptionAtBrief, formatExceptionUser])
|
||||
# add(["command-line"], toString)
|
||||
|
||||
def decode(self, report):
|
||||
|
|
@ -234,17 +252,18 @@ class ExceptionReport(StageHandler):
|
|||
commonExceptions = sorted(commonExceptions, None, lambda x: x.counts)
|
||||
|
||||
for common in commonExceptions:
|
||||
msg, at, svns, walkers, counts, ids, duration, users = common.toStrings()
|
||||
msg, at, svns, walkers, counts, ids, duration, users, userError = common.toStrings()
|
||||
|
||||
print >> self.out, ''.join(['*'] * 80)
|
||||
print >> self.out, 'Exception :', msg
|
||||
print >> self.out, ' at :', at
|
||||
print >> self.out, ' walkers :', walkers
|
||||
print >> self.out, ' svns :', svns
|
||||
print >> self.out, ' duration :', duration
|
||||
print >> self.out, ' occurrences :', counts
|
||||
print >> self.out, ' users :', users
|
||||
print >> self.out, ' ids :', ids
|
||||
print >> self.out, 'Exception :', msg
|
||||
print >> self.out, ' is-user-exception? :', userError
|
||||
print >> self.out, ' at :', at
|
||||
print >> self.out, ' walkers :', walkers
|
||||
print >> self.out, ' svns :', svns
|
||||
print >> self.out, ' duration :', duration
|
||||
print >> self.out, ' occurrences :', counts
|
||||
print >> self.out, ' users :', users
|
||||
print >> self.out, ' ids :', ids
|
||||
|
||||
class CommonException:
|
||||
MAX_SET_ITEMS_TO_SHOW = 5
|
||||
|
|
@ -254,6 +273,7 @@ class CommonException:
|
|||
self.at = ex['exception-at']
|
||||
self.svns = set([ex['svn-version']])
|
||||
self.users = set([ex['user-name']])
|
||||
self.userError = ex['is-user-exception']
|
||||
self.counts = 1
|
||||
self.times = set([decodeTime(ex['start-time'])])
|
||||
self.walkers = set([ex['walker-name']])
|
||||
|
|
@ -285,11 +305,17 @@ class CommonException:
|
|||
return ','.join(s)
|
||||
|
||||
def duration(self):
|
||||
x = sorted(self.times)
|
||||
return "-".join(map(lambda x: x.strftime("%m/%d/%y"), [x[0], x[-1]]))
|
||||
x = sorted(filter(lambda x: x != "ND", self.times))
|
||||
if len(x) >= 2:
|
||||
return "-".join(map(lambda x: x.strftime("%m/%d/%y"), [x[0], x[-1]]))
|
||||
elif len(x) == 1:
|
||||
return x[0]
|
||||
else:
|
||||
return "ND"
|
||||
|
||||
|
||||
def toStrings(self):
|
||||
return [self.bestExample(self.msgs), self.at, self.setString(self.svns), self.setString(self.walkers), self.counts, self.setString(self.ids), self.duration(), self.setString(self.users)]
|
||||
return [self.bestExample(self.msgs), self.at, self.setString(self.svns), self.setString(self.walkers), self.counts, self.setString(self.ids), self.duration(), self.setString(self.users), self.userError]
|
||||
|
||||
addHandler('exceptions', ExceptionReport)
|
||||
|
||||
|
|
@ -354,7 +380,10 @@ def resolveFiles(paths):
|
|||
return allFiles
|
||||
|
||||
def decodeTime(time):
|
||||
return datetime.datetime.strptime(time.split()[0], "%Y/%m/%d")
|
||||
if time == "ND":
|
||||
return "ND"
|
||||
else:
|
||||
return datetime.datetime.strptime(time.split()[0], "%Y/%m/%d")
|
||||
#return datetime.datetime.strptime(time, "%Y/%m/%d %H.%M.%S")
|
||||
|
||||
def passesFilters(elt):
|
||||
|
|
@ -377,7 +406,12 @@ def readReports(files):
|
|||
for file in files:
|
||||
if OPTIONS.verbose: print 'Reading file', file
|
||||
input = openFile(file)
|
||||
tree = ElementTree(file=input)
|
||||
try:
|
||||
tree = ElementTree(file=input)
|
||||
except:
|
||||
print "EXCEPTING FILE", file
|
||||
raise
|
||||
|
||||
elem = tree.getroot()
|
||||
if elem.tag == RUN_REPORT_LIST:
|
||||
for sub in elem:
|
||||
|
|
|
|||
Loading…
Reference in New Issue