improvements to the report code
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4280 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
4e83ba411f
commit
b57a0a0310
|
|
@ -6,7 +6,7 @@ if (! is.na(args[3]) ) { name = args[3] } else { name = "" }
|
||||||
# Read the run-report table only when invoked from the command line;
# otherwise assume the data is already loaded into `d`.
if (onCMDLine) {
  print(paste("Reading data from", args[1]))
  # TRUE rather than T: `T` is an ordinary variable and can be reassigned.
  d <- read.table(args[1], header = TRUE, sep = "\t")
  #d$start.time = as.Date(d$start.time)
  d$end.time <- as.Date(d$end.time)
}
|
||||||
|
|
||||||
|
|
@ -14,11 +14,26 @@ reportCountingPlot <- function(values, name, moreMargin = 0, ...) {
|
||||||
par(las=2) # make label text perpendicular to axis
|
par(las=2) # make label text perpendicular to axis
|
||||||
oldMar <- par("mar")
|
oldMar <- par("mar")
|
||||||
par(mar=c(5,8+moreMargin,4,2)) # increase y-axis margin.
|
par(mar=c(5,8+moreMargin,4,2)) # increase y-axis margin.
|
||||||
barplot(sort(table(values)), horiz=TRUE, cex.names = 0.5, main = name, xlab="Counts", ...)
|
barplot(sort(table(factor(values))), horiz=TRUE, cex.names = 0.5, main = name, xlab="Counts", ...)
|
||||||
par("mar" = oldMar)
|
par("mar" = oldMar)
|
||||||
par("las" = 1)
|
par("las" = 1)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Horizontal stacked bar plot of the counts of `values`, with one bar per
# level of `conditions`; bars are ordered by their total count.
#
# values      vector whose levels become the stacked segments (and the legend)
# conditions  vector, same length as `values`, whose levels become the bars
# name        plot title
# moreMargin  extra lines added to the left margin for long labels
# ...         forwarded to barplot()
reportConditionalCountingPlot <- function(values, conditions, name, moreMargin = 0, ...) {
  # Put margins and label orientation back even if barplot() fails.
  oldMar <- par("mar")
  on.exit(par(mar = oldMar, las = 1), add = TRUE)
  par(las = 2)                           # make label text perpendicular to axis
  par(mar = c(5, 8 + moreMargin, 4, 2))  # increase y-axis margin

  counts <- table(values, conditions)
  # drop = FALSE keeps a 2-D object when there is only a single condition;
  # without it the table collapses to a vector and has no column dimension.
  counts <- counts[, order(colSums(counts)), drop = FALSE]
  print(list(t = counts))

  # barplot() applies `col` to the stacked components, i.e. the rows.
  cols <- rainbow(nrow(counts))
  barplot(counts, legend.text = TRUE, horiz = TRUE, cex.names = 0.5, main = name, xlab = "Counts", col = cols, cex = 0.5, ...)
  invisible(NULL)
}
|
||||||
|
|
||||||
|
|
||||||
reportHist <- function(values, name, ...) {
|
reportHist <- function(values, name, ...) {
|
||||||
if ( ! all(is.na(values) ) )
|
if ( ! all(is.na(values) ) )
|
||||||
hist(values, main=name, 20, xlab="", col="cornflowerblue", ...)
|
hist(values, main=name, 20, xlab="", col="cornflowerblue", ...)
|
||||||
|
|
@ -37,18 +52,20 @@ myTable <- function(x, y, reqRowNonZero = F) {
|
||||||
|
|
||||||
# todo -- must be robust to smaller sizes
|
# todo -- must be robust to smaller sizes
|
||||||
|
|
||||||
# Line plot with one series per row of a 2-D count table, drawn across the
# table's columns (column names label the x axis).
#
# table  2-D table or matrix
# name   plot title
# ...    forwarded to plot()
#
# Returns NULL invisibly. Input that is not 2-D, or that has no rows or no
# columns, is skipped without plotting.
plotTable <- function(table, name, ...) {
  dims <- dim(table)
  if (length(dims) != 2 || any(dims == 0)) {
    return(invisible(NULL))
  }
  nrows <- dims[1]
  ncols <- dims[2]

  cols <- rainbow(nrows)
  tableMin <- min(apply(table, 2, min))
  tableMax <- max(apply(table, 2, max))
  # type = "n" sets up the axes only; the series are added row by row below.
  plot(as.numeric(apply(table, 2, sum)), ylim = c(tableMin, tableMax), type = "n", main = name, ylab = "Frequency", xlab = "Date", xaxt = "n", ...)
  axis(1, seq_len(ncols), labels = colnames(table))
  for (i in seq_len(nrows)) {
    points(table[i, ], type = "b", col = cols[i])
  }
  legend("topright", row.names(table), fill = cols, cex = 0.5)
  #return(table)
  invisible(NULL)
}
|
||||||
|
|
||||||
RUNNING_GATK_RUNTIME <- 60 * 5 # 5 minutes => bad failure
|
RUNNING_GATK_RUNTIME <- 60 * 5 # 5 minutes => bad failure
|
||||||
|
|
@ -61,38 +78,61 @@ successfulRuns <- function(d) {
|
||||||
return(x)
|
return(x)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Start a new report section: a blank page that carries only the section title.
#
# name  text of the section title
addSection <- function(name) {
  # par() only sets a parameter passed as a named argument; the unnamed form
  # par("mar", c(...)) just queries "mar" and warns about the second argument.
  par(mar = c(5, 4, 4, 2))
  frame()
  title(name, cex = 2)
}
|
||||||
|
|
||||||
generateOneReport <- function(d, header, includeByWeek = T) {
|
generateOneReport <- function(d, header, includeByWeek = T) {
|
||||||
head <- function(s) {
|
head <- function(s) {
|
||||||
return(paste("Section:", header, ":", s))
|
return(paste("Section:", header, "\n", s))
|
||||||
}
|
}
|
||||||
|
|
||||||
excepted <- subset(d, exception.msg != "NA")
|
excepted <- subset(d, exception.msg != "NA")
|
||||||
badExcepted <- subset(excepted, run.time > RUNNING_GATK_RUNTIME)
|
UserExceptions <- subset(excepted, is.user.exception == "true")
|
||||||
|
StingExceptions <- subset(excepted, is.user.exception == "false" | is.user.exception == "NA" | is.na(is.user.exception))
|
||||||
|
|
||||||
|
addSection(paste("GATK run report", name, "for", Sys.Date(), "\nwith", dim(d)[1], "run repository records"))
|
||||||
|
|
||||||
|
reportCountingPlot(d$walker.name, head("Walker invocations"))
|
||||||
|
reportConditionalCountingPlot(d$user.name, d$walker.name, head("Walker invocations by user"))
|
||||||
|
reportCountingPlot(d$svn.version, head("SVN version"))
|
||||||
|
reportConditionalCountingPlot(d$svn.version, d$user.name, head("SVN by user"))
|
||||||
|
|
||||||
par("mar", c(5, 4, 4, 2))
|
|
||||||
frame()
|
|
||||||
title(paste("GATK run report", name, "for", Sys.Date(), "\nwith", dim(d)[1], "run repository records"), cex=2)
|
|
||||||
|
|
||||||
# cuts by time
|
# cuts by time
|
||||||
if ( includeByWeek ) {
|
if ( includeByWeek ) {
|
||||||
plotTable(table(rep("GATK Invocations", length(d$start.time)), cut(d$start.time, "weeks")), head("GATK Invocations by week"))
|
plotTable(table(rep("GATK Invocations", length(d$end.time)), cut(d$end.time, "weeks")), head("GATK Invocations by week"))
|
||||||
plotTable(myTable(successfulRuns(d), cut(d$start.time, "weeks")), head("Successful and failing GATK invocations per week"))
|
plotTable(myTable(successfulRuns(d), cut(d$end.time, "weeks")), head("Successful and failing GATK invocations per week"))
|
||||||
|
|
||||||
plotTable(myTable(d$svn.version, cut(d$start.time, "weeks")), head("SVN version by week"))
|
plotTable(myTable(d$svn.version, cut(d$end.time, "weeks")), head("SVN version by week"))
|
||||||
plotTable(myTable(excepted$walker.name, cut(excepted$start.time, "weeks"), reqRowNonZero = T), head("Walkers with exceptions by week"))
|
|
||||||
}
|
}
|
||||||
plotTable(table(rep("GATK Invocations", length(d$start.time)), d$start.time), head("GATK Invocations by day"))
|
plotTable(table(rep("GATK Invocations", length(d$end.time)), d$end.time), head("GATK Invocations by day"))
|
||||||
plotTable(myTable(d$svn.version, d$start.time), head("SVN version by day"))
|
plotTable(myTable(d$svn.version, d$end.time), head("SVN version by day"))
|
||||||
|
|
||||||
reportCountingPlot(d$walker.name, head("Walker invocations"))
|
#
|
||||||
reportCountingPlot(d$svn.version, head("GATK SVN version"))
|
# Exception handling
|
||||||
|
#
|
||||||
|
# Emit a titled section of plots describing one subset of the excepted runs.
# Defined inside generateOneReport: uses its head() title helper and its
# includeByWeek argument.
#
# subd            subset of the run records to describe
# subname         label used for the section title and the plot titles
# exceptionColor  colour passed to the counting plots
addExceptionSection <- function(subd, subname, exceptionColor) {
  addSection(subname)
  #print(list(subd = length(subd$end.time), name=subname))

  # An empty subset has nothing to count, so only the section title is drawn.
  if (length(subd$end.time) == 0) {
    return(invisible(NULL))
  }

  reportCountingPlot(subd$walker.name, head(paste("Walkers with", subname)), col = exceptionColor)
  reportCountingPlot(subd$exception.at, head(paste(subname, "locations")), 12, col = exceptionColor)
  reportCountingPlot(subd$exception.msg, head(paste(subname, "messages")), 12, col = exceptionColor)
  # Title describes what is plotted here: exception locations split by user.
  reportConditionalCountingPlot(subd$user.name, subd$exception.at, head(paste(subname, "locations by user")), 12)

  if (includeByWeek) {
    plotTable(myTable(subd$walker.name, cut(subd$end.time, "weeks"), reqRowNonZero = TRUE), head(paste("Walkers with", subname, "by week")), col = exceptionColor)
  }
}
|
||||||
|
|
||||||
|
addExceptionSection(excepted, "Exceptions", "grey")
|
||||||
|
reportCountingPlot(excepted$user.name, head("Usernames generating exceptions"), col="grey")
|
||||||
|
|
||||||
|
addExceptionSection(StingExceptions, "StingExceptions", "red")
|
||||||
|
addExceptionSection(UserExceptions, "UserExceptions", "blue")
|
||||||
|
|
||||||
# reportCountingPlot(d$java.tmp.directory, head("Java tmp directory"))
|
|
||||||
reportCountingPlot(d$working.directory, head("Working directory"))
|
|
||||||
reportCountingPlot(d$user.name, head("user"))
|
|
||||||
reportCountingPlot(d$host.name, head("host"))
|
|
||||||
reportCountingPlot(d$java, head("Java version"))
|
|
||||||
reportCountingPlot(d$machine, head("Machine"))
|
|
||||||
|
|
||||||
Gb <- 1024^3
|
Gb <- 1024^3
|
||||||
reportHist(d$total.memory / Gb, head("Used memory"))
|
reportHist(d$total.memory / Gb, head("Used memory"))
|
||||||
|
|
@ -100,21 +140,20 @@ generateOneReport <- function(d, header, includeByWeek = T) {
|
||||||
|
|
||||||
min <- 60
|
min <- 60
|
||||||
reportHist(log10(d$run.time / min), head("Run time (log10[min])"))
|
reportHist(log10(d$run.time / min), head("Run time (log10[min])"))
|
||||||
|
|
||||||
exceptionColor = "red"
|
reportCountingPlot(d$user.name, head("user"))
|
||||||
reportCountingPlot(excepted$walker.name, head("Walker exceptions"), col=exceptionColor)
|
reportCountingPlot(d$host.name, head("host"))
|
||||||
reportCountingPlot(subset(excepted, run.time > RUNNING_GATK_RUNTIME)$walker.name, paste(head("Long-running walker exceptions (>"),RUNNING_GATK_RUNTIME,"seconds runtime)"), col=exceptionColor)
|
|
||||||
reportCountingPlot(subset(excepted, run.time < RUNNING_GATK_RUNTIME)$walker.name, paste(head("Start-up walker exceptions (<"),RUNNING_GATK_RUNTIME,"seconds runtime)"), col=exceptionColor)
|
reportCountingPlot(d$java, head("Java version"))
|
||||||
reportCountingPlot(excepted$user.name, head("Usernames generating exceptions"), col=exceptionColor)
|
reportCountingPlot(d$machine, head("Machine"))
|
||||||
reportCountingPlot(excepted$exception.msg, head("Exception messages"), 12)
|
reportCountingPlot(d$working.directory, head("Working directory"))
|
||||||
reportCountingPlot(excepted$exception.at, head("Exception locations"), 12)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# Script driver: generate the overall report from the loaded records in `d`.
RUNME <- TRUE
if (RUNME) {
  # Label of the most recent weekly bucket. Indexing the levels with [-1]
  # would instead drop the first week and keep all the others.
  lastWeek <- rev(levels(cut(d$end.time, "weeks")))[1]
  generateOneReport(d, "Overall")
  #generateOneReport(subset(d, end.time >= lastWeek), "Just last week to date", includeByWeek = F)
}
|
||||||
|
|
||||||
if ( onCMDLine ) dev.off()
|
if ( onCMDLine ) dev.off()
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,7 @@ from itertools import *
|
||||||
from xml.etree.ElementTree import *
|
from xml.etree.ElementTree import *
|
||||||
import gzip
|
import gzip
|
||||||
import datetime
|
import datetime
|
||||||
|
import re
|
||||||
|
|
||||||
MISSING_VALUE = "NA"
|
MISSING_VALUE = "NA"
|
||||||
RUN_REPORT_LIST = "GATK-run-reports"
|
RUN_REPORT_LIST = "GATK-run-reports"
|
||||||
|
|
@ -101,9 +102,20 @@ def eltIsException(elt):
|
||||||
def parseException(elt):
    """Extract the message, stack trace and user-exception flag from an exception element.

    elt is an ElementTree element. The values are the text of its
    "message" child, of the "string" child of its "stacktrace" child, and
    of its "is-user-exception" child.

    Returns a (msgText, stackTrace, userException) tuple. A missing or
    empty message or stack trace is reported as "MISSING"; a missing or
    empty is-user-exception flag is reported as "NA".
    """
    def childText(parent, tag, default):
        # Elements are compared with `is None`, not `!= None` or truthiness.
        child = parent.find(tag) if parent is not None else None
        if child is None or child.text is None:
            return default
        return child.text

    msgText = childText(elt, "message", "MISSING")
    stackTrace = childText(elt.find("stacktrace"), "string", "MISSING")
    userException = childText(elt, "is-user-exception", "NA")
    return msgText, stackTrace, userException
|
||||||
|
|
||||||
|
def javaExceptionFile(javaException):
    """Return the first "(File.java:line)" location in a Java stack trace string.

    The parentheses are stripped, e.g. "at a.B.c(B.java:42)" gives
    "B.java:42". When no such location is present (or the input is empty
    or None) the input is returned unchanged.
    """
    if not javaException:
        return javaException
    # [^()]* keeps the match inside a single pair of parentheses, so a trace
    # with several frames on one line still yields only the first location.
    m = re.search(r"\(([^()]*\.java:[^()]*)\)", javaException)
    if m is not None:
        return m.group(1)
    return javaException
|
||||||
|
|
||||||
class RecordDecoder:
|
class RecordDecoder:
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
|
|
@ -118,6 +130,12 @@ class RecordDecoder:
|
||||||
|
|
||||||
def formatExceptionAt(elt):
|
def formatExceptionAt(elt):
|
||||||
return '%s' % parseException(elt)[1]
|
return '%s' % parseException(elt)[1]
|
||||||
|
|
||||||
|
def formatExceptionAtBrief(elt):
|
||||||
|
return '%s' % javaExceptionFile(parseException(elt)[1])
|
||||||
|
|
||||||
|
def formatExceptionUser(elt):
|
||||||
|
return '%s' % parseException(elt)[2]
|
||||||
|
|
||||||
def add(names, func):
|
def add(names, func):
|
||||||
for name in names:
|
for name in names:
|
||||||
|
|
@ -132,7 +150,7 @@ class RecordDecoder:
|
||||||
add(["run-time", "java-tmp-directory", "working-directory", "user-name", "host-name"], id)
|
add(["run-time", "java-tmp-directory", "working-directory", "user-name", "host-name"], id)
|
||||||
add(["java", "machine"], toString)
|
add(["java", "machine"], toString)
|
||||||
add(["max-memory", "total-memory", "iterations", "reads"], id)
|
add(["max-memory", "total-memory", "iterations", "reads"], id)
|
||||||
addComplex("exception", ["exception-msg", "exception-at"], [formatExceptionMsg, formatExceptionAt])
|
addComplex("exception", ["exception-msg", "exception-at", "exception-at-brief", "is-user-exception"], [formatExceptionMsg, formatExceptionAt, formatExceptionAtBrief, formatExceptionUser])
|
||||||
# add(["command-line"], toString)
|
# add(["command-line"], toString)
|
||||||
|
|
||||||
def decode(self, report):
|
def decode(self, report):
|
||||||
|
|
@ -234,17 +252,18 @@ class ExceptionReport(StageHandler):
|
||||||
commonExceptions = sorted(commonExceptions, None, lambda x: x.counts)
|
commonExceptions = sorted(commonExceptions, None, lambda x: x.counts)
|
||||||
|
|
||||||
for common in commonExceptions:
|
for common in commonExceptions:
|
||||||
msg, at, svns, walkers, counts, ids, duration, users = common.toStrings()
|
msg, at, svns, walkers, counts, ids, duration, users, userError = common.toStrings()
|
||||||
|
|
||||||
print >> self.out, ''.join(['*'] * 80)
|
print >> self.out, ''.join(['*'] * 80)
|
||||||
print >> self.out, 'Exception :', msg
|
print >> self.out, 'Exception :', msg
|
||||||
print >> self.out, ' at :', at
|
print >> self.out, ' is-user-exception? :', userError
|
||||||
print >> self.out, ' walkers :', walkers
|
print >> self.out, ' at :', at
|
||||||
print >> self.out, ' svns :', svns
|
print >> self.out, ' walkers :', walkers
|
||||||
print >> self.out, ' duration :', duration
|
print >> self.out, ' svns :', svns
|
||||||
print >> self.out, ' occurrences :', counts
|
print >> self.out, ' duration :', duration
|
||||||
print >> self.out, ' users :', users
|
print >> self.out, ' occurrences :', counts
|
||||||
print >> self.out, ' ids :', ids
|
print >> self.out, ' users :', users
|
||||||
|
print >> self.out, ' ids :', ids
|
||||||
|
|
||||||
class CommonException:
|
class CommonException:
|
||||||
MAX_SET_ITEMS_TO_SHOW = 5
|
MAX_SET_ITEMS_TO_SHOW = 5
|
||||||
|
|
@ -254,6 +273,7 @@ class CommonException:
|
||||||
self.at = ex['exception-at']
|
self.at = ex['exception-at']
|
||||||
self.svns = set([ex['svn-version']])
|
self.svns = set([ex['svn-version']])
|
||||||
self.users = set([ex['user-name']])
|
self.users = set([ex['user-name']])
|
||||||
|
self.userError = ex['is-user-exception']
|
||||||
self.counts = 1
|
self.counts = 1
|
||||||
self.times = set([decodeTime(ex['start-time'])])
|
self.times = set([decodeTime(ex['start-time'])])
|
||||||
self.walkers = set([ex['walker-name']])
|
self.walkers = set([ex['walker-name']])
|
||||||
|
|
@ -285,11 +305,17 @@ class CommonException:
|
||||||
return ','.join(s)
|
return ','.join(s)
|
||||||
|
|
||||||
def duration(self):
    """Describe the date span covered by self.times as a string.

    Entries equal to "ND" (no date) are ignored. Returns
    "mm/dd/yy-mm/dd/yy" for the earliest and latest dates, a single
    "mm/dd/yy" when only one date is known, and "ND" when none is.
    """
    dates = sorted(t for t in self.times if t != "ND")
    if not dates:
        return "ND"
    first = dates[0].strftime("%m/%d/%y")
    if len(dates) == 1:
        # Formatted like the range case rather than returned as a raw datetime.
        return first
    last = dates[-1].strftime("%m/%d/%y")
    return "-".join([first, last])
|
||||||
|
|
||||||
|
|
||||||
def toStrings(self):
    """Return this exception's display fields as a list.

    Order: best example message, location, svn versions, walkers, count,
    ids, date span, users, is-user-exception flag.
    """
    message = self.bestExample(self.msgs)
    svnText = self.setString(self.svns)
    walkerText = self.setString(self.walkers)
    idText = self.setString(self.ids)
    span = self.duration()
    userText = self.setString(self.users)
    return [message, self.at, svnText, walkerText, self.counts, idText, span, userText, self.userError]
|
||||||
|
|
||||||
addHandler('exceptions', ExceptionReport)
|
addHandler('exceptions', ExceptionReport)
|
||||||
|
|
||||||
|
|
@ -354,7 +380,10 @@ def resolveFiles(paths):
|
||||||
return allFiles
|
return allFiles
|
||||||
|
|
||||||
def decodeTime(time):
    """Parse the date part of a "YYYY/MM/DD ..." timestamp string.

    Only the first whitespace-separated token is parsed, so any time-of-day
    suffix is ignored. Returns a datetime.datetime, or the string "ND" when
    the input is "ND", None, or blank.
    """
    if time is None or time == "ND" or not time.strip():
        return "ND"
    #return datetime.datetime.strptime(time, "%Y/%m/%d %H.%M.%S")
    return datetime.datetime.strptime(time.split()[0], "%Y/%m/%d")
|
||||||
|
|
||||||
def passesFilters(elt):
|
def passesFilters(elt):
|
||||||
|
|
@ -377,7 +406,12 @@ def readReports(files):
|
||||||
for file in files:
|
for file in files:
|
||||||
if OPTIONS.verbose: print 'Reading file', file
|
if OPTIONS.verbose: print 'Reading file', file
|
||||||
input = openFile(file)
|
input = openFile(file)
|
||||||
tree = ElementTree(file=input)
|
try:
|
||||||
|
tree = ElementTree(file=input)
|
||||||
|
except:
|
||||||
|
print "EXCEPTING FILE", file
|
||||||
|
raise
|
||||||
|
|
||||||
elem = tree.getroot()
|
elem = tree.getroot()
|
||||||
if elem.tag == RUN_REPORT_LIST:
|
if elem.tag == RUN_REPORT_LIST:
|
||||||
for sub in elem:
|
for sub in elem:
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue