Better reporting and now with a special mode for listing exceptions
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4183 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
cdad243645
commit
0c54bf4195
|
|
@ -34,6 +34,8 @@ myTable <- function(x, y, reqRowNonZero = F) {
|
|||
return(table)
|
||||
}
|
||||
|
||||
# todo -- must be robust to smaller sizes
|
||||
|
||||
plotTable <- function(table, name) {
|
||||
ncols = dim(table)[2]
|
||||
nrows = dim(table)[1]
|
||||
|
|
@ -47,25 +49,34 @@ plotTable <- function(table, name) {
|
|||
}
|
||||
|
||||
RUNNING_GATK_RUNTIME <- 60 * 5 # 5 minutes => bad failure
|
||||
excepted <- subset(d, exception.msg != "NA")
|
||||
badExcepted <- subset(excepted, run.time > RUNNING_GATK_RUNTIME)
|
||||
|
||||
if ( onCMDLine ) pdf(args[2])
|
||||
|
||||
generateOneReport <- function(d, header) {
|
||||
generateOneReport <- function(d, header, includeByWeek = T) {
|
||||
head <- function(s) {
|
||||
return(paste("Section:", header, ":", s))
|
||||
}
|
||||
|
||||
excepted <- subset(d, exception.msg != "NA")
|
||||
badExcepted <- subset(excepted, run.time > RUNNING_GATK_RUNTIME)
|
||||
|
||||
par("mar", c(5, 4, 4, 2))
|
||||
frame()
|
||||
title(paste("Section:", header), cex=2)
|
||||
|
||||
reportCountingPlot(d$walker.name, head("Walker invocations"))
|
||||
reportCountingPlot(d$svn.version, head("GATK SVN version"))
|
||||
reportCountingPlot(d$java.tmp.directory, head("Java tmp directory"))
|
||||
|
||||
# cuts by time
|
||||
plotTable(myTable(d$svn.version, d$start.time), head("SVN version by day"))
|
||||
if ( includeByWeek ) {
|
||||
plotTable(myTable(d$svn.version, cut(d$start.time, "weeks")), head("SVN version by week"))
|
||||
plotTable(myTable(excepted$walker.name, cut(excepted$start.time, "weeks"), reqRowNonZero = T), head("Walkers with exceptions by week"))
|
||||
}
|
||||
|
||||
# reportCountingPlot(d$java.tmp.directory, head("Java tmp directory"))
|
||||
reportCountingPlot(d$working.directory, head("Working directory"))
|
||||
reportCountingPlot(d$user.name, head("User"))
|
||||
reportCountingPlot(d$user.name, head("user"))
|
||||
reportCountingPlot(d$host.name, head("host"))
|
||||
reportCountingPlot(d$java, head("Java version"))
|
||||
reportCountingPlot(d$machine, head("Machine"))
|
||||
|
|
@ -88,15 +99,9 @@ generateOneReport <- function(d, header) {
|
|||
|
||||
RUNME = T
|
||||
if ( RUNME ) {
|
||||
generateOneReport(d, "Overall")
|
||||
|
||||
lastWeek = levels(cut(d$start.time, "weeks"))[-1]
|
||||
generateOneReport(subset(d, start.time == lastWeek), "Just last week to date")
|
||||
|
||||
# cuts by time
|
||||
plotTable(myTable(d$svn.version, d$start.time), "SVN version by day")
|
||||
plotTable(myTable(d$svn.version, cut(d$start.time, "weeks")), "SVN version by week")
|
||||
plotTable(myTable(excepted$walker.name, cut(excepted$start.time, "weeks"), reqRowNonZero = T), "Walkers with exceptions by week")
|
||||
lastWeek = levels(cut(d$start.time, "weeks"))[-1]
|
||||
generateOneReport(d, "Overall")
|
||||
#generateOneReport(subset(d, start.time >= lastWeek), "Just last week to date", includeByWeek = F)
|
||||
}
|
||||
|
||||
if ( onCMDLine ) dev.off()
|
||||
|
|
|
|||
|
|
@ -8,6 +8,13 @@ import itertools
|
|||
import re
|
||||
import vcfReader
|
||||
import string
|
||||
import gzip
|
||||
|
||||
def openMaybeGZ(filename):
    """Open *filename* for reading, transparently handling gzip files.

    A name ending in ".gz" is opened with gzip.open; any other name is
    opened with the builtin open.  Both are called with their default mode.
    """
    opener = open
    if filename.endswith(".gz"):
        opener = gzip.open
    return opener(filename)
|
||||
|
||||
def average(l):
|
||||
sum = reduce(operator.add, l, 0)
|
||||
|
|
@ -35,7 +42,7 @@ class Sample:
|
|||
|
||||
def flatFileIterator(file, fields = None, skip = 0):
|
||||
count = 0
|
||||
for line in open(file):
|
||||
for line in openMaybeGZ(file):
|
||||
count += 1
|
||||
if count > skip:
|
||||
s = map(string.strip, line.split('\t'))
|
||||
|
|
@ -99,7 +106,7 @@ def findVariantEvalResults(key, file, type=str):
|
|||
else:
|
||||
return None
|
||||
|
||||
return [val for val in map(capture1, open(file)) if val != None]
|
||||
return [val for val in map(capture1, openMaybeGZ(file)) if val != None]
|
||||
|
||||
|
||||
def getDBSNPRate(file):
|
||||
|
|
@ -121,7 +128,7 @@ def countMappedBases(samples, alignmentIndex):
|
|||
if ( OPTIONS.coverageFile != None ):
|
||||
# read from summary file, looking for the line:
|
||||
# Total 340710 1187.14 N/A N/A N/A
|
||||
for parts in map( string.split, open(OPTIONS.coverageFile) ):
|
||||
for parts in map( string.split, openMaybeGZ(OPTIONS.coverageFile) ):
|
||||
if parts[0] == "Total":
|
||||
return -1, int(parts[1])
|
||||
else:
|
||||
|
|
@ -156,7 +163,7 @@ def countSNPs(samples, snpsVCF, useIndels = False):
|
|||
total = 0
|
||||
novel = 0
|
||||
|
||||
header, columnNames, remainingLines = vcfReader.readVCFHeader(open(snpsVCF))
|
||||
header, columnNames, remainingLines = vcfReader.readVCFHeader(openMaybeGZ(snpsVCF))
|
||||
sampleIDs = columnNames[9:]
|
||||
|
||||
print 'Counting SNPs...'
|
||||
|
|
@ -204,7 +211,7 @@ def countIndels(samples, indelsVCF):
|
|||
|
||||
def readSamples(vcf):
|
||||
print 'Reading samples for', OPTIONS.population
|
||||
header, columnNames, remainingLines = vcfReader.readVCFHeader(open(vcf))
|
||||
header, columnNames, remainingLines = vcfReader.readVCFHeader(openMaybeGZ(vcf))
|
||||
samples = map(Sample, columnNames[9:])
|
||||
if ( OPTIONS.onlySample != None ):
|
||||
samples = filter( lambda x: x.getName() == OPTIONS.onlySample, samples )
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ from optparse import OptionParser
|
|||
from itertools import *
|
||||
from xml.etree.ElementTree import *
|
||||
import gzip
|
||||
import datetime
|
||||
|
||||
MISSING_VALUE = "NA"
|
||||
RUN_REPORT_LIST = "GATK-run-reports"
|
||||
|
|
@ -12,6 +13,7 @@ def main():
|
|||
global OPTIONS
|
||||
usage = "usage: %prog [options] mode file1 ... fileN"
|
||||
parser = OptionParser(usage=usage)
|
||||
|
||||
parser.add_option("-v", "--verbose", dest="verbose",
|
||||
action='store_true', default=False,
|
||||
help="If provided, verbose progress will be enabled")
|
||||
|
|
@ -19,6 +21,14 @@ def main():
|
|||
parser.add_option("-o", "--o", dest="output",
|
||||
type='string', default=None,
|
||||
help="if provided, output will go here instead of stdout")
|
||||
|
||||
parser.add_option("", "--no-dev", dest="noDev",
|
||||
action='store_true', default=False,
|
||||
help="if provided, only records not coming from a dev version of GATK will be included")
|
||||
|
||||
parser.add_option("", "--max_days", dest="maxDays",
|
||||
type='int', default=None,
|
||||
help="if provided, only records generated within X days of today will be included")
|
||||
|
||||
(OPTIONS, args) = parser.parse_args()
|
||||
if len(args) == 0:
|
||||
|
|
@ -44,7 +54,7 @@ def main():
|
|||
counter += 1
|
||||
|
||||
handler.finalize(files)
|
||||
out.close()
|
||||
if OPTIONS.output != None: out.close()
|
||||
print 'Processed records:', counter
|
||||
|
||||
#
|
||||
|
|
@ -74,24 +84,27 @@ def addHandler(name, handler):
|
|||
|
||||
def getHandler(stage):
|
||||
return HANDLERS[stage]
|
||||
|
||||
def eltIsException(elt):
    """Tell whether *elt* is an exception element, judged solely by its tag."""
    tag = elt.tag
    return tag == "exception"
|
||||
|
||||
# def
|
||||
class RecordAsTable(StageHandler):
|
||||
def __init__(self, name, out):
|
||||
StageHandler.__init__(self, name, out)
|
||||
|
||||
def initialize(self, args):
|
||||
def parseException(elt):
    """Split an exception element into (message, location).

    The message is the text of the "message" child; the location is the
    text of the first "string" child found under the "stacktrace" child.
    Both children must be present, otherwise an AttributeError is raised.
    """
    messageElt = elt.find("message")
    firstFrameElt = elt.find("stacktrace").find("string")
    return messageElt.text, firstFrameElt.text
|
||||
|
||||
|
||||
class RecordDecoder:
|
||||
def __init__(self):
|
||||
self.fields = list()
|
||||
self.formatters = dict()
|
||||
|
||||
def id(elt): return elt.text
|
||||
def toString(elt): return '"%s"' % elt.text
|
||||
def toString(elt): return '%s' % elt.text
|
||||
|
||||
def formatExceptionMsg(elt):
|
||||
return '"%s"' % elt.find("message").text
|
||||
return '%s' % parseException(elt)[0]
|
||||
|
||||
def formatExceptionAt(elt):
|
||||
return '"%s"' % elt.find("stacktrace").find("string").text
|
||||
return '%s' % parseException(elt)[1]
|
||||
|
||||
def add(names, func):
|
||||
for name in names:
|
||||
|
|
@ -107,31 +120,45 @@ class RecordAsTable(StageHandler):
|
|||
add(["java", "machine"], toString)
|
||||
add(["max-memory", "total-memory", "iterations", "reads"], id)
|
||||
addComplex("exception", ["exception-msg", "exception-at"], [formatExceptionMsg, formatExceptionAt])
|
||||
# add(["command-line"], toString)
|
||||
# add(["command-line"], toString)
|
||||
|
||||
def decode(self, report):
    """Turn one report into a dict mapping field name to formatted value.

    Each child of *report* whose tag is a key of self.formatters is passed
    to every (field, formatter) pair registered for that tag.  Fields listed
    in self.fields that no child produced are bound to MISSING_VALUE.
    """
    bindings = {}
    for elt in report:
        # tags without registered formatters contribute nothing
        for field, formatter in self.formatters.get(elt.tag, ()):
            bindings[field] = formatter(elt)

    # add missing data
    absent = [field for field in self.fields if field not in bindings]
    for field in absent:
        bindings[field] = MISSING_VALUE

    return bindings
|
||||
|
||||
# def
|
||||
class RecordAsTable(StageHandler):
|
||||
def __init__(self, name, out):
|
||||
StageHandler.__init__(self, name, out)
|
||||
|
||||
print >> self.out, "\t".join(self.fields)
|
||||
def initialize(self, args):
|
||||
self.decoder = RecordDecoder()
|
||||
print >> self.out, "\t".join(self.decoder.fields)
|
||||
|
||||
def processRecord(self, record):
|
||||
parsed = parseReport(record, self.formatters)
|
||||
parsed = self.decoder.decode(record)
|
||||
|
||||
def oneField(field):
|
||||
val = MISSING_VALUE
|
||||
if field in parsed:
|
||||
val = parsed[field]
|
||||
if val.find(" ") != -1:
|
||||
val = "\"" + val + "\""
|
||||
return val
|
||||
|
||||
print >> self.out, "\t".join([ oneField(field) for field in self.fields ])
|
||||
|
||||
def parseReport(report, allFormatters):
|
||||
bindings = dict()
|
||||
for elt in report:
|
||||
if elt.tag in allFormatters:
|
||||
fieldFormats = allFormatters[elt.tag]
|
||||
# we actually care about this tag
|
||||
for field, formatter in fieldFormats:
|
||||
bindings[field] = formatter(elt)
|
||||
return bindings
|
||||
|
||||
print >> self.out, "\t".join([ oneField(field) for field in self.decoder.fields ])
|
||||
|
||||
addHandler('table', RecordAsTable)
|
||||
|
||||
|
|
@ -161,7 +188,125 @@ class Archive(RecordAsXML):
|
|||
|
||||
addHandler('archive', Archive)
|
||||
|
||||
class ExceptionReport(StageHandler):
    """Handler that collects the records containing an exception element and,
    when finalized, prints one summary per group of matching exceptions.

    Grouping is delegated to CommonException.equals / CommonException.update.
    """

    def __init__(self, name, out):
        StageHandler.__init__(self, name, out)
        # decoded bindings of every record seen that carried an exception
        self.exceptions = []

    def initialize(self, args):
        self.decoder = RecordDecoder()

    def processRecord(self, record):
        """Decode and remember *record* if any of its children is an exception."""
        if any(eltIsException(child) for child in record):
            self.exceptions.append(self.decoder.decode(record))

    def finalize(self, args):
        """Group the collected exceptions and print them, fewest occurrences first."""
        grouped = []
        for ex in self.exceptions:
            for known in grouped:
                if known.equals(ex):
                    known.update(ex)
                    break
            else:
                # no existing group matched: this exception starts a new one
                grouped.append(CommonException(ex))

        separator = '*' * 80
        for common in sorted(grouped, key=lambda group: group.counts):
            msg, at, svns, walkers, counts, ids, duration = common.toStrings()

            print >> self.out, separator
            print >> self.out, 'Exception :', msg
            print >> self.out, ' at :', at
            print >> self.out, ' walkers :', walkers
            print >> self.out, ' svns :', svns
            print >> self.out, ' duration :', duration
            print >> self.out, ' occurrences :', counts
            print >> self.out, ' ids :', ids
|
||||
|
||||
class CommonException:
    """Aggregate of the decoded exception records that share one
    'exception-at' value.

    Each record is a dict with the keys 'exception-msg', 'exception-at',
    'svn-version', 'start-time', 'walker-name' and 'id'.  The distinct
    messages, svn versions, walkers, start dates and ids are kept in sets,
    and counts holds the number of records folded in.
    """

    # longest list of set members printed before the rest is elided as "..."
    MAX_SET_ITEMS_TO_SHOW = 5

    def __init__(self, ex):
        """Start a group from its first record *ex*."""
        self.at = ex['exception-at']
        self.msgs = set()
        self.svns = set()
        self.times = set()
        self.walkers = set()
        self.ids = set()
        self.counts = 0
        # the first record is folded in exactly like every later one
        self.update(ex)

    def equals(self, ex):
        """Return True when record *ex* was raised at this group's location."""
        return self.at == ex['exception-at']

    def update(self, ex):
        """Fold record *ex* into this group and bump the occurrence count."""
        self.msgs.add(ex['exception-msg'])
        self.svns.add(ex['svn-version'])
        self.walkers.add(ex['walker-name'])
        self.times.add(decodeTime(ex['start-time']))
        self.ids.add(ex['id'])
        self.counts += 1

    def bestExample(self, examples):
        """Return the shortest string in *examples*.

        The candidates are sorted first so that a tie between equally short
        strings resolves the same way on every run, whatever order the
        underlying set iterates in.  *examples* must not be empty.
        """
        return min(sorted(examples), key=len)

    def setString(self, s):
        """Join the members of *s* with commas, in sorted order.

        At most MAX_SET_ITEMS_TO_SHOW members are shown; when more exist the
        rest are replaced by a trailing "...".  Sorting keeps both the choice
        of displayed members and their order stable between runs.
        """
        items = sorted(s)
        if len(items) > self.MAX_SET_ITEMS_TO_SHOW:
            items = items[:self.MAX_SET_ITEMS_TO_SHOW] + ["..."]
        return ','.join(items)

    def duration(self):
        """Return "first-last" using the earliest and latest recorded dates,
        each formatted as mm/dd/yy."""
        first, last = min(self.times), max(self.times)
        return "-".join(when.strftime("%m/%d/%y") for when in (first, last))

    def toStrings(self):
        """Return [message, location, svns, walkers, count, ids, duration]
        in the order the exception report unpacks them; count stays an int."""
        return [
            self.bestExample(self.msgs),
            self.at,
            self.setString(self.svns),
            self.setString(self.walkers),
            self.counts,
            self.setString(self.ids),
            self.duration(),
        ]
|
||||
|
||||
addHandler('exceptions', ExceptionReport)
|
||||
|
||||
|
||||
#
|
||||
# def long_substr(data):
|
||||
# substr = ''
|
||||
# if len(data) > 1 and len(data[0]) > 0:
|
||||
# for i in range(len(data[0])):
|
||||
# for j in range(len(data[0])-i+1):
|
||||
# if j > len(substr) and is_substr(data[0][i:i+j], data):
|
||||
# substr = data[0][i:i+j]
|
||||
# return substr
|
||||
#
|
||||
# def is_substr(find, data):
|
||||
# if len(data) < 1 and len(find) < 1:
|
||||
# return False
|
||||
# for i in range(len(data)):
|
||||
# if find not in data[i]:
|
||||
# return False
|
||||
# return True
|
||||
#
|
||||
# def parameterizeStrings( strings ):
|
||||
# example = strings[0]
|
||||
# para = ''
|
||||
#
|
||||
# lcs = long_substr(strings)
|
||||
# if lcs == '':
|
||||
# # nothing common at all, we are done
|
||||
# return para
|
||||
# else:
|
||||
# # we need to remove the LCS from all strings
|
||||
# strings = map( lambda x: x.replace(lcs, ''), strings)
|
||||
#
|
||||
|
||||
#
|
||||
# utilities
|
||||
#
|
||||
|
|
@ -190,6 +335,25 @@ def resolveFiles(paths):
|
|||
map( resolve1, paths )
|
||||
return allFiles
|
||||
|
||||
def decodeTime(time):
    """Parse the date part of a timestamp string into a datetime.

    Only the first whitespace-separated token is used, read as "%Y/%m/%d";
    anything after it (e.g. an "%H.%M.%S" time of day) is ignored, so the
    result always falls on midnight of that day.
    """
    datePart = time.split()[0]
    return datetime.datetime.strptime(datePart, "%Y/%m/%d")
|
||||
|
||||
def passesFilters(elt):
    """Decide whether report element *elt* survives the command-line filters.

    Returns False when OPTIONS.noDev is set and the element's 'build-type'
    is 'dev', or when OPTIONS.maxDays is set and the element's start date
    lies more than that many days before today; returns True otherwise.
    """
    if OPTIONS.noDev and eltTagEquals(elt, 'build-type', 'dev'):
        return False

    if OPTIONS.maxDays is None:
        return True

    # compare whole days only: today is taken at midnight, and decodeTime
    # likewise yields the record's date without a time of day
    today = datetime.datetime.today()
    midnight = datetime.datetime(today.year, today.month, today.day)
    # <start-time>2010/08/31 15.38.00</start-time>
    started = decodeTime(elt.find('start-time').text)
    age = midnight - started
    return age.days <= OPTIONS.maxDays
|
||||
|
||||
def readReports(files):
|
||||
#print files
|
||||
for file in files:
|
||||
|
|
@ -198,9 +362,11 @@ def readReports(files):
|
|||
elem = tree.getroot()
|
||||
if elem.tag == RUN_REPORT_LIST:
|
||||
for sub in elem:
|
||||
yield sub
|
||||
if passesFilters(sub):
|
||||
yield sub
|
||||
else:
|
||||
yield elem
|
||||
if passesFilters(elem):
|
||||
yield elem
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Loading…
Reference in New Issue