diff --git a/R/GATKRunReport.R b/R/GATKRunReport.R
index 6c918d6c5..418257a56 100644
--- a/R/GATKRunReport.R
+++ b/R/GATKRunReport.R
@@ -34,6 +34,8 @@ myTable <- function(x, y, reqRowNonZero = F) {
     return(table)
 }
 
+# todo -- must be robust to smaller sizes
+
 plotTable <- function(table, name) {
     ncols = dim(table)[2]
     nrows = dim(table)[1]
@@ -47,25 +49,34 @@ plotTable <- function(table, name) {
 }
 
 RUNNING_GATK_RUNTIME <- 60 * 5 #  5 minutes => bad failure
-excepted <- subset(d, exception.msg != "NA")
-badExcepted <- subset(excepted, run.time > RUNNING_GATK_RUNTIME)
 
 if ( onCMDLine ) pdf(args[2])
 
-generateOneReport <- function(d, header) {
+generateOneReport <- function(d, header, includeByWeek = T) {
     head <- function(s) {
         return(paste("Section:", header, ":", s))
     }
     
+    excepted <- subset(d, exception.msg != "NA")
+    badExcepted <- subset(excepted, run.time > RUNNING_GATK_RUNTIME)
+
     par("mar", c(5, 4, 4, 2))
     frame()
     title(paste("Section:", header), cex=2)
     
     reportCountingPlot(d$walker.name, head("Walker invocations"))
     reportCountingPlot(d$svn.version, head("GATK SVN version"))
-    reportCountingPlot(d$java.tmp.directory, head("Java tmp directory"))
+
+    # cuts by time
+    plotTable(myTable(d$svn.version, d$start.time), head("SVN version by day"))
+    if ( includeByWeek ) {
+        plotTable(myTable(d$svn.version, cut(d$start.time, "weeks")), head("SVN version by week"))
+        plotTable(myTable(excepted$walker.name, cut(excepted$start.time, "weeks"), reqRowNonZero = T), head("Walkers with exceptions by week"))
+    }
+
+    # reportCountingPlot(d$java.tmp.directory, head("Java tmp directory"))
     reportCountingPlot(d$working.directory, head("Working directory"))
-    reportCountingPlot(d$user.name, head("User"))
+    reportCountingPlot(d$user.name, head("user"))
     reportCountingPlot(d$host.name, head("host"))
     reportCountingPlot(d$java, head("Java version"))
     reportCountingPlot(d$machine, head("Machine"))
@@ -88,15 +99,9 @@ generateOneReport <- function(d, header) {
 
 RUNME = T
 if ( RUNME ) {
-generateOneReport(d, "Overall")
-
-lastWeek = levels(cut(d$start.time, "weeks"))[-1]
-generateOneReport(subset(d, start.time == lastWeek), "Just last week to date")
-
-# cuts by time
-plotTable(myTable(d$svn.version, d$start.time), "SVN version by day")
-plotTable(myTable(d$svn.version, cut(d$start.time, "weeks")), "SVN version by week")
-plotTable(myTable(excepted$walker.name, cut(excepted$start.time, "weeks"), reqRowNonZero = T), "Walkers with exceptions by week")
+    lastWeek = levels(cut(d$start.time, "weeks"))[-1]
+    generateOneReport(d, "Overall")
+    #generateOneReport(subset(d, start.time >= lastWeek), "Just last week to date", includeByWeek = F)
 }
 
 if ( onCMDLine ) dev.off()
diff --git a/python/1kgStatsForCalls.py b/python/1kgStatsForCalls.py
index 3a4e90a7f..07d5b50d9 100755
--- a/python/1kgStatsForCalls.py
+++ b/python/1kgStatsForCalls.py
@@ -8,6 +8,13 @@ import itertools
 import re
 import vcfReader
 import string
+import gzip
+
+def openMaybeGZ(filename):
+    if ( filename.endswith(".gz") ):
+        return gzip.open(filename)
+    else:
+        return open(filename)
 
 def average(l):
     sum = reduce(operator.add, l, 0)
@@ -35,7 +42,7 @@ class Sample:
     
 def flatFileIterator(file, fields = None, skip = 0):
     count = 0
-    for line in open(file):
+    for line in openMaybeGZ(file):
         count += 1
         if count > skip:
             s = map(string.strip, line.split('\t'))
@@ -99,7 +106,7 @@ def findVariantEvalResults(key, file, type=str):
         else:
             return None
 
-    return [val for val in map(capture1, open(file)) if val != None]
+    return [val for val in map(capture1, openMaybeGZ(file)) if val != None]
 
 
 def getDBSNPRate(file):
@@ -121,7 +128,7 @@ def countMappedBases(samples, alignmentIndex):
     if ( OPTIONS.coverageFile != None ): 
         # read from summary file, looking for the line:
         # Total   340710  1187.14 N/A     N/A     N/A
-        for parts in map( string.split, open(OPTIONS.coverageFile) ):
+        for parts in map( string.split, openMaybeGZ(OPTIONS.coverageFile) ):
             if parts[0] == "Total":
                 return -1, int(parts[1])
     else:
@@ -156,7 +163,7 @@ def countSNPs(samples, snpsVCF, useIndels = False):
     total = 0
     novel = 0
     
-    header, columnNames, remainingLines = vcfReader.readVCFHeader(open(snpsVCF))
+    header, columnNames, remainingLines = vcfReader.readVCFHeader(openMaybeGZ(snpsVCF))
     sampleIDs = columnNames[9:]
 
     print 'Counting SNPs...'
@@ -204,7 +211,7 @@ def countIndels(samples, indelsVCF):
 
 def readSamples(vcf):
     print 'Reading samples for', OPTIONS.population
-    header, columnNames, remainingLines = vcfReader.readVCFHeader(open(vcf))
+    header, columnNames, remainingLines = vcfReader.readVCFHeader(openMaybeGZ(vcf))
     samples = map(Sample, columnNames[9:])
     if ( OPTIONS.onlySample != None ):
         samples = filter( lambda x: x.getName() == OPTIONS.onlySample, samples )
diff --git a/python/analyzeRunReports.py b/python/analyzeRunReports.py
index 9fa004d08..234fb7335 100755
--- a/python/analyzeRunReports.py
+++ b/python/analyzeRunReports.py
@@ -4,6 +4,7 @@ from optparse import OptionParser
 from itertools import *
 from xml.etree.ElementTree import *
 import gzip
+import datetime
 
 MISSING_VALUE = "NA"
 RUN_REPORT_LIST = "GATK-run-reports"
@@ -12,6 +13,7 @@ def main():
     global OPTIONS
     usage = "usage: %prog [options] mode file1 ... fileN"
     parser = OptionParser(usage=usage)
+    
     parser.add_option("-v", "--verbose", dest="verbose",
                         action='store_true', default=False,
                         help="If provided, verbose progress will be enabled")         
@@ -19,6 +21,14 @@ def main():
     parser.add_option("-o", "--o", dest="output",
                         type='string', default=None,
                         help="if provided, output will go here instead of stdout")
+
+    parser.add_option("", "--no-dev", dest="noDev",
+                        action='store_true', default=False,
+                        help="if provided, only records not coming from a dev version of GATK will be included")
+
+    parser.add_option("", "--max_days", dest="maxDays",
+                        type='int', default=None,
+                        help="if provided, only records generated within X days of today will be included")
          
     (OPTIONS, args) = parser.parse_args()
     if len(args) == 0:
@@ -44,7 +54,7 @@ def main():
         counter += 1
 
     handler.finalize(files)
-    out.close()
+    if OPTIONS.output != None: out.close()
     print 'Processed records:', counter 
 
 #
@@ -74,24 +84,27 @@ def addHandler(name, handler):
     
 def getHandler(stage):
     return HANDLERS[stage]
+
+def eltIsException(elt):
+    return elt.tag == "exception"
     
-# def 
-class RecordAsTable(StageHandler):
-    def __init__(self, name, out):
-        StageHandler.__init__(self, name, out)
-        
-    def initialize(self, args):
+def parseException(elt):
+    return elt.find("message").text, elt.find("stacktrace").find("string").text
+
+
+class RecordDecoder:
+    def __init__(self):
         self.fields = list()
         self.formatters = dict()
     
         def id(elt): return elt.text
-        def toString(elt): return '"%s"' % elt.text
+        def toString(elt): return '%s' % elt.text
         
         def formatExceptionMsg(elt):
-            return '"%s"' % elt.find("message").text
+            return '%s' % parseException(elt)[0]
 
         def formatExceptionAt(elt):
-            return '"%s"' % elt.find("stacktrace").find("string").text
+            return '%s' % parseException(elt)[1]
         
         def add(names, func):
             for name in names:
@@ -107,31 +120,45 @@ class RecordAsTable(StageHandler):
         add(["java", "machine"], toString)
         add(["max-memory", "total-memory", "iterations", "reads"], id)
         addComplex("exception", ["exception-msg", "exception-at"], [formatExceptionMsg, formatExceptionAt])
-        # add(["command-line"], toString)    
+        # add(["command-line"], toString)          
+    
+    def decode(self, report):
+        bindings = dict()
+        for elt in report:
+            if elt.tag in self.formatters:
+                fieldFormats = self.formatters[elt.tag]
+                # we actually care about this tag
+                for field, formatter in fieldFormats:
+                    bindings[field] = formatter(elt)
+
+        # add missing data
+        for field in self.fields:
+            if field not in bindings:
+                bindings[field] = MISSING_VALUE
+
+        return bindings
+    
+# def 
+class RecordAsTable(StageHandler):
+    def __init__(self, name, out):
+        StageHandler.__init__(self, name, out)
         
-        print >> self.out, "\t".join(self.fields)
+    def initialize(self, args):
+        self.decoder = RecordDecoder()
+        print >> self.out, "\t".join(self.decoder.fields)
 
     def processRecord(self, record):
-        parsed = parseReport(record, self.formatters)
+        parsed = self.decoder.decode(record)
 
         def oneField(field):
             val = MISSING_VALUE
             if field in parsed:
                 val = parsed[field]
+                if val.find(" ") != -1:
+                    val = "\"" + val + "\""
             return val
 
-        print >> self.out, "\t".join([ oneField(field) for field in self.fields ])
-
-def parseReport(report, allFormatters):
-    bindings = dict()
-    for elt in report:
-        if elt.tag in allFormatters:
-            fieldFormats = allFormatters[elt.tag]
-            # we actually care about this tag
-            for field, formatter in fieldFormats:
-                bindings[field] = formatter(elt)
-    return bindings
-
+        print >> self.out, "\t".join([ oneField(field) for field in self.decoder.fields ])
 
 addHandler('table', RecordAsTable)
 
@@ -161,7 +188,125 @@ class Archive(RecordAsXML):
 
 addHandler('archive', Archive)
 
+class ExceptionReport(StageHandler):
+    #FIELDS = ["Msg", "At", "SVN.versions", "Walkers", 'Occurrences', 'IDs']
+    def __init__(self, name, out):
+        StageHandler.__init__(self, name, out)
+        self.exceptions = []
 
+    def initialize(self, args):
+        self.decoder = RecordDecoder()
+        #print >> self.out, "\t".join(self.FIELDS)
+        
+    def processRecord(self, record):
+        for elt in record:
+            if eltIsException(elt):
+                self.exceptions.append(self.decoder.decode(record))
+                break
+
+    def finalize(self, args):
+        commonExceptions = list()
+        
+        def addToCommons(ex):
+            for common in commonExceptions:
+                if common.equals(ex):
+                    common.update(ex)
+                    return
+            commonExceptions.append(CommonException(ex))
+
+        for ex in self.exceptions:
+            addToCommons(ex)
+        commonExceptions = sorted(commonExceptions, None, lambda x: x.counts)   
+            
+        for common in commonExceptions:
+            msg, at, svns, walkers, counts, ids, duration = common.toStrings()
+
+            print >> self.out, ''.join(['*'] * 80)
+            print >> self.out, 'Exception       :', msg
+            print >> self.out, '    at          :', at
+            print >> self.out, '    walkers     :', walkers
+            print >> self.out, '    svns        :', svns
+            print >> self.out, '    duration    :', duration
+            print >> self.out, '    occurrences :', counts
+            print >> self.out, '    ids         :', ids
+            
+class CommonException:
+    MAX_SET_ITEMS_TO_SHOW = 5
+
+    def __init__(self, ex):
+        self.msgs = set([ex['exception-msg']])
+        self.at = ex['exception-at']
+        self.svns = set([ex['svn-version']])
+        self.counts = 1
+        self.times = set([decodeTime(ex['start-time'])])
+        self.walkers = set([ex['walker-name']])
+        self.ids = set([ex['id']])
+        
+    def equals(self, ex):
+        return self.at == ex['exception-at']
+        
+    def update(self, ex):
+        self.msgs.add(ex['exception-msg'])
+        self.svns.add(ex['svn-version'])
+        self.counts += 1
+        self.walkers.add(ex['walker-name'])
+        self.times.add(decodeTime(ex['start-time']))
+        self.ids.add(ex['id'])
+
+    def bestExample(self, examples):
+        def takeShorter(x, y):
+            if len(y) < len(x):
+                return y
+            else:
+                return x
+        return reduce(takeShorter, examples)
+        
+    def setString(self, s):
+        if len(s) > self.MAX_SET_ITEMS_TO_SHOW:
+            s = [x for x in s][0:self.MAX_SET_ITEMS_TO_SHOW] + ["..."]
+        return ','.join(s)
+        
+    def duration(self):
+        x = sorted(self.times)
+        return "-".join(map(lambda x: x.strftime("%m/%d/%y"), [x[0], x[-1]]))
+        
+    def toStrings(self):
+        return [self.bestExample(self.msgs), self.at, self.setString(self.svns), self.setString(self.walkers), self.counts, self.setString(self.ids), self.duration()] 
+
+addHandler('exceptions', ExceptionReport)
+
+
+# 
+# def long_substr(data):
+#     substr = ''
+#     if len(data) > 1 and len(data[0]) > 0:
+#         for i in range(len(data[0])):
+#             for j in range(len(data[0])-i+1):
+#                 if j > len(substr) and is_substr(data[0][i:i+j], data):
+#                     substr = data[0][i:i+j]
+#     return substr
+# 
+# def is_substr(find, data):
+#     if len(data) < 1 and len(find) < 1:
+#         return False
+#     for i in range(len(data)):
+#         if find not in data[i]:
+#             return False
+#     return True
+# 
+# def parameterizeStrings( strings ):
+#     example = strings[0]
+#     para = ''
+#     
+#     lcs = long_substr(strings)
+#     if lcs == '':
+#         # nothing common at all, we are done
+#         return para
+#     else:
+#         # we need to remove the LCS from all strings
+#         strings = map( lambda x: x.replace(lcs, ''), strings)
+#         
+    
 #
 # utilities
 #
@@ -190,6 +335,25 @@ def resolveFiles(paths):
     map( resolve1, paths )
     return allFiles
 
+def decodeTime(time):
+    return datetime.datetime.strptime(time.split()[0], "%Y/%m/%d")
+    #return datetime.datetime.strptime(time, "%Y/%m/%d %H.%M.%S")
+
+def passesFilters(elt):
+    if OPTIONS.noDev and eltTagEquals(elt,'build-type','dev'):
+        return False
+    if OPTIONS.maxDays != None:
+        now = datetime.datetime.today()
+        now = datetime.datetime(now.year, now.month, now.day)
+        #    <start-time>2010/08/31 15.38.00</start-time>
+        eltTime = decodeTime(elt.find('start-time').text)
+        diff = now - eltTime
+        #print eltTime, now, diff, diff.days
+        if diff.days > OPTIONS.maxDays:
+            return False
+
+    return True
+    
 def readReports(files):
     #print files
     for file in files:
@@ -198,9 +362,11 @@ def readReports(files):
         elem = tree.getroot()
         if elem.tag == RUN_REPORT_LIST:
             for sub in elem:
-                yield sub
+                if passesFilters(sub):
+                    yield sub
         else:
-            yield elem
+            if passesFilters(elem):
+                yield elem
     
 if __name__ == "__main__":
     main()
\ No newline at end of file