diff --git a/python/faiReader.py b/python/faiReader.py
new file mode 100755
index 000000000..5e8a872a0
--- /dev/null
+++ b/python/faiReader.py
@@ -0,0 +1,14 @@
+from itertools import *
+
+def readFAI(file):
+# 1       247249719       3       60      61
+# 2       242951149       251370554       60      61
+# 3       199501827       498370892       60      61
+    return [line.split() for line in open(file)]
+
+
+def readFAIContigOrdering(file):
+# 1       247249719       3       60      61
+# 2       242951149       251370554       60      61
+# 3       199501827       498370892       60      61
+    return dict([[rec[0], i] for rec, i in izip(readFAI(file), count())])
diff --git a/python/mergeVCFs.py b/python/mergeVCFs.py
new file mode 100755
index 000000000..93c30fed6
--- /dev/null
+++ b/python/mergeVCFs.py
@@ -0,0 +1,56 @@
+import os.path
+import sys
+from optparse import OptionParser
+from vcfReader import *
+from itertools import *
+import faiReader
+
+def main():
+    global OPTIONS
+    usage = "usage: %prog [options] file1 ... fileN"
+    parser = OptionParser(usage=usage)
+    parser.add_option("-f", "--f", dest="fai",
+                        type='string', default=None,
+                        help="FAI file defining the sort order of the VCF")
+         
+    (OPTIONS, args) = parser.parse_args()
+    if len(args) == 0:
+        parser.error("Requires at least 1 VCF to merge")
+
+    order = None
+    if OPTIONS.fai <> None: order = faiReader.readFAIContigOrdering(OPTIONS.fai)
+    #print 'Order', order
+
+    header = None
+    records = []
+    for file in args:
+        #print file
+        for header, record, counter in lines2VCF(open(file), extendedOutput = True, decodeAll = False):
+            records.append(record)
+
+    def cmpVCFRecords(r1, r2):
+        if order <> None:
+            c1 = order[str(r1.getChrom())]
+            c2 = order[str(r2.getChrom())]
+            orderCmp = cmp(c1, c2)
+            if orderCmp <> 0:
+                return orderCmp
+        return cmp(r1.getPos(), r2.getPos())
+
+    records.sort(cmpVCFRecords) 
+    for line in formatVCF(header, records):
+        #pass
+        print line
+
+PROFILE = False
+if __name__ == "__main__":
+    if PROFILE:
+        import cProfile
+        cProfile.run('main()', 'fooprof')
+        import pstats
+        p = pstats.Stats('fooprof')
+        p.sort_stats('cumulative').print_stats(10)
+        p.sort_stats('time').print_stats(10)
+        p.sort_stats('time', 'cum').print_stats(.5, 'init')
+    else:
+        main()
\ No newline at end of file
diff --git a/python/snpSelector.py b/python/snpSelector.py
index 2c18d81d0..ef061748a 100755
--- a/python/snpSelector.py
+++ b/python/snpSelector.py
@@ -5,6 +5,8 @@ from vcfReader import *
 #import pylab
 from itertools import *
 import math
+import random
+    
 
 class CallCovariate:
     def __init__(self, feature, left, right, FPRate = None, cumulative = False):
@@ -61,7 +63,7 @@ class RecalibratedCall:
     def __str__(self):
         return '[%s: %s => Q%d]' % (str(self.call), self.featureStringList(), phredScale(self.jointFPErrorRate()))
 
-def readVariants( file ):
+def readVariants( file, maxRecords = None, decodeAll = True ):
     counter = OPTIONS.skip
     f = open(file)
     header, ignore, lines = readVCFHeader(f)
@@ -73,7 +75,7 @@ def readVariants( file ):
         else:
             return None
 
-    return header, filter(None, map(parseVariant, lines2VCF(lines, extendedOutput = True)))
+    return header, filter(None, map(parseVariant, islice(lines2VCF(lines, extendedOutput = True, decodeAll = decodeAll), maxRecords)))
 
 def selectVariants( variants, selector = None ):
     if selector <> None:
@@ -174,7 +176,7 @@ def frange6(*args):
         yield v
         v += step
 
-def calculateBins(variants, field, minValue, maxValue, rangeValue, partitions):
+def calculateBins(variants, field, minValue, maxValue, partitions):
     sortedVariants = sorted(variants, key = lambda x: x.getField(field))
     sortedValues = map(lambda x: x.getField(field), sortedVariants)
     
@@ -198,20 +200,20 @@ def calculateBins(variants, field, minValue, maxValue, rangeValue, partitions):
 
     return bins
 
-
-def calculateBinsLinear(variants, minValue, maxValue, rangeValue, partitions):
-    breaks = list(frange6(minValue, maxValue, rangeValue / partitions))
-    if breaks[len(breaks)-1] <> maxValue:
-        breaks = breaks + ['*']
-    return zip(breaks, map( lambda x: x - 0.001, breaks[1:]))
+# 
+# def calculateBinsLinear(variants, minValue, maxValue, rangeValue, partitions):
+#     breaks = list(frange6(minValue, maxValue, rangeValue / partitions))
+#     if breaks[len(breaks)-1] <> maxValue:
+#         breaks = breaks + ['*']
+#     return zip(breaks, map( lambda x: x - 0.001, breaks[1:]))
 
 def fieldRange(variants, field):
     values = map(lambda v: v.getField(field), variants)
     minValue = min(values)
     maxValue = max(values)
-    rangeValue = maxValue - minValue
-    bins = calculateBins(variants, field, minValue, maxValue, rangeValue, OPTIONS.partitions)
-    return minValue, maxValue, rangeValue, bins
+    #rangeValue = maxValue - minValue
+    bins = calculateBins(variants, field, minValue, maxValue, OPTIONS.partitions)
+    return minValue, maxValue, bins
 
 def printFieldQual( left, right, variants, titv, FPRate, nErrors ):
     print '  %s nVariants=%8d titv=%.2f FPRate=%.2e Q%d' % (binString(left, right), len(variants), titv, FPRate, phredScale(FPRate))
@@ -273,11 +275,11 @@ def all( p, l ):
     return True
 
 def variantBinsForField(variants, field):
-    if not all( lambda x: x.hasField(field), variants):
-        raise Exception('Unknown field ' + field)
+    #if not all( lambda x: x.hasField(field), variants):
+    #    raise Exception('Unknown field ' + field)
     
-    minValue, maxValue, range, bins = fieldRange(variants, field)
-    print 'Field range', minValue, maxValue, range
+    minValue, maxValue, bins = fieldRange(variants, field)
+    print 'Field range', minValue, maxValue
     print 'Partitions', bins
     return bins
 
@@ -317,40 +319,52 @@ class CallCmp:
     
     def FPRate(self):
         return (1.0*self.nFP) / max(self.nTP + self.nFP, 1)
+
+    def FNRate(self):
+        return (1.0*self.nFN) / max(self.nTP + self.nFN, 1)
     
     def __str__(self):
-        return 'TP=%6d FP=%6d FPRate=%.2f FN=%6d' % (self.nTP, self.nFP, self.FPRate(), self.nFN)
+        return 'TP=%6d FP=%6d FPRate=%.2f FN=%6d FNRate=%.2f' % (self.nTP, self.nFP, self.FPRate(), self.nFN, self.FNRate())
 
 def variantInTruth(variant, truth):
-    return variant.getLoc() in truth
+    if variant.getLoc() in truth:
+        return truth[variant.getLoc()]
+    else:
+        return False
 
 def sensitivitySpecificity(variants, truth):
     nTP, nFP = 0, 0    
+    FPs = []
     for variant in variants:
-        if variantInTruth(variant, truth):
+        t = variantInTruth(variant, truth)
+        if t:
+            t.setField("FOUND", 1)
             nTP += 1
         else:
             if OPTIONS.printFP: print 'FP:', variant
             nFP += 1
+            #if variant.getPos() == 1520727:
+            #    print "Variant is missing", variant
+            FPs.append(variant)
     nFN = len(truth) - nTP
-    return CallCmp(nTP, nFP, nFN)
+    return CallCmp(nTP, nFP, nFN), FPs
 
 
 def compareCalls(calls, truthCalls):
     def compare1(cumulative):
         for left, right, selectedVariants in mapVariantBins(calls, 'QUAL', cumulative = cumulative):
-            callComparison = sensitivitySpecificity(selectedVariants, truthCalls)
+            callComparison, theseFPs = sensitivitySpecificity(selectedVariants, truthCalls)
+            for fp in theseFPs: fp.setField("FP", 1)
+            #FPsVariants.append(theseFPs)
             print binString(left, right), 'titv=%.2f' % titv(selectedVariants)[0], callComparison
     
-    print 'PER BIN'
+    print 'PER BIN nCalls=', len(calls)
     compare1(False)
 
-    print 'CUMULATIVE'
+    print 'CUMULATIVE nCalls=', len(calls)
     compare1(True)
     
 def randomSplit(l, pLeft):
-    import random
-    
     def keep(elt, p):
         if p < pLeft:
             return elt, None
@@ -361,7 +375,7 @@ def randomSplit(l, pLeft):
     return get(0), get(1)
 
 def main():
-    global OPTIONS
+    global OPTIONS, header
     usage = "usage: %prog files.list [options]"
     parser = OptionParser(usage=usage)
     parser.add_option("-f", "--f", dest="fields",
@@ -370,6 +384,9 @@ def main():
     parser.add_option("-t", "--truth", dest="truth",
                         type='string', default=None,
                         help="VCF formated truth file")
+    parser.add_option("", "--unFilteredTruth", dest="unFilteredTruth",
+                        action='store_true', default=False,
+                        help="If provided, the unfiltered truth calls will be used in comparisons")
     parser.add_option("-p", "--partitions", dest="partitions",
                         type='int', default=25,
                         help="Number of partitions to examine")
@@ -379,12 +396,18 @@ def main():
     parser.add_option("-m", "--minVariantsPerBin", dest="minVariantsPerBin",
                        type='int', default=10,
                         help="")
+    parser.add_option("-M", "--maxRecords", dest="maxRecords",
+                       type='int', default=None,
+                        help="")
     parser.add_option("-q", "--qMax", dest="maxQScore",
                         type='int', default=30,
                         help="")
     parser.add_option("-o", "--outputVCF", dest="outputVCF",
                         type='string', default=None,
                         help="If provided, VCF file will be written out to this file")
+    parser.add_option("", "--FNoutputVCF", dest="FNoutputVCF",
+                        type='string', default=None,
+                        help="If provided, VCF file will be written out to this file")
     parser.add_option("", "--titv", dest="titvTarget",
                         type='float', default=None,
                         help="If provided, we will optimize calls to the targeted ti/tv rather than that calculated from known calls")
@@ -394,15 +417,16 @@ def main():
     parser.add_option("-b", "--bootstrap", dest="bootStrap",
                        type='float', default=0.0,
                        help="If provided, the % of the calls used to generate the recalibration tables.")
-         
+    
+   
     (OPTIONS, args) = parser.parse_args()
     if len(args) > 2:
         parser.error("incorrect number of arguments")
 
     fields = OPTIONS.fields.split(',')
-    header, allCalls = readVariants(args[0])
+    header, allCalls = readVariants(args[0], OPTIONS.maxRecords)
     print 'Read', len(allCalls), 'calls'
-    print 'header is', header
+    #print 'header is', header
     
     if OPTIONS.titvTarget == None:
         OPTIONS.titvTarget = titv(calls, VCFRecord.isKnown)
@@ -422,10 +446,16 @@ def main():
         recalEvalCalls = recalibrateCalls(callsToEval, fields, covariates)
         printCallQuals(recalEvalCalls, OPTIONS.titvTarget, 'BOOTSTRAP EVAL CALLS')
 
+    truth = None
     if len(args) > 1:
         truthFile = args[1]
         print 'Reading truth file', truthFile
-        truth = dict( [[v.getLoc(), v] for v in readVariants(truthFile)[1]])
+        rawTruth = readVariants(truthFile, maxRecords = None, decodeAll = False)[1]
+        def keepVariant(t): 
+            #print t.getPos(), t.getLoc()
+            return OPTIONS.unFilteredTruth or t.passesFilters()
+        truth = dict( [[v.getLoc(), v] for v in filter(keepVariant, rawTruth)])
+        print len(rawTruth), len(truth)
         
         print '--------------------------------------------------------------------------------'
         print 'Comparing calls to truth', truthFile
@@ -439,10 +469,27 @@ def main():
 
     if OPTIONS.outputVCF:
         f = open(OPTIONS.outputVCF, 'w')
-        print 'HEADER', header
+        #print 'HEADER', header
         for line in formatVCF(header, allCalls):
             print >> f, line
         f.close()
 
+    if truth <> None and OPTIONS.FNoutputVCF:
+        f = open(OPTIONS.FNoutputVCF, 'w')
+        #print 'HEADER', header
+        for line in formatVCF(header, filter( lambda x: not x.hasField("FOUND"), truth.itervalues())):
+            print >> f, line
+        f.close()
+
+PROFILE = False
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    if PROFILE:
+        import cProfile
+        cProfile.run('main()', 'fooprof')
+        import pstats
+        p = pstats.Stats('fooprof')
+        p.sort_stats('cumulative').print_stats(10)
+        p.sort_stats('time').print_stats(10)
+        p.sort_stats('time', 'cum').print_stats(.5, 'init')
+    else:
+        main()
\ No newline at end of file
diff --git a/python/vcfReader.py b/python/vcfReader.py
index f837fadd4..4f6869119 100755
--- a/python/vcfReader.py
+++ b/python/vcfReader.py
@@ -7,26 +7,28 @@ for p in ["AG", "CT"]:
     TRANSITIONS[p] = True
     TRANSITIONS[''.join(reversed(p))] = True
 
-def convertToType(d):
+def convertToType(d, onlyKeys = None):
     out = dict()
     types = [int, float, str]
     for key, value in d.items():
-        for type in types:
-            try:
-                #print 'Parsing', key, value, type
-                out[key] = type(value)
-                #print '  Parsed as', key, value, type
-                break
-            except:
-                pass
+        if onlyKeys == None or key in onlyKeys:
+            for type in types:
+                try:
+                    out[key] = type(value)
+                    break
+                except:
+                    pass
+        else:
+            out[key] = value
     return out
     
 class VCFRecord:
     """Simple support for accessing a VCF record"""
-    def __init__(self, basicBindings, header=None, rest=[]):
+    def __init__(self, basicBindings, header=None, rest=[], decodeAll = True):
         self.header = header
-        self.info = convertToType(parseInfo(basicBindings["INFO"]))
-        self.bindings = convertToType(basicBindings)
+        self.info = parseInfo(basicBindings["INFO"])
+        if decodeAll: self.info = convertToType(self.info)
+        self.bindings = convertToType(basicBindings, onlyKeys = ['POS', 'QUAL'])
         self.rest = rest
     
     def hasHeader(self): return self.header <> None
@@ -112,20 +114,30 @@ class VCFRecord:
         return '\t'.join([str(self.getField(key)) for key in VCF_KEYS] + self.rest)
 
 def parseInfo(s):
-    def handleBoolean(key_val):
-        if len(key_val) == 1:
-            return [key_val[0], 1]
+    d = dict()
+    for elt in s.split(";"):
+        if '=' in elt:
+            key, val = elt.split('=')
         else:
-            return key_val
+            key, val = elt, 1
+        d[key] = val
+    return d
 
-    key_val = map( lambda x: handleBoolean(x.split("=")), s.split(";"))
-    return dict(key_val)
+# def parseInfo(s):
+#     def handleBoolean(key_val):
+#         if len(key_val) == 1:
+#             return [key_val[0], 1]
+#         else:
+#             return key_val
+# 
+#     key_val = map( lambda x: handleBoolean(x.split("=")), s.split(";"))
+#     return dict(key_val)
 
-def string2VCF(line, header=None):
+def string2VCF(line, header=None, decodeAll = True):
     if line[0] != "#":
         s = line.split()
         bindings = dict(zip(VCF_KEYS, s[0:8]))
-        return VCFRecord(bindings, header, rest=s[8:])
+        return VCFRecord(bindings, header, rest=s[8:], decodeAll = decodeAll)
     else:
         return None
 
@@ -139,16 +151,18 @@ def readVCFHeader(lines):
             if header <> []:
                 columnNames = header[-1]
             return header, columnNames, itertools.chain([line], lines)
-    
+            
+    # we reach this point for empty files    
+    return header, columnNames, []
 
-def lines2VCF(lines, extendedOutput = False):
+def lines2VCF(lines, extendedOutput = False, decodeAll = True):
     header, columnNames, lines = readVCFHeader(lines)
     counter = 0
     
     for line in lines:
         if line[0] != "#":
             counter += 1
-            vcf = string2VCF(line, header=columnNames)
+            vcf = string2VCF(line, header=columnNames, decodeAll = decodeAll)
             if vcf <> None:
                 if extendedOutput:
                     yield header, vcf, counter
@@ -159,6 +173,6 @@ def lines2VCF(lines, extendedOutput = False):
 
 def formatVCF(header, records):
     #print records
-    print records[0]
+    #print records[0]
     return itertools.chain(header, map(VCFRecord.format, records))