import os.path import sys from optparse import OptionParser from vcfReader import * #import pylab import operator from itertools import * import math import random DEBUG = False class Range: ANY = '*' def __init__(self, left = ANY, right = ANY, leftOpen = False, rightOpen = False): self.left = left self.right = right self.leftOpen = leftOpen self.rightOpen = rightOpen def __str__(self): leftB, rightB = '[', ']' if self.leftOpen: leftB = '(' if self.rightOpen: rightB = ')' return '%s%s, %s%s' % (leftB, self.left, self.right, rightB) __repr__ = __str__ def dashedString(self): return str(self).replace(", ", "-") def contains(self, v): def test(r, op, open): return r == Range.ANY or op(v, r) or (not open and v == r) return test(self.left, operator.__gt__, self.leftOpen) and test(self.right, operator.__lt__, self.rightOpen) class CallCovariate: def __init__(self, feature, featureRange, qualRange, FPRate = None): self.feature = feature self.featureRange = featureRange self.qualRange = qualRange self.FPRate = FPRate def containsVariant(self, call): inFeature = self.featureRange.contains(call.getField(self.feature)) inQual = self.qualRange.contains(call.getQual()) #print 'inFeature, inQual', inFeature, inQual return inFeature and inQual def getFPRate(self): return self.FPRate def getFeature(self): return self.feature def getCovariateField(self): return self.getFeature() + '_RQ' def __str__(self): return "[CC feature=%s range=%s qualRange=%s]" % (self.feature, self.featureRange, self.qualRange) class RecalibratedCall: def __init__(self, call, features): self.call = call self.features = dict([[feature, None] for feature in features]) def recalFeature( self, feature, FPRate ): assert self.features[feature] == None, "Feature " + feature + ' has value ' + str(self.features[feature]) + ' for call ' + str(self.call) # not reassigning values assert FPRate <= 1 and FPRate >= 0 self.features[feature] = FPRate def getFeature( self, feature, missingValue = None, phredScaleValue = False ): v = self.features[feature] if v == None: return missingValue elif phredScaleValue: return phredScale(v) else: return v def jointFPErrorRate(self): #print self.features logTPRates = [math.log10(1-r) for r in self.features.itervalues() if r <> None] logJointTPRate = reduce(lambda x, y: x + y, logTPRates, 0) logJointTPRate = min(logJointTPRate, 1e-3 / 3) # approximation from het of 0.001 jointTPRate = math.pow(10, logJointTPRate) #print logTPRates #print logJointTPRate, jointTPRate return 1 - jointTPRate def featureStringList(self): return ','.join(map(lambda feature: '%s=Q%d' % (feature, self.getFeature(feature, '*', True)), self.features.iterkeys())) def __str__(self): return '[%s: %s => Q%d]' % (str(self.call), self.featureStringList(), phredScale(self.jointFPErrorRate())) def readVariants( file, maxRecords = None, decodeAll = True, downsampleFraction = 1, filter = None, minQScore = -1, mustBeVariant = False ): if filter == None: filter = not OPTIONS.unfiltered f = open(file) header, columnNames, lines = readVCFHeader(f) nLowQual = 0 def parseVariant(args): global nLowQual header1, VCF, counter = args if filter and not VCF.passesFilters() or ( False and mustBeVariant == True and not VCF.isVariant() ): # currently ignore mustBeVariant #print 'filtering', VCF return None elif VCF.getQual() <= minQScore: #print 'filtering', VCF #nLowQual += 1 return None elif random.random() <= downsampleFraction: return VCF else: return None variants = ifilter(None, imap(parseVariant, islice(lines2VCF(lines, header=header, columnNames = columnNames, extendedOutput = True, decodeAll = decodeAll), maxRecords))) if nLowQual > 0: print '%d snps filtered due to QUAL < %d' % (nLowQual, minQScore) return header, variants def selectVariants( variants, selector = None ): if selector <> None: return filter(selector, variants) else: return variants def titv(variants): ti = len(filter(VCFRecord.isTransition, variants)) tv = len(variants) - ti titv = ti / (1.0*max(tv,1)) return titv def dbSNPRate(variants): inDBSNP = len(filter(VCFRecord.isKnown, variants)) return float(inDBSNP) / max(len(variants),1) def gaussian(x, mu, sigma): constant = 1 / math.sqrt(2 * math.pi * sigma**2) exponent = -1 * ( x - mu )**2 / (2 * sigma**2) return constant * math.exp(exponent) # if target = T, and FP calls have ti/tv = 0.5, we want to know how many FP calls # there are in N calls with ti/tv of X. # def titvFPRateEstimate(variants, target): titvRatio = titv(variants) # f <- function(To,T) { (To - T) / (1/2 - T) + 0.001 } def theoreticalCalc(): if titvRatio >= target: FPRate = 0 else: FPRate = (titvRatio - target) / (0.5 - target) FPRate = min(max(FPRate, 0), 1) TPRate = max(min(1 - FPRate, 1 - dephredScale(OPTIONS.maxQScore)), dephredScale(OPTIONS.maxQScore)) if DEBUG: print 'FPRate', FPRate, titvRatio, target assert FPRate >= 0 and FPRate <= 1 return TPRate # gaussian model def gaussianModel(): LEFT_HANDED = True sigma = 1 # old value is 5 constant = 1 / math.sqrt(2 * math.pi * sigma**2) exponent = -1 * ( titvRatio - target )**2 / (2 * sigma**2) TPRate = gaussian(titvRatio, target, sigma) / gaussian(target, target, sigma) if LEFT_HANDED and titvRatio >= target: TPRate = 1 TPRate -= dephredScale(OPTIONS.maxQScore) if DEBUG: print 'TPRate', TPRate, constant, exponent, dephredScale(OPTIONS.maxQScore) return TPRate FPRate = 1 - theoreticalCalc() #FPRate = 1 - gaussianModel() nVariants = len(variants) if DEBUG: print ':::', nVariants, titvRatio, target, FPRate return titvRatio, FPRate def phredScale(errorRate): return -10 * math.log10(max(errorRate, 1e-10)) def dephredScale(qscore): return math.pow(10, float(qscore) / -10) def frange6(*args): """A float range generator.""" start = 0.0 step = 1.0 l = len(args) if l == 1: end = args[0] elif l == 2: start, end = args elif l == 3: start, end, step = args if step == 0.0: raise ValueError, "step must not be zero" else: raise TypeError, "frange expects 1-3 arguments, got %d" % l v = start while True: if (step > 0 and v >= end) or (step < 0 and v <= end): raise StopIteration yield v v += step def compareFieldValues( v1, v2 ): if type(v1) <> type(v2): #print 'Different types', type(v1), type(v2) c = cmp(type(v1), type(v2)) else: c = cmp(v1, v2) #print 'Comparing %s %s = %s' % (v1, v2, c) return c def calculateBins(variants, field, minValue, maxValue, partitions): values = map(lambda x: x.getField(field), variants) return calculateBinsForValues(values, field, minValue, maxValue, partitions) def calculateBinsForValues(values, field, minValue, maxValue, partitions): sortedValues = sorted(values) captureFieldRangeForPrinting(field, sortedValues) targetBinSize = len(values) / (1.0*partitions) #print sortedValues uniqBins = groupby(sortedValues) binsAndSizes = map(lambda x: [x[0], len(list(x[1]))], uniqBins) #print 'BINS AND SIZES', binsAndSizes def bin2Break(bin): return [bin[0], bin[0], bin[1]] bins = [bin2Break(binsAndSizes[0])] for bin in binsAndSizes[1:]: #print ' Breaks', bins #print ' current bin', bin curLeft = bins[-1][0] curSize = bin[1] prevSize = bins[-1][2] #print curSize, prevSize if curSize + prevSize > targetBinSize or (not isNumber(curLeft) and isNumber(bin[0])): #print ' => appending', bin2Break(bin) bins.append(bin2Break(bin)) else: bins[-1][1] = bin[0] bins[-1][2] += curSize #print 'Returning ', bins #sys.exit(1) return bins def fieldRange(variants, field): values = map(lambda v: v.getField(field), variants) minValue = min(values) maxValue = max(values) #rangeValue = maxValue - minValue bins = calculateBins(variants, field, minValue, maxValue, OPTIONS.partitions) validateBins(bins) return minValue, maxValue, bins def validateBins(bins): #print 'Bins are', bins for left1, right1, count1 in bins: for left2, right2, count2 in bins: def contains2(x): return left2 < x and x < right2 if left1 <> left2 and right1 <> right2: if None in [left1, left2, right1, right2]: pass # we're ok elif contains2(left1) or contains2(right2): raise Exception("Bad bins", left1, right1, left2, right2) def printFieldQualHeader(): more = "" if TRUTH_CALLS <> None: more = CallCmp.HEADER def p(stream): if stream <> None: print >> stream, ' %20s %20s left right %15s nVariants nNovels titv titvNovels dbSNP Q' % ("category", "field", "qRange"), more p(sys.stdout) p(RECAL_LOG) def printFieldQual( category, field, cc, variants ): more = "" if TRUTH_CALLS <> None: callComparison, theseFPs = sensitivitySpecificity(variants, TRUTH_CALLS) more = str(callComparison) novels = selectVariants(variants, VCFRecord.isNovel) def p(stream): if stream <> None: print >> stream, ' %20s %20s %15s %15s %8d %8d %2.2f %2.2f %3.2f %3d' % (category, field, binString(field, cc.featureRange), cc.qualRange.dashedString(), len(variants), len(novels), titv(variants), titv(novels), dbSNPRate(variants) * 100, phredScale(cc.FPRate)), more p(sys.stdout) p(RECAL_LOG) FIELD_RANGES = dict() def captureFieldRangeForPrinting(field, sortedValues): """Finds the minimum float value in sortedValues for convenience printing in recal.log""" #print sortedValues floatValues = filter(isNumber, sortedValues) if floatValues <> []: FIELD_RANGES[field] = floatValues[0] #print 'Setting field range to', field, FIELD_RANGES[field] def isNumber(x): return isinstance(x, (int, long, float)) def binString(field, cc): epsilon = 1e-2 left, right = cc.left, cc.right leftStr = str(left) rightStr = "%5s" % str(right) if OPTIONS.plottableNones and not isNumber(left) and not isNumber(right) and field in FIELD_RANGES: left = right = FIELD_RANGES[field] - epsilon if OPTIONS.plottableNones and not isNumber(left) and isNumber(right): left = right - epsilon if isNumber(left): leftStr = "%.4f" % left if isNumber(right): rightStr = "%.4f" % right return '%12s %12s' % (leftStr, rightStr) # # # def recalibrateCalls(variants, fields, callCovariates): def phred(v): return phredScale(v) for variant in variants: recalCall = RecalibratedCall(variant, fields) originalQual = variant.getField('QUAL') for callCovariate in callCovariates: if callCovariate.containsVariant(variant): FPR = callCovariate.getFPRate() recalCall.recalFeature(callCovariate.getFeature(), FPR) recalCall.call.setField(callCovariate.getCovariateField(), phred(FPR)) #recalCall.call.setField('QUAL', phred(recalCall.jointFPErrorRate())) recalCall.call.setField('QUAL', phred(recalCall.jointFPErrorRate())) recalCall.call.setField('OQ', originalQual) #print 'recalibrating', variant.getLoc() #print ' =>', variant yield recalCall.call # # # def optimizeCalls(variants, fields, titvTarget): callCovariates = calibrateFeatures(variants, fields, titvTarget, category = "covariates", useBreaks=True) recalCalls = recalibrateCalls(variants, fields, callCovariates) return recalCalls, callCovariates def printCallQuals(field, recalCalls, titvTarget, info = ""): print '--------------------------------------------------------------------------------' print info calibrateFeatures(recalCalls, [field], titvTarget, printCall = True, cumulative = False, forcePrint = True, prefix = "OPT-", printHeader = False, category = "optimized-calls" ) print 'Cumulative' calibrateFeatures(recalCalls, [field], titvTarget, printCall = True, cumulative = True, forcePrint = True, prefix = "OPTCUM-", printHeader = False, category = "optimized-calls" ) def all( p, l ): for elt in l: if not p(elt): return False return True def mapVariantBins(variants, field, cumulative = False, breakQuals = [Range()]): minValue, maxValue, featureBins = fieldRange(variants, field) #print 'BREAKQuals', breakQuals[0] bins = [(x,y) for x in featureBins for y in breakQuals] #print 'BINS', bins def variantsInBin(featureBin, qualRange): right = featureBin[1] if cumulative: right = Range.ANY cc = CallCovariate(field, Range(featureBin[0], right), qualRange) return cc, selectVariants(variants, lambda v: cc.containsVariant(v)) #sys.exit(1) return starmap( variantsInBin, bins ) def qBreaksRanges(qBreaks, useBreaks): if qBreaks == None or not useBreaks: return [Range()] # include everything in a single range else: breaks = map(float, qBreaks.split(',')) return map(lambda x, y: Range(x,y, rightOpen = True), chain([Range.ANY], breaks), chain(breaks, [Range.ANY])) def calibrateFeatures(variants, fields, titvTarget, printCall = False, cumulative = False, forcePrint = False, prefix = '', printHeader = True, category = None, useBreaks = False ): covariates = [] if printHeader: printFieldQualHeader() for field in fields: if DEBUG: print 'Optimizing field', field titv, FPRate = titvFPRateEstimate(variants, titvTarget) #print 'Overall FRRate:', FPRate, nErrors, phredScale(FPRate) for cc, selectedVariants in mapVariantBins(variants, field, cumulative = cumulative, breakQuals = qBreaksRanges(OPTIONS.QBreaks, useBreaks and field <> 'QUAL')): #print 'CC', cc, field, useBreaks if len(selectedVariants) > max(OPTIONS.minVariantsPerBin,1) or forcePrint: titv, FPRate = titvFPRateEstimate(selectedVariants, titvTarget) #dbsnp = dbSNPRate(selectedVariants) cc.FPRate = FPRate covariates.append(cc) printFieldQual( category, prefix + field, cc, selectedVariants ) else: print 'Not calibrating bin', cc, 'because it contains too few variants:', len(selectedVariants) return covariates class CallCmp: def __init__(self, nTP, nFP, nFN): self.nTP = nTP self.nFP = nFP self.nFN = nFN # def FPRate(self): # return (1.0*self.nFP) / max(self.nTP + self.nFP, 1) def FNRate(self): return (1.0*self.nFN) / max(self.nTP + self.nFN, 1) def sensitivity(self): # = TP / (TP + FN) return (1.0*self.nTP) / max( self.nTP + self.nFN,1 ) def PPV(self): # = TP / (TP + FP) return (1.0*self.nTP) / max( self.nTP + self.nFP, 1 ) HEADER = "TP FP FN FNRate Sensitivity PPV" def __str__(self): return '%6d %6d %6d %.3f %.3f %.3f' % (self.nTP, self.nFP, self.nFN, self.FNRate(), self.sensitivity(), self.PPV()) def variantInTruth(variant, truth): if variant.getLoc() in truth: return truth[variant.getLoc()] else: return False def isVariantInSample(t, sample): #print "isVariantInSample", t.getLoc(), t.getField(sample), x return t.getField(sample) <> "0/0" def variantsInTruth(truth): # fixme return len(filter(lambda x: isVariantInSample(x, OPTIONS.useSample), truth)) def sensitivitySpecificity(variants, truth): nTP, nFP = 0, 0 FPs = [] for variant in variants: t = variantInTruth(variant, truth) isTP, isFP = False, False if OPTIONS.useSample or OPTIONS.onlyAtTruth: if t: # we have a site isTP = (isVariantInSample(t, OPTIONS.useSample) and t.isVariant()) or (not isVariantInSample(t, OPTIONS.useSample) and not t.isVariant()) isFP = not isTP else: isTP = t isFP = not t #if variant.getLoc() == "1:867694": # print variant, 'T: [', t, '] isTP, isFP', isTP, isFP if isTP: t.setField("FN", 0) variant.setField("TP", 1) nTP += 1 elif isFP: nFP += 1 variant.setField("TP", 0) #print t, variant, "is a FP!" FPs.append(variant) nRef = len(filter(lambda x: not x.isVariant(), truth.itervalues())) nFN = variantsInTruth(truth.itervalues()) - nTP - nRef #print 'nRef', nTP, nFP, nFN, nRef return CallCmp(nTP, nFP, nFN), FPs def markTruth(calls): if not OPTIONS.useSample: for variant in calls.itervalues(): variant.setField("TP", 0) # set the TP field to 0 def compareCalls(calls, truthCalls): #markTruth(truthCalls) def compare1(name, cumulative): for field in ["QUAL", "OQ"]: for cc, selectedVariants in mapVariantBins(calls, field, cumulative = cumulative): #print selectedVariants[0] printFieldQual("truth-comparison-" + name, field, cc, selectedVariants ) print 'PER BIN nCalls=', len(calls) compare1('per-bin', False) print 'CUMULATIVE nCalls=', len(calls) compare1('cum', True) def randomSplit(l, pLeft): def keep(elt, p): if p < pLeft: return elt, None else: return None, elt data = [keep(elt, p) for elt, p in zip(l, map(lambda x: random.random(), l))] def get(i): return filter(lambda x: x <> None, [x[i] for x in data]) return get(0), get(1) def setup(): global OPTIONS, header usage = "usage: %prog files.list [options]" parser = OptionParser(usage=usage) parser.add_option("-f", "--f", dest="fields", type='string', default="QUAL", help="Comma-separated list of fields (either in the VCF columns of as INFO keys) to use during optimization [default: %default]") parser.add_option("-t", "--truth", dest="truth", type='string', default=None, help="VCF formated truth file. If provided, the script will compare the input calls with the truth calls. It also emits calls tagged as TP and a separate file of FP calls") parser.add_option("-l", "--recalLog", dest="recalLog", type='string', default="recal.log", help="VCF formated truth file. If provided, the script will compare the input calls with the truth calls. It also emits calls tagged as TP and a separate file of FP calls") parser.add_option("-u", "--unfiltered", dest="unfiltered", action='store_true', default=False, help="If provided, unfiltered calls will be used in comparisons [default: %default]") parser.add_option("", "--plottable", dest="plottableNones", action='store_true', default=False, help="If provided, will generate fake plottable points for annotations with None values -- doesn't effect the behavior of the system just makes it easy to plot outputs [default: %default]") parser.add_option("", "--onlyAtTruth", dest="onlyAtTruth", action='store_true', default=False, help="If provided, we only consider TP/FP/FN at truth sites[default: %default]") parser.add_option("-p", "--partitions", dest="partitions", type='int', default=25, help="Number of partitions to use for each feature. Don't use so many that the number of variants per bin is very low. [default: %default]") parser.add_option("", "--maxRecordsForCovariates", dest="maxRecordsForCovariates", type='int', default=2000000, help="Derive covariate information from up to this many VCF records. For files with more than this number of records, the system downsamples the reads [default: %default]") parser.add_option("-m", "--minVariantsPerBin", dest="minVariantsPerBin", type='int', default=10, help="Don't include any covariates with fewer than this number of variants in the bin, if such a thing happens. NEEDS TO BE FIXED") parser.add_option("-M", "--maxRecords", dest="maxRecords", type='int', default=None, help="Maximum number of input VCF records to process, if provided. Default is all records") parser.add_option("-Q", "--qMin", dest="minQScore", type='int', default=-1, help="The minimum Q score of the initial SNP list to consider for selection [default: %default]") parser.add_option("", "--QBreaks", dest="QBreaks", type='string', default=None, help="Breaks in QUAL for generating covarites [default: %default]") parser.add_option("-q", "--qMax", dest="maxQScore", type='int', default=60, help="The maximum Q score allowed for both a single covariate and the overall QUAL score [default: %default]") parser.add_option("-o", "--outputVCF", dest="outputVCF", type='string', default="recal.vcf", help="If provided, a VCF file will be written out to this file [default: %default]") parser.add_option("", "--FNoutputVCF", dest="FNoutputVCF", type='string', default=None, help="If provided, VCF file will be written out to this file [default: %default]") parser.add_option("", "--titv", dest="titvTarget", type='float', default=None, help="If provided, we will optimize calls to the targeted ti/tv rather than that calculated from known calls [default: %default]") parser.add_option("-b", "--bootstrap", dest="bootStrap", type='float', default=None, help="If provided, the % of the calls used to generate the recalibration tables. [default: %default]") parser.add_option("-s", "--useSample", dest="useSample", type='string', default=False, help="If provided, we will examine sample genotypes for this sample, and consider TP/FP/FN in the truth conditional on sample genotypes [default: %default]") parser.add_option("-r", "--dontRecalibrate", dest="dontRecalibrate", action='store_true', default=False, help="If provided, we will not actually do anything to the calls, they will just be assessed [default: %default]") (OPTIONS, args) = parser.parse_args() if len(args) > 2: parser.error("incorrect number of arguments") return args def assessCalls(file): print 'Counting records in VCF', file numberOfRecords = quickCountRecords(open(file)) if OPTIONS.maxRecords <> None and OPTIONS.maxRecords < numberOfRecords: numberOfRecords = OPTIONS.maxRecords downsampleFraction = min(float(OPTIONS.maxRecordsForCovariates) / numberOfRecords, 1) header, allCalls = readVariants(file, OPTIONS.maxRecords, downsampleFraction=downsampleFraction, minQScore = OPTIONS.minQScore) allCalls = list(allCalls) print 'Number of VCF records', numberOfRecords, ', max number of records for covariates is', OPTIONS.maxRecordsForCovariates, 'so keeping', downsampleFraction * 100, '% of records' print 'Number of selected VCF records', len(allCalls) titvtarget = OPTIONS.titvTarget if titvtarget == None: titvtarget = titv(selectVariants(allCalls, VCFRecord.isKnown)) print 'Ti/Tv all ', titv(allCalls) print 'Ti/Tv known', titv(selectVariants(allCalls, VCFRecord.isKnown)) print 'Ti/Tv novel', titv(selectVariants(allCalls, VCFRecord.isNovel)) return header, allCalls, titvtarget def determineCovariates(allCalls, titvtarget, fields): if OPTIONS.bootStrap: callsToOptimize, recalEvalCalls = randomSplit(allCalls, OPTIONS.bootStrap) else: callsToOptimize = allCalls recalOptCalls, covariates = optimizeCalls(callsToOptimize, fields, titvtarget) printCallQuals("QUAL", list(recalOptCalls), titvtarget, 'OPTIMIZED CALLS') if OPTIONS.bootStrap: recalibatedEvalCalls = recalibrateCalls(recalEvalCalls, fields, covariates) printCallQuals("QUAL", list(recalibatedEvalCalls), titvtarget, 'BOOTSTRAP EVAL CALLS') return covariates def writeRecalibratedCalls(file, header, calls): if file: f = open(file, 'w') #print 'HEADER', header i = 0 for line in formatVCF(header, calls): if i % 10000 == 0: print 'writing VCF record', i i += 1 print >> f, line f.close() def readTruth(truthVCF): print 'Reading truth file', truthVCF rawTruth = list(readVariants(truthVCF, maxRecords = None, decodeAll = True, mustBeVariant = True)[1]) truth = dict( [[v.getLoc(), v] for v in rawTruth]) print 'Number of raw and passing filter truth calls', len(rawTruth), len(truth) return truth def evaluateTruth(header, callVCF, truth, truthVCF): print 'Reading variants back in from', callVCF header, calls = readVariants(callVCF) calls = list(calls) print '--------------------------------------------------------------------------------' print 'Comparing calls to truth', truthVCF print '' compareCalls(calls, truth) writeRecalibratedCalls(callVCF, header, calls) def isFN(v): return isVariantInSample(v, OPTIONS.useSample) and not v.hasField("FN") if truth <> None and OPTIONS.FNoutputVCF: f = open(OPTIONS.FNoutputVCF, 'w') #print 'HEADER', header for line in formatVCF(header, filter( isFN, truth.itervalues())): print >> f, line f.close() TRUTH_CALLS = None RECAL_LOG = None def main(): global TRUTH_CALLS, RECAL_LOG args = setup() fields = OPTIONS.fields.split(',') truthVCF = None #print("LENGTH OF ARGS "+str(len(args))) if OPTIONS.truth <> None: truthVCF = OPTIONS.truth TRUTH_CALLS = readTruth(truthVCF) if OPTIONS.recalLog <> None: RECAL_LOG = open(OPTIONS.recalLog, "w") print >> RECAL_LOG, "# optimized vcf", args[0] print >> RECAL_LOG, "# truth vcf", truthVCF for key, value in OPTIONS.__dict__.iteritems(): print >> RECAL_LOG, '#', key, value header, allCalls, titvTarget = assessCalls(args[0]) if not OPTIONS.dontRecalibrate: covariates = determineCovariates(allCalls, titvTarget, fields) header, callsToRecalibate = readVariants(args[0], OPTIONS.maxRecords, minQScore = OPTIONS.minQScore) RecalibratedCalls = recalibrateCalls(callsToRecalibate, fields, covariates) writeRecalibratedCalls(OPTIONS.outputVCF, header, RecalibratedCalls) else: printFieldQualHeader() printCallQuals("QUAL", allCalls, titvTarget) OPTIONS.outputVCF = args[0] if truthVCF <> None: evaluateTruth(header, OPTIONS.outputVCF, TRUTH_CALLS, truthVCF) PROFILE = False if __name__ == "__main__": if PROFILE: import cProfile cProfile.run('main()', 'fooprof') import pstats p = pstats.Stats('fooprof') p.sort_stats('cumulative').print_stats(10) p.sort_stats('time').print_stats(10) p.sort_stats('time', 'cum').print_stats(.5, 'init') else: main()