gatk-3.8/python/recalAssociation.py

91 lines
2.9 KiB
Python
Raw Normal View History

from optparse import OptionParser
from math import log10
from math import floor
def foreach( appl, col ):
for z in col:
appl(z)
def parseWiggle(line):
return int(line.strip())
def parseTDF(line):
return int(line.split("Q: ")[1].strip())
def recalibrateJointly(files,JECDF):
print("Not implemented.")
def recalibrate(fname,hist,out):
print(fname)
output = open(out + "." + fname.rsplit(".",1)[0].rsplit(".",1)[1], 'w')
denom = sum(hist)
cumQuals = dict()
for key1 in hist:
sumLess = 0
for key2 in hist:
if ( key2 <= key1 ):
sumLess += hist[key2]
cumQuals[key1] = int(-10*log10((0.0+sumLess)/denom))
use = parseTDF
if ( fname.endswith(".wig") ):
use = parseWiggle
inFile = open(fname)
if ( fname.endswith(".wig") ):
output.write(inFile.readline())
def rewrite(line,val):
if ( line.find("Q: ") == -1 ):
return str(val)+"\n"
else:
spline = line.split("Q: ")
bef = spline[0]
af = spline[1]
afSp = af.split("\t")
afSp[0] = str(val)
return bef + "\t".join(afSp)
foreach( lambda u: output.write(rewrite(u,cumQuals[use(u)])), inFile.readlines() )
def run(opt,arg):
print(opt)
print(arg)
files = map(lambda u: open(u), opt.in_files)
fn = dict(zip(files,opt.in_files))
foreach( lambda u: u[1].readline(), filter( lambda u: u[0].endswith('.wig'), zip(opt.in_files,files)))
isWig = map(lambda u: u.endswith('.wig'), opt.in_files)
isWig = dict(zip(files,isWig))
marginalCumulativeDists = dict(map( lambda u: [u,dict()], opt.in_files))
def addVal(f,v):
d = marginalCumulativeDists[fn[f]]
if ( v not in d ):
d[v] = 0
d[v] += 1
def parseLine(fl,line):
if ( isWig[fl] ):
return parseWiggle(line)
else:
return parseTDF(line)
foreach( lambda f: foreach( lambda v: addVal(f,v), map(lambda u: parseLine(f,u), f.readlines() )), files )
foreach( lambda f: f.close(), files )
#print(marginalCumulativeDists)
if ( opt.joint ):
recalibrateJointly(opt.in_files,marginalCumulativeDists)
else:
foreach( lambda s: recalibrate(s,marginalCumulativeDists[s],opt.out), opt.in_files )
def main():
usage = "usage: %prog [options] arg"
parser = OptionParser(usage)
parser.add_option("-I","--in",dest="in_files",help="file(s) to recalibrate",action="append")
parser.add_option("-J","--recalibrateJointly",dest="joint",action="store_true",help="Recalibrate quality scores jointly across files, rather than for each independently. Assumes lines match up exactly.")
parser.add_option("-O","--output",dest="out",action="store",help="The base name for the recalibrated output files")
(options,args) = parser.parse_args()
run(options,args)
if __name__ == "__main__":
main()