2011-03-12 01:32:45 +08:00
from optparse import OptionParser
from math import log10
from math import floor
def foreach ( appl , col ) :
for z in col :
appl ( z )
def parseWiggle ( line ) :
return int ( line . strip ( ) )
def parseTDF ( line ) :
return int ( line . split ( " Q: " ) [ 1 ] . strip ( ) )
def recalibrateJointly ( files , JECDF ) :
print ( " Not implemented. " )
def recalibrate ( fname , hist , out ) :
print ( fname )
output = open ( out + " . " + fname . rsplit ( " . " , 1 ) [ 0 ] . rsplit ( " . " , 1 ) [ 1 ] , ' w ' )
2011-03-12 01:34:27 +08:00
denom = sum ( hist . values ( ) )
2011-03-12 01:32:45 +08:00
cumQuals = dict ( )
for key1 in hist :
2011-03-12 01:37:40 +08:00
sumMore = 0
2011-03-12 01:32:45 +08:00
for key2 in hist :
2011-03-12 01:37:40 +08:00
if ( key2 > = key1 ) :
2011-03-12 01:36:41 +08:00
sumMore + = hist [ key2 ]
cumQuals [ key1 ] = min ( int ( - 10 * log10 ( ( 0.0 + sumMore ) / denom ) ) , 150 )
2011-03-12 01:32:45 +08:00
use = parseTDF
if ( fname . endswith ( " .wig " ) ) :
use = parseWiggle
inFile = open ( fname )
if ( fname . endswith ( " .wig " ) ) :
output . write ( inFile . readline ( ) )
def rewrite ( line , val ) :
if ( line . find ( " Q: " ) == - 1 ) :
return str ( val ) + " \n "
else :
spline = line . split ( " Q: " )
bef = spline [ 0 ]
af = spline [ 1 ]
afSp = af . split ( " \t " )
afSp [ 0 ] = str ( val )
return bef + " \t " . join ( afSp )
foreach ( lambda u : output . write ( rewrite ( u , cumQuals [ use ( u ) ] ) ) , inFile . readlines ( ) )
def run ( opt , arg ) :
print ( opt )
print ( arg )
files = map ( lambda u : open ( u ) , opt . in_files )
fn = dict ( zip ( files , opt . in_files ) )
foreach ( lambda u : u [ 1 ] . readline ( ) , filter ( lambda u : u [ 0 ] . endswith ( ' .wig ' ) , zip ( opt . in_files , files ) ) )
isWig = map ( lambda u : u . endswith ( ' .wig ' ) , opt . in_files )
isWig = dict ( zip ( files , isWig ) )
marginalCumulativeDists = dict ( map ( lambda u : [ u , dict ( ) ] , opt . in_files ) )
def addVal ( f , v ) :
d = marginalCumulativeDists [ fn [ f ] ]
if ( v not in d ) :
d [ v ] = 0
d [ v ] + = 1
def parseLine ( fl , line ) :
if ( isWig [ fl ] ) :
return parseWiggle ( line )
else :
return parseTDF ( line )
foreach ( lambda f : foreach ( lambda v : addVal ( f , v ) , map ( lambda u : parseLine ( f , u ) , f . readlines ( ) ) ) , files )
foreach ( lambda f : f . close ( ) , files )
#print(marginalCumulativeDists)
if ( opt . joint ) :
recalibrateJointly ( opt . in_files , marginalCumulativeDists )
else :
foreach ( lambda s : recalibrate ( s , marginalCumulativeDists [ s ] , opt . out ) , opt . in_files )
def main ( ) :
usage = " usage: % prog [options] arg "
parser = OptionParser ( usage )
parser . add_option ( " -I " , " --in " , dest = " in_files " , help = " file(s) to recalibrate " , action = " append " )
parser . add_option ( " -J " , " --recalibrateJointly " , dest = " joint " , action = " store_true " , help = " Recalibrate quality scores jointly across files, rather than for each independently. Assumes lines match up exactly. " )
parser . add_option ( " -O " , " --output " , dest = " out " , action = " store " , help = " The base name for the recalibrated output files " )
( options , args ) = parser . parse_args ( )
run ( options , args )
if __name__ == " __main__ " :
main ( )