Much-improved Python analysis routines, as well as an easier / more correct merging utility. Better R scripts, which now close recalibration data by the confidence of the quality score itself
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1081 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
ef546868bf
commit
caf5aef0f8
|
|
@ -63,7 +63,7 @@ if __name__ == "__main__":
|
||||||
|
|
||||||
jobid = None
|
jobid = None
|
||||||
if OPTIONS.ignoreExistingFiles or not os.path.exists(spec.getMergedBAM()):
|
if OPTIONS.ignoreExistingFiles or not os.path.exists(spec.getMergedBAM()):
|
||||||
output = spec.getMergedBase() + '.stdout'
|
output = spec.getMergedBase()
|
||||||
cmd = spec.mergeCmd(OPTIONS.mergeBin, useSamtools = OPTIONS.useSamtools)
|
cmd = spec.mergeCmd(OPTIONS.mergeBin, useSamtools = OPTIONS.useSamtools)
|
||||||
#print cmd
|
#print cmd
|
||||||
jobid = farm_commands.cmd(cmd, OPTIONS.farmQueue, output, just_print_commands = OPTIONS.dry)
|
jobid = farm_commands.cmd(cmd, OPTIONS.farmQueue, output, just_print_commands = OPTIONS.dry)
|
||||||
|
|
|
||||||
|
|
@ -59,7 +59,7 @@ class MergeFilesSpec:
|
||||||
|
|
||||||
def filename(self):
|
def filename(self):
|
||||||
if self.merged_filename_base <> None:
|
if self.merged_filename_base <> None:
|
||||||
return self.merged_filename_base + '.' + self.group()
|
return self.merged_filename_base + '.'.join([self.group()])
|
||||||
else:
|
else:
|
||||||
return self.group()
|
return self.group()
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -10,14 +10,17 @@ from itertools import *
|
||||||
import math
|
import math
|
||||||
import operator
|
import operator
|
||||||
|
|
||||||
|
MAX_QUAL_SCORE = 50
|
||||||
|
|
||||||
def phredQScore( nMismatches, nBases ):
|
def phredQScore( nMismatches, nBases ):
|
||||||
#print 'phredQScore', nMismatches, nBases
|
#print 'phredQScore', nMismatches, nBases
|
||||||
if nMismatches == 0:
|
if nMismatches == 0:
|
||||||
return 40
|
return MAX_QUAL_SCORE
|
||||||
elif nBases == 0:
|
elif nBases == 0:
|
||||||
return 0
|
return 0
|
||||||
else:
|
else:
|
||||||
return -10 * math.log10(float(nMismatches) / nBases)
|
return min(-10 * math.log10(float(nMismatches) / nBases), MAX_QUAL_SCORE)
|
||||||
|
return r
|
||||||
|
|
||||||
|
|
||||||
def phredScore2ErrorProp(qual):
|
def phredScore2ErrorProp(qual):
|
||||||
|
|
@ -30,16 +33,16 @@ def tryByInt(s):
|
||||||
except:
|
except:
|
||||||
return s
|
return s
|
||||||
|
|
||||||
expectedHeader = 'rg,dn,Qrep,pos,NBases,MMismatches,Qemp'.split(',')
|
expectedHeader = 'rg,pos,Qrep,dn,nBases,nMismatches,Qemp'.split(',')
|
||||||
defaultValues = '0,**,0,0,0,0,0'.split(',')
|
defaultValues = '0,0,0,**,0,0,0'.split(',')
|
||||||
class RecalData(dict):
|
class RecalData(dict):
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.parse(expectedHeader, defaultValues)
|
self.parse(expectedHeader, defaultValues)
|
||||||
|
|
||||||
def parse(self, header, data):
|
def parse(self, header, data):
|
||||||
# rg,dn,Qrep,pos,NBases,MMismatches,Qemp
|
# rg,pos,Qrep,dn,nBases,nMismatches,Qemp
|
||||||
types = [str, str, int, tryByInt, int, int, int]
|
types = [str, tryByInt, int, str, int, int, int]
|
||||||
for head, expected, datum, type in zip(header, expectedHeader, data, types):
|
for head, expected, datum, type in zip(header, expectedHeader, data, types):
|
||||||
if head <> expected:
|
if head <> expected:
|
||||||
raise ("Unexpected header in rawData %s %s %s" % (head, expected, datum))
|
raise ("Unexpected header in rawData %s %s %s" % (head, expected, datum))
|
||||||
|
|
@ -59,32 +62,41 @@ class RecalData(dict):
|
||||||
def readGroup(self): return self.rg
|
def readGroup(self): return self.rg
|
||||||
def dinuc(self): return self.dn
|
def dinuc(self): return self.dn
|
||||||
def qReported(self): return self.Qrep
|
def qReported(self): return self.Qrep
|
||||||
def qEmpirical(self): return self.Qemp
|
|
||||||
def cycle(self): return self.pos
|
def cycle(self): return self.pos
|
||||||
def nBases(self): return self.NBases
|
def getNBases(self): return self.nBases
|
||||||
def nMismatches(self): return self.MMismatches
|
def getNMismatches(self): return self.nMismatches
|
||||||
def nExpectedMismatches(self): return self.nBases() * phredScore2ErrorProp(self.qReported())
|
def nExpectedMismatches(self): return self.getNBases() * phredScore2ErrorProp(self.qReported())
|
||||||
|
|
||||||
|
|
||||||
|
def qEmpirical(self):
|
||||||
|
#if OPTIONS.raw:
|
||||||
|
return self.Qemp
|
||||||
|
#else:
|
||||||
|
# r = phredQScore(self.getNMismatches() + 1, self.getNBases() + 1)
|
||||||
|
# #print 'Using yates corrected Q scores', self.getNMismatches(), self.getNBases(), self.getNMismatches() + 1, self.getNBases() + 1, self.Qemp, r, r - self.Qemp
|
||||||
|
# return r
|
||||||
|
|
||||||
|
|
||||||
def combine(self, moreData):
|
def combine(self, moreData):
|
||||||
# grab useful info
|
# grab useful info
|
||||||
sumErrors = self.nExpectedMismatches()
|
sumErrors = self.nExpectedMismatches()
|
||||||
for datum in moreData:
|
for datum in moreData:
|
||||||
self.NBases += datum.nBases()
|
self.nBases += datum.getNBases()
|
||||||
self.MMismatches += datum.nMismatches()
|
self.nMismatches += datum.getNMismatches()
|
||||||
sumErrors += datum.nExpectedMismatches()
|
sumErrors += datum.nExpectedMismatches()
|
||||||
self.updateQemp()
|
self.updateQemp()
|
||||||
self.Qrep = phredQScore(sumErrors, self.nBases())
|
self.Qrep = phredQScore(sumErrors, self.getNBases())
|
||||||
#print 'self.Qrep is now', self.Qrep
|
#print 'self.Qrep is now', self.Qrep
|
||||||
return self
|
return self
|
||||||
|
|
||||||
def updateQemp(self):
|
def updateQemp(self):
|
||||||
newQemp = phredQScore( self.nMismatches(), self.nBases() )
|
newQemp = phredQScore( self.getNMismatches(), self.getNBases() )
|
||||||
#print 'Updating qEmp', self.Qemp, newQemp
|
#print 'Updating qEmp', self.Qemp, newQemp
|
||||||
self.Qemp = newQemp
|
self.Qemp = newQemp
|
||||||
return newQemp
|
return newQemp
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return "[rg=%s cycle=%s dinuc=%s qrep=%.1f qemp=%.1f nbases=%d nmismatchs=%d]" % ( self.readGroup(), str(self.cycle()), self.dinuc(), self.qReported(), self.qEmpirical(), self.nBases(), self.nMismatches())
|
return "[rg=%s cycle=%s dinuc=%s qrep=%.1f qemp=%.1f nbases=%d nmismatchs=%d]" % ( self.readGroup(), str(self.cycle()), self.dinuc(), self.qReported(), self.qEmpirical(), self.getNBases(), self.getNMismatches())
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return self.__str__()
|
return self.__str__()
|
||||||
|
|
||||||
|
|
@ -157,7 +169,7 @@ def countQsOfMinQuality(thres, data):
|
||||||
return qDeclared, qDeclaredTrue
|
return qDeclared, qDeclaredTrue
|
||||||
|
|
||||||
def medianQreported(jaffe, allBases):
|
def medianQreported(jaffe, allBases):
|
||||||
i, ignore = medianByCounts(map( RecalData.nBases, jaffe ))
|
i, ignore = medianByCounts(map( RecalData.getNBases, jaffe ))
|
||||||
return jaffe[i].qReported()
|
return jaffe[i].qReported()
|
||||||
|
|
||||||
def medianByCounts(counts):
|
def medianByCounts(counts):
|
||||||
|
|
@ -170,7 +182,7 @@ def medianByCounts(counts):
|
||||||
return i, counts[i]
|
return i, counts[i]
|
||||||
|
|
||||||
def modeQreported(jaffe, allBases):
|
def modeQreported(jaffe, allBases):
|
||||||
ordered = sorted(jaffe, key=RecalData.nBases, reverse=True )
|
ordered = sorted(jaffe, key=RecalData.getNBases, reverse=True )
|
||||||
#print ordered
|
#print ordered
|
||||||
return ordered[0].qReported()
|
return ordered[0].qReported()
|
||||||
|
|
||||||
|
|
@ -202,21 +214,21 @@ def rmse(reportedList, empiricalList, counts):
|
||||||
diff = reported - empirical
|
diff = reported - empirical
|
||||||
inc = count * diff * diff
|
inc = count * diff * diff
|
||||||
sum += inc
|
sum += inc
|
||||||
|
#print reported, empirical, sum, inc, count, diff
|
||||||
|
#print sum, math.sqrt(sum)
|
||||||
return math.sqrt(sum)
|
return math.sqrt(sum)
|
||||||
|
|
||||||
def stdevQReported(jaffe, allBases):
|
def stdevQReported(jaffe, allBases):
|
||||||
mean = averageQreported(jaffe, allBases)
|
mean = averageQreported(jaffe, allBases)
|
||||||
return lsamplestdev(map( RecalData.qReported, jaffe ), map( RecalData.nBases, jaffe ), mean)
|
return lsamplestdev(map( RecalData.qReported, jaffe ), map( RecalData.getNBases, jaffe ), mean)
|
||||||
|
|
||||||
def coeffOfVariationQreported(jaffe, allBases):
|
def coeffOfVariationQreported(jaffe, allBases):
|
||||||
mean = averageQreported(jaffe, allBases)
|
mean = averageQreported(jaffe, allBases)
|
||||||
stdev = stdevQReported(jaffe, allBases)
|
stdev = stdevQReported(jaffe, allBases)
|
||||||
return stdev / mean
|
return stdev / mean
|
||||||
|
|
||||||
# o("variance_Qreported %2.2f" % varianceQreported(jaffe))
|
|
||||||
|
|
||||||
def rmseJaffe(jaffe):
|
def rmseJaffe(jaffe):
|
||||||
return rmse( map( RecalData.qReported, jaffe ), map( RecalData.qEmpirical, jaffe ), map( RecalData.nBases, jaffe ) )
|
return rmse( map( RecalData.qReported, jaffe ), map( RecalData.qEmpirical, jaffe ), map( RecalData.getNBases, jaffe ) )
|
||||||
|
|
||||||
def basicQualScoreStats(readGroup, data, output ):
|
def basicQualScoreStats(readGroup, data, output ):
|
||||||
def o(s):
|
def o(s):
|
||||||
|
|
@ -227,8 +239,8 @@ def basicQualScoreStats(readGroup, data, output ):
|
||||||
o("read_group %s" % rg)
|
o("read_group %s" % rg)
|
||||||
#o("number_of_cycles %d" % 0)
|
#o("number_of_cycles %d" % 0)
|
||||||
#o("maximum_reported_quality_score %d" % 0)
|
#o("maximum_reported_quality_score %d" % 0)
|
||||||
o("number_of_bases %d" % allBases.nBases())
|
o("number_of_bases %d" % allBases.getNBases())
|
||||||
o("number_of_mismatching_bases %d" % allBases.nMismatches())
|
o("number_of_mismatching_bases %d" % allBases.getNMismatches())
|
||||||
o("lane_wide_Qreported %2.2f" % allBases.qReported())
|
o("lane_wide_Qreported %2.2f" % allBases.qReported())
|
||||||
o("lane_wide_Qempirical %2.2f" % allBases.qEmpirical())
|
o("lane_wide_Qempirical %2.2f" % allBases.qEmpirical())
|
||||||
o("lane_wide_Qempirical_minus_Qreported %2.2f" % (allBases.qEmpirical()-allBases.qReported()))
|
o("lane_wide_Qempirical_minus_Qreported %2.2f" % (allBases.qEmpirical()-allBases.qReported()))
|
||||||
|
|
@ -241,21 +253,22 @@ def basicQualScoreStats(readGroup, data, output ):
|
||||||
o("stdev_Qreported %2.2f" % stdevQReported(jaffe, allBases))
|
o("stdev_Qreported %2.2f" % stdevQReported(jaffe, allBases))
|
||||||
o("coeff_of_variation_Qreported %2.2f" % coeffOfVariationQreported(jaffe, allBases))
|
o("coeff_of_variation_Qreported %2.2f" % coeffOfVariationQreported(jaffe, allBases))
|
||||||
|
|
||||||
o("RMSE(qReported,qEmpirical) %2.2f" % rmseJaffe(jaffe))
|
o("RMSE_qReported_qEmpirical %2.2f" % rmseJaffe(jaffe))
|
||||||
for thres in [20, 25, 30]:
|
for thres in [20, 25, 30]:
|
||||||
qDeclared, qDeclaredTrue = countQsOfMinQuality(thres, jaffe)
|
qDeclared, qDeclaredTrue = countQsOfMinQuality(thres, jaffe)
|
||||||
o("number_of_q%d_bases %d" % (thres, qDeclared.nBases()))
|
o("number_of_q%d+_bases %d" % (thres, qDeclared.getNBases()))
|
||||||
o("percent_of_q%d_bases %2.2f" % (thres, 100 * qDeclared.nBases() / float(allBases.nBases())))
|
o("percent_of_q%d+_bases %2.2f" % (thres, 100 * qDeclared.getNBases() / float(allBases.getNBases())))
|
||||||
o("number_of_q%d_bases_with_qemp_above_q%d %d" % (thres, thres, qDeclaredTrue.nBases()))
|
o("number_of_q%d+_bases_with_qemp_above_q%d %d" % (thres, thres, qDeclaredTrue.getNBases()))
|
||||||
o("percent_of_q%d_bases_with_qemp_above_q%d %2.2f" % (thres, thres, 100 * qDeclaredTrue.nBases() / float(allBases.nBases())))
|
o("percent_of_q%d+_bases_with_qemp_above_q%d %2.2f" % (thres, thres, 100 * qDeclaredTrue.getNBases() / float(allBases.getNBases())))
|
||||||
|
|
||||||
def qDiffByCycle(readGroup, allData, output):
|
def qDiffByCycle(readGroup, allData, output):
|
||||||
|
#print '#### qDiffByCycle ####'
|
||||||
print >> output, '# Note Qreported is a float here due to combining Qreported across quality bins -- Qreported is the expected Q across all Q bins, weighted by nBases'
|
print >> output, '# Note Qreported is a float here due to combining Qreported across quality bins -- Qreported is the expected Q across all Q bins, weighted by nBases'
|
||||||
print >> output, 'Cycle Qreported Qempirical Qempirical_Qreported nMismatches nBases'
|
print >> output, 'Cycle Qreported Qempirical Qempirical_Qreported nMismatches nBases'
|
||||||
for cycle, datum in groupRecalData(allData, key=RecalData.cycle):
|
for cycle, datum in groupRecalData(allData, key=RecalData.cycle):
|
||||||
datum.set(['rg', 'dn', 'pos'], [readGroup, '**', cycle])
|
datum.set(['rg', 'dn', 'pos'], [readGroup, '**', cycle])
|
||||||
diff = datum.qEmpirical() - datum.qReported()
|
diff = datum.qEmpirical() - datum.qReported()
|
||||||
print >> output, "%s %2.2f %2.2f %2.2f %12d %12d" % (datum.cycle(), datum.qReported(), datum.qEmpirical(), diff, datum.nMismatches(), datum.nBases())
|
print >> output, "%s %2.2f %2.2f %2.2f %12d %12d" % (datum.cycle(), datum.qReported(), datum.qEmpirical(), diff, datum.getNMismatches(), datum.getNBases())
|
||||||
|
|
||||||
def qDiffByDinuc(readGroup, allData, output):
|
def qDiffByDinuc(readGroup, allData, output):
|
||||||
print >> output, '# Note Qreported is a float here due to combining Qreported across quality bins -- Qreported is the expected Q across all Q bins, weighted by nBases'
|
print >> output, '# Note Qreported is a float here due to combining Qreported across quality bins -- Qreported is the expected Q across all Q bins, weighted by nBases'
|
||||||
|
|
@ -263,7 +276,7 @@ def qDiffByDinuc(readGroup, allData, output):
|
||||||
for dinuc, datum in groupRecalData(allData, key=RecalData.dinuc):
|
for dinuc, datum in groupRecalData(allData, key=RecalData.dinuc):
|
||||||
datum.set(['rg', 'dn', 'pos'], [readGroup, dinuc, '*'])
|
datum.set(['rg', 'dn', 'pos'], [readGroup, dinuc, '*'])
|
||||||
diff = datum.qEmpirical() - datum.qReported()
|
diff = datum.qEmpirical() - datum.qReported()
|
||||||
print >> output, "%s %2.2f %2.2f %2.2f %12d %12d" % (datum.dinuc(), datum.qReported(), datum.qEmpirical(), diff, datum.nMismatches(), datum.nBases())
|
print >> output, "%s %2.2f %2.2f %2.2f %12d %12d" % (datum.dinuc(), datum.qReported(), datum.qEmpirical(), diff, datum.getNMismatches(), datum.getNBases())
|
||||||
|
|
||||||
def qReportedVsqEmpiricalStream(readGroup, data):
|
def qReportedVsqEmpiricalStream(readGroup, data):
|
||||||
for key, datum in groupRecalData(data, key=RecalData.qReported):
|
for key, datum in groupRecalData(data, key=RecalData.qReported):
|
||||||
|
|
@ -273,7 +286,9 @@ def qReportedVsqEmpiricalStream(readGroup, data):
|
||||||
def qReportedVsqEmpirical(readGroup, allData, output):
|
def qReportedVsqEmpirical(readGroup, allData, output):
|
||||||
print >> output, 'Qreported Qempirical nMismatches nBases'
|
print >> output, 'Qreported Qempirical nMismatches nBases'
|
||||||
for key, datum in qReportedVsqEmpiricalStream(readGroup, allData):
|
for key, datum in qReportedVsqEmpiricalStream(readGroup, allData):
|
||||||
print >> output, "%2.2f %2.2f %12d %12d" % (datum.qReported(), datum.qEmpirical(), datum.nMismatches(), datum.nBases())
|
#if datum.qReported() > 35:
|
||||||
|
# print datum
|
||||||
|
print >> output, "%2.2f %2.2f %12d %12d" % (datum.qReported(), datum.qEmpirical(), datum.getNMismatches(), datum.getNBases())
|
||||||
|
|
||||||
def analyzeRawData(rawDataFile):
|
def analyzeRawData(rawDataFile):
|
||||||
for readGroup, data in rawDataByReadGroup(rawDataFile):
|
for readGroup, data in rawDataByReadGroup(rawDataFile):
|
||||||
|
|
@ -303,6 +318,7 @@ def analyzeFiles(files):
|
||||||
#print 'analyzeFiles', files
|
#print 'analyzeFiles', files
|
||||||
Rscript = config.getOption('R', 'Rscript', 'input_file')
|
Rscript = config.getOption('R', 'Rscript', 'input_file')
|
||||||
for file in files:
|
for file in files:
|
||||||
|
print 'Analyzing file', file
|
||||||
plotter = getPlotterForFile(file)
|
plotter = getPlotterForFile(file)
|
||||||
if plotter <> None:
|
if plotter <> None:
|
||||||
cmd = ' '.join([Rscript, plotter, file])
|
cmd = ' '.join([Rscript, plotter, file])
|
||||||
|
|
@ -328,6 +344,9 @@ def main():
|
||||||
parser.add_option("", "--dry", dest="dry",
|
parser.add_option("", "--dry", dest="dry",
|
||||||
action='store_true', default=False,
|
action='store_true', default=False,
|
||||||
help="If provided, nothing actually gets run, just a dry run")
|
help="If provided, nothing actually gets run, just a dry run")
|
||||||
|
#parser.add_option("-r", "--raw", dest="raw",
|
||||||
|
# action='store_true', default=False,
|
||||||
|
# help="If provided, analyze data w.r.t. the raw empirical quality scores # mismatches / # bases, as opposed to the Yates correction of +1 to each")
|
||||||
parser.add_option("-g", "--readGroup", dest="selectedReadGroups",
|
parser.add_option("-g", "--readGroup", dest="selectedReadGroups",
|
||||||
action="append", type="string", default=[],
|
action="append", type="string", default=[],
|
||||||
help="If provided, only the provided read groups will be analyzed")
|
help="If provided, only the provided read groups will be analyzed")
|
||||||
|
|
|
||||||
|
|
@ -12,9 +12,9 @@ if __name__ == "__main__":
|
||||||
parser.add_option("-A", "--args", dest="args",
|
parser.add_option("-A", "--args", dest="args",
|
||||||
type="string", default="",
|
type="string", default="",
|
||||||
help="arguments to GATK")
|
help="arguments to GATK")
|
||||||
parser.add_option("-C", "--CovariateArgs", dest="CovariateArgs",
|
parser.add_option("-m", "--mode", dest="RecalMode",
|
||||||
type="string", default="",
|
type="string", default="",
|
||||||
help="arguments to GATK")
|
help="Mode argument to provide to table recalibrator")
|
||||||
parser.add_option("-q", "--farm", dest="farmQueue",
|
parser.add_option("-q", "--farm", dest="farmQueue",
|
||||||
type="string", default=None,
|
type="string", default=None,
|
||||||
help="Farm queue to send processing jobs to")
|
help="Farm queue to send processing jobs to")
|
||||||
|
|
@ -45,37 +45,36 @@ if __name__ == "__main__":
|
||||||
|
|
||||||
covariateRoot = os.path.join(OPTIONS.scratchDir, rootname)
|
covariateRoot = os.path.join(OPTIONS.scratchDir, rootname)
|
||||||
covariateInitial = covariateRoot + '.init'
|
covariateInitial = covariateRoot + '.init'
|
||||||
initDataFile = covariateInitial + '.raw_data.csv'
|
initDataFile = covariateInitial + '.recal_data.csv'
|
||||||
covariateRecal = covariateRoot + '.recal'
|
covariateRecal = covariateRoot + '.recal'
|
||||||
recalDataFile = covariateRecal + '.raw_data.csv'
|
recalDataFile = covariateRecal + '.recal_data.csv'
|
||||||
|
|
||||||
if not os.path.exists(OPTIONS.scratchDir):
|
if not os.path.exists(OPTIONS.scratchDir):
|
||||||
os.mkdir(OPTIONS.scratchDir)
|
os.mkdir(OPTIONS.scratchDir)
|
||||||
|
|
||||||
def covariateCmd(bam, outputDir, ignoreAdds):
|
def covariateCmd(bam, outputDir):
|
||||||
add = " -I %s --OUTPUT_FILEROOT %s" % (bam, outputDir)
|
add = " -I %s --OUTPUT_FILEROOT %s" % (bam, outputDir)
|
||||||
if not ignoreAdds:
|
|
||||||
add += " " + OPTIONS.CovariateArgs
|
|
||||||
return config.gatkCmd('CountCovariates') + add
|
return config.gatkCmd('CountCovariates') + add
|
||||||
|
|
||||||
def recalibrateCmd(inputBAM, dataFile, outputBAM):
|
def recalibrateCmd(inputBAM, dataFile, outputBAM):
|
||||||
return config.gatkCmd('TableRecalibration') + " -I %s -params %s -outputBAM %s" % (inputBAM, dataFile, outputBAM)
|
return config.gatkCmd('TableRecalibration') + " -I %s -params %s -outputBAM %s -mode %s" % (inputBAM, dataFile, outputBAM, OPTIONS.RecalMode)
|
||||||
|
|
||||||
def runCovariateCmd(inputBAM, dataFile, dir, jobid, ignoreAdds = False):
|
def runCovariateCmd(inputBAM, dataFile, dir, jobid):
|
||||||
if OPTIONS.ignoreExistingFiles or not os.path.exists(dataFile):
|
if OPTIONS.ignoreExistingFiles or not os.path.exists(dataFile):
|
||||||
cmd = covariateCmd(inputBAM, dir, ignoreAdds)
|
cmd = covariateCmd(inputBAM, dir)
|
||||||
return farm_commands.cmd(cmd, OPTIONS.farmQueue, None, just_print_commands = OPTIONS.dry, waitID = jobid)
|
return farm_commands.cmd(cmd, OPTIONS.farmQueue, None, just_print_commands = OPTIONS.dry, waitID = jobid)
|
||||||
|
|
||||||
#
|
#
|
||||||
# Actually do some work here
|
# Actually do some work here
|
||||||
#
|
#
|
||||||
jobid = None
|
jobid = None
|
||||||
jobid = runCovariateCmd(inputBAM, initDataFile, covariateInitial, jobid, False)
|
if OPTIONS.ignoreExistingFiles or not os.path.exists(initDataFile):
|
||||||
|
jobid = runCovariateCmd(inputBAM, initDataFile, covariateInitial, jobid)
|
||||||
|
|
||||||
if OPTIONS.ignoreExistingFiles or not os.path.exists(outputBAM):
|
if OPTIONS.ignoreExistingFiles or not os.path.exists(outputBAM):
|
||||||
cmd = recalibrateCmd(inputBAM, initDataFile, outputBAM)
|
cmd = recalibrateCmd(inputBAM, initDataFile, outputBAM)
|
||||||
jobid = farm_commands.cmd(cmd, OPTIONS.farmQueue, None, just_print_commands = OPTIONS.dry, waitID = jobid)
|
jobid = farm_commands.cmd(cmd, OPTIONS.farmQueue, None, just_print_commands = OPTIONS.dry, waitID = jobid)
|
||||||
jobid = farm_commands.cmd('samtools index ' + outputBAM, OPTIONS.farmQueue, None, just_print_commands = OPTIONS.dry, waitID = jobid)
|
jobid = farm_commands.cmd('samtools index ' + outputBAM, OPTIONS.farmQueue, None, just_print_commands = OPTIONS.dry, waitID = jobid)
|
||||||
|
|
||||||
jobid = runCovariateCmd(outputBAM, recalDataFile, covariateRecal, jobid, True)
|
jobid = runCovariateCmd(outputBAM, recalDataFile, covariateRecal, jobid)
|
||||||
|
|
||||||
Loading…
Reference in New Issue