2009-06-21 00:00:23 +08:00
|
|
|
import farm_commands
|
|
|
|
|
import os.path
|
|
|
|
|
import sys
|
|
|
|
|
from optparse import OptionParser
|
|
|
|
|
from gatkConfigParser import *
|
|
|
|
|
import glob
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
|
2009-06-21 03:50:37 +08:00
|
|
|
usage = """usage: %prog [-c config.cfg]* input.bam output.bam"""
|
2009-06-21 00:00:23 +08:00
|
|
|
|
|
|
|
|
parser = OptionParser(usage=usage)
|
|
|
|
|
parser.add_option("-A", "--args", dest="args",
|
|
|
|
|
type="string", default="",
|
|
|
|
|
help="arguments to GATK")
|
2009-06-24 09:12:35 +08:00
|
|
|
parser.add_option("-m", "--mode", dest="RecalMode",
|
2009-06-21 00:00:23 +08:00
|
|
|
type="string", default="",
|
2009-06-24 09:12:35 +08:00
|
|
|
help="Mode argument to provide to table recalibrator")
|
2009-06-21 00:00:23 +08:00
|
|
|
parser.add_option("-q", "--farm", dest="farmQueue",
|
|
|
|
|
type="string", default=None,
|
|
|
|
|
help="Farm queue to send processing jobs to")
|
2009-06-21 03:50:37 +08:00
|
|
|
parser.add_option("-c", "--config", dest="configs",
|
|
|
|
|
action="append", type="string", default=[],
|
|
|
|
|
help="Configuration file")
|
2009-06-21 00:00:23 +08:00
|
|
|
parser.add_option("-d", "--dir", dest="scratchDir",
|
|
|
|
|
type="string", default="test",
|
|
|
|
|
help="Output directory")
|
|
|
|
|
parser.add_option("", "--dry", dest="dry",
|
|
|
|
|
action='store_true', default=False,
|
|
|
|
|
help="If provided, nothing actually gets run, just a dry run")
|
|
|
|
|
parser.add_option("", "--plot", dest="plot",
|
|
|
|
|
action='store_true', default=False,
|
|
|
|
|
help="If provided, will call R to generate convenient plots about the Q scores of the pre and post calibrated files")
|
|
|
|
|
parser.add_option("-i", "--ignoreExistingFiles", dest="ignoreExistingFiles",
|
|
|
|
|
action='store_true', default=False,
|
|
|
|
|
help="Ignores already written files, if present")
|
|
|
|
|
|
|
|
|
|
(OPTIONS, args) = parser.parse_args()
|
2009-06-21 03:50:37 +08:00
|
|
|
if len(args) != 2:
|
|
|
|
|
parser.error("incorrect number of arguments")
|
2009-06-21 00:00:23 +08:00
|
|
|
|
2009-06-21 03:50:37 +08:00
|
|
|
config = gatkConfigParser(OPTIONS.configs)
|
|
|
|
|
inputBAM = args[0]
|
|
|
|
|
outputBAM = args[1]
|
2009-06-21 00:00:23 +08:00
|
|
|
rootname = os.path.split(os.path.splitext(outputBAM)[0])[1]
|
|
|
|
|
|
|
|
|
|
covariateRoot = os.path.join(OPTIONS.scratchDir, rootname)
|
|
|
|
|
covariateInitial = covariateRoot + '.init'
|
2009-06-24 09:12:35 +08:00
|
|
|
initDataFile = covariateInitial + '.recal_data.csv'
|
2009-06-21 00:00:23 +08:00
|
|
|
covariateRecal = covariateRoot + '.recal'
|
2009-06-24 09:12:35 +08:00
|
|
|
recalDataFile = covariateRecal + '.recal_data.csv'
|
2009-06-21 00:00:23 +08:00
|
|
|
|
|
|
|
|
if not os.path.exists(OPTIONS.scratchDir):
|
|
|
|
|
os.mkdir(OPTIONS.scratchDir)
|
|
|
|
|
|
2009-06-24 09:12:35 +08:00
|
|
|
def covariateCmd(bam, outputDir):
|
2009-06-21 00:00:23 +08:00
|
|
|
add = " -I %s --OUTPUT_FILEROOT %s" % (bam, outputDir)
|
|
|
|
|
return config.gatkCmd('CountCovariates') + add
|
|
|
|
|
|
|
|
|
|
def recalibrateCmd(inputBAM, dataFile, outputBAM):
|
2009-06-24 09:12:35 +08:00
|
|
|
return config.gatkCmd('TableRecalibration') + " -I %s -params %s -outputBAM %s -mode %s" % (inputBAM, dataFile, outputBAM, OPTIONS.RecalMode)
|
2009-06-21 00:00:23 +08:00
|
|
|
|
2009-06-24 09:12:35 +08:00
|
|
|
def runCovariateCmd(inputBAM, dataFile, dir, jobid):
|
2009-06-21 00:00:23 +08:00
|
|
|
if OPTIONS.ignoreExistingFiles or not os.path.exists(dataFile):
|
2009-06-24 09:12:35 +08:00
|
|
|
cmd = covariateCmd(inputBAM, dir)
|
2009-06-21 00:00:23 +08:00
|
|
|
return farm_commands.cmd(cmd, OPTIONS.farmQueue, None, just_print_commands = OPTIONS.dry, waitID = jobid)
|
|
|
|
|
|
2009-06-21 03:50:37 +08:00
|
|
|
#
|
|
|
|
|
# Actually do some work here
|
|
|
|
|
#
|
|
|
|
|
jobid = None
|
2009-06-24 09:12:35 +08:00
|
|
|
if OPTIONS.ignoreExistingFiles or not os.path.exists(initDataFile):
|
|
|
|
|
jobid = runCovariateCmd(inputBAM, initDataFile, covariateInitial, jobid)
|
2009-06-21 03:50:37 +08:00
|
|
|
|
|
|
|
|
if OPTIONS.ignoreExistingFiles or not os.path.exists(outputBAM):
|
|
|
|
|
cmd = recalibrateCmd(inputBAM, initDataFile, outputBAM)
|
|
|
|
|
jobid = farm_commands.cmd(cmd, OPTIONS.farmQueue, None, just_print_commands = OPTIONS.dry, waitID = jobid)
|
|
|
|
|
jobid = farm_commands.cmd('samtools index ' + outputBAM, OPTIONS.farmQueue, None, just_print_commands = OPTIONS.dry, waitID = jobid)
|
2009-06-21 00:00:23 +08:00
|
|
|
|
2009-06-24 09:12:35 +08:00
|
|
|
jobid = runCovariateCmd(outputBAM, recalDataFile, covariateRecal, jobid)
|
2009-06-21 00:00:23 +08:00
|
|
|
|