gatk-3.8/python/StressTestGATK.py

67 lines
2.3 KiB
Python
Executable File

import farm_commands
import os.path
import sys
import getopt
defaultCommands = ['CountReads', 'Pileup']
def usage():
print "Optional arguments:"
print " -f QUEUE Farm jobs to QUEUE on LSF"
print " -c cmd1,cmd2 Walkers to execute, otherwise", ' '.join(defaultCommands)
print " -e Ignore existing files", ' '.join(defaultCommands)
if __name__ == "__main__":
opts = None
try:
opts, args = getopt.getopt(sys.argv[1:], "f:c:a:e", ["farm", "commands", "args", "ignoreExistingFiles"])
except getopt.GetoptError:
print sys.argv
usage()
sys.exit(2)
farm_sub = False
commandsList = defaultCommands
ignoreExistingFiles = False
extraArgs = ''
for opt, arg in opts:
if opt in ("-f", "--farm"):
farm_sub = arg
if opt in ("-c", "--commands"):
commandsList = arg.split(',')
if opt in ("-e", "--ignoreExistingFiles"):
ignoreExistingFiles = True
if opt in ("-a", "--args"):
extraArgs = arg
directory = args[1]
for line in open(args[0]):
lineParts = line.split()
lane = lineParts[0].strip()
ref = '/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta'
if ( len(lineParts) > 1 ):
ref = lineParts[1].strip()
if not os.path.exists(lane):
print 'Input SAM/BAM file: "', lane, '" does not exist, skipping...'
continue
head, lane_filename = os.path.split(lane)
filebase = os.path.splitext(lane_filename)[0]
# make the pileup
# 503 8:03 samtools view -b MV1994.bam Escherichia_coli_K12:10-1000 > sub.bam
# 504 8:03 samtools pileup -f Escherichia_coli_K12_MG1655.fasta sub.bam
# convert the fasta
for analysis in commandsList:
output = os.path.join(directory, filebase + '.' + analysis + '.output')
if ignoreExistingFiles or not os.path.exists(output):
cmd = "java -jar ~/dev/GenomeAnalysisTK/trunk/dist/GenomeAnalysisTK.jar -T " + analysis + " -I " + lane + " -R " + ref + " -o " + output + " -l INFO " + extraArgs
print cmd
farm_commands.cmd(cmd, farm_sub, output)