Shouldn't be in the tree
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@9 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
17aabb38f9
commit
e892c3fd98
|
|
@ -1,202 +0,0 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
import getopt, sys, os, string
|
||||
|
||||
# Absolute path of the helper script that run_MAQ() invokes as
# "<script> <fasta> <quals> <fastq>" when the .fastq file does not exist yet.
FastaQuals2Fastq_exe = "/wga/dev/andrewk/Arachne/AlignerEvaluation/FastaQuals2Fastq.py"
|
||||
|
||||
def cmd(cmd_str, farm_queue=False, output_head=""):
    """Echo a shell command and, unless in dry-run mode, execute it.

    Arguments:
        cmd_str     -- the shell command line to run.
        farm_queue  -- when truthy, the name of the queue to submit to: the
                       command is wrapped as
                       "bsub -q <farm_queue> -o <output_head>.stdout <cmd_str>".
                       When falsy the command is run directly.
        output_head -- prefix of the file given to bsub's -o option; only
                       used when farm_queue is truthy.

    Returns the status reported by os.system(), or None when the
    module-level justPrintCommands flag suppressed execution.
    """
    if farm_queue:
        farm_stdout = output_head + ".stdout"
        cmd_str = "bsub -q " + farm_queue + " -o " + farm_stdout + " " + cmd_str
        # Single-argument print(...) behaves identically on Python 2 and 3.
        print("### Farming via " + cmd_str)
    else:
        print("### Executing " + cmd_str)

    if justPrintCommands:
        # Debugging-output mode: the command was only echoed above.
        return None
    return os.system(cmd_str)
|
||||
|
||||
def isFastaB(filename):
    """Return True when filename carries the '.fastb' extension."""
    _, extension = os.path.splitext(filename)
    return extension == '.fastb'
|
||||
|
||||
def readListOfLanes( listFile ):
    """Read the lanes to process from a whitespace-delimited list file.

    Each non-blank line of listFile is split on whitespace; its first column
    goes into the first returned list and its second column into the second.
    Any further columns are ignored, and blank lines are skipped.

    Returns a tuple of two parallel lists (first columns, second columns).
    Raises ValueError for a non-blank line with fewer than two columns.
    """
    first_column = []
    second_column = []
    # "with" guarantees the handle is closed even if a bad line aborts the read.
    with open(listFile) as handle:
        for line_number, line in enumerate(handle, 1):
            fields = line.split()
            if not fields:
                continue
            if len(fields) < 2:
                raise ValueError("%s line %d: expected two columns, got %r"
                                 % (listFile, line_number, line))
            first_column.append(fields[0])
            second_column.append(fields[1])
    return first_column, second_column
|
||||
|
||||
|
||||
def run_swmerlin(input_file, input_head, farm=""):
    """Run Merlin on input_file with its Smith-Waterman option switched on."""
    run_merlin(input_file, input_head, farm=farm, sw=True)
|
||||
|
||||
def run_merlin(input_file, input_head, farm="", sw=False):
    """Build the Merlin command line for one input and pass it to cmd().

    input_file is used as the FASTB input when it already has the .fastb
    extension; otherwise input_head + ".fastb" is used, and Fasta2Fastb is
    run on input_file first if that file does not exist yet.

    sw selects the Merlin Smith-Waterman option: the output head becomes
    input_head + ".swmerlin" (instead of ".merlin") and " SW=True" is
    appended to the command.  farm is forwarded to cmd() as the farm queue.
    """
    if isFastaB(input_file):
        fastb_path = input_file
    else:
        fastb_path = input_head + ".fastb"
        if not os.path.exists(fastb_path):
            cmd("Fasta2Fastb IN= " + input_file)

    if sw:
        output_head = input_head + ".swmerlin"
        sw_option = " SW=True"
    else:
        output_head = input_head + ".merlin"
        sw_option = ""

    # Adjacent literals are joined at compile time, so the command text is
    # one continuous string up to the FASTB= argument.
    merlin_cmd = (
        "Merlin REF_FASTB= /seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta.lookuptable.fastb"
        " REF_MERLIN= /seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta.merlinref.bin"
        " FASTB= " + fastb_path + " OUT_HEAD=" + output_head + sw_option
    )
    cmd(merlin_cmd, farm, output_head)
|
||||
|
||||
# When True, run_ilt() hands the lookup to the batchShortQueryLookup2.pl
# wrapper instead of invoking ImperfectLookupTable itself.
USE_BATCH = True


def run_ilt(input_file, input_head, farm=""):
    """Build the ImperfectLookupTable command for one input and run it.

    input_file is used as the SEQS input when it already has the .fastb
    extension; otherwise input_head + ".fastb" is used, and Fasta2Fastb is
    run on input_file first if that file does not exist yet.  Output goes
    under the prefix input_head + ".ilt".

    With USE_BATCH set, the batch wrapper script is run directly and farm is
    ignored (the wrapper is given its own --BATCHQUEUE); otherwise
    ImperfectLookupTable is passed to cmd() with farm as the farm queue.

    NOTE(review): this block merges both sides of a previously unresolved
    merge conflict -- the run_ilt name, ".ilt" suffix and stdout prefix from
    revision 1.5, the USE_BATCH branch from the working copy.  Confirm that
    USE_BATCH = True is the intended default.
    """
    if isFastaB(input_file):
        input_fastb = input_file
    else:
        input_fastb = input_head + ".fastb"
        if not os.path.exists(input_fastb):
            cmd("Fasta2Fastb IN= " + input_file)

    output_head = input_head + ".ilt"

    if USE_BATCH:
        cmd_str = "~depristo/bin/batchShortQueryLookup2.pl --NUMPROCS=10 --BATCHQUEUE=long --SEQS=" + input_fastb + " --L=/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta.lookuptable.lookup --MAX_FREQ=1000 --O= " + output_head
        # Not farmed through cmd(): the wrapper script is told its own queue.
        cmd(cmd_str, False, output_head)
    else:
        cmd_str = "ImperfectLookupTable SEQS= " + input_fastb + " L= /seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta.lookuptable.lookup MAX_FREQ=1000 OUT_PREFIX= " + output_head
        cmd(cmd_str, farm, output_head)


# Spelling used on the working-copy side of the conflict; kept as an alias.
run_ILT = run_ilt
|
||||
|
||||
def run_MAQ(input_fasta, head, farm=""):
    """Prepare the MAQ inputs for one lane and pass the map command to cmd().

    Steps, each via cmd():
      1. If head + ".fastq" is missing, run the FastaQuals2Fastq script on
         input_fasta and head + ".quals.txt" to create it.
      2. If head + ".bfq" is missing, run "maq fastq2bfq" on the .fastq.
      3. Run "maq map" writing head + ".maq.out.aln.map"; only this step
         receives farm as the farm queue, with head + ".maq" as output head.
    """
    maq_binary = "/seq/dirseq/maq-0.7.1/maq"
    reference_bfa = "/seq/dirseq/ktibbett/maq-0.7.1-test/Homo_sapiens_assembly18.bfa"

    fastq_path = head + ".fastq"
    if not os.path.exists(fastq_path):
        quals_path = head + ".quals.txt"
        cmd(FastaQuals2Fastq_exe + " " + input_fasta + " " + quals_path + " " + fastq_path)

    bfq_path = head + ".bfq"
    if not os.path.exists(bfq_path):
        cmd(maq_binary + " fastq2bfq " + fastq_path + " " + bfq_path)

    map_head = head + ".maq"
    map_output = map_head + ".out.aln.map"
    map_cmd = maq_binary + " map -e 100 -a 600 -s 0 " + map_output + " " + reference_bfa + " " + bfq_path
    cmd(map_cmd, farm, map_head)
|
||||
|
||||
def usage():
    """Print the command-line help text to standard output."""
    help_lines = (
        "Required arguments:",
        " -i Input FASTA head (*.fasta, *.qualb)",
        " OR",
        " -d Directory to grab all FASTA files from",
        " OR",
        " -l List of FASTA/FASTB files to process",
        "",
        "Optional arguments:",
        " -f QUEUE Farm jobs to QUEUE on LSF",
        "",
        " -m MAPPER Compare output from MAPPER which can be: ilt, merlin, swmerlin, maq, all (default: all)",
        "",
        " -x Don't execute commands, just print them",
        "",
        " -w Output files to current directory (strip path from input file/dir/list)",
        "",
    )
    # One single-argument print(...) call works on both Python 2 and 3 and
    # emits every line, including the trailing blank one, newline-terminated.
    print("\n".join(help_lines))
|
||||
|
||||
|
||||
def get_all_fasta_files(fasta_dir):
    """Return paths of the non-empty entries in fasta_dir ending in ".fasta".

    Each path is fasta_dir (with "/" appended when it does not already end
    in one) followed by the entry name, in os.listdir() order.
    """
    entries = os.listdir(fasta_dir)
    prefix = fasta_dir if fasta_dir.endswith("/") else fasta_dir + "/"

    selected = []
    for entry in entries:
        if not entry.endswith(".fasta"):
            continue
        path = prefix + entry
        # Zero-length files are left out.
        if os.path.getsize(path) > 0:
            selected.append(path)
    return selected
|
||||
|
||||
# Dry-run switch consulted by cmd(): while True, commands are echoed but not
# executed.  Turned on by the -x / --dontexe option below.
justPrintCommands = False

if __name__ == "__main__":
    # A getopt long option must end in "=" to accept a value; without it
    # "--input foo" would leave "foo" behind as a stray positional argument.
    long_options = ["input=", "fasta_dir=", "farm=", "mapper=", "listOfLanes=",
                    "dontexe", "outputInWorkingDirectory"]
    try:
        opts, args = getopt.getopt(sys.argv[1:], "i:d:f:m:l:xw", long_options)
    except getopt.GetoptError:
        print(sys.argv)
        usage()
        sys.exit(2)

    input_head = ""
    fasta_dir = ""
    mapper_str = "all"
    farm_sub = False
    listOfLanes = None
    outputInWorkingDirectory = False

    for opt, arg in opts:
        # Echo every parsed option (single-argument print works on Py2 and 3).
        print(opt + " " + arg)
        if opt in ("-i", "--input"):
            input_head = arg
        elif opt in ("-l", "--listOfLanes"):
            listOfLanes = arg
        elif opt in ("-d", "--fasta_dir"):
            fasta_dir = arg
        elif opt in ("-f", "--farm"):
            farm_sub = arg
        elif opt in ("-m", "--mapper"):
            mapper_str = arg
        elif opt in ("-x", "--dontexe"):
            justPrintCommands = True
        elif opt in ("-w", "--outputInWorkingDirectory"):
            outputInWorkingDirectory = True

    # One of -i, -d or -l is required.
    if input_head == "" and fasta_dir == "" and listOfLanes is None:
        print("%s %s %s" % (input_head, fasta_dir, listOfLanes))
        usage()
        sys.exit(2)

    # Select function(s) for mapper
    mapper_func_list = {"ilt": run_ilt, "merlin": run_merlin,
                        "swmerlin": run_swmerlin, "maq": run_MAQ}
    mapper_key = mapper_str.lower()
    if mapper_key == "all":
        mapper_list = list(mapper_func_list.values())
    elif mapper_key in mapper_func_list:
        mapper_list = [mapper_func_list[mapper_key]]
    else:
        sys.exit("Don't know of mapper argument: " + mapper_str)

    # Build parallel lists of input files and output heads.  A head of None
    # means "derive the head from the input file name" in the loop below.
    if input_head:
        # -i names a head without the extension; a head that already ends in
        # "." (the only form the old "head + 'fasta'" handled) still works.
        if input_head.endswith("."):
            input_files = [input_head + "fasta"]
        else:
            input_files = [input_head + ".fasta"]
        input_heads = [None]
    elif listOfLanes is not None:
        input_heads, input_files = readListOfLanes(listOfLanes)
    else:
        input_files = get_all_fasta_files(fasta_dir)
        input_heads = [None] * len(input_files)

    for input_file, lane_head in zip(input_files, input_heads):
        if lane_head is None:
            file_head = os.path.splitext(input_file)[0]
            if outputInWorkingDirectory:
                # -w: drop the directory part so outputs land in the cwd.
                file_head = os.path.split(file_head)[1]
        else:
            file_head = lane_head
        for mapper in mapper_list:
            mapper(input_file, file_head, farm=farm_sub)
            print("")
|
||||
|
|
@ -1,15 +0,0 @@
|
|||
;; Object AlignerEvaluation/
|
||||
;; SEMANTICDB Tags save file
|
||||
(semanticdb-project-database-file "AlignerEvaluation/"
|
||||
:tables (list
|
||||
(semanticdb-table "SimulateReads.py"
|
||||
:major-mode 'python-mode
|
||||
:tags '(("Bio" include nil (dependency-file none) [21 31]) ("optparse" include nil (dependency-file none) [32 65]) ("Bio" include nil (dependency-file none) [66 87]) ("Bio.Seq" include nil (dependency-file none) [88 111]) ("Bio.SeqRecord" include nil (dependency-file none) [112 147]) ("Bio.Alphabet" include nil (dependency-file none) [148 186]) ("os" include nil (dependency-file none) [187 196]) ("sys" include nil (dependency-file none) [197 207]) ("random" include nil (dependency-file none) [220 233]) ("string" include nil (dependency-file none) [234 247]) ("SAM" include nil (dependency-file none) [249 266]) ("mutateReference" function (:arguments (("ref" variable nil (reparse-symbol function_parameters) [288 291]) ("mutSite" variable nil (reparse-symbol function_parameters) [293 300]) ("mutType" variable nil (reparse-symbol function_parameters) [302 309]) ("mutParams" variable nil (reparse-symbol function_parameters) [311 320]) ("nBasesToPad" variable nil (reparse-symbol function_parameters) [322 333]))) nil [268 1475]) ("sampleReadsFromAlignment" function (:arguments (("refSeq" variable nil (reparse-symbol function_parameters) [1517 1523]) ("mutSeq" variable nil (reparse-symbol function_parameters) [1525 1531]) ("alignStart" variable nil (reparse-symbol function_parameters) [1533 1543]) ("readLen" variable nil (reparse-symbol function_parameters) [1545 1552]) ("nReads" variable nil (reparse-symbol function_parameters) [1554 1560]))) nil [1488 2391]) ("fakeQuals" function (:arguments (("seq" variable nil (reparse-symbol function_parameters) [2407 2410]))) nil [2392 2441]) ("alignedRead2SAM" function (:arguments (("readID" variable nil (reparse-symbol function_parameters) [2491 2497]) ("fastaID" variable nil (reparse-symbol function_parameters) [2499 2506]) ("read" variable nil (reparse-symbol function_parameters) [2508 2512]) ("pos" variable nil (reparse-symbol function_parameters) [2514 2517]) ("cigar" variable nil (reparse-symbol function_parameters) [2519 
2524]))) nil [2470 2783]) ("readRef" function (:arguments (("referenceFasta" variable nil (reparse-symbol function_parameters) [2796 2810]))) nil [2784 3033]) ("OPTIONS" variable nil nil [3034 3048]) ("os.path" include nil (dependency-file none) [3050 3064]) ("outputFilename" function nil nil [3065 3481]) ("main" function nil nil [3482 8118]) ("main" code nil nil [8120 8126]))
|
||||
:file "SimulateReads.py"
|
||||
:pointmax 8129
|
||||
)
|
||||
)
|
||||
:file "semantic.cache"
|
||||
:semantic-tag-version "2.0pre4"
|
||||
:semanticdb-version "2.0pre4"
|
||||
)
|
||||
Loading…
Reference in New Issue