Awesome: JobDispatcher can now dispatch jobs by gene from a target .design file found in /seq/references.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3170 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
04909fa6ad
commit
2e4377b1cf
|
|
@ -401,18 +401,8 @@ class GATKDispatcher(JobDispatcher):
|
|||
cmdToDispatch = command + " -o "+job_dir+"job"+str(num)+".txt"
|
||||
return cmdToDispatch
|
||||
|
||||
class GeneDispatcher(GATKDispatcher):
|
||||
|
||||
def __init__(self,geneNames,jarfile,memory,walker,args,output_directory,reference = None, bams = None,
|
||||
queues = ["long"], limits = dict([["long",500]]), print_only = False, delay = "0:1:0"):
|
||||
JobDispatcher.GATKDispatcher.__init__(self,jarfile,memory,walker,args,output_directory,reference,bams,None,queues,
|
||||
limits,print_only,"space",delay)
|
||||
def dispatchByGene(self, geneNames):
|
||||
self.genes = RefseqLibrary.getRefseqGenes(geneNames)
|
||||
|
||||
def dispatchByInterval(self,base_limit):
|
||||
raise JobDispatchError("Dispatch by interval not permitted from GeneDispatcher")
|
||||
|
||||
def dispatchByGene(self):
|
||||
dispatchCommand = self.baseCommand + " -R "+self.reference
|
||||
farmJobs = list()
|
||||
jobNumber = ""
|
||||
|
|
@ -429,3 +419,22 @@ class GeneDispatcher(GATKDispatcher):
|
|||
farmJobs.append(self._buildIntervalJob(jobNumber,headerLines,intervals,dispatchCommand))
|
||||
|
||||
self.dispatchAll_Interval(farmJobs)
|
||||
|
||||
def dispatchByTargetDesign(self,designFile):
|
||||
self.genes = RefseqLibrary.parseDesignFile(designFile)
|
||||
dispatchCommand = self.baseCommand + " -R "+self.reference
|
||||
farmJobs = list()
|
||||
jobNumber = ""
|
||||
headerLines = RefseqLibrary.getIntervalHeaderLines()
|
||||
|
||||
if ( not os.path.exists(self.outputDir+"GATKDispatcher/") ):
|
||||
os.mkdir(self.outputDir+"GATKDispatcher/")
|
||||
if ( self.bams != None ):
|
||||
dispatchCommand += " -I "+self.bams
|
||||
|
||||
for gene in self.genes:
|
||||
jobNumber = "_"+gene.getGeneName()
|
||||
intervals = gene.getExonIntervals()
|
||||
farmJobs.append(self._buildIntervalJob(jobNumber,headerLines,intervals,dispatchCommand))
|
||||
|
||||
self.dispatchAll_Interval(farmJobs)
|
||||
|
|
|
|||
|
|
@ -284,3 +284,28 @@ def getIntervalHeaderLines():
|
|||
line = whole_exome_file.readline()
|
||||
whole_exome_file.close()
|
||||
return header
|
||||
|
||||
def parseDesignFile(file):
|
||||
designFile = open(file)
|
||||
genes = dict()
|
||||
for line in designFile.readlines():
|
||||
if ( line.startswith("TARGET") ):
|
||||
spline = line.strip().split()
|
||||
if ( spline[1].startswith("chr") ):
|
||||
chrom = spline[1]
|
||||
else:
|
||||
chrom = "chr"+spline[1]
|
||||
start = 1 + int(spline[2])
|
||||
stop = 1 + int(spline[3])
|
||||
gene_name = spline[4].split("#")[1].split("_")[0]
|
||||
try:
|
||||
exon_id = spline[4].split(gene_name)[1]
|
||||
except IndexError:
|
||||
exon_id = "_".join(spline[5:len(spline)-1])
|
||||
exon = Exon(gene_name,exon_id,chrom,start,stop)
|
||||
if ( gene_name in genes.keys() ):
|
||||
genes[gene_name].addExon(exon)
|
||||
else:
|
||||
genes[gene_name] = Gene(gene_name)
|
||||
genes[gene_name].addExon(exon)
|
||||
return genes.values()
|
||||
|
|
|
|||
Loading…
Reference in New Issue