Awesome: JobDispatcher can now dispatch jobs by gene from a target .design file found in /seq/references.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3170 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
04909fa6ad
commit
2e4377b1cf
|
|
@ -401,18 +401,8 @@ class GATKDispatcher(JobDispatcher):
|
||||||
cmdToDispatch = command + " -o "+job_dir+"job"+str(num)+".txt"
|
cmdToDispatch = command + " -o "+job_dir+"job"+str(num)+".txt"
|
||||||
return cmdToDispatch
|
return cmdToDispatch
|
||||||
|
|
||||||
class GeneDispatcher(GATKDispatcher):
|
def dispatchByGene(self, geneNames):
|
||||||
|
|
||||||
def __init__(self,geneNames,jarfile,memory,walker,args,output_directory,reference = None, bams = None,
|
|
||||||
queues = ["long"], limits = dict([["long",500]]), print_only = False, delay = "0:1:0"):
|
|
||||||
JobDispatcher.GATKDispatcher.__init__(self,jarfile,memory,walker,args,output_directory,reference,bams,None,queues,
|
|
||||||
limits,print_only,"space",delay)
|
|
||||||
self.genes = RefseqLibrary.getRefseqGenes(geneNames)
|
self.genes = RefseqLibrary.getRefseqGenes(geneNames)
|
||||||
|
|
||||||
def dispatchByInterval(self,base_limit):
|
|
||||||
raise JobDispatchError("Dispatch by interval not permitted from GeneDispatcher")
|
|
||||||
|
|
||||||
def dispatchByGene(self):
|
|
||||||
dispatchCommand = self.baseCommand + " -R "+self.reference
|
dispatchCommand = self.baseCommand + " -R "+self.reference
|
||||||
farmJobs = list()
|
farmJobs = list()
|
||||||
jobNumber = ""
|
jobNumber = ""
|
||||||
|
|
@ -429,3 +419,22 @@ class GeneDispatcher(GATKDispatcher):
|
||||||
farmJobs.append(self._buildIntervalJob(jobNumber,headerLines,intervals,dispatchCommand))
|
farmJobs.append(self._buildIntervalJob(jobNumber,headerLines,intervals,dispatchCommand))
|
||||||
|
|
||||||
self.dispatchAll_Interval(farmJobs)
|
self.dispatchAll_Interval(farmJobs)
|
||||||
|
|
||||||
|
def dispatchByTargetDesign(self,designFile):
|
||||||
|
self.genes = RefseqLibrary.parseDesignFile(designFile)
|
||||||
|
dispatchCommand = self.baseCommand + " -R "+self.reference
|
||||||
|
farmJobs = list()
|
||||||
|
jobNumber = ""
|
||||||
|
headerLines = RefseqLibrary.getIntervalHeaderLines()
|
||||||
|
|
||||||
|
if ( not os.path.exists(self.outputDir+"GATKDispatcher/") ):
|
||||||
|
os.mkdir(self.outputDir+"GATKDispatcher/")
|
||||||
|
if ( self.bams != None ):
|
||||||
|
dispatchCommand += " -I "+self.bams
|
||||||
|
|
||||||
|
for gene in self.genes:
|
||||||
|
jobNumber = "_"+gene.getGeneName()
|
||||||
|
intervals = gene.getExonIntervals()
|
||||||
|
farmJobs.append(self._buildIntervalJob(jobNumber,headerLines,intervals,dispatchCommand))
|
||||||
|
|
||||||
|
self.dispatchAll_Interval(farmJobs)
|
||||||
|
|
|
||||||
|
|
@ -284,3 +284,28 @@ def getIntervalHeaderLines():
|
||||||
line = whole_exome_file.readline()
|
line = whole_exome_file.readline()
|
||||||
whole_exome_file.close()
|
whole_exome_file.close()
|
||||||
return header
|
return header
|
||||||
|
|
||||||
|
def parseDesignFile(file):
|
||||||
|
designFile = open(file)
|
||||||
|
genes = dict()
|
||||||
|
for line in designFile.readlines():
|
||||||
|
if ( line.startswith("TARGET") ):
|
||||||
|
spline = line.strip().split()
|
||||||
|
if ( spline[1].startswith("chr") ):
|
||||||
|
chrom = spline[1]
|
||||||
|
else:
|
||||||
|
chrom = "chr"+spline[1]
|
||||||
|
start = 1 + int(spline[2])
|
||||||
|
stop = 1 + int(spline[3])
|
||||||
|
gene_name = spline[4].split("#")[1].split("_")[0]
|
||||||
|
try:
|
||||||
|
exon_id = spline[4].split(gene_name)[1]
|
||||||
|
except IndexError:
|
||||||
|
exon_id = "_".join(spline[5:len(spline)-1])
|
||||||
|
exon = Exon(gene_name,exon_id,chrom,start,stop)
|
||||||
|
if ( gene_name in genes.keys() ):
|
||||||
|
genes[gene_name].addExon(exon)
|
||||||
|
else:
|
||||||
|
genes[gene_name] = Gene(gene_name)
|
||||||
|
genes[gene_name].addExon(exon)
|
||||||
|
return genes.values()
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue