module for listing out samples for data processing and firehose reporting
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3847 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
1209b165bf
commit
3596b1529f
|
|
@ -0,0 +1,64 @@
|
||||||
|
import os
|
||||||
|
import subprocess
|
||||||
|
|
||||||
|
# This module creates a list of collaborator IDs or a list of sample names for a sample set. It will find cleaned BAMs even if they are not stored in the Sample_Set file system. It is mainly intended to be used as a module import.
|
||||||
|
class SampleSet:
    """A Firehose sample set whose samples, cleaned BAMs and BED files can be listed.

    Attributes:
        projectname: Project name; ``projectname + "_"`` is the prefix that is
            stripped from sample directory names to obtain the short ids.
        setname: Name of the sample-set directory under ``SAMPLE_SET_ROOT``.
        path: Output location prefix. List file names are built by plain
            concatenation (``path + "bamsfor" + setname + ".list"``), so it
            should end with a path separator.
    """

    # Firehose output roots. Kept as class attributes so a subclass or an
    # instance can point them somewhere else (e.g. a scratch copy).
    SAMPLE_SET_ROOT = "/humgen/gsa-firehose/firehose/firehose_output/trunk/Sample_Set/"
    SAMPLE_ROOT = "/humgen/gsa-firehose/firehose/firehose_output/trunk/Sample/"

    # Entries of a sample-set directory that are never reported as samples.
    IGNORED_SET_ENTRIES = ("CleanBam", "UnifiedGenotyper", "MergeBam")

    def __init__(self, projectname, setname, path):
        self.setname = setname
        self.projectname = projectname
        self.path = path

    def sampslist(self):
        """Find and list all samples in the set.

        Returns:
            A two-element list ``[samps, samplelist]``. ``samplelist`` holds
            the entry names found in the sample-set directory (hidden entries
            and ``IGNORED_SET_ENTRIES`` excluded, sorted by code point), and
            ``samps`` holds the same names with a leading
            ``projectname + "_"`` removed. Entries that do not carry the
            prefix are kept unchanged so both lists always stay aligned.
            If the directory cannot be read, a message is printed and
            ``[[], []]`` is returned.
        """
        set_dir = self.SAMPLE_SET_ROOT + self.setname + "/"
        try:
            entries = os.listdir(set_dir)
        except OSError:
            print( "Can't make sample list. Those files are not where they ought to be, or Sample_Set is not valid.")
            # Return empty lists: an empty-string sample name would make the
            # bam/bed search walk the whole Sample root.
            return [[], []]

        samplelist = sorted(
            name for name in entries
            if not name.startswith(".") and name not in self.IGNORED_SET_ENTRIES
        )
        prefix = self.projectname + "_"
        samps = [
            name[len(prefix):] if name.startswith(prefix) else name
            for name in samplelist
        ]
        return [samps, samplelist]

    def bamlist(self, samplist, write=True):
        """Find and list all cleaned bams in a sample set.

        Args:
            samplist: Iterable of sample directory names under ``SAMPLE_ROOT``.
            write: When true, (re)write ``path + "bamsfor" + setname + ".list"``
                with one found path per line and print the list file name;
                otherwise print the found paths for each sample.
        """
        self._list_files(samplist, "cleaned.bam", "bam", write)

    def bedlist(self, samplist, write=True):
        """Find and list all beds for a sample set.

        Args:
            samplist: Iterable of sample directory names under ``SAMPLE_ROOT``.
            write: When true, (re)write ``path + "bedsfor" + setname + ".list"``
                with one found path per line and print the list file name;
                otherwise print the found paths for each sample.
        """
        self._list_files(samplist, ".bed", "bed", write)

    def _list_files(self, samplist, suffix, kind, write):
        """Shared implementation of bamlist/bedlist for names ending in *suffix*."""
        if not write:
            for samp in samplist:
                # One print per sample, each path newline-terminated, which
                # mirrors printing the raw output of a per-sample search.
                print("".join(found + "\n" for found in self._find_by_suffix(samp, suffix)))
            return

        # Yields e.g. "bamsfor<setname>.list" / "bedsfor<setname>.list".
        list_path = self.path + kind + "sfor" + self.setname + ".list"
        try:
            # Mode "w" truncates any previous list, and the context manager
            # closes the file even if a search fails part-way through.
            with open(list_path, "w") as listfile:
                for samp in samplist:
                    listfile.writelines(
                        found + "\n" for found in self._find_by_suffix(samp, suffix)
                    )
        except IOError:
            print( "can't make ." + kind + " list.Those files are not where they ought to be, or Sample_Set is not valid")
            return
        print (list_path)

    def _find_by_suffix(self, samp, suffix):
        """Return paths below the sample's directory whose base name ends with *suffix*.

        Pure-Python stand-in for ``find <dir>/ -name '*<suffix>'``: files and
        directories are both matched, nothing is passed through a shell, and
        a missing sample directory simply yields no matches. Results come
        back in a deterministic (sorted, top-down) order.
        """
        top = self.SAMPLE_ROOT + str(samp) + "/"
        matches = []
        for dirpath, dirnames, filenames in os.walk(top):
            dirnames.sort()  # fixes the traversal order of sub-directories
            for name in sorted(dirnames + filenames):
                if name.endswith(suffix):
                    matches.append(os.path.join(dirpath, name))
        return matches
|
||||||
|
'''next two lines are example usage
|
||||||
|
#pfizer5=SampleSet("T2D_Altshuler_Pfizer_Plate_5", "T2D_Altshuler_Pfizer", "humgen/gsa-hphome1/corin/oneoffs/pfizer5/")
|
||||||
|
#pfizer5.bamlist(pfizer5.sampslist()[0], write=False)'''
|
||||||
Loading…
Reference in New Issue