62 lines
2.3 KiB
Python
62 lines
2.3 KiB
Python
|
|
import sys
|
||
|
|
import os
|
||
|
|
import subprocess
|
||
|
|
import shlex
|
||
|
|
|
||
|
|
from optparse import OptionParser
|
||
|
|
|
||
|
|
def parseInput(fList,ignoreExt = None):
|
||
|
|
inNames = []
|
||
|
|
for ele in fList:
|
||
|
|
if ( ignoreExt != None and ele.endswith(ignoreExt) ):
|
||
|
|
inFileNames.append(ele)
|
||
|
|
if ( os.path.exists(ele) ):
|
||
|
|
for line in open(ele).readlines():
|
||
|
|
inNames.append(line.strip())
|
||
|
|
return inNames
|
||
|
|
|
||
|
|
def bamsWithSamples(bamList):
|
||
|
|
cmdbase = "samtools view -H %s | grep SM | tr '\\t' '\\n' | grep SM | sed 's/SM://g' | uniq"
|
||
|
|
sam2bam = dict()
|
||
|
|
for bf in bamList:
|
||
|
|
if ( not os.path.exists(bf) ):
|
||
|
|
raise IOError("Bam file "+bf+" does not exist")
|
||
|
|
cmd = cmdbase % bf
|
||
|
|
proc = subprocess.Popen(cmd,shell=True,stdout=subprocess.PIPE)
|
||
|
|
proc.wait()
|
||
|
|
stdout = proc.stdout.readlines()
|
||
|
|
if ( len(stdout) > 1 ):
|
||
|
|
raise RuntimeError("The bam file "+bf+" contains multiple different sample entries")
|
||
|
|
sm = stdout[0].strip()
|
||
|
|
sam2bam[sm]=bf
|
||
|
|
return sam2bam
|
||
|
|
|
||
|
|
def runMain(opt,arg):
|
||
|
|
bamFiles = bamsWithSamples(parseInput(opt.bam_files,"bam"))
|
||
|
|
caseNames = set(parseInput(opt.cases))
|
||
|
|
controlNames = set(parseInput(opt.controls))
|
||
|
|
output = open(opt.output,'w')
|
||
|
|
output.write("samples:")
|
||
|
|
sample_base = "- id: %s\n properties:\n cohort: %s\n bam: %s"
|
||
|
|
for s in bamFiles.keys():
|
||
|
|
cc = "Unknown"
|
||
|
|
if ( s in caseNames ):
|
||
|
|
cc = "case"
|
||
|
|
if ( s in controlNames ):
|
||
|
|
cc = "control"
|
||
|
|
output.write("\n" + sample_base % (s,cc,bamFiles[s]))
|
||
|
|
output.close()
|
||
|
|
|
||
|
|
def main():
|
||
|
|
usage = "usage: %prog [options] arg"
|
||
|
|
parser = OptionParser(usage)
|
||
|
|
parser.add_option("-I","--bams",dest="bam_files",help="the bam files, as multiple arguments or a simple newline-delimited file",action="append")
|
||
|
|
parser.add_option("-A","--case",dest="cases",action="append",help="A list of the case samples, as multiple arguments or a simple newline-delimited file")
|
||
|
|
parser.add_option("-O","--control",dest="controls",action="append",help="A list of the control samples, multiple arguments or a newline-delimited file")
|
||
|
|
parser.add_option("-o","--out",dest="output",action="store",help="Name of the output metadata file to write to")
|
||
|
|
(options,args) = parser.parse_args()
|
||
|
|
runMain(options,args)
|
||
|
|
|
||
|
|
if __name__ == "__main__":
|
||
|
|
main()
|