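Script to split a provided interval list by contig, writing each contig's intervals to its own output file. If there are more contigs than output files, the excess intervals are written to the last provided file. Works like splitIntervals.sh. This is for Queue.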
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3895 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
62a9217a61
commit
52f24c86fa
|
|
@ -0,0 +1,33 @@
|
|||
import sys
|
||||
# Path of the interval list to split (first command-line argument).
input_file = sys.argv[1]
# Position in sys.argv of the output file currently in use. It starts at 1
# (the input file's slot) because it is incremented before each output file
# is opened, so the first contig is written to sys.argv[2].
file_index = 1

# "@"-prefixed header lines collected so far; they are copied to the top of
# every output file that gets opened.
headerLines = []
# Contig of the most recently processed interval (None before the first one).
prevContig = None
# Handle of the output file currently being written (None until the first
# interval has been seen).
outFile = None
|
||||
|
||||
def parseContig(line):
    """Return the contig name of a single interval line.

    Two line formats are recognised:

    * tab-delimited (``contig<TAB>start<TAB>stop...``): the contig is the
      first tab-separated field;
    * ``contig:start-stop`` (also ``contig:pos`` or a bare ``contig``): the
      contig is everything before the first colon.

    The tab check comes first because a tab-delimited line may itself contain
    "-" (for example a minus-strand column), which must not be mistaken for
    the ``start-stop`` separator.
    """
    if "\t" in line:
        return line.split("\t", 1)[0]
    # Strip the line ending so a bare contig or "contig:pos" line yields the
    # same name as a "contig:start-stop" line on that contig.
    return line.strip().split(":", 1)[0]
|
||||
|
||||
# Stream the interval list once, starting a new output file each time the
# contig changes. Output files are taken in order from sys.argv[2:]; once they
# run out, the remaining intervals keep going to the last file opened.
try:
    with open(input_file) as intervalFile:
        for line in intervalFile:
            if line.startswith("@"):
                # Header line: remember it so it can be replayed at the top of
                # every output file opened from here on.
                headerLines.append(line)
                continue
            if not line.strip():
                # A blank line carries no interval; without this check it
                # would be treated as a new contig and use up an output file.
                continue

            thisContig = parseContig(line)
            if thisContig != prevContig:
                file_index += 1
                if file_index < len(sys.argv):
                    # Open the next file before closing the current one, so a
                    # failed open leaves the current handle untouched.
                    newOutFile = open(sys.argv[file_index], 'w')
                    if outFile is not None:
                        outFile.close()
                    outFile = newOutFile
                    outFile.writelines(headerLines)
                elif outFile is None:
                    # No output file was ever opened, so there is nowhere to
                    # put the intervals; fail with a clear message.
                    sys.exit("Error: no output files were provided.")
                elif file_index == len(sys.argv):
                    # file_index keeps growing, so this fires only on the
                    # first contig that has no output file of its own.
                    print("Error: fewer output files than contigs. Writing remainder to final file.")
                prevContig = thisContig
            outFile.write(line)
finally:
    # Flush and release the last output file even if an error interrupted
    # the loop.
    if outFile is not None:
        outFile.close()
|
||||
Loading…
Reference in New Issue