Script to split a provided interval list into one output file per contig. If there are more contigs than output files, the intervals of the excess contigs are written to the last provided file. Works like splitIntervals.sh. This is for Queue.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3895 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
chartl 2010-07-29 16:42:53 +00:00
parent 62a9217a61
commit 52f24c86fa
1 changed files with 33 additions and 0 deletions

View File

@ -0,0 +1,33 @@
import sys
# Command line: <script> <interval_list> <out_file> [<out_file> ...]
# One output path is consumed per contig, in order of first appearance.
if len(sys.argv) < 2:
    sys.exit("usage: %s <interval_list> <out_file> [<out_file> ...]" % sys.argv[0])
input_file = sys.argv[1]
# Index into sys.argv of the most recently claimed output path.  argv[1] is
# the input file, so this is bumped to 2 before the first output is opened.
file_index = 1
# "@"-prefixed header lines seen so far; replayed at the top of each output.
headerLines = list()
# Contig of the previous interval line (None until the first interval).
prevContig = None
# Currently open output handle (None until the first interval is seen).
outFile = None
def parseContig(line):
    """Return the contig name of a single interval line.

    Two layouts are recognised:

    * tab-delimited records, where the contig is the first column;
    * "chr:start-stop" (or "chr:pos") strings, where the contig is the
      text before the first colon.

    The tab test comes first on purpose: a tab-delimited record may contain
    a "-" in a later column, so the presence of a dash cannot be used to
    tell the two layouts apart.  The line terminator is stripped so that a
    line without any separator still yields a clean contig name.
    """
    record = line.rstrip("\r\n")
    if "\t" in record:
        return record.split("\t")[0]
    return record.split(":")[0]
# Stream the interval list once, switching to the next output path from the
# command line every time the contig changes.
with open(input_file) as interval_lines:
    for line in interval_lines:
        if line.startswith("@"):
            # Buffer header lines so they can be replayed into every output
            # file that is opened from here on.
            headerLines.append(line)
            continue
        thisContig = parseContig(line)
        if thisContig != prevContig:
            file_index += 1
            if file_index < len(sys.argv):
                # Open the next file before closing the current one, so a
                # failure to open leaves the current handle usable.
                newOutFile = open(sys.argv[file_index], 'w')
                if outFile is not None:
                    outFile.close()
                outFile = newOutFile
                outFile.writelines(headerLines)
            elif outFile is None:
                # No output path was ever supplied: there is nowhere to write.
                sys.exit("Error: no output files were provided.")
            elif file_index == len(sys.argv):
                # Out of output paths: keep appending to the last opened
                # file.  Reported once, on the first excess contig only.
                print("Error: fewer output files than contigs. Writing remainder to final file.")
            prevContig = thisContig
        outFile.write(line)
# Close (and thereby flush) the last output file explicitly.
if outFile is not None:
    outFile.close()