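Add a script that splits a provided interval list by contig, writing each contig's intervals to its own output file. If there are more contigs than output files, the excess intervals are written to the last provided file. Works like splitIntervals.sh. This is for use with Queue.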
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3895 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
62a9217a61
commit
52f24c86fa
|
|
@ -0,0 +1,33 @@
|
||||||
|
import sys
|
||||||
|
# Command line: <interval list> <output file 1> [<output file 2> ...]
input_file = sys.argv[1]
# Index into sys.argv of the output file currently being written. It is
# incremented before each output is opened, so the first output path used
# is sys.argv[2].
file_index = 1

# "@"-prefixed header lines seen so far; they are written at the top of each
# output file when it is opened.
headerLines = []
# Contig of the most recently processed interval (None before the first one).
prevContig = None
# Handle of the output file currently being written (None until one is opened).
outFile = None
|
||||||
|
|
||||||
|
def parseContig(line):
    """Return the contig name at the start of one interval line.

    Two layouts are recognised:

    * tab-delimited records, where the contig is the first column;
    * ``chr:start-stop`` strings, where the contig is the text before the
      first ``:``.

    A line with neither a tab nor a ``-`` is returned unchanged (including
    any trailing newline), as before.
    """
    # Decide on the tab first: a tab-delimited record may itself contain "-"
    # (for example a minus-strand column or a hyphenated name), so the mere
    # presence of "-" is not enough to conclude the chr:start-stop format.
    if "\t" not in line and "-" in line:  ## format is chr:start-stop
        return line.split(":")[0]
    return line.split("\t")[0]
|
||||||
|
|
||||||
|
# Stream the interval list once, switching to the next output file each time
# the contig changes. Output paths are taken from sys.argv[2:], in order.
with open(input_file) as interval_lines:  # input is closed even on error
    for line in interval_lines:
        if line.startswith("@"):
            # Header line: remember it so it is copied into every output
            # file opened from this point on.
            headerLines.append(line)
            continue

        thisContig = parseContig(line)
        if thisContig != prevContig:
            file_index += 1
            try:
                # Only the argv lookup can legitimately raise IndexError.
                nextPath = sys.argv[file_index]
            except IndexError:
                # More contigs than output files: keep appending to the last
                # file that was opened.
                print("Error: fewer output files than contigs. Writing remainder to final file.")
            else:
                if outFile is not None:
                    outFile.close()
                outFile = open(nextPath, 'w')
                outFile.writelines(headerLines)
            prevContig = thisContig

        if outFile is None:
            # No output path was supplied at all, so there is nowhere to
            # write; fail with a clear message instead of an AttributeError.
            sys.exit("Error: no output files were provided.")
        outFile.write(line)

# Flush and release the last output file.
if outFile is not None:
    outFile.close()
|
||||||
Loading…
Reference in New Issue