Quick script that renames "chr#" contigs to "#" and "chrM" to "MT", and moves the mitochondrial records to the end of the VCF, in accordance with the 1KG reference.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2727 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
chartl 2010-01-28 21:59:33 +00:00
parent 79c4cc1db7
commit 0fb032a436
1 changed file with 29 additions and 0 deletions

View File

@ -0,0 +1,29 @@
#!/usr/bin/env python
import os
# Directory scanned for input VCF files, with a trailing "/" because file
# names are appended to it directly. Defaults to the current working directory.
hapmap_dir = "%s/" % os.getcwd()  ## CHANGE ME
def convert(line):
    """Rename the contig at the start of a VCF record line.

    A leading "chr" prefix is dropped ("chr1" -> "1") and the contig "M" is
    renamed to "MT". Only the start of the line is touched; the rest of the
    record is returned unchanged.

    Args:
        line: one VCF data line, contig name first.

    Returns:
        The line with its contig name rewritten.
    """
    # Strip the prefix only when it really is a prefix: a bare replace() would
    # remove the first "chr" found anywhere in the record (e.g. in INFO).
    if line.startswith("chr"):
        line = line[len("chr"):]
    # Rename only a contig that is exactly "M" (followed by a field separator
    # or end of line), so an already-converted "MT" does not become "MTT".
    if line.startswith("M") and (len(line) == 1 or line[1].isspace()):
        line = "MT" + line[1:]
    return line
# Convert every VCF found in hapmap_dir. Each result is written to the current
# working directory under the input's name with "hg18" replaced by "b36".
for vcf_name in os.listdir(hapmap_dir):
    if not vcf_name.endswith('vcf'):
        continue
    in_vcf_path = hapmap_dir + vcf_name
    out_vcf_filename = vcf_name.replace("hg18", "b36")
    # Opening the output for writing truncates it. If the name has no "hg18"
    # (e.g. an already converted file on a re-run), the output would be the
    # input itself and its contents would be lost, so skip such files.
    if os.path.realpath(in_vcf_path) == os.path.realpath(out_vcf_filename):
        print("skipping (output would overwrite input): " + vcf_name)
        continue
    print("converting: " + vcf_name)
    # Mitochondrial records are held back and written after all other
    # contigs, so they end up last in the output.
    chrM_lines = []
    # Context managers close and flush both files even if a write fails.
    with open(in_vcf_path) as in_vcf:
        with open(out_vcf_filename, 'w') as out_vcf:
            # Iterate lazily rather than loading the whole VCF with readlines().
            for line in in_vcf:
                if line.startswith("#"):
                    # Header lines are copied through untouched.
                    out_vcf.write(line)
                elif line.startswith("chrM"):
                    chrM_lines.append(line)
                else:
                    out_vcf.write(convert(line))
            for line in chrM_lines:
                out_vcf.write(convert(line))