#!/usr/bin/env python
"""Rename HapMap VCF contigs to the 1KG reference convention.

Quick script that changes "chr#" to "#" and "chrM" to "MT" and moves
mitochondria to the end of the vcf, in accordance with the 1KG reference.

Every file in ``hapmap_dir`` whose name ends in ``vcf`` is converted.  The
output file name is the input name with "hg18" replaced by "b36", and it is
written to the current working directory.
"""
# Provenance: python/generate1KGHapmapVCF.py, added by chartl on 2010-01-28
# (commit 0fb032a436e1a31fe9aa9f3370f759a8b330e505, svn trunk@2727).

import os

hapmap_dir = os.getcwd()+"/" ##CHANGE ME


def convert(line):
    """Return a VCF record line with its contig renamed.

    A leading "chr" is stripped from the line, and a contig that is then
    exactly "M" becomes "MT".  Only the start of the line is touched, so an
    occurrence of "chr" in a later column is preserved, and a line that is
    already converted (for example one starting with "MT") is returned
    unchanged.
    """
    if line.startswith("chr"):
        line = line[len("chr"):]
    # Rename only when the contig is exactly "M": the character after it must
    # be whitespace or the end of the line (strip() of either is empty).
    if line.startswith("M") and not line[1:2].strip():
        line = "MT" + line[1:]
    return line


def _is_mito(converted_line):
    """Return True if an already-converted record lies on the "MT" contig."""
    return converted_line.startswith("MT") and not converted_line[2:3].strip()


def convert_vcf(in_path, out_path):
    """Convert the VCF at in_path and write the result to out_path.

    Lines starting with "#" are copied through unchanged.  Every other line
    is passed through convert(); mitochondrial records are held back and
    written after all other records.  Both files are closed on exit, even
    if an error occurs.
    """
    mito_lines = []
    with open(in_path) as in_vcf:
        with open(out_path, 'w') as out_vcf:
            # Iterate the file lazily instead of loading it with readlines().
            for line in in_vcf:
                if line.startswith("#"):
                    out_vcf.write(line)
                    continue
                converted = convert(line)
                if _is_mito(converted):
                    mito_lines.append(converted)
                else:
                    out_vcf.write(converted)
            out_vcf.writelines(mito_lines)


def main():
    """Convert every file in hapmap_dir whose name ends in 'vcf'."""
    for vcf_name in os.listdir(hapmap_dir):
        if not vcf_name.endswith('vcf'):
            continue
        in_path = os.path.join(hapmap_dir, vcf_name)
        out_vcf_filename = vcf_name.replace("hg18", "b36")
        # If the name has no "hg18" in it, the output name equals the input
        # name; opening it for writing would truncate the input before it
        # has been read.  Skip such files rather than destroy them.
        if os.path.realpath(out_vcf_filename) == os.path.realpath(in_path):
            print("skipping (output would overwrite input): "+vcf_name)
            continue
        print("converting: "+vcf_name)
        convert_vcf(in_path, out_vcf_filename)


if __name__ == "__main__":
    main()