Quick python scripts for going from genotype VCFs to site-only VCFs, and one to fix BC vcf files (which had "het" genotypes at non-variant sites)
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3768 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
e50627a49e
commit
46c39f2d53
|
|
@ -0,0 +1,16 @@
|
|||
import sys
|
||||
print("Fixing "+sys.argv[1]+" to "+sys.argv[2])
|
||||
bad_vcf = open(sys.argv[1])
|
||||
out_vcf = open(sys.argv[2],'w')
|
||||
|
||||
for line in bad_vcf.readlines():
|
||||
if ( line.startswith("#") ):
|
||||
out_vcf.write(line)
|
||||
else:
|
||||
spline = line.strip().split("\t")
|
||||
newspline = list()
|
||||
for field in spline:
|
||||
if ( field.find("pGeno") > -1 ):
|
||||
field = "0/0:"+field.split(":",1)[1]
|
||||
newspline.append(field)
|
||||
out_vcf.write("\t".join(newspline)+"\n")
|
||||
|
|
@ -0,0 +1,38 @@
|
|||
import sys
|
||||
genotype_vcf = open(sys.argv[1])
|
||||
sites_vcf = open(sys.argv[2],'w')
|
||||
|
||||
sample_name = "ALL_HET"
|
||||
info = "."
|
||||
format = "GT"
|
||||
het = "0/1"
|
||||
use_fields = range(7)
|
||||
|
||||
line_counter = 0
|
||||
print("Reading genotype file...")
|
||||
for line in genotype_vcf.readlines():
|
||||
line_counter += 1
|
||||
if ( line.startswith("#") and not line.startswith("#CHR") ):
|
||||
sites_vcf.write(line)
|
||||
elif ( line.startswith("#CHR") ):
|
||||
sites_vcf.write("##source=vcfGenotypeToSites\n")
|
||||
spline = line.strip().split("\t")
|
||||
newfields = list()
|
||||
for i in range(9):
|
||||
newfields.append(spline[i])
|
||||
newfields.append(sample_name)
|
||||
sites_vcf.write("\t".join(newfields)+"\n")
|
||||
else:
|
||||
spline = line.strip().split("\t")
|
||||
newfields = list()
|
||||
for i in use_fields:
|
||||
newfields.append(spline[i])
|
||||
newfields.append(info)
|
||||
newfields.append(format)
|
||||
newfields.append(het)
|
||||
sites_vcf.write("\t".join(newfields)+"\n")
|
||||
if ( line_counter % 100000 == 0 ):
|
||||
print("Converted: "+str(line_counter)+" lines")
|
||||
|
||||
genotype_vcf.close()
|
||||
sites_vcf.close()
|
||||
Loading…
Reference in New Issue