diff --git a/python/setFilterGenotypesToRef.py b/python/setFilterGenotypesToRef.py
new file mode 100644
index 000000000..e8944aa61
--- /dev/null
+++ b/python/setFilterGenotypesToRef.py
@@ -0,0 +1,40 @@
+"""Reset flagged genotypes in a VCF to homozygous reference.
+
+Usage: setFilterGenotypesToRef.py <input.vcf> <output.vcf>
+
+Header lines (starting with "#") are copied through unchanged.  On every
+other line, any tab-separated field containing the text "pGeno" has
+everything before its first ":" replaced with "0/0".
+"""
+import sys
+
+
+def fix_line(line):
+    """Return the record with each "pGeno" field's genotype set to 0/0."""
+    fields = line.strip().split("\t")
+    fixed = list()
+    for field in fields:
+        if "pGeno" in field:
+            # Keep everything after the first ":" and swap in a 0/0 call.
+            field = "0/0:" + field.split(":", 1)[1]
+        fixed.append(field)
+    return "\t".join(fixed) + "\n"
+
+
+def main(argv):
+    """Copy argv[1] to argv[2], rewriting the flagged genotype fields."""
+    print("Fixing "+argv[1]+" to "+argv[2])
+    # "with" closes (and flushes) both files even if an error occurs.
+    with open(argv[1]) as bad_vcf:
+        with open(argv[2], 'w') as out_vcf:
+            # Stream line by line; readlines() would hold the whole VCF
+            # in memory.
+            for line in bad_vcf:
+                if line.startswith("#"):
+                    out_vcf.write(line)
+                else:
+                    out_vcf.write(fix_line(line))
+
+
+if __name__ == "__main__":
+    main(sys.argv)
diff --git a/python/vcfGenotypeToSites.py b/python/vcfGenotypeToSites.py
new file mode 100644
index 000000000..203d51dfc
--- /dev/null
+++ b/python/vcfGenotypeToSites.py
@@ -0,0 +1,53 @@
+"""Collapse a genotype VCF into a single-sample, all-het sites VCF.
+
+Usage: vcfGenotypeToSites.py <genotypes.vcf> <sites.vcf>
+
+Every record keeps its first seven tab-separated columns and is given an
+INFO of ".", a FORMAT of "GT" and a single "0/1" call for one sample
+named ALL_HET.
+"""
+import sys
+
+SAMPLE_NAME = "ALL_HET"
+INFO = "."
+GENOTYPE_FORMAT = "GT"  # not called "format": that shadows the builtin
+HET = "0/1"
+NUM_SITE_FIELDS = 7  # leading columns copied from every record
+NUM_HEADER_FIELDS = 9  # leading columns kept from the "#CHR" header line
+PROGRESS_INTERVAL = 100000  # report progress every this many input lines
+
+
+def convert_line(line):
+    """Return the text to write to the sites VCF for one input line."""
+    if line.startswith("#CHR"):
+        # Column-header line: record this tool as a source, keep the first
+        # nine columns and replace the remaining ones with one sample name.
+        columns = line.strip().split("\t")
+        kept = [columns[i] for i in range(NUM_HEADER_FIELDS)]
+        kept.append(SAMPLE_NAME)
+        return "##source=vcfGenotypeToSites\n" + "\t".join(kept) + "\n"
+    if line.startswith("#"):
+        # Every other header line is passed through untouched.
+        return line
+    columns = line.strip().split("\t")
+    kept = [columns[i] for i in range(NUM_SITE_FIELDS)]
+    kept.extend([INFO, GENOTYPE_FORMAT, HET])
+    return "\t".join(kept) + "\n"
+
+
+def main(argv):
+    """Convert the genotype VCF argv[1] into the sites VCF argv[2]."""
+    # "with" closes (and flushes) both files even if an error occurs.
+    with open(argv[1]) as genotype_vcf:
+        with open(argv[2], 'w') as sites_vcf:
+            print("Reading genotype file...")
+            # Stream line by line; readlines() would hold the whole VCF
+            # in memory.
+            for line_counter, line in enumerate(genotype_vcf, 1):
+                sites_vcf.write(convert_line(line))
+                if line_counter % PROGRESS_INTERVAL == 0:
+                    print("Converted: "+str(line_counter)+" lines")
+
+
+if __name__ == "__main__":
+    main(sys.argv)