diff --git a/perl/maf_annotation/annotate_single_maf.pl b/perl/maf_annotation/annotate_single_maf.pl new file mode 100755 index 000000000..be27aa992 --- /dev/null +++ b/perl/maf_annotation/annotate_single_maf.pl @@ -0,0 +1,19 @@ +#!/util/bin/perl -w +use strict; + +if (scalar(@ARGV) != 1) { + die("Usage: annotate_single_maf.pl \n"); +} + +my ($maf) = @ARGV; + +my $cmd = "use matlab && " . + "matlab <\n"); +} + +my ($normalAlias) = @ARGV; +my $line; +while (defined($line = )) { + if ($line =~ /#/) { next; } + + my ($chrom, $pos, $rsName, $ref, $alt, $qual, $filter, $info, $format, $sampleA) = split("\t", $line); + + my ($genoNum) = split(":", $sampleA); + my ($geno1, $geno2) = split("/",$genoNum); + + my $allele1 = "N"; + my $allele2 = "N"; + if ( $geno1 eq "0") { + $allele1 = $ref; + } elsif ($geno1 eq "1") { + $allele1 = $alt; + } else { + die("only handled single alt currently:\n$line \n"); + } + + if ( $geno2 eq "0") { + $allele2 = $ref; + } elsif ($geno2 eq "1") { + $allele2 = $alt; + } else { + die("only handled single alt currently:\n$line\n"); + } + + my $ncbiBuild = "36"; + print join("\t", ($ncbiBuild, $chrom, $pos, $pos, $ref, $allele1, $allele2, $normalAlias, $normalAlias)) . "\n"; +} diff --git a/python/maf_annotation/AnnotateVCFwithMAF.py b/python/maf_annotation/AnnotateVCFwithMAF.py new file mode 100755 index 000000000..98dd9152e --- /dev/null +++ b/python/maf_annotation/AnnotateVCFwithMAF.py @@ -0,0 +1,74 @@ +#!/usr/bin/env python + +import sys, FlatFileTable, os + +if sys.argv < 3: + print "Usage: AnnotateVCFwithMAF.py VCF_file MAF_file" + sys.exit() + +vcf_filename = sys.argv[1] +maf_filename = sys.argv[2] + +maf_gen = FlatFileTable.record_generator(maf_filename, "\t") + +headers=["gene","type","transcript","strand","genomechange","cDNAchange","codonchange","proteinchange"] + +loci_and_info = [] + +for record in maf_gen: + #print record + #info_string = ",".join(["%s=%s" % (header, record[header]) for header in headers]) + info_string = "" + for index,header in enumerate(headers): + if record.has_key(header): + if index > 0: + info_string += "," + info_string += "%s=%s" % (header, record[header]) + + locus = record["chr"]+":"+record["start"] + + #print locus, info_string + loci_and_info.append((locus, info_string)) + +#vcf_gen = FlatFileTable.record_generator(vcf_file, "\t", 34) +vcf_file = open(vcf_filename) +vcf_out_file = open(os.path.splitext(vcf_filename)[0]+".maf_annotated.vcf", "w") +vcf_format_line = vcf_file.readline() +vcf_out_file.write(vcf_format_line) +if vcf_format_line != "##fileformat=VCFv3.3\n": + print ("VCF not v 3.3") + sys.exit() + +for i in range(33): + vcf_out_file.write(vcf_file.readline()) + +header_fields = vcf_file.readline() +vcf_out_file.write(header_fields) +if not header_fields.startswith("#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT"): + print ("VCF header fields not in expected order") + print header_fields + sys.exit() + +for vcf_line, locus_and_info in zip(vcf_file.readlines(), loci_and_info): + vcf_line_fields = vcf_line.split("\t") + vcf_locus = vcf_line_fields[0]+":"+vcf_line_fields[1] + #print record + maf_locus, maf_info = locus_and_info + if maf_locus != vcf_locus: + print "ERROR: VCF and MAF loci did not match" + sys.exit() + + vcf_line_fields[7] = vcf_line_fields[7]+","+maf_info + new_vcf_line = "\t".join(vcf_line_fields) + vcf_out_file.write(new_vcf_line) + + + + + + + + + + + \ No newline at end of file