Two perl scripts (from Kristian Cibulskis) and one python script for annotating VCF files with the information generated by the cancer MAF annotation tool.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2797 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
a1d5a384f4
commit
58456822ab
|
|
@ -0,0 +1,19 @@
|
|||
#!/util/bin/perl -w
|
||||
use strict;
|
||||
|
||||
if (scalar(@ARGV) != 1) {
|
||||
die("Usage: annotate_single_maf.pl <maf>\n");
|
||||
}
|
||||
|
||||
my ($maf) = @ARGV;
|
||||
|
||||
my $cmd = "use matlab && " .
|
||||
"matlab <<STOP\n" .
|
||||
"addpath /home/radon00/lawrence/CancerGenomeAnalysis/trunk/matlab/mike" . "\n" .
|
||||
"addpath /home/radon00/lawrence/CancerGenomeAnalysis/trunk/matlab/seq" . "\n" .
|
||||
"addpath /home/radon00/lawrence/CancerGenomeAnalysis/trunk/matlab" . "\n" .
|
||||
"annotate_maflite('$maf', '$maf.annotated')" . "\n" .
|
||||
"quit" . "\n" .
|
||||
"STOP" . "\n";
|
||||
|
||||
system($cmd) == 0 or die();
|
||||
|
|
@ -0,0 +1,39 @@
|
|||
#!/util/bin/perl
|
||||
$|++;
|
||||
use strict;
|
||||
|
||||
if (scalar(@ARGV) != 1 ) {
|
||||
die("usage vcf_to_germline_maflite.pl <normalAlias>\n");
|
||||
}
|
||||
|
||||
my ($normalAlias) = @ARGV;
|
||||
my $line;
|
||||
while (defined($line = <STDIN>)) {
|
||||
if ($line =~ /#/) { next; }
|
||||
|
||||
my ($chrom, $pos, $rsName, $ref, $alt, $qual, $filter, $info, $format, $sampleA) = split("\t", $line);
|
||||
|
||||
my ($genoNum) = split(":", $sampleA);
|
||||
my ($geno1, $geno2) = split("/",$genoNum);
|
||||
|
||||
my $allele1 = "N";
|
||||
my $allele2 = "N";
|
||||
if ( $geno1 eq "0") {
|
||||
$allele1 = $ref;
|
||||
} elsif ($geno1 eq "1") {
|
||||
$allele1 = $alt;
|
||||
} else {
|
||||
die("only handled single alt currently:\n$line \n");
|
||||
}
|
||||
|
||||
if ( $geno2 eq "0") {
|
||||
$allele2 = $ref;
|
||||
} elsif ($geno2 eq "1") {
|
||||
$allele2 = $alt;
|
||||
} else {
|
||||
die("only handled single alt currently:\n$line\n");
|
||||
}
|
||||
|
||||
my $ncbiBuild = "36";
|
||||
print join("\t", ($ncbiBuild, $chrom, $pos, $pos, $ref, $allele1, $allele2, $normalAlias, $normalAlias)) . "\n";
|
||||
}
|
||||
|
|
@ -0,0 +1,74 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
import sys, FlatFileTable, os
|
||||
|
||||
if sys.argv < 3:
|
||||
print "Usage: AnnotateVCFwithMAF.py VCF_file MAF_file"
|
||||
sys.exit()
|
||||
|
||||
vcf_filename = sys.argv[1]
|
||||
maf_filename = sys.argv[2]
|
||||
|
||||
maf_gen = FlatFileTable.record_generator(maf_filename, "\t")
|
||||
|
||||
headers=["gene","type","transcript","strand","genomechange","cDNAchange","codonchange","proteinchange"]
|
||||
|
||||
loci_and_info = []
|
||||
|
||||
for record in maf_gen:
|
||||
#print record
|
||||
#info_string = ",".join(["%s=%s" % (header, record[header]) for header in headers])
|
||||
info_string = ""
|
||||
for index,header in enumerate(headers):
|
||||
if record.has_key(header):
|
||||
if index > 0:
|
||||
info_string += ","
|
||||
info_string += "%s=%s" % (header, record[header])
|
||||
|
||||
locus = record["chr"]+":"+record["start"]
|
||||
|
||||
#print locus, info_string
|
||||
loci_and_info.append((locus, info_string))
|
||||
|
||||
#vcf_gen = FlatFileTable.record_generator(vcf_file, "\t", 34)
|
||||
vcf_file = open(vcf_filename)
|
||||
vcf_out_file = open(os.path.splitext(vcf_filename)[0]+".maf_annotated.vcf", "w")
|
||||
vcf_format_line = vcf_file.readline()
|
||||
vcf_out_file.write(vcf_format_line)
|
||||
if vcf_format_line != "##fileformat=VCFv3.3\n":
|
||||
print ("VCF not v 3.3")
|
||||
sys.exit()
|
||||
|
||||
for i in range(33):
|
||||
vcf_out_file.write(vcf_file.readline())
|
||||
|
||||
header_fields = vcf_file.readline()
|
||||
vcf_out_file.write(header_fields)
|
||||
if not header_fields.startswith("#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT"):
|
||||
print ("VCF header fields not in expected order")
|
||||
print header_fields
|
||||
sys.exit()
|
||||
|
||||
for vcf_line, locus_and_info in zip(vcf_file.readlines(), loci_and_info):
|
||||
vcf_line_fields = vcf_line.split("\t")
|
||||
vcf_locus = vcf_line_fields[0]+":"+vcf_line_fields[1]
|
||||
#print record
|
||||
maf_locus, maf_info = locus_and_info
|
||||
if maf_locus != vcf_locus:
|
||||
print "ERROR: VCF and MAF loci did not match"
|
||||
sys.exit()
|
||||
|
||||
vcf_line_fields[7] = vcf_line_fields[7]+","+maf_info
|
||||
new_vcf_line = "\t".join(vcf_line_fields)
|
||||
vcf_out_file.write(new_vcf_line)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Loading…
Reference in New Issue