From f8a3bd72433870f601ea5979a6ad3bd1b59b3b45 Mon Sep 17 00:00:00 2001 From: kiran Date: Sun, 28 Nov 2010 00:45:21 +0000 Subject: [PATCH] A helper script to merge two VCFs, run VariantEval, and the VariantReport.R script. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4741 348d0f76-0448-11de-a6fe-93d51630548a --- perl/MergeAndEvaluateVCFs.pl | 80 ++++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100755 perl/MergeAndEvaluateVCFs.pl diff --git a/perl/MergeAndEvaluateVCFs.pl b/perl/MergeAndEvaluateVCFs.pl new file mode 100755 index 000000000..e5ad4cf6d --- /dev/null +++ b/perl/MergeAndEvaluateVCFs.pl @@ -0,0 +1,80 @@ +#!/usr/bin/perl -w + +use strict; +use Getopt::Long; + +sub getArgs { + my %doc = ( + 'GATK' => "Path to GenomeAnalysisTK.jar (usually in your Sting/dist/ directory)", + 'VARIANTREPORT' => "Path to VariantReport.R (usually in your Sting/R/VariantReport/ directory)", + 'REFERENCE' => "Path to human reference sequence", + 'DBSNP' => "Path to dbsnp rod", + 'VCF1' => "Path to first VCF file", + 'VCF1NAME' => "Name of first VCF file", + 'VCF2' => "Path to second VCF file", + 'VCF2NAME' => "Name of second VCF file", + 'VCFOUT' => "Output root for resultant merged VCF, the eval file, and PDF", + ); + + my %args = ( + 'GATK' => undef, + 'VARIANTREPORT' => undef, + 'REFERENCE' => undef, + 'DBSNP' => undef, + 'VCF1' => undef, + 'VCF1NAME' => undef, + 'VCF2' => undef, + 'VCF2NAME' => undef, + 'VCFOUT' => undef, + ); + + my $result = GetOptions( + "GATK=s" => \$args{'GATK'}, + "VARIANTREPORT=s" => \$args{'VARIANTREPORT'}, + "REFERENCE=s" => \$args{'REFERENCE'}, + "DBSNP=s" => \$args{'DBSNP'}, + "VCF1=s" => \$args{'VCF1'}, + "VCF1NAME=s" => \$args{'VCF1NAME'}, + "VCF2=s" => \$args{'VCF2'}, + "VCF2NAME=s" => \$args{'VCF2NAME'}, + "VCFOUT=s" => \$args{'VCFOUT'}, + ); + + my @undefinedArgs; + foreach my $key (sort { $a cmp $b } keys(%args)) { + if (!defined($args{$key})) { + push(@undefinedArgs, $key); + } + } + + if (scalar(@undefinedArgs) > 0) { + print "Error: there were some undefined arguments:\n"; + + foreach my $undefinedArg (@undefinedArgs) { + print "\t-$undefinedArg : $doc{$undefinedArg}\n"; + } + + exit(-1); + } + + return %args; +} + +sub runCommand { + my ($output, $cmd) = @_; + + #if (!-e $output) { + system($cmd); + #} +} + +my %args = &getArgs(); + +my $combineVariantsCmd = "java -jar $args{'GATK'} -T CombineVariants -R $args{'REFERENCE'} -B:$args{'VCF1NAME'},VCF $args{'VCF1'} -B:$args{'VCF2NAME'},VCF $args{'VCF2'} -variantMergeOptions UNION -priority $args{'VCF1NAME'},$args{'VCF2NAME'} -o $args{'VCFOUT'} -l INFO"; +&runCommand($args{'VCFOUT'}, $combineVariantsCmd); + +my $variantEvalCmd = "java -jar $args{'GATK'} -T VariantEval -R $args{'REFERENCE'} -D $args{'DBSNP'} -B:eval,VCF $args{'VCFOUT'} -select 'set==\"Intersection\"' -selectName Intersection -select 'set==\"$args{'VCF1NAME'}\"' -selectName $args{'VCF1NAME'} -select 'set==\"filterIn$args{'VCF1NAME'}-$args{'VCF2NAME'}\"' -selectName In$args{'VCF2NAME'}-FilteredIn$args{'VCF1NAME'} -select 'set==\"$args{'VCF2NAME'}\"' -selectName $args{'VCF2NAME'} -select 'set==\"filterIn$args{'VCF2NAME'}-$args{'VCF1NAME'}\"' -selectName In$args{'VCF1NAME'}-FilteredIn$args{'VCF2NAME'} -select 'set==\"FilteredInAll\"' -selectName FilteredInAll -reportType R -reportLocation $args{'VCFOUT'}.eval"; +&runCommand("$args{'VCFOUT'}.eval", $variantEvalCmd); + +my $variantReportCmd = "Rscript $args{'VARIANTREPORT'} -title 'Automated Variant Report' -author '$ENV{'USER'}' -evalRoot $args{'VCFOUT'}.eval -plotOut $args{'VCFOUT'}.eval.pdf"; +&runCommand("$args{'VCFOUT'}.eval.pdf", $variantReportCmd);