gatk-3.8/perl/MergeAndEvaluateVCFs.pl

81 lines
3.3 KiB
Perl
Executable File

#!/usr/bin/perl -w
use strict;
use Getopt::Long;
# Parse and validate the command-line options for this script.
#
# Every option named in %doc is required and takes a string value
# (for example "-GATK /path/to/GenomeAnalysisTK.jar").  Options are read
# from @ARGV by Getopt::Long.
#
# If Getopt::Long rejects the command line, or if any required option was
# not supplied, an explanation is written to STDERR and the script exits
# with status -1.
#
# Returns: a hash (as a flat key/value list) mapping each option name to the
#          value given on the command line.
sub getArgs {
    # One-line description of every required option; also the single source
    # of truth for the set of option names.
    my %doc = (
        'GATK' => "Path to GenomeAnalysisTK.jar (usually in your Sting/dist/ directory)",
        'VARIANTREPORT' => "Path to VariantReport.R (usually in your Sting/R/VariantReport/ directory)",
        'REFERENCE' => "Path to human reference sequence",
        'DBSNP' => "Path to dbsnp rod",
        'VCF1' => "Path to first VCF file",
        'VCF1NAME' => "Name of first VCF file",
        'VCF2' => "Path to second VCF file",
        'VCF2NAME' => "Name of second VCF file",
        'VCFOUT' => "Output root for resultant merged VCF, the eval file, and PDF",
    );

    # Every option starts out undefined so that omissions can be detected
    # after parsing.
    my %args;
    foreach my $name (keys(%doc)) {
        $args{$name} = undef;
    }

    # Build the Getopt::Long specification: each option takes a mandatory
    # string ("=s") that is stored directly into %args.
    my %spec;
    foreach my $name (keys(%args)) {
        $spec{"$name=s"} = \$args{$name};
    }

    # GetOptions returns false when the command line is malformed (unknown
    # option, missing value, ...); it has already warned about the details.
    my $parsedOk = GetOptions(%spec);

    my @undefinedArgs = grep { !defined($args{$_}) } sort(keys(%args));

    if (!$parsedOk || scalar(@undefinedArgs) > 0) {
        if (!$parsedOk) {
            print STDERR "Error: the command line could not be parsed (see the messages above).\n";
        }

        if (scalar(@undefinedArgs) > 0) {
            print STDERR "Error: there were some undefined arguments:\n";
            foreach my $undefinedArg (@undefinedArgs) {
                print STDERR "\t-$undefinedArg : $doc{$undefinedArg}\n";
            }
        }

        exit(-1);
    }

    return %args;
}
# Run a shell command and abort the script if it does not succeed.
#
# Parameters:
#   $output - path of the file the command is expected to produce.  It is
#             currently unused: the "skip when the output already exists"
#             check below is commented out, so the command always runs.
#   $cmd    - the command line, handed to system() as a single string.
#
# Returns: the value returned by system(), which is 0 whenever this
#          subroutine returns at all.
# Dies:    when the command cannot be started, is terminated by a signal,
#          or exits with a non-zero status.  Each pipeline step consumes
#          the previous step's output, so continuing after a failure would
#          only operate on missing or stale files.
sub runCommand {
    my ($output, $cmd) = @_;

    #if (!-e $output) {
    my $status = system($cmd);
    #}

    if ($status == -1) {
        die "Failed to execute '$cmd': $!\n";
    }

    # The low seven bits of the wait status hold the terminating signal.
    if ($status & 127) {
        die sprintf("Command '%s' died with signal %d\n", $cmd, $status & 127);
    }

    # The exit code proper lives in the high byte of the wait status.
    if ($status != 0) {
        die sprintf("Command '%s' exited with status %d\n", $cmd, $status >> 8);
    }

    return $status;
}
# Main pipeline: merge the two call sets, evaluate the merged VCF, and
# render the evaluation as a PDF report.
my %args = getArgs();

# Values that appear repeatedly in the command lines below.
my ($firstName, $secondName) = @args{'VCF1NAME', 'VCF2NAME'};
my $mergedVcf = $args{'VCFOUT'};
my $evalFile  = "$mergedVcf.eval";
my $reportPdf = "$evalFile.pdf";

# Step 1: CombineVariants -- union of both VCFs, first VCF takes priority.
my $combineVariantsCmd = join(' ',
    "java -jar $args{'GATK'}",
    '-T CombineVariants',
    "-R $args{'REFERENCE'}",
    "-B:${firstName},VCF $args{'VCF1'}",
    "-B:${secondName},VCF $args{'VCF2'}",
    '-variantMergeOptions UNION',
    "-priority ${firstName},${secondName}",
    "-o $mergedVcf",
    '-l INFO',
);
runCommand($mergedVcf, $combineVariantsCmd);

# Step 2: VariantEval -- one "-select"/"-selectName" pair per value of the
# "set" annotation we want reported.  Each entry is [ set value, label ].
my @selections = (
    [ 'Intersection', 'Intersection' ],
    [ $firstName, $firstName ],
    [ "filterIn${firstName}-${secondName}", "In${secondName}-FilteredIn${firstName}" ],
    [ $secondName, $secondName ],
    [ "filterIn${secondName}-${firstName}", "In${firstName}-FilteredIn${secondName}" ],
    [ 'FilteredInAll', 'FilteredInAll' ],
);
my $variantEvalCmd = join(' ',
    "java -jar $args{'GATK'}",
    '-T VariantEval',
    "-R $args{'REFERENCE'}",
    "-D $args{'DBSNP'}",
    "-B:eval,VCF $mergedVcf",
    (map { sprintf(q{-select 'set=="%s"' -selectName %s}, $_->[0], $_->[1]) } @selections),
    '-reportType R',
    "-reportLocation $evalFile",
);
runCommand($evalFile, $variantEvalCmd);

# Step 3: VariantReport.R -- plot the evaluation output.
my $variantReportCmd = join(' ',
    "Rscript $args{'VARIANTREPORT'}",
    "-title 'Automated Variant Report'",
    "-author '$ENV{'USER'}'",
    "-evalRoot $evalFile",
    "-plotOut $reportPdf",
);
runCommand($reportPdf, $variantReportCmd);