#!/usr/bin/perl -w

# Runs the liftover tool on a VCF and properly handles the output.
#
# Steps performed:
#   1. Run GATK LiftoverVariants on the input VCF, writing an unsorted VCF
#      under the temporary directory.
#   2. Copy the header lines ('#'-prefixed) of that file into a new file, then
#      append the remaining records after piping them through `sort -n -k2`
#      and sortByRef.pl (which is given $newRef.fasta.fai).
#   3. Run GATK FilterLiftedVariants on the sorted file, writing -out.
#   4. Remove the temporary files.
#
# Any external command that fails aborts the script with an error instead of
# silently producing a truncated or empty result; in that case the temporary
# files are left in place for inspection.

use strict;
use warnings;
use Getopt::Long;

# Flush progress messages immediately so they appear before (not after) the
# output of the child processes started below.
$| = 1;

my $in     = undef;
my $gatk   = undef;
my $chain  = undef;
my $newRef = undef;
my $oldRef = undef;
my $out    = undef;
my $tmp    = "/tmp";

GetOptions(
    "vcf=s"    => \$in,
    "gatk=s"   => \$gatk,
    "chain=s"  => \$chain,
    "newRef=s" => \$newRef,
    "oldRef=s" => \$oldRef,
    "out=s"    => \$out,
    "tmp=s"    => \$tmp,
);

# Every option except -tmp (which defaults to /tmp) is required.
if ( !$in || !$gatk || !$chain || !$newRef || !$oldRef || !$out ) {
    print "Usage: liftOverVCF.pl\n\t-vcf \t\t\n\t-gatk \t\t\n\t-chain \t\t\n\t-newRef \t\n\t-oldRef \t\n\t-out \t\t\n\t-tmp \t\t\n";
    print "Example: ./liftOverVCF.pl\n\t-vcf /humgen/gsa-hpprojects/GATK/data/Comparisons/Validated/1kg_snp_validation/all_validation_batches.b36.vcf\n\t-chain b36ToHg19.broad.over.chain\n\t-out lifted.hg19.vcf\n\t-gatk /humgen/gsa-scr1/ebanks/Sting_dev\n\t-newRef /seq/references/Homo_sapiens_assembly19/v0/Homo_sapiens_assembly19\n\t-oldRef /humgen/1kg/reference/human_b36_both\n";
    exit(1);
}

# Run a shell command and die with a descriptive message unless it exits 0.
#   $command - the command line handed to system() (interpreted by the shell)
#   $step    - short human-readable name of the step, used in error messages
# Note: for a shell pipeline, the status reported is that of the pipeline as
# returned by the shell (normally its last command).
sub run_cmd {
    my ( $command, $step ) = @_;

    my $status = system($command);
    return if $status == 0;

    die "$step: could not execute command: $!\n" if $status == -1;
    die sprintf( "%s: command died with signal %d\n", $step, $status & 127 )
        if $status & 127;
    die sprintf( "%s: command exited with status %d\n", $step, $status >> 8 );
}

# generate a random number to build a (probably) unique temp-file prefix
my $random_number = rand();
my $tmp_prefix    = "$tmp/$random_number";
print "Writing temporary files to prefix: $tmp_prefix\n";
my $unsorted_vcf = "$tmp_prefix.unsorted.vcf";

# lift over the file
print "Lifting over the vcf...";
my $cmd = "java -jar $gatk/dist/GenomeAnalysisTK.jar -T LiftoverVariants -R $oldRef.fasta -B:variant,vcf $in -o $unsorted_vcf -chain $chain -dict $newRef.dict";
run_cmd( $cmd, "LiftoverVariants" );

# we need to sort the lifted over file now
print "\nRe-sorting the vcf...\n";
my $sorted_vcf = "$tmp_prefix.sorted.vcf";
open( my $sorted_fh, '>', $sorted_vcf )
    or die "can't open $sorted_vcf: $!";

# write the header: copy lines until the first one that does not start
# with '#' (or until end of file)
open( my $unsorted_fh, '<', $unsorted_vcf )
    or die "can't open $unsorted_vcf: $!";
while ( my $line = <$unsorted_fh> ) {
    last if $line !~ m/^#/;
    print {$sorted_fh} $line;
}
close($unsorted_fh);

# The header must be fully flushed to disk before the pipeline below appends
# the sorted records to the same file, so a failed close is fatal.
close($sorted_fh) or die "can't close $sorted_vcf: $!";

$cmd = "grep \"^#\" -v $unsorted_vcf | sort -n -k2 -T $tmp | $gatk/perl/sortByRef.pl --tmp $tmp - $newRef.fasta.fai >> $sorted_vcf";
run_cmd( $cmd, "Re-sorting" );

# Filter the VCF for bad records
print "\nFixing/removing bad records...\n";
$cmd = "java -jar $gatk/dist/GenomeAnalysisTK.jar -T FilterLiftedVariants -R $newRef.fasta -B:variant,vcf $sorted_vcf -o $out";
run_cmd( $cmd, "FilterLiftedVariants" );

# clean up (the .idx file is removed too, if one was created next to the
# sorted VCF)
unlink $unsorted_vcf;
unlink $sorted_vcf;
my $sorted_index = "$sorted_vcf.idx";
unlink $sorted_index;

print "\nDone!\n";