gatk-3.8/shell/runbeagle.sh

115 lines
3.6 KiB
Bash
Executable File

#!/bin/bash
chr=$1
# set basic input and output paths. User should modify the following to point to:
# a) Input vcf,
# b) Output base path for all files
# c) Location of beagle.jar
# d) Path to reference file
beagle_prefix="CEUTSI.chr${chr}.bgl"
beagledir="/broad/shptmp/username/beagle/CEUTSI_Pilot1/chr${chr}/"
beaglejar="../../beagle/beagle.jar"
input_vcf="/broad/shptmp/username/beagle/CEUTSI_Pilot1/CEUTSI.recal.filtered.vcf"
tmpdir="/broad/shptmp/username/tmp"
bgloutprefix="recal.filtered"
reffile="/broad/1KG/reference/human_g1k_v37.fasta"
# Set to one to determine which sections of beagle pipeline to run
runinput=1 # Run VCF to Beagle input converter
runbgl=1 # Run Beagle
runoutput=1 # run Beagle output-to-VCF converter
runvarianteval=1 # Run Variant Evaluator to measure Beagle performance.
# Reference files for variant evaluator
dohapmap=1
do1kgchip=1
# Path to HapMap/1KG Chip truth sets for variant evaluator
if [ $dohapmap == 1 ]
then
cmpfileh="/humgen/gsa-hpprojects/GATK/data/Comparisons/Validated/HapMap/3.2/genotypes_r27_nr.hg19_fwd.vcf"
fi
if [ $do1kgchip == 1 ]
then
cmpfile1="/humgen/gsa-hpprojects/GATK/data/Comparisons/Validated/1kg_chip_jan2010/1000Genome.chip.hg19.filtered.vcf"
fi
outputbglvcf=$beagledir$bgloutprefix.$beagle_prefix.output.vcf
# check if Beagle directory exists. If not, create it.
if [ ! -d $beagledir ]
then
echo "Creating Beagle directory $beagledir"
mkdir $beagledir
fi
if [ $runinput == 1 ]
then
echo "Running GATK to create Beagle input"
java -Xmx4000m -jar ../dist/GenomeAnalysisTK.jar -L $chr -l INFO \
-R $reffile -T ProduceBeagleInput \
-B vcf,VCF,$input_vcf \
-beagle $beagledir$beagle_prefix
fi
# now, run beagle
if [ $runbgl == 1 ]
then
echo "Running Beagle..."
java -Xmx8000m -Djava.io.tmpdir=$tmpdir -jar $beaglejar like=$beagledir$beagle_prefix out=$bgloutprefix
# move output files to beagle directory
cp ./$bgloutprefix.* $beagledir
# unzip gzip'd files, force overwrite if existing
gunzip -f $beagledir$bgloutprefix.$beagle_prefix.gprobs.gz
gunzip -f $beagledir$bgloutprefix.$beagle_prefix.phased.gz
#rename also Beagle likelihood file to mantain consistency
cp $beagledir$beagle_prefix $beagledir$bgloutprefix.$beagle_prefix.like
cp $beagledir$beagle_prefix.log $beagledir$bgloutprefix.$beagle_prefix.log
fi
# run GATK to parse Beagle files and to produce output vcf
if [ $runoutput == 1 ]
then
java -Xmx4000m -Djava.io.tmpdir=$tmpdir -jar ../dist/GenomeAnalysisTK.jar \
-R $reffile -T BeagleOutputToVCF -l INFO -L $chr \
-B inputvcf,VCF,$input_vcf \
-B beagleR2,BEAGLE,$beagledir$bgloutprefix.$beagle_prefix.r2 \
-B beagleProbs,BEAGLE,$beagledir$bgloutprefix.$beagle_prefix.gprobs \
-B beaglePhased,BEAGLE,$beagledir$bgloutprefix.$beagle_prefix.phased \
-output $beagledir$bgloutprefix.$beagle_prefix.output.vcf
fi
if [ $runvarianteval == 1 ]
then
# finally, run VariantEval to produce useful comparisons between pre-and post-Beagle vcf's.
if [ $dohapmap == 1 ]
then
java -Xmx4096m -jar ../dist/GenomeAnalysisTK.jar -T VariantEval \
-R $reffile -l INFO -L $chr \
-B eval_prebeagle,VCF,$input_vcf \
-B eval_beagle,VCF,$outputbglvcf \
-B comp_hapmap,VCF,$cmpfileh \
-reportType Grep -o ${beagledir}$bgloutprefix.$beagle_prefix.variant_eval_hapmap_grep.txt
fi
if [ $do1kgchip == 1 ]
then
java -Xmx4096m -jar ../dist/GenomeAnalysisTK.jar -T VariantEval \
-R $reffile -l INFO -L $chr \
-B eval_prebeagle,VCF,$input_vcf \
-B eval_beagle,VCF,$outputbglvcf \
-B comp_1kgchip,VCF,$cmpfile1 \
-reportType Grep -o ${beagledir}$bgloutprefix.$beagle_prefix.variant_eval_1kgchip_grep.txt
fi
fi