2009-09-14 10:41:57 +08:00
#!/usr/bin/perl -w
use strict ;
use Getopt::Long ;
sub usage {
print "Usage: perl runCallingPipeline.pl\n\t-i <GATK input bam command>\n\t-o <output file head>\n\t[-sting Sting dir]\n\t[-frac multiplier for indel fractions]\n\t[-snps Should we call snps?]\n\t[-sample for writing vcf with -snps]\n\t[-badsnps bad snps file from cleaning with -snps]\n\t[-doc DepthOfCoverage for filtering with -snps]\n\t[-mq mapping quality zero for filtering with -snps ]\n\t[-q farm queue; default:gsa]\n\t[-wait farm wait id]\n\t[-dry]\n" ;
exit ( 1 ) ;
}
my $ inputBamStr = undef ;
my $ outputHead = undef ;
my $ wait = undef ;
my $ dry ;
my $ snps ;
my $ badsnps = undef ;
my $ sample ;
my $ doc = 100 ;
my $ mq = 100 ;
my $ indelFractionMultiplier = "" ;
my $ queue = "gsa" ;
my $ sting = "/humgen/gsa-scr1/ebanks/Sting" ;
GetOptions ( "i=s" = > \ $ inputBamStr ,
"o=s" = > \ $ outputHead ,
"q:s" = > \ $ queue ,
"dry!" = > \ $ dry ,
"snps!" = > \ $ snps ,
"sample:s" = > \ $ sample ,
"doc:s" = > \ $ doc ,
"mq:s" = > \ $ mq ,
"frac:s" = > \ $ indelFractionMultiplier ,
"badsnps:s" = > \ $ badsnps ,
"wait:s" = > \ $ wait ,
"sting:s" = > \ $ sting ) ;
usage ( ) if ( ! $ inputBamStr || ! $ outputHead ) ;
my $ indelsHigh = "$outputHead.indels.high.calls" ;
my $ bsub = "bsub -q $queue -o $indelsHigh.sdout" ;
if ( $ wait ) {
$ bsub . = " -w \"ended($wait)\"" ;
}
2009-10-09 02:13:49 +08:00
my $ command = "java -Djava.io.tmpdir=/broad/hptmp/ -Xmx4096m -jar $sting/dist/GenomeAnalysisTK.jar -S SILENT -T IndelGenotyper -R /broad/1KG/reference/human_b36_both.fasta $inputBamStr -minConsensusFraction 0.5 -minCnt 2 -1kg -minFraction 0." . $ indelFractionMultiplier . "3 -O $indelsHigh" ;
2009-09-14 10:41:57 +08:00
execute ( "$bsub $command" , $ dry ) ;
my $ indelsLow = "$outputHead.indels.low.calls" ;
$ bsub = "bsub -q $queue -o $indelsLow.sdout -J $outputHead" ;
if ( $ wait ) {
$ bsub . = " -w \"ended($wait)\"" ;
}
2009-10-09 02:13:49 +08:00
$ command = "java -Djava.io.tmpdir=/broad/hptmp/ -Xmx4096m -jar $sting/dist/GenomeAnalysisTK.jar -S SILENT -T IndelGenotyper -R /broad/1KG/reference/human_b36_both.fasta $inputBamStr -minConsensusFraction 0.5 -minCnt 2 -1kg -minFraction 0." . $ indelFractionMultiplier . "1 -O $indelsLow" ;
2009-09-14 10:41:57 +08:00
execute ( "$bsub $command" , $ dry ) ;
if ( $ snps ) {
my $ snpsFile = "$outputHead.snps.unfiltered.calls" ;
$ bsub = "bsub -q $queue -o $snpsFile.sdout -J $outputHead" ;
if ( $ wait ) {
$ bsub . = " -w \"ended($wait)\"" ;
}
2009-10-09 02:13:49 +08:00
$ command = "java -Djava.io.tmpdir=/broad/hptmp/ -Xmx4096m -jar $sting/dist/GenomeAnalysisTK.jar -S SILENT -T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta $inputBamStr -varout $snpsFile -lod 0.0" ;
2009-09-14 10:41:57 +08:00
execute ( "$bsub $command" , $ dry ) ;
my $ filterFile = "$outputHead.snps.filtered.calls" ;
my $ vcfFile = "$outputHead.snps.vcf" ;
$ bsub = "bsub -q $queue -o $filterFile.sdout -w \"ended($outputHead)\"" ;
2009-10-09 02:13:49 +08:00
$ command = "java -Djava.io.tmpdir=/broad/hptmp/ -Xmx4096m -jar $sting/dist/GenomeAnalysisTK.jar -S SILENT -T VariantFiltration -R /broad/1KG/reference/human_b36_both.fasta $inputBamStr -vcf $vcfFile -included $filterFile -sample $sample -B dbsnp,dbsnp,/humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod,variant,Variants,$snpsFile," ;
2009-09-14 10:41:57 +08:00
if ( $ badsnps ) {
$ command . = "cleaned,CleanedOutSNP,$badsnps," ;
}
2009-10-09 01:40:09 +08:00
$ command . = "indels,SimpleIndel,$indelsLow -X DepthOfCoverage:max=$doc -X AlleleBalance:low=0.25,high=0.75 -X FisherStrand:pvalue=0.00001 -X LodThreshold:lod=5.0 -X MappingQualityZero:max=$mq -X IndelArtifact -X ClusteredSnps:window=7,snps=3" ;
2009-09-14 10:41:57 +08:00
execute ( "$bsub $command" , $ dry ) ;
}
sub execute {
my $ cmd = $ _ [ 0 ] ;
my $ dry = $ _ [ 1 ] ;
if ( $ dry ) {
print "$cmd\n" ;
} else {
system ( $ cmd ) ;
}
}