diff --git a/perl/enqueueGATKcallerJobs.pl b/perl/enqueueGATKcallerJobs.pl
new file mode 100755
index 000000000..fc65c6641
--- /dev/null
+++ b/perl/enqueueGATKcallerJobs.pl
@@ -0,0 +1,84 @@
+#!/usr/bin/perl -w
+
+# Submits GATK calling jobs to LSF through bsub, one set per sample of the
+# selected pilot: two IndelGenotyper runs (low and high -minFraction) and,
+# for pilot2 only, one SingleSampleGenotyper run.
+#
+# Options:
+#   -p     pilot to process, pilot1 or pilot2 (default: pilot2)
+#   -q     queue passed to bsub -q (default: gsa)
+#   -tech  tag embedded in the BAM and output file names (default: SLX)
+#   -j     GenomeAnalysisTK jar to run
+
+use strict;
+use Getopt::Long;
+
+my $pilot = "pilot2";
+my $queue = "gsa";
+my $tech = "SLX";
+my $jar = "/humgen/gsa-scr1/ebanks/Sting/dist/GenomeAnalysisTK.jar";
+
+my $parsed = GetOptions( "p:s" => \$pilot,
+                         "q:s" => \$queue,
+                         "tech:s" => \$tech,
+                         "j:s" => \$jar );
+
+# Stop on unparseable options rather than submitting jobs with defaults.
+if ( !$parsed ) {
+    print STDERR "Usage: $0 [-p pilot1|pilot2] [-q queue] [-tech tech] [-j jar]\n";
+    exit(1);
+}
+
+# Sample names to process for each supported pilot.
+my %samplesFor = (
+    "pilot1" => [ "CEU", "YRI", "CHB-JPT" ],
+    "pilot2" => [ "NA19238", "NA19239", "NA19240", "NA12878", "NA12891", "NA12892" ],
+);
+
+# Fail loudly on an unknown pilot instead of silently submitting nothing.
+if ( !exists $samplesFor{$pilot} ) {
+    print STDERR "Unknown pilot '$pilot' (expected pilot1 or pilot2)\n";
+    exit(1);
+}
+
+foreach my $sample (@{ $samplesFor{$pilot} }) {
+    enqueue($sample, $pilot, $queue, $jar, $tech);
+}
+
+# Builds the bsub command lines for one sample and runs each with system().
+# Positional arguments: sample, pilot, queue, jar, tech.
+# A failed submission is reported with warn; the remaining jobs are still tried.
+sub enqueue {
+
+    my ($sample, $pilot, $queue, $jar, $tech) = @_;
+
+    my $inputBamStr = "";
+    my $outputDir;
+    if ($pilot eq "pilot2") {
+        # pilot2: a single BAM per sample.
+        $inputBamStr = "-I /humgen/gsa-hphome1/projects/1kg_pilot2/useTheseBamsForAnalyses/$sample.$tech.bam";
+        $outputDir = "/broad/hptmp/ebanks/1kg_pilot2/cleaned/calls";
+    } else {
+        # Otherwise: one BAM per chromosome (1-22, X, Y), each behind its own -I.
+        my $bamDir = "/broad/hptmp/ebanks/1kg_pilot1/cleaned/bams";
+        foreach my $chr (1..22, "X", "Y") {
+            $inputBamStr .= "-I $bamDir/$sample.chr$chr.$tech.bam ";
+        }
+        $outputDir = "/broad/hptmp/ebanks/1kg_pilot1/cleaned/calls";
+    }
+
+    # -minFraction is 0.1 (low) / 0.3 (high), or 0.01 / 0.03 for pilot1.
+    my $fractionPrefix = ($pilot eq "pilot1") ? "0.0" : "0.";
+    foreach my $level ([ "low", "1" ], [ "high", "3" ]) {
+        my ($label, $digit) = @$level;
+        my $outputFile = "$outputDir/indels/$sample.$tech.$label.calls";
+        my $cmd = "bsub -q $queue -o $outputFile.sdout java -Xmx4096m -jar $jar -S SILENT -T IndelGenotyper -R /broad/1KG/reference/human_b36_both.fasta $inputBamStr -o $outputFile -minConsensusFraction 0.5 -minFraction $fractionPrefix$digit -minCnt 2 -1kg";
+        system($cmd) == 0 or warn "bsub submission failed (status $?): $cmd\n";
+    }
+
+    if ($pilot eq "pilot2") {
+        my $outputFile = "$outputDir/unfiltered_snps/$sample.$tech.geli.calls";
+        my $cmd = "bsub -q $queue -o $outputFile.sdout java -Xmx4096m -jar $jar -S SILENT -T SingleSampleGenotyper -R /broad/1KG/reference/human_b36_both.fasta $inputBamStr -varout $outputFile -lod 5";
+        system($cmd) == 0 or warn "bsub submission failed (status $?): $cmd\n";
+    }
+}
diff --git a/perl/enqueueGATKjobsByChromosome.pl b/perl/enqueueGATKjobsByChromosome.pl
new file mode 100755
index 000000000..3685f76ba
--- /dev/null
+++ b/perl/enqueueGATKjobsByChromosome.pl
@@ -0,0 +1,109 @@
+#!/usr/bin/perl -w
+
+# Submits one GATK job per sample and chromosome (1-22, X, Y) to LSF through
+# bsub, running the walker named with -T.
+#
+# Options:
+#   -T     walker to run (required): IndelIntervals, MismatchIntervals,
+#          SNPClusters, IntervalMerger, IntervalCleaner or CleanedReadInjector
+#   -p     pilot to process, pilot1 or pilot2 (default: pilot2)
+#   -q     queue passed to bsub -q (default: gsa)
+#   -tech  tag embedded in the BAM and output file names (default: SLX)
+#   -j     GenomeAnalysisTK jar to run
+
+use strict;
+use Getopt::Long;
+
+my $walker = undef;
+my $pilot = "pilot2";
+my $queue = "gsa";
+my $tech = "SLX";
+my $jar = "/humgen/gsa-scr1/ebanks/Sting/dist/GenomeAnalysisTK.jar";
+
+my $parsed = GetOptions( "T=s" => \$walker,
+                         "p:s" => \$pilot,
+                         "q:s" => \$queue,
+                         "tech:s" => \$tech,
+                         "j:s" => \$jar );
+
+# Still exits with status 1 when no walker is given, but now says why.
+if ( !$parsed || !$walker ) {
+    print STDERR "Usage: $0 -T walker [-p pilot1|pilot2] [-q queue] [-tech tech] [-j jar]\n";
+    exit(1);
+}
+
+# Sample names to process for each supported pilot.
+my %samplesFor = (
+    "pilot1" => [ "CEU", "YRI", "CHB-JPT" ],
+    "pilot2" => [ "NA19238", "NA19239", "NA19240", "NA12878", "NA12891", "NA12892" ],
+);
+
+# Fail loudly on an unknown pilot instead of silently submitting nothing.
+if ( !exists $samplesFor{$pilot} ) {
+    print STDERR "Unknown pilot '$pilot' (expected pilot1 or pilot2)\n";
+    exit(1);
+}
+
+foreach my $sample (@{ $samplesFor{$pilot} }) {
+    foreach my $chr (1..22, "X", "Y") {
+        enqueue($sample, $chr, $pilot, $queue, $jar, $walker, $tech);
+    }
+}
+
+# Builds the bsub command line that runs $walker on one sample/chromosome and
+# runs it with system().
+# Positional arguments: sample, chr, pilot, queue, jar, walker, tech.
+# Prints a message and exits with status 1 when the walker is not supported.
+sub enqueue {
+
+    my ($sample, $chr, $pilot, $queue, $jar, $walker, $tech) = @_;
+
+    my $inputBam;
+    my $outputDir;
+    if ($pilot eq "pilot2") {
+        $inputBam = "/broad/1KG/DCC/ftp/pilot_data/$sample/alignment/$sample.chrom$chr.$tech.SRP000032.2009_07.bam";
+        $outputDir = "/broad/hptmp/ebanks/1kg_pilot2/cleaned";
+    } else {
+        $inputBam = "/broad/hptmp/ebanks/1kg_pilot1/${sample}_BAMS.list";
+        $outputDir = "/broad/hptmp/ebanks/1kg_pilot1/cleaned";
+    }
+
+    # Common prefix of the interval file names for this sample/chromosome.
+    my $intervalsPrefix = "$outputDir/intervals/$sample.chr$chr.$tech";
+
+    # Walkers that write an interval list, mapped to the tag in its file name.
+    my %intervalTagFor = (
+        "IndelIntervals"    => "indels",
+        "MismatchIntervals" => "mismatches",
+        "SNPClusters"       => "clusters",
+        "IntervalMerger"    => "merged",
+    );
+
+    my $outputFile;
+    my $outputFlag = "-o";
+    my $extraArgs = "";
+
+    if (exists $intervalTagFor{$walker}) {
+        $outputFile = "$intervalsPrefix.$intervalTagFor{$walker}.intervals";
+        if ($walker eq "IntervalMerger") {
+            # IntervalMerger is given the indel and mismatch lists via -intervals.
+            $extraArgs = " -intervals $intervalsPrefix.indels.intervals -intervals $intervalsPrefix.mismatches.intervals";
+        }
+    } elsif ($walker eq "IntervalCleaner") {
+        my $subDir = ($pilot eq "pilot2") ? "cleaner" : "bams";
+        $outputFile = "$outputDir/$subDir/$sample.chr$chr.$tech.bam";
+        $outputFlag = "-O";
+        $extraArgs = " -intervals $intervalsPrefix.merged.intervals -compress 1";
+        $extraArgs .= " -cleanedOnly" if ($pilot eq "pilot2");
+    } elsif ($walker eq "CleanedReadInjector") {
+        $outputFile = "$outputDir/bams/$sample.chr$chr.$tech.bam";
+    } else {
+        print "$walker is not a supported class\n";
+        exit(1);
+    }
+
+    my $cmd = "bsub -q $queue -o $outputFile.sdout java -Xmx4096m -jar $jar -S SILENT -T $walker -R /broad/1KG/reference/human_b36_both.fasta -I $inputBam $outputFlag $outputFile -L $chr$extraArgs";
+
+#    print "$cmd\n";
+    system($cmd) == 0 or warn "bsub submission failed (status $?): $cmd\n";
+}