Some useful scripts for running only individual parts of the pilot1/2 cleaning/calling pipeline
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1587 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
d6a0b65ac9
commit
702ba553d6
|
|
@ -0,0 +1,67 @@
|
|||
#!/usr/bin/perl -w
|
||||
|
||||
use strict;
|
||||
use Getopt::Long;
|
||||
|
||||
# Defaults; each one can be overridden on the command line.
my $pilot = "pilot2";
my $queue = "gsa";
my $tech = "SLX";
my $jar = "/humgen/gsa-scr1/ebanks/Sting/dist/GenomeAnalysisTK.jar";

my $usage = "Usage: $0 [-p pilot1|pilot2] [-q queue] [-tech tech] [-j jar]\n";

# GetOptions returns false when it meets an option it cannot parse; stop
# instead of submitting jobs with settings the caller did not intend.
GetOptions( "p:s" => \$pilot,
            "q:s" => \$queue,
            "tech:s" => \$tech,
            "j:s" => \$jar ) or die $usage;

# Sample names processed for each supported pilot.
my %samplesFor = (
    "pilot1" => [ "CEU", "YRI", "CHB-JPT" ],
    "pilot2" => [ "NA19238", "NA19239", "NA19240", "NA12878", "NA12891", "NA12892" ],
);

# An unrecognised pilot used to leave the sample list empty, so the script
# finished "successfully" without submitting anything. Report it instead.
if ( !exists $samplesFor{$pilot} ) {
    print STDERR "Unknown pilot '$pilot' (expected pilot1 or pilot2)\n$usage";
    exit(1);
}

my @samples = @{ $samplesFor{$pilot} };

# Submit the calling jobs for every sample of the chosen pilot.
foreach my $sample (@samples) {
    enqueue($sample, $pilot, $queue, $jar, $tech);
}
|
||||
|
||||
# Submit the calling jobs for one sample through bsub.
#
# Positional arguments:
#   sample - sample name used to build the input BAM and output file names
#   pilot  - "pilot2" selects the single per-sample BAM; any other value
#            selects the per-chromosome pilot1 BAMs
#   queue  - queue name passed to "bsub -q"
#   jar    - path of the jar handed to "java -jar"
#   tech   - technology tag embedded in the file names
#
# Two IndelGenotyper jobs ("low" and "high" -minFraction thresholds) are
# always submitted; a SingleSampleGenotyper job is added for pilot2.
# A failed submission is reported with warn() and the remaining jobs are
# still attempted.
sub enqueue {
    my ($sample, $pilot, $queue, $jar, $tech) = @_;

    my $reference = "/broad/1KG/reference/human_b36_both.fasta";

    my $inputBamStr = "";
    my $outputDir;
    if ($pilot eq "pilot2") {
        $inputBamStr = "-I /humgen/gsa-hphome1/projects/1kg_pilot2/useTheseBamsForAnalyses/$sample.$tech.bam";
        $outputDir = "/broad/hptmp/ebanks/1kg_pilot2/cleaned/calls";
    } else {
        # One "-I" per chromosome BAM; each entry keeps its trailing space so
        # the resulting command line is unchanged.
        my $bamDir = "/broad/hptmp/ebanks/1kg_pilot1/cleaned/bams";
        foreach my $chr (1 .. 22, "X", "Y") {
            $inputBamStr .= "-I $bamDir/$sample.chr$chr.$tech.bam ";
        }
        $outputDir = "/broad/hptmp/ebanks/1kg_pilot1/cleaned/calls";
    }

    # pilot1 uses thresholds ten times smaller (0.01 / 0.03) than the
    # other case (0.1 / 0.3).
    my $fractionPrefix = ($pilot eq "pilot1") ? "0.0" : "0.";

    my @cmds;
    foreach my $call ([ "low", "1" ], [ "high", "3" ]) {
        my ($level, $digit) = @{$call};
        my $outputFile = "$outputDir/indels/$sample.$tech.$level.calls";
        push @cmds, "bsub -q $queue -o $outputFile.sdout java -Xmx4096m -jar $jar -S SILENT -T IndelGenotyper -R $reference $inputBamStr -o $outputFile -minConsensusFraction 0.5 -minFraction $fractionPrefix$digit -minCnt 2 -1kg";
    }

    if ($pilot eq "pilot2") {
        my $outputFile = "$outputDir/unfiltered_snps/$sample.$tech.geli.calls";
        push @cmds, "bsub -q $queue -o $outputFile.sdout java -Xmx4096m -jar $jar -S SILENT -T SingleSampleGenotyper -R $reference $inputBamStr -varout $outputFile -lod 5";
    }

    foreach my $cmd (@cmds) {
        # system() returns 0 only when the command ran and exited cleanly.
        system($cmd) == 0
            or warn "Job submission failed (status $?): $cmd\n";
    }

    return;
}
|
||||
|
|
@ -0,0 +1,111 @@
|
|||
#!/usr/bin/perl -w
|
||||
|
||||
use strict;
|
||||
use Getopt::Long;
|
||||
|
||||
# Defaults; each one can be overridden on the command line.
# The walker (-T) has no default and must be supplied.
my $walker = undef;
my $pilot = "pilot2";
my $queue = "gsa";
my $tech = "SLX";
my $jar = "/humgen/gsa-scr1/ebanks/Sting/dist/GenomeAnalysisTK.jar";

my $usage = "Usage: $0 -T walker [-p pilot1|pilot2] [-q queue] [-tech tech] [-j jar]\n";

# GetOptions returns false when it meets an option it cannot parse; stop
# instead of submitting jobs with settings the caller did not intend.
GetOptions( "T=s" => \$walker,
            "p:s" => \$pilot,
            "q:s" => \$queue,
            "tech:s" => \$tech,
            "j:s" => \$jar ) or die $usage;

# Same exit code as before, but now say why the script is stopping.
if ( !$walker ) {
    print STDERR "No walker given with -T\n$usage";
    exit(1);
}

# Sample names processed for each supported pilot.
my %samplesFor = (
    "pilot1" => [ "CEU", "YRI", "CHB-JPT" ],
    "pilot2" => [ "NA19238", "NA19239", "NA19240", "NA12878", "NA12891", "NA12892" ],
);

# An unrecognised pilot used to leave the sample list empty, so the script
# finished "successfully" without submitting anything. Report it instead.
if ( !exists $samplesFor{$pilot} ) {
    print STDERR "Unknown pilot '$pilot' (expected pilot1 or pilot2)\n$usage";
    exit(1);
}

my @samples = @{ $samplesFor{$pilot} };

# One job per sample and chromosome: 1..22 first, then X and Y.
foreach my $sample (@samples) {
    foreach my $chr (1 .. 22, "X", "Y") {
        enqueue($sample, $chr, $pilot, $queue, $jar, $walker, $tech);
    }
}
|
||||
|
||||
# Submit one walker job for one sample and chromosome through bsub.
#
# Positional arguments:
#   sample - sample name used to build input and output file names
#   chr    - chromosome label, used in file names and passed to "-L"
#   pilot  - "pilot2" reads the per-chromosome BAM; any other value reads
#            the sample's "_BAMS.list" file
#   queue  - queue name passed to "bsub -q"
#   jar    - path of the jar handed to "java -jar"
#   walker - one of IndelIntervals, MismatchIntervals, SNPClusters,
#            IntervalMerger, IntervalCleaner, CleanedReadInjector
#   tech   - technology tag embedded in the file names
#
# An unsupported walker prints a message and exits the script with status 1.
# A failed submission is reported with warn().
sub enqueue {
    my ($sample, $chr, $pilot, $queue, $jar, $walker, $tech) = @_;

    my $reference = "/broad/1KG/reference/human_b36_both.fasta";

    my $inputBam;
    if ($pilot eq "pilot2") {
        $inputBam = "/broad/1KG/DCC/ftp/pilot_data/$sample/alignment/$sample.chrom$chr.$tech.SRP000032.2009_07.bam";
    } else {
        $inputBam = "/broad/hptmp/ebanks/1kg_pilot1/".$sample."_BAMS.list";
    }

    my $outputDir;
    if ($pilot eq "pilot1") {
        $outputDir = "/broad/hptmp/ebanks/1kg_pilot1/cleaned";
    } else {
        $outputDir = "/broad/hptmp/ebanks/1kg_pilot2/cleaned";
    }

    # Shared pieces of the per-walker file names.
    my $intervalsDir = "$outputDir/intervals";
    my $prefix = "$sample.chr$chr.$tech";

    # Walkers whose only difference is the suffix of the intervals file.
    my %intervalSuffix = (
        "IndelIntervals"    => "indels",
        "MismatchIntervals" => "mismatches",
        "SNPClusters"       => "clusters",
        "IntervalMerger"    => "merged",
    );

    my $outputFile;
    my $outputFlag = "-o";   # IntervalCleaner is the one walker given "-O"
    my $extraArgs = "";      # appended after "-L $chr"

    if (exists $intervalSuffix{$walker}) {
        $outputFile = "$intervalsDir/$prefix." . $intervalSuffix{$walker} . ".intervals";
        if ($walker eq "IntervalMerger") {
            # The merger combines the indel and mismatch interval files.
            $extraArgs = " -intervals $intervalsDir/$prefix.indels.intervals -intervals $intervalsDir/$prefix.mismatches.intervals";
        }
    } elsif ($walker eq "IntervalCleaner") {
        if ($pilot eq "pilot2") {
            $outputFile = "$outputDir/cleaner/$prefix.bam";
        } else {
            $outputFile = "$outputDir/bams/$prefix.bam";
        }
        $outputFlag = "-O";
        $extraArgs = " -intervals $intervalsDir/$prefix.merged.intervals -compress 1";
        if ($pilot eq "pilot2") {
            $extraArgs .= " -cleanedOnly";
        }
    } elsif ($walker eq "CleanedReadInjector") {
        $outputFile = "$outputDir/bams/$prefix.bam";
    } else {
        print "$walker is not a supported class\n";
        exit(1);
    }

    my $cmd = "bsub -q $queue -o $outputFile.sdout java -Xmx4096m -jar $jar -S SILENT -T $walker -R $reference -I $inputBam $outputFlag $outputFile -L $chr$extraArgs";

    # print "$cmd\n";

    # system() returns 0 only when the command ran and exited cleanly.
    system($cmd) == 0
        or warn "Job submission failed (status $?): $cmd\n";

    return;
}
|
||||
Loading…
Reference in New Issue