Checking in the scripts I'm using for syncing the 1000G mirror. Note that very few people can actually use them because you most likely don't have perission to write to /humgen/1kg/DCC, but these should be used as a resource if anyone ever needs to do this in the future. These scripts are very naive and consist of just the actual pulling down of data. Currently aspera and wget are supported, but Mark should feel free to add lftp if he wants. :) Also, while I'm here, I'm removing obsolete scripts for running an obsolete pipeline.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5282 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
cd7a7091ba
commit
463bb737c3
|
|
@ -1,85 +0,0 @@
|
||||||
#!/usr/bin/perl -w
|
|
||||||
|
|
||||||
use strict;
|
|
||||||
use Getopt::Long;
|
|
||||||
|
|
||||||
sub usage {
|
|
||||||
print "Usage: perl runCallingPipeline.pl\n\t-i <GATK input bam command>\n\t-o <output file head>\n\t[-sting Sting dir]\n\t[-frac multiplier for indel fractions]\n\t[-snps Should we call snps?]\n\t[-sample for writing vcf with -snps]\n\t[-badsnps bad snps file from cleaning with -snps]\n\t[-doc DepthOfCoverage for filtering with -snps]\n\t[-mq mapping quality zero for filtering with -snps ]\n\t[-q farm queue; default:gsa]\n\t[-wait farm wait id]\n\t[-dry]\n";
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
my $inputBamStr = undef;
|
|
||||||
my $outputHead = undef;
|
|
||||||
my $wait = undef;
|
|
||||||
my $dry;
|
|
||||||
my $snps;
|
|
||||||
my $badsnps = undef;
|
|
||||||
my $sample;
|
|
||||||
my $doc = 100;
|
|
||||||
my $mq = 100;
|
|
||||||
my $indelFractionMultiplier = "";
|
|
||||||
my $queue = "gsa";
|
|
||||||
my $sting = "/humgen/gsa-scr1/ebanks/Sting";
|
|
||||||
|
|
||||||
GetOptions( "i=s" => \$inputBamStr,
|
|
||||||
"o=s" => \$outputHead,
|
|
||||||
"q:s" => \$queue,
|
|
||||||
"dry!" => \$dry,
|
|
||||||
"snps!" => \$snps,
|
|
||||||
"sample:s" => \$sample,
|
|
||||||
"doc:s" => \$doc,
|
|
||||||
"mq:s" => \$mq,
|
|
||||||
"frac:s" => \$indelFractionMultiplier,
|
|
||||||
"badsnps:s" => \$badsnps,
|
|
||||||
"wait:s" => \$wait,
|
|
||||||
"sting:s" => \$sting );
|
|
||||||
|
|
||||||
usage() if ( !$inputBamStr || !$outputHead );
|
|
||||||
|
|
||||||
my $indelsHigh = "$outputHead.indels.high.calls";
|
|
||||||
my $bsub = "bsub -q $queue -o $indelsHigh.sdout";
|
|
||||||
if ($wait) {
|
|
||||||
$bsub .= " -w \"ended($wait)\"";
|
|
||||||
}
|
|
||||||
my $command = "java -Djava.io.tmpdir=/broad/hptmp/ -Xmx4096m -jar $sting/dist/GenomeAnalysisTK.jar -S SILENT -T IndelGenotyper -R /broad/1KG/reference/human_b36_both.fasta $inputBamStr -minConsensusFraction 0.5 -minCnt 2 -1kg -minFraction 0.".$indelFractionMultiplier."3 -O $indelsHigh";
|
|
||||||
execute("$bsub $command", $dry);
|
|
||||||
|
|
||||||
my $indelsLow = "$outputHead.indels.low.calls";
|
|
||||||
$bsub = "bsub -q $queue -o $indelsLow.sdout -J $outputHead";
|
|
||||||
if ($wait) {
|
|
||||||
$bsub .= " -w \"ended($wait)\"";
|
|
||||||
}
|
|
||||||
$command = "java -Djava.io.tmpdir=/broad/hptmp/ -Xmx4096m -jar $sting/dist/GenomeAnalysisTK.jar -S SILENT -T IndelGenotyper -R /broad/1KG/reference/human_b36_both.fasta $inputBamStr -minConsensusFraction 0.5 -minCnt 2 -1kg -minFraction 0.".$indelFractionMultiplier."1 -O $indelsLow";
|
|
||||||
execute("$bsub $command", $dry);
|
|
||||||
|
|
||||||
if ($snps) {
|
|
||||||
my $snpsFile = "$outputHead.snps.unfiltered.calls";
|
|
||||||
$bsub = "bsub -q $queue -o $snpsFile.sdout -J $outputHead";
|
|
||||||
if ($wait) {
|
|
||||||
$bsub .= " -w \"ended($wait)\"";
|
|
||||||
}
|
|
||||||
$command = "java -Djava.io.tmpdir=/broad/hptmp/ -Xmx4096m -jar $sting/dist/GenomeAnalysisTK.jar -S SILENT -T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta $inputBamStr -varout $snpsFile -lod 0.0";
|
|
||||||
execute("$bsub $command", $dry);
|
|
||||||
|
|
||||||
my $filterFile = "$outputHead.snps.filtered.calls";
|
|
||||||
my $vcfFile = "$outputHead.snps.vcf";
|
|
||||||
$bsub = "bsub -q $queue -o $filterFile.sdout -w \"ended($outputHead)\"";
|
|
||||||
$command = "java -Djava.io.tmpdir=/broad/hptmp/ -Xmx4096m -jar $sting/dist/GenomeAnalysisTK.jar -S SILENT -T VariantFiltration -R /broad/1KG/reference/human_b36_both.fasta $inputBamStr -vcf $vcfFile -included $filterFile -sample $sample -B dbsnp,dbsnp,/humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod,variant,Variants,$snpsFile,";
|
|
||||||
if ($badsnps) {
|
|
||||||
$command .= "cleaned,CleanedOutSNP,$badsnps,";
|
|
||||||
}
|
|
||||||
$command .= "indels,SimpleIndel,$indelsLow -X DepthOfCoverage:max=$doc -X AlleleBalance:low=0.25,high=0.75 -X FisherStrand:pvalue=0.00001 -X LodThreshold:lod=5.0 -X MappingQualityZero:max=$mq -X IndelArtifact -X ClusteredSnps:window=7,snps=3";
|
|
||||||
execute("$bsub $command", $dry);
|
|
||||||
}
|
|
||||||
|
|
||||||
sub execute {
|
|
||||||
|
|
||||||
my $cmd = $_[0];
|
|
||||||
my $dry = $_[1];
|
|
||||||
|
|
||||||
if ($dry) {
|
|
||||||
print "$cmd\n";
|
|
||||||
} else {
|
|
||||||
system($cmd);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -1,110 +0,0 @@
|
||||||
#!/usr/bin/perl -w
|
|
||||||
|
|
||||||
use strict;
|
|
||||||
use Getopt::Long;
|
|
||||||
|
|
||||||
sub usage {
|
|
||||||
print "Usage: perl runCleaningPipeline.pl\n\t-i <input bam>\n\t-obam <output bam name>\n\t[-sting Sting dir]\n\t[-badsnps badsnps file name]\n\t[-inject]\n\t[-q farm queue; default:gsa]\n\t[-wait farm wait id]\n\t[-job final farm job name]\n\t[-dry]\n";
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
my $inputBam = undef;
|
|
||||||
my $outputBam = undef;
|
|
||||||
my $jobName = undef;
|
|
||||||
my $wait = undef;
|
|
||||||
my $badsnps = undef;
|
|
||||||
my $dry;
|
|
||||||
my $inject;
|
|
||||||
my $queue = "gsa";
|
|
||||||
my $sting = "/humgen/gsa-scr1/ebanks/Sting";
|
|
||||||
|
|
||||||
GetOptions( "i=s" => \$inputBam,
|
|
||||||
"obam=s" => \$outputBam,
|
|
||||||
"q:s" => \$queue,
|
|
||||||
"dry!" => \$dry,
|
|
||||||
"inject!" => \$inject,
|
|
||||||
"job:s" => \$jobName,
|
|
||||||
"wait:s" => \$wait,
|
|
||||||
"badsnps:s" => \$badsnps,
|
|
||||||
"sting:s" => \$sting );
|
|
||||||
|
|
||||||
usage() if ( !$inputBam || !$outputBam );
|
|
||||||
|
|
||||||
my $indelIntervals = "$outputBam.indels.intervals";
|
|
||||||
my $command = "perl $sting/perl/splitAndEnqueueGATKjobs.pl -cmd \"java -Djava.io.tmpdir=/broad/hptmp/ -Xmx4096m -jar $sting/dist/GenomeAnalysisTK.jar -S SILENT -T IndelIntervals -R /broad/1KG/reference/human_b36_both.fasta -I $inputBam\" -o $indelIntervals -oarg o -j $outputBam.intervals -q $queue";
|
|
||||||
if ($dry) {
|
|
||||||
$command .= " -dry";
|
|
||||||
}
|
|
||||||
if ($wait) {
|
|
||||||
$command .= " -wait $wait";
|
|
||||||
}
|
|
||||||
system($command);
|
|
||||||
|
|
||||||
my $mismatchIntervals = "$outputBam.mismatches.intervals";
|
|
||||||
$command = "perl $sting/perl/splitAndEnqueueGATKjobs.pl -cmd \"java -Djava.io.tmpdir=/broad/hptmp/ -Xmx4096m -jar $sting/dist/GenomeAnalysisTK.jar -S SILENT -T MismatchIntervals -R /broad/1KG/reference/human_b36_both.fasta -I $inputBam\" -o $mismatchIntervals -oarg o -j $outputBam.intervals -q $queue";
|
|
||||||
if ($dry) {
|
|
||||||
$command .= " -dry";
|
|
||||||
}
|
|
||||||
if ($wait) {
|
|
||||||
$command .= " -wait $wait";
|
|
||||||
}
|
|
||||||
system($command);
|
|
||||||
|
|
||||||
my $mergedIntervals = "$outputBam.merged.intervals";
|
|
||||||
$command = "perl $sting/perl/splitAndEnqueueGATKjobs.pl -cmd \"java -Djava.io.tmpdir=/broad/hptmp/ -Xmx4096m -jar $sting/dist/GenomeAnalysisTK.jar -S SILENT -T IntervalMerger -R /broad/1KG/reference/human_b36_both.fasta -I $inputBam -intervals $indelIntervals -intervals $mismatchIntervals\" -o $mergedIntervals -oarg o -wait $outputBam.intervals -j $outputBam.merged -q $queue";
|
|
||||||
if ($dry) {
|
|
||||||
$command .= " -dry";
|
|
||||||
}
|
|
||||||
system($command);
|
|
||||||
|
|
||||||
my $cleanedBam;
|
|
||||||
if ($inject) {
|
|
||||||
$cleanedBam = "$outputBam.cleaned.bam";
|
|
||||||
} else {
|
|
||||||
$cleanedBam = "$outputBam";
|
|
||||||
}
|
|
||||||
$command = "bsub -q $queue -o $outputBam.cleaner.script1 -w \"ended($outputBam.merged)\" -J $outputBam.cleaner.script perl $sting/perl/splitAndEnqueueGATKjobs.pl -cmd \"java -Djava.io.tmpdir=/broad/hptmp/ -Xmx4096m -jar $sting/dist/GenomeAnalysisTK.jar -S SILENT -T IntervalCleaner -R /broad/1KG/reference/human_b36_both.fasta -I $inputBam -compress 1";
|
|
||||||
if ($inject) {
|
|
||||||
$command .= " -cleanedOnly\" -j $outputBam.cleaner.clean";
|
|
||||||
} else {
|
|
||||||
$command .= "\" -j $jobName";
|
|
||||||
}
|
|
||||||
$command .= " -o $cleanedBam -oarg O -q $queue -bam -i $mergedIntervals -n 50";
|
|
||||||
if ($dry) {
|
|
||||||
$command .= " -dry";
|
|
||||||
print "$command\n";
|
|
||||||
} else {
|
|
||||||
system($command);
|
|
||||||
}
|
|
||||||
|
|
||||||
my $snpsFile = $badsnps;
|
|
||||||
if (!$snpsFile) {
|
|
||||||
$snpsFile = "$outputBam.badsnps";
|
|
||||||
}
|
|
||||||
$command = "bsub -q $queue -o $outputBam.cleaner.script2 -w \"ended($outputBam.merged)\" -J $outputBam.cleaner.script perl $sting/perl/splitAndEnqueueGATKjobs.pl -cmd \"java -Djava.io.tmpdir=/broad/hptmp/ -Xmx4096m -jar $sting/dist/GenomeAnalysisTK.jar -S SILENT -T IntervalCleaner -R /broad/1KG/reference/human_b36_both.fasta -I $inputBam\"";
|
|
||||||
if ($inject) {
|
|
||||||
$command .= " -j $outputBam.cleaner.badsnps";
|
|
||||||
} else {
|
|
||||||
$command .= " -j $jobName";
|
|
||||||
}
|
|
||||||
$command .= " -o $snpsFile -oarg snps -q $queue -i $mergedIntervals -n 50";
|
|
||||||
if ($dry) {
|
|
||||||
$command .= " -dry";
|
|
||||||
print "$command\n";
|
|
||||||
} else {
|
|
||||||
system($command);
|
|
||||||
}
|
|
||||||
|
|
||||||
if ($inject) {
|
|
||||||
my $bam = "$outputBam";
|
|
||||||
$command = "bsub -q $queue -o $bam.sdout";
|
|
||||||
if ($jobName) {
|
|
||||||
$command .= " -J $jobName";
|
|
||||||
}
|
|
||||||
$command .= " -w \"ended($outputBam.cleaner.*)\" java -Djava.io.tmpdir=/broad/hptmp/ -Xmx4096m -jar $sting/dist/GenomeAnalysisTK.jar -S SILENT -T CleanedReadInjector -R /broad/1KG/reference/human_b36_both.fasta -I $inputBam --output_bam $bam --cleaned_reads $cleanedBam -compress 1";
|
|
||||||
if ($dry) {
|
|
||||||
print "$command\n";
|
|
||||||
} else {
|
|
||||||
system($command);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -1,72 +0,0 @@
|
||||||
#!/usr/bin/perl -w
|
|
||||||
|
|
||||||
use strict;
|
|
||||||
use Getopt::Long;
|
|
||||||
|
|
||||||
sub usage {
|
|
||||||
print "Usage: perl runPilot1Pipeline.pl\n\t-i <input dir>\n\t-o <output directory>\n\t[-sting Sting dir]\n\t[-q farm queue; default:gsa]\n\t[-dry]\n";
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
my $inputDir = undef;
|
|
||||||
my $outputDir = undef;
|
|
||||||
my $dry;
|
|
||||||
my $queue = "gsa";
|
|
||||||
my $sting = "/humgen/gsa-scr1/ebanks/Sting";
|
|
||||||
|
|
||||||
GetOptions( "i=s" => \$inputDir,
|
|
||||||
"odir=s" => \$outputDir,
|
|
||||||
"q:s" => \$queue,
|
|
||||||
"dry!" => \$dry,
|
|
||||||
"sting:s" => \$sting );
|
|
||||||
|
|
||||||
usage() if ( !$inputDir || !$outputDir );
|
|
||||||
|
|
||||||
my @samples = ("ceu","yri","chb_jpt");
|
|
||||||
|
|
||||||
foreach my $sample (@samples) {
|
|
||||||
|
|
||||||
my $inputBam = "$inputDir/low_coverage_$sample.bam";
|
|
||||||
my $outputHead = "$outputDir/$sample";
|
|
||||||
my $outputBam = "$outputHead.bam";
|
|
||||||
my $badsnps = "$outputBam.badsnps";
|
|
||||||
clean($inputBam, $outputBam, $queue, $sting, $dry, $badsnps);
|
|
||||||
call("-I $outputBam", $outputHead, $queue, $sting, $dry, "$inputBam.cleaner.*", $sample, $badsnps);
|
|
||||||
}
|
|
||||||
|
|
||||||
sub clean {
|
|
||||||
|
|
||||||
my $inputBam = $_[0];
|
|
||||||
my $outputBam = $_[1];
|
|
||||||
my $queue = $_[2];
|
|
||||||
my $sting = $_[3];
|
|
||||||
my $dry = $_[4];
|
|
||||||
my $badsnps = $_[5];
|
|
||||||
|
|
||||||
my $cmd = "perl $sting/perl/1kgScripts/runCleaningPipeline.pl -i $inputBam -obam $outputBam -q $queue -j $outputBam.cleaner.pipeline -sting $sting -badsnps $badsnps";
|
|
||||||
if ($dry) {
|
|
||||||
$cmd .= " -dry";
|
|
||||||
}
|
|
||||||
system($cmd);
|
|
||||||
}
|
|
||||||
|
|
||||||
sub call {
|
|
||||||
|
|
||||||
my $inputBams = $_[0];
|
|
||||||
my $outputHead = $_[1];
|
|
||||||
my $queue = $_[2];
|
|
||||||
my $sting = $_[3];
|
|
||||||
my $dry = $_[4];
|
|
||||||
my $wait = $_[5];
|
|
||||||
my $sample = $_[6];
|
|
||||||
my $badsnps = $_[7];
|
|
||||||
|
|
||||||
my $cmd = "perl $sting/perl/1kgScripts/runCallingPipeline.pl -i $inputBams -o $outputHead -q $queue -sting $sting -frac 0 -sample $sample -badsnps $badsnps";
|
|
||||||
if ($dry) {
|
|
||||||
$cmd .= " -dry";
|
|
||||||
}
|
|
||||||
if ($wait) {
|
|
||||||
$cmd .= " -wait $wait";
|
|
||||||
}
|
|
||||||
system($cmd);
|
|
||||||
}
|
|
||||||
|
|
@ -1,108 +0,0 @@
|
||||||
#!/usr/bin/perl -w
|
|
||||||
|
|
||||||
use strict;
|
|
||||||
use Getopt::Long;
|
|
||||||
|
|
||||||
sub usage {
|
|
||||||
print "Usage: perl runPilot2Pipeline.pl\n\t-i <input dir>\n\t-o <output directory>\n\t[-sting Sting dir]\n\t[-q farm queue; default:gsa]\n\t[-dry]\n";
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
my $inputDir = undef;
|
|
||||||
my $outputDir = undef;
|
|
||||||
my $dry;
|
|
||||||
my $queue = "gsa";
|
|
||||||
my $sting = "/humgen/gsa-scr1/ebanks/Sting";
|
|
||||||
|
|
||||||
GetOptions( "i=s" => \$inputDir,
|
|
||||||
"o=s" => \$outputDir,
|
|
||||||
"q:s" => \$queue,
|
|
||||||
"dry!" => \$dry,
|
|
||||||
"sting:s" => \$sting );
|
|
||||||
|
|
||||||
usage() if ( !$inputDir || !$outputDir );
|
|
||||||
|
|
||||||
my @samples = ("NA19238","NA19239","NA19240","NA12878","NA12891","NA12892");
|
|
||||||
|
|
||||||
# Official genome-wide Depth of Coverage tables for pilot 2, freeze 5:
|
|
||||||
# NA12878 NA12891 NA12892 NA19238 NA19239 NA19240
|
|
||||||
# 454: 36 18
|
|
||||||
# SLX: 82 91 70 56 68 86
|
|
||||||
# SOLID: 37 64
|
|
||||||
# 454+SLD: 64 77
|
|
||||||
# ALL: 138 150
|
|
||||||
|
|
||||||
my %DoC_454 = ( "NA12878" => 36, "NA19240" => 18 );
|
|
||||||
my %DoC_slx = ( "NA12878" => 82, "NA12891" => 91, "NA12892" => 70,"NA19238" => 56, "NA19239" => 68, "NA19240" => 86 );
|
|
||||||
my %DoC_solid = ( "NA12878" => 37, "NA19240" => 64 );
|
|
||||||
my %DoC_454solid = ( "NA12878" => 64, "NA19240" => 77 );
|
|
||||||
my %DoC_all = ( "NA12878" => 138, "NA19240" => 150 );
|
|
||||||
my %MQ_hash = ( "SLX" => 100, "SOLID" => 5, "454" => 5, "454SOLID" => 10, "ALL" => 110 );
|
|
||||||
|
|
||||||
foreach my $sample (@samples) {
|
|
||||||
|
|
||||||
my $inputBamSLX = "$inputDir/$sample.pilot2.SLX.bam";
|
|
||||||
my $outputHeadSLX = "$outputDir/$sample.SLX";
|
|
||||||
my $outputBamSLX = "$outputHeadSLX.bam";
|
|
||||||
my $badsnpsSLX = "$outputBamSLX.badsnps";
|
|
||||||
clean($inputBamSLX, $outputBamSLX, $queue, $sting, $dry, $badsnpsSLX);
|
|
||||||
call("-I $outputBamSLX", $outputHeadSLX, $queue, $sting, $dry, "$outputBamSLX.cleaner.*", $sample, $badsnpsSLX, $DoC_slx{$sample}, $MQ_hash{"SLX"});
|
|
||||||
|
|
||||||
if ($sample eq "NA12878" || $sample eq "NA19240") {
|
|
||||||
my $inputBamSOLID = "$inputDir/$sample.pilot2.SOLID.bam";
|
|
||||||
my $outputHeadSOLID = "$outputDir/$sample.SOLID";
|
|
||||||
my $outputBamSOLID = "$outputHeadSOLID.bam";
|
|
||||||
my $badsnpsSOLID = "$outputBamSOLID.badsnps";
|
|
||||||
clean($inputBamSOLID, $outputBamSOLID, $queue, $sting, $dry, $badsnpsSOLID);
|
|
||||||
call("-I $outputBamSOLID", $outputHeadSOLID, $queue, $sting, $dry, "$outputBamSOLID.cleaner.*", $sample, $badsnpsSOLID, $DoC_solid{$sample}, $MQ_hash{"SOLID"});
|
|
||||||
|
|
||||||
my $inputBam454 = "$inputDir/$sample.pilot2.454.bam";
|
|
||||||
my $outputHead454 = "$outputDir/$sample.454";
|
|
||||||
call("-I $inputBam454", $outputHead454, $queue, $sting, $dry, "$outputBamSLX.cleaner.*", $sample, $badsnpsSLX, $DoC_454{$sample}, $MQ_hash{"454"});
|
|
||||||
|
|
||||||
my $outputHead = "$outputDir/$sample.SOLID_454";
|
|
||||||
call("-I $outputBamSOLID -I $inputBam454", $outputHead, $queue, $sting, $dry, "$outputBamSOLID.cleaner.*", $sample, $badsnpsSOLID, $DoC_454solid{$sample}, $MQ_hash{"454SOLID"});
|
|
||||||
|
|
||||||
$outputHead = "$outputDir/$sample.allTechs";
|
|
||||||
call("-I $outputBamSLX -I $outputBamSOLID -I $inputBam454", $outputHead, $queue, $sting, $dry, "*.cleaner.*", $sample, $badsnpsSLX, $DoC_all{$sample}, $MQ_hash{"ALL"});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
sub clean {
|
|
||||||
|
|
||||||
my $inputBam = $_[0];
|
|
||||||
my $outputBam = $_[1];
|
|
||||||
my $queue = $_[2];
|
|
||||||
my $sting = $_[3];
|
|
||||||
my $dry = $_[4];
|
|
||||||
my $badsnps = $_[5];
|
|
||||||
|
|
||||||
my $cmd = "perl $sting/perl/1kgScripts/runCleaningPipeline.pl -i $inputBam -obam $outputBam -q $queue -inject -j $outputBam.cleaner.pipeline -sting $sting -badsnps $badsnps";
|
|
||||||
if ($dry) {
|
|
||||||
$cmd .= " -dry";
|
|
||||||
}
|
|
||||||
system($cmd);
|
|
||||||
}
|
|
||||||
|
|
||||||
sub call {
|
|
||||||
|
|
||||||
my $inputBams = $_[0];
|
|
||||||
my $outputHead = $_[1];
|
|
||||||
my $queue = $_[2];
|
|
||||||
my $sting = $_[3];
|
|
||||||
my $dry = $_[4];
|
|
||||||
my $wait = $_[5];
|
|
||||||
my $sample = $_[6];
|
|
||||||
my $badsnps = $_[7];
|
|
||||||
my $doc = $_[8];
|
|
||||||
my $mq = $_[9];
|
|
||||||
|
|
||||||
my $cmd = "perl $sting/perl/1kgScripts/runCallingPipeline.pl -i \"$inputBams\" -o $outputHead -q $queue -snps -sting $sting -sample $sample -badsnps $badsnps -doc $doc -mq $mq";
|
|
||||||
if ($dry) {
|
|
||||||
$cmd .= " -dry";
|
|
||||||
}
|
|
||||||
if ($wait) {
|
|
||||||
$cmd .= " -wait $wait";
|
|
||||||
}
|
|
||||||
system($cmd);
|
|
||||||
}
|
|
||||||
|
|
@ -1,2 +0,0 @@
|
||||||
#perl /humgen/gsa-scr1/ebanks/Sting_STABLE/perl/1kgScripts/runPilot1Pipeline.pl -i /humgen/gsa-hphome1/projects/1kg_pilot1/mergedBamsByPopulation -o /broad/hptmp/ebanks/1kg_pilot1/cleaned -sting /humgen/gsa-scr1/ebanks/Sting_STABLE -dry
|
|
||||||
perl /humgen/gsa-scr1/ebanks/Sting_STABLE/perl/1kgScripts/runPilot2Pipeline.pl -i /broad/1KG/DCC_merged/freeze5.2 -o /broad/hptmp/ebanks/1kg_pilot2/cleaned -sting /humgen/gsa-scr1/ebanks/Sting_STABLE
|
|
||||||
|
|
@ -0,0 +1,9 @@
|
||||||
|
Before sync'ing 1000 Genomes, you need to be logged in as gsa-dev:
|
||||||
|
% sudo -s -u gsa-dev
|
||||||
|
[Note that this step needs to be done before subsequent steps for stability]
|
||||||
|
|
||||||
|
To use Aspera, you'll need to ssh into one of the appropriate machines:
|
||||||
|
% ssh vbigtube or mirror
|
||||||
|
|
||||||
|
[The NCBI Aspera source is: anonftp@ftp-private.ncbi.nih.gov:/1000genomes/ftp/]
|
||||||
|
|
||||||
|
|
@ -0,0 +1,19 @@
|
||||||
|
#!/usr/bin/perl -w
|
||||||
|
|
||||||
|
# Runs Aspera to pull down files
|
||||||
|
|
||||||
|
use strict;
|
||||||
|
use Getopt::Long;
|
||||||
|
|
||||||
|
my $source = undef;
|
||||||
|
my $dest = ".";
|
||||||
|
GetOptions( "source=s" => \$source,
|
||||||
|
"dest=s" => \$dest);
|
||||||
|
|
||||||
|
if ( !$source) {
|
||||||
|
print "Usage: runAspera.pl\n\t-source \t<ftp source>\n\t-dest \t\t<local destination; defaults to '.'>\n";
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
my $cmd = "ascp -i /opt/aspera/etc/asperaweb_id_dsa.putty -k2 -QTr -l2G -d -v $source $dest";
|
||||||
|
system($cmd);
|
||||||
|
|
@ -0,0 +1,19 @@
|
||||||
|
#!/usr/bin/perl -w
|
||||||
|
|
||||||
|
# Runs Wget to pull down a file
|
||||||
|
|
||||||
|
use strict;
|
||||||
|
use Getopt::Long;
|
||||||
|
|
||||||
|
my $file = undef;
|
||||||
|
GetOptions( "file=s" => \$file);
|
||||||
|
|
||||||
|
if ( !$file) {
|
||||||
|
print "Usage: runWget.pl\n\t-file \t<file>\n";
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
chomp($file);
|
||||||
|
my $cmd = "wget -O /humgen/1kg/DCC/ftp/$file ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/$file";
|
||||||
|
print "$cmd\n";
|
||||||
|
system($cmd);
|
||||||
|
|
@ -0,0 +1,46 @@
|
||||||
|
#!/usr/bin/perl -w
|
||||||
|
|
||||||
|
use Getopt::Long;
|
||||||
|
|
||||||
|
sub usage {
|
||||||
|
print "Usage: perl syncFilesInList.pl\n\t-files <file containing files to sync>\n\t-protocol <protocol to use> [defaults to 'aspera'; can also use 'wget']\n\t[-dry]\n";
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
my $files = undef;
|
||||||
|
my $dry;
|
||||||
|
my $protocol = "aspera";
|
||||||
|
|
||||||
|
GetOptions( "files=s" => \$files,
|
||||||
|
"dry!" => \$dry,
|
||||||
|
"protocol=s" => \$protocol);
|
||||||
|
|
||||||
|
usage() if ( !$files );
|
||||||
|
|
||||||
|
open(LIST, "< $files") or die "can't open $files: $!";
|
||||||
|
while ( <LIST> ) {
|
||||||
|
chomp($_);
|
||||||
|
if ( $protocol eq "aspera" ) {
|
||||||
|
$_ =~ m/data\/(.*)\/alignment.*/;
|
||||||
|
$cmd = "./runAspera.pl -source anonftp\@ftp-private.ncbi.nih.gov:/1000genomes/ftp/$_ -dest /humgen/1kg/DCC/ftp/data/$1/alignment/";
|
||||||
|
execute($cmd, $dry);
|
||||||
|
} elsif ( $protocol eq "wget" ) {
|
||||||
|
$cmd = "./runWget.pl -file $_";
|
||||||
|
execute($cmd, $dry);
|
||||||
|
} else {
|
||||||
|
usage();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
close(LIST);
|
||||||
|
|
||||||
|
sub execute {
|
||||||
|
|
||||||
|
my $cmd = $_[0];
|
||||||
|
my $dry = $_[1];
|
||||||
|
|
||||||
|
if ($dry) {
|
||||||
|
print "$cmd\n";
|
||||||
|
} else {
|
||||||
|
system($cmd);
|
||||||
|
}
|
||||||
|
}
|
||||||
Loading…
Reference in New Issue