Checking in the scripts I'm using for syncing the 1000G mirror. Note that very few people can actually use them because you most likely don't have permission to write to /humgen/1kg/DCC, but these should be used as a resource if anyone ever needs to do this in the future. These scripts are very naive and consist of just the actual pulling down of data. Currently aspera and wget are supported, but Mark should feel free to add lftp if he wants. :) Also, while I'm here, I'm removing obsolete scripts for running an obsolete pipeline.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5282 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
cd7a7091ba
commit
463bb737c3
|
|
@ -1,85 +0,0 @@
|
|||
#!/usr/bin/perl -w

# Submits the 1000 Genomes calling pipeline to the LSF farm:
#   1. high-stringency indel calling (IndelGenotyper, minFraction 0.<frac>3)
#   2. low-stringency indel calling  (IndelGenotyper, minFraction 0.<frac>1)
#   3. with -snps: SNP calling (UnifiedGenotyper) followed by variant
#      filtration (VariantFiltration) that waits on the jobs named $outputHead.

use strict;
use Getopt::Long;

# Print usage and exit non-zero.
sub usage {
    print "Usage: perl runCallingPipeline.pl\n\t-i <GATK input bam command>\n\t-o <output file head>\n\t[-sting Sting dir]\n\t[-frac multiplier for indel fractions]\n\t[-snps Should we call snps?]\n\t[-sample for writing vcf with -snps]\n\t[-badsnps bad snps file from cleaning with -snps]\n\t[-doc DepthOfCoverage for filtering with -snps]\n\t[-mq mapping quality zero for filtering with -snps ]\n\t[-q farm queue; default:gsa]\n\t[-wait farm wait id]\n\t[-dry]\n";
    exit(1);
}

my $inputBamStr = undef;   # e.g. "-I foo.bam -I bar.bam", passed through to the GATK
my $outputHead  = undef;   # prefix for output files and for LSF job names
my $wait        = undef;   # optional LSF job name to wait on before starting
my $dry;                   # print commands instead of submitting them
my $snps;                  # also run SNP calling + filtration
my $badsnps     = undef;   # cleaned-out snps file (folded into filtration input)
my $sample;                # sample name written into the VCF (required with -snps)
my $doc         = 100;     # DepthOfCoverage maximum for filtration
my $mq          = 100;     # MappingQualityZero maximum for filtration
my $indelFractionMultiplier = "";  # digit(s) inserted after "0." in -minFraction
my $queue       = "gsa";
my $sting       = "/humgen/gsa-scr1/ebanks/Sting";

GetOptions( "i=s"       => \$inputBamStr,
            "o=s"       => \$outputHead,
            "q:s"       => \$queue,
            "dry!"      => \$dry,
            "snps!"     => \$snps,
            "sample:s"  => \$sample,
            "doc:s"     => \$doc,
            "mq:s"      => \$mq,
            "frac:s"    => \$indelFractionMultiplier,
            "badsnps:s" => \$badsnps,
            "wait:s"    => \$wait,
            "sting:s"   => \$sting );

usage() if ( !$inputBamStr || !$outputHead );
# BUG FIX: -sample is interpolated unconditionally into the VariantFiltration
# command below; previously an omitted -sample silently produced a malformed
# "-sample " argument.  Usage documents it as required with -snps, so enforce.
usage() if ( $snps && !defined $sample );

# ----- high-stringency indel calls -----
# NOTE(review): unlike the jobs below, this submission carries no -J
# $outputHead, so the filtration job's ended($outputHead) dependency does not
# cover it; its output is not used downstream, which suggests this is
# intentional -- confirm before relying on it.
my $indelsHigh = "$outputHead.indels.high.calls";
my $bsub = "bsub -q $queue -o $indelsHigh.sdout";
if ($wait) {
    $bsub .= " -w \"ended($wait)\"";
}
my $command = "java -Djava.io.tmpdir=/broad/hptmp/ -Xmx4096m -jar $sting/dist/GenomeAnalysisTK.jar -S SILENT -T IndelGenotyper -R /broad/1KG/reference/human_b36_both.fasta $inputBamStr -minConsensusFraction 0.5 -minCnt 2 -1kg -minFraction 0.".$indelFractionMultiplier."3 -O $indelsHigh";
execute("$bsub $command", $dry);

# ----- low-stringency indel calls (consumed later by VariantFiltration) -----
my $indelsLow = "$outputHead.indels.low.calls";
$bsub = "bsub -q $queue -o $indelsLow.sdout -J $outputHead";
if ($wait) {
    $bsub .= " -w \"ended($wait)\"";
}
$command = "java -Djava.io.tmpdir=/broad/hptmp/ -Xmx4096m -jar $sting/dist/GenomeAnalysisTK.jar -S SILENT -T IndelGenotyper -R /broad/1KG/reference/human_b36_both.fasta $inputBamStr -minConsensusFraction 0.5 -minCnt 2 -1kg -minFraction 0.".$indelFractionMultiplier."1 -O $indelsLow";
execute("$bsub $command", $dry);

if ($snps) {
    # ----- raw SNP calls -----
    my $snpsFile = "$outputHead.snps.unfiltered.calls";
    $bsub = "bsub -q $queue -o $snpsFile.sdout -J $outputHead";
    if ($wait) {
        $bsub .= " -w \"ended($wait)\"";
    }
    $command = "java -Djava.io.tmpdir=/broad/hptmp/ -Xmx4096m -jar $sting/dist/GenomeAnalysisTK.jar -S SILENT -T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta $inputBamStr -varout $snpsFile -lod 0.0";
    execute("$bsub $command", $dry);

    # ----- filtration: waits for all jobs named $outputHead (low indels +
    # unfiltered snps above) before combining their outputs -----
    my $filterFile = "$outputHead.snps.filtered.calls";
    my $vcfFile = "$outputHead.snps.vcf";
    $bsub = "bsub -q $queue -o $filterFile.sdout -w \"ended($outputHead)\"";
    $command = "java -Djava.io.tmpdir=/broad/hptmp/ -Xmx4096m -jar $sting/dist/GenomeAnalysisTK.jar -S SILENT -T VariantFiltration -R /broad/1KG/reference/human_b36_both.fasta $inputBamStr -vcf $vcfFile -included $filterFile -sample $sample -B dbsnp,dbsnp,/humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod,variant,Variants,$snpsFile,";
    # The -B binding list is comma-terminated so that optional bindings can
    # simply be appended.
    if ($badsnps) {
        $command .= "cleaned,CleanedOutSNP,$badsnps,";
    }
    $command .= "indels,SimpleIndel,$indelsLow -X DepthOfCoverage:max=$doc -X AlleleBalance:low=0.25,high=0.75 -X FisherStrand:pvalue=0.00001 -X LodThreshold:lod=5.0 -X MappingQualityZero:max=$mq -X IndelArtifact -X ClusteredSnps:window=7,snps=3";
    execute("$bsub $command", $dry);
}

# Print the command when -dry was given; otherwise run it through the shell.
sub execute {
    my ($cmd, $dryRun) = @_;

    if ($dryRun) {
        print "$cmd\n";
    } else {
        system($cmd);
    }
}
|
||||
|
|
@ -1,110 +0,0 @@
|
|||
#!/usr/bin/perl -w

# Submits the 1000 Genomes indel-cleaning pipeline to the LSF farm:
#   1. candidate indel intervals      (IndelIntervals)
#   2. candidate mismatch intervals   (MismatchIntervals)
#   3. merge of the two interval sets (IntervalMerger)
#   4. two IntervalCleaner passes over the merged intervals: one producing
#      the cleaned bam, one collecting the "bad snps" list
#   5. with -inject: merge the cleaned reads back into a full output bam
#      (CleanedReadInjector)
# Steps are chained via LSF job-name dependencies (bsub -J / -w "ended(...)"),
# with job names derived from the OUTPUT bam name.

use strict;
use Getopt::Long;

# Print usage and exit non-zero.
sub usage {
    print "Usage: perl runCleaningPipeline.pl\n\t-i <input bam>\n\t-obam <output bam name>\n\t[-sting Sting dir]\n\t[-badsnps badsnps file name]\n\t[-inject]\n\t[-q farm queue; default:gsa]\n\t[-wait farm wait id]\n\t[-job final farm job name]\n\t[-dry]\n";
    exit(1);
}

my $inputBam = undef;   # bam to clean
my $outputBam = undef;  # cleaned bam name; also the prefix for all job names
my $jobName = undef;    # name for the cleaner jobs (non-inject) / final injector job
my $wait = undef;       # optional LSF job name to wait on before starting
my $badsnps = undef;    # where to write cleaned-out snps (default: $outputBam.badsnps)
my $dry;                # pass -dry through / print instead of submit
my $inject;             # re-inject cleaned reads into a full output bam
my $queue = "gsa";
my $sting = "/humgen/gsa-scr1/ebanks/Sting";

GetOptions( "i=s" => \$inputBam,
            "obam=s" => \$outputBam,
            "q:s" => \$queue,
            "dry!" => \$dry,
            "inject!" => \$inject,
            "job:s" => \$jobName,
            "wait:s" => \$wait,
            "badsnps:s" => \$badsnps,
            "sting:s" => \$sting );

usage() if ( !$inputBam || !$outputBam );
# BUG FIX: without -inject, both IntervalCleaner submissions append
# "-j $jobName"; when the (nominally optional) -job flag was omitted this
# interpolated an undefined value and produced a malformed submission, so
# require it up front.  Both known callers already pass it in this mode.
usage() if ( !$inject && !$jobName );

# --- step 1: candidate indel intervals ---
my $indelIntervals = "$outputBam.indels.intervals";
my $command = "perl $sting/perl/splitAndEnqueueGATKjobs.pl -cmd \"java -Djava.io.tmpdir=/broad/hptmp/ -Xmx4096m -jar $sting/dist/GenomeAnalysisTK.jar -S SILENT -T IndelIntervals -R /broad/1KG/reference/human_b36_both.fasta -I $inputBam\" -o $indelIntervals -oarg o -j $outputBam.intervals -q $queue";
if ($dry) {
    $command .= " -dry";
}
if ($wait) {
    $command .= " -wait $wait";
}
system($command);

# --- step 2: candidate mismatch intervals (same job name as step 1, so the
# merge below can wait on both at once) ---
my $mismatchIntervals = "$outputBam.mismatches.intervals";
$command = "perl $sting/perl/splitAndEnqueueGATKjobs.pl -cmd \"java -Djava.io.tmpdir=/broad/hptmp/ -Xmx4096m -jar $sting/dist/GenomeAnalysisTK.jar -S SILENT -T MismatchIntervals -R /broad/1KG/reference/human_b36_both.fasta -I $inputBam\" -o $mismatchIntervals -oarg o -j $outputBam.intervals -q $queue";
if ($dry) {
    $command .= " -dry";
}
if ($wait) {
    $command .= " -wait $wait";
}
system($command);

# --- step 3: merge the interval sets, waiting on the $outputBam.intervals jobs ---
my $mergedIntervals = "$outputBam.merged.intervals";
$command = "perl $sting/perl/splitAndEnqueueGATKjobs.pl -cmd \"java -Djava.io.tmpdir=/broad/hptmp/ -Xmx4096m -jar $sting/dist/GenomeAnalysisTK.jar -S SILENT -T IntervalMerger -R /broad/1KG/reference/human_b36_both.fasta -I $inputBam -intervals $indelIntervals -intervals $mismatchIntervals\" -o $mergedIntervals -oarg o -wait $outputBam.intervals -j $outputBam.merged -q $queue";
if ($dry) {
    $command .= " -dry";
}
system($command);

# --- step 4a: clean the reads.  With -inject the cleaned reads go to a side
# file for later injection; otherwise they become the output bam directly. ---
my $cleanedBam;
if ($inject) {
    $cleanedBam = "$outputBam.cleaned.bam";
} else {
    $cleanedBam = "$outputBam";
}
$command = "bsub -q $queue -o $outputBam.cleaner.script1 -w \"ended($outputBam.merged)\" -J $outputBam.cleaner.script perl $sting/perl/splitAndEnqueueGATKjobs.pl -cmd \"java -Djava.io.tmpdir=/broad/hptmp/ -Xmx4096m -jar $sting/dist/GenomeAnalysisTK.jar -S SILENT -T IntervalCleaner -R /broad/1KG/reference/human_b36_both.fasta -I $inputBam -compress 1";
if ($inject) {
    $command .= " -cleanedOnly\" -j $outputBam.cleaner.clean";
} else {
    $command .= "\" -j $jobName";
}
$command .= " -o $cleanedBam -oarg O -q $queue -bam -i $mergedIntervals -n 50";
if ($dry) {
    $command .= " -dry";
    print "$command\n";
} else {
    system($command);
}

# --- step 4b: second IntervalCleaner pass to collect the bad-snps list ---
my $snpsFile = $badsnps;
if (!$snpsFile) {
    $snpsFile = "$outputBam.badsnps";
}
$command = "bsub -q $queue -o $outputBam.cleaner.script2 -w \"ended($outputBam.merged)\" -J $outputBam.cleaner.script perl $sting/perl/splitAndEnqueueGATKjobs.pl -cmd \"java -Djava.io.tmpdir=/broad/hptmp/ -Xmx4096m -jar $sting/dist/GenomeAnalysisTK.jar -S SILENT -T IntervalCleaner -R /broad/1KG/reference/human_b36_both.fasta -I $inputBam\"";
if ($inject) {
    $command .= " -j $outputBam.cleaner.badsnps";
} else {
    $command .= " -j $jobName";
}
$command .= " -o $snpsFile -oarg snps -q $queue -i $mergedIntervals -n 50";
if ($dry) {
    $command .= " -dry";
    print "$command\n";
} else {
    system($command);
}

# --- step 5: inject the cleaned reads back into a full bam, once every
# $outputBam.cleaner.* job has ended ---
if ($inject) {
    my $bam = "$outputBam";
    $command = "bsub -q $queue -o $bam.sdout";
    if ($jobName) {
        $command .= " -J $jobName";
    }
    $command .= " -w \"ended($outputBam.cleaner.*)\" java -Djava.io.tmpdir=/broad/hptmp/ -Xmx4096m -jar $sting/dist/GenomeAnalysisTK.jar -S SILENT -T CleanedReadInjector -R /broad/1KG/reference/human_b36_both.fasta -I $inputBam --output_bam $bam --cleaned_reads $cleanedBam -compress 1";
    if ($dry) {
        print "$command\n";
    } else {
        system($command);
    }
}
|
||||
|
|
@ -1,72 +0,0 @@
|
|||
#!/usr/bin/perl -w

# Driver for the pilot-1 (low-coverage) pipelines: for each population-merged
# bam, run the cleaning pipeline and then the calling pipeline once the
# cleaning jobs have finished.

use strict;
use Getopt::Long;

# Print usage and exit non-zero.
# NOTE(review): usage advertises -o but GetOptions declares "odir"; -o only
# works because Getopt::Long accepts unambiguous abbreviations by default.
sub usage {
    print "Usage: perl runPilot1Pipeline.pl\n\t-i <input dir>\n\t-o <output directory>\n\t[-sting Sting dir]\n\t[-q farm queue; default:gsa]\n\t[-dry]\n";
    exit(1);
}

my $inputDir = undef;
my $outputDir = undef;
my $dry;
my $queue = "gsa";
my $sting = "/humgen/gsa-scr1/ebanks/Sting";

GetOptions( "i=s" => \$inputDir,
            "odir=s" => \$outputDir,
            "q:s" => \$queue,
            "dry!" => \$dry,
            "sting:s" => \$sting );

usage() if ( !$inputDir || !$outputDir );

my @samples = ("ceu","yri","chb_jpt");

foreach my $sample (@samples) {

    my $inputBam = "$inputDir/low_coverage_$sample.bam";
    my $outputHead = "$outputDir/$sample";
    my $outputBam = "$outputHead.bam";
    my $badsnps = "$outputBam.badsnps";
    clean($inputBam, $outputBam, $queue, $sting, $dry, $badsnps);
    # BUG FIX: the wait pattern must match the cleaning jobs' names, which are
    # derived from the OUTPUT bam ("-j $outputBam.cleaner.pipeline" in clean()
    # below; cf. the "$outputBam.cleaner.*" patterns in runCleaningPipeline.pl
    # and runPilot2Pipeline.pl).  The original waited on "$inputBam.cleaner.*",
    # a job name that never exists.
    call("-I $outputBam", $outputHead, $queue, $sting, $dry, "$outputBam.cleaner.*", $sample, $badsnps);
}

# Submit the cleaning pipeline (no read injection) for one bam.
sub clean {
    my ($inputBam, $outputBam, $queue, $sting, $dry, $badsnps) = @_;

    my $cmd = "perl $sting/perl/1kgScripts/runCleaningPipeline.pl -i $inputBam -obam $outputBam -q $queue -j $outputBam.cleaner.pipeline -sting $sting -badsnps $badsnps";
    if ($dry) {
        $cmd .= " -dry";
    }
    system($cmd);
}

# Submit the calling pipeline (indels only; -snps is not passed) for one bam,
# waiting on the given job-name pattern.
sub call {
    my ($inputBams, $outputHead, $queue, $sting, $dry, $wait, $sample, $badsnps) = @_;

    my $cmd = "perl $sting/perl/1kgScripts/runCallingPipeline.pl -i $inputBams -o $outputHead -q $queue -sting $sting -frac 0 -sample $sample -badsnps $badsnps";
    if ($dry) {
        $cmd .= " -dry";
    }
    if ($wait) {
        $cmd .= " -wait $wait";
    }
    system($cmd);
}
|
||||
|
|
@ -1,108 +0,0 @@
|
|||
#!/usr/bin/perl -w

# Driver for the pilot-2 (trio) cleaning + calling pipelines.  Every trio
# member gets its SLX data cleaned and called; the two deep-coverage samples
# (NA12878, NA19240) additionally get SOLID cleaned/called, then 454,
# SOLID+454, and all-technology call sets.

use strict;
use Getopt::Long;

# Print usage and exit non-zero.
sub usage {
    print "Usage: perl runPilot2Pipeline.pl\n\t-i <input dir>\n\t-o <output directory>\n\t[-sting Sting dir]\n\t[-q farm queue; default:gsa]\n\t[-dry]\n";
    exit(1);
}

my $inputDir  = undef;
my $outputDir = undef;
my $dry;
my $queue = "gsa";
my $sting = "/humgen/gsa-scr1/ebanks/Sting";

GetOptions( "i=s"     => \$inputDir,
            "o=s"     => \$outputDir,
            "q:s"     => \$queue,
            "dry!"    => \$dry,
            "sting:s" => \$sting );

usage() if ( !$inputDir || !$outputDir );

my @samples = ("NA19238","NA19239","NA19240","NA12878","NA12891","NA12892");

# Official genome-wide Depth of Coverage tables for pilot 2, freeze 5:
#           NA12878 NA12891 NA12892 NA19238 NA19239 NA19240
# 454:           36                                      18
# SLX:           82      91      70      56      68      86
# SOLID:         37                                      64
# 454+SLD:       64                                      77
# ALL:          138                                     150
my %DoC_454      = ( "NA12878" => 36,  "NA19240" => 18 );
my %DoC_slx      = ( "NA12878" => 82,  "NA12891" => 91, "NA12892" => 70, "NA19238" => 56, "NA19239" => 68, "NA19240" => 86 );
my %DoC_solid    = ( "NA12878" => 37,  "NA19240" => 64 );
my %DoC_454solid = ( "NA12878" => 64,  "NA19240" => 77 );
my %DoC_all      = ( "NA12878" => 138, "NA19240" => 150 );
my %MQ_hash      = ( "SLX" => 100, "SOLID" => 5, "454" => 5, "454SOLID" => 10, "ALL" => 110 );

for my $sampleName (@samples) {

    # Illumina (SLX) data exists for every trio member: clean, then call
    # once the cleaning jobs (named from the output bam) have ended.
    my $slxInput   = "$inputDir/$sampleName.pilot2.SLX.bam";
    my $slxHead    = "$outputDir/$sampleName.SLX";
    my $slxBam     = "$slxHead.bam";
    my $slxBadsnps = "$slxBam.badsnps";
    clean($slxInput, $slxBam, $queue, $sting, $dry, $slxBadsnps);
    call("-I $slxBam", $slxHead, $queue, $sting, $dry, "$slxBam.cleaner.*", $sampleName, $slxBadsnps, $DoC_slx{$sampleName}, $MQ_hash{"SLX"});

    # Only the deep-coverage children carry SOLID and 454 data.
    if ($sampleName eq "NA12878" || $sampleName eq "NA19240") {
        my $solidInput   = "$inputDir/$sampleName.pilot2.SOLID.bam";
        my $solidHead    = "$outputDir/$sampleName.SOLID";
        my $solidBam     = "$solidHead.bam";
        my $solidBadsnps = "$solidBam.badsnps";
        clean($solidInput, $solidBam, $queue, $sting, $dry, $solidBadsnps);
        call("-I $solidBam", $solidHead, $queue, $sting, $dry, "$solidBam.cleaner.*", $sampleName, $solidBadsnps, $DoC_solid{$sampleName}, $MQ_hash{"SOLID"});

        # 454 data is not cleaned; it is called directly from the input dir
        # (the wait/badsnps arguments piggy-back on the SLX cleaning jobs).
        my $bam454  = "$inputDir/$sampleName.pilot2.454.bam";
        my $head454 = "$outputDir/$sampleName.454";
        call("-I $bam454", $head454, $queue, $sting, $dry, "$slxBam.cleaner.*", $sampleName, $slxBadsnps, $DoC_454{$sampleName}, $MQ_hash{"454"});

        # Combined technology call sets.
        my $comboHead = "$outputDir/$sampleName.SOLID_454";
        call("-I $solidBam -I $bam454", $comboHead, $queue, $sting, $dry, "$solidBam.cleaner.*", $sampleName, $solidBadsnps, $DoC_454solid{$sampleName}, $MQ_hash{"454SOLID"});

        $comboHead = "$outputDir/$sampleName.allTechs";
        call("-I $slxBam -I $solidBam -I $bam454", $comboHead, $queue, $sting, $dry, "*.cleaner.*", $sampleName, $slxBadsnps, $DoC_all{$sampleName}, $MQ_hash{"ALL"});
    }
}

# Submit the cleaning pipeline (with read injection) for one bam.
sub clean {
    my ($inBam, $outBam, $q, $stingDir, $dryRun, $badSnpsFile) = @_;

    my $cmd = "perl $stingDir/perl/1kgScripts/runCleaningPipeline.pl -i $inBam -obam $outBam -q $q -inject -j $outBam.cleaner.pipeline -sting $stingDir -badsnps $badSnpsFile";
    $cmd .= " -dry" if $dryRun;
    system($cmd);
}

# Submit the calling pipeline (indels + snps) for one set of bams, waiting
# on the given job-name pattern.
sub call {
    my ($bamArgs, $outHead, $q, $stingDir, $dryRun, $waitPattern, $sampleId, $badSnpsFile, $depth, $mapq) = @_;

    my $cmd = "perl $stingDir/perl/1kgScripts/runCallingPipeline.pl -i \"$bamArgs\" -o $outHead -q $q -snps -sting $stingDir -sample $sampleId -badsnps $badSnpsFile -doc $depth -mq $mapq";
    $cmd .= " -dry" if $dryRun;
    $cmd .= " -wait $waitPattern" if $waitPattern;
    system($cmd);
}
|
||||
|
|
@ -1,2 +0,0 @@
|
|||
# Example driver invocations for the pilot pipelines (Sting STABLE build);
# the first (pilot 1, dry run) is kept commented out for reference.
#perl /humgen/gsa-scr1/ebanks/Sting_STABLE/perl/1kgScripts/runPilot1Pipeline.pl -i /humgen/gsa-hphome1/projects/1kg_pilot1/mergedBamsByPopulation -o /broad/hptmp/ebanks/1kg_pilot1/cleaned -sting /humgen/gsa-scr1/ebanks/Sting_STABLE -dry
perl /humgen/gsa-scr1/ebanks/Sting_STABLE/perl/1kgScripts/runPilot2Pipeline.pl -i /broad/1KG/DCC_merged/freeze5.2 -o /broad/hptmp/ebanks/1kg_pilot2/cleaned -sting /humgen/gsa-scr1/ebanks/Sting_STABLE
|
||||
|
|
@ -0,0 +1,9 @@
|
|||
Before sync'ing 1000 Genomes, you need to be logged in as gsa-dev:
|
||||
% sudo -s -u gsa-dev
|
||||
[Note that this step needs to be done before subsequent steps for stability]
|
||||
|
||||
To use Aspera, you'll need to ssh into one of the appropriate machines:
|
||||
% ssh vbigtube    (or equivalently: % ssh mirror)
|
||||
|
||||
[The NCBI Aspera source is: anonftp@ftp-private.ncbi.nih.gov:/1000genomes/ftp/]
|
||||
|
||||
|
|
@ -0,0 +1,19 @@
|
|||
#!/usr/bin/perl -w

# Runs Aspera (ascp) to pull files from an ftp source down to a local
# destination directory.

use strict;
use Getopt::Long;

my $source = undef;   # remote source, e.g. user@host:/path
my $dest = ".";       # local destination directory

GetOptions( "source=s" => \$source,
            "dest=s"   => \$dest );

if ( !$source) {
    print "Usage: runAspera.pl\n\t-source \t<ftp source>\n\t-dest \t\t<local destination; defaults to '.'>\n";
    exit(1);
}

# NOTE(review): flag meanings assumed from ascp conventions (-k2 resume,
# -QT fair policy/no encryption, -r recursive, -l2G rate cap, -d create
# destination dir, -v verbose) -- confirm against the ascp manual.
my $cmd = "ascp -i /opt/aspera/etc/asperaweb_id_dsa.putty -k2 -QTr -l2G -d -v $source $dest";
# Echo the command before running it, consistent with runWget.pl.
print "$cmd\n";
my $status = system($cmd);
# BUG FIX: the original ignored system()'s return value, so the script
# exited 0 even when the transfer failed.  Propagate ascp's exit code.
exit($status == 0 ? 0 : (($status >> 8) || 1));
|
||||
|
|
@ -0,0 +1,19 @@
|
|||
#!/usr/bin/perl -w

# Runs wget to pull down a single file from the 1000 Genomes EBI ftp site,
# writing it to the corresponding path under the local mirror root.

use strict;
use Getopt::Long;

my $file = undef;
# GENERALIZATION: the local and remote roots were hard-coded; they are now
# options whose defaults reproduce the original behavior exactly.
my $dest = "/humgen/1kg/DCC/ftp";                       # local mirror root
my $root = "ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp";  # remote ftp root

GetOptions( "file=s" => \$file,
            "dest:s" => \$dest,
            "root:s" => \$root );

if ( !$file) {
    print "Usage: runWget.pl\n\t-file \t<file>\n\t[-dest \t<local mirror root; default: /humgen/1kg/DCC/ftp>]\n\t[-root \t<remote ftp root; default: ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp>]\n";
    exit(1);
}

chomp($file);
my $cmd = "wget -O $dest/$file $root/$file";
print "$cmd\n";
my $status = system($cmd);
# BUG FIX: the original ignored system()'s return value, so the script
# exited 0 even when the download failed.  Propagate wget's exit code.
exit($status == 0 ? 0 : (($status >> 8) || 1));
|
||||
|
|
@ -0,0 +1,46 @@
|
|||
#!/usr/bin/perl -w

# Syncs the files listed in a manifest from the 1000 Genomes DCC into the
# local mirror, using either aspera (default) or wget; each line of the
# manifest is a path relative to the ftp root.

use strict;
use Getopt::Long;

# Print usage and exit non-zero.
sub usage {
    print "Usage: perl syncFilesInList.pl\n\t-files <file containing files to sync>\n\t-protocol <protocol to use> [defaults to 'aspera'; can also use 'wget']\n\t[-dry]\n";
    exit(1);
}

my $files = undef;       # manifest of remote paths, one per line
my $dry;                 # print commands instead of running them
my $protocol = "aspera";

GetOptions( "files=s"    => \$files,
            "dry!"       => \$dry,
            "protocol=s" => \$protocol);

usage() if ( !$files );

# BUG FIX: lexical filehandle + 3-arg open (the original used a bareword
# handle with a 2-arg-style open, which is open to mode injection via the
# filename).
open(my $list, '<', $files) or die "can't open $files: $!";
while ( my $line = <$list> ) {
    chomp($line);
    if ( $protocol eq "aspera" ) {
        # BUG FIX: the original used $1 without checking the match succeeded;
        # a manifest line not containing data/<x>/alignment silently reused
        # the PREVIOUS line's capture and synced into the wrong directory.
        # Non-matching lines are now skipped with a warning.
        if ( $line =~ m/data\/(.*)\/alignment.*/ ) {
            my $cmd = "./runAspera.pl -source anonftp\@ftp-private.ncbi.nih.gov:/1000genomes/ftp/$line -dest /humgen/1kg/DCC/ftp/data/$1/alignment/";
            execute($cmd, $dry);
        } else {
            warn "skipping manifest line with unexpected layout: $line\n";
        }
    } elsif ( $protocol eq "wget" ) {
        my $cmd = "./runWget.pl -file $line";
        execute($cmd, $dry);
    } else {
        usage();
    }
}
close($list);

# Print the command when -dry was given; otherwise run it through the shell.
sub execute {
    my ($cmd, $dryRun) = @_;

    if ($dryRun) {
        print "$cmd\n";
    } else {
        system($cmd);
    }
}
|
||||
Loading…
Reference in New Issue