From 463bb737c3f3396a852c3ae9555540e213580a78 Mon Sep 17 00:00:00 2001 From: ebanks Date: Sun, 20 Feb 2011 03:59:26 +0000 Subject: [PATCH] Checking in the scripts I'm using for syncing the 1000G mirror. Note that very few people can actually use them because you most likely don't have perission to write to /humgen/1kg/DCC, but these should be used as a resource if anyone ever needs to do this in the future. These scripts are very naive and consist of just the actual pulling down of data. Currently aspera and wget are supported, but Mark should feel free to add lftp if he wants. :) Also, while I'm here, I'm removing obsolete scripts for running an obsolete pipeline. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5282 348d0f76-0448-11de-a6fe-93d51630548a --- perl/1kgScripts/runCallingPipeline.pl | 85 ------------------ perl/1kgScripts/runCleaningPipeline.pl | 110 ------------------------ perl/1kgScripts/runPilot1Pipeline.pl | 72 ---------------- perl/1kgScripts/runPilot2Pipeline.pl | 108 ----------------------- perl/1kgScripts/runPilotPipelines.sh | 2 - perl/sync1000Genomes/README.sync | 9 ++ perl/sync1000Genomes/runAspera.pl | 19 ++++ perl/sync1000Genomes/runWget.pl | 19 ++++ perl/sync1000Genomes/syncFilesInList.pl | 46 ++++++++++ 9 files changed, 93 insertions(+), 377 deletions(-) delete mode 100755 perl/1kgScripts/runCallingPipeline.pl delete mode 100755 perl/1kgScripts/runCleaningPipeline.pl delete mode 100755 perl/1kgScripts/runPilot1Pipeline.pl delete mode 100755 perl/1kgScripts/runPilot2Pipeline.pl delete mode 100755 perl/1kgScripts/runPilotPipelines.sh create mode 100644 perl/sync1000Genomes/README.sync create mode 100755 perl/sync1000Genomes/runAspera.pl create mode 100755 perl/sync1000Genomes/runWget.pl create mode 100755 perl/sync1000Genomes/syncFilesInList.pl diff --git a/perl/1kgScripts/runCallingPipeline.pl b/perl/1kgScripts/runCallingPipeline.pl deleted file mode 100755 index 1e5bb550c..000000000 --- a/perl/1kgScripts/runCallingPipeline.pl +++ /dev/null @@ -1,85 +0,0 @@ -#!/usr/bin/perl -w - -use strict; -use Getopt::Long; - -sub usage { - print "Usage: perl runCallingPipeline.pl\n\t-i \n\t-o \n\t[-sting Sting dir]\n\t[-frac multiplier for indel fractions]\n\t[-snps Should we call snps?]\n\t[-sample for writing vcf with -snps]\n\t[-badsnps bad snps file from cleaning with -snps]\n\t[-doc DepthOfCoverage for filtering with -snps]\n\t[-mq mapping quality zero for filtering with -snps ]\n\t[-q farm queue; default:gsa]\n\t[-wait farm wait id]\n\t[-dry]\n"; - exit(1); -} - -my $inputBamStr = undef; -my $outputHead = undef; -my $wait = undef; -my $dry; -my $snps; -my $badsnps = undef; -my $sample; -my $doc = 100; -my $mq = 100; -my $indelFractionMultiplier = ""; -my $queue = "gsa"; -my $sting = "/humgen/gsa-scr1/ebanks/Sting"; - -GetOptions( "i=s" => \$inputBamStr, - "o=s" => \$outputHead, - "q:s" => \$queue, - "dry!" => \$dry, - "snps!" => \$snps, - "sample:s" => \$sample, - "doc:s" => \$doc, - "mq:s" => \$mq, - "frac:s" => \$indelFractionMultiplier, - "badsnps:s" => \$badsnps, - "wait:s" => \$wait, - "sting:s" => \$sting ); - -usage() if ( !$inputBamStr || !$outputHead ); - -my $indelsHigh = "$outputHead.indels.high.calls"; -my $bsub = "bsub -q $queue -o $indelsHigh.sdout"; -if ($wait) { - $bsub .= " -w \"ended($wait)\""; -} -my $command = "java -Djava.io.tmpdir=/broad/hptmp/ -Xmx4096m -jar $sting/dist/GenomeAnalysisTK.jar -S SILENT -T IndelGenotyper -R /broad/1KG/reference/human_b36_both.fasta $inputBamStr -minConsensusFraction 0.5 -minCnt 2 -1kg -minFraction 0.".$indelFractionMultiplier."3 -O $indelsHigh"; -execute("$bsub $command", $dry); - -my $indelsLow = "$outputHead.indels.low.calls"; -$bsub = "bsub -q $queue -o $indelsLow.sdout -J $outputHead"; -if ($wait) { - $bsub .= " -w \"ended($wait)\""; -} -$command = "java -Djava.io.tmpdir=/broad/hptmp/ -Xmx4096m -jar $sting/dist/GenomeAnalysisTK.jar -S SILENT -T IndelGenotyper -R /broad/1KG/reference/human_b36_both.fasta $inputBamStr -minConsensusFraction 0.5 -minCnt 2 -1kg -minFraction 0.".$indelFractionMultiplier."1 -O $indelsLow"; -execute("$bsub $command", $dry); - -if ($snps) { - my $snpsFile = "$outputHead.snps.unfiltered.calls"; - $bsub = "bsub -q $queue -o $snpsFile.sdout -J $outputHead"; - if ($wait) { - $bsub .= " -w \"ended($wait)\""; - } - $command = "java -Djava.io.tmpdir=/broad/hptmp/ -Xmx4096m -jar $sting/dist/GenomeAnalysisTK.jar -S SILENT -T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta $inputBamStr -varout $snpsFile -lod 0.0"; - execute("$bsub $command", $dry); - - my $filterFile = "$outputHead.snps.filtered.calls"; - my $vcfFile = "$outputHead.snps.vcf"; - $bsub = "bsub -q $queue -o $filterFile.sdout -w \"ended($outputHead)\""; - $command = "java -Djava.io.tmpdir=/broad/hptmp/ -Xmx4096m -jar $sting/dist/GenomeAnalysisTK.jar -S SILENT -T VariantFiltration -R /broad/1KG/reference/human_b36_both.fasta $inputBamStr -vcf $vcfFile -included $filterFile -sample $sample -B dbsnp,dbsnp,/humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod,variant,Variants,$snpsFile,"; - if ($badsnps) { - $command .= "cleaned,CleanedOutSNP,$badsnps,"; - } - $command .= "indels,SimpleIndel,$indelsLow -X DepthOfCoverage:max=$doc -X AlleleBalance:low=0.25,high=0.75 -X FisherStrand:pvalue=0.00001 -X LodThreshold:lod=5.0 -X MappingQualityZero:max=$mq -X IndelArtifact -X ClusteredSnps:window=7,snps=3"; - execute("$bsub $command", $dry); -} - -sub execute { - - my $cmd = $_[0]; - my $dry = $_[1]; - - if ($dry) { - print "$cmd\n"; - } else { - system($cmd); - } -} diff --git a/perl/1kgScripts/runCleaningPipeline.pl b/perl/1kgScripts/runCleaningPipeline.pl deleted file mode 100755 index 5b5a34dd6..000000000 --- a/perl/1kgScripts/runCleaningPipeline.pl +++ /dev/null @@ -1,110 +0,0 @@ -#!/usr/bin/perl -w - -use strict; -use Getopt::Long; - -sub usage { - print "Usage: perl runCleaningPipeline.pl\n\t-i \n\t-obam \n\t[-sting Sting dir]\n\t[-badsnps badsnps file name]\n\t[-inject]\n\t[-q farm queue; default:gsa]\n\t[-wait farm wait id]\n\t[-job final farm job name]\n\t[-dry]\n"; - exit(1); -} - -my $inputBam = undef; -my $outputBam = undef; -my $jobName = undef; -my $wait = undef; -my $badsnps = undef; -my $dry; -my $inject; -my $queue = "gsa"; -my $sting = "/humgen/gsa-scr1/ebanks/Sting"; - -GetOptions( "i=s" => \$inputBam, - "obam=s" => \$outputBam, - "q:s" => \$queue, - "dry!" => \$dry, - "inject!" => \$inject, - "job:s" => \$jobName, - "wait:s" => \$wait, - "badsnps:s" => \$badsnps, - "sting:s" => \$sting ); - -usage() if ( !$inputBam || !$outputBam ); - -my $indelIntervals = "$outputBam.indels.intervals"; -my $command = "perl $sting/perl/splitAndEnqueueGATKjobs.pl -cmd \"java -Djava.io.tmpdir=/broad/hptmp/ -Xmx4096m -jar $sting/dist/GenomeAnalysisTK.jar -S SILENT -T IndelIntervals -R /broad/1KG/reference/human_b36_both.fasta -I $inputBam\" -o $indelIntervals -oarg o -j $outputBam.intervals -q $queue"; -if ($dry) { - $command .= " -dry"; -} -if ($wait) { - $command .= " -wait $wait"; -} -system($command); - -my $mismatchIntervals = "$outputBam.mismatches.intervals"; -$command = "perl $sting/perl/splitAndEnqueueGATKjobs.pl -cmd \"java -Djava.io.tmpdir=/broad/hptmp/ -Xmx4096m -jar $sting/dist/GenomeAnalysisTK.jar -S SILENT -T MismatchIntervals -R /broad/1KG/reference/human_b36_both.fasta -I $inputBam\" -o $mismatchIntervals -oarg o -j $outputBam.intervals -q $queue"; -if ($dry) { - $command .= " -dry"; -} -if ($wait) { - $command .= " -wait $wait"; -} -system($command); - -my $mergedIntervals = "$outputBam.merged.intervals"; -$command = "perl $sting/perl/splitAndEnqueueGATKjobs.pl -cmd \"java -Djava.io.tmpdir=/broad/hptmp/ -Xmx4096m -jar $sting/dist/GenomeAnalysisTK.jar -S SILENT -T IntervalMerger -R /broad/1KG/reference/human_b36_both.fasta -I $inputBam -intervals $indelIntervals -intervals $mismatchIntervals\" -o $mergedIntervals -oarg o -wait $outputBam.intervals -j $outputBam.merged -q $queue"; -if ($dry) { - $command .= " -dry"; -} -system($command); - -my $cleanedBam; -if ($inject) { - $cleanedBam = "$outputBam.cleaned.bam"; -} else { - $cleanedBam = "$outputBam"; -} -$command = "bsub -q $queue -o $outputBam.cleaner.script1 -w \"ended($outputBam.merged)\" -J $outputBam.cleaner.script perl $sting/perl/splitAndEnqueueGATKjobs.pl -cmd \"java -Djava.io.tmpdir=/broad/hptmp/ -Xmx4096m -jar $sting/dist/GenomeAnalysisTK.jar -S SILENT -T IntervalCleaner -R /broad/1KG/reference/human_b36_both.fasta -I $inputBam -compress 1"; -if ($inject) { - $command .= " -cleanedOnly\" -j $outputBam.cleaner.clean"; -} else { - $command .= "\" -j $jobName"; -} -$command .= " -o $cleanedBam -oarg O -q $queue -bam -i $mergedIntervals -n 50"; -if ($dry) { - $command .= " -dry"; - print "$command\n"; -} else { - system($command); -} - -my $snpsFile = $badsnps; -if (!$snpsFile) { - $snpsFile = "$outputBam.badsnps"; -} -$command = "bsub -q $queue -o $outputBam.cleaner.script2 -w \"ended($outputBam.merged)\" -J $outputBam.cleaner.script perl $sting/perl/splitAndEnqueueGATKjobs.pl -cmd \"java -Djava.io.tmpdir=/broad/hptmp/ -Xmx4096m -jar $sting/dist/GenomeAnalysisTK.jar -S SILENT -T IntervalCleaner -R /broad/1KG/reference/human_b36_both.fasta -I $inputBam\""; -if ($inject) { - $command .= " -j $outputBam.cleaner.badsnps"; -} else { - $command .= " -j $jobName"; -} -$command .= " -o $snpsFile -oarg snps -q $queue -i $mergedIntervals -n 50"; -if ($dry) { - $command .= " -dry"; - print "$command\n"; -} else { - system($command); -} - -if ($inject) { - my $bam = "$outputBam"; - $command = "bsub -q $queue -o $bam.sdout"; - if ($jobName) { - $command .= " -J $jobName"; - } - $command .= " -w \"ended($outputBam.cleaner.*)\" java -Djava.io.tmpdir=/broad/hptmp/ -Xmx4096m -jar $sting/dist/GenomeAnalysisTK.jar -S SILENT -T CleanedReadInjector -R /broad/1KG/reference/human_b36_both.fasta -I $inputBam --output_bam $bam --cleaned_reads $cleanedBam -compress 1"; - if ($dry) { - print "$command\n"; - } else { - system($command); - } -} diff --git a/perl/1kgScripts/runPilot1Pipeline.pl b/perl/1kgScripts/runPilot1Pipeline.pl deleted file mode 100755 index 4a3b0ab81..000000000 --- a/perl/1kgScripts/runPilot1Pipeline.pl +++ /dev/null @@ -1,72 +0,0 @@ -#!/usr/bin/perl -w - -use strict; -use Getopt::Long; - -sub usage { - print "Usage: perl runPilot1Pipeline.pl\n\t-i \n\t-o \n\t[-sting Sting dir]\n\t[-q farm queue; default:gsa]\n\t[-dry]\n"; - exit(1); -} - -my $inputDir = undef; -my $outputDir = undef; -my $dry; -my $queue = "gsa"; -my $sting = "/humgen/gsa-scr1/ebanks/Sting"; - -GetOptions( "i=s" => \$inputDir, - "odir=s" => \$outputDir, - "q:s" => \$queue, - "dry!" => \$dry, - "sting:s" => \$sting ); - -usage() if ( !$inputDir || !$outputDir ); - -my @samples = ("ceu","yri","chb_jpt"); - -foreach my $sample (@samples) { - - my $inputBam = "$inputDir/low_coverage_$sample.bam"; - my $outputHead = "$outputDir/$sample"; - my $outputBam = "$outputHead.bam"; - my $badsnps = "$outputBam.badsnps"; - clean($inputBam, $outputBam, $queue, $sting, $dry, $badsnps); - call("-I $outputBam", $outputHead, $queue, $sting, $dry, "$inputBam.cleaner.*", $sample, $badsnps); -} - -sub clean { - - my $inputBam = $_[0]; - my $outputBam = $_[1]; - my $queue = $_[2]; - my $sting = $_[3]; - my $dry = $_[4]; - my $badsnps = $_[5]; - - my $cmd = "perl $sting/perl/1kgScripts/runCleaningPipeline.pl -i $inputBam -obam $outputBam -q $queue -j $outputBam.cleaner.pipeline -sting $sting -badsnps $badsnps"; - if ($dry) { - $cmd .= " -dry"; - } - system($cmd); -} - -sub call { - - my $inputBams = $_[0]; - my $outputHead = $_[1]; - my $queue = $_[2]; - my $sting = $_[3]; - my $dry = $_[4]; - my $wait = $_[5]; - my $sample = $_[6]; - my $badsnps = $_[7]; - - my $cmd = "perl $sting/perl/1kgScripts/runCallingPipeline.pl -i $inputBams -o $outputHead -q $queue -sting $sting -frac 0 -sample $sample -badsnps $badsnps"; - if ($dry) { - $cmd .= " -dry"; - } - if ($wait) { - $cmd .= " -wait $wait"; - } - system($cmd); -} diff --git a/perl/1kgScripts/runPilot2Pipeline.pl b/perl/1kgScripts/runPilot2Pipeline.pl deleted file mode 100755 index 58c634859..000000000 --- a/perl/1kgScripts/runPilot2Pipeline.pl +++ /dev/null @@ -1,108 +0,0 @@ -#!/usr/bin/perl -w - -use strict; -use Getopt::Long; - -sub usage { - print "Usage: perl runPilot2Pipeline.pl\n\t-i \n\t-o \n\t[-sting Sting dir]\n\t[-q farm queue; default:gsa]\n\t[-dry]\n"; - exit(1); -} - -my $inputDir = undef; -my $outputDir = undef; -my $dry; -my $queue = "gsa"; -my $sting = "/humgen/gsa-scr1/ebanks/Sting"; - -GetOptions( "i=s" => \$inputDir, - "o=s" => \$outputDir, - "q:s" => \$queue, - "dry!" => \$dry, - "sting:s" => \$sting ); - -usage() if ( !$inputDir || !$outputDir ); - -my @samples = ("NA19238","NA19239","NA19240","NA12878","NA12891","NA12892"); - -# Official genome-wide Depth of Coverage tables for pilot 2, freeze 5: -# NA12878 NA12891 NA12892 NA19238 NA19239 NA19240 -# 454: 36 18 -# SLX: 82 91 70 56 68 86 -# SOLID: 37 64 -# 454+SLD: 64 77 -# ALL: 138 150 - -my %DoC_454 = ( "NA12878" => 36, "NA19240" => 18 ); -my %DoC_slx = ( "NA12878" => 82, "NA12891" => 91, "NA12892" => 70,"NA19238" => 56, "NA19239" => 68, "NA19240" => 86 ); -my %DoC_solid = ( "NA12878" => 37, "NA19240" => 64 ); -my %DoC_454solid = ( "NA12878" => 64, "NA19240" => 77 ); -my %DoC_all = ( "NA12878" => 138, "NA19240" => 150 ); -my %MQ_hash = ( "SLX" => 100, "SOLID" => 5, "454" => 5, "454SOLID" => 10, "ALL" => 110 ); - -foreach my $sample (@samples) { - - my $inputBamSLX = "$inputDir/$sample.pilot2.SLX.bam"; - my $outputHeadSLX = "$outputDir/$sample.SLX"; - my $outputBamSLX = "$outputHeadSLX.bam"; - my $badsnpsSLX = "$outputBamSLX.badsnps"; - clean($inputBamSLX, $outputBamSLX, $queue, $sting, $dry, $badsnpsSLX); - call("-I $outputBamSLX", $outputHeadSLX, $queue, $sting, $dry, "$outputBamSLX.cleaner.*", $sample, $badsnpsSLX, $DoC_slx{$sample}, $MQ_hash{"SLX"}); - - if ($sample eq "NA12878" || $sample eq "NA19240") { - my $inputBamSOLID = "$inputDir/$sample.pilot2.SOLID.bam"; - my $outputHeadSOLID = "$outputDir/$sample.SOLID"; - my $outputBamSOLID = "$outputHeadSOLID.bam"; - my $badsnpsSOLID = "$outputBamSOLID.badsnps"; - clean($inputBamSOLID, $outputBamSOLID, $queue, $sting, $dry, $badsnpsSOLID); - call("-I $outputBamSOLID", $outputHeadSOLID, $queue, $sting, $dry, "$outputBamSOLID.cleaner.*", $sample, $badsnpsSOLID, $DoC_solid{$sample}, $MQ_hash{"SOLID"}); - - my $inputBam454 = "$inputDir/$sample.pilot2.454.bam"; - my $outputHead454 = "$outputDir/$sample.454"; - call("-I $inputBam454", $outputHead454, $queue, $sting, $dry, "$outputBamSLX.cleaner.*", $sample, $badsnpsSLX, $DoC_454{$sample}, $MQ_hash{"454"}); - - my $outputHead = "$outputDir/$sample.SOLID_454"; - call("-I $outputBamSOLID -I $inputBam454", $outputHead, $queue, $sting, $dry, "$outputBamSOLID.cleaner.*", $sample, $badsnpsSOLID, $DoC_454solid{$sample}, $MQ_hash{"454SOLID"}); - - $outputHead = "$outputDir/$sample.allTechs"; - call("-I $outputBamSLX -I $outputBamSOLID -I $inputBam454", $outputHead, $queue, $sting, $dry, "*.cleaner.*", $sample, $badsnpsSLX, $DoC_all{$sample}, $MQ_hash{"ALL"}); - } -} - -sub clean { - - my $inputBam = $_[0]; - my $outputBam = $_[1]; - my $queue = $_[2]; - my $sting = $_[3]; - my $dry = $_[4]; - my $badsnps = $_[5]; - - my $cmd = "perl $sting/perl/1kgScripts/runCleaningPipeline.pl -i $inputBam -obam $outputBam -q $queue -inject -j $outputBam.cleaner.pipeline -sting $sting -badsnps $badsnps"; - if ($dry) { - $cmd .= " -dry"; - } - system($cmd); -} - -sub call { - - my $inputBams = $_[0]; - my $outputHead = $_[1]; - my $queue = $_[2]; - my $sting = $_[3]; - my $dry = $_[4]; - my $wait = $_[5]; - my $sample = $_[6]; - my $badsnps = $_[7]; - my $doc = $_[8]; - my $mq = $_[9]; - - my $cmd = "perl $sting/perl/1kgScripts/runCallingPipeline.pl -i \"$inputBams\" -o $outputHead -q $queue -snps -sting $sting -sample $sample -badsnps $badsnps -doc $doc -mq $mq"; - if ($dry) { - $cmd .= " -dry"; - } - if ($wait) { - $cmd .= " -wait $wait"; - } - system($cmd); -} diff --git a/perl/1kgScripts/runPilotPipelines.sh b/perl/1kgScripts/runPilotPipelines.sh deleted file mode 100755 index d95c9634b..000000000 --- a/perl/1kgScripts/runPilotPipelines.sh +++ /dev/null @@ -1,2 +0,0 @@ -#perl /humgen/gsa-scr1/ebanks/Sting_STABLE/perl/1kgScripts/runPilot1Pipeline.pl -i /humgen/gsa-hphome1/projects/1kg_pilot1/mergedBamsByPopulation -o /broad/hptmp/ebanks/1kg_pilot1/cleaned -sting /humgen/gsa-scr1/ebanks/Sting_STABLE -dry -perl /humgen/gsa-scr1/ebanks/Sting_STABLE/perl/1kgScripts/runPilot2Pipeline.pl -i /broad/1KG/DCC_merged/freeze5.2 -o /broad/hptmp/ebanks/1kg_pilot2/cleaned -sting /humgen/gsa-scr1/ebanks/Sting_STABLE diff --git a/perl/sync1000Genomes/README.sync b/perl/sync1000Genomes/README.sync new file mode 100644 index 000000000..d8492750a --- /dev/null +++ b/perl/sync1000Genomes/README.sync @@ -0,0 +1,9 @@ +Before sync'ing 1000 Genomes, you need to be logged in as gsa-dev: +% sudo -s -u gsa-dev +[Note that this step needs to be done before subsequent steps for stability] + +To use Aspera, you'll need to ssh into one of the appropriate machines: +% ssh vbigtube or mirror + +[The NCBI Aspera source is: anonftp@ftp-private.ncbi.nih.gov:/1000genomes/ftp/] + diff --git a/perl/sync1000Genomes/runAspera.pl b/perl/sync1000Genomes/runAspera.pl new file mode 100755 index 000000000..cf4c6554d --- /dev/null +++ b/perl/sync1000Genomes/runAspera.pl @@ -0,0 +1,19 @@ +#!/usr/bin/perl -w + +# Runs Aspera to pull down files + +use strict; +use Getopt::Long; + +my $source = undef; +my $dest = "."; +GetOptions( "source=s" => \$source, + "dest=s" => \$dest); + +if ( !$source) { + print "Usage: runAspera.pl\n\t-source \t\n\t-dest \t\t\n"; + exit(1); +} + +my $cmd = "ascp -i /opt/aspera/etc/asperaweb_id_dsa.putty -k2 -QTr -l2G -d -v $source $dest"; +system($cmd); diff --git a/perl/sync1000Genomes/runWget.pl b/perl/sync1000Genomes/runWget.pl new file mode 100755 index 000000000..c2bdac748 --- /dev/null +++ b/perl/sync1000Genomes/runWget.pl @@ -0,0 +1,19 @@ +#!/usr/bin/perl -w + +# Runs Wget to pull down a file + +use strict; +use Getopt::Long; + +my $file = undef; +GetOptions( "file=s" => \$file); + +if ( !$file) { + print "Usage: runWget.pl\n\t-file \t\n"; + exit(1); +} + +chomp($file); +my $cmd = "wget -O /humgen/1kg/DCC/ftp/$file ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/$file"; +print "$cmd\n"; +system($cmd); diff --git a/perl/sync1000Genomes/syncFilesInList.pl b/perl/sync1000Genomes/syncFilesInList.pl new file mode 100755 index 000000000..6a7ac83c5 --- /dev/null +++ b/perl/sync1000Genomes/syncFilesInList.pl @@ -0,0 +1,46 @@ +#!/usr/bin/perl -w + +use Getopt::Long; + +sub usage { + print "Usage: perl syncFilesInList.pl\n\t-files \n\t-protocol [defaults to 'aspera'; can also use 'wget']\n\t[-dry]\n"; + exit(1); +} + +my $files = undef; +my $dry; +my $protocol = "aspera"; + +GetOptions( "files=s" => \$files, + "dry!" => \$dry, + "protocol=s" => \$protocol); + +usage() if ( !$files ); + +open(LIST, "< $files") or die "can't open $files: $!"; +while ( ) { + chomp($_); + if ( $protocol eq "aspera" ) { + $_ =~ m/data\/(.*)\/alignment.*/; + $cmd = "./runAspera.pl -source anonftp\@ftp-private.ncbi.nih.gov:/1000genomes/ftp/$_ -dest /humgen/1kg/DCC/ftp/data/$1/alignment/"; + execute($cmd, $dry); + } elsif ( $protocol eq "wget" ) { + $cmd = "./runWget.pl -file $_"; + execute($cmd, $dry); + } else { + usage(); + } +} +close(LIST); + +sub execute { + + my $cmd = $_[0]; + my $dry = $_[1]; + + if ($dry) { + print "$cmd\n"; + } else { + system($cmd); + } +}