2 more scripts I found helpful in syncing (and cleaning up) the 1000G mirror
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5287 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
d7f98ccd9c
commit
63f40215b3
|
|
@ -0,0 +1,43 @@
|
|||
#!/usr/bin/perl -w
|
||||
|
||||
use Getopt::Long;
|
||||
|
||||
# Show the command-line synopsis on standard output, then terminate the
# script with exit status 1.
sub usage {
    my $synopsis = "Usage: perl checkMD5s.pl\n\t-ai <alignment.index to check>\n\t-o <file to store results>\n";
    print STDOUT $synopsis;
    exit 1;
}
|
||||
|
||||
|
||||
# Command-line options:
#   -ai  alignment.index file whose entries are to be verified
#   -o   file that receives the report written by check()
my $ai  = undef;
my $out = undef;
GetOptions( "ai=s" => \$ai,
            "o=s"  => \$out );

# Both options are mandatory.
usage() if ( !$ai || !$out );

# Three-argument open keeps mode characters or surrounding whitespace in the
# user-supplied names from being interpreted by open(). The bareword handles
# are kept on purpose: check() prints to the global OUT handle.
open(OUT, '>', $out) or die "can't open $out: $!";
open(LIST, '<', $ai) or die "can't open $ai: $!";

while ( my $line = <LIST> ) {
    my @pieces = split(' ', $line);

    # Only lines with exactly six whitespace-separated fields are checked;
    # the fields are passed to check() as three consecutive pairs. Any other
    # line is skipped.
    if ( @pieces == 6 ) {
        check($pieces[0], $pieces[1]);
        check($pieces[2], $pieces[3]);
        check($pieces[4], $pieces[5]);
    }
}

close(LIST);

# Buffered write errors only surface at close time, so check it for the report.
close(OUT) or die "can't close $out: $!";
|
||||
|
||||
# Compare the MD5 digest of one file under /humgen/1kg/DCC/ftp/ with an
# expected digest.
#
# Arguments:
#   $_[0]  file path, relative to /humgen/1kg/DCC/ftp/
#   $_[1]  expected MD5 digest
#
# Prints a progress line to STDOUT. When the digest reported by md5sum differs
# from the expected one (or no digest could be obtained, in which case the
# computed digest is the empty string), the line
# "file<TAB>computed<TAB>expected" is written to the global OUT filehandle,
# which must already be open for writing.
sub check {
    my ($file, $target) = @_;
    my $path = "/humgen/1kg/DCC/ftp/$file";

    print "Checking $path\n";

    my $digest = '';

    # List-form pipe open runs md5sum directly, without a shell, so spaces or
    # shell metacharacters in the file name cannot alter the command.
    if ( open(my $md5_fh, '-|', 'md5sum', $path) ) {
        my $line = <$md5_fh>;
        if ( defined $line ) {
            my @md5 = split(' ', $line);
            $digest = $md5[0] if @md5;
        }
        # A failing md5sum (e.g. unreadable file) produces no output, which
        # is already reflected by the empty digest.
        close($md5_fh);
    }
    else {
        warn "can't run md5sum on $path: $!\n";
    }

    if ( $digest ne $target ) {
        print OUT "$file\t$digest\t$target\n";
    }
}
|
||||
|
|
@ -0,0 +1,6 @@
|
|||
#!/bin/bash
# Build four lists in the current directory:
#   filesWeHave.list    regular files found under /humgen/1kg/DCC/ftp/data/,
#                       as paths relative to /humgen/1kg/DCC/ftp/
#   filesWeWant.list    columns 1, 3 and 5 of every alignment.index line that
#                       does not contain "MD5"
#   filesToDelete.list  entries only in filesWeHave.list
#   filesToGet.list     entries only in filesWeWant.list

# Abort on the first failing command or pipeline stage. Without this, an
# unreadable alignment.index would yield an empty filesWeWant.list and
# therefore a filesToDelete.list naming every local file.
set -euo pipefail

# Strip the mirror root instead of printing a fixed number of path components,
# so files at any depth below data/ keep their full relative path.
find /humgen/1kg/DCC/ftp/data/ -type f | sed 's|^/humgen/1kg/DCC/ftp/||' | sort > filesWeHave.list
grep -v MD5 /humgen/1kg/DCC/ftp/alignment.index | awk '{print $1 "\n" $3 "\n" $5}' | sort > filesWeWant.list

# comm needs both inputs sorted the same way; both lists come from sort above.
comm -23 filesWeHave.list filesWeWant.list > filesToDelete.list
comm -13 filesWeHave.list filesWeWant.list > filesToGet.list
|
||||
|
||||
Loading…
Reference in New Issue