2 more scripts I found helpful in syncing (and cleaning up) the 1000G mirror
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5287 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
d7f98ccd9c
commit
63f40215b3
|
|
@ -0,0 +1,43 @@
|
||||||
|
#!/usr/bin/perl -w
|
||||||
|
|
||||||
|
use Getopt::Long;
|
||||||
|
|
||||||
|
# Print the command-line synopsis for checkMD5s.pl and terminate the script
# with exit status 1. Takes no arguments and never returns to the caller.
sub usage {
    my $synopsis = "Usage: perl checkMD5s.pl\n\t-ai <alignment.index to check>\n\t-o <file to store results>\n";
    print $synopsis;
    exit 1;
}
|
||||||
|
|
||||||
|
|
||||||
|
# Main script body.
#
# Reads the alignment index named by -ai and, for every line that has exactly
# six whitespace-separated fields, treats the fields as three (file, md5)
# pairs and hands each pair to check(). Mismatches are written by check() to
# the report file named by -o.

my $ai  = undef;    # -ai: path of the alignment.index file to verify
my $out = undef;    # -o:  path of the file that receives the mismatch report

# GetOptions returns false when the command line contains an unknown or
# malformed option; show the usage text instead of silently carrying on.
GetOptions( "ai=s" => \$ai,
            "o=s"  => \$out ) or usage();

usage() if ( !$ai || !$out );

# Three-argument open keeps characters such as '>', '<', '|' or surrounding
# whitespace in a user-supplied path from being interpreted as an open mode.
# OUT deliberately stays a package-level bareword handle: check() prints to
# it by name.
open(OUT, '>', $out) or die "can't open $out: $!";

open(my $list, '<', $ai) or die "can't open $ai: $!";
while ( my $line = <$list> ) {
    my @pieces = split(' ', $line);

    # Lines that do not carry exactly three (file, md5) pairs are skipped.
    next if @pieces != 6;

    check($pieces[0], $pieces[1]);
    check($pieces[2], $pieces[3]);
    check($pieces[4], $pieces[5]);
}

close($list);

# Buffered write errors (e.g. a full disk) only surface when the handle is
# closed, so a failed close means the report may be incomplete.
close(OUT) or die "can't close $out: $!";
|
||||||
|
|
||||||
|
# check($file, $target)
#
# Runs the external md5sum program on /humgen/1kg/DCC/ftp/$file and compares
# the resulting checksum with $target.
#
#   $file   - file path relative to /humgen/1kg/DCC/ftp/
#   $target - expected MD5 checksum string
#
# Prints a progress line to the default output handle. When the checksums
# differ, writes "$file<TAB>computed<TAB>$target" to the package filehandle
# OUT, which the caller must already have opened for writing. If md5sum
# yields no checksum (for example because the file is missing), the computed
# value is the empty string, so the file is still reported as a mismatch.
# Returns nothing.
sub check {
    my ($file, $target) = @_;

    my $path = "/humgen/1kg/DCC/ftp/$file";
    print "Checking $path\n";

    # List-form pipe open runs md5sum directly, without a shell, so spaces or
    # shell metacharacters in the file name cannot split or alter the command.
    my $md5 = '';
    if ( open(my $pipe, '-|', 'md5sum', $path) ) {
        my $line = <$pipe>;

        # A non-zero md5sum exit status is not treated as fatal here; it shows
        # up as an empty checksum and is reported through the mismatch below.
        close($pipe);

        if ( defined $line ) {
            my @fields = split(' ', $line);
            $md5 = $fields[0] if @fields;    # first field is the checksum
        }
    }
    else {
        warn "can't run md5sum on $path: $!\n";
    }

    if ( $md5 ne $target ) {
        print OUT "$file\t$md5\t$target\n";
    }

    return;
}
|
||||||
|
|
@ -0,0 +1,6 @@
|
||||||
|
#!/bin/bash
# Compare the files present under /humgen/1kg/DCC/ftp/data/ with the files
# named in /humgen/1kg/DCC/ftp/alignment.index and write four lists into the
# current directory:
#   filesWeHave.list    - files found on disk
#   filesWeWant.list    - files named in alignment.index
#   filesToDelete.list  - on disk but not in the index
#   filesToGet.list     - in the index but not on disk

# Files on disk. With "/" as the separator, field 6 of each absolute path is
# "data", so fields 6-9 give the path relative to /humgen/1kg/DCC/ftp/
# (exactly four components are kept).
find /humgen/1kg/DCC/ftp/data/ -type f \
    | awk -F/ '{ printf "%s/%s/%s/%s\n", $6, $7, $8, $9 }' \
    | sort > filesWeHave.list

# Files wanted. Lines containing "MD5" are dropped; columns 1, 3 and 5 of the
# remaining lines are emitted one per line.
grep -v MD5 /humgen/1kg/DCC/ftp/alignment.index \
    | awk '{ print $1; print $3; print $5 }' \
    | sort > filesWeWant.list

# Lines found only in the first list: we have them but do not want them.
comm -2 -3 filesWeHave.list filesWeWant.list > filesToDelete.list

# Lines found only in the second list: we want them but do not have them.
comm -1 -3 filesWeHave.list filesWeWant.list > filesToGet.list
|
||||||
|
|
||||||
Loading…
Reference in New Issue