diff --git a/perl/sync1000Genomes/checkMD5s.pl b/perl/sync1000Genomes/checkMD5s.pl
new file mode 100755
index 000000000..199b2d04b
--- /dev/null
+++ b/perl/sync1000Genomes/checkMD5s.pl
@@ -0,0 +1,80 @@
+#!/usr/bin/perl -w
+#
+# checkMD5s.pl - verify the MD5 checksums listed in an alignment index.
+#
+# Reads the index named by -ai.  Every line that splits into exactly six
+# whitespace-separated fields is treated as three (file, expected MD5) pairs;
+# lines with any other field count are skipped.  Each file is looked up under
+# $FTP_ROOT and hashed with md5sum(1).  Every mismatch is written to the -o
+# file as "file<TAB>computed<TAB>expected".
+
+use strict;
+use warnings;
+
+use Getopt::Long;
+
+# Directory that the relative paths in the index are resolved against.
+my $FTP_ROOT = '/humgen/1kg/DCC/ftp';
+
+# Print a usage summary and exit with status 1.
+sub usage {
+    print "Usage: perl checkMD5s.pl\n\t-ai <alignment index file>\n\t-o <output file>\n";
+    exit(1);
+}
+
+my $ai  = undef;
+my $out = undef;
+GetOptions(
+    "ai=s" => \$ai,
+    "o=s"  => \$out,
+) or usage();
+
+usage() if ( !$ai || !$out );
+
+# Three-argument open with lexical handles: a file name that starts with
+# '>', '<' or '|' can no longer change the open mode.
+open( my $out_fh, '>', $out ) or die "can't open $out: $!";
+open( my $list_fh, '<', $ai ) or die "can't open $ai: $!";
+
+while ( my $line = <$list_fh> ) {
+    my @pieces = split( ' ', $line );
+    next if @pieces != 6;
+
+    check( $pieces[0], $pieces[1] );
+    check( $pieces[2], $pieces[3] );
+    check( $pieces[4], $pieces[5] );
+}
+
+close($list_fh);
+# Buffered write errors only surface at close, so check it.
+close($out_fh) or die "can't close $out: $!";
+
+# check($file, $target)
+#   $file   - path of the file relative to $FTP_ROOT
+#   $target - MD5 digest the index expects for that file
+# Hashes the file with md5sum and records a line in the output file when the
+# computed digest differs from $target.  A file that md5sum cannot hash is
+# recorded with an empty computed digest and a warning goes to STDERR.
+sub check {
+    my ( $file, $target ) = @_;
+
+    my $path = "$FTP_ROOT/$file";
+    print "Checking $path\n";
+
+    # List-form pipe open runs md5sum without a shell, so spaces or shell
+    # metacharacters in $file cannot alter the command.
+    my $computed = '';
+    if ( open( my $md5_fh, '-|', 'md5sum', $path ) ) {
+        my $first_line = <$md5_fh>;
+        if ( defined $first_line ) {
+            my @md5 = split( ' ', $first_line );
+            $computed = $md5[0] if @md5;
+        }
+        close($md5_fh);    # a failed md5sum simply leaves $computed empty
+    }
+    warn "md5sum produced no digest for $path\n" if $computed eq '';
+
+    if ( $computed ne $target ) {
+        print {$out_fh} "$file\t$computed\t$target\n";
+    }
+}
diff --git a/perl/sync1000Genomes/findFilesNotInAlignmentIndex.sh b/perl/sync1000Genomes/findFilesNotInAlignmentIndex.sh
new file mode 100755
index 000000000..1592b1fff
--- /dev/null
+++ b/perl/sync1000Genomes/findFilesNotInAlignmentIndex.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+# Compare the files present under /humgen/1kg/DCC/ftp/data/ with the files
+# listed in alignment.index, and write what to delete and what to fetch.
+
+# Splitting on "/" makes field 6 "data"; keep fields 6-9 of every file found.
+find /humgen/1kg/DCC/ftp/data/ -type f | awk -F "/" '{print $6 "/" $7 "/" $8 "/" $9}' | sort > filesWeHave.list
+
+# Columns 1, 3 and 5 of each index line that does not contain "MD5".
+grep -v MD5 /humgen/1kg/DCC/ftp/alignment.index | awk '{print $1 "\n" $3 "\n" $5}' | sort > filesWeWant.list
+
+# Lines only in the first list: files we have that the index does not name.
+comm -23 filesWeHave.list filesWeWant.list > filesToDelete.list
+# Lines only in the second list: files the index names that we do not have.
+comm -13 filesWeHave.list filesWeWant.list > filesToGet.list
+