From 63f40215b3a73bb0a2e22774f6e3c26bb1c3dbf3 Mon Sep 17 00:00:00 2001
From: ebanks
Date: Tue, 22 Feb 2011 04:17:36 +0000
Subject: [PATCH] 2 more scripts I found helpful in syncing (and cleaning up) the 1000G mirror

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5287 348d0f76-0448-11de-a6fe-93d51630548a
---
 perl/sync1000Genomes/checkMD5s.pl                  | 77 +++++++++++++++++++
 .../findFilesNotInAlignmentIndex.sh                | 15 ++++
 2 files changed, 92 insertions(+)
 create mode 100755 perl/sync1000Genomes/checkMD5s.pl
 create mode 100755 perl/sync1000Genomes/findFilesNotInAlignmentIndex.sh

diff --git a/perl/sync1000Genomes/checkMD5s.pl b/perl/sync1000Genomes/checkMD5s.pl
new file mode 100755
index 000000000..199b2d04b
--- /dev/null
+++ b/perl/sync1000Genomes/checkMD5s.pl
@@ -0,0 +1,77 @@
+#!/usr/bin/perl -w
+
+# checkMD5s.pl - verify the local 1000 Genomes mirror against alignment.index.
+#
+# Reads the index named by -ai.  Every line that splits into exactly six
+# whitespace-separated fields is treated as three (file, md5) pairs.  Each
+# file is checksummed with md5sum under $FTP_ROOT, and every mismatch is
+# written to the -o file as tab-separated: file, computed md5, expected md5.
+
+use strict;
+use warnings;
+use Getopt::Long;
+
+# Root of the local mirror; paths in the index are relative to it.
+my $FTP_ROOT = "/humgen/1kg/DCC/ftp";
+
+# Print the command-line synopsis and exit with a failure status.
+sub usage {
+    print "Usage: perl checkMD5s.pl\n\t-ai <alignment.index file>\n\t-o <output file>\n";
+    exit(1);
+}
+
+my $ai = undef;
+my $out = undef;
+GetOptions( "ai=s" => \$ai,
+            "o=s" => \$out);
+
+usage() if ( !$ai || !$out );
+
+# Three-argument open: characters in a file name can never alter the mode.
+open(my $out_fh, ">", $out) or die "can't open $out: $!";
+open(my $list_fh, "<", $ai) or die "can't open $ai: $!";
+
+while ( my $line = <$list_fh> ) {
+    my @pieces = split(' ', $line);
+
+    # Lines without exactly six fields are skipped.
+    if ( @pieces == 6 ) {
+        check($pieces[0], $pieces[1]);
+        check($pieces[2], $pieces[3]);
+        check($pieces[4], $pieces[5]);
+    }
+}
+
+close($list_fh);
+# Buffered write errors only surface at close, so check it.
+close($out_fh) or die "can't close $out: $!";
+
+# check(FILE, TARGET)
+#   FILE   - path relative to $FTP_ROOT
+#   TARGET - md5 checksum expected for FILE
+# Runs md5sum on the file and appends a line to the report when the computed
+# checksum differs from TARGET.  If md5sum yields nothing (for example the
+# file is missing) the computed checksum is reported as an empty string.
+sub check {
+    my ($file, $target) = @_;
+
+    my $path = "$FTP_ROOT/$file";
+    print "Checking $path\n";
+
+    # List-form pipe open runs md5sum directly, with no shell, so spaces or
+    # metacharacters in a file name cannot break or hijack the command.
+    my $first_line;
+    if ( open(my $md5_fh, "-|", "md5sum", $path) ) {
+        $first_line = <$md5_fh>;
+        close($md5_fh);
+    }
+    else {
+        warn "can't run md5sum on $path: $!\n";
+    }
+
+    my @md5 = defined $first_line ? split(' ', $first_line) : ();
+    my $actual = defined $md5[0] ? $md5[0] : "";
+    if ( $actual ne $target ) {
+        print $out_fh "$file\t$actual\t$target\n";
+    }
+}
diff --git a/perl/sync1000Genomes/findFilesNotInAlignmentIndex.sh b/perl/sync1000Genomes/findFilesNotInAlignmentIndex.sh
new file mode 100755
index 000000000..1592b1fff
--- /dev/null
+++ b/perl/sync1000Genomes/findFilesNotInAlignmentIndex.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+# Compare the files in the local 1000G mirror with those named in
+# alignment.index and write out what to delete and what to fetch.
+
+# Files present locally: "/"-separated fields 6-9 of each absolute path.
+find /humgen/1kg/DCC/ftp/data/ -type f | awk -F "/" '{print $6 "/" $7 "/" $8 "/" $9}' | sort > filesWeHave.list
+
+# Files named in the index: columns 1, 3 and 5 of every line not containing "MD5".
+grep -v MD5 /humgen/1kg/DCC/ftp/alignment.index | awk '{print $1 "\n" $3 "\n" $5}' | sort > filesWeWant.list
+
+# Only in the mirror: candidates for deletion.
+comm -23 filesWeHave.list filesWeWant.list > filesToDelete.list
+# Only in the index: still to be fetched.
+comm -13 filesWeHave.list filesWeWant.list > filesToGet.list
+