better feedback now

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1579 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
depristo 2009-09-10 12:43:45 +00:00
parent 296878e8e3
commit fc0d9578f6
1 changed file with 85 additions and 23 deletions

View File

@ -6,6 +6,18 @@ from datetime import date
import glob import glob
import operator import operator
import itertools import itertools
from urlparse import urlparse
from ftplib import FTP
import MergeBAMsUtils
import time
import re
import hashlib
FTPSERVER = None
DEBUG = False
CACHED_LIST = dict() # from directories to lists of lines
class Status: class Status:
def __init__(self, file, exists, size): def __init__(self, file, exists, size):
@ -22,6 +34,13 @@ class Status:
def viewSize(self):
    """Return self.size formatted by MergeBAMsUtils.greek.

    NOTE(review): greek() is defined in MergeBAMsUtils, which is not
    visible here; presumably it renders a byte count in human-readable
    units -- confirm against that module.
    """
    formatted = MergeBAMsUtils.greek(self.size)
    return formatted
def md5(file):
    """Return the hexadecimal MD5 digest of the contents of a file.

    file -- path of the file to hash.

    The file is read in binary mode so the digest reflects the exact
    bytes on disk (no newline translation), and it is read in fixed-size
    chunks so that large files are not loaded into memory at once.
    """
    digest = hashlib.md5()
    fd = open(file, 'rb')
    try:
        while True:
            chunk = fd.read(65536)
            if not chunk:
                break
            digest.update(chunk)
    finally:
        # Always release the handle, even if reading fails part-way.
        fd.close()
    return digest.hexdigest()
class ComparedFiles: class ComparedFiles:
def __init__(self, file, status, localStat, ftpStat): def __init__(self, file, status, localStat, ftpStat):
@ -45,17 +64,11 @@ class ComparedFiles:
return 0 return 0
def modTimeStr(t):
    """Format a modification time as MM/DD/YY in local time.

    t -- seconds since the epoch; 0 or None means "no timestamp".

    Returns the string 'N/A' when there is no timestamp, otherwise the
    formatted local date.
    """
    # None must be caught here too: time.localtime(None) silently returns
    # the current time, which would print today's date for a missing file.
    if not t:
        return 'N/A'
    return time.strftime("%m/%d/%y", time.localtime(t))
from urlparse import urlparse
from ftplib import FTP
FTPSERVER = None
DEBUG = False
# from directories to lists of lines
CACHED_LIST = dict()
def getSizeForFile(dir, filename): def getSizeForFile(dir, filename):
global CACHED_LIST global CACHED_LIST
size = [0] size = [0]
@ -95,6 +108,16 @@ def ftpStatus( ftpPath ):
if DEBUG: print ' result was', size if DEBUG: print ' result was', size
return Status( ftpPath, size <> 0, size ) return Status( ftpPath, size <> 0, size )
def fetchFtpFile( file ):
    """Download a file from the FTP server into the current directory.

    file -- remote path, sent to the server as 'RETR <file>'.

    The local copy is named '<basename>.fetched.<MM_DD_YY>' using today's
    date. Returns a Status for the local copy (exists=True, size taken
    from disk after the transfer).

    Uses the module-level FTPSERVER connection, which must already be
    set up by the caller.
    """
    filename = os.path.split(file)[1]
    destFile = filename + '.fetched.' + date.today().strftime("%m_%d_%y")
    # retrbinary delivers raw bytes, so the destination is opened in binary
    # mode; text mode would corrupt the data on platforms that translate
    # newlines.
    fd = open(destFile, 'wb')
    try:
        FTPSERVER.retrbinary('RETR ' + file, fd.write)
    finally:
        # Close the handle even when the transfer fails.
        fd.close()
    return Status(destFile, True, os.path.getsize(destFile))
def localStatus(file): def localStatus(file):
exists = os.path.exists(file) exists = os.path.exists(file)
size = 0 size = 0
@ -117,9 +140,6 @@ def validateFile(relPath, localRoot, ftpRoot):
print 'STATUS %20s for %s ' % (compared.status, relPath) print 'STATUS %20s for %s ' % (compared.status, relPath)
return compared return compared
import MergeBAMsUtils
import time
def compareFileStatus(localStat, ftpStat): def compareFileStatus(localStat, ftpStat):
if localStat.exists: if localStat.exists:
if ftpStat.exists: if ftpStat.exists:
@ -138,7 +158,6 @@ def compareFileStatus(localStat, ftpStat):
return ComparedFiles(localStat.file, status, localStat, ftpStat) return ComparedFiles(localStat.file, status, localStat, ftpStat)
import re
def filesInLocalPath(root, subdir): def filesInLocalPath(root, subdir):
regex = re.compile(".*\.(bam|bai)$") regex = re.compile(".*\.(bam|bai)$")
localFiles = set() localFiles = set()
@ -161,6 +180,37 @@ def readAlignmentIndex(file):
files.add(line.split()[4]) files.add(line.split()[4])
return files return files
def compareAlignmentIndices(remoteAlignmentIndex, alignmentIndex):
    """Fetch the FTP alignment index and compare it with the local one by MD5.

    remoteAlignmentIndex -- path of the index on the FTP site, joined onto
                            ftpParsed[2]; None disables the check.
    alignmentIndex       -- path of the local index file; None disables
                            the check.

    Prints the two digests and a [PASS]/[FAIL] line. Returns None.

    NOTE(review): relies on the module-level name ftpParsed, which is not
    defined in this function -- presumably the urlparse() result for the
    FTP root set by the script's main section; confirm.
    """
    if remoteAlignmentIndex is None or alignmentIndex is None:
        return

    # Single-argument print(...) emits the same text under Python 2's
    # print statement and Python 3's print function.
    printHeaderSep()
    print('Comparing remote and local alignment indices: ')
    remotePath = os.path.join(ftpParsed[2], remoteAlignmentIndex)
    remoteAlignmentIndexFile = fetchFtpFile( remotePath )
    print(' Fetched %s to %s' % (remotePath, remoteAlignmentIndexFile.file))

    raImd5 = md5(remoteAlignmentIndexFile.file)
    laImd5 = md5(alignmentIndex)
    # Argument order matches the labels: local digest first, then remote.
    print(' md5s: local=%s remote=%s' % (laImd5, raImd5))
    if raImd5 != laImd5:
        print(' [FAIL] -- alignment indices do not have the same hash!')
    else:
        print(' [PASS] -- alignment indices are the same')
def displayChangeLog( changelog ):
    """Fetch the remote changelog and print its first 20 lines.

    changelog -- path of the changelog on the FTP site, joined onto
                 ftpParsed[2]; None means there is nothing to display.

    Each printed line is prefixed with 'CHANGELOG ' and is always
    terminated with a newline. Returns None.

    NOTE(review): relies on the module-level name ftpParsed, which is not
    defined in this function -- presumably the urlparse() result for the
    FTP root set by the script's main section; confirm.
    """
    if changelog is None:
        return

    printHeaderSep()
    print('Displaying remote changelog for examination ')
    remotePath = os.path.join(ftpParsed[2], changelog)
    remoteChangeLog = fetchFtpFile( remotePath )
    print(' Fetched %s to %s' % (remotePath, remoteChangeLog.file))
    print('')

    fd = open(remoteChangeLog.file)
    try:
        for line in itertools.islice(fd, 20):
            # The line already carries its own newline; strip it so that
            # print adds exactly one back.
            print('CHANGELOG ' + line.rstrip('\n'))
    finally:
        # Close the fetched file once the preview has been printed.
        fd.close()
def printHeaderSep():
    """Print a blank line followed by a row of 80 dashes as a section separator."""
    separator = '-' * 80
    print('')
    print(separator)
if __name__ == "__main__": if __name__ == "__main__":
usage = "usage: %prog -l and/or -a root ftpRoot" usage = "usage: %prog -l and/or -a root ftpRoot"
parser = OptionParser(usage=usage) parser = OptionParser(usage=usage)
@ -176,6 +226,12 @@ if __name__ == "__main__":
parser.add_option("-q", "--quiet", dest="quiet", parser.add_option("-q", "--quiet", dest="quiet",
action='store_true', default=False, action='store_true', default=False,
help="If provided, prints out the individual status of all files") help="If provided, prints out the individual status of all files")
parser.add_option("-i", "--remoteAlignmentIndex", dest="remoteAlignmentIndex",
type='string', default=None,
help="relative path to the FTP's alignment.index file for comparison")
parser.add_option("-c", "--remoteChangeLog", dest="remoteChangeLog",
type='string', default=None,
help="relative path to the FTP's CHANGELOG file for display")
(OPTIONS, args) = parser.parse_args() (OPTIONS, args) = parser.parse_args()
if len(args) != 2: if len(args) != 2:
@ -195,21 +251,27 @@ if __name__ == "__main__":
results[file] = compared results[file] = compared
#localIndex #localIndex
compareAlignmentIndices(OPTIONS.remoteAlignmentIndex, OPTIONS.alignmentIndex)
displayChangeLog(OPTIONS.remoteChangeLog)
printHeaderSep()
print 'SUMMARY: Total files examined', len(results) print 'SUMMARY: Total files examined', len(results)
for status in ['in-sync', 'size-mismatch', 'unknown-local-file', 'local-file-missing', 'orphaned-file']: for status in ['in-sync', 'size-mismatch', 'unknown-local-file', 'local-file-missing', 'orphaned-file']:
print ''.join(['-'] * 80) printHeaderSep()
filesOfStatus = filter(lambda x: x.status == status, results.itervalues()) filesOfStatus = filter(lambda x: x.status == status, results.itervalues())
n = len(filesOfStatus) n = len(filesOfStatus)
print 'SUMMARY: %s' % ( status ) print 'SUMMARY: %s' % ( status )
print 'SUMMARY: files %d (%.2f%% of total)' % ( n, n * 100.0 / len(results)) print 'SUMMARY: Files %d (%.2f%% of total)' % ( n, n * 100.0 / len(results))
statusForFileListing = ['size-mismatch', 'local-file-missing']
maxFilesToList = 10
if status in statusForFileListing:
print 'SUMMARY: listing the first', maxFilesToList, 'of', n
for file in itertools.islice(filesOfStatus, maxFilesToList):
print 'SUMMARY: File: %8s %12s %s' % ( MergeBAMsUtils.greek(file.size()), modTimeStr(file.modTime()), file.file)
if n > 0: if n > 0:
fileSizes = MergeBAMsUtils.greek(reduce(operator.__add__, map( ComparedFiles.size, filesOfStatus ), 0 )) fileSizes = MergeBAMsUtils.greek(reduce(operator.__add__, map( ComparedFiles.size, filesOfStatus ), 0 ))
mostRecentMod = apply(max, map( ComparedFiles.modTime, filesOfStatus )) mostRecentMod = modTimeStr(apply(max, map( ComparedFiles.modTime, filesOfStatus )))
if mostRecentMod > 0:
modTime = modTimeStr(mostRecentMod)
else:
modTime = "N/A"
print 'SUMMARY: total size %s' % ( fileSizes ) print 'SUMMARY: total size %s' % ( fileSizes )
print 'SUMMARY: last modification time %s' % ( modTime ) print 'SUMMARY: last modification time %s' % ( mostRecentMod )