Notes for Aaron in SSG
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1576 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
1362a56227
commit
c988205884
|
|
@ -154,7 +154,8 @@ public class SingleSampleGenotyper extends LocusWalker<SSGenotypeCall, SingleSam
|
|||
*/
|
||||
public CallResult reduce(SSGenotypeCall call, CallResult sum) {
|
||||
sum.nCalledBases++;
|
||||
|
||||
|
||||
// todo -- aaron, fixme -- this should be using variation() in discovery mode and genotype if not
|
||||
if (call != null && (GENOTYPE || call.isVariant(call.getReference()))) {
|
||||
if (call.getNegLog10PError() >= LOD_THRESHOLD) {
|
||||
sum.nConfidentCalls++;
|
||||
|
|
|
|||
|
|
@ -0,0 +1,215 @@
|
|||
import farm_commands
|
||||
import os.path
|
||||
import sys
|
||||
from optparse import OptionParser
|
||||
from datetime import date
|
||||
import glob
|
||||
import operator
|
||||
import itertools
|
||||
|
||||
class Status:
|
||||
def __init__(self, file, exists, size):
|
||||
self.file = file
|
||||
self.exists = exists
|
||||
self.size = size
|
||||
|
||||
if not exists: self.status = "missing"
|
||||
if size == 0: self.status = "no-data"
|
||||
else: self.status = "exists: bytes=" + str(self.size)
|
||||
|
||||
def __str__(self):
|
||||
return self.status
|
||||
|
||||
def viewSize(self):
|
||||
return MergeBAMsUtils.greek(self.size)
|
||||
|
||||
class ComparedFiles:
|
||||
def __init__(self, file, status, localStat, ftpStat):
|
||||
self.file = file
|
||||
self.status = status
|
||||
self.localStat = localStat
|
||||
self.ftpStat = ftpStat
|
||||
|
||||
def size(self):
|
||||
if self.localStat.size <> 0:
|
||||
return self.localStat.size
|
||||
if self.ftpStat.size <> 0:
|
||||
return self.ftpStat.size
|
||||
else:
|
||||
return 0
|
||||
|
||||
def modTime(self):
|
||||
if self.localStat.exists:
|
||||
return os.path.getmtime(self.localStat.file)
|
||||
else:
|
||||
return 0
|
||||
|
||||
def modTimeStr(t):
|
||||
return time.strftime("%m/%d/%y", time.localtime(t))
|
||||
|
||||
from urlparse import urlparse
|
||||
from ftplib import FTP
|
||||
|
||||
FTPSERVER = None
|
||||
|
||||
DEBUG = False
|
||||
|
||||
# from directories to lists of lines
|
||||
CACHED_LIST = dict()
|
||||
def getSizeForFile(dir, filename):
|
||||
global CACHED_LIST
|
||||
size = [0]
|
||||
def captureSize(line, cache = True):
|
||||
#print line
|
||||
if cache: CACHED_LIST[dir].append(line)
|
||||
s = line.split()
|
||||
if len(s) >= 9 and s[8] == filename:
|
||||
size[0] = int(s[4])
|
||||
#print 'Found size', s, size
|
||||
|
||||
if dir in CACHED_LIST:
|
||||
#print 'cached is', CACHED_LIST[dir]
|
||||
map( lambda l: captureSize(l, False), CACHED_LIST[dir] )
|
||||
else:
|
||||
FTPSERVER.cwd(dir)
|
||||
CACHED_LIST[dir] = list()
|
||||
result = FTPSERVER.retrlines('LIST', captureSize)
|
||||
|
||||
return size[0]
|
||||
|
||||
def ftpStatus( ftpPath ):
|
||||
if DEBUG: print 'ftpPath', ftpPath
|
||||
dir, filename = os.path.split(ftpPath)
|
||||
if DEBUG: print 'listing', dir
|
||||
|
||||
try:
|
||||
size = getSizeForFile(dir, filename)
|
||||
except:
|
||||
#print 'failing...'
|
||||
size = 0
|
||||
# finally:
|
||||
# pass
|
||||
#print 'FTPSERVER', FTPSERVER
|
||||
#FTPSERVER.quit()
|
||||
|
||||
if DEBUG: print ' result was', size
|
||||
return Status( ftpPath, size <> 0, size )
|
||||
|
||||
def localStatus(file):
|
||||
exists = os.path.exists(file)
|
||||
size = 0
|
||||
if exists: size = os.path.getsize(file)
|
||||
return Status(file, exists, int(size) )
|
||||
|
||||
def validateFile(relPath, localRoot, ftpRoot):
|
||||
localPath = os.path.join(root, relPath)
|
||||
ftpPath = os.path.join(ftpRoot, relPath)
|
||||
|
||||
# check the local file
|
||||
if DEBUG: print 'Checking', relPath
|
||||
localStat = localStatus(localPath)
|
||||
ftpStat = ftpStatus(ftpPath)
|
||||
if DEBUG: print ' local status is', localStat
|
||||
if DEBUG: print ' ftp status is ', ftpStat
|
||||
compared = compareFileStatus(localStat, ftpStat)
|
||||
|
||||
if not OPTIONS.quiet:
|
||||
print 'STATUS %20s for %s ' % (compared.status, relPath)
|
||||
return compared
|
||||
|
||||
import MergeBAMsUtils
|
||||
|
||||
import time
|
||||
def compareFileStatus(localStat, ftpStat):
|
||||
if localStat.exists:
|
||||
if ftpStat.exists:
|
||||
if localStat.size == ftpStat.size:
|
||||
status = 'in-sync'
|
||||
else:
|
||||
status = 'size-mismatch'
|
||||
else:
|
||||
status = 'unknown-local-file'
|
||||
else:
|
||||
if ftpStat.exists:
|
||||
status = 'local-file-missing'
|
||||
else:
|
||||
status = 'orphaned-file'
|
||||
|
||||
return ComparedFiles(localStat.file, status, localStat, ftpStat)
|
||||
|
||||
|
||||
import re
|
||||
def filesInLocalPath(root, subdir):
|
||||
regex = re.compile(".*\.(bam|bai)$")
|
||||
localFiles = set()
|
||||
|
||||
if subdir <> None:
|
||||
for fullroot, dirs, files in os.walk(os.path.join(root, subdir)):
|
||||
for file in filter( regex.match, files ):
|
||||
fullpath = os.path.join(fullroot, file)
|
||||
path = fullpath.split(root)[1]
|
||||
#print 'adding relpath=', path, 'fullpath=', fullpath
|
||||
localFiles.add(path)
|
||||
if OPTIONS.maxLocalFiles <> None and len(localFiles) > OPTIONS.maxLocalFiles: return localFiles
|
||||
return localFiles
|
||||
|
||||
def readAlignmentIndex(file):
|
||||
files = set()
|
||||
if file <> None:
|
||||
for line in open(file):
|
||||
files.add(line.split()[0])
|
||||
files.add(line.split()[4])
|
||||
return files
|
||||
|
||||
if __name__ == "__main__":
|
||||
usage = "usage: %prog -l and/or -a root ftpRoot"
|
||||
parser = OptionParser(usage=usage)
|
||||
parser.add_option("-l", "--local", dest="scanLocal",
|
||||
type='string', default=None,
|
||||
help="If provided, checks all of the local files against the archive")
|
||||
parser.add_option("-a", "--alignmentIndex", dest="alignmentIndex",
|
||||
type='string', default=None,
|
||||
help="If provided, checks all of the files in the alignment.index in the archive")
|
||||
parser.add_option("-m", "--maxLocal", dest="maxLocalFiles",
|
||||
type='int', default=None,
|
||||
help="If provided, maximum number of files in the local archive to examine")
|
||||
parser.add_option("-q", "--quiet", dest="quiet",
|
||||
action='store_true', default=False,
|
||||
help="If provided, prints out the individual status of all files")
|
||||
|
||||
(OPTIONS, args) = parser.parse_args()
|
||||
if len(args) != 2:
|
||||
parser.error("incorrect number of arguments")
|
||||
root, ftpRoot = args
|
||||
|
||||
ftpParsed = urlparse(ftpRoot)
|
||||
FTPSERVER = FTP(ftpParsed[1])
|
||||
FTPSERVER.login()
|
||||
|
||||
results = dict()
|
||||
for file in itertools.chain(readAlignmentIndex(OPTIONS.alignmentIndex), filesInLocalPath(root, OPTIONS.scanLocal )):
|
||||
#print line
|
||||
#bas = line.split()[6]
|
||||
if file not in results:
|
||||
compared = validateFile( file, root, ftpParsed[2] )
|
||||
results[file] = compared
|
||||
#localIndex
|
||||
|
||||
print 'SUMMARY: Total files examined', len(results)
|
||||
for status in ['in-sync', 'size-mismatch', 'unknown-local-file', 'local-file-missing', 'orphaned-file']:
|
||||
print ''.join(['-'] * 80)
|
||||
filesOfStatus = filter(lambda x: x.status == status, results.itervalues())
|
||||
n = len(filesOfStatus)
|
||||
print 'SUMMARY: %s' % ( status )
|
||||
print 'SUMMARY: files %d (%.2f%% of total)' % ( n, n * 100.0 / len(results))
|
||||
|
||||
if n > 0:
|
||||
fileSizes = MergeBAMsUtils.greek(reduce(operator.__add__, map( ComparedFiles.size, filesOfStatus ), 0 ))
|
||||
mostRecentMod = apply(max, map( ComparedFiles.modTime, filesOfStatus ))
|
||||
if mostRecentMod > 0:
|
||||
modTime = modTimeStr(mostRecentMod)
|
||||
else:
|
||||
modTime = "N/A"
|
||||
|
||||
print 'SUMMARY: total size %s' % ( fileSizes )
|
||||
print 'SUMMARY: last modification time %s' % ( modTime )
|
||||
Loading…
Reference in New Issue