Now supports the -o option (write output to a file instead of stdout) as well as a verbose output mode (-v)

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4162 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
depristo 2010-08-29 16:00:00 +00:00
parent 4c28fc3a39
commit 9556004dbb
1 changed file with 28 additions and 9 deletions

View File

@ -12,22 +12,36 @@ def main():
parser.add_option("-f", "--f", dest="fai",
type='string', default=None,
help="FAI file defining the sort order of the VCF")
parser.add_option("-o", "--o", dest="output",
type='string', default=None,
help="if provided, output will go here instead of stdout")
parser.add_option("-a", "--assumeSorted", dest="assumeSorted",
action='store_true', default=False,
help="If provided, this assumes the input VCF files are themselves sorted, enabling a simple efficent merge")
parser.add_option("-v", "--verbose", dest="verbose",
action='store_true', default=False,
help="If provided, verbose progress will be enabled")
(OPTIONS, args) = parser.parse_args()
if len(args) == 0:
parser.error("Requires at least 1 VCF to merge")
order = None
if OPTIONS.fai <> None: order = faiReader.readFAIContigOrdering(OPTIONS.fai)
if OPTIONS.fai <> None:
if OPTIONS.verbose: print 'reading FAI', OPTIONS.fai
order = faiReader.readFAIContigOrdering(OPTIONS.fai)
#print 'Order', order
if OPTIONS.assumeSorted:
mergeSort(args, order)
if OPTIONS.output != None:
out = open(OPTIONS.output,'w')
else:
memSort(args, order)
out = sys.stdout
if OPTIONS.assumeSorted:
mergeSort(out, args, order)
else:
memSort(out, args, order)
def cmpVCFRecords(order, r1, r2):
if order <> None:
@ -38,7 +52,7 @@ def cmpVCFRecords(order, r1, r2):
return orderCmp
return cmp(r1.getPos(), r2.getPos())
def mergeSort(args, order):
def mergeSort(out, args, order):
#print 'MergeSort', args, order
header = None
@ -55,11 +69,16 @@ def mergeSort(args, order):
sortedOrderMap = sorted(orderMap, key=lambda x: x[0], cmp = lambda r1, r2: cmpVCFRecords(order, r1, r2))
#print sortedOrderMap
for headerLine in header: print headerLine
for headerLine in header: print >> out, headerLine
i = 0
n = len(sortedOrderMap)
for file in map( lambda x: x[1], sortedOrderMap):
#print file
if OPTIONS.verbose:
i += 1
print 'Processing', file, ':', i, 'of', n
for record in lines2VCF(open(file), extendedOutput = False, decodeAll = False):
print record.format()
print >> out, record.format()
def memSort(args, order):
header = None
@ -72,7 +91,7 @@ def memSort(args, order):
records.sort(lambda r1, r2: cmpVCFRecords(order, r1, r2))
for line in formatVCF(header, records):
#pass
print line
print >> out, line
PROFILE = False
if __name__ == "__main__":