From f5e547ed6ef642406e771a62796d738649c31a0f Mon Sep 17 00:00:00 2001 From: andrewk Date: Wed, 16 Dec 2009 20:38:50 +0000 Subject: [PATCH] Add ability for flat file table parsing module to skip ahead to first occurrence of a regular expression (use case: consistently parsing DepthOfCoverage output for histogram section of file across file format changes) git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2377 348d0f76-0448-11de-a6fe-93d51630548a --- python/FlatFileTable.py | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/python/FlatFileTable.py b/python/FlatFileTable.py index 9f54c603e..dbb725673 100644 --- a/python/FlatFileTable.py +++ b/python/FlatFileTable.py @@ -2,15 +2,31 @@ import sys, itertools -def record_generator(filename, sep="\t", skip_n_lines=0): +def record_generator(filename, sep="\t", skip_n_lines=0, skip_until_regex_line=""): """Given a file with field headers on the first line and records on subsequent lines, generates a dictionary for each line keyed by the header fields""" fin = open(filename) - for i in range(skip_n_lines): # Skip a number of lines - fin.readline() + if skip_n_lines > 0: + for i in range(skip_n_lines): # Skip a number of lines + fin.readline() - header = fin.readline().rstrip().split(sep) # Pull off header + found_regex = False + if skip_until_regex_line != "": + import re + regex_line = re.compile(skip_until_regex_line) + for line in fin: + match = regex_line.search(line) + if match: + found_regex = line + break + if not found_regex: + print "Warning: Regex "+skip_until_regex_line+" not found in FlatFileTable:record_generator" + + if found_regex: + header = found_regex.rstrip().split(sep) # Parse header + else: + header = fin.readline().rstrip().split(sep) # Pull off header for line in fin: # fields = line.rstrip().split(sep)