Add the ability for the flat file table parsing module to skip ahead to the first occurrence of a regular expression (use case: consistently parsing the histogram section of DepthOfCoverage output across file format changes)

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2377 348d0f76-0448-11de-a6fe-93d51630548a
2009-12-16 20:38:50 +00:00 · 2009-12-16 20:38:50 +00:00 · f5e547ed6e
parent b626fc0684
commit f5e547ed6e
1 changed files with 20 additions and 4 deletions
--- a/python/FlatFileTable.py
+++ b/python/FlatFileTable.py
@ -2,15 +2,31 @@

 import sys, itertools

-def record_generator(filename, sep="\t", skip_n_lines=0):
+def record_generator(filename, sep="\t", skip_n_lines=0, skip_until_regex_line=""):
    """Given a file with field headers on the first line and records on subsequent lines,
 generates a dictionary for each line keyed by the header fields"""
    fin = open(filename)

-    for i in range(skip_n_lines): # Skip a number of lines
-        fin.readline()
+    if skip_n_lines > 0:
+        for i in range(skip_n_lines): # Skip a number of lines
+            fin.readline()

-    header = fin.readline().rstrip().split(sep) # Pull off header
+    found_regex = False
+    if skip_until_regex_line != "":
+        import re
+        regex_line = re.compile(skip_until_regex_line)
+        for line in fin:
+            match = regex_line.search(line)
+            if match:
+                found_regex = line
+                break
+        if not found_regex:
+            print "Warning: Regex "+skip_until_regex_line+" not found in FlatFileTable:record_generator"
+
+    if found_regex:
+        header = found_regex.rstrip().split(sep) # Parse header
+    else:
+        header = fin.readline().rstrip().split(sep) # Pull off header
    
    for line in fin: # 
        fields = line.rstrip().split(sep)