From f5e547ed6ef642406e771a62796d738649c31a0f Mon Sep 17 00:00:00 2001
From: andrewk <andrewk@348d0f76-0448-11de-a6fe-93d51630548a>
Date: Wed, 16 Dec 2009 20:38:50 +0000
Subject: [PATCH] Add ability for flat file table parsing module to skip ahead
 to first occurrence of a regular expression (use case: consistently parsing
 DepthOfCoverage output for histogram section of file across file format
 changes)

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2377 348d0f76-0448-11de-a6fe-93d51630548a
---
 python/FlatFileTable.py | 24 ++++++++++++++++++++----
 1 file changed, 20 insertions(+), 4 deletions(-)

diff --git a/python/FlatFileTable.py b/python/FlatFileTable.py
index 9f54c603e..dbb725673 100644
--- a/python/FlatFileTable.py
+++ b/python/FlatFileTable.py
@@ -2,15 +2,31 @@
 
 import sys, itertools
 
-def record_generator(filename, sep="\t", skip_n_lines=0):
+def record_generator(filename, sep="\t", skip_n_lines=0, skip_until_regex_line=""):
     """Given a file with field headers on the first line and records on subsequent lines,
 generates a dictionary for each line keyed by the header fields"""
     fin = open(filename)
 
-    for i in range(skip_n_lines): # Skip a number of lines
-        fin.readline()
+    if skip_n_lines > 0:
+        for i in range(skip_n_lines): # Skip a number of lines
+            fin.readline()
 
-    header = fin.readline().rstrip().split(sep) # Pull off header
+    found_regex = False
+    if skip_until_regex_line != "":
+        import re
+        regex_line = re.compile(skip_until_regex_line)
+        for line in fin:
+            match = regex_line.search(line)
+            if match:
+                found_regex = line
+                break
+        if not found_regex:
+            print "Warning: Regex "+skip_until_regex_line+" not found in FlatFileTable:record_generator"
+
+    if found_regex:
+        header = found_regex.rstrip().split(sep) # Parse header
+    else:
+        header = fin.readline().rstrip().split(sep) # Pull off header
     
     for line in fin: # 
         fields = line.rstrip().split(sep)