diff --git a/ivy.xml b/ivy.xml
index 6ece07367..c2a6c4ccd 100644
--- a/ivy.xml
+++ b/ivy.xml
@@ -60,6 +60,10 @@
+
+
+
+
diff --git a/python/parse_pm_input.py b/python/parse_pm_input.py
new file mode 100644
index 000000000..9857c74df
--- /dev/null
+++ b/python/parse_pm_input.py
@@ -0,0 +1,81 @@
+#
+# Generates BAM lists from Excel and TSV files provided by project managers. Suitable for input into the pre-QC metrics generation
+# script.
+#
+# To run:
+# /humgen/gsa-hpprojects/software/bin/jython2.5.2/jython \
+# -J-classpath $STING_HOME/lib/poi-3.8-beta3.jar:$STING_HOME/lib/poi-ooxml-3.8-beta3.jar:$STING_HOME/lib/poi-ooxml-schemas-3.8-beta3.jar:$STING_HOME/lib/xmlbeans-2.3.0.jar:$STING_HOME/lib/dom4j-1.6.1.jar \
+# parse_pm_input.py <input file> > <output BAM list>
+#
+from java.io import FileInputStream
+from org.apache.poi.ss.usermodel import Row,Sheet,Workbook,WorkbookFactory
+
+import os,sys
+
+# Two-slot path template under /seq/picard_aggregation; the script formats it
+# with a (project, sample) pair to get that sample's directory.
+base_path = '/seq/picard_aggregation/%s/%s'
+
+def excel_reader(filename):
+    """Yield (project, sample) string pairs from an Excel workbook.
+
+    Sheets are scanned in order.  A sheet is used only when its first row
+    (row 0) has a 'Project' header cell and an 'External ID' or
+    'Individual ID' header cell; every later row of that sheet contributes
+    one pair.  Rows that are absent, or that have neither a project nor a
+    sample cell, are skipped.
+
+    Raises ValueError for a row that has only one of the two cells.
+    """
+    # The workbook is built from the stream up front, so the file handle is
+    # released here instead of being left open for the generator's lifetime.
+    stream = FileInputStream(filename)
+    try:
+        wb = WorkbookFactory.create(stream)
+    finally:
+        stream.close()
+
+    for sheet_number in range(wb.getNumberOfSheets()):
+        sheet = wb.getSheetAt(sheet_number)
+
+        header = sheet.getRow(0)
+        if header is None:
+            # Empty sheet: there is no header row to inspect.
+            continue
+
+        project_column = None
+        sample_column = None
+        for cell in header:
+            column_contents = cell.getStringCellValue()
+            if column_contents == 'Project':
+                project_column = cell.getColumnIndex()
+            if column_contents in ('External ID', 'Individual ID'):
+                sample_column = cell.getColumnIndex()
+
+        if project_column is None or sample_column is None:
+            continue
+
+        for row_number in range(1, sheet.getLastRowNum() + 1):
+            row = sheet.getRow(row_number)
+            if row is None:
+                # Rows that were never populated come back as null/None.
+                continue
+            project_cell = row.getCell(project_column)
+            sample_cell = row.getCell(sample_column)
+            if project_cell is None and sample_cell is None:
+                continue
+            if project_cell is None or sample_cell is None:
+                raise ValueError(
+                    '%s: sheet %d, row index %d is missing its project or sample cell'
+                    % (filename, sheet_number, row_number))
+            yield project_cell.getStringCellValue(), sample_cell.getStringCellValue()
+
+        # NOTE(review): the source indentation was lost; this return is taken
+        # to end the generator after the first sheet that has both header
+        # columns, so later sheets are not read -- confirm against the
+        # original file.
+        return
+
+def tsv_reader(filename):
+    """Yield (project, sample) string pairs from a tab-separated text file.
+
+    The first column is the project and the second is the sample; both are
+    stripped of surrounding whitespace and any further columns are ignored.
+    Blank lines are skipped.
+
+    Raises ValueError for a non-blank line with fewer than two columns.
+    """
+    try:
+        # 'rU' requests universal-newline handling on Python 2 / Jython.
+        f = open(filename, 'rU')
+    except ValueError:
+        # Interpreters that no longer accept the 'U' flag reject the mode
+        # string; plain text mode already translates newlines there.
+        f = open(filename, 'r')
+    # try/finally rather than `with`, which Python 2.5 only offers through a
+    # __future__ import; this closes the file even if the consumer stops
+    # iterating early or a line is malformed.
+    try:
+        for line_index, line in enumerate(f):
+            if not line.strip():
+                continue
+            tokens = line.split('\t')
+            if len(tokens) < 2:
+                raise ValueError(
+                    '%s, line %d: expected at least two tab-separated columns'
+                    % (filename, line_index + 1))
+            yield tokens[0].strip(), tokens[1].strip()
+    finally:
+        f.close()
+
+def create_reader(filename):
+    """Return a (project, sample) generator chosen by the file's extension.
+
+    '.xls' and '.xlsx' files are handed to excel_reader; '.tsv' and '.txt'
+    files are handed to tsv_reader.  The extension is compared
+    case-insensitively.  For any other extension a message is written to
+    stderr and the process exits with status 1.
+    """
+    extension = os.path.splitext(filename)[1]
+    kind = extension.lower()
+    if kind in ('.xls', '.xlsx'):
+        return excel_reader(filename)
+    if kind in ('.tsv', '.txt'):
+        return tsv_reader(filename)
+    # The script prints BAM paths on stdout, so the diagnostic goes to stderr
+    # to keep it out of a redirected BAM list.
+    sys.stderr.write('Unrecognized file extension %s\n' % extension)
+    sys.exit(1)
+
+# Script body: read (project, sample) pairs from the input file named on the
+# command line and print, one per line, the BAM file under the highest-numbered
+# v<N> directory of each sample.
+
+def _fail(message):
+    """Write message to stderr and exit with status 1."""
+    # stdout is reserved for the BAM paths printed at the bottom of the loop.
+    sys.stderr.write(message + '\n')
+    sys.exit(1)
+
+if len(sys.argv) != 2:
+    _fail('USAGE: %s <input file>' % sys.argv[0])
+
+input_filename = sys.argv[1]
+if not os.path.exists(input_filename):
+    _fail('Input file %s not found' % input_filename)
+
+for project,sample in create_reader(input_filename):
+    sample_path = base_path % (project,sample)
+    if not os.path.isdir(sample_path):
+        _fail('Sample directory %s not found' % sample_path)
+
+    versions = []
+    for version_path in os.listdir(sample_path):
+        # Every entry must be named v<integer>, e.g. v3; anything else is fatal.
+        if not version_path.startswith('v') or not version_path[1:].isdigit():
+            _fail('Hit a path name that cannot be parsed: %s' % version_path)
+        versions.append(int(version_path[1:]))
+    if not versions:
+        _fail('No version directories found in %s' % sample_path)
+
+    # The highest version number is the one whose BAM gets listed.
+    bam_file = '%s/v%d/%s.bam' % (sample_path,max(versions),sample)
+    if not os.path.exists(bam_file):
+        _fail('Malformed file: tried to find %s, but no such path exists' % bam_file)
+    print(bam_file)