Fixing GATKReport exception handling when loading a report

* allowing tables with no description to go through
   * GATKReportTable should be more lenient with the format requirements (added to-dos for Roger)
This commit is contained in:
Mauricio Carneiro 2012-03-23 13:54:46 -04:00
parent 4a54a4b11c
commit 539da9e3e1
2 changed files with 120 additions and 105 deletions

View File

@ -28,10 +28,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.StingException; import org.broadinstitute.sting.utils.exceptions.StingException;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
import java.io.BufferedReader; import java.io.*;
import java.io.File;
import java.io.FileReader;
import java.io.PrintStream;
import java.util.Collection; import java.util.Collection;
import java.util.TreeMap; import java.util.TreeMap;
@ -85,36 +82,32 @@ public class GATKReport {
* @param file the file to load * @param file the file to load
*/ */
private void loadReport(File file) { private void loadReport(File file) {
BufferedReader reader;
String reportHeader;
try { try {
BufferedReader reader = new BufferedReader(new FileReader(file)); reader = new BufferedReader(new FileReader(file));
reportHeader = reader.readLine();
String reportHeader = reader.readLine(); } catch (FileNotFoundException e) {
throw new ReviewedStingException("Could not open file : " + file);
// Read the first line for the version and number of tables. } catch (IOException e) {
version = GATKReportVersion.fromHeader(reportHeader); throw new ReviewedStingException("Could not read file : " + file);
if (version.equals(GATKReportVersion.V0_1) || }
version.equals(GATKReportVersion.V0_2))
throw new UserException("The GATK no longer supports reading legacy GATK Reports. Please use v1.0 or newer.");
int nTables = Integer.parseInt(reportHeader.split(":")[2]);
// Read each table according to the number of tables
for (int i = 0; i < nTables; i++) {
addTable(new GATKReportTable(reader, version));
/*
if ( !blankLine.equals("") ) {
throw new StingException("The GATK Report File is corrupted or not formatted correctly");
}
*/
}
} catch (Exception e) { // Read the first line for the version and number of tables.
// todo - improve exception handling version = GATKReportVersion.fromHeader(reportHeader);
//throw new StingException("Cannot read GATKReport: " + e); if (version.equals(GATKReportVersion.V0_1) ||
e.printStackTrace(); version.equals(GATKReportVersion.V0_2))
throw new UserException("The GATK no longer supports reading legacy GATK Reports. Please use v1.0 or newer.");
int nTables = Integer.parseInt(reportHeader.split(":")[2]);
// Read each table according to the number of tables
for (int i = 0; i < nTables; i++) {
addTable(new GATKReportTable(reader, version));
} }
} }
/** /**

View File

@ -29,6 +29,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.text.TextFormattingUtils; import org.broadinstitute.sting.utils.text.TextFormattingUtils;
import java.io.BufferedReader; import java.io.BufferedReader;
import java.io.IOException;
import java.io.PrintStream; import java.io.PrintStream;
import java.util.*; import java.util.*;
import java.util.regex.Matcher; import java.util.regex.Matcher;
@ -54,85 +55,107 @@ public class GATKReportTable {
private GATKReportColumns columns; private GATKReportColumns columns;
private static final String COULD_NOT_READ_HEADER = "Could not read the header of this file -- ";
private static final String COULD_NOT_READ_COLUMN_NAMES = "Could not read the column names of this file -- ";
private static final String COULD_NOT_READ_DATA_LINE = "Could not read a data line of this table -- ";
private static final String COULD_NOT_READ_EMPTY_LINE = "Could not read the last empty line of this table -- ";
private static final String OLD_GATK_TABLE_VERSION = "We no longer support older versions of the GATK Tables";
public GATKReportTable(BufferedReader reader, GATKReportVersion version) { public GATKReportTable(BufferedReader reader, GATKReportVersion version) {
try { int counter = 0;
int counter = 0; switch (version) {
case V1_0:
switch (version) { int nHeaders = 2;
case V1_0: String[] tableHeaders = new String[nHeaders];
int nHeaders = 2;
String[] tableHeaders = new String[nHeaders]; // Read in the headers
for (int i = 0; i < nHeaders; i++) {
// Read in the headers try {
for (int i = 0; i < nHeaders; i++) {
tableHeaders[i] = reader.readLine(); tableHeaders[i] = reader.readLine();
} catch (IOException e) {
throw new ReviewedStingException(COULD_NOT_READ_HEADER + e.getMessage());
} }
String[] tableData = tableHeaders[0].split(":"); }
String[] userData = tableHeaders[1].split(":"); String[] tableData = tableHeaders[0].split(":");
String[] userData = tableHeaders[1].split(":");
// Fill in the fields
tableName = userData[2]; // Fill in the fields
tableDescription = userData[3]; tableName = userData[2];
primaryKeyDisplay = Boolean.parseBoolean(tableData[2]); tableDescription = (userData.length <= 3) ? "" : userData[3]; // table may have no description! (and that's okay)
columns = new GATKReportColumns(); primaryKeyDisplay = Boolean.parseBoolean(tableData[2]);
columns = new GATKReportColumns();
int nColumns = Integer.parseInt(tableData[3]);
int nRows = Integer.parseInt(tableData[4]); int nColumns = Integer.parseInt(tableData[3]);
int nRows = Integer.parseInt(tableData[4]);
// Read column names
String columnLine = reader.readLine(); // Read column names
String columnLine;
List<Integer> columnStarts = TextFormattingUtils.getWordStarts(columnLine); try {
String[] columnNames = TextFormattingUtils.splitFixedWidth(columnLine, columnStarts); columnLine = reader.readLine();
} catch (IOException e) {
if (primaryKeyDisplay) { throw new ReviewedStingException(COULD_NOT_READ_COLUMN_NAMES);
addPrimaryKey(columnNames[0]); }
} else { List<Integer> columnStarts = TextFormattingUtils.getWordStarts(columnLine);
sortByPrimaryKey = true; String[] columnNames = TextFormattingUtils.splitFixedWidth(columnLine, columnStarts);
addPrimaryKey("id", false);
counter = 1; if (primaryKeyDisplay) {
addPrimaryKey(columnNames[0]);
} else {
sortByPrimaryKey = true;
addPrimaryKey("id", false);
counter = 1;
}
// Put in columns using the format string from the header
for (int i = 0; i < nColumns; i++) {
String format = tableData[5 + i];
if (primaryKeyDisplay)
addColumn(columnNames[i + 1], true, format);
else
addColumn(columnNames[i], true, format);
}
for (int i = 0; i < nRows; i++) {
// read line
String dataLine;
try {
dataLine = reader.readLine();
} catch (IOException e) {
throw new ReviewedStingException(COULD_NOT_READ_DATA_LINE + e.getMessage());
} }
// Put in columns using the format string from the header List<String> lineSplits = Arrays.asList(TextFormattingUtils.splitFixedWidth(dataLine, columnStarts));
for (int i = 0; i < nColumns; i++) {
String format = tableData[5 + i]; for (int columnIndex = 0; columnIndex < nColumns; columnIndex++) {
if (primaryKeyDisplay)
addColumn(columnNames[i + 1], true, format); //Input all the remaining values
else GATKReportDataType type = getColumns().getByIndex(columnIndex).getDataType();
addColumn(columnNames[i], true, format);
} if (primaryKeyDisplay) {
String columnName = columnNames[columnIndex + 1];
for (int i = 0; i < nRows; i++) { String primaryKey = lineSplits.get(0);
// read line set(primaryKey, columnName, type.Parse(lineSplits.get(columnIndex + 1)));
List<String> lineSplits = Arrays.asList(TextFormattingUtils.splitFixedWidth(reader.readLine(), columnStarts)); } else {
String columnName = columnNames[columnIndex];
for (int columnIndex = 0; columnIndex < nColumns; columnIndex++) { set(counter, columnName, type.Parse(lineSplits.get(columnIndex)));
//Input all the remaining values
GATKReportDataType type = getColumns().getByIndex(columnIndex).getDataType();
if (primaryKeyDisplay) {
String columnName = columnNames[columnIndex + 1];
String primaryKey = lineSplits.get(0);
set(primaryKey, columnName, type.Parse(lineSplits.get(columnIndex + 1)));
} else {
String columnName = columnNames[columnIndex];
set(counter, columnName, type.Parse(lineSplits.get(columnIndex)));
}
} }
counter++;
} }
counter++;
}
try {
reader.readLine(); reader.readLine();
// When you see empty line or null, quit out } catch (IOException e) {
} throw new ReviewedStingException(COULD_NOT_READ_EMPTY_LINE + e.getMessage());
} catch (Exception e) { }
//throw new StingException("Cannot read GATKReport: " + e); break;
e.printStackTrace();
default:
throw new ReviewedStingException(OLD_GATK_TABLE_VERSION);
} }
} }
@ -418,12 +441,11 @@ public class GATKReportTable {
if (newValue != null) if (newValue != null)
value = newValue; value = newValue;
if (column.getDataType().equals(GATKReportDataType.fromObject(value)) || // todo -- Types have to be more flexible. For example, %d should accept Integers, Shorts and Bytes.
column.getDataType().equals(GATKReportDataType.Unknown) ) if (column.getDataType().equals(GATKReportDataType.fromObject(value)) || column.getDataType().equals(GATKReportDataType.Unknown) )
columns.get(columnName).put(primaryKey, value); columns.get(columnName).put(primaryKey, value);
else else
throw new ReviewedStingException(String.format("Tried to add an object of type: %s to a column of type: %s", throw new ReviewedStingException(String.format("Tried to add an object of type: %s to a column of type: %s", GATKReportDataType.fromObject(value).name(), column.getDataType().name()));
GATKReportDataType.fromObject(value).name(), column.getDataType().name()));
} }
/** /**