Fixing GATKReport exception handling when loading a report

* allowing tables with no description to go through
   * GATKReportTable should be more lenient with the format requirements (added to-dos for roger)
This commit is contained in:
Mauricio Carneiro 2012-03-23 13:54:46 -04:00
parent 4a54a4b11c
commit 539da9e3e1
2 changed files with 120 additions and 105 deletions

View File

@ -28,10 +28,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.StingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.PrintStream;
import java.io.*;
import java.util.Collection;
import java.util.TreeMap;
@ -85,36 +82,32 @@ public class GATKReport {
* @param file the file to load
*/
private void loadReport(File file) {
BufferedReader reader;
String reportHeader;
try {
BufferedReader reader = new BufferedReader(new FileReader(file));
String reportHeader = reader.readLine();
// Read the first line for the version and number of tables.
version = GATKReportVersion.fromHeader(reportHeader);
if (version.equals(GATKReportVersion.V0_1) ||
version.equals(GATKReportVersion.V0_2))
throw new UserException("The GATK no longer supports reading legacy GATK Reports. Please use v1.0 or newer.");
int nTables = Integer.parseInt(reportHeader.split(":")[2]);
// Read each table according to the number of tables
for (int i = 0; i < nTables; i++) {
addTable(new GATKReportTable(reader, version));
/*
if ( !blankLine.equals("") ) {
throw new StingException("The GATK Report File is corrupted or not formatted correctly");
}
*/
}
reader = new BufferedReader(new FileReader(file));
reportHeader = reader.readLine();
} catch (FileNotFoundException e) {
throw new ReviewedStingException("Could not open file : " + file);
} catch (IOException e) {
throw new ReviewedStingException("Could not read file : " + file);
}
} catch (Exception e) {
// todo - improve exception handling
//throw new StingException("Cannot read GATKReport: " + e);
e.printStackTrace();
// Read the first line for the version and number of tables.
version = GATKReportVersion.fromHeader(reportHeader);
if (version.equals(GATKReportVersion.V0_1) ||
version.equals(GATKReportVersion.V0_2))
throw new UserException("The GATK no longer supports reading legacy GATK Reports. Please use v1.0 or newer.");
int nTables = Integer.parseInt(reportHeader.split(":")[2]);
// Read each table according to the number of tables
for (int i = 0; i < nTables; i++) {
addTable(new GATKReportTable(reader, version));
}
}
/**

View File

@ -29,6 +29,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.text.TextFormattingUtils;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.PrintStream;
import java.util.*;
import java.util.regex.Matcher;
@ -54,85 +55,107 @@ public class GATKReportTable {
private GATKReportColumns columns;
private static final String COULD_NOT_READ_HEADER = "Could not read the header of this file -- ";
private static final String COULD_NOT_READ_COLUMN_NAMES = "Could not read the column names of this file -- ";
private static final String COULD_NOT_READ_DATA_LINE = "Could not read a data line of this table -- ";
private static final String COULD_NOT_READ_EMPTY_LINE = "Could not read the last empty line of this table -- ";
private static final String OLD_GATK_TABLE_VERSION = "We no longer support older versions of the GATK Tables";
public GATKReportTable(BufferedReader reader, GATKReportVersion version) {
try {
int counter = 0;
int counter = 0;
switch (version) {
case V1_0:
int nHeaders = 2;
String[] tableHeaders = new String[nHeaders];
// Read in the headers
for (int i = 0; i < nHeaders; i++) {
switch (version) {
case V1_0:
int nHeaders = 2;
String[] tableHeaders = new String[nHeaders];
// Read in the headers
for (int i = 0; i < nHeaders; i++) {
try {
tableHeaders[i] = reader.readLine();
} catch (IOException e) {
throw new ReviewedStingException(COULD_NOT_READ_HEADER + e.getMessage());
}
String[] tableData = tableHeaders[0].split(":");
String[] userData = tableHeaders[1].split(":");
// Fill in the fields
tableName = userData[2];
tableDescription = userData[3];
primaryKeyDisplay = Boolean.parseBoolean(tableData[2]);
columns = new GATKReportColumns();
int nColumns = Integer.parseInt(tableData[3]);
int nRows = Integer.parseInt(tableData[4]);
// Read column names
String columnLine = reader.readLine();
List<Integer> columnStarts = TextFormattingUtils.getWordStarts(columnLine);
String[] columnNames = TextFormattingUtils.splitFixedWidth(columnLine, columnStarts);
if (primaryKeyDisplay) {
addPrimaryKey(columnNames[0]);
} else {
sortByPrimaryKey = true;
addPrimaryKey("id", false);
counter = 1;
}
String[] tableData = tableHeaders[0].split(":");
String[] userData = tableHeaders[1].split(":");
// Fill in the fields
tableName = userData[2];
tableDescription = (userData.length <= 3) ? "" : userData[3]; // table may have no description! (and that's okay)
primaryKeyDisplay = Boolean.parseBoolean(tableData[2]);
columns = new GATKReportColumns();
int nColumns = Integer.parseInt(tableData[3]);
int nRows = Integer.parseInt(tableData[4]);
// Read column names
String columnLine;
try {
columnLine = reader.readLine();
} catch (IOException e) {
throw new ReviewedStingException(COULD_NOT_READ_COLUMN_NAMES);
}
List<Integer> columnStarts = TextFormattingUtils.getWordStarts(columnLine);
String[] columnNames = TextFormattingUtils.splitFixedWidth(columnLine, columnStarts);
if (primaryKeyDisplay) {
addPrimaryKey(columnNames[0]);
} else {
sortByPrimaryKey = true;
addPrimaryKey("id", false);
counter = 1;
}
// Put in columns using the format string from the header
for (int i = 0; i < nColumns; i++) {
String format = tableData[5 + i];
if (primaryKeyDisplay)
addColumn(columnNames[i + 1], true, format);
else
addColumn(columnNames[i], true, format);
}
for (int i = 0; i < nRows; i++) {
// read line
String dataLine;
try {
dataLine = reader.readLine();
} catch (IOException e) {
throw new ReviewedStingException(COULD_NOT_READ_DATA_LINE + e.getMessage());
}
// Put in columns using the format string from the header
for (int i = 0; i < nColumns; i++) {
String format = tableData[5 + i];
if (primaryKeyDisplay)
addColumn(columnNames[i + 1], true, format);
else
addColumn(columnNames[i], true, format);
}
for (int i = 0; i < nRows; i++) {
// read line
List<String> lineSplits = Arrays.asList(TextFormattingUtils.splitFixedWidth(reader.readLine(), columnStarts));
for (int columnIndex = 0; columnIndex < nColumns; columnIndex++) {
//Input all the remaining values
GATKReportDataType type = getColumns().getByIndex(columnIndex).getDataType();
if (primaryKeyDisplay) {
String columnName = columnNames[columnIndex + 1];
String primaryKey = lineSplits.get(0);
set(primaryKey, columnName, type.Parse(lineSplits.get(columnIndex + 1)));
} else {
String columnName = columnNames[columnIndex];
set(counter, columnName, type.Parse(lineSplits.get(columnIndex)));
}
List<String> lineSplits = Arrays.asList(TextFormattingUtils.splitFixedWidth(dataLine, columnStarts));
for (int columnIndex = 0; columnIndex < nColumns; columnIndex++) {
//Input all the remaining values
GATKReportDataType type = getColumns().getByIndex(columnIndex).getDataType();
if (primaryKeyDisplay) {
String columnName = columnNames[columnIndex + 1];
String primaryKey = lineSplits.get(0);
set(primaryKey, columnName, type.Parse(lineSplits.get(columnIndex + 1)));
} else {
String columnName = columnNames[columnIndex];
set(counter, columnName, type.Parse(lineSplits.get(columnIndex)));
}
counter++;
}
counter++;
}
try {
reader.readLine();
// When you see empty line or null, quit out
}
} catch (Exception e) {
//throw new StingException("Cannot read GATKReport: " + e);
e.printStackTrace();
} catch (IOException e) {
throw new ReviewedStingException(COULD_NOT_READ_EMPTY_LINE + e.getMessage());
}
break;
default:
throw new ReviewedStingException(OLD_GATK_TABLE_VERSION);
}
}
@ -418,12 +441,11 @@ public class GATKReportTable {
if (newValue != null)
value = newValue;
if (column.getDataType().equals(GATKReportDataType.fromObject(value)) ||
column.getDataType().equals(GATKReportDataType.Unknown) )
// todo -- Types have to be more flexible. For example, %d should accept Integers, Shorts and Bytes.
if (column.getDataType().equals(GATKReportDataType.fromObject(value)) || column.getDataType().equals(GATKReportDataType.Unknown) )
columns.get(columnName).put(primaryKey, value);
else
throw new ReviewedStingException(String.format("Tried to add an object of type: %s to a column of type: %s",
GATKReportDataType.fromObject(value).name(), column.getDataType().name()));
throw new ReviewedStingException(String.format("Tried to add an object of type: %s to a column of type: %s", GATKReportDataType.fromObject(value).name(), column.getDataType().name()));
}
/**