Extensive refactoring of the GATKReports. This was a beast.

The practical differences between version 1.0 and this one (v1.1) are:

* the underlying data structure now uses arrays instead of hashes, which should drastically reduce the memory overhead required to create large tables.
* no more primary keys; you can still create arbitrary IDs to index into rows, but there is no special cased primary key column in the table.
* no more dangerous/ugly table operations supported except to increment a cell's value (if an int) or to concatenate 2 tables.

Integration tests change because table headers are different.
Old classes are still lying around.  Will clean those up in a subsequent commit.
This commit is contained in:
Eric Banks 2012-05-18 01:11:26 -04:00
parent 5189b06468
commit a26b04ba17
25 changed files with 1161 additions and 373 deletions

View File

@ -31,6 +31,7 @@ import org.broadinstitute.sting.utils.exceptions.UserException;
import java.io.*;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
/**
@ -38,11 +39,11 @@ import java.util.TreeMap;
*/
public class GATKReport {
public static final String GATKREPORT_HEADER_PREFIX = "#:GATKReport.";
public static final GATKReportVersion LATEST_REPORT_VERSION = GATKReportVersion.V1_0;
public static final GATKReportVersion LATEST_REPORT_VERSION = GATKReportVersion.V1_1;
private static final String SEPARATOR = ":";
private GATKReportVersion version = LATEST_REPORT_VERSION;
private final TreeMap<String, GATKReportTable> tables = new TreeMap<String, GATKReportTable>();
private final TreeMap<String, GATKReportTableV2> tables = new TreeMap<String, GATKReportTableV2>();
/**
* Create a new, empty GATKReport.
@ -70,10 +71,10 @@ public class GATKReport {
/**
* Create a new GATK report from GATK report tables
* @param tables Any number of tables that you want ot add to the report
* @param tables Any number of tables that you want to add to the report
*/
public GATKReport(GATKReportTable... tables) {
for( GATKReportTable table: tables)
public GATKReport(GATKReportTableV2... tables) {
for( GATKReportTableV2 table: tables)
addTable(table);
}
@ -103,12 +104,10 @@ public class GATKReport {
int nTables = Integer.parseInt(reportHeader.split(":")[2]);
// Read each tables according ot the number of tables
// Read each table according to the number of tables
for (int i = 0; i < nTables; i++) {
addTable(new GATKReportTable(reader, version));
addTable(new GATKReportTableV2(reader, version));
}
}
/**
@ -116,9 +115,10 @@ public class GATKReport {
*
* @param tableName the name of the table
* @param tableDescription the description of the table
* @param numColumns the number of columns in this table
*/
public void addTable(String tableName, String tableDescription) {
addTable(tableName, tableDescription, true);
public void addTable(final String tableName, final String tableDescription, final int numColumns) {
addTable(tableName, tableDescription, numColumns, false);
}
/**
@ -126,10 +126,11 @@ public class GATKReport {
*
* @param tableName the name of the table
* @param tableDescription the description of the table
* @param sortByPrimaryKey whether to sort the rows by the primary key
* @param numColumns the number of columns in this table
* @param sortByRowID whether to sort the rows by the row ID
*/
public void addTable(String tableName, String tableDescription, boolean sortByPrimaryKey) {
GATKReportTable table = new GATKReportTable(tableName, tableDescription, sortByPrimaryKey);
public void addTable(final String tableName, final String tableDescription, final int numColumns, final boolean sortByRowID) {
GATKReportTableV2 table = new GATKReportTableV2(tableName, tableDescription, numColumns, sortByRowID);
tables.put(tableName, table);
}
@ -138,12 +139,12 @@ public class GATKReport {
*
* @param table the table to add
*/
public void addTable(GATKReportTable table) {
public void addTable(GATKReportTableV2 table) {
tables.put(table.getTableName(), table);
}
public void addTables(List<GATKReportTable> gatkReportTables) {
for (GATKReportTable table : gatkReportTables)
public void addTables(List<GATKReportTableV2> gatkReportTableV2s) {
for ( GATKReportTableV2 table : gatkReportTableV2s )
addTable(table);
}
@ -163,8 +164,8 @@ public class GATKReport {
* @param tableName the name of the table
* @return the table object
*/
public GATKReportTable getTable(String tableName) {
GATKReportTable table = tables.get(tableName);
public GATKReportTableV2 getTable(String tableName) {
GATKReportTableV2 table = tables.get(tableName);
if (table == null)
throw new ReviewedStingException("Table is not in GATKReport: " + tableName);
return table;
@ -177,30 +178,29 @@ public class GATKReport {
*/
public void print(PrintStream out) {
out.println(GATKREPORT_HEADER_PREFIX + getVersion().toString() + SEPARATOR + getTables().size());
for (GATKReportTable table : tables.values())
for (GATKReportTableV2 table : tables.values())
table.write(out);
}
public Collection<GATKReportTable> getTables() {
public Collection<GATKReportTableV2> getTables() {
return tables.values();
}
/**
* This is the main function in charge of gathering the reports. It checks that the reports are compatible and then
* calls the table atheirng functions.
* calls the table gathering functions.
*
* @param input another GATKReport of the same format
*/
public void combineWith(GATKReport input) {
public void concat(GATKReport input) {
if (!this.isSameFormat(input)) {
if ( !isSameFormat(input) ) {
throw new ReviewedStingException("Failed to combine GATKReport, format doesn't match!");
}
for (String tableName : input.tables.keySet()) {
tables.get(tableName).combineWith(input.getTable(tableName));
for ( Map.Entry<String, GATKReportTableV2> table : tables.entrySet() ) {
table.getValue().concat(input.getTable(table.getKey()));
}
}
public GATKReportVersion getVersion() {
@ -271,9 +271,8 @@ public class GATKReport {
* @param columns The names of the columns in your table
* @return a simplified GATK report
*/
public static GATKReport newSimpleReport(String tableName, String... columns) {
GATKReportTable table = new GATKReportTable(tableName, "A simplified GATK table report");
table.addPrimaryKey("id", false);
public static GATKReport newSimpleReport(final String tableName, final String... columns) {
GATKReportTableV2 table = new GATKReportTableV2(tableName, "A simplified GATK table report", columns.length);
for (String column : columns) {
table.addColumn(column, "");
@ -289,48 +288,20 @@ public class GATKReport {
* This method provides an efficient way to populate a simplified GATK report. This method will only work on reports
* that qualify as simplified GATK reports. See the newSimpleReport() constructor for more information.
*
* @param values the row of data to be added to the table.
* @param values the row of data to be added to the table.
* Note: the number of arguments must match the columns in the table.
*/
public void addRow(Object... values) {
// Must be a simplified GATK Report
if (isSimpleReport()) {
public void addRow(final Object... values) {
// Must be a simple report
if ( tables.size() != 1 )
throw new ReviewedStingException("Cannot write a row to a complex GATK Report");
GATKReportTable table = tables.firstEntry().getValue();
if (table.getColumns().size() != values.length) {
throw new StingException("The number of arguments in addRow() must match the number of columns in the table");
}
int counter = table.getNumRows() + 1;
int i = 0;
for (String columnName : table.getColumns().keySet()) {
table.set(counter, columnName, values[i]);
i++;
}
} else {
throw new StingException("Cannot add a Row to a non-Simplified GATK Report");
}
}
/**
* Checks if the GATK report qualifies as a "simple" GATK report
*
* @return true is the report is a simplified GATK report
*/
private boolean isSimpleReport() {
if (tables.size() != 1)
return false;
GATKReportTable table = tables.firstEntry().getValue();
if (!table.getPrimaryKeyName().equals("id"))
return false;
return true;
GATKReportTableV2 table = tables.firstEntry().getValue();
if ( table.getNumColumns() != values.length )
throw new ReviewedStingException("The number of arguments in writeRow() must match the number of columns in the table");
final int rowIndex = table.getNumRows();
for ( int i = 0; i < values.length; i++ )
table.set(rowIndex, i, values[i]);
}
}

View File

@ -0,0 +1,146 @@
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.report;
import org.apache.commons.lang.math.NumberUtils;
import java.util.Arrays;
import java.util.Collection;
/**
 * Column information within a GATK report table: the column name, its format string and the data type derived
 * from it, plus the running display width and alignment used to print the column at a fixed width.
 */
public class GATKReportColumnV2 {
    /** Formatted values that stay right-aligned even though they are not numeric. */
    private static final Collection<String> RIGHT_ALIGN_STRINGS = Arrays.asList(
            "null",
            "NA",
            String.valueOf(Double.POSITIVE_INFINITY),
            String.valueOf(Double.NEGATIVE_INFINITY),
            String.valueOf(Double.NaN));

    final private String columnName;
    final private String format;
    final private GATKReportDataType dataType;

    // Lazily built from maxWidth/alignment; reset to null whenever either of them changes so that it never goes stale.
    private GATKReportColumnFormat columnFormat;
    // default alignment is to the right unless values added ask for a left alignment
    private GATKReportColumnFormat.Alignment alignment = GATKReportColumnFormat.Alignment.RIGHT;
    private int maxWidth = 0;

    /**
     * Construct the column object from its name and format string.
     * <p>
     * An empty format string means "unspecified": the column then formats with "%s" and has data type Unknown.
     * Otherwise the data type is derived from the format string.
     *
     * @param columnName the name of the column; must not be null (its length seeds the display width)
     * @param format     the format string; must not be null, may be empty
     */
    public GATKReportColumnV2(final String columnName, final String format) {
        this.columnName = columnName;
        this.maxWidth = columnName.length();
        if ( format.equals("") ) {
            this.format = "%s";
            this.dataType = GATKReportDataType.Unknown;
        }
        else {
            this.format = format;
            this.dataType = GATKReportDataType.fromFormatString(format);
        }
    }

    /**
     * Get the display format for this column, built from the widest value seen so far and the current alignment.
     * This allows the entire column to be displayed with the appropriate, fixed width.
     * <p>
     * The result is cached, and the cache is dropped whenever a later value widens the column or changes its
     * alignment, so the returned object always reflects every value passed to updateFormatting() so far.
     *
     * @return the column format (width and alignment) for this column
     */
    public GATKReportColumnFormat getColumnFormat() {
        if ( columnFormat == null )
            columnFormat = new GATKReportColumnFormat(maxWidth, alignment);
        return columnFormat;
    }

    /**
     * Check if the value can be right aligned. Does not trim the values before checking the special strings, and
     * only trims for the numeric test, since it assumes the spaces mean that the value is already padded.
     *
     * @param value to check
     * @return true if the value is null, one of the special right-aligned strings, or numeric
     */
    protected static boolean isRightAlign(final String value) {
        return value == null || RIGHT_ALIGN_STRINGS.contains(value) || NumberUtils.isNumber(value.trim());
    }

    /**
     * Returns a string version of the value, as it is measured for width and alignment purposes.
     *
     * @param obj The object to convert to a string
     * @return "null" for a null object, an 8-decimal rendering for floating point values in an Unknown-typed
     *         column, and otherwise the object rendered with this column's format string
     */
    private String formatValue(final Object obj) {
        if ( obj == null )
            return "null";
        if ( dataType.equals(GATKReportDataType.Unknown) && (obj instanceof Double || obj instanceof Float) )
            return String.format("%.8f", obj);
        return String.format(format, obj);
    }

    /**
     * @return the data type of this column
     */
    public GATKReportDataType getDataType() {
        return dataType;
    }

    /**
     * @return the name of this column
     */
    public String getColumnName() {
        return columnName;
    }

    /**
     * @return "%s" when the column's data type is Unknown, otherwise the format string given at construction
     */
    public String getFormat() {
        return dataType.equals(GATKReportDataType.Unknown) ? "%s" : format;
    }

    /**
     * Account for a new value in this column: widen the column if the formatted value is longer than anything seen
     * so far, and switch the column to left alignment if the formatted value is not right-alignable.
     * Null values and values that format to an empty string are ignored.
     *
     * @param value the value being stored in this column
     */
    public void updateFormatting(final Object value) {
        if ( value == null )
            return;
        final String formatted = formatValue(value);
        if ( formatted.length() > 0 ) {
            updateMaxWidth(formatted);
            updateFormat(formatted);
        }
    }

    /** Grow the display width to fit the formatted value, invalidating the cached column format on change. */
    private void updateMaxWidth(final String formatted) {
        if ( formatted.length() > maxWidth ) {
            maxWidth = formatted.length();
            columnFormat = null;
        }
    }

    /**
     * Switch from right to left alignment as soon as one value is not right-alignable (the switch is one-way),
     * invalidating the cached column format on change.
     */
    private void updateFormat(final String formatted) {
        if ( alignment == GATKReportColumnFormat.Alignment.RIGHT && !isRightAlign(formatted) ) {
            alignment = GATKReportColumnFormat.Alignment.LEFT;
            columnFormat = null;
        }
    }
}

View File

@ -54,7 +54,7 @@ public class GATKReportGatherer extends Gatherer {
isFirst = false;
} else {
GATKReport toAdd = new GATKReport(input);
current.combineWith(toAdd);
current.concat(toAdd);
}
}

View File

@ -0,0 +1,672 @@
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.report;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.text.TextFormattingUtils;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.PrintStream;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class GATKReportTableV2 {
/**
* Regular expression matching any single character that is NOT allowed in a table, column or row-ID name.
* Allowed characters are ASCII letters, digits, underscore, hyphen and period; a name is invalid if this
* pattern is found anywhere in it.
*/
public static final String INVALID_TABLE_NAME_REGEX = "[^a-zA-Z0-9_\\-\\.]";
// prefix that starts both header lines of a serialized table
private static final String GATKTABLE_HEADER_PREFIX = "#:GATKTable";
// field separator used within the header lines
private static final String SEPARATOR = ":";
// terminator printed at the end of the first (data) header line
private static final String ENDLINE = ":;";
private final String tableName;
private final String tableDescription;
// when true, write() emits rows ordered by row ID rather than by insertion order
private final boolean sortByRowID;
// the table body: one Object[] per row, one array slot per column
private List<Object[]> underlyingData;
// per-column metadata, in column order
private final List<GATKReportColumnV2> columnInfo;
// column name -> index into columnInfo (and into each row array)
private final Map<Object, Integer> columnNameToIndex;
// row ID -> index into underlyingData
private final HashMap<Object, Integer> rowIdToIndex;
// error message prefixes used while parsing a table from a reader
private static final String COULD_NOT_READ_HEADER = "Could not read the header of this file -- ";
private static final String COULD_NOT_READ_COLUMN_NAMES = "Could not read the column names of this file -- ";
private static final String COULD_NOT_READ_DATA_LINE = "Could not read a data line of this table -- ";
private static final String COULD_NOT_READ_EMPTY_LINE = "Could not read the last empty line of this table -- ";
private static final String OLD_GATK_TABLE_VERSION = "We no longer support older versions of the GATK Tables";
// initial capacity of underlyingData for tables built in memory (not the number of rows)
private static final int INITITAL_ARRAY_SIZE = 10000;
// NOTE(review): not referenced in the visible part of this class -- confirm whether it is still used
private static final String NUMBER_CONVERSION_EXCEPTION = "String is a number but is not a long or a double: ";
/**
 * Positions of the fields in the first (data) header line of a serialized table, after splitting on the separator.
 * FORMAT_START is the position of the first per-column format string; the remaining formats follow it.
 */
protected enum TableDataHeaderFields {
    COLS(2),
    ROWS(3),
    FORMAT_START(4);

    /** zero-based position of the field within the split header line */
    private final int position;

    TableDataHeaderFields(final int position) {
        this.position = position;
    }

    /**
     * @return the zero-based position of this field within the split header line
     */
    public int index() {
        return this.position;
    }
}
/**
 * Positions of the fields in the second (name) header line of a serialized table, after splitting on the separator.
 */
protected enum TableNameHeaderFields {
    NAME(2),
    DESCRIPTION(3);

    /** zero-based position of the field within the split header line */
    private final int position;

    TableNameHeaderFields(final int position) {
        this.position = position;
    }

    /**
     * @return the zero-based position of this field within the split header line
     */
    public int index() {
        return this.position;
    }
}
/**
 * Construct a table by parsing its serialized form from a reader.
 * <p>
 * The expected layout is the one produced by write(): a data header line (column count, row count and one format
 * string per column), a name header line (table name and optional description), one fixed-width line of column
 * names, one fixed-width line per row, and a trailing empty line.
 * Rows are kept in file order (no sorting by row ID) and each row's ID is simply its index.
 *
 * @param reader  the reader positioned at the first header line of the table
 * @param version the version of the enclosing report; only V1_1 is supported
 * @throws ReviewedStingException if the version is not supported, the input cannot be read, or the input ends
 *                                before the whole table has been read
 */
public GATKReportTableV2(BufferedReader reader, GATKReportVersion version) {
    switch ( version ) {
        case V1_1:
            // read in the header lines
            final String tableDataLine, tableNameLine;
            try {
                tableDataLine = reader.readLine();
                tableNameLine = reader.readLine();
            } catch (IOException e) {
                throw new ReviewedStingException(COULD_NOT_READ_HEADER + e.getMessage());
            }
            // readLine() returns null at end of stream; report a truncated table instead of failing with an NPE
            if ( tableDataLine == null || tableNameLine == null )
                throw new ReviewedStingException(COULD_NOT_READ_HEADER + "unexpected end of input");
            final String[] tableData = tableDataLine.split(SEPARATOR);
            final String[] tableNameData = tableNameLine.split(SEPARATOR);

            // parse the header fields
            tableName = tableNameData[TableNameHeaderFields.NAME.index()];
            tableDescription = (tableNameData.length <= TableNameHeaderFields.DESCRIPTION.index()) ? "" : tableNameData[TableNameHeaderFields.DESCRIPTION.index()]; // table may have no description! (and that's okay)

            // when reading from a file, we do not re-sort the rows
            sortByRowID = false;

            // initialize the data
            final int nColumns = Integer.parseInt(tableData[TableDataHeaderFields.COLS.index()]);
            final int nRows = Integer.parseInt(tableData[TableDataHeaderFields.ROWS.index()]);
            underlyingData = new ArrayList<Object[]>(nRows);
            columnInfo = new ArrayList<GATKReportColumnV2>(nColumns);
            columnNameToIndex = new HashMap<Object, Integer>(nColumns);

            // when reading from a file, the row ID mapping is just the index
            rowIdToIndex = new HashMap<Object, Integer>();
            for ( int i = 0; i < nRows; i++ )
                rowIdToIndex.put(i, i);

            // read the column names
            final String columnLine;
            try {
                columnLine = reader.readLine();
            } catch (IOException e) {
                throw new ReviewedStingException(COULD_NOT_READ_COLUMN_NAMES + e.getMessage());
            }
            if ( columnLine == null )
                throw new ReviewedStingException(COULD_NOT_READ_COLUMN_NAMES + "unexpected end of input");

            // the column-name line fixes the start offset of every column for the data lines below
            final List<Integer> columnStarts = TextFormattingUtils.getWordStarts(columnLine);
            final String[] columnNames = TextFormattingUtils.splitFixedWidth(columnLine, columnStarts);

            // Put in columns using the format string from the header
            for ( int i = 0; i < nColumns; i++ ) {
                final String format = tableData[TableDataHeaderFields.FORMAT_START.index() + i];
                addColumn(columnNames[i], format);
            }

            // fill in the table
            try {
                for ( int i = 0; i < nRows; i++ ) {
                    // read a data line
                    final String dataLine = reader.readLine();
                    if ( dataLine == null )
                        throw new ReviewedStingException(COULD_NOT_READ_DATA_LINE + "unexpected end of input after " + i + " of " + nRows + " rows");
                    final List<String> lineSplits = Arrays.asList(TextFormattingUtils.splitFixedWidth(dataLine, columnStarts));

                    underlyingData.add(new Object[nColumns]);
                    for ( int columnIndex = 0; columnIndex < nColumns; columnIndex++ ) {
                        final GATKReportDataType type = columnInfo.get(columnIndex).getDataType();
                        final String columnName = columnNames[columnIndex];
                        // the row ID is the boxed row index registered above
                        set(i, columnName, type.Parse(lineSplits.get(columnIndex)));
                    }
                }
            } catch (IOException e) {
                throw new ReviewedStingException(COULD_NOT_READ_DATA_LINE + e.getMessage());
            }

            // consume the empty line that terminates the table
            try {
                reader.readLine();
            } catch (IOException e) {
                throw new ReviewedStingException(COULD_NOT_READ_EMPTY_LINE + e.getMessage());
            }
            break;

        default:
            throw new ReviewedStingException(OLD_GATK_TABLE_VERSION);
    }
}
/**
* Construct a new, empty GATK report table with the specified name and description.
* Delegates to the four-argument constructor with sortByRowID set to true, so rows of a table built this way
* are written sorted by their row IDs.
*
* @param tableName the name of the table
* @param tableDescription the description of the table
* @param numColumns the number of columns in this table (used to pre-size the column storage)
*/
public GATKReportTableV2(final String tableName, final String tableDescription, final int numColumns) {
this(tableName, tableDescription, numColumns, true);
}
/**
* Construct a new GATK report table with the specified name and description and whether to sort rows by the row ID.
*
* @param tableName the name of the table
* @param tableDescription the description of the table
* @param numColumns the number of columns in this table
* @param sortByRowID whether to sort rows by the row ID (instead of the order in which they were added)
*/
public GATKReportTableV2(final String tableName, final String tableDescription, final int numColumns, final boolean sortByRowID) {
if ( !isValidName(tableName) ) {
throw new ReviewedStingException("Attempted to set a GATKReportTable name of '" + tableName + "'. GATKReportTable names must be purely alphanumeric - no spaces or special characters are allowed.");
}
if ( !isValidDescription(tableDescription) ) {
throw new ReviewedStingException("Attempted to set a GATKReportTable description of '" + tableDescription + "'. GATKReportTable descriptions must not contain newlines.");
}
this.tableName = tableName;
this.tableDescription = tableDescription;
this.sortByRowID = sortByRowID;
underlyingData = new ArrayList<Object[]>(INITITAL_ARRAY_SIZE);
columnInfo = new ArrayList<GATKReportColumnV2>(numColumns);
columnNameToIndex = new HashMap<Object, Integer>(numColumns);
rowIdToIndex = new HashMap<Object, Integer>();
}
/** Compiled form of INVALID_TABLE_NAME_REGEX, built once rather than on every validation call. */
private static final Pattern INVALID_NAME_PATTERN = Pattern.compile(INVALID_TABLE_NAME_REGEX);

/**
 * Verifies that a table, column or row-ID name contains only allowed characters: ASCII letters, digits,
 * underscore, hyphen and period. Spaces and any other characters are rejected. An empty name is accepted.
 *
 * @param name the name to check; must not be null
 * @return true if the name is valid, false if otherwise
 */
private boolean isValidName(String name) {
    // the pattern matches a single forbidden character, so any hit makes the name invalid
    return !INVALID_NAME_PATTERN.matcher(name).find();
}
/**
 * Verifies that a table description fits on a single line, i.e. contains neither a carriage return nor a
 * line feed. Any other character is allowed.
 *
 * @param description the table description to check; must not be null
 * @return true if the description contains no line-break characters, false if otherwise
 */
private boolean isValidDescription(String description) {
    // two plain character searches; no regular expression needs to be compiled for this
    return description.indexOf('\r') < 0 && description.indexOf('\n') < 0;
}
/**
 * Register an ID for a new row appended at the end of the table, without writing the ID into the first column.
 *
 * @param ID the unique ID
 */
public void addRowID(final String ID) {
    final boolean populateFirstColumn = false;
    addRowID(ID, populateFirstColumn);
}
/**
 * Register an ID for a new row appended at the end of the table.
 *
 * @param ID                  the unique ID
 * @param populateFirstColumn should we automatically populate the first column with the row's ID?
 */
public void addRowID(final String ID, final boolean populateFirstColumn) {
    // the new row goes right after the last existing one
    final int nextRowIndex = underlyingData.size();
    addRowIDMapping(ID, nextRowIndex, populateFirstColumn);
}
/**
 * Map an ID to the given row index, without writing the ID into the first column.
 *
 * @param ID    the unique ID
 * @param index the index associated with the ID
 */
public void addRowIDMapping(final String ID, final int index) {
    final boolean populateFirstColumn = false;
    addRowIDMapping(ID, index, populateFirstColumn);
}
/**
 * Map an ID to the given row index, growing the table so that the row exists.
 * When the ID is also to be written into the first column it must be a valid name.
 *
 * @param ID                  the unique ID
 * @param index               the index associated with the ID
 * @param populateFirstColumn should we automatically populate the first column with the row's ID?
 * @throws ReviewedStingException if populateFirstColumn is set and the ID's string form is not a valid name
 */
public void addRowIDMapping(final Object ID, final int index, final boolean populateFirstColumn) {
    if ( populateFirstColumn ) {
        // only IDs that end up as cell values are subject to the naming rules
        final String idText = ID.toString();
        if ( !isValidName(idText) )
            throw new ReviewedStingException("Attempted to set a GATKReportTable ID of '" + ID + "'; GATKReportTable IDs must be purely alphanumeric - no spaces or special characters are allowed.");
    }

    // make sure the row exists (without auto-registering index IDs for the filler rows), then register the ID
    expandTo(index, false);
    rowIdToIndex.put(ID, index);

    if ( populateFirstColumn )
        set(index, 0, ID);
}
/**
 * Add a column to the table without specifying a format string.
 * The empty format passed along here is what the two-argument overload receives as "no format given".
 *
 * @param columnName the name of the column
 */
public void addColumn(String columnName) {
    final String unspecifiedFormat = "";
    addColumn(columnName, unspecifiedFormat);
}
/**
 * Add a column to the table with the format string used to display its data.
 * The new column is appended after the existing ones and becomes addressable by name.
 *
 * @param columnName the name of the column; must pass isValidName()
 * @param format     the format string used to display data
 * @throws ReviewedStingException if the column name is not valid
 */
public void addColumn(String columnName, String format) {
    if ( isValidName(columnName) ) {
        // the new column's index is the current column count
        final int newColumnIndex = columnInfo.size();
        columnNameToIndex.put(columnName, newColumnIndex);
        columnInfo.add(new GATKReportColumnV2(columnName, format));
        return;
    }
    throw new ReviewedStingException("Attempted to set a GATKReportTable column name of '" + columnName + "'. GATKReportTable column names must be purely alphanumeric - no spaces or special characters are allowed.");
}
/**
 * Check that the requested cell exists in the table. This method never grows the table; callers that want to
 * write past the last row must expand the table before calling it.
 *
 * @param rowIndex the row index
 * @param colIndex the column index
 * @throws ReviewedStingException if either index is negative, the row index is not below the current number of
 *                                rows, or the column index is not below the current number of columns
 */
private void verifyEntry(final int rowIndex, final int colIndex) {
    // the upper bound on the row keeps readers from hitting a raw IndexOutOfBoundsException
    final boolean rowExists = rowIndex >= 0 && rowIndex < underlyingData.size();
    final boolean columnExists = colIndex >= 0 && colIndex < getNumColumns();
    if ( !rowExists || !columnExists )
        throw new ReviewedStingException("attempted to access a cell that does not exist in table '" + tableName + "'");
}
/**
 * Grow the table with empty rows until the given row index exists. Does nothing if the row already exists
 * (which includes any negative index).
 *
 * @param rowIndex       the row index that must exist afterwards
 * @param updateRowIdMap should each newly created row be registered in the row ID map under its own index?
 */
private void expandTo(final int rowIndex, final boolean updateRowIdMap) {
    while ( underlyingData.size() <= rowIndex ) {
        // the row about to be appended lands at the current size
        final int newRowIndex = underlyingData.size();
        if ( updateRowIdMap )
            rowIdToIndex.put(newRowIndex, newRowIndex);
        underlyingData.add(new Object[getNumColumns()]);
    }
}
/**
 * Set the value for a given position in the table, addressed by row ID and column name.
 * If the row ID is not known yet, a new row is appended to the table and mapped to the ID.
 *
 * @param rowID      the row ID
 * @param columnName the name of the column
 * @param value      the value to set
 * @throws ReviewedStingException if the table has no column with the given name
 */
public void set(final Object rowID, final String columnName, final Object value) {
    // resolve the column first so that an unknown column fails cleanly, before any row is created
    final Integer colIndex = columnNameToIndex.get(columnName);
    if ( colIndex == null )
        throw new ReviewedStingException("attempted to set a value in column '" + columnName + "', which does not exist in table '" + tableName + "'");

    if ( !rowIdToIndex.containsKey(rowID) ) {
        // unknown ID: map it to a fresh row at the end of the table
        final int newRowIndex = underlyingData.size();
        rowIdToIndex.put(rowID, newRowIndex);
        expandTo(newRowIndex, false);
    }
    set(rowIdToIndex.get(rowID).intValue(), colIndex.intValue(), value);
}
/**
 * Set the value for a given position in the table, addressed by row and column index.
 * The table is grown to contain the row if needed (new rows are registered under their own index as row ID).
 * A null value is stored as the string "null"; a String value is first converted to the column's type when
 * possible. The column's display width and alignment are updated for the stored value.
 *
 * @param rowIndex the row index
 * @param colIndex the column index
 * @param value    the value to set
 * @throws ReviewedStingException if the cell does not exist, or the value's type does not match a typed column
 */
public void set(final int rowIndex, final int colIndex, Object value) {
    // grow first, then validate: this order is what allows writing to a row that does not exist yet
    expandTo(rowIndex, true);
    verifyEntry(rowIndex, colIndex);

    final GATKReportColumnV2 column = columnInfo.get(colIndex);

    // We do not accept internal null values
    final Object stored = (value == null) ? "null" : fixType(value, column);

    final GATKReportDataType columnType = column.getDataType();
    final boolean typeAccepted = columnType.equals(GATKReportDataType.fromObject(stored)) || columnType.equals(GATKReportDataType.Unknown);
    if ( !typeAccepted )
        throw new ReviewedStingException(String.format("Tried to add an object of type: %s to a column of type: %s", GATKReportDataType.fromObject(stored).name(), column.getDataType().name()));

    underlyingData.get(rowIndex)[colIndex] = stored;
    column.updateFormatting(stored);
}
/**
* Returns true if the table contains a row mapping with the given ID
*
* @param rowID the row ID
* @return true if the ID is a key of the row ID map, false otherwise
*/
public boolean containsRowID(final Object rowID) {
return rowIdToIndex.containsKey(rowID);
}
/**
* Returns the row mapping IDs
*
* @return the key set of the row ID map itself (a live view, not a copy)
*/
public Collection<Object> getRowIDs() {
return rowIdToIndex.keySet();
}
/**
 * Add one to the integer stored at the given position in the table.
 * If the row ID is not known yet, a new row is appended for it and the cell is set to 1.
 *
 * @param rowID      the row ID
 * @param columnName the name of the column
 * @throws ReviewedStingException if the row exists but the cell does not currently hold an Integer
 */
public void increment(final Object rowID, final String columnName) {
    final int previous;
    if ( rowIdToIndex.containsKey(rowID) ) {
        final Object current = get(rowID, columnName);
        if ( !(current instanceof Integer) )
            throw new ReviewedStingException("Attempting to increment a value in a cell that is not an integer");
        previous = (Integer)current;
    } else {
        // first sighting of this ID: create its row and start counting from zero
        rowIdToIndex.put(rowID, underlyingData.size());
        underlyingData.add(new Object[getNumColumns()]);
        previous = 0;
    }

    set(rowIdToIndex.get(rowID), columnNameToIndex.get(columnName), previous + 1);
}
/**
 * Returns the index of the first row whose leading cells equal the given values: the first value is compared
 * with the first column, the second with the second column, and so on.
 * Ex: "CountVariants", "dbsnp", "eval", "called", "all", "novel", "all"
 * <p>
 * A null entry in columnValues matches only a cell that has never been set.
 *
 * @param columnValues column values.
 * @return The index of the first row matching the column values or -1 if no such row exists (also when
 *         columnValues is null, empty, or longer than the number of columns).
 */
public int findRowByData(final Object... columnValues) {
    if ( columnValues == null || columnValues.length == 0 || columnValues.length > getNumColumns() )
        return -1;

    for ( int rowIndex = 0; rowIndex < underlyingData.size(); rowIndex++ ) {
        final Object[] row = underlyingData.get(rowIndex);

        boolean matches = true;
        for ( int colIndex = 0; matches && colIndex < columnValues.length; colIndex++ ) {
            final Object wanted = columnValues[colIndex];
            final Object actual = row[colIndex];
            // null-safe comparison: a null query value must not blow up with a NullPointerException
            matches = (wanted == null) ? actual == null : wanted.equals(actual);
        }

        if ( matches )
            return rowIndex;
    }

    return -1;
}
/**
 * Convert a String value into the type expected by the column, when such a conversion is possible.
 * Strings destined for an Integer column are parsed as Long, for a Decimal column as Double, and a
 * one-character string destined for a Character column becomes that Character.
 * Non-String values, Strings for String columns, and Strings that cannot be converted are returned unchanged.
 *
 * @param value  the value about to be stored
 * @param column the column the value is destined for
 * @return the converted value, or the original value if no conversion applies
 */
private Object fixType(final Object value, final GATKReportColumnV2 column) {
    // todo -- Types have to be more flexible. For example, %d should accept Integers, Shorts and Bytes.
    if ( !(value instanceof String) )
        return value;

    final String text = (String) value;
    final GATKReportDataType type = column.getDataType();

    try {
        if ( type.equals(GATKReportDataType.Integer) )
            return Long.parseLong(text);
        else if ( type.equals(GATKReportDataType.Decimal) )
            return Double.parseDouble(text);
    } catch (NumberFormatException ignored) {
        // not a parsable number: keep the original string and let the caller's type check decide
        return value;
    }

    if ( type.equals(GATKReportDataType.Character) && text.length() == 1 )
        return text.charAt(0);

    return value;
}
/**
 * Get a value from the given position in the table, addressed by row ID and column name.
 *
 * @param rowID      the row ID
 * @param columnName the name of the column
 * @return the value stored at the specified position in the table
 * @throws ReviewedStingException if the row ID or the column name is not known to this table
 */
public Object get(final Object rowID, final String columnName) {
    // look both keys up explicitly: unboxing a missing mapping would otherwise fail with a bare NullPointerException
    final Integer rowIndex = rowIdToIndex.get(rowID);
    if ( rowIndex == null )
        throw new ReviewedStingException("attempted to access row ID '" + rowID + "', which does not exist in table '" + tableName + "'");

    final Integer colIndex = columnNameToIndex.get(columnName);
    if ( colIndex == null )
        throw new ReviewedStingException("attempted to access column '" + columnName + "', which does not exist in table '" + tableName + "'");

    return get(rowIndex.intValue(), colIndex.intValue());
}
/**
 * Get a value from the given position in the table, addressed by row and column index.
 *
 * @param rowIndex    the index of the row
 * @param columnIndex the index of the column
 * @return the value stored at the specified position in the table
 */
public Object get(int rowIndex, int columnIndex) {
    verifyEntry(rowIndex, columnIndex);
    final Object[] row = underlyingData.get(rowIndex);
    return row[columnIndex];
}
/**
 * Write the table to the PrintStream, formatted nicely to be human-readable, AWK-able, and R-friendly.
 * <p>
 * Output layout:
 * <pre>
 * #:GATKTable:nColumns:nRows:(format string for each column):;
 * #:GATKTable:TableName:Description
 * (column names, fixed width)
 * (one fixed-width line per row)
 * (empty line)
 * </pre>
 * Rows are written in row-ID order when the table sorts by row ID, and in storage order otherwise.
 *
 * @param out the PrintStream to which the table should be written
 * @throws ReviewedStingException if sorting is requested and the row IDs cannot be compared with each other
 */
void write(final PrintStream out) {
    final int numColumns = columnInfo.size();

    // first header line: the table definition followed by one format per column
    out.printf(GATKTABLE_HEADER_PREFIX + ":%d:%d", getNumColumns(), getNumRows());
    for ( int i = 0; i < numColumns; i++ )
        out.print(SEPARATOR + columnInfo.get(i).getFormat());
    out.println(ENDLINE);

    // second header line: the table name & description
    out.printf(GATKTABLE_HEADER_PREFIX + ":%s:%s\n", tableName, tableDescription);

    // the column names, separated by padding
    for ( int i = 0; i < numColumns; i++ ) {
        if ( i > 0 )
            out.printf(" ");
        final GATKReportColumnV2 column = columnInfo.get(i);
        out.printf(column.getColumnFormat().getNameFormat(), column.getColumnName());
    }
    out.println();

    // the table body
    if ( !sortByRowID ) {
        for ( int rowIndex = 0; rowIndex < underlyingData.size(); rowIndex++ )
            writeRow(out, underlyingData.get(rowIndex));
    } else {
        // a TreeMap copy orders the IDs by their natural ordering, which fails if they are not mutually comparable
        final TreeMap<Object, Integer> idsInOrder;
        try {
            idsInOrder = new TreeMap<Object, Integer>(rowIdToIndex);
        } catch (ClassCastException e) {
            throw new ReviewedStingException("Unable to sort the rows based on the row IDs because the ID Objects are of different types");
        }
        for ( final Integer rowIndex : idsInOrder.values() )
            writeRow(out, underlyingData.get(rowIndex));
    }

    // a blank line terminates the table
    out.println();
}
/**
 * Write a single row of the table body, one padded cell per column, followed by a line break.
 * A cell that was never set is written as "null"; floating point values in an Unknown-typed column are written
 * with 8 decimals; everything else is rendered with the column's format string.
 *
 * @param out the PrintStream to which the row should be written
 * @param row the cells of the row, in column order
 */
private void writeRow(final PrintStream out, final Object[] row) {
    int colIndex = 0;
    for ( final Object cell : row ) {
        // padding goes between cells, not before the first one
        if ( colIndex > 0 )
            out.printf(" ");

        final GATKReportColumnV2 info = columnInfo.get(colIndex);

        final String text;
        if ( cell == null ) {
            text = "null";
        } else {
            final boolean untypedFloatingPoint = info.getDataType().equals(GATKReportDataType.Unknown) && (cell instanceof Double || cell instanceof Float);
            text = String.format(untypedFloatingPoint ? "%.8f" : info.getFormat(), cell);
        }

        out.printf(info.getColumnFormat().getValueFormat(), text);
        colIndex++;
    }

    out.println();
}
/**
* @return the number of rows currently stored in the table
*/
public int getNumRows() {
return underlyingData.size();
}
/**
* @return the number of columns added to the table so far
*/
public int getNumColumns() {
return columnInfo.size();
}
/**
* @return the table's own list of column metadata, in column order (the internal list, not a copy)
*/
public List<GATKReportColumnV2> getColumnInfo() {
return columnInfo;
}
/**
* @return the name of the table
*/
public String getTableName() {
return tableName;
}
/**
* @return the description of the table
*/
public String getTableDescription() {
return tableDescription;
}
/**
 * Concatenates the rows from the given table to the end of this one.
 * The row arrays themselves are shared with the other table, not copied. The other table's row IDs are
 * imported, shifted so that they point at the appended rows; an ID that already exists in this table is
 * remapped to the appended row. Column widths and alignment are updated to account for the appended values.
 *
 * @param table another GATK table with the same format as this one
 * @throws ReviewedStingException if the two tables do not have the same format
 */
public void concat(final GATKReportTableV2 table) {
    if ( !isSameFormat(table) )
        throw new ReviewedStingException("Error trying to concatenate tables with different formats");

    // the appended rows start at the current end of the table; this must be captured BEFORE adding them,
    // otherwise every imported row ID would point past the end of the table
    final int rowOffset = getNumRows();

    // add the data
    underlyingData.addAll(table.underlyingData);

    // make the column formatting aware of the appended values so that the fixed-width output stays aligned
    for ( int rowIndex = rowOffset; rowIndex < underlyingData.size(); rowIndex++ ) {
        final Object[] row = underlyingData.get(rowIndex);
        for ( int colIndex = 0; colIndex < row.length && colIndex < columnInfo.size(); colIndex++ )
            columnInfo.get(colIndex).updateFormatting(row[colIndex]);
    }

    // update the row index map
    for ( Map.Entry<Object, Integer> entry : table.rowIdToIndex.entrySet() )
        rowIdToIndex.put(entry.getKey(), entry.getValue() + rowOffset);
}
/**
 * Tells whether this table and the given one share the same layout: table name, table description,
 * number of columns and, column by column, the same format string and column name. The data held
 * in the rows is not looked at. This is the check used to decide whether two tables are gatherable
 * or reduceable.
 *
 * @param table another GATK table
 * @return true if the tables are gatherable
 */
public boolean isSameFormat(final GATKReportTableV2 table) {
    final boolean sameHeader = tableName.equals(table.tableName)
            && tableDescription.equals(table.tableDescription)
            && columnInfo.size() == table.columnInfo.size();
    if ( !sameHeader )
        return false;

    for ( int col = 0; col < columnInfo.size(); col++ ) {
        final GATKReportColumnV2 mine = columnInfo.get(col);
        final GATKReportColumnV2 theirs = table.columnInfo.get(col);

        if ( !mine.getFormat().equals(theirs.getFormat()) )
            return false;
        if ( !mine.getColumnName().equals(theirs.getColumnName()) )
            return false;
    }

    return true;
}
/**
 * Checks that the tables are exactly the same: same format (see {@link #isSameFormat}), same
 * number of rows and, row by row in output order, the same cell values.
 *
 * Cells are compared by their string representation so that values of different numeric types
 * (e.g. Long vs. Integer) still match. A null cell is compared as the text "null", which is also
 * how a null cell is printed when the table is written out.
 *
 * Note that this is an overload taking a GATKReportTableV2, not an override of
 * {@link Object#equals(Object)}.
 *
 * @param table another GATK report table
 * @return true if all fields in the tables and their columns are equal; false otherwise, including when table is null
 */
public boolean equals(final GATKReportTableV2 table) {
    if ( table == null ||
         !isSameFormat(table) ||
         underlyingData.size() != table.underlyingData.size() )
        return false;

    final List<Object[]> myOrderedRows = getOrderedRows();
    final List<Object[]> otherOrderedRows = table.getOrderedRows();

    for ( int i = 0; i < underlyingData.size(); i++ ) {
        final Object[] myData = myOrderedRows.get(i);
        final Object[] otherData = otherOrderedRows.get(i);

        // rows of differing width can never be equal (and must not be indexed past their end)
        if ( myData.length != otherData.length )
            return false;

        for ( int j = 0; j < myData.length; j++ ) {
            // String.valueOf() rather than toString(): cells may legitimately be null.
            // Comparing as strings deals with different typing (e.g. Long vs. Integer).
            if ( !String.valueOf(myData[j]).equals(String.valueOf(otherData[j])) )
                return false;
        }
    }

    return true;
}
/**
 * Returns the rows in the order used for comparison.
 *
 * When the table is not sorted by row ID this is simply the underlying data list itself. Otherwise
 * a new list is built with the rows arranged by the sorted order of their row IDs. If the row IDs
 * cannot be sorted (a ClassCastException is raised while building the sorted map), the underlying
 * data list is returned unchanged instead of failing.
 *
 * @return the rows of this table, sorted by row ID where requested and possible
 */
private List<Object[]> getOrderedRows() {
    if ( sortByRowID ) {
        final TreeMap<Object, Integer> indexById;
        try {
            indexById = new TreeMap<Object, Integer>(rowIdToIndex);
        } catch (ClassCastException e) {
            // the IDs are not mutually comparable: fall back to the stored order
            return underlyingData;
        }

        final List<Object[]> rowsInIdOrder = new ArrayList<Object[]>(underlyingData.size());
        for ( final Integer rowIndex : indexById.values() )
            rowsInIdOrder.add(underlyingData.get(rowIndex));
        return rowsInIdOrder;
    }

    return underlyingData;
}
}

View File

@ -45,10 +45,17 @@ public enum GATKReportVersion {
/*
* Differences between v0.x
* - Added table and report headers
* - Headers changed format, include the numbe rof tables, rows, and metadata for gathering
* - Headers changed format, include the number of tables, rows, and metadata for gathering
* - IS GATHERABLE
*/
V1_0("v1.0");
V1_0("v1.0"),
/*
* Differences between v1.0
* - column numbers in header reflect the actual count of columns
* - primary keys are never displayed
*/
V1_1("v1.1");
private final String versionString;
@ -81,6 +88,9 @@ public enum GATKReportVersion {
if (header.startsWith("#:GATKReport.v1.0"))
return GATKReportVersion.V1_0;
if (header.startsWith("#:GATKReport.v1.1"))
return GATKReportVersion.V1_1;
throw new ReviewedStingException("Unknown GATK report version in header: " + header);
}
}

View File

@ -1,6 +1,6 @@
package org.broadinstitute.sting.gatk.walkers.bqsr;
import org.broadinstitute.sting.gatk.report.GATKReportTable;
import org.broadinstitute.sting.gatk.report.GATKReportTableV2;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.recalibration.QualQuantizer;
@ -77,13 +77,14 @@ public class QuantizationInfo {
return quantizationLevels;
}
public GATKReportTable generateReportTable() {
GATKReportTable quantizedTable = new GATKReportTable(RecalDataManager.QUANTIZED_REPORT_TABLE_TITLE, "Quality quantization map");
quantizedTable.addPrimaryKey(RecalDataManager.QUALITY_SCORE_COLUMN_NAME);
quantizedTable.addColumn(RecalDataManager.QUANTIZED_COUNT_COLUMN_NAME, 0L);
quantizedTable.addColumn(RecalDataManager.QUANTIZED_VALUE_COLUMN_NAME, (byte) 0);
public GATKReportTableV2 generateReportTable() {
GATKReportTableV2 quantizedTable = new GATKReportTableV2(RecalDataManager.QUANTIZED_REPORT_TABLE_TITLE, "Quality quantization map", 3);
quantizedTable.addColumn(RecalDataManager.QUALITY_SCORE_COLUMN_NAME);
quantizedTable.addColumn(RecalDataManager.QUANTIZED_COUNT_COLUMN_NAME);
quantizedTable.addColumn(RecalDataManager.QUANTIZED_VALUE_COLUMN_NAME);
for (int qual = 0; qual <= QualityUtils.MAX_QUAL_SCORE; qual++) {
quantizedTable.set(qual, RecalDataManager.QUALITY_SCORE_COLUMN_NAME, qual);
quantizedTable.set(qual, RecalDataManager.QUANTIZED_COUNT_COLUMN_NAME, empiricalQualCounts.get(qual));
quantizedTable.set(qual, RecalDataManager.QUANTIZED_VALUE_COLUMN_NAME, quantizedQuals.get(qual));
}

View File

@ -27,7 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.report.GATKReport;
import org.broadinstitute.sting.gatk.report.GATKReportTable;
import org.broadinstitute.sting.gatk.report.GATKReportTableV2;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.R.RScriptExecutor;
import org.broadinstitute.sting.utils.Utils;
@ -223,8 +223,8 @@ public class RecalDataManager {
logger.info("");
}
private static List<GATKReportTable> generateReportTables(Map<BQSRKeyManager, Map<BitSet, RecalDatum>> keysAndTablesMap) {
List<GATKReportTable> result = new LinkedList<GATKReportTable>();
private static List<GATKReportTableV2> generateReportTables(Map<BQSRKeyManager, Map<BitSet, RecalDatum>> keysAndTablesMap) {
List<GATKReportTableV2> result = new LinkedList<GATKReportTableV2>();
int tableIndex = 0;
final Pair<String, String> covariateValue = new Pair<String, String>(RecalDataManager.COVARIATE_VALUE_COLUMN_NAME, "%s");
@ -240,7 +240,6 @@ public class RecalDataManager {
Map<BitSet, RecalDatum> recalTable = entry.getValue();
boolean isReadGroupTable = tableIndex == 0; // special case for the read group table so we can print the extra column it needs.
GATKReportTable reportTable = new GATKReportTable("RecalTable" + tableIndex++, "");
List<Covariate> requiredList = keyManager.getRequiredCovariates(); // ask the key manager what required covariates were used in this recal table
List<Covariate> optionalList = keyManager.getOptionalCovariates(); // ask the key manager what optional covariates were used in this recal table
@ -264,11 +263,11 @@ public class RecalDataManager {
columnNames.add(nObservations);
columnNames.add(nErrors);
reportTable.addPrimaryKey("PrimaryKey", false); // every table must have a primary key (hidden)
GATKReportTableV2 reportTable = new GATKReportTableV2("RecalTable" + tableIndex++, "", columnNames.size());
for (Pair<String, String> columnName : columnNames)
reportTable.addColumn(columnName.getFirst(), true, columnName.getSecond()); // every table must have the event type
reportTable.addColumn(columnName.getFirst(), columnName.getSecond()); // every table must have the event type
long primaryKey = 0L;
int rowIndex = 0;
for (Map.Entry<BitSet, RecalDatum> recalTableEntry : recalTable.entrySet()) { // create a map with column name => key value for all covariate keys
BitSet bitSetKey = recalTableEntry.getKey();
@ -288,9 +287,9 @@ public class RecalDataManager {
for (Map.Entry<String, Object> dataEntry : columnData.entrySet()) {
String columnName = dataEntry.getKey();
Object value = dataEntry.getValue();
reportTable.set(primaryKey, columnName, value.toString());
reportTable.set(rowIndex, columnName, value.toString());
}
primaryKey++;
rowIndex++;
}
result.add(reportTable);
}
@ -301,11 +300,11 @@ public class RecalDataManager {
outputRecalibrationReport(RAC.generateReportTable(), quantizationInfo.generateReportTable(), generateReportTables(keysAndTablesMap), outputFile);
}
public static void outputRecalibrationReport(GATKReportTable argumentTable, QuantizationInfo quantizationInfo, LinkedHashMap<BQSRKeyManager,Map<BitSet, RecalDatum>> keysAndTablesMap, PrintStream outputFile) {
public static void outputRecalibrationReport(GATKReportTableV2 argumentTable, QuantizationInfo quantizationInfo, LinkedHashMap<BQSRKeyManager,Map<BitSet, RecalDatum>> keysAndTablesMap, PrintStream outputFile) {
outputRecalibrationReport(argumentTable, quantizationInfo.generateReportTable(), generateReportTables(keysAndTablesMap), outputFile);
}
private static void outputRecalibrationReport(GATKReportTable argumentTable, GATKReportTable quantizationTable, List<GATKReportTable> recalTables, PrintStream outputFile) {
private static void outputRecalibrationReport(GATKReportTableV2 argumentTable, GATKReportTableV2 quantizationTable, List<GATKReportTableV2> recalTables, PrintStream outputFile) {
GATKReport report = new GATKReport();
report.addTable(argumentTable);
report.addTable(quantizationTable);

View File

@ -27,7 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.report.GATKReportTable;
import org.broadinstitute.sting.gatk.report.GATKReportTableV2;
import org.broadinstitute.sting.utils.Utils;
import java.io.File;
@ -172,26 +172,43 @@ public class RecalibrationArgumentCollection {
public File recalibrationReport = null;
public GATKReportTable generateReportTable() {
GATKReportTable argumentsTable = new GATKReportTable("Arguments", "Recalibration argument collection values used in this run");
argumentsTable.addPrimaryKey("Argument");
argumentsTable.addColumn(RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, "null");
public GATKReportTableV2 generateReportTable() {
GATKReportTableV2 argumentsTable = new GATKReportTableV2("Arguments", "Recalibration argument collection values used in this run", 2);
argumentsTable.addColumn("Argument");
argumentsTable.addColumn(RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME);
argumentsTable.addRowID("covariate", true);
argumentsTable.set("covariate", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, (COVARIATES == null) ? "null" : Utils.join(",", COVARIATES));
argumentsTable.addRowID("standard_covs", true);
argumentsTable.set("standard_covs", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, USE_STANDARD_COVARIATES);
argumentsTable.addRowID("run_without_dbsnp", true);
argumentsTable.set("run_without_dbsnp", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, RUN_WITHOUT_DBSNP);
argumentsTable.addRowID("solid_recal_mode", true);
argumentsTable.set("solid_recal_mode", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, SOLID_RECAL_MODE);
argumentsTable.addRowID("solid_nocall_strategy", true);
argumentsTable.set("solid_nocall_strategy", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, SOLID_NOCALL_STRATEGY);
argumentsTable.addRowID("mismatches_context_size", true);
argumentsTable.set("mismatches_context_size", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, MISMATCHES_CONTEXT_SIZE);
argumentsTable.addRowID("insertions_context_size", true);
argumentsTable.set("insertions_context_size", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, INSERTIONS_CONTEXT_SIZE);
argumentsTable.addRowID("deletions_context_size", true);
argumentsTable.set("deletions_context_size", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, DELETIONS_CONTEXT_SIZE);
argumentsTable.addRowID("mismatches_default_quality", true);
argumentsTable.set("mismatches_default_quality", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, MISMATCHES_DEFAULT_QUALITY);
argumentsTable.addRowID("insertions_default_quality", true);
argumentsTable.set("insertions_default_quality", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, INSERTIONS_DEFAULT_QUALITY);
argumentsTable.addRowID("low_quality_tail", true);
argumentsTable.set("low_quality_tail", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, LOW_QUAL_TAIL);
argumentsTable.addRowID("default_platform", true);
argumentsTable.set("default_platform", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, DEFAULT_PLATFORM);
argumentsTable.addRowID("force_platform", true);
argumentsTable.set("force_platform", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, FORCE_PLATFORM);
argumentsTable.addRowID("quantizing_levels", true);
argumentsTable.set("quantizing_levels", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, QUANTIZING_LEVELS);
argumentsTable.addRowID("keep_intermediate_files", true);
argumentsTable.set("keep_intermediate_files", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, KEEP_INTERMEDIATE_FILES);
argumentsTable.addRowID("no_plots", true);
argumentsTable.set("no_plots", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, NO_PLOTS);
argumentsTable.addRowID("recalibration_report", true);
argumentsTable.set("recalibration_report", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, recalibrationReport == null ? "null" : recalibrationReport.getAbsolutePath());
return argumentsTable;
}

View File

@ -1,7 +1,7 @@
package org.broadinstitute.sting.gatk.walkers.bqsr;
import org.broadinstitute.sting.gatk.report.GATKReport;
import org.broadinstitute.sting.gatk.report.GATKReportTable;
import org.broadinstitute.sting.gatk.report.GATKReportTableV2;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
@ -21,7 +21,7 @@ public class RecalibrationReport {
private final LinkedHashMap<BQSRKeyManager, Map<BitSet, RecalDatum>> keysAndTablesMap; // quick access reference to the read group table and its key manager
private final ArrayList<Covariate> requestedCovariates = new ArrayList<Covariate>(); // list of all covariates to be used in this calculation
private final GATKReportTable argumentTable; // keep the argument table untouched just for output purposes
private final GATKReportTableV2 argumentTable; // keep the argument table untouched just for output purposes
private final RecalibrationArgumentCollection RAC; // necessary for quantizing qualities with the same parameter
public RecalibrationReport(final File RECAL_FILE) {
@ -30,7 +30,7 @@ public class RecalibrationReport {
argumentTable = report.getTable(RecalDataManager.ARGUMENT_REPORT_TABLE_TITLE);
RAC = initializeArgumentCollectionTable(argumentTable);
GATKReportTable quantizedTable = report.getTable(RecalDataManager.QUANTIZED_REPORT_TABLE_TITLE);
GATKReportTableV2 quantizedTable = report.getTable(RecalDataManager.QUANTIZED_REPORT_TABLE_TITLE);
quantizationInfo = initializeQuantizationTable(quantizedTable);
Pair<ArrayList<Covariate>, ArrayList<Covariate>> covariates = RecalDataManager.initializeCovariates(RAC); // initialize the required and optional covariates
@ -53,11 +53,11 @@ public class RecalibrationReport {
int nRequiredCovariates = requiredCovariatesToAdd.size(); // the number of required covariates defines which table we are looking at (RG, QUAL or ALL_COVARIATES)
final String UNRECOGNIZED_REPORT_TABLE_EXCEPTION = "Unrecognized table. Did you add an extra required covariate? This is a hard check.";
if (nRequiredCovariates == 1) { // if there is only one required covariate, this is the read group table
final GATKReportTable reportTable = report.getTable(RecalDataManager.READGROUP_REPORT_TABLE_TITLE);
final GATKReportTableV2 reportTable = report.getTable(RecalDataManager.READGROUP_REPORT_TABLE_TITLE);
table = parseReadGroupTable(keyManager, reportTable);
}
else if (nRequiredCovariates == 2 && optionalCovariatesToAdd.isEmpty()) { // when we have both required covariates and no optional covariates we're at the QUAL table
final GATKReportTable reportTable = report.getTable(RecalDataManager.QUALITY_SCORE_REPORT_TABLE_TITLE);
final GATKReportTableV2 reportTable = report.getTable(RecalDataManager.QUALITY_SCORE_REPORT_TABLE_TITLE);
table = parseQualityScoreTable(keyManager, reportTable);
}
else
@ -68,12 +68,12 @@ public class RecalibrationReport {
final BQSRKeyManager keyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates); // initializing it's corresponding key manager
final GATKReportTable reportTable = report.getTable(RecalDataManager.ALL_COVARIATES_REPORT_TABLE_TITLE);
final GATKReportTableV2 reportTable = report.getTable(RecalDataManager.ALL_COVARIATES_REPORT_TABLE_TITLE);
final Map<BitSet, RecalDatum> table = parseAllCovariatesTable(keyManager, reportTable);
keysAndTablesMap.put(keyManager, table);
}
protected RecalibrationReport(QuantizationInfo quantizationInfo, LinkedHashMap<BQSRKeyManager, Map<BitSet, RecalDatum>> keysAndTablesMap, GATKReportTable argumentTable, RecalibrationArgumentCollection RAC) {
protected RecalibrationReport(QuantizationInfo quantizationInfo, LinkedHashMap<BQSRKeyManager, Map<BitSet, RecalDatum>> keysAndTablesMap, GATKReportTableV2 argumentTable, RecalibrationArgumentCollection RAC) {
this.quantizationInfo = quantizationInfo;
this.keysAndTablesMap = keysAndTablesMap;
this.argumentTable = argumentTable;
@ -138,7 +138,7 @@ public class RecalibrationReport {
* @param reportTable the GATKReport table containing data for this table
* @return a lookup table indexed by bitsets containing the empirical quality and estimated quality reported for every key.
*/
private Map<BitSet, RecalDatum> parseAllCovariatesTable(BQSRKeyManager keyManager, GATKReportTable reportTable) {
private Map<BitSet, RecalDatum> parseAllCovariatesTable(BQSRKeyManager keyManager, GATKReportTableV2 reportTable) {
ArrayList<String> columnNamesOrderedList = new ArrayList<String>(5);
columnNamesOrderedList.add(RecalDataManager.READGROUP_COLUMN_NAME);
columnNamesOrderedList.add(RecalDataManager.QUALITY_SCORE_COLUMN_NAME);
@ -155,7 +155,7 @@ public class RecalibrationReport {
* @param reportTable the GATKReport table containing data for this table
* @return a lookup table indexed by bitsets containing the empirical quality and estimated quality reported for every key.
*/
private Map<BitSet, RecalDatum> parseQualityScoreTable(BQSRKeyManager keyManager, GATKReportTable reportTable) {
private Map<BitSet, RecalDatum> parseQualityScoreTable(BQSRKeyManager keyManager, GATKReportTableV2 reportTable) {
ArrayList<String> columnNamesOrderedList = new ArrayList<String>(3);
columnNamesOrderedList.add(RecalDataManager.READGROUP_COLUMN_NAME);
columnNamesOrderedList.add(RecalDataManager.QUALITY_SCORE_COLUMN_NAME);
@ -170,7 +170,7 @@ public class RecalibrationReport {
* @param reportTable the GATKReport table containing data for this table
* @return a lookup table indexed by bitsets containing the empirical quality and estimated quality reported for every key.
*/
private Map<BitSet, RecalDatum> parseReadGroupTable(BQSRKeyManager keyManager, GATKReportTable reportTable) {
private Map<BitSet, RecalDatum> parseReadGroupTable(BQSRKeyManager keyManager, GATKReportTableV2 reportTable) {
ArrayList<String> columnNamesOrderedList = new ArrayList<String>(2);
columnNamesOrderedList.add(RecalDataManager.READGROUP_COLUMN_NAME);
columnNamesOrderedList.add(RecalDataManager.EVENT_TYPE_COLUMN_NAME);
@ -185,24 +185,24 @@ public class RecalibrationReport {
* @param columnNamesOrderedList a list of columns to read from the report table and build as key for this particular table
* @return a lookup table indexed by bitsets containing the empirical quality and estimated quality reported for every key.
*/
private Map<BitSet, RecalDatum> genericRecalTableParsing(BQSRKeyManager keyManager, GATKReportTable reportTable, ArrayList<String> columnNamesOrderedList, boolean hasEstimatedQReportedColumn) {
private Map<BitSet, RecalDatum> genericRecalTableParsing(BQSRKeyManager keyManager, GATKReportTableV2 reportTable, ArrayList<String> columnNamesOrderedList, boolean hasEstimatedQReportedColumn) {
Map<BitSet, RecalDatum> result = new HashMap<BitSet, RecalDatum>(reportTable.getNumRows()*2);
for (Object primaryKey : reportTable.getPrimaryKeys()) {
for ( int i = 0; i < reportTable.getNumRows(); i++ ) {
int nKeys = columnNamesOrderedList.size();
Object [] keySet = new Object[nKeys];
for (int i = 0; i < nKeys; i++)
keySet[i] = reportTable.get(primaryKey, columnNamesOrderedList.get(i)); // all these objects are okay in String format, the key manager will handle them correctly (except for the event type (see below)
for (int j = 0; j < nKeys; j++)
keySet[j] = reportTable.get(i, columnNamesOrderedList.get(j)); // all these objects are okay in String format, the key manager will handle them correctly (except for the event type (see below)
keySet[keySet.length-1] = EventType.eventFrom((String) keySet[keySet.length-1]); // the last key is always the event type. We convert the string ("M", "I" or "D") to an enum object (necessary for the key manager).
BitSet bitKey = keyManager.bitSetFromKey(keySet);
long nObservations = (Long) reportTable.get(primaryKey, RecalDataManager.NUMBER_OBSERVATIONS_COLUMN_NAME);
long nErrors = (Long) reportTable.get(primaryKey, RecalDataManager.NUMBER_ERRORS_COLUMN_NAME);
double empiricalQuality = (Double) reportTable.get(primaryKey, RecalDataManager.EMPIRICAL_QUALITY_COLUMN_NAME);
long nObservations = (Long) reportTable.get(i, RecalDataManager.NUMBER_OBSERVATIONS_COLUMN_NAME);
long nErrors = (Long) reportTable.get(i, RecalDataManager.NUMBER_ERRORS_COLUMN_NAME);
double empiricalQuality = (Double) reportTable.get(i, RecalDataManager.EMPIRICAL_QUALITY_COLUMN_NAME);
double estimatedQReported = hasEstimatedQReportedColumn ? // the estimatedQreported column only exists in the ReadGroup table
(Double) reportTable.get(primaryKey, RecalDataManager.ESTIMATED_Q_REPORTED_COLUMN_NAME) : // we get it if we are in the read group table
Byte.parseByte((String) reportTable.get(primaryKey, RecalDataManager.QUALITY_SCORE_COLUMN_NAME)); // or we use the reported quality if we are in any other table
(Double) reportTable.get(i, RecalDataManager.ESTIMATED_Q_REPORTED_COLUMN_NAME) : // we get it if we are in the read group table
Byte.parseByte((String) reportTable.get(i, RecalDataManager.QUALITY_SCORE_COLUMN_NAME)); // or we use the reported quality if we are in any other table
RecalDatum recalDatum = new RecalDatum(nObservations, nErrors, estimatedQReported, empiricalQuality);
result.put(bitKey, recalDatum);
@ -216,13 +216,13 @@ public class RecalibrationReport {
* @param table the GATKReportTable containing the quantization mappings
* @return an ArrayList with the quantization mappings from 0 to MAX_QUAL_SCORE
*/
private QuantizationInfo initializeQuantizationTable(GATKReportTable table) {
private QuantizationInfo initializeQuantizationTable(GATKReportTableV2 table) {
Byte[] quals = new Byte[QualityUtils.MAX_QUAL_SCORE + 1];
Long[] counts = new Long[QualityUtils.MAX_QUAL_SCORE + 1];
for (Object primaryKey : table.getPrimaryKeys()) {
Object quantizedObject = table.get(primaryKey, RecalDataManager.QUANTIZED_VALUE_COLUMN_NAME);
Object countObject = table.get(primaryKey, RecalDataManager.QUANTIZED_COUNT_COLUMN_NAME);
byte originalQual = Byte.parseByte(primaryKey.toString());
for ( int i = 0; i < table.getNumRows(); i++ ) {
byte originalQual = (byte)i;
Object quantizedObject = table.get(i, RecalDataManager.QUANTIZED_VALUE_COLUMN_NAME);
Object countObject = table.get(i, RecalDataManager.QUANTIZED_COUNT_COLUMN_NAME);
byte quantizedQual = Byte.parseByte(quantizedObject.toString());
long quantizedCount = Long.parseLong(countObject.toString());
quals[originalQual] = quantizedQual;
@ -237,63 +237,64 @@ public class RecalibrationReport {
* @param table the GATKReportTable containing the arguments and its corresponding values
* @return a RAC object properly initialized with all the objects in the table
*/
private RecalibrationArgumentCollection initializeArgumentCollectionTable(GATKReportTable table) {
private RecalibrationArgumentCollection initializeArgumentCollectionTable(GATKReportTableV2 table) {
RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection();
for (Object primaryKey : table.getPrimaryKeys()) {
Object value = table.get(primaryKey, RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME);
for ( int i = 0; i < table.getNumRows(); i++ ) {
final String argument = table.get(i, "Argument").toString();
Object value = table.get(i, RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME);
if (value.equals("null"))
value = null; // generic translation of null values that were printed out as strings | todo -- add this capability to the GATKReport
if (primaryKey.equals("covariate") && value != null)
if (argument.equals("covariate") && value != null)
RAC.COVARIATES = value.toString().split(",");
else if (primaryKey.equals("standard_covs"))
else if (argument.equals("standard_covs"))
RAC.USE_STANDARD_COVARIATES = Boolean.parseBoolean((String) value);
else if (primaryKey.equals("solid_recal_mode"))
else if (argument.equals("solid_recal_mode"))
RAC.SOLID_RECAL_MODE = RecalDataManager.SOLID_RECAL_MODE.recalModeFromString((String) value);
else if (primaryKey.equals("solid_nocall_strategy"))
else if (argument.equals("solid_nocall_strategy"))
RAC.SOLID_NOCALL_STRATEGY = RecalDataManager.SOLID_NOCALL_STRATEGY.nocallStrategyFromString((String) value);
else if (primaryKey.equals("mismatches_context_size"))
else if (argument.equals("mismatches_context_size"))
RAC.MISMATCHES_CONTEXT_SIZE = Integer.parseInt((String) value);
else if (primaryKey.equals("insertions_context_size"))
else if (argument.equals("insertions_context_size"))
RAC.INSERTIONS_CONTEXT_SIZE = Integer.parseInt((String) value);
else if (primaryKey.equals("deletions_context_size"))
else if (argument.equals("deletions_context_size"))
RAC.DELETIONS_CONTEXT_SIZE = Integer.parseInt((String) value);
else if (primaryKey.equals("mismatches_default_quality"))
else if (argument.equals("mismatches_default_quality"))
RAC.MISMATCHES_DEFAULT_QUALITY = Byte.parseByte((String) value);
else if (primaryKey.equals("insertions_default_quality"))
else if (argument.equals("insertions_default_quality"))
RAC.INSERTIONS_DEFAULT_QUALITY = Byte.parseByte((String) value);
else if (primaryKey.equals("deletions_default_quality"))
else if (argument.equals("deletions_default_quality"))
RAC.DELETIONS_DEFAULT_QUALITY = Byte.parseByte((String) value);
else if (primaryKey.equals("low_quality_tail"))
else if (argument.equals("low_quality_tail"))
RAC.LOW_QUAL_TAIL = Byte.parseByte((String) value);
else if (primaryKey.equals("default_platform"))
else if (argument.equals("default_platform"))
RAC.DEFAULT_PLATFORM = (String) value;
else if (primaryKey.equals("force_platform"))
else if (argument.equals("force_platform"))
RAC.FORCE_PLATFORM = (String) value;
else if (primaryKey.equals("quantizing_levels"))
else if (argument.equals("quantizing_levels"))
RAC.QUANTIZING_LEVELS = Integer.parseInt((String) value);
else if (primaryKey.equals("keep_intermediate_files"))
else if (argument.equals("keep_intermediate_files"))
RAC.KEEP_INTERMEDIATE_FILES = Boolean.parseBoolean((String) value);
else if (primaryKey.equals("no_plots"))
else if (argument.equals("no_plots"))
RAC.NO_PLOTS = Boolean.parseBoolean((String) value);
else if (primaryKey.equals("recalibration_report"))
else if (argument.equals("recalibration_report"))
RAC.recalibrationReport = (value == null) ? null : new File((String) value);
}

View File

@ -7,7 +7,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.report.GATKReport;
import org.broadinstitute.sting.gatk.report.GATKReportTable;
import org.broadinstitute.sting.gatk.report.GATKReportTableV2;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.QualityUtils;
@ -75,7 +75,7 @@ public class ErrorRatePerCycle extends LocusWalker<Integer, Integer> {
public Integer MIN_MAPPING_QUAL = 20;
private GATKReport report;
private GATKReportTable table;
private GATKReportTableV2 table;
private final static String reportName = "ErrorRatePerCycle";
private final static String reportDescription = "The error rate per sequenced position in the reads";
@ -122,15 +122,14 @@ public class ErrorRatePerCycle extends LocusWalker<Integer, Integer> {
public void initialize() {
report = new GATKReport();
report.addTable(reportName, reportDescription);
report.addTable(reportName, reportDescription, 6, true);
table = report.getTable(reportName);
table.addPrimaryKey("key", false);
table.addColumn("readgroup", 0);
table.addColumn("cycle", 0);
table.addColumn("mismatches", 0);
table.addColumn("counts", 0);
table.addColumn("qual", 0);
table.addColumn("errorrate", 0.0f, "%.2e");
table.addColumn("readgroup");
table.addColumn("cycle");
table.addColumn("mismatches");
table.addColumn("counts");
table.addColumn("qual");
table.addColumn("errorrate", "%.2e");
}
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
@ -147,9 +146,11 @@ public class ErrorRatePerCycle extends LocusWalker<Integer, Integer> {
if ( BaseUtils.isRegularBase(readBase) && BaseUtils.isRegularBase(refBase) ) {
final TableKey key = new TableKey(read.getReadGroup().getReadGroupId(), cycle);
if ( ! table.containsKey(key) ) {
if ( ! table.containsRowID(key) ) {
table.set(key, "cycle", cycle);
table.set(key, "readgroup", read.getReadGroup().getReadGroupId());
table.set(key, "counts", 0);
table.set(key, "mismatches", 0);
}
table.increment(key, "counts");
@ -167,7 +168,7 @@ public class ErrorRatePerCycle extends LocusWalker<Integer, Integer> {
public Integer reduce(Integer value, Integer sum) { return null; }
public void onTraversalDone(Integer sum) {
for ( final Object key : table.getPrimaryKeys() ) {
for ( Object key : table.getRowIDs() ) {
final int mismatches = (Integer)table.get(key, "mismatches");
final int count = (Integer)table.get(key, "counts");
final double errorRate = (mismatches + 1) / (1.0*(count + 1));

View File

@ -30,7 +30,7 @@ import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.report.GATKReport;
import org.broadinstitute.sting.gatk.report.GATKReportTable;
import org.broadinstitute.sting.gatk.report.GATKReportTableV2;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.utils.Median;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
@ -168,27 +168,28 @@ public class ReadGroupProperties extends ReadWalker<Integer, Integer> {
@Override
public void onTraversalDone(Integer sum) {
final GATKReport report = new GATKReport();
report.addTable(TABLE_NAME, "Table of read group properties");
GATKReportTable table = report.getTable(TABLE_NAME);
report.addTable(TABLE_NAME, "Table of read group properties", 12);
GATKReportTableV2 table = report.getTable(TABLE_NAME);
DateFormat dateFormatter = DateFormat.getDateInstance(DateFormat.SHORT);
table.addPrimaryKey("readgroup");
table.addColumn("readgroup");
//* Emits a GATKReport containing read group, sample, library, platform, center, median insert size and
//* median read length for each read group in every BAM file.
table.addColumn("sample", "NA");
table.addColumn("library", "NA");
table.addColumn("platform", "NA");
table.addColumn("center", "NA");
table.addColumn("date", "NA");
table.addColumn("has.any.reads", "false");
table.addColumn("is.paired.end", "false");
table.addColumn("n.reads.analyzed", "NA");
table.addColumn("simple.read.type", "NA");
table.addColumn("median.read.length", Integer.valueOf(0));
table.addColumn("median.insert.size", Integer.valueOf(0));
table.addColumn("sample", "%s");
table.addColumn("library", "%s");
table.addColumn("platform", "%s");
table.addColumn("center", "%s");
table.addColumn("date", "%s");
table.addColumn("has.any.reads");
table.addColumn("is.paired.end");
table.addColumn("n.reads.analyzed", "%d");
table.addColumn("simple.read.type", "%s");
table.addColumn("median.read.length");
table.addColumn("median.insert.size");
for ( final SAMReadGroupRecord rg : getToolkit().getSAMFileHeader().getReadGroups() ) {
final String rgID = rg.getId();
table.addRowID(rgID, true);
PerReadGroupInfo info = readGroupInfo.get(rgID);
// we are paired if > 25% of reads are paired
@ -217,7 +218,7 @@ public class ReadGroupProperties extends ReadWalker<Integer, Integer> {
report.print(out);
}
private final void setTableValue(GATKReportTable table, final String rgID, final String key, final Object value) {
private final void setTableValue(GATKReportTableV2 table, final String rgID, final String key, final Object value) {
table.set(rgID, key, value == null ? "NA" : value);
}
}

View File

@ -5,7 +5,7 @@ import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.report.GATKReport;
import org.broadinstitute.sting.gatk.report.GATKReportTable;
import org.broadinstitute.sting.gatk.report.GATKReportTableV2;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
@ -53,20 +53,19 @@ public class ReadLengthDistribution extends ReadWalker<Integer, Integer> {
private GATKReport report;
public void initialize() {
final List<SAMReadGroupRecord> readGroups = getToolkit().getSAMFileHeader().getReadGroups();
report = new GATKReport();
report.addTable("ReadLengthDistribution", "Table of read length distributions");
GATKReportTable table = report.getTable("ReadLengthDistribution");
report.addTable("ReadLengthDistribution", "Table of read length distributions", 1 + (readGroups.isEmpty() ? 1 : readGroups.size()));
GATKReportTableV2 table = report.getTable("ReadLengthDistribution");
table.addPrimaryKey("readLength");
table.addColumn("readLength");
List<SAMReadGroupRecord> readGroups = getToolkit().getSAMFileHeader().getReadGroups();
if (readGroups.isEmpty())
table.addColumn("SINGLE_SAMPLE", 0);
table.addColumn("SINGLE_SAMPLE");
else
for (SAMReadGroupRecord rg : readGroups)
table.addColumn(rg.getSample(), 0);
table.addColumn(rg.getSample());
}
public boolean filter(ReferenceContext ref, GATKSAMRecord read) {
@ -75,7 +74,7 @@ public class ReadLengthDistribution extends ReadWalker<Integer, Integer> {
@Override
public Integer map(ReferenceContext referenceContext, GATKSAMRecord samRecord, ReadMetaDataTracker readMetaDataTracker) {
GATKReportTable table = report.getTable("ReadLengthDistribution");
GATKReportTableV2 table = report.getTable("ReadLengthDistribution");
int length = Math.abs(samRecord.getReadLength());
String sample = samRecord.getReadGroup().getSample();

View File

@ -26,7 +26,7 @@ package org.broadinstitute.sting.gatk.walkers.diffengine;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.report.GATKReport;
import org.broadinstitute.sting.gatk.report.GATKReportTable;
import org.broadinstitute.sting.gatk.report.GATKReportTableV2;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.classloader.PluginManager;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
@ -235,14 +235,17 @@ public class DiffEngine {
// now that we have a specific list of values we want to show, display them
GATKReport report = new GATKReport();
final String tableName = "differences";
report.addTable(tableName, "Summarized differences between the master and test files. See http://www.broadinstitute.org/gsa/wiki/index.php/DiffEngine for more information", false);
GATKReportTable table = report.getTable(tableName);
table.addPrimaryKey("Difference", true);
table.addColumn("NumberOfOccurrences", 0);
table.addColumn("ExampleDifference", 0);
for ( Difference diff : toShow ) {
table.set(diff.getPath(), "NumberOfOccurrences", diff.getCount());
table.set(diff.getPath(), "ExampleDifference", diff.valueDiffString());
report.addTable(tableName, "Summarized differences between the master and test files. See http://www.broadinstitute.org/gsa/wiki/index.php/DiffEngine for more information", 3);
final GATKReportTableV2 table = report.getTable(tableName);
table.addColumn("Difference");
table.addColumn("NumberOfOccurrences");
table.addColumn("ExampleDifference");
// Fill one table row per summarized difference, in the order given by toShow.
// The row for each difference is registered under its path (used as the row ID)
// at index i before its cells are set, so the table.set(key, ...) calls resolve
// to that row.
// NOTE: the loop must run while i < toShow.size(); with '>' the condition is
// false on the first check and the table would be emitted with no rows.
for ( int i = 0; i < toShow.size(); i++ ) {
    final Difference diff = toShow.get(i);
    final String key = diff.getPath();
    // NOTE(review): the 'true' flag presumably also writes the ID into the first
    // ("Difference") column -- confirm against GATKReportTableV2.addRowIDMapping
    table.addRowIDMapping(key, i, true);
    table.set(key, "NumberOfOccurrences", diff.getCount());
    table.set(key, "ExampleDifference", diff.valueDiffString());
}
GATKReport output = new GATKReport(table);
output.print(params.out);

View File

@ -26,7 +26,8 @@ package org.broadinstitute.sting.gatk.walkers.diffengine;
import org.broadinstitute.sting.gatk.report.GATKReport;
import org.broadinstitute.sting.gatk.report.GATKReportColumn;
import org.broadinstitute.sting.gatk.report.GATKReportTable;
import org.broadinstitute.sting.gatk.report.GATKReportColumnV2;
import org.broadinstitute.sting.gatk.report.GATKReportTableV2;
import java.io.File;
import java.io.FileReader;
@ -52,7 +53,7 @@ public class GATKReportDiffableReader implements DiffableReader {
// one line reads the whole thing into memory
GATKReport report = new GATKReport(file);
for (GATKReportTable table : report.getTables()) {
for (GATKReportTableV2 table : report.getTables()) {
root.add(tableToNode(table, root));
}
@ -62,23 +63,22 @@ public class GATKReportDiffableReader implements DiffableReader {
}
}
private DiffNode tableToNode(GATKReportTable table, DiffNode root) {
private DiffNode tableToNode(GATKReportTableV2 table, DiffNode root) {
DiffNode tableRoot = DiffNode.empty(table.getTableName(), root);
tableRoot.add("Description", table.getTableDescription());
tableRoot.add("NumberOfRows", table.getNumRows());
for (GATKReportColumn column : table.getColumns().values()) {
for ( GATKReportColumnV2 column : table.getColumnInfo() ) {
DiffNode columnRoot = DiffNode.empty(column.getColumnName(), tableRoot);
columnRoot.add("Width", column.getColumnFormat().getWidth());
// NOTE: as the values are trimmed during parsing left/right alignment is not currently preserved
columnRoot.add("Displayable", column.isDisplayable());
columnRoot.add("Displayable", true);
int n = 1;
for (Object elt : column.values()) {
String name = column.getColumnName() + n++;
columnRoot.add(name, elt.toString());
for ( int i = 0; i < table.getNumRows(); i++ ) {
String name = column.getColumnName() + (i+1);
columnRoot.add(name, table.get(i, column.getColumnName()).toString());
}
tableRoot.add(columnRoot);

View File

@ -25,7 +25,7 @@
package org.broadinstitute.sting.gatk.walkers.varianteval;
import org.broadinstitute.sting.gatk.report.GATKReport;
import org.broadinstitute.sting.gatk.report.GATKReportTable;
import org.broadinstitute.sting.gatk.report.GATKReportTableV2;
import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.VariantEvaluator;
import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.VariantStratifier;
import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.manager.StratificationManager;
@ -50,30 +50,32 @@ import java.util.Map;
* and supports writing out the data in these evaluators to a GATKReport.
*/
public class VariantEvalReportWriter {
private final GATKReport report;
private final StratificationManager<VariantStratifier, EvaluationContext> stratManager;
public VariantEvalReportWriter(final StratificationManager<VariantStratifier, EvaluationContext> stratManager,
final Collection<VariantStratifier> stratifiers,
final Collection<VariantEvaluator> evaluators) {
this.stratManager = stratManager;
this.report = initializeGATKReport(stratifiers, evaluators);
}
protected VariantEvalReportWriter() {} // no public access
/**
* The business end of the class. Writes out the data in the provided stratManager
* to the PrintStream out
*
* @param out
* @param out the output stream
* @param stratManager the stratification manager
* @param stratifiers the stratifiers
* @param evaluators the evaluators
*/
public final void writeReport(final PrintStream out) {
public static void writeReport(final PrintStream out,
final StratificationManager<VariantStratifier, EvaluationContext> stratManager,
final Collection<VariantStratifier> stratifiers,
final Collection<VariantEvaluator> evaluators) {
final GATKReport report = initializeGATKReport(stratifiers, evaluators);
for ( int key = 0; key < stratManager.size(); key++ ) {
final String stratStateString = stratManager.getStratsAndStatesStringForKey(key);
final List<Pair<VariantStratifier, Object>> stratsAndStates = stratManager.getStratsAndStatesForKey(key);
final EvaluationContext nec = stratManager.get(key);
for ( final VariantEvaluator ve : nec.getVariantEvaluators() ) {
final GATKReportTable table = report.getTable(ve.getSimpleName());
final GATKReportTableV2 table = report.getTable(ve.getSimpleName());
final AnalysisModuleScanner scanner = new AnalysisModuleScanner(ve);
final Map<Field, DataPoint> datamap = scanner.getData();
@ -120,9 +122,10 @@ public class VariantEvalReportWriter {
* @param primaryKey
* @param stratsAndStates
*/
private void setStratificationColumns(final GATKReportTable table,
final String primaryKey,
final List<Pair<VariantStratifier, Object>> stratsAndStates) {
private static void setStratificationColumns(final GATKReportTableV2 table,
final String primaryKey,
final List<Pair<VariantStratifier, Object>> stratsAndStates) {
table.set(primaryKey, table.getTableName(), table.getTableName());
for ( final Pair<VariantStratifier, Object> stratAndState : stratsAndStates ) {
final VariantStratifier vs = stratAndState.getFirst();
final String columnName = vs.getName();
@ -148,34 +151,33 @@ public class VariantEvalReportWriter {
*
* @return an initialized report object
*/
private GATKReport initializeGATKReport(final Collection<VariantStratifier> stratifiers,
final Collection<VariantEvaluator> evaluators) {
private static GATKReport initializeGATKReport(final Collection<VariantStratifier> stratifiers,
final Collection<VariantEvaluator> evaluators) {
final GATKReport report = new GATKReport();
for (final VariantEvaluator ve : evaluators) {
final AnalysisModuleScanner scanner = new AnalysisModuleScanner(ve);
final Map<Field, DataPoint> datamap = scanner.getData();
// create the table
final String tableName = ve.getSimpleName();
final String tableDesc = ve.getClass().getAnnotation(Analysis.class).description();
report.addTable(tableName, tableDesc, true);
report.addTable(tableName, tableDesc, 1 + stratifiers.size() + (scanner.hasMoltenField() ? 2 : datamap.size()), true);
// grab the table, and add the columns we need to it
final GATKReportTable table = report.getTable(tableName);
table.addPrimaryKey("entry", false);
final GATKReportTableV2 table = report.getTable(tableName);
table.addColumn(tableName, tableName);
// first create a column to hold each stratifier state
for (final VariantStratifier vs : stratifiers) {
final String columnName = vs.getName();
table.addColumn(columnName, null, vs.getFormat());
table.addColumn(columnName, vs.getFormat());
}
final AnalysisModuleScanner scanner = new AnalysisModuleScanner(ve);
final Map<Field, DataPoint> datamap = scanner.getData();
if ( scanner.hasMoltenField() ) {
// deal with molten data
table.addColumn(scanner.getMoltenAnnotation().variableName(), true, scanner.getMoltenAnnotation().variableFormat());
table.addColumn(scanner.getMoltenAnnotation().valueName(), true, scanner.getMoltenAnnotation().valueFormat());
table.addColumn(scanner.getMoltenAnnotation().variableName(), scanner.getMoltenAnnotation().variableFormat());
table.addColumn(scanner.getMoltenAnnotation().valueName(), scanner.getMoltenAnnotation().valueFormat());
} else {
if ( datamap.isEmpty() )
throw new ReviewedStingException("Datamap is empty for analysis " + scanner.getAnalysis());
@ -187,7 +189,7 @@ public class VariantEvalReportWriter {
// this is an atomic value, add a column for it
final String format = datamap.get(field).format();
table.addColumn(field.getName(), true, format);
table.addColumn(field.getName(), format);
} catch (SecurityException e) {
throw new StingException("SecurityException: " + e);
}

View File

@ -555,8 +555,7 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
for ( final VariantEvaluator ve : nec.getVariantEvaluators() )
ve.finalizeEvaluation();
final VariantEvalReportWriter writer = new VariantEvalReportWriter(stratManager, stratManager.getStratifiers(), stratManager.get(0).getVariantEvaluators());
writer.writeReport(out);
VariantEvalReportWriter.writeReport(out, stratManager, stratManager.getStratifiers(), stratManager.get(0).getVariantEvaluators());
}
// Accessors

View File

@ -29,7 +29,7 @@ import com.google.java.contract.Invariant;
import com.google.java.contract.Requires;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.report.GATKReport;
import org.broadinstitute.sting.gatk.report.GATKReportTable;
import org.broadinstitute.sting.gatk.report.GATKReportTableV2;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
@ -422,40 +422,42 @@ public class QualQuantizer {
}
private final void addQualHistogramToReport(final GATKReport report) {
report.addTable("QualHistogram", "Quality score histogram provided to report");
GATKReportTable table = report.getTable("QualHistogram");
report.addTable("QualHistogram", "Quality score histogram provided to report", 2);
GATKReportTableV2 table = report.getTable("QualHistogram");
table.addPrimaryKey("qual");
table.addColumn("count", "NA");
table.addColumn("qual");
table.addColumn("count");
for ( int q = 0; q < nObservationsPerQual.size(); q++ ) {
table.set(q, "qual", q);
table.set(q, "count", nObservationsPerQual.get(q));
}
}
private final void addIntervalsToReport(final GATKReport report) {
report.addTable("QualQuantizerIntervals", "Table of QualQuantizer quantization intervals");
GATKReportTable table = report.getTable("QualQuantizerIntervals");
report.addTable("QualQuantizerIntervals", "Table of QualQuantizer quantization intervals", 10);
GATKReportTableV2 table = report.getTable("QualQuantizerIntervals");
table.addPrimaryKey("name");
table.addColumn("qStart", "NA");
table.addColumn("qEnd", "NA");
table.addColumn("level", "NA");
table.addColumn("merge.order", "NA");
table.addColumn("nErrors", "NA");
table.addColumn("nObservations", "NA");
table.addColumn("qual", "NA");
table.addColumn("penalty", "NA");
table.addColumn("root.node", "NA");
table.addColumn("name");
table.addColumn("qStart");
table.addColumn("qEnd");
table.addColumn("level");
table.addColumn("merge.order");
table.addColumn("nErrors");
table.addColumn("nObservations");
table.addColumn("qual");
table.addColumn("penalty");
table.addColumn("root.node");
//table.addColumn("subintervals", "NA");
for ( QualInterval interval : quantizedIntervals)
for ( QualInterval interval : quantizedIntervals )
addIntervalToReport(table, interval, true);
}
private final void addIntervalToReport(final GATKReportTable table, QualInterval interval, final boolean atRootP) {
private final void addIntervalToReport(final GATKReportTableV2 table, final QualInterval interval, final boolean atRootP) {
final String name = interval.getName();
table.set(name, "name", name);
table.set(name, "qStart", interval.qStart);
table.set(name, "qEnd", interval.qEnd);
table.set(name, "level", interval.level);

View File

@ -36,20 +36,18 @@ import java.io.PrintStream;
public class GATKReportUnitTest extends BaseTest {
@Test
public void testParse() throws Exception {
String reportPath = validationDataLocation + "exampleGATKReportv1.tbl";
String reportPath = validationDataLocation + "exampleGATKReportv2.tbl";
GATKReport report = new GATKReport(reportPath);
Assert.assertEquals(report.getVersion(), GATKReportVersion.V1_0);
Assert.assertEquals(report.getVersion(), GATKReportVersion.V1_1);
Assert.assertEquals(report.getTables().size(), 5);
GATKReportTable countVariants = report.getTable("CountVariants");
Object countVariantsPK = countVariants.getPrimaryKeyByData("CountVariants", "dbsnp", "eval", "none", "all");
Assert.assertEquals(countVariants.get(countVariantsPK, "nProcessedLoci"), "63025520");
Assert.assertEquals(countVariants.get(countVariantsPK, "nNoCalls"), "0");
Assert.assertEquals(countVariants.get(countVariantsPK, "heterozygosity"), 4.73e-06);
GATKReportTableV2 countVariants = report.getTable("CountVariants");
Assert.assertEquals(countVariants.get(0, "nProcessedLoci"), "63025520");
Assert.assertEquals(countVariants.get(0, "nNoCalls"), "0");
Assert.assertEquals(countVariants.get(0, "heterozygosity"), 4.73e-06);
GATKReportTable validationReport = report.getTable("ValidationReport");
Object validationReportPK = countVariants.getPrimaryKeyByData("CountVariants", "dbsnp", "eval", "none", "novel");
Assert.assertEquals(validationReport.get(validationReportPK, "PPV"), Double.NaN);
GATKReportTableV2 validationReport = report.getTable("ValidationReport");
Assert.assertEquals(validationReport.get(2, "PPV"), Double.NaN);
}
@DataProvider(name = "rightAlignValues")
@ -79,9 +77,9 @@ public class GATKReportUnitTest extends BaseTest {
Assert.assertEquals(GATKReportColumn.isRightAlign(value), expected, "right align of '" + value + "'");
}
private GATKReportTable makeBasicTable() {
private GATKReportTableV2 makeBasicTable() {
GATKReport report = GATKReport.newSimpleReport("TableName", "sample", "value");
GATKReportTable table = report.getTable("TableName");
GATKReportTableV2 table = report.getTable("TableName");
report.addRow("foo.1", "hello");
report.addRow("foo.2", "world");
return table;
@ -89,37 +87,9 @@ public class GATKReportUnitTest extends BaseTest {
@Test
public void testDottedSampleName() {
GATKReportTable table = makeBasicTable();
Object pk;
pk = table.getPrimaryKeyByData("foo.1");
Assert.assertEquals(table.get(pk, "value"), "hello");
pk = table.getPrimaryKeyByData("foo.2");
Assert.assertEquals(table.get(pk, "value"), "world");
}
@Test
public void testFindPrimaryKeyByData() {
GATKReportTable table = makeBasicTable();
Assert.assertNotNull(table.findPrimaryKeyByData("foo.1"));
Assert.assertNotNull(table.findPrimaryKeyByData("foo.1", "hello"));
Assert.assertNotNull(table.findPrimaryKeyByData("foo.2"));
Assert.assertNotNull(table.findPrimaryKeyByData("foo.2", "world"));
Assert.assertNull(table.findPrimaryKeyByData("list", "longer", "than", "column", "count"));
Assert.assertNull(table.findPrimaryKeyByData("short"));
}
@Test(expectedExceptions = IllegalArgumentException.class)
public void testEmptyFindPrimaryKeyByData() {
GATKReportTable table = makeBasicTable();
table.findPrimaryKeyByData();
}
@Test(expectedExceptions = NullPointerException.class)
public void testNullFindPrimaryKeyByData() {
GATKReportTable table = makeBasicTable();
table.findPrimaryKeyByData((Object[]) null);
GATKReportTableV2 table = makeBasicTable();
Assert.assertEquals(table.get(0, "value"), "hello");
Assert.assertEquals(table.get(1, "value"), "world");
}
@Test
@ -128,7 +98,7 @@ public class GATKReportUnitTest extends BaseTest {
GATKReport report = GATKReport.newSimpleReport("TableName", "Roger", "is", "Awesome");
// Add data to simple GATK report
report.addRow( 12, 23.45, true);
report.addRow(12, 23.45, true);
report.addRow("ans", '3', 24.5);
report.addRow("hi", "", 2.3);
@ -154,42 +124,40 @@ public class GATKReportUnitTest extends BaseTest {
@Test
public void testGATKReportGatherer() {
boolean displayPK = false;
GATKReport report1, report2, report3;
report1 = new GATKReport();
report1.addTable("TableName", "Description");
report1.getTable("TableName").addPrimaryKey("id", displayPK);
report1.getTable("TableName").addColumn("colA", GATKReportDataType.String.getDefaultValue(), "%s");
report1.getTable("TableName").addColumn("colB", GATKReportDataType.Character.getDefaultValue(), "%c");
report1.getTable("TableName").set(1, "colA", "NotNum");
report1.getTable("TableName").set(1, "colB", (char) 64);
report1.addTable("TableName", "Description", 2);
report1.getTable("TableName").addColumn("colA", "%s");
report1.getTable("TableName").addColumn("colB", "%c");
report1.getTable("TableName").set(0, "colA", "NotNum");
report1.getTable("TableName").set(0, "colB", (char) 64);
report2 = new GATKReport();
report2.addTable("TableName", "Description");
report2.getTable("TableName").addPrimaryKey("id", displayPK);
report2.getTable("TableName").addColumn("colA", GATKReportDataType.String.getDefaultValue(), "%s");
report2.getTable("TableName").addColumn("colB", GATKReportDataType.Character.getDefaultValue(), "%c");
report2.getTable("TableName").set(2, "colA", "df3");
report2.getTable("TableName").set(2, "colB", 'A');
report2.addTable("TableName", "Description", 2);
report2.getTable("TableName").addColumn("colA", "%s");
report2.getTable("TableName").addColumn("colB", "%c");
report2.getTable("TableName").set(0, "colA", "df3");
report2.getTable("TableName").set(0, "colB", 'A');
report3 = new GATKReport();
report3.addTable("TableName", "Description");
report3.getTable("TableName").addPrimaryKey("id", displayPK);
report3.getTable("TableName").addColumn("colA", GATKReportDataType.String.getDefaultValue(), "%s");
report3.getTable("TableName").addColumn("colB", GATKReportDataType.Character.getDefaultValue(), "%c");
report3.getTable("TableName").set(3, "colA", "df5f");
report3.getTable("TableName").set(3, "colB", 'c');
report3.addTable("TableName", "Description", 2);
report3.getTable("TableName").addColumn("colA", "%s");
report3.getTable("TableName").addColumn("colB", "%c");
report3.getTable("TableName").set(0, "colA", "df5f");
report3.getTable("TableName").set(0, "colB", 'c');
report1.combineWith(report2);
report1.combineWith(report3);
report1.concat(report2);
report1.concat(report3);
report1.addTable("Table2", "To contain some more data types");
GATKReportTable table = report1.getTable("Table2");
table.addPrimaryKey("KEY");
table.addColumn("SomeInt", GATKReportDataType.Integer.getDefaultValue(), true, "%d");
table.addColumn("SomeFloat", GATKReportDataType.Decimal.getDefaultValue(), true, "%.16E");
table.addColumn("TrueFalse", false, true, "%B");
report1.addTable("Table2", "To contain some more data types", 3);
GATKReportTableV2 table = report1.getTable("Table2");
table.addColumn("SomeInt", "%d");
table.addColumn("SomeFloat", "%.16E");
table.addColumn("TrueFalse", "%B");
table.addRowIDMapping("12df", 0);
table.addRowIDMapping("5f", 1);
table.addRowIDMapping("RZ", 2);
table.set("12df", "SomeInt", Byte.MAX_VALUE);
table.set("12df", "SomeFloat", 34.0);
table.set("12df", "TrueFalse", true);
@ -200,17 +168,17 @@ public class GATKReportUnitTest extends BaseTest {
table.set("RZ", "SomeFloat", 535646345.657453464576);
table.set("RZ", "TrueFalse", true);
report1.addTable("Table3", "blah");
report1.getTable("Table3").addPrimaryKey("HAI");
report1.getTable("Table3").addColumn("a", true, GATKReportDataType.String.getDefaultFormatString());
report1.addTable("Table3", "blah", 1, true);
report1.getTable("Table3").addColumn("a");
report1.getTable("Table3").addRowIDMapping("q", 2);
report1.getTable("Table3").addRowIDMapping("5", 3);
report1.getTable("Table3").addRowIDMapping("573s", 0);
report1.getTable("Table3").addRowIDMapping("ZZZ", 1);
report1.getTable("Table3").set("q", "a", "34");
report1.getTable("Table3").set("5", "a", "c4g34");
report1.getTable("Table3").set("573s", "a", "fDlwueg");
report1.getTable("Table3").set("ZZZ", "a", "Dfs");
//report1.print(System.out);
try {
File file = createTempFile("GATKReportGatherer-UnitTest", ".tbl");
//System.out.format("The temporary file" + " has been created: %s%n", file);
@ -226,8 +194,5 @@ public class GATKReportUnitTest extends BaseTest {
} catch (IOException x) {
System.err.format("IOException: %s%n", x);
}
//Assert.assertEquals(1,1);
}
}

View File

@ -1,7 +1,7 @@
package org.broadinstitute.sting.gatk.walkers.bqsr;
import org.broadinstitute.sting.gatk.report.GATKReport;
import org.broadinstitute.sting.gatk.report.GATKReportTable;
import org.broadinstitute.sting.gatk.report.GATKReportTableV2;
import org.testng.Assert;
import org.testng.annotations.Test;
@ -30,8 +30,8 @@ public class BQSRGathererUnitTest {
GATKReport originalReport = new GATKReport(recal);
GATKReport calculatedReport = new GATKReport(output);
for (GATKReportTable originalTable : originalReport.getTables()) {
GATKReportTable calculatedTable = calculatedReport.getTable(originalTable.getTableName());
for (GATKReportTableV2 originalTable : originalReport.getTables()) {
GATKReportTableV2 calculatedTable = calculatedReport.getTable(originalTable.getTableName());
List<String> columnsToTest = new LinkedList<String>();
columnsToTest.add(RecalDataManager.NUMBER_OBSERVATIONS_COLUMN_NAME);
columnsToTest.add(RecalDataManager.NUMBER_ERRORS_COLUMN_NAME);
@ -59,11 +59,11 @@ public class BQSRGathererUnitTest {
* @param columnsToTest list of columns to test. All columns will be tested with the same criteria (equality given factor)
* @param factor 1 to test for equality, any other value to multiply the original value and match with the calculated
*/
private void testTablesWithColumnsAndFactor(GATKReportTable original, GATKReportTable calculated, List<String> columnsToTest, int factor) {
for (Object primaryKey : original.getPrimaryKeys()) { // tables don't necessarily have the same primary keys
private void testTablesWithColumnsAndFactor(GATKReportTableV2 original, GATKReportTableV2 calculated, List<String> columnsToTest, int factor) {
for (int row = 0; row < original.getNumRows(); row++ ) {
for (String column : columnsToTest) {
Object actual = calculated.get(primaryKey, column);
Object expected = original.get(primaryKey, column);
Object actual = calculated.get(new Integer(row), column);
Object expected = original.get(row, column);
if (factor != 1) {
if (expected instanceof Double)
@ -76,7 +76,7 @@ public class BQSRGathererUnitTest {
expected = (Byte) expected * factor;
}
}
Assert.assertEquals(actual, expected, "Primary key: " + primaryKey + " Original Table: " + original.getTableName() + " Calc Table: " + calculated.getTableName());
Assert.assertEquals(actual, expected, "Row: " + row + " Original Table: " + original.getTableName() + " Calc Table: " + calculated.getTableName());
}
}

View File

@ -35,7 +35,7 @@ public class ErrorRatePerCycleIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
"-T ErrorRatePerCycle -R " + b37KGReference + " -I " + b37GoodBAM + " -L 20:10,000,000-10,100,000 -o %s",
1,
Arrays.asList("71685716c7dde64c51bbd908c06ea742"));
Arrays.asList("dccdf3cb3193d01a1a767097e4a5c35e"));
executeTest("ErrorRatePerCycle:", spec);
}
}

View File

@ -38,7 +38,7 @@ public class ReadGroupPropertiesIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
"-T ReadGroupProperties -R " + b37KGReference + " -I " + b37GoodBAM + " -L 20:10,000,000-11,000,000 -o %s",
1,
Arrays.asList("3f1f97a1d2c5fb552ed4f33ea30d136d"));
Arrays.asList("618a671c61014deb3b284061a87b61d6"));
executeTest("ReadGroupProperties:", spec);
}
}

View File

@ -57,7 +57,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("e87932ffa1d310cecee49e7829a0f056")
Arrays.asList("7091cbeb47d041463806c8c8f98239a6")
);
executeTest("testFunctionClassWithSnpeff", spec);
}
@ -77,7 +77,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("8279ee42a6785f9c2b3dda8d82674e00")
Arrays.asList("7a09b8a6759ccee5da55f1f85a43fe9c")
);
executeTest("testStratifySamplesAndExcludeMonomorphicSites", spec);
}
@ -97,7 +97,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("0bac64d5615f901d3005247c6d016549")
Arrays.asList("f70da7be5d4d8305f3e4433c9004aee4")
);
executeTest("testFundamentalsCountVariantsSNPsandIndels", spec);
}
@ -118,7 +118,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("b84d8b4429116c887ceb5489c8782f00")
Arrays.asList("e62a3bd9914d48e2bb2fb4f5dfc5ebc0")
);
executeTest("testFundamentalsCountVariantsSNPsandIndelsWithNovelty", spec);
}
@ -140,7 +140,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("e4f37642d9113a65fbe8bc1d091c206f")
Arrays.asList("087a2d9943c53e7f49663667c3305c7e")
);
executeTest("testFundamentalsCountVariantsSNPsandIndelsWithNoveltyAndFilter", spec);
}
@ -161,7 +161,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("c5412ee824b4815dc8eea62a4c5462ef")
Arrays.asList("bca988c81a761f12627610e5a3bab5a0")
);
executeTest("testFundamentalsCountVariantsSNPsandIndelsWithCpG", spec);
}
@ -182,7 +182,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("1d42e97643afd3e7f5f8c9f6416c5883")
Arrays.asList("7ca5c0c5e79ba6cd1e5102ced851a1b4")
);
executeTest("testFundamentalsCountVariantsSNPsandIndelsWithFunctionalClass", spec);
}
@ -203,7 +203,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("8c2ba70bed2f0fdb0ca371f7038819ef")
Arrays.asList("a6a31f658ad1e76c79190ada758f157c")
);
executeTest("testFundamentalsCountVariantsSNPsandIndelsWithDegeneracy", spec);
}
@ -224,7 +224,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("c912b4b0bf1925d042119b301c183b93")
Arrays.asList("c1a3df6f89f5ddf7b7c296eb944f3fdd")
);
executeTest("testFundamentalsCountVariantsSNPsandIndelsWithSample", spec);
}
@ -247,7 +247,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("dea3d2cc53265ff8ed2f0030c40f3747")
Arrays.asList("48652b360ce031aa2f9004c9bae6bda5")
);
executeTest("testFundamentalsCountVariantsSNPsandIndelsWithJexlExpression", spec);
}
@ -272,7 +272,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("dede22b15936c38e29b850c805c7b706")
Arrays.asList("c3521b18388aff7f53691a63619b3b07")
);
executeTest("testFundamentalsCountVariantsSNPsandIndelsWithMultipleJexlExpressions", spec);
}
@ -291,7 +291,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("9a94c4c613bf69feb3d9579c353baaf2")
Arrays.asList("90a46e045f3fe8b22f102acaaeec0201")
);
executeTest("testFundamentalsCountVariantsNoCompRod", spec);
}
@ -304,7 +304,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
" --eval " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf" +
" --comp:comp_genotypes,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.head.vcf";
WalkerTestSpec spec = new WalkerTestSpec(withSelect(tests, "DP < 50", "DP50") + " " + extraArgs + " -ST CpG -o %s",
1, Arrays.asList("8d4530e9cef8531c46bbb693b84d04c7"));
1, Arrays.asList("4b9dcbce0717285e3c0c736c1bed744c"));
executeTestParallel("testSelect1", spec);
}
@ -314,7 +314,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(cmdRoot + " -ST CpG --eval:VCF3 " + validationDataLocation + vcfFile + " --comp:VCF3 " + validationDataLocation + "GenotypeConcordanceComp.vcf -noEV -EV GenotypeConcordance -o %s",
1,
Arrays.asList("9bbc762f459023af0480774eb2986af4"));
Arrays.asList("810d55b67de592f6375d9dfb282145ef"));
executeTestParallel("testVEGenotypeConcordance" + vcfFile, spec);
}
@ -325,14 +325,14 @@ public class VariantEvalIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec("-T VariantEval -R "+b37KGReference+" --eval " + variantEvalTestDataRoot + vcfFile + " -ped "+ variantEvalTestDataRoot + pedFile +" -noEV -EV MendelianViolationEvaluator -L 1:10109-10315 -o %s -mvq 0 -noST",
1,
Arrays.asList("ddcabc30c88a755a78100e30e0d491d2"));
Arrays.asList("c56e19d0647d826485d8a3b559d5c56d"));
executeTestParallel("testVEMendelianViolationEvaluator" + vcfFile, spec);
}
/**
 * Integration test that runs the VariantEval walker (-T VariantEval) against the b36
 * reference with "-ST CpG" and "-EV GenotypeConcordance", using one eval track
 * (evalYRI, VCF3) and one comp track (compYRI, VCF3) read from validationDataLocation.
 * The "-o %s" output is checked against the expected hash string handed to WalkerTestSpec
 * (presumably an MD5 of the generated report; confirm against WalkerTest).
 *
 * NOTE(review): two consecutive declarations of {@code spec} appear below with different
 * expected hashes. This looks like the before/after pair of a diff hunk rather than
 * compilable code; confirm which line is current before relying on it.
 */
@Test
public void testCompVsEvalAC() {
String extraArgs = "-T VariantEval -R "+b36KGReference+" -o %s -ST CpG -EV GenotypeConcordance --eval:evalYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.very.few.lines.vcf --comp:compYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.fake.genotypes.ac.test.vcf";
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("bb076f7239039191fde883c5e68483ea"));
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("659a15cc842f0310106fa595a26da71d"));
executeTestParallel("testCompVsEvalAC",spec);
}
@ -350,7 +350,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
/**
 * Integration test that runs the VariantEval walker against the b37 reference, restricted
 * to the pacbio.hg19.intervals file (-L), with a HapMap comp track (comphapmap) and a
 * PacBio eval track. The command line passes "-noEV -EV CompOverlap", "-sn NA12878" and
 * "-noST -ST Novelty". The "-o %s" output is checked against the expected hash string
 * handed to WalkerTestSpec (presumably an MD5 of the generated report; confirm against
 * WalkerTest).
 *
 * NOTE(review): two consecutive declarations of {@code spec} appear below with different
 * expected hashes. This looks like the before/after pair of a diff hunk rather than
 * compilable code; confirm which line is current before relying on it.
 */
@Test
public void testCompOverlap() {
String extraArgs = "-T VariantEval -R " + b37KGReference + " -L " + validationDataLocation + "VariantEval/pacbio.hg19.intervals --comp:comphapmap " + comparisonDataLocation + "Validated/HapMap/3.3/genotypes_r27_nr.b37_fwd.vcf --eval " + validationDataLocation + "VariantEval/pacbio.ts.recalibrated.vcf -noEV -EV CompOverlap -sn NA12878 -noST -ST Novelty -o %s";
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("98f9c2f5fef43dbda688d32360908615"));
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("59ad39e03678011b5f62492fa83ede04"));
executeTestParallel("testCompOverlap",spec);
}
@ -362,7 +362,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
" --dbsnp " + b37dbSNP132 +
" --eval:evalBI " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" +
" -noST -ST Novelty -o %s";
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("9d24f34d94d74417e00e3b7bcf84650f"));
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("112bb3221688acad83f29542bfb33151"));
executeTestParallel("testEvalTrackWithoutGenotypes",spec);
}
@ -374,7 +374,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
" --eval:evalBI " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" +
" --eval:evalBC " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bc.sites.vcf" +
" -noST -ST Novelty -o %s";
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("7329b0bc73c9ccaf5facd754f3410c38"));
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("81dcdde458c1ebb9aa35289ea8f12bc8"));
executeTestParallel("testMultipleEvalTracksWithoutGenotypes",spec);
}
@ -391,13 +391,13 @@ public class VariantEvalIntegrationTest extends WalkerTest {
" -noST -noEV -ST Novelty -EV CompOverlap" +
" -o %s";
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("d0218c5435c8601f2355b7d183ab032f"));
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("44146b8d4ddbaeb9409c9b88eefe7f40"));
executeTestParallel("testMultipleCompTracks",spec);
}
@Test
public void testPerSampleAndSubsettedSampleHaveSameResults1() {
String md5 = "b5cd5c286d459b8edd4ca54320e560a3";
String md5 = "5f894d726cfaa0b29d7c11ff5bb9b3fd";
WalkerTestSpec spec = new WalkerTestSpec(
buildCommandLine(
@ -452,7 +452,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("1198bfea6183bd43219071a84c79a386")
Arrays.asList("0d51321693d4afc262e4059353993d12")
);
executeTest("testAlleleCountStrat", spec);
}
@ -475,7 +475,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("1198bfea6183bd43219071a84c79a386")
Arrays.asList("0d51321693d4afc262e4059353993d12")
);
executeTest("testMultipleEvalTracksAlleleCountWithMerge", spec);
}
@ -518,7 +518,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("6decba040051daafad4ecad5a411e1e1")
Arrays.asList("74fc726760c0fcfe50c44c853756f261")
);
executeTest("testIntervalStrat", spec);
}
@ -535,7 +535,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("aad01b26198b30da5d59a05c08d863bb")
Arrays.asList("f8460af997436a5ce4407fefb0e2724d")
);
executeTest("testModernVCFWithLargeIndels", spec);
}
@ -555,7 +555,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("4fa2557663ef8fb4cdeecd667791985c")
Arrays.asList("7dc2d8983cb7d98b291ca2f60a9151b2")
);
executeTest("testStandardIndelEval", spec);
}
@ -588,7 +588,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
executeTest("testIncludingAC0 keep ac 0 = " + includeAC0, spec);
}
// Thin TestNG entry points that delegate to testIncludingAC0 with the flag set to true
// (testWithAC0) or false (testWithoutAC0), plus the expected hash string for that case
// (presumably an MD5 of the output; confirm against testIncludingAC0).
// NOTE(review): each method appears twice with different hashes. This looks like the
// before/after pair of a diff hunk; confirm which pair is current.
@Test public void testWithAC0() { testIncludingAC0(true, "0ed2c8e4b4e06973a06838bc930a132d"); }
@Test public void testWithoutAC0() { testIncludingAC0(false, "79d28ddd0ab9584776b6cbefe48331df"); }
@Test public void testWithAC0() { testIncludingAC0(true, "c786128cfe4d3e28cdbc15c5c838ad20"); }
@Test public void testWithoutAC0() { testIncludingAC0(false, "7bc505c07d9aee49571ad4b3fc9f7feb"); }
}

View File

@ -25,7 +25,7 @@
package org.broadinstitute.sting.queue.util
import org.broadinstitute.sting.queue.function.QFunction
import org.broadinstitute.sting.gatk.report.{GATKReportTable, GATKReport}
import org.broadinstitute.sting.gatk.report.{GATKReportTableV2, GATKReport}
import org.broadinstitute.sting.utils.exceptions.UserException
import org.broadinstitute.sting.queue.engine.JobRunInfo
import java.io.{PrintStream, File}
@ -65,7 +65,7 @@ trait QJobReport extends Logging {
}
/** The report group is the analysis name, transformed to contain only valid GATKReportTable characters */
def getReportGroup = self.analysisName.replaceAll(GATKReportTable.INVALID_TABLE_NAME_REGEX, "_")
def getReportGroup = self.analysisName.replaceAll(GATKReportTableV2.INVALID_TABLE_NAME_REGEX, "_")
def getReportFeatures = reportFeatures
def getReportFeatureNames: Seq[String] = getReportFeatures.keys.toSeq
@ -139,13 +139,12 @@ object QJobReport {
// create a table for each group of logs
for ( (group, groupLogs) <- groupLogs(logs) ) {
report.addTable(group, "Job logs for " + group)
val table: GATKReportTable = report.getTable(group)
table.addPrimaryKey("jobName", false)
val keys = logKeys(groupLogs)
report.addTable(group, "Job logs for " + group, keys.size)
val table: GATKReportTableV2 = report.getTable(group)
// add the columns
keys.foreach(table.addColumn(_, 0))
keys.foreach(table.addColumn(_))
for (log <- groupLogs) {
for ( key <- keys )
table.set(log.getReportName, key, log.getReportFeature(key))

View File

@ -136,7 +136,7 @@ object PipelineTest extends BaseTest with Logging {
println(" value (min,target,max) table key metric")
for (validation <- evalSpec.validations) {
val table = report.getTable(validation.table)
val key = table.getPrimaryKeyByData(validation.table +: validation.key.split('.') : _*)
val key = table.findRowByData(validation.table +: validation.key.split('.') : _*)
val value = String.valueOf(table.get(key, validation.metric))
val inRange = if (value == null) false else validation.inRange(value)
val flag = if (!inRange) "*" else " "

View File

@ -1,5 +1,5 @@
#:GATKReport.v1.0:5
#:GATKTable:true:1:14::;
#:GATKReport.v1.1:5
#:GATKTable:2:14::;
#:GATKTable:Arguments:Recalibration argument collection values used in this run
Argument Value
covariate null
@ -17,7 +17,7 @@ solid_nocall_strategy THROW_EXCEPTION
solid_recal_mode SET_Q_ZERO
standard_covs true
#:GATKTable:true:2:94:::;
#:GATKTable:3:94:::;
#:GATKTable:Quantized:Quality quantization map
QualityScore Count QuantizedScore
0 20 3
@ -115,14 +115,14 @@ QualityScore Count QuantizedScore
92 0 92
93 0 93
#:GATKTable:false:6:3:%s:%s:%.4f:%.4f:%d:%d:;
#:GATKTable:6:3:%s:%s:%.4f:%.4f:%d:%d:;
#:GATKTable:RecalTable0:
ReadGroup EventType EmpiricalQuality EstimatedQReported Observations Errors
exampleBAM.bam.bam D 25.8092 45.0000 380 0
exampleBAM.bam.bam M 14.0483 15.4820 380 14
exampleBAM.bam.bam I 25.8092 45.0000 380 0
#:GATKTable:false:6:32:%s:%s:%s:%.4f:%d:%d:;
#:GATKTable:6:32:%s:%s:%s:%.4f:%d:%d:;
#:GATKTable:RecalTable1:
ReadGroup QualityScore EventType EmpiricalQuality Observations Errors
exampleBAM.bam.bam 32 M 15.1851 32 0
@ -158,7 +158,7 @@ exampleBAM.bam.bam 45 D 25.8092 380
exampleBAM.bam.bam 13 M 6.0206 3 0
exampleBAM.bam.bam 28 M 12.0412 15 0
#:GATKTable:false:8:1354:%s:%s:%s:%s:%s:%.4f:%d:%d:;
#:GATKTable:8:1354:%s:%s:%s:%s:%s:%.4f:%d:%d:;
#:GATKTable:RecalTable2:
ReadGroup QualityScore CovariateValue CovariateName EventType EmpiricalQuality Observations Errors
exampleBAM.bam.bam 45 TGAAAGTG Context D 3.0103 1 0