From a26b04ba17a01db593ed4f314c0052b493fe5793 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Fri, 18 May 2012 01:11:26 -0400 Subject: [PATCH] Extensive refactoring of the GATKReports. This was a beast. The practical differences between version 1.0 and this one (v1.1) are: * the underlying data structure now uses arrays instead of hashes, which should drastically reduce the memory overhead required to create large tables. * no more primary keys; you can still create arbitrary IDs to index into rows, but there is no special cased primary key column in the table. * no more dangerous/ugly table operations supported except to increment a cell's value (if an int) or to concatenate 2 tables. Integration tests change because table headers are different. Old classes are still lying around. Will clean those up in a subsequent commit. --- .../sting/gatk/report/GATKReport.java | 109 ++- .../sting/gatk/report/GATKReportColumnV2.java | 146 ++++ .../sting/gatk/report/GATKReportGatherer.java | 2 +- .../sting/gatk/report/GATKReportTableV2.java | 672 ++++++++++++++++++ .../sting/gatk/report/GATKReportVersion.java | 14 +- .../gatk/walkers/bqsr/QuantizationInfo.java | 13 +- .../gatk/walkers/bqsr/RecalDataManager.java | 21 +- .../bqsr/RecalibrationArgumentCollection.java | 27 +- .../walkers/bqsr/RecalibrationReport.java | 89 +-- .../diagnostics/ErrorRatePerCycle.java | 25 +- .../diagnostics/ReadGroupProperties.java | 33 +- .../diagnostics/ReadLengthDistribution.java | 19 +- .../gatk/walkers/diffengine/DiffEngine.java | 21 +- .../diffengine/GATKReportDiffableReader.java | 18 +- .../varianteval/VariantEvalReportWriter.java | 56 +- .../varianteval/VariantEvalWalker.java | 3 +- .../utils/recalibration/QualQuantizer.java | 40 +- .../sting/gatk/report/GATKReportUnitTest.java | 125 ++-- .../walkers/bqsr/BQSRGathererUnitTest.java | 16 +- .../ErrorRatePerCycleIntegrationTest.java | 2 +- .../ReadGroupPropertiesIntegrationTest.java | 2 +- .../VariantEvalIntegrationTest.java | 56 +- 
.../sting/queue/util/QJobReport.scala | 11 +- .../sting/queue/pipeline/PipelineTest.scala | 2 +- public/testdata/exampleGRP.grp | 12 +- 25 files changed, 1161 insertions(+), 373 deletions(-) create mode 100755 public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumnV2.java create mode 100755 public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTableV2.java diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java index f2291e5ec..009137ac7 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java @@ -31,6 +31,7 @@ import org.broadinstitute.sting.utils.exceptions.UserException; import java.io.*; import java.util.Collection; import java.util.List; +import java.util.Map; import java.util.TreeMap; /** @@ -38,11 +39,11 @@ import java.util.TreeMap; */ public class GATKReport { public static final String GATKREPORT_HEADER_PREFIX = "#:GATKReport."; - public static final GATKReportVersion LATEST_REPORT_VERSION = GATKReportVersion.V1_0; + public static final GATKReportVersion LATEST_REPORT_VERSION = GATKReportVersion.V1_1; private static final String SEPARATOR = ":"; private GATKReportVersion version = LATEST_REPORT_VERSION; - private final TreeMap tables = new TreeMap(); + private final TreeMap tables = new TreeMap(); /** * Create a new, empty GATKReport. @@ -70,10 +71,10 @@ public class GATKReport { /** * Create a new GATK report from GATK report tables - * @param tables Any number of tables that you want ot add to the report + * @param tables Any number of tables that you want to add to the report */ - public GATKReport(GATKReportTable... tables) { - for( GATKReportTable table: tables) + public GATKReport(GATKReportTableV2... 
tables) { + for( GATKReportTableV2 table: tables) addTable(table); } @@ -103,12 +104,10 @@ public class GATKReport { int nTables = Integer.parseInt(reportHeader.split(":")[2]); - // Read each tables according ot the number of tables + // Read each table according to the number of tables for (int i = 0; i < nTables; i++) { - addTable(new GATKReportTable(reader, version)); + addTable(new GATKReportTableV2(reader, version)); } - - } /** @@ -116,9 +115,10 @@ public class GATKReport { * * @param tableName the name of the table * @param tableDescription the description of the table + * @param numColumns the number of columns in this table */ - public void addTable(String tableName, String tableDescription) { - addTable(tableName, tableDescription, true); + public void addTable(final String tableName, final String tableDescription, final int numColumns) { + addTable(tableName, tableDescription, numColumns, false); } /** @@ -126,10 +126,11 @@ public class GATKReport { * * @param tableName the name of the table * @param tableDescription the description of the table - * @param sortByPrimaryKey whether to sort the rows by the primary key + * @param numColumns the number of columns in this table + * @param sortByRowID whether to sort the rows by the row ID */ - public void addTable(String tableName, String tableDescription, boolean sortByPrimaryKey) { - GATKReportTable table = new GATKReportTable(tableName, tableDescription, sortByPrimaryKey); + public void addTable(final String tableName, final String tableDescription, final int numColumns, final boolean sortByRowID) { + GATKReportTableV2 table = new GATKReportTableV2(tableName, tableDescription, numColumns, sortByRowID); tables.put(tableName, table); } @@ -138,12 +139,12 @@ public class GATKReport { * * @param table the table to add */ - public void addTable(GATKReportTable table) { + public void addTable(GATKReportTableV2 table) { tables.put(table.getTableName(), table); } - public void addTables(List gatkReportTables) { -
for (GATKReportTable table : gatkReportTables) + public void addTables(List gatkReportTableV2s) { + for ( GATKReportTableV2 table : gatkReportTableV2s ) addTable(table); } @@ -163,8 +164,8 @@ public class GATKReport { * @param tableName the name of the table * @return the table object */ - public GATKReportTable getTable(String tableName) { - GATKReportTable table = tables.get(tableName); + public GATKReportTableV2 getTable(String tableName) { + GATKReportTableV2 table = tables.get(tableName); if (table == null) throw new ReviewedStingException("Table is not in GATKReport: " + tableName); return table; @@ -177,30 +178,29 @@ public class GATKReport { */ public void print(PrintStream out) { out.println(GATKREPORT_HEADER_PREFIX + getVersion().toString() + SEPARATOR + getTables().size()); - for (GATKReportTable table : tables.values()) + for (GATKReportTableV2 table : tables.values()) table.write(out); } - public Collection getTables() { + public Collection getTables() { return tables.values(); } /** * This is the main function is charge of gathering the reports. It checks that the reports are compatible and then - * calls the table atheirng functions. + * calls the table gathering functions. * * @param input another GATKReport of the same format */ - public void combineWith(GATKReport input) { + public void concat(GATKReport input) { - if (!this.isSameFormat(input)) { + if ( !isSameFormat(input) ) { throw new ReviewedStingException("Failed to combine GATKReport, format doesn't match!"); } - for (String tableName : input.tables.keySet()) { - tables.get(tableName).combineWith(input.getTable(tableName)); + for ( Map.Entry table : tables.entrySet() ) { + table.getValue().concat(input.getTable(table.getKey())); } - } public GATKReportVersion getVersion() { @@ -271,9 +271,8 @@ public class GATKReport { * @param columns The names of the columns in your table * @return a simplified GATK report */ - public static GATKReport newSimpleReport(String tableName, String... 
columns) { - GATKReportTable table = new GATKReportTable(tableName, "A simplified GATK table report"); - table.addPrimaryKey("id", false); + public static GATKReport newSimpleReport(final String tableName, final String... columns) { + GATKReportTableV2 table = new GATKReportTableV2(tableName, "A simplified GATK table report", columns.length); for (String column : columns) { table.addColumn(column, ""); @@ -289,48 +288,20 @@ public class GATKReport { * This method provides an efficient way to populate a simplified GATK report. This method will only work on reports * that qualify as simplified GATK reports. See the newSimpleReport() constructor for more information. * - * @param values the row of data to be added to the table. + * @param values the row of data to be added to the table. * Note: the number of arguments must match the columns in the table. */ - public void addRow(Object... values) { - // Must be a simplified GATK Report - if (isSimpleReport()) { + public void addRow(final Object... 
values) { + // Must be a simple report + if ( tables.size() != 1 ) + throw new ReviewedStingException("Cannot write a row to a complex GATK Report"); - GATKReportTable table = tables.firstEntry().getValue(); - if (table.getColumns().size() != values.length) { - throw new StingException("The number of arguments in addRow() must match the number of columns in the table"); - } - - int counter = table.getNumRows() + 1; - int i = 0; - - for (String columnName : table.getColumns().keySet()) { - table.set(counter, columnName, values[i]); - i++; - } - - } else { - throw new StingException("Cannot add a Row to a non-Simplified GATK Report"); - } - - - } - - /** - * Checks if the GATK report qualifies as a "simple" GATK report - * - * @return true is the report is a simplified GATK report - */ - private boolean isSimpleReport() { - if (tables.size() != 1) - return false; - - GATKReportTable table = tables.firstEntry().getValue(); - - if (!table.getPrimaryKeyName().equals("id")) - return false; - - return true; + GATKReportTableV2 table = tables.firstEntry().getValue(); + if ( table.getNumColumns() != values.length ) + throw new ReviewedStingException("The number of arguments in writeRow() must match the number of columns in the table"); + final int rowIndex = table.getNumRows(); + for ( int i = 0; i < values.length; i++ ) + table.set(rowIndex, i, values[i]); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumnV2.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumnV2.java new file mode 100755 index 000000000..d7200fd56 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumnV2.java @@ -0,0 +1,146 @@ +/* + * Copyright (c) 2012, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the 
rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.report; + +import org.apache.commons.lang.math.NumberUtils; + +import java.util.Arrays; +import java.util.Collection; + +/** + * column information within a GATK report table + */ +public class GATKReportColumnV2 { + final private String columnName; + final private String format; + final private GATKReportDataType dataType; + + private GATKReportColumnFormat columnFormat; + private GATKReportColumnFormat.Alignment alignment = GATKReportColumnFormat.Alignment.RIGHT; // default alignment is to the right unless values added ask for a left alignment + private int maxWidth = 0; + + /** + * Construct the column object, specifying the column name, default value, whether or not the column should be + * displayed, and the format string. This cannot be null. 
+ * + * @param columnName the name of the column + * @param format format string + */ + public GATKReportColumnV2(final String columnName, final String format) { + this.columnName = columnName; + this.maxWidth = columnName.length(); + if ( format.equals("") ) { + this.format = "%s"; + this.dataType = GATKReportDataType.Unknown; + } + else { + this.format = format; + this.dataType = GATKReportDataType.fromFormatString(format); + } + } + + /** + * Get the display width for this column. This allows the entire column to be displayed with the appropriate, fixed + * width. + * + * @return the format string for this column + */ + public GATKReportColumnFormat getColumnFormat() { + if (columnFormat != null) + return columnFormat; + + columnFormat = new GATKReportColumnFormat(maxWidth, alignment); + return columnFormat; + } + + private static final Collection RIGHT_ALIGN_STRINGS = Arrays.asList( + "null", + "NA", + String.valueOf(Double.POSITIVE_INFINITY), + String.valueOf(Double.NEGATIVE_INFINITY), + String.valueOf(Double.NaN)); + + /** + * Check if the value can be right aligned. Does not trim the values before checking if numeric since it assumes + * the spaces mean that the value is already padded. + * + * @param value to check + * @return true if the value is a right alignable + */ + protected static boolean isRightAlign(final String value) { + return value == null || RIGHT_ALIGN_STRINGS.contains(value) || NumberUtils.isNumber(value.trim()); + } + + /** + * Returns a string version of the values. 
+ * + * @param obj The object to convert to a string + * @return The string representation of the column + */ + private String formatValue(final Object obj) { + String value; + if (obj == null) { + value = "null"; + } + else if ( dataType.equals(GATKReportDataType.Unknown) && (obj instanceof Double || obj instanceof Float) ) { + value = String.format("%.8f", obj); + } + else + value = String.format(format, obj); + + return value; + } + + public GATKReportDataType getDataType() { + return dataType; + } + + public String getColumnName() { + return columnName; + } + + public String getFormat() { + return dataType.equals(GATKReportDataType.Unknown) ? "%s" : format; + } + + public void updateFormatting(final Object value) { + if (value != null) { + final String formatted = formatValue(value); + if ( formatted.length() > 0 ) { + updateMaxWidth(formatted); + updateFormat(formatted); + } + } + } + + private void updateMaxWidth(final String formatted) { + maxWidth = Math.max(formatted.length(), maxWidth); + } + + private void updateFormat(final String formatted) { + if (alignment == GATKReportColumnFormat.Alignment.RIGHT) + alignment = isRightAlign(formatted) ? 
GATKReportColumnFormat.Alignment.RIGHT : GATKReportColumnFormat.Alignment.LEFT; + } +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportGatherer.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportGatherer.java index ff1f9b90c..04030c4ad 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportGatherer.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportGatherer.java @@ -54,7 +54,7 @@ public class GATKReportGatherer extends Gatherer { isFirst = false; } else { GATKReport toAdd = new GATKReport(input); - current.combineWith(toAdd); + current.concat(toAdd); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTableV2.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTableV2.java new file mode 100755 index 000000000..d00a45765 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTableV2.java @@ -0,0 +1,672 @@ +/* + * Copyright (c) 2012, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.report; + +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.text.TextFormattingUtils; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.PrintStream; +import java.util.*; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +public class GATKReportTableV2 { + /** + * REGEX that matches any table with an invalid name + */ + public static final String INVALID_TABLE_NAME_REGEX = "[^a-zA-Z0-9_\\-\\.]"; + private static final String GATKTABLE_HEADER_PREFIX = "#:GATKTable"; + private static final String SEPARATOR = ":"; + private static final String ENDLINE = ":;"; + + private final String tableName; + private final String tableDescription; + + private final boolean sortByRowID; + + private List underlyingData; + private final List columnInfo; + private final Map columnNameToIndex; + private final HashMap rowIdToIndex; + + private static final String COULD_NOT_READ_HEADER = "Could not read the header of this file -- "; + private static final String COULD_NOT_READ_COLUMN_NAMES = "Could not read the column names of this file -- "; + private static final String COULD_NOT_READ_DATA_LINE = "Could not read a data line of this table -- "; + private static final String COULD_NOT_READ_EMPTY_LINE = "Could not read the last empty line of this table -- "; + private static final String OLD_GATK_TABLE_VERSION = "We no longer support older versions of the GATK Tables"; + + private static final int INITITAL_ARRAY_SIZE = 10000; + private static final String NUMBER_CONVERSION_EXCEPTION = "String is a number but is not a long or a double: "; + + protected enum 
TableDataHeaderFields { + COLS(2), + ROWS(3), + FORMAT_START(4); + + private final int index; + TableDataHeaderFields(int index) { this.index = index; } + public int index() { return index; } + } + + protected enum TableNameHeaderFields { + NAME(2), + DESCRIPTION(3); + + private final int index; + TableNameHeaderFields(int index) { this.index = index; } + public int index() { return index; } + } + + public GATKReportTableV2(BufferedReader reader, GATKReportVersion version) { + + switch ( version ) { + case V1_1: + // read in the header lines + final String[] tableData, tableNameData; + try { + tableData = reader.readLine().split(SEPARATOR); + tableNameData = reader.readLine().split(SEPARATOR); + } catch (IOException e) { + throw new ReviewedStingException(COULD_NOT_READ_HEADER + e.getMessage()); + } + + // parse the header fields + tableName = tableNameData[TableNameHeaderFields.NAME.index()]; + tableDescription = (tableNameData.length <= TableNameHeaderFields.DESCRIPTION.index()) ? "" : tableNameData[TableNameHeaderFields.DESCRIPTION.index()]; // table may have no description! 
(and that's okay) + + // when reading from a file, we do not re-sort the rows + sortByRowID = false; + + // initialize the data + final int nColumns = Integer.parseInt(tableData[TableDataHeaderFields.COLS.index()]); + final int nRows = Integer.parseInt(tableData[TableDataHeaderFields.ROWS.index()]); + underlyingData = new ArrayList(nRows); + columnInfo = new ArrayList(nColumns); + columnNameToIndex = new HashMap(nColumns); + + // when reading from a file, the row ID mapping is just the index + rowIdToIndex = new HashMap(); + for ( int i = 0; i < nRows; i++ ) + rowIdToIndex.put(i, i); + + // read the column names + final String columnLine; + try { + columnLine = reader.readLine(); + } catch (IOException e) { + throw new ReviewedStingException(COULD_NOT_READ_COLUMN_NAMES); + } + + final List columnStarts = TextFormattingUtils.getWordStarts(columnLine); + final String[] columnNames = TextFormattingUtils.splitFixedWidth(columnLine, columnStarts); + + // Put in columns using the format string from the header + for ( int i = 0; i < nColumns; i++ ) { + final String format = tableData[TableDataHeaderFields.FORMAT_START.index() + i]; + addColumn(columnNames[i], format); + } + + // fill in the table + try { + for ( int i = 0; i < nRows; i++ ) { + // read a data line + final String dataLine = reader.readLine(); + final List lineSplits = Arrays.asList(TextFormattingUtils.splitFixedWidth(dataLine, columnStarts)); + + underlyingData.add(new Object[nColumns]); + for ( int columnIndex = 0; columnIndex < nColumns; columnIndex++ ) { + + final GATKReportDataType type = columnInfo.get(columnIndex).getDataType(); + final String columnName = columnNames[columnIndex]; + set(i, columnName, type.Parse(lineSplits.get(columnIndex))); + + } + } + } catch (IOException e) { + throw new ReviewedStingException(COULD_NOT_READ_DATA_LINE + e.getMessage()); + } + + try { + reader.readLine(); + } catch (IOException e) { + throw new ReviewedStingException(COULD_NOT_READ_EMPTY_LINE + e.getMessage()); + 
} + break; + + default: + throw new ReviewedStingException(OLD_GATK_TABLE_VERSION); + } + } + + /** + * Construct a new GATK report table with the specified name and description + * + * @param tableName the name of the table + * @param tableDescription the description of the table + * @param numColumns the number of columns in this table + */ + public GATKReportTableV2(final String tableName, final String tableDescription, final int numColumns) { + this(tableName, tableDescription, numColumns, true); + } + + /** + * Construct a new GATK report table with the specified name and description and whether to sort rows by the row ID. + * + * @param tableName the name of the table + * @param tableDescription the description of the table + * @param numColumns the number of columns in this table + * @param sortByRowID whether to sort rows by the row ID (instead of the order in which they were added) + */ + public GATKReportTableV2(final String tableName, final String tableDescription, final int numColumns, final boolean sortByRowID) { + if ( !isValidName(tableName) ) { + throw new ReviewedStingException("Attempted to set a GATKReportTable name of '" + tableName + "'. GATKReportTable names must be purely alphanumeric - no spaces or special characters are allowed."); + } + + if ( !isValidDescription(tableDescription) ) { + throw new ReviewedStingException("Attempted to set a GATKReportTable description of '" + tableDescription + "'. 
GATKReportTable descriptions must not contain newlines."); + } + + this.tableName = tableName; + this.tableDescription = tableDescription; + this.sortByRowID = sortByRowID; + + underlyingData = new ArrayList(INITITAL_ARRAY_SIZE); + columnInfo = new ArrayList(numColumns); + columnNameToIndex = new HashMap(numColumns); + rowIdToIndex = new HashMap(); + } + + /** + * Verifies that a table or column name has only alphanumeric characters - no spaces or special characters allowed + * + * @param name the name of the table or column + * @return true if the name is valid, false if otherwise + */ + private boolean isValidName(String name) { + Pattern p = Pattern.compile(INVALID_TABLE_NAME_REGEX); + Matcher m = p.matcher(name); + + return !m.find(); + } + + /** + * Verifies that a table description contains no carriage return or newline characters + * + * @param description the description of the table + * @return true if the description is valid, false if otherwise + */ + private boolean isValidDescription(String description) { + Pattern p = Pattern.compile("\\r|\\n"); + Matcher m = p.matcher(description); + + return !m.find(); + } + + /** + * Add a mapping from ID to the index of a new row added to the table. + * + * @param ID the unique ID + */ + public void addRowID(final String ID) { + addRowID(ID, false); + } + + /** + * Add a mapping from ID to the index of a new row added to the table. + * + * @param ID the unique ID + * @param populateFirstColumn should we automatically populate the first column with the row's ID? + */ + public void addRowID(final String ID, final boolean populateFirstColumn) { + addRowIDMapping(ID, underlyingData.size(), populateFirstColumn); + } + + /** + * Add a mapping from ID to row index.
+ * + * @param ID the unique ID + * @param index the index associated with the ID + */ + public void addRowIDMapping(final String ID, final int index) { + addRowIDMapping(ID, index, false); + } + + /** + * Add a mapping from ID to row index. + * + * @param ID the unique ID + * @param index the index associated with the ID + * @param populateFirstColumn should we automatically populate the first column with the row's ID? + */ + public void addRowIDMapping(final Object ID, final int index, final boolean populateFirstColumn) { + if ( populateFirstColumn && !isValidName(ID.toString()) ) + throw new ReviewedStingException("Attempted to set a GATKReportTable ID of '" + ID + "'; GATKReportTable IDs must be purely alphanumeric - no spaces or special characters are allowed."); + + expandTo(index, false); + rowIdToIndex.put(ID, index); + + if ( populateFirstColumn ) + set(index, 0, ID); + } + + /** + * Add a column to the report and specify the default value that should be supplied if a given position in the table + * is never explicitly set. + * + * @param columnName the name of the column + */ + public void addColumn(String columnName) { + addColumn(columnName, ""); + } + + /** + * Add a column to the report, specify the default column value, whether the column should be displayed in the final + * output (useful when intermediate columns are necessary for later calculations, but are not required to be in the + * output file), and the format string used to display the data. + * + * @param columnName the name of the column + * @param format the format string used to display data + */ + public void addColumn(String columnName, String format) { + if (!isValidName(columnName)) { + throw new ReviewedStingException("Attempted to set a GATKReportTable column name of '" + columnName + "'. 
GATKReportTable column names must be purely alphanumeric - no spaces or special characters are allowed."); + } + columnNameToIndex.put(columnName, columnInfo.size()); + columnInfo.add(new GATKReportColumnV2(columnName, format)); + } + + /** + * Check that the requested row and column indices are non-negative and that the column exists; throws an exception otherwise + * + * @param rowIndex the row index + * @param colIndex the column index + */ + private void verifyEntry(final int rowIndex, final int colIndex) { + if ( rowIndex < 0 || colIndex < 0 || colIndex >= getNumColumns() ) + throw new ReviewedStingException("attempted to access a cell that does not exist in table '" + tableName + "'"); + } + + /** + * Expand the table, if necessary, so that it contains a row at the given index + * + * @param rowIndex the row index + * @param updateRowIdMap should we update the row ID map? + */ + private void expandTo(final int rowIndex, final boolean updateRowIdMap) { + int currentSize = underlyingData.size(); + if ( rowIndex >= currentSize ) { + final int numNewRows = rowIndex - currentSize + 1; + for ( int i = 0; i < numNewRows; i++ ) { + if ( updateRowIdMap ) + rowIdToIndex.put(currentSize, currentSize); + underlyingData.add(new Object[getNumColumns()]); + currentSize++; + } + } + } + + /** + * Set the value for a given position in the table + * + * @param rowID the row ID + * @param columnName the name of the column + * @param value the value to set + */ + public void set(final Object rowID, final String columnName, final Object value) { + if ( !rowIdToIndex.containsKey(rowID) ) { + rowIdToIndex.put(rowID, underlyingData.size()); + expandTo(underlyingData.size(), false); + } + set(rowIdToIndex.get(rowID), columnNameToIndex.get(columnName), value); + } + + public void set(final int rowIndex, final int colIndex, Object value) { + expandTo(rowIndex, true); + verifyEntry(rowIndex, colIndex); + GATKReportColumnV2 column = columnInfo.get(colIndex); + + // We do not accept internal null values + if (value == null) + value = "null"; + else + value =
fixType(value, column); + + if ( column.getDataType().equals(GATKReportDataType.fromObject(value)) || column.getDataType().equals(GATKReportDataType.Unknown) ) { + underlyingData.get(rowIndex)[colIndex] = value; + column.updateFormatting(value); + } else { + throw new ReviewedStingException(String.format("Tried to add an object of type: %s to a column of type: %s", GATKReportDataType.fromObject(value).name(), column.getDataType().name())); + } + } + + /** + * Returns true if the table contains a row mapping with the given ID + * + * @param rowID the row ID + */ + public boolean containsRowID(final Object rowID) { + return rowIdToIndex.containsKey(rowID); + } + + /** + * Returns the row mapping IDs + * + */ + public Collection getRowIDs() { + return rowIdToIndex.keySet(); + } + + /** + * Increment the integer value at a given position in the table (a new row ID starts from 0, so its cell is set to 1) + * + * @param rowID the row ID + * @param columnName the name of the column + */ + public void increment(final Object rowID, final String columnName) { + int prevValue; + if ( !rowIdToIndex.containsKey(rowID) ) { + rowIdToIndex.put(rowID, underlyingData.size()); + underlyingData.add(new Object[getNumColumns()]); + prevValue = 0; + } else { + Object obj = get(rowID, columnName); + if ( !(obj instanceof Integer) ) + throw new ReviewedStingException("Attempting to increment a value in a cell that is not an integer"); + prevValue = (Integer)obj; + } + + set(rowIdToIndex.get(rowID), columnNameToIndex.get(columnName), prevValue + 1); + } + + /** + * Returns the index of the first row matching the column values. + * Ex: "CountVariants", "dbsnp", "eval", "called", "all", "novel", "all" + * + * @param columnValues column values. + * @return The index of the first row matching the column values or -1 if no such row exists. + */ + public int findRowByData(final Object...
columnValues) { + if ( columnValues == null || columnValues.length == 0 || columnValues.length > getNumColumns() ) + return -1; + + for ( int rowIndex = 0; rowIndex < underlyingData.size(); rowIndex++ ) { + + final Object[] row = underlyingData.get(rowIndex); + + boolean matches = true; + for ( int colIndex = 0; colIndex < columnValues.length; colIndex++ ) { + if ( !columnValues[colIndex].equals(row[colIndex]) ) { + matches = false; + break; + } + } + + if ( matches ) + return rowIndex; + } + + return -1; + } + + private Object fixType(final Object value, final GATKReportColumnV2 column) { + // Below is some code to convert a string into its appropriate type. + + // todo -- Types have to be more flexible. For example, %d should accept Integers, Shorts and Bytes. + + Object newValue = null; + if ( value instanceof String && !column.getDataType().equals(GATKReportDataType.String) ) { + // Integer case + if ( column.getDataType().equals(GATKReportDataType.Integer) ) { + try { + newValue = Long.parseLong((String) value); + } catch (Exception e) { + /** do nothing */ + } + } + if ( column.getDataType().equals(GATKReportDataType.Decimal) ) { + try { + newValue = Double.parseDouble((String) value); + } catch (Exception e) { + /** do nothing */ + } + } + if ( column.getDataType().equals(GATKReportDataType.Character) && ((String) value).length() == 1 ) { + newValue = ((String) value).charAt(0); + } + } + + return (newValue != null) ? 
newValue : value; + } + + /** + * Get a value from the given position in the table + * + * @param rowID the row ID + * @param columnName the name of the column + * @return the value stored at the specified position in the table + */ + public Object get(final Object rowID, final String columnName) { + return get(rowIdToIndex.get(rowID), columnNameToIndex.get(columnName)); + } + + /** + * Get a value from the given position in the table + * + * @param rowIndex the index of the row + * @param columnIndex the index of the column + * @return the value stored at the specified position in the table + */ + public Object get(int rowIndex, int columnIndex) { + verifyEntry(rowIndex, columnIndex); + return underlyingData.get(rowIndex)[columnIndex]; + } + + /** + * Write the table to the PrintStream, formatted nicely to be human-readable, AWK-able, and R-friendly. + * + * @param out the PrintStream to which the table should be written + */ + void write(final PrintStream out) { + + /* + * Table header: + * #:GATKTable:nColumns:nRows:(DataType for each column):; + * #:GATKTable:TableName:Description :; + * key colA colB + * row1 xxxx xxxxx + */ + + // write the table definition + out.printf(GATKTABLE_HEADER_PREFIX + ":%d:%d", getNumColumns(), getNumRows()); + + // write the formats for all the columns + for ( final GATKReportColumnV2 column : columnInfo ) + out.print(SEPARATOR + column.getFormat()); + out.println(ENDLINE); + + // write the table name & description + out.printf(GATKTABLE_HEADER_PREFIX + ":%s:%s\n", tableName, tableDescription); + + // write the column names + boolean needsPadding = false; + for ( final GATKReportColumnV2 column : columnInfo ) { + if ( needsPadding ) + out.printf(" "); + needsPadding = true; + + out.printf(column.getColumnFormat().getNameFormat(), column.getColumnName()); + } + out.println(); + + // write the table body + if ( sortByRowID ) { + final TreeMap sortedMap; + try { + sortedMap = new TreeMap(rowIdToIndex); + } catch (ClassCastException 
e) { + throw new ReviewedStingException("Unable to sort the rows based on the row IDs because the ID Objects are of different types"); + } + for ( final Map.Entry rowKey : sortedMap.entrySet() ) + writeRow(out, underlyingData.get(rowKey.getValue())); + } else { + for ( final Object[] row : underlyingData ) + writeRow(out, row); + } + + out.println(); + } + + private void writeRow(final PrintStream out, final Object[] row) { + boolean needsPadding = false; + for ( int i = 0; i < row.length; i++ ) { + if ( needsPadding ) + out.printf(" "); + needsPadding = true; + + final Object obj = row[i]; + final String value; + + final GATKReportColumnV2 info = columnInfo.get(i); + + if ( obj == null ) + value = "null"; + else if ( info.getDataType().equals(GATKReportDataType.Unknown) && (obj instanceof Double || obj instanceof Float) ) + value = String.format("%.8f", obj); + else + value = String.format(info.getFormat(), obj); + + out.printf(info.getColumnFormat().getValueFormat(), value); + } + + out.println(); + } + + public int getNumRows() { + return underlyingData.size(); + } + + public int getNumColumns() { + return columnInfo.size(); + } + + public List getColumnInfo() { + return columnInfo; + } + + public String getTableName() { + return tableName; + } + + public String getTableDescription() { + return tableDescription; + } + + /** + * Concatenates the rows from the table to this one + * + * @param table another GATK table + */ + public void concat(final GATKReportTableV2 table) { + if ( !isSameFormat(table) ) + throw new ReviewedStingException("Error trying to concatenate tables with different formats"); + + // add the data + underlyingData.addAll(table.underlyingData); + + // update the row index map + final int currentNumRows = getNumRows(); + for ( Map.Entry entry : table.rowIdToIndex.entrySet() ) + rowIdToIndex.put(entry.getKey(), entry.getValue() + currentNumRows); + } + + /** + * Returns whether or not the two tables have the same format including columns and 
everything in between. This does + * not check if the data inside is the same. This is the check to see if the two tables are gatherable or + * reducible + * + * @param table another GATK table + * @return true if the tables are gatherable + */ + public boolean isSameFormat(final GATKReportTableV2 table) { + if ( !tableName.equals(table.tableName) || + !tableDescription.equals(table.tableDescription) || + columnInfo.size() != table.columnInfo.size() ) + return false; + + for ( int i = 0; i < columnInfo.size(); i++ ) { + if ( !columnInfo.get(i).getFormat().equals(table.columnInfo.get(i).getFormat()) || + !columnInfo.get(i).getColumnName().equals(table.columnInfo.get(i).getColumnName()) ) + return false; + } + + return true; + } + + /** + * Checks that the tables are exactly the same. + * + * @param table another GATK table + * @return true if all fields in the reports, tables, and columns are equal. + */ + public boolean equals(final GATKReportTableV2 table) { + if ( !isSameFormat(table) || + underlyingData.size() != table.underlyingData.size() ) + return false; + + final List myOrderedRows = getOrderedRows(); + final List otherOrderedRows = table.getOrderedRows(); + + for ( int i = 0; i < underlyingData.size(); i++ ) { + final Object[] myData = myOrderedRows.get(i); + final Object[] otherData = otherOrderedRows.get(i); + for ( int j = 0; j < myData.length; j++ ) { + if ( !myData[j].toString().equals(otherData[j].toString()) ) // need to deal with different typing (e.g. Long vs. 
Integer) + return false; + } + } + + return true; + } + + private List getOrderedRows() { + if ( !sortByRowID ) + return underlyingData; + + final TreeMap sortedMap; + try { + sortedMap = new TreeMap(rowIdToIndex); + } catch (ClassCastException e) { + return underlyingData; + } + + final List orderedData = new ArrayList(underlyingData.size()); + for ( final int rowKey : sortedMap.values() ) + orderedData.add(underlyingData.get(rowKey)); + + return orderedData; + } +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportVersion.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportVersion.java index 99381cc21..b5a5e0443 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportVersion.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportVersion.java @@ -45,10 +45,17 @@ public enum GATKReportVersion { /* * Differences between v0.x * - Added table and report headers - * - Headers changed format, include the numbe rof tables, rows, and metadata for gathering + * - Headers changed format, include the number of tables, rows, and metadata for gathering * - IS GATHERABLE */ - V1_0("v1.0"); + V1_0("v1.0"), + + /* + * Differences between v1.0 + * - column numbers in header reflect the actual count of columns + * - primary keys are never displayed + */ + V1_1("v1.1"); private final String versionString; @@ -81,6 +88,9 @@ public enum GATKReportVersion { if (header.startsWith("#:GATKReport.v1.0")) return GATKReportVersion.V1_0; + if (header.startsWith("#:GATKReport.v1.1")) + return GATKReportVersion.V1_1; + throw new ReviewedStingException("Unknown GATK report version in header: " + header); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/QuantizationInfo.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/QuantizationInfo.java index 9c91a1874..50a5a3a30 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/QuantizationInfo.java +++ 
b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/QuantizationInfo.java @@ -1,6 +1,6 @@ package org.broadinstitute.sting.gatk.walkers.bqsr; -import org.broadinstitute.sting.gatk.report.GATKReportTable; +import org.broadinstitute.sting.gatk.report.GATKReportTableV2; import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.recalibration.QualQuantizer; @@ -77,13 +77,14 @@ public class QuantizationInfo { return quantizationLevels; } - public GATKReportTable generateReportTable() { - GATKReportTable quantizedTable = new GATKReportTable(RecalDataManager.QUANTIZED_REPORT_TABLE_TITLE, "Quality quantization map"); - quantizedTable.addPrimaryKey(RecalDataManager.QUALITY_SCORE_COLUMN_NAME); - quantizedTable.addColumn(RecalDataManager.QUANTIZED_COUNT_COLUMN_NAME, 0L); - quantizedTable.addColumn(RecalDataManager.QUANTIZED_VALUE_COLUMN_NAME, (byte) 0); + public GATKReportTableV2 generateReportTable() { + GATKReportTableV2 quantizedTable = new GATKReportTableV2(RecalDataManager.QUANTIZED_REPORT_TABLE_TITLE, "Quality quantization map", 3); + quantizedTable.addColumn(RecalDataManager.QUALITY_SCORE_COLUMN_NAME); + quantizedTable.addColumn(RecalDataManager.QUANTIZED_COUNT_COLUMN_NAME); + quantizedTable.addColumn(RecalDataManager.QUANTIZED_VALUE_COLUMN_NAME); for (int qual = 0; qual <= QualityUtils.MAX_QUAL_SCORE; qual++) { + quantizedTable.set(qual, RecalDataManager.QUALITY_SCORE_COLUMN_NAME, qual); quantizedTable.set(qual, RecalDataManager.QUANTIZED_COUNT_COLUMN_NAME, empiricalQualCounts.get(qual)); quantizedTable.set(qual, RecalDataManager.QUANTIZED_VALUE_COLUMN_NAME, quantizedQuals.get(qual)); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDataManager.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDataManager.java index 53e7c3f35..d0d4cb96f 100644 --- 
a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDataManager.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDataManager.java @@ -27,7 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.bqsr; import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.report.GATKReport; -import org.broadinstitute.sting.gatk.report.GATKReportTable; +import org.broadinstitute.sting.gatk.report.GATKReportTableV2; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.R.RScriptExecutor; import org.broadinstitute.sting.utils.Utils; @@ -223,8 +223,8 @@ public class RecalDataManager { logger.info(""); } - private static List generateReportTables(Map> keysAndTablesMap) { - List result = new LinkedList(); + private static List generateReportTables(Map> keysAndTablesMap) { + List result = new LinkedList(); int tableIndex = 0; final Pair covariateValue = new Pair(RecalDataManager.COVARIATE_VALUE_COLUMN_NAME, "%s"); @@ -240,7 +240,6 @@ public class RecalDataManager { Map recalTable = entry.getValue(); boolean isReadGroupTable = tableIndex == 0; // special case for the read group table so we can print the extra column it needs. 
- GATKReportTable reportTable = new GATKReportTable("RecalTable" + tableIndex++, ""); List requiredList = keyManager.getRequiredCovariates(); // ask the key manager what required covariates were used in this recal table List optionalList = keyManager.getOptionalCovariates(); // ask the key manager what optional covariates were used in this recal table @@ -264,11 +263,11 @@ public class RecalDataManager { columnNames.add(nObservations); columnNames.add(nErrors); - reportTable.addPrimaryKey("PrimaryKey", false); // every table must have a primary key (hidden) + GATKReportTableV2 reportTable = new GATKReportTableV2("RecalTable" + tableIndex++, "", columnNames.size()); for (Pair columnName : columnNames) - reportTable.addColumn(columnName.getFirst(), true, columnName.getSecond()); // every table must have the event type + reportTable.addColumn(columnName.getFirst(), columnName.getSecond()); // every table must have the event type - long primaryKey = 0L; + int rowIndex = 0; for (Map.Entry recalTableEntry : recalTable.entrySet()) { // create a map with column name => key value for all covariate keys BitSet bitSetKey = recalTableEntry.getKey(); @@ -288,9 +287,9 @@ public class RecalDataManager { for (Map.Entry dataEntry : columnData.entrySet()) { String columnName = dataEntry.getKey(); Object value = dataEntry.getValue(); - reportTable.set(primaryKey, columnName, value.toString()); + reportTable.set(rowIndex, columnName, value.toString()); } - primaryKey++; + rowIndex++; } result.add(reportTable); } @@ -301,11 +300,11 @@ public class RecalDataManager { outputRecalibrationReport(RAC.generateReportTable(), quantizationInfo.generateReportTable(), generateReportTables(keysAndTablesMap), outputFile); } - public static void outputRecalibrationReport(GATKReportTable argumentTable, QuantizationInfo quantizationInfo, LinkedHashMap> keysAndTablesMap, PrintStream outputFile) { + public static void outputRecalibrationReport(GATKReportTableV2 argumentTable, QuantizationInfo 
quantizationInfo, LinkedHashMap> keysAndTablesMap, PrintStream outputFile) { outputRecalibrationReport(argumentTable, quantizationInfo.generateReportTable(), generateReportTables(keysAndTablesMap), outputFile); } - private static void outputRecalibrationReport(GATKReportTable argumentTable, GATKReportTable quantizationTable, List recalTables, PrintStream outputFile) { + private static void outputRecalibrationReport(GATKReportTableV2 argumentTable, GATKReportTableV2 quantizationTable, List recalTables, PrintStream outputFile) { GATKReport report = new GATKReport(); report.addTable(argumentTable); report.addTable(quantizationTable); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java index 598312916..863796a67 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java @@ -27,7 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.bqsr; import org.broad.tribble.Feature; import org.broadinstitute.sting.commandline.*; -import org.broadinstitute.sting.gatk.report.GATKReportTable; +import org.broadinstitute.sting.gatk.report.GATKReportTableV2; import org.broadinstitute.sting.utils.Utils; import java.io.File; @@ -172,26 +172,43 @@ public class RecalibrationArgumentCollection { public File recalibrationReport = null; - public GATKReportTable generateReportTable() { - GATKReportTable argumentsTable = new GATKReportTable("Arguments", "Recalibration argument collection values used in this run"); - argumentsTable.addPrimaryKey("Argument"); - argumentsTable.addColumn(RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, "null"); + public GATKReportTableV2 generateReportTable() { + GATKReportTableV2 argumentsTable = new GATKReportTableV2("Arguments", "Recalibration argument collection 
values used in this run", 2); + argumentsTable.addColumn("Argument"); + argumentsTable.addColumn(RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME); + argumentsTable.addRowID("covariate", true); argumentsTable.set("covariate", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, (COVARIATES == null) ? "null" : Utils.join(",", COVARIATES)); + argumentsTable.addRowID("standard_covs", true); argumentsTable.set("standard_covs", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, USE_STANDARD_COVARIATES); + argumentsTable.addRowID("run_without_dbsnp", true); argumentsTable.set("run_without_dbsnp", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, RUN_WITHOUT_DBSNP); + argumentsTable.addRowID("solid_recal_mode", true); argumentsTable.set("solid_recal_mode", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, SOLID_RECAL_MODE); + argumentsTable.addRowID("solid_nocall_strategy", true); argumentsTable.set("solid_nocall_strategy", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, SOLID_NOCALL_STRATEGY); + argumentsTable.addRowID("mismatches_context_size", true); argumentsTable.set("mismatches_context_size", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, MISMATCHES_CONTEXT_SIZE); + argumentsTable.addRowID("insertions_context_size", true); argumentsTable.set("insertions_context_size", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, INSERTIONS_CONTEXT_SIZE); + argumentsTable.addRowID("deletions_context_size", true); argumentsTable.set("deletions_context_size", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, DELETIONS_CONTEXT_SIZE); + argumentsTable.addRowID("mismatches_default_quality", true); argumentsTable.set("mismatches_default_quality", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, MISMATCHES_DEFAULT_QUALITY); + argumentsTable.addRowID("insertions_default_quality", true); argumentsTable.set("insertions_default_quality", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, INSERTIONS_DEFAULT_QUALITY); + argumentsTable.addRowID("low_quality_tail", true); argumentsTable.set("low_quality_tail", 
RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, LOW_QUAL_TAIL); + argumentsTable.addRowID("default_platform", true); argumentsTable.set("default_platform", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, DEFAULT_PLATFORM); + argumentsTable.addRowID("force_platform", true); argumentsTable.set("force_platform", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, FORCE_PLATFORM); + argumentsTable.addRowID("quantizing_levels", true); argumentsTable.set("quantizing_levels", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, QUANTIZING_LEVELS); + argumentsTable.addRowID("keep_intermediate_files", true); argumentsTable.set("keep_intermediate_files", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, KEEP_INTERMEDIATE_FILES); + argumentsTable.addRowID("no_plots", true); argumentsTable.set("no_plots", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, NO_PLOTS); + argumentsTable.addRowID("recalibration_report", true); argumentsTable.set("recalibration_report", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, recalibrationReport == null ? 
"null" : recalibrationReport.getAbsolutePath()); return argumentsTable; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationReport.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationReport.java index febbc1280..a1945b5f6 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationReport.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationReport.java @@ -1,7 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.bqsr; import org.broadinstitute.sting.gatk.report.GATKReport; -import org.broadinstitute.sting.gatk.report.GATKReportTable; +import org.broadinstitute.sting.gatk.report.GATKReportTableV2; import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; @@ -21,7 +21,7 @@ public class RecalibrationReport { private final LinkedHashMap> keysAndTablesMap; // quick access reference to the read group table and its key manager private final ArrayList requestedCovariates = new ArrayList(); // list of all covariates to be used in this calculation - private final GATKReportTable argumentTable; // keep the argument table untouched just for output purposes + private final GATKReportTableV2 argumentTable; // keep the argument table untouched just for output purposes private final RecalibrationArgumentCollection RAC; // necessary for quantizing qualities with the same parameter public RecalibrationReport(final File RECAL_FILE) { @@ -30,7 +30,7 @@ public class RecalibrationReport { argumentTable = report.getTable(RecalDataManager.ARGUMENT_REPORT_TABLE_TITLE); RAC = initializeArgumentCollectionTable(argumentTable); - GATKReportTable quantizedTable = report.getTable(RecalDataManager.QUANTIZED_REPORT_TABLE_TITLE); + GATKReportTableV2 quantizedTable = report.getTable(RecalDataManager.QUANTIZED_REPORT_TABLE_TITLE); quantizationInfo = 
initializeQuantizationTable(quantizedTable); Pair, ArrayList> covariates = RecalDataManager.initializeCovariates(RAC); // initialize the required and optional covariates @@ -53,11 +53,11 @@ public class RecalibrationReport { int nRequiredCovariates = requiredCovariatesToAdd.size(); // the number of required covariates defines which table we are looking at (RG, QUAL or ALL_COVARIATES) final String UNRECOGNIZED_REPORT_TABLE_EXCEPTION = "Unrecognized table. Did you add an extra required covariate? This is a hard check."; if (nRequiredCovariates == 1) { // if there is only one required covariate, this is the read group table - final GATKReportTable reportTable = report.getTable(RecalDataManager.READGROUP_REPORT_TABLE_TITLE); + final GATKReportTableV2 reportTable = report.getTable(RecalDataManager.READGROUP_REPORT_TABLE_TITLE); table = parseReadGroupTable(keyManager, reportTable); } else if (nRequiredCovariates == 2 && optionalCovariatesToAdd.isEmpty()) { // when we have both required covariates and no optional covariates we're at the QUAL table - final GATKReportTable reportTable = report.getTable(RecalDataManager.QUALITY_SCORE_REPORT_TABLE_TITLE); + final GATKReportTableV2 reportTable = report.getTable(RecalDataManager.QUALITY_SCORE_REPORT_TABLE_TITLE); table = parseQualityScoreTable(keyManager, reportTable); } else @@ -68,12 +68,12 @@ public class RecalibrationReport { final BQSRKeyManager keyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates); // initializing it's corresponding key manager - final GATKReportTable reportTable = report.getTable(RecalDataManager.ALL_COVARIATES_REPORT_TABLE_TITLE); + final GATKReportTableV2 reportTable = report.getTable(RecalDataManager.ALL_COVARIATES_REPORT_TABLE_TITLE); final Map table = parseAllCovariatesTable(keyManager, reportTable); keysAndTablesMap.put(keyManager, table); } - protected RecalibrationReport(QuantizationInfo quantizationInfo, LinkedHashMap> keysAndTablesMap, GATKReportTable argumentTable, 
RecalibrationArgumentCollection RAC) { + protected RecalibrationReport(QuantizationInfo quantizationInfo, LinkedHashMap> keysAndTablesMap, GATKReportTableV2 argumentTable, RecalibrationArgumentCollection RAC) { this.quantizationInfo = quantizationInfo; this.keysAndTablesMap = keysAndTablesMap; this.argumentTable = argumentTable; @@ -138,7 +138,7 @@ public class RecalibrationReport { * @param reportTable the GATKReport table containing data for this table * @return a lookup table indexed by bitsets containing the empirical quality and estimated quality reported for every key. */ - private Map parseAllCovariatesTable(BQSRKeyManager keyManager, GATKReportTable reportTable) { + private Map parseAllCovariatesTable(BQSRKeyManager keyManager, GATKReportTableV2 reportTable) { ArrayList columnNamesOrderedList = new ArrayList(5); columnNamesOrderedList.add(RecalDataManager.READGROUP_COLUMN_NAME); columnNamesOrderedList.add(RecalDataManager.QUALITY_SCORE_COLUMN_NAME); @@ -155,7 +155,7 @@ public class RecalibrationReport { * @param reportTable the GATKReport table containing data for this table * @return a lookup table indexed by bitsets containing the empirical quality and estimated quality reported for every key. */ - private Map parseQualityScoreTable(BQSRKeyManager keyManager, GATKReportTable reportTable) { + private Map parseQualityScoreTable(BQSRKeyManager keyManager, GATKReportTableV2 reportTable) { ArrayList columnNamesOrderedList = new ArrayList(3); columnNamesOrderedList.add(RecalDataManager.READGROUP_COLUMN_NAME); columnNamesOrderedList.add(RecalDataManager.QUALITY_SCORE_COLUMN_NAME); @@ -170,7 +170,7 @@ public class RecalibrationReport { * @param reportTable the GATKReport table containing data for this table * @return a lookup table indexed by bitsets containing the empirical quality and estimated quality reported for every key. 
*/ - private Map parseReadGroupTable(BQSRKeyManager keyManager, GATKReportTable reportTable) { + private Map parseReadGroupTable(BQSRKeyManager keyManager, GATKReportTableV2 reportTable) { ArrayList columnNamesOrderedList = new ArrayList(2); columnNamesOrderedList.add(RecalDataManager.READGROUP_COLUMN_NAME); columnNamesOrderedList.add(RecalDataManager.EVENT_TYPE_COLUMN_NAME); @@ -185,24 +185,24 @@ public class RecalibrationReport { * @param columnNamesOrderedList a list of columns to read from the report table and build as key for this particular table * @return a lookup table indexed by bitsets containing the empirical quality and estimated quality reported for every key. */ - private Map genericRecalTableParsing(BQSRKeyManager keyManager, GATKReportTable reportTable, ArrayList columnNamesOrderedList, boolean hasEstimatedQReportedColumn) { + private Map genericRecalTableParsing(BQSRKeyManager keyManager, GATKReportTableV2 reportTable, ArrayList columnNamesOrderedList, boolean hasEstimatedQReportedColumn) { Map result = new HashMap(reportTable.getNumRows()*2); - for (Object primaryKey : reportTable.getPrimaryKeys()) { + for ( int i = 0; i < reportTable.getNumRows(); i++ ) { int nKeys = columnNamesOrderedList.size(); Object [] keySet = new Object[nKeys]; - for (int i = 0; i < nKeys; i++) - keySet[i] = reportTable.get(primaryKey, columnNamesOrderedList.get(i)); // all these objects are okay in String format, the key manager will handle them correctly (except for the event type (see below) + for (int j = 0; j < nKeys; j++) + keySet[j] = reportTable.get(i, columnNamesOrderedList.get(j)); // all these objects are okay in String format, the key manager will handle them correctly (except for the event type (see below) keySet[keySet.length-1] = EventType.eventFrom((String) keySet[keySet.length-1]); // the last key is always the event type. We convert the string ("M", "I" or "D") to an enum object (necessary for the key manager). 
BitSet bitKey = keyManager.bitSetFromKey(keySet); - long nObservations = (Long) reportTable.get(primaryKey, RecalDataManager.NUMBER_OBSERVATIONS_COLUMN_NAME); - long nErrors = (Long) reportTable.get(primaryKey, RecalDataManager.NUMBER_ERRORS_COLUMN_NAME); - double empiricalQuality = (Double) reportTable.get(primaryKey, RecalDataManager.EMPIRICAL_QUALITY_COLUMN_NAME); + long nObservations = (Long) reportTable.get(i, RecalDataManager.NUMBER_OBSERVATIONS_COLUMN_NAME); + long nErrors = (Long) reportTable.get(i, RecalDataManager.NUMBER_ERRORS_COLUMN_NAME); + double empiricalQuality = (Double) reportTable.get(i, RecalDataManager.EMPIRICAL_QUALITY_COLUMN_NAME); double estimatedQReported = hasEstimatedQReportedColumn ? // the estimatedQreported column only exists in the ReadGroup table - (Double) reportTable.get(primaryKey, RecalDataManager.ESTIMATED_Q_REPORTED_COLUMN_NAME) : // we get it if we are in the read group table - Byte.parseByte((String) reportTable.get(primaryKey, RecalDataManager.QUALITY_SCORE_COLUMN_NAME)); // or we use the reported quality if we are in any other table + (Double) reportTable.get(i, RecalDataManager.ESTIMATED_Q_REPORTED_COLUMN_NAME) : // we get it if we are in the read group table + Byte.parseByte((String) reportTable.get(i, RecalDataManager.QUALITY_SCORE_COLUMN_NAME)); // or we use the reported quality if we are in any other table RecalDatum recalDatum = new RecalDatum(nObservations, nErrors, estimatedQReported, empiricalQuality); result.put(bitKey, recalDatum); @@ -216,13 +216,13 @@ public class RecalibrationReport { * @param table the GATKReportTable containing the quantization mappings * @return an ArrayList with the quantization mappings from 0 to MAX_QUAL_SCORE */ - private QuantizationInfo initializeQuantizationTable(GATKReportTable table) { + private QuantizationInfo initializeQuantizationTable(GATKReportTableV2 table) { Byte[] quals = new Byte[QualityUtils.MAX_QUAL_SCORE + 1]; Long[] counts = new Long[QualityUtils.MAX_QUAL_SCORE + 1]; 
- for (Object primaryKey : table.getPrimaryKeys()) { - Object quantizedObject = table.get(primaryKey, RecalDataManager.QUANTIZED_VALUE_COLUMN_NAME); - Object countObject = table.get(primaryKey, RecalDataManager.QUANTIZED_COUNT_COLUMN_NAME); - byte originalQual = Byte.parseByte(primaryKey.toString()); + for ( int i = 0; i < table.getNumRows(); i++ ) { + byte originalQual = (byte)i; + Object quantizedObject = table.get(i, RecalDataManager.QUANTIZED_VALUE_COLUMN_NAME); + Object countObject = table.get(i, RecalDataManager.QUANTIZED_COUNT_COLUMN_NAME); byte quantizedQual = Byte.parseByte(quantizedObject.toString()); long quantizedCount = Long.parseLong(countObject.toString()); quals[originalQual] = quantizedQual; @@ -237,63 +237,64 @@ public class RecalibrationReport { * @param table the GATKReportTable containing the arguments and its corresponding values * @return a RAC object properly initialized with all the objects in the table */ - private RecalibrationArgumentCollection initializeArgumentCollectionTable(GATKReportTable table) { + private RecalibrationArgumentCollection initializeArgumentCollectionTable(GATKReportTableV2 table) { RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection(); - for (Object primaryKey : table.getPrimaryKeys()) { - Object value = table.get(primaryKey, RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME); + for ( int i = 0; i < table.getNumRows(); i++ ) { + final String argument = table.get(i, "Argument").toString(); + Object value = table.get(i, RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME); if (value.equals("null")) value = null; // generic translation of null values that were printed out as strings | todo -- add this capability to the GATKReport - if (primaryKey.equals("covariate") && value != null) + if (argument.equals("covariate") && value != null) RAC.COVARIATES = value.toString().split(","); - else if (primaryKey.equals("standard_covs")) + else if (argument.equals("standard_covs")) RAC.USE_STANDARD_COVARIATES = 
Boolean.parseBoolean((String) value); - else if (primaryKey.equals("solid_recal_mode")) + else if (argument.equals("solid_recal_mode")) RAC.SOLID_RECAL_MODE = RecalDataManager.SOLID_RECAL_MODE.recalModeFromString((String) value); - else if (primaryKey.equals("solid_nocall_strategy")) + else if (argument.equals("solid_nocall_strategy")) RAC.SOLID_NOCALL_STRATEGY = RecalDataManager.SOLID_NOCALL_STRATEGY.nocallStrategyFromString((String) value); - else if (primaryKey.equals("mismatches_context_size")) + else if (argument.equals("mismatches_context_size")) RAC.MISMATCHES_CONTEXT_SIZE = Integer.parseInt((String) value); - else if (primaryKey.equals("insertions_context_size")) + else if (argument.equals("insertions_context_size")) RAC.INSERTIONS_CONTEXT_SIZE = Integer.parseInt((String) value); - else if (primaryKey.equals("deletions_context_size")) + else if (argument.equals("deletions_context_size")) RAC.DELETIONS_CONTEXT_SIZE = Integer.parseInt((String) value); - else if (primaryKey.equals("mismatches_default_quality")) + else if (argument.equals("mismatches_default_quality")) RAC.MISMATCHES_DEFAULT_QUALITY = Byte.parseByte((String) value); - else if (primaryKey.equals("insertions_default_quality")) + else if (argument.equals("insertions_default_quality")) RAC.INSERTIONS_DEFAULT_QUALITY = Byte.parseByte((String) value); - else if (primaryKey.equals("deletions_default_quality")) + else if (argument.equals("deletions_default_quality")) RAC.DELETIONS_DEFAULT_QUALITY = Byte.parseByte((String) value); - else if (primaryKey.equals("low_quality_tail")) + else if (argument.equals("low_quality_tail")) RAC.LOW_QUAL_TAIL = Byte.parseByte((String) value); - else if (primaryKey.equals("default_platform")) + else if (argument.equals("default_platform")) RAC.DEFAULT_PLATFORM = (String) value; - else if (primaryKey.equals("force_platform")) + else if (argument.equals("force_platform")) RAC.FORCE_PLATFORM = (String) value; - else if (primaryKey.equals("quantizing_levels")) + else if 
(argument.equals("quantizing_levels")) RAC.QUANTIZING_LEVELS = Integer.parseInt((String) value); - else if (primaryKey.equals("keep_intermediate_files")) + else if (argument.equals("keep_intermediate_files")) RAC.KEEP_INTERMEDIATE_FILES = Boolean.parseBoolean((String) value); - else if (primaryKey.equals("no_plots")) + else if (argument.equals("no_plots")) RAC.NO_PLOTS = Boolean.parseBoolean((String) value); - else if (primaryKey.equals("recalibration_report")) + else if (argument.equals("recalibration_report")) RAC.recalibrationReport = (value == null) ? null : new File((String) value); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ErrorRatePerCycle.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ErrorRatePerCycle.java index 10ac523e6..709b26808 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ErrorRatePerCycle.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ErrorRatePerCycle.java @@ -7,7 +7,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.report.GATKReport; -import org.broadinstitute.sting.gatk.report.GATKReportTable; +import org.broadinstitute.sting.gatk.report.GATKReportTableV2; import org.broadinstitute.sting.gatk.walkers.LocusWalker; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.QualityUtils; @@ -75,7 +75,7 @@ public class ErrorRatePerCycle extends LocusWalker { public Integer MIN_MAPPING_QUAL = 20; private GATKReport report; - private GATKReportTable table; + private GATKReportTableV2 table; private final static String reportName = "ErrorRatePerCycle"; private final static String reportDescription = "The error rate per sequenced position in the reads"; @@ -122,15 +122,14 @@ public class ErrorRatePerCycle extends 
LocusWalker { public void initialize() { report = new GATKReport(); - report.addTable(reportName, reportDescription); + report.addTable(reportName, reportDescription, 6, true); table = report.getTable(reportName); - table.addPrimaryKey("key", false); - table.addColumn("readgroup", 0); - table.addColumn("cycle", 0); - table.addColumn("mismatches", 0); - table.addColumn("counts", 0); - table.addColumn("qual", 0); - table.addColumn("errorrate", 0.0f, "%.2e"); + table.addColumn("readgroup"); + table.addColumn("cycle"); + table.addColumn("mismatches"); + table.addColumn("counts"); + table.addColumn("qual"); + table.addColumn("errorrate", "%.2e"); } public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { @@ -147,9 +146,11 @@ public class ErrorRatePerCycle extends LocusWalker { if ( BaseUtils.isRegularBase(readBase) && BaseUtils.isRegularBase(refBase) ) { final TableKey key = new TableKey(read.getReadGroup().getReadGroupId(), cycle); - if ( ! table.containsKey(key) ) { + if ( ! 
table.containsRowID(key) ) { table.set(key, "cycle", cycle); table.set(key, "readgroup", read.getReadGroup().getReadGroupId()); + table.set(key, "counts", 0); + table.set(key, "mismatches", 0); } table.increment(key, "counts"); @@ -167,7 +168,7 @@ public class ErrorRatePerCycle extends LocusWalker { public Integer reduce(Integer value, Integer sum) { return null; } public void onTraversalDone(Integer sum) { - for ( final Object key : table.getPrimaryKeys() ) { + for ( Object key : table.getRowIDs() ) { final int mismatches = (Integer)table.get(key, "mismatches"); final int count = (Integer)table.get(key, "counts"); final double errorRate = (mismatches + 1) / (1.0*(count + 1)); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ReadGroupProperties.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ReadGroupProperties.java index 14985907d..bc3706f16 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ReadGroupProperties.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ReadGroupProperties.java @@ -30,7 +30,7 @@ import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.report.GATKReport; -import org.broadinstitute.sting.gatk.report.GATKReportTable; +import org.broadinstitute.sting.gatk.report.GATKReportTableV2; import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.utils.Median; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; @@ -168,27 +168,28 @@ public class ReadGroupProperties extends ReadWalker { @Override public void onTraversalDone(Integer sum) { final GATKReport report = new GATKReport(); - report.addTable(TABLE_NAME, "Table of read group properties"); - GATKReportTable table = report.getTable(TABLE_NAME); + report.addTable(TABLE_NAME, "Table of read group 
properties", 12); + GATKReportTableV2 table = report.getTable(TABLE_NAME); DateFormat dateFormatter = DateFormat.getDateInstance(DateFormat.SHORT); - table.addPrimaryKey("readgroup"); + table.addColumn("readgroup"); //* Emits a GATKReport containing read group, sample, library, platform, center, median insert size and //* median read length for each read group in every BAM file. - table.addColumn("sample", "NA"); - table.addColumn("library", "NA"); - table.addColumn("platform", "NA"); - table.addColumn("center", "NA"); - table.addColumn("date", "NA"); - table.addColumn("has.any.reads", "false"); - table.addColumn("is.paired.end", "false"); - table.addColumn("n.reads.analyzed", "NA"); - table.addColumn("simple.read.type", "NA"); - table.addColumn("median.read.length", Integer.valueOf(0)); - table.addColumn("median.insert.size", Integer.valueOf(0)); + table.addColumn("sample", "%s"); + table.addColumn("library", "%s"); + table.addColumn("platform", "%s"); + table.addColumn("center", "%s"); + table.addColumn("date", "%s"); + table.addColumn("has.any.reads"); + table.addColumn("is.paired.end"); + table.addColumn("n.reads.analyzed", "%d"); + table.addColumn("simple.read.type", "%s"); + table.addColumn("median.read.length"); + table.addColumn("median.insert.size"); for ( final SAMReadGroupRecord rg : getToolkit().getSAMFileHeader().getReadGroups() ) { final String rgID = rg.getId(); + table.addRowID(rgID, true); PerReadGroupInfo info = readGroupInfo.get(rgID); // we are paired if > 25% of reads are paired @@ -217,7 +218,7 @@ public class ReadGroupProperties extends ReadWalker { report.print(out); } - private final void setTableValue(GATKReportTable table, final String rgID, final String key, final Object value) { + private final void setTableValue(GATKReportTableV2 table, final String rgID, final String key, final Object value) { table.set(rgID, key, value == null ? 
"NA" : value); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ReadLengthDistribution.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ReadLengthDistribution.java index 2d88baf3f..50e5e05e1 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ReadLengthDistribution.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ReadLengthDistribution.java @@ -5,7 +5,7 @@ import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.report.GATKReport; -import org.broadinstitute.sting.gatk.report.GATKReportTable; +import org.broadinstitute.sting.gatk.report.GATKReportTableV2; import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; @@ -53,20 +53,19 @@ public class ReadLengthDistribution extends ReadWalker { private GATKReport report; public void initialize() { + final List readGroups = getToolkit().getSAMFileHeader().getReadGroups(); + report = new GATKReport(); - report.addTable("ReadLengthDistribution", "Table of read length distributions"); - GATKReportTable table = report.getTable("ReadLengthDistribution"); + report.addTable("ReadLengthDistribution", "Table of read length distributions", 1 + (readGroups.isEmpty() ? 
1 : readGroups.size())); + GATKReportTableV2 table = report.getTable("ReadLengthDistribution"); - table.addPrimaryKey("readLength"); + table.addColumn("readLength"); - List readGroups = getToolkit().getSAMFileHeader().getReadGroups(); if (readGroups.isEmpty()) - table.addColumn("SINGLE_SAMPLE", 0); - + table.addColumn("SINGLE_SAMPLE"); else for (SAMReadGroupRecord rg : readGroups) - table.addColumn(rg.getSample(), 0); - + table.addColumn(rg.getSample()); } public boolean filter(ReferenceContext ref, GATKSAMRecord read) { @@ -75,7 +74,7 @@ public class ReadLengthDistribution extends ReadWalker { @Override public Integer map(ReferenceContext referenceContext, GATKSAMRecord samRecord, ReadMetaDataTracker readMetaDataTracker) { - GATKReportTable table = report.getTable("ReadLengthDistribution"); + GATKReportTableV2 table = report.getTable("ReadLengthDistribution"); int length = Math.abs(samRecord.getReadLength()); String sample = samRecord.getReadGroup().getSample(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java index 3f4b4805f..ef5abe4e4 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java @@ -26,7 +26,7 @@ package org.broadinstitute.sting.gatk.walkers.diffengine; import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.report.GATKReport; -import org.broadinstitute.sting.gatk.report.GATKReportTable; +import org.broadinstitute.sting.gatk.report.GATKReportTableV2; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.classloader.PluginManager; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; @@ -235,14 +235,17 @@ public class DiffEngine { // now that we have a specific list of values we want to show, display them GATKReport report = new GATKReport(); 
final String tableName = "differences"; - report.addTable(tableName, "Summarized differences between the master and test files. See http://www.broadinstitute.org/gsa/wiki/index.php/DiffEngine for more information", false); - GATKReportTable table = report.getTable(tableName); - table.addPrimaryKey("Difference", true); - table.addColumn("NumberOfOccurrences", 0); - table.addColumn("ExampleDifference", 0); - for ( Difference diff : toShow ) { - table.set(diff.getPath(), "NumberOfOccurrences", diff.getCount()); - table.set(diff.getPath(), "ExampleDifference", diff.valueDiffString()); + report.addTable(tableName, "Summarized differences between the master and test files. See http://www.broadinstitute.org/gsa/wiki/index.php/DiffEngine for more information", 3); + final GATKReportTableV2 table = report.getTable(tableName); + table.addColumn("Difference"); + table.addColumn("NumberOfOccurrences"); + table.addColumn("ExampleDifference"); + for ( int i = 0; i < toShow.size(); i++ ) { + final Difference diff = toShow.get(i); + final String key = diff.getPath(); + table.addRowIDMapping(key, i, true); + table.set(key, "NumberOfOccurrences", diff.getCount()); + table.set(key, "ExampleDifference", diff.valueDiffString()); } GATKReport output = new GATKReport(table); output.print(params.out); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/GATKReportDiffableReader.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/GATKReportDiffableReader.java index 2fa566c09..f0af325f0 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/GATKReportDiffableReader.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/GATKReportDiffableReader.java @@ -26,7 +26,8 @@ package org.broadinstitute.sting.gatk.walkers.diffengine; import org.broadinstitute.sting.gatk.report.GATKReport; import org.broadinstitute.sting.gatk.report.GATKReportColumn; -import org.broadinstitute.sting.gatk.report.GATKReportTable; 
+import org.broadinstitute.sting.gatk.report.GATKReportColumnV2; +import org.broadinstitute.sting.gatk.report.GATKReportTableV2; import java.io.File; import java.io.FileReader; @@ -52,7 +53,7 @@ public class GATKReportDiffableReader implements DiffableReader { // one line reads the whole thing into memory GATKReport report = new GATKReport(file); - for (GATKReportTable table : report.getTables()) { + for (GATKReportTableV2 table : report.getTables()) { root.add(tableToNode(table, root)); } @@ -62,23 +63,22 @@ public class GATKReportDiffableReader implements DiffableReader { } } - private DiffNode tableToNode(GATKReportTable table, DiffNode root) { + private DiffNode tableToNode(GATKReportTableV2 table, DiffNode root) { DiffNode tableRoot = DiffNode.empty(table.getTableName(), root); tableRoot.add("Description", table.getTableDescription()); tableRoot.add("NumberOfRows", table.getNumRows()); - for (GATKReportColumn column : table.getColumns().values()) { + for ( GATKReportColumnV2 column : table.getColumnInfo() ) { DiffNode columnRoot = DiffNode.empty(column.getColumnName(), tableRoot); columnRoot.add("Width", column.getColumnFormat().getWidth()); // NOTE: as the values are trimmed during parsing left/right alignment is not currently preserved - columnRoot.add("Displayable", column.isDisplayable()); + columnRoot.add("Displayable", true); - int n = 1; - for (Object elt : column.values()) { - String name = column.getColumnName() + n++; - columnRoot.add(name, elt.toString()); + for ( int i = 0; i < table.getNumRows(); i++ ) { + String name = column.getColumnName() + (i+1); + columnRoot.add(name, table.get(i, column.getColumnName()).toString()); } tableRoot.add(columnRoot); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalReportWriter.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalReportWriter.java index 8887e3c4f..7107d1a01 100644 --- 
a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalReportWriter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalReportWriter.java @@ -25,7 +25,7 @@ package org.broadinstitute.sting.gatk.walkers.varianteval; import org.broadinstitute.sting.gatk.report.GATKReport; -import org.broadinstitute.sting.gatk.report.GATKReportTable; +import org.broadinstitute.sting.gatk.report.GATKReportTableV2; import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.VariantEvaluator; import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.VariantStratifier; import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.manager.StratificationManager; @@ -50,30 +50,32 @@ import java.util.Map; * and supports writing out the data in these evaluators to a GATKReport. */ public class VariantEvalReportWriter { - private final GATKReport report; - private final StratificationManager stratManager; - public VariantEvalReportWriter(final StratificationManager stratManager, - final Collection stratifiers, - final Collection evaluators) { - this.stratManager = stratManager; - this.report = initializeGATKReport(stratifiers, evaluators); - } + protected VariantEvalReportWriter() {} // no public access /** * The business end of the class. 
Writes out the data in the provided stratManager * to the PrintStream out * - * @param out + * @param out the output stream + * @param stratManager the stratification manager + * @param stratifiers the stratifiers + * @param evaluators the evaluators */ - public final void writeReport(final PrintStream out) { + public static void writeReport(final PrintStream out, + final StratificationManager stratManager, + final Collection stratifiers, + final Collection evaluators) { + + final GATKReport report = initializeGATKReport(stratifiers, evaluators); + for ( int key = 0; key < stratManager.size(); key++ ) { final String stratStateString = stratManager.getStratsAndStatesStringForKey(key); final List> stratsAndStates = stratManager.getStratsAndStatesForKey(key); final EvaluationContext nec = stratManager.get(key); for ( final VariantEvaluator ve : nec.getVariantEvaluators() ) { - final GATKReportTable table = report.getTable(ve.getSimpleName()); + final GATKReportTableV2 table = report.getTable(ve.getSimpleName()); final AnalysisModuleScanner scanner = new AnalysisModuleScanner(ve); final Map datamap = scanner.getData(); @@ -120,9 +122,10 @@ public class VariantEvalReportWriter { * @param primaryKey * @param stratsAndStates */ - private void setStratificationColumns(final GATKReportTable table, - final String primaryKey, - final List> stratsAndStates) { + private static void setStratificationColumns(final GATKReportTableV2 table, + final String primaryKey, + final List> stratsAndStates) { + table.set(primaryKey, table.getTableName(), table.getTableName()); for ( final Pair stratAndState : stratsAndStates ) { final VariantStratifier vs = stratAndState.getFirst(); final String columnName = vs.getName(); @@ -148,34 +151,33 @@ public class VariantEvalReportWriter { * * @return an initialized report object */ - private GATKReport initializeGATKReport(final Collection stratifiers, - final Collection evaluators) { + private static GATKReport initializeGATKReport(final 
Collection stratifiers, + final Collection evaluators) { final GATKReport report = new GATKReport(); for (final VariantEvaluator ve : evaluators) { + final AnalysisModuleScanner scanner = new AnalysisModuleScanner(ve); + final Map datamap = scanner.getData(); + // create the table final String tableName = ve.getSimpleName(); final String tableDesc = ve.getClass().getAnnotation(Analysis.class).description(); - report.addTable(tableName, tableDesc, true); + report.addTable(tableName, tableDesc, 1 + stratifiers.size() + (scanner.hasMoltenField() ? 2 : datamap.size()), true); // grab the table, and add the columns we need to it - final GATKReportTable table = report.getTable(tableName); - table.addPrimaryKey("entry", false); + final GATKReportTableV2 table = report.getTable(tableName); table.addColumn(tableName, tableName); // first create a column to hold each stratifier state for (final VariantStratifier vs : stratifiers) { final String columnName = vs.getName(); - table.addColumn(columnName, null, vs.getFormat()); + table.addColumn(columnName, vs.getFormat()); } - final AnalysisModuleScanner scanner = new AnalysisModuleScanner(ve); - final Map datamap = scanner.getData(); - if ( scanner.hasMoltenField() ) { // deal with molten data - table.addColumn(scanner.getMoltenAnnotation().variableName(), true, scanner.getMoltenAnnotation().variableFormat()); - table.addColumn(scanner.getMoltenAnnotation().valueName(), true, scanner.getMoltenAnnotation().valueFormat()); + table.addColumn(scanner.getMoltenAnnotation().variableName(), scanner.getMoltenAnnotation().variableFormat()); + table.addColumn(scanner.getMoltenAnnotation().valueName(), scanner.getMoltenAnnotation().valueFormat()); } else { if ( datamap.isEmpty() ) throw new ReviewedStingException("Datamap is empty for analysis " + scanner.getAnalysis()); @@ -187,7 +189,7 @@ public class VariantEvalReportWriter { // this is an atomic value, add a column for it final String format = datamap.get(field).format(); - 
table.addColumn(field.getName(), true, format); + table.addColumn(field.getName(), format); } catch (SecurityException e) { throw new StingException("SecurityException: " + e); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java index 5fa95b22e..a0f23531e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java @@ -555,8 +555,7 @@ public class VariantEvalWalker extends RodWalker implements Tr for ( final VariantEvaluator ve : nec.getVariantEvaluators() ) ve.finalizeEvaluation(); - final VariantEvalReportWriter writer = new VariantEvalReportWriter(stratManager, stratManager.getStratifiers(), stratManager.get(0).getVariantEvaluators()); - writer.writeReport(out); + VariantEvalReportWriter.writeReport(out, stratManager, stratManager.getStratifiers(), stratManager.get(0).getVariantEvaluators()); } // Accessors diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/QualQuantizer.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/QualQuantizer.java index 9e20e9afc..0cb10679d 100644 --- a/public/java/src/org/broadinstitute/sting/utils/recalibration/QualQuantizer.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/QualQuantizer.java @@ -29,7 +29,7 @@ import com.google.java.contract.Invariant; import com.google.java.contract.Requires; import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.report.GATKReport; -import org.broadinstitute.sting.gatk.report.GATKReportTable; +import org.broadinstitute.sting.gatk.report.GATKReportTableV2; import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; @@ -422,40 +422,42 @@ public 
class QualQuantizer { } private final void addQualHistogramToReport(final GATKReport report) { - report.addTable("QualHistogram", "Quality score histogram provided to report"); - GATKReportTable table = report.getTable("QualHistogram"); + report.addTable("QualHistogram", "Quality score histogram provided to report", 2); + GATKReportTableV2 table = report.getTable("QualHistogram"); - table.addPrimaryKey("qual"); - table.addColumn("count", "NA"); + table.addColumn("qual"); + table.addColumn("count"); for ( int q = 0; q < nObservationsPerQual.size(); q++ ) { + table.set(q, "qual", q); table.set(q, "count", nObservationsPerQual.get(q)); } } private final void addIntervalsToReport(final GATKReport report) { - report.addTable("QualQuantizerIntervals", "Table of QualQuantizer quantization intervals"); - GATKReportTable table = report.getTable("QualQuantizerIntervals"); + report.addTable("QualQuantizerIntervals", "Table of QualQuantizer quantization intervals", 10); + GATKReportTableV2 table = report.getTable("QualQuantizerIntervals"); - table.addPrimaryKey("name"); - table.addColumn("qStart", "NA"); - table.addColumn("qEnd", "NA"); - table.addColumn("level", "NA"); - table.addColumn("merge.order", "NA"); - table.addColumn("nErrors", "NA"); - table.addColumn("nObservations", "NA"); - table.addColumn("qual", "NA"); - table.addColumn("penalty", "NA"); - table.addColumn("root.node", "NA"); + table.addColumn("name"); + table.addColumn("qStart"); + table.addColumn("qEnd"); + table.addColumn("level"); + table.addColumn("merge.order"); + table.addColumn("nErrors"); + table.addColumn("nObservations"); + table.addColumn("qual"); + table.addColumn("penalty"); + table.addColumn("root.node"); //table.addColumn("subintervals", "NA"); - for ( QualInterval interval : quantizedIntervals) + for ( QualInterval interval : quantizedIntervals ) addIntervalToReport(table, interval, true); } - private final void addIntervalToReport(final GATKReportTable table, QualInterval interval, final 
boolean atRootP) { + private final void addIntervalToReport(final GATKReportTableV2 table, final QualInterval interval, final boolean atRootP) { final String name = interval.getName(); + table.set(name, "name", name); table.set(name, "qStart", interval.qStart); table.set(name, "qEnd", interval.qEnd); table.set(name, "level", interval.level); diff --git a/public/java/test/org/broadinstitute/sting/gatk/report/GATKReportUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/report/GATKReportUnitTest.java index 5759204cf..d538a027f 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/report/GATKReportUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/report/GATKReportUnitTest.java @@ -36,20 +36,18 @@ import java.io.PrintStream; public class GATKReportUnitTest extends BaseTest { @Test public void testParse() throws Exception { - String reportPath = validationDataLocation + "exampleGATKReportv1.tbl"; + String reportPath = validationDataLocation + "exampleGATKReportv2.tbl"; GATKReport report = new GATKReport(reportPath); - Assert.assertEquals(report.getVersion(), GATKReportVersion.V1_0); + Assert.assertEquals(report.getVersion(), GATKReportVersion.V1_1); Assert.assertEquals(report.getTables().size(), 5); - GATKReportTable countVariants = report.getTable("CountVariants"); - Object countVariantsPK = countVariants.getPrimaryKeyByData("CountVariants", "dbsnp", "eval", "none", "all"); - Assert.assertEquals(countVariants.get(countVariantsPK, "nProcessedLoci"), "63025520"); - Assert.assertEquals(countVariants.get(countVariantsPK, "nNoCalls"), "0"); - Assert.assertEquals(countVariants.get(countVariantsPK, "heterozygosity"), 4.73e-06); + GATKReportTableV2 countVariants = report.getTable("CountVariants"); + Assert.assertEquals(countVariants.get(0, "nProcessedLoci"), "63025520"); + Assert.assertEquals(countVariants.get(0, "nNoCalls"), "0"); + Assert.assertEquals(countVariants.get(0, "heterozygosity"), 4.73e-06); - GATKReportTable validationReport = 
report.getTable("ValidationReport"); - Object validationReportPK = countVariants.getPrimaryKeyByData("CountVariants", "dbsnp", "eval", "none", "novel"); - Assert.assertEquals(validationReport.get(validationReportPK, "PPV"), Double.NaN); + GATKReportTableV2 validationReport = report.getTable("ValidationReport"); + Assert.assertEquals(validationReport.get(2, "PPV"), Double.NaN); } @DataProvider(name = "rightAlignValues") @@ -79,9 +77,9 @@ public class GATKReportUnitTest extends BaseTest { Assert.assertEquals(GATKReportColumn.isRightAlign(value), expected, "right align of '" + value + "'"); } - private GATKReportTable makeBasicTable() { + private GATKReportTableV2 makeBasicTable() { GATKReport report = GATKReport.newSimpleReport("TableName", "sample", "value"); - GATKReportTable table = report.getTable("TableName"); + GATKReportTableV2 table = report.getTable("TableName"); report.addRow("foo.1", "hello"); report.addRow("foo.2", "world"); return table; @@ -89,37 +87,9 @@ public class GATKReportUnitTest extends BaseTest { @Test public void testDottedSampleName() { - GATKReportTable table = makeBasicTable(); - Object pk; - - pk = table.getPrimaryKeyByData("foo.1"); - Assert.assertEquals(table.get(pk, "value"), "hello"); - - pk = table.getPrimaryKeyByData("foo.2"); - Assert.assertEquals(table.get(pk, "value"), "world"); - } - - @Test - public void testFindPrimaryKeyByData() { - GATKReportTable table = makeBasicTable(); - Assert.assertNotNull(table.findPrimaryKeyByData("foo.1")); - Assert.assertNotNull(table.findPrimaryKeyByData("foo.1", "hello")); - Assert.assertNotNull(table.findPrimaryKeyByData("foo.2")); - Assert.assertNotNull(table.findPrimaryKeyByData("foo.2", "world")); - Assert.assertNull(table.findPrimaryKeyByData("list", "longer", "than", "column", "count")); - Assert.assertNull(table.findPrimaryKeyByData("short")); - } - - @Test(expectedExceptions = IllegalArgumentException.class) - public void testEmptyFindPrimaryKeyByData() { - GATKReportTable table = 
makeBasicTable(); - table.findPrimaryKeyByData(); - } - - @Test(expectedExceptions = NullPointerException.class) - public void testNullFindPrimaryKeyByData() { - GATKReportTable table = makeBasicTable(); - table.findPrimaryKeyByData((Object[]) null); + GATKReportTableV2 table = makeBasicTable(); + Assert.assertEquals(table.get(0, "value"), "hello"); + Assert.assertEquals(table.get(1, "value"), "world"); } @Test @@ -128,7 +98,7 @@ public class GATKReportUnitTest extends BaseTest { GATKReport report = GATKReport.newSimpleReport("TableName", "Roger", "is", "Awesome"); // Add data to simple GATK report - report.addRow( 12, 23.45, true); + report.addRow(12, 23.45, true); report.addRow("ans", '3', 24.5); report.addRow("hi", "", 2.3); @@ -154,42 +124,40 @@ public class GATKReportUnitTest extends BaseTest { @Test public void testGATKReportGatherer() { - boolean displayPK = false; GATKReport report1, report2, report3; report1 = new GATKReport(); - report1.addTable("TableName", "Description"); - report1.getTable("TableName").addPrimaryKey("id", displayPK); - report1.getTable("TableName").addColumn("colA", GATKReportDataType.String.getDefaultValue(), "%s"); - report1.getTable("TableName").addColumn("colB", GATKReportDataType.Character.getDefaultValue(), "%c"); - report1.getTable("TableName").set(1, "colA", "NotNum"); - report1.getTable("TableName").set(1, "colB", (char) 64); + report1.addTable("TableName", "Description", 2); + report1.getTable("TableName").addColumn("colA", "%s"); + report1.getTable("TableName").addColumn("colB", "%c"); + report1.getTable("TableName").set(0, "colA", "NotNum"); + report1.getTable("TableName").set(0, "colB", (char) 64); report2 = new GATKReport(); - report2.addTable("TableName", "Description"); - report2.getTable("TableName").addPrimaryKey("id", displayPK); - report2.getTable("TableName").addColumn("colA", GATKReportDataType.String.getDefaultValue(), "%s"); - report2.getTable("TableName").addColumn("colB", 
GATKReportDataType.Character.getDefaultValue(), "%c"); - report2.getTable("TableName").set(2, "colA", "df3"); - report2.getTable("TableName").set(2, "colB", 'A'); + report2.addTable("TableName", "Description", 2); + report2.getTable("TableName").addColumn("colA", "%s"); + report2.getTable("TableName").addColumn("colB", "%c"); + report2.getTable("TableName").set(0, "colA", "df3"); + report2.getTable("TableName").set(0, "colB", 'A'); report3 = new GATKReport(); - report3.addTable("TableName", "Description"); - report3.getTable("TableName").addPrimaryKey("id", displayPK); - report3.getTable("TableName").addColumn("colA", GATKReportDataType.String.getDefaultValue(), "%s"); - report3.getTable("TableName").addColumn("colB", GATKReportDataType.Character.getDefaultValue(), "%c"); - report3.getTable("TableName").set(3, "colA", "df5f"); - report3.getTable("TableName").set(3, "colB", 'c'); + report3.addTable("TableName", "Description", 2); + report3.getTable("TableName").addColumn("colA", "%s"); + report3.getTable("TableName").addColumn("colB", "%c"); + report3.getTable("TableName").set(0, "colA", "df5f"); + report3.getTable("TableName").set(0, "colB", 'c'); - report1.combineWith(report2); - report1.combineWith(report3); + report1.concat(report2); + report1.concat(report3); - report1.addTable("Table2", "To contain some more data types"); - GATKReportTable table = report1.getTable("Table2"); - table.addPrimaryKey("KEY"); - table.addColumn("SomeInt", GATKReportDataType.Integer.getDefaultValue(), true, "%d"); - table.addColumn("SomeFloat", GATKReportDataType.Decimal.getDefaultValue(), true, "%.16E"); - table.addColumn("TrueFalse", false, true, "%B"); + report1.addTable("Table2", "To contain some more data types", 3); + GATKReportTableV2 table = report1.getTable("Table2"); + table.addColumn("SomeInt", "%d"); + table.addColumn("SomeFloat", "%.16E"); + table.addColumn("TrueFalse", "%B"); + table.addRowIDMapping("12df", 0); + table.addRowIDMapping("5f", 1); + 
table.addRowIDMapping("RZ", 2); table.set("12df", "SomeInt", Byte.MAX_VALUE); table.set("12df", "SomeFloat", 34.0); table.set("12df", "TrueFalse", true); @@ -200,17 +168,17 @@ public class GATKReportUnitTest extends BaseTest { table.set("RZ", "SomeFloat", 535646345.657453464576); table.set("RZ", "TrueFalse", true); - report1.addTable("Table3", "blah"); - report1.getTable("Table3").addPrimaryKey("HAI"); - report1.getTable("Table3").addColumn("a", true, GATKReportDataType.String.getDefaultFormatString()); + report1.addTable("Table3", "blah", 1, true); + report1.getTable("Table3").addColumn("a"); + report1.getTable("Table3").addRowIDMapping("q", 2); + report1.getTable("Table3").addRowIDMapping("5", 3); + report1.getTable("Table3").addRowIDMapping("573s", 0); + report1.getTable("Table3").addRowIDMapping("ZZZ", 1); report1.getTable("Table3").set("q", "a", "34"); report1.getTable("Table3").set("5", "a", "c4g34"); report1.getTable("Table3").set("573s", "a", "fDlwueg"); report1.getTable("Table3").set("ZZZ", "a", "Dfs"); - //report1.print(System.out); - - try { File file = createTempFile("GATKReportGatherer-UnitTest", ".tbl"); //System.out.format("The temporary file" + " has been created: %s%n", file); @@ -226,8 +194,5 @@ public class GATKReportUnitTest extends BaseTest { } catch (IOException x) { System.err.format("IOException: %s%n", x); } - - //Assert.assertEquals(1,1); - } } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGathererUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGathererUnitTest.java index 3829d2808..d789eecde 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGathererUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGathererUnitTest.java @@ -1,7 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.bqsr; import org.broadinstitute.sting.gatk.report.GATKReport; -import org.broadinstitute.sting.gatk.report.GATKReportTable; +import 
org.broadinstitute.sting.gatk.report.GATKReportTableV2; import org.testng.Assert; import org.testng.annotations.Test; @@ -30,8 +30,8 @@ public class BQSRGathererUnitTest { GATKReport originalReport = new GATKReport(recal); GATKReport calculatedReport = new GATKReport(output); - for (GATKReportTable originalTable : originalReport.getTables()) { - GATKReportTable calculatedTable = calculatedReport.getTable(originalTable.getTableName()); + for (GATKReportTableV2 originalTable : originalReport.getTables()) { + GATKReportTableV2 calculatedTable = calculatedReport.getTable(originalTable.getTableName()); List columnsToTest = new LinkedList(); columnsToTest.add(RecalDataManager.NUMBER_OBSERVATIONS_COLUMN_NAME); columnsToTest.add(RecalDataManager.NUMBER_ERRORS_COLUMN_NAME); @@ -59,11 +59,11 @@ public class BQSRGathererUnitTest { * @param columnsToTest list of columns to test. All columns will be tested with the same criteria (equality given factor) * @param factor 1 to test for equality, any other value to multiply the original value and match with the calculated */ - private void testTablesWithColumnsAndFactor(GATKReportTable original, GATKReportTable calculated, List columnsToTest, int factor) { - for (Object primaryKey : original.getPrimaryKeys()) { // tables don't necessarily have the same primary keys + private void testTablesWithColumnsAndFactor(GATKReportTableV2 original, GATKReportTableV2 calculated, List columnsToTest, int factor) { + for (int row = 0; row < original.getNumRows(); row++ ) { for (String column : columnsToTest) { - Object actual = calculated.get(primaryKey, column); - Object expected = original.get(primaryKey, column); + Object actual = calculated.get(new Integer(row), column); + Object expected = original.get(row, column); if (factor != 1) { if (expected instanceof Double) @@ -76,7 +76,7 @@ public class BQSRGathererUnitTest { expected = (Byte) expected * factor; } } - Assert.assertEquals(actual, expected, "Primary key: " + primaryKey + " Original 
Table: " + original.getTableName() + " Calc Table: " + calculated.getTableName()); + Assert.assertEquals(actual, expected, "Row: " + row + " Original Table: " + original.getTableName() + " Calc Table: " + calculated.getTableName()); } } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/ErrorRatePerCycleIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/ErrorRatePerCycleIntegrationTest.java index 7c705de18..856fd73a3 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/ErrorRatePerCycleIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/ErrorRatePerCycleIntegrationTest.java @@ -35,7 +35,7 @@ public class ErrorRatePerCycleIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( "-T ErrorRatePerCycle -R " + b37KGReference + " -I " + b37GoodBAM + " -L 20:10,000,000-10,100,000 -o %s", 1, - Arrays.asList("71685716c7dde64c51bbd908c06ea742")); + Arrays.asList("dccdf3cb3193d01a1a767097e4a5c35e")); executeTest("ErrorRatePerCycle:", spec); } } \ No newline at end of file diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/ReadGroupPropertiesIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/ReadGroupPropertiesIntegrationTest.java index 0f3750abd..6b7c5afa9 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/ReadGroupPropertiesIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/ReadGroupPropertiesIntegrationTest.java @@ -38,7 +38,7 @@ public class ReadGroupPropertiesIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( "-T ReadGroupProperties -R " + b37KGReference + " -I " + b37GoodBAM + " -L 20:10,000,000-11,000,000 -o %s", 1, - Arrays.asList("3f1f97a1d2c5fb552ed4f33ea30d136d")); + Arrays.asList("618a671c61014deb3b284061a87b61d6")); 
executeTest("ReadGroupProperties:", spec); } } \ No newline at end of file diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java index 71c014f2c..22e266e5f 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java @@ -57,7 +57,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("e87932ffa1d310cecee49e7829a0f056") + Arrays.asList("7091cbeb47d041463806c8c8f98239a6") ); executeTest("testFunctionClassWithSnpeff", spec); } @@ -77,7 +77,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("8279ee42a6785f9c2b3dda8d82674e00") + Arrays.asList("7a09b8a6759ccee5da55f1f85a43fe9c") ); executeTest("testStratifySamplesAndExcludeMonomorphicSites", spec); } @@ -97,7 +97,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("0bac64d5615f901d3005247c6d016549") + Arrays.asList("f70da7be5d4d8305f3e4433c9004aee4") ); executeTest("testFundamentalsCountVariantsSNPsandIndels", spec); } @@ -118,7 +118,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("b84d8b4429116c887ceb5489c8782f00") + Arrays.asList("e62a3bd9914d48e2bb2fb4f5dfc5ebc0") ); executeTest("testFundamentalsCountVariantsSNPsandIndelsWithNovelty", spec); } @@ -140,7 +140,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("e4f37642d9113a65fbe8bc1d091c206f") + Arrays.asList("087a2d9943c53e7f49663667c3305c7e") ); executeTest("testFundamentalsCountVariantsSNPsandIndelsWithNoveltyAndFilter", spec); } @@ -161,7 +161,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - 
Arrays.asList("c5412ee824b4815dc8eea62a4c5462ef") + Arrays.asList("bca988c81a761f12627610e5a3bab5a0") ); executeTest("testFundamentalsCountVariantsSNPsandIndelsWithCpG", spec); } @@ -182,7 +182,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("1d42e97643afd3e7f5f8c9f6416c5883") + Arrays.asList("7ca5c0c5e79ba6cd1e5102ced851a1b4") ); executeTest("testFundamentalsCountVariantsSNPsandIndelsWithFunctionalClass", spec); } @@ -203,7 +203,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("8c2ba70bed2f0fdb0ca371f7038819ef") + Arrays.asList("a6a31f658ad1e76c79190ada758f157c") ); executeTest("testFundamentalsCountVariantsSNPsandIndelsWithDegeneracy", spec); } @@ -224,7 +224,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("c912b4b0bf1925d042119b301c183b93") + Arrays.asList("c1a3df6f89f5ddf7b7c296eb944f3fdd") ); executeTest("testFundamentalsCountVariantsSNPsandIndelsWithSample", spec); } @@ -247,7 +247,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("dea3d2cc53265ff8ed2f0030c40f3747") + Arrays.asList("48652b360ce031aa2f9004c9bae6bda5") ); executeTest("testFundamentalsCountVariantsSNPsandIndelsWithJexlExpression", spec); } @@ -272,7 +272,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("dede22b15936c38e29b850c805c7b706") + Arrays.asList("c3521b18388aff7f53691a63619b3b07") ); executeTest("testFundamentalsCountVariantsSNPsandIndelsWithMultipleJexlExpressions", spec); } @@ -291,7 +291,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("9a94c4c613bf69feb3d9579c353baaf2") + Arrays.asList("90a46e045f3fe8b22f102acaaeec0201") ); executeTest("testFundamentalsCountVariantsNoCompRod", spec); } @@ -304,7 +304,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { " --eval " + validationDataLocation + 
"yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf" + " --comp:comp_genotypes,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.head.vcf"; WalkerTestSpec spec = new WalkerTestSpec(withSelect(tests, "DP < 50", "DP50") + " " + extraArgs + " -ST CpG -o %s", - 1, Arrays.asList("8d4530e9cef8531c46bbb693b84d04c7")); + 1, Arrays.asList("4b9dcbce0717285e3c0c736c1bed744c")); executeTestParallel("testSelect1", spec); } @@ -314,7 +314,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec(cmdRoot + " -ST CpG --eval:VCF3 " + validationDataLocation + vcfFile + " --comp:VCF3 " + validationDataLocation + "GenotypeConcordanceComp.vcf -noEV -EV GenotypeConcordance -o %s", 1, - Arrays.asList("9bbc762f459023af0480774eb2986af4")); + Arrays.asList("810d55b67de592f6375d9dfb282145ef")); executeTestParallel("testVEGenotypeConcordance" + vcfFile, spec); } @@ -325,14 +325,14 @@ public class VariantEvalIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec("-T VariantEval -R "+b37KGReference+" --eval " + variantEvalTestDataRoot + vcfFile + " -ped "+ variantEvalTestDataRoot + pedFile +" -noEV -EV MendelianViolationEvaluator -L 1:10109-10315 -o %s -mvq 0 -noST", 1, - Arrays.asList("ddcabc30c88a755a78100e30e0d491d2")); + Arrays.asList("c56e19d0647d826485d8a3b559d5c56d")); executeTestParallel("testVEMendelianViolationEvaluator" + vcfFile, spec); } @Test public void testCompVsEvalAC() { String extraArgs = "-T VariantEval -R "+b36KGReference+" -o %s -ST CpG -EV GenotypeConcordance --eval:evalYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.very.few.lines.vcf --comp:compYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.fake.genotypes.ac.test.vcf"; - WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("bb076f7239039191fde883c5e68483ea")); + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("659a15cc842f0310106fa595a26da71d")); 
executeTestParallel("testCompVsEvalAC",spec); } @@ -350,7 +350,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { @Test public void testCompOverlap() { String extraArgs = "-T VariantEval -R " + b37KGReference + " -L " + validationDataLocation + "VariantEval/pacbio.hg19.intervals --comp:comphapmap " + comparisonDataLocation + "Validated/HapMap/3.3/genotypes_r27_nr.b37_fwd.vcf --eval " + validationDataLocation + "VariantEval/pacbio.ts.recalibrated.vcf -noEV -EV CompOverlap -sn NA12878 -noST -ST Novelty -o %s"; - WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("98f9c2f5fef43dbda688d32360908615")); + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("59ad39e03678011b5f62492fa83ede04")); executeTestParallel("testCompOverlap",spec); } @@ -362,7 +362,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { " --dbsnp " + b37dbSNP132 + " --eval:evalBI " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" + " -noST -ST Novelty -o %s"; - WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("9d24f34d94d74417e00e3b7bcf84650f")); + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("112bb3221688acad83f29542bfb33151")); executeTestParallel("testEvalTrackWithoutGenotypes",spec); } @@ -374,7 +374,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { " --eval:evalBI " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" + " --eval:evalBC " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bc.sites.vcf" + " -noST -ST Novelty -o %s"; - WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("7329b0bc73c9ccaf5facd754f3410c38")); + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("81dcdde458c1ebb9aa35289ea8f12bc8")); executeTestParallel("testMultipleEvalTracksWithoutGenotypes",spec); } @@ -391,13 +391,13 @@ public class VariantEvalIntegrationTest extends WalkerTest { " -noST -noEV -ST Novelty -EV CompOverlap" 
+ " -o %s"; - WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("d0218c5435c8601f2355b7d183ab032f")); + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("44146b8d4ddbaeb9409c9b88eefe7f40")); executeTestParallel("testMultipleCompTracks",spec); } @Test public void testPerSampleAndSubsettedSampleHaveSameResults1() { - String md5 = "b5cd5c286d459b8edd4ca54320e560a3"; + String md5 = "5f894d726cfaa0b29d7c11ff5bb9b3fd"; WalkerTestSpec spec = new WalkerTestSpec( buildCommandLine( @@ -452,7 +452,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("1198bfea6183bd43219071a84c79a386") + Arrays.asList("0d51321693d4afc262e4059353993d12") ); executeTest("testAlleleCountStrat", spec); } @@ -475,7 +475,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("1198bfea6183bd43219071a84c79a386") + Arrays.asList("0d51321693d4afc262e4059353993d12") ); executeTest("testMultipleEvalTracksAlleleCountWithMerge", spec); } @@ -518,7 +518,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("6decba040051daafad4ecad5a411e1e1") + Arrays.asList("74fc726760c0fcfe50c44c853756f261") ); executeTest("testIntervalStrat", spec); } @@ -535,7 +535,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("aad01b26198b30da5d59a05c08d863bb") + Arrays.asList("f8460af997436a5ce4407fefb0e2724d") ); executeTest("testModernVCFWithLargeIndels", spec); } @@ -555,7 +555,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("4fa2557663ef8fb4cdeecd667791985c") + Arrays.asList("7dc2d8983cb7d98b291ca2f60a9151b2") ); executeTest("testStandardIndelEval", spec); } @@ -588,7 +588,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { executeTest("testIncludingAC0 keep ac 0 = " + includeAC0, spec); } - @Test public void testWithAC0() { testIncludingAC0(true, 
"0ed2c8e4b4e06973a06838bc930a132d"); } - @Test public void testWithoutAC0() { testIncludingAC0(false, "79d28ddd0ab9584776b6cbefe48331df"); } + @Test public void testWithAC0() { testIncludingAC0(true, "c786128cfe4d3e28cdbc15c5c838ad20"); } + @Test public void testWithoutAC0() { testIncludingAC0(false, "7bc505c07d9aee49571ad4b3fc9f7feb"); } } diff --git a/public/scala/src/org/broadinstitute/sting/queue/util/QJobReport.scala b/public/scala/src/org/broadinstitute/sting/queue/util/QJobReport.scala index e548e5c5e..d49e4526d 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/util/QJobReport.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/util/QJobReport.scala @@ -25,7 +25,7 @@ package org.broadinstitute.sting.queue.util import org.broadinstitute.sting.queue.function.QFunction -import org.broadinstitute.sting.gatk.report.{GATKReportTable, GATKReport} +import org.broadinstitute.sting.gatk.report.{GATKReportTableV2, GATKReport} import org.broadinstitute.sting.utils.exceptions.UserException import org.broadinstitute.sting.queue.engine.JobRunInfo import java.io.{PrintStream, File} @@ -65,7 +65,7 @@ trait QJobReport extends Logging { } /** The report Group is the analysis name transform to only contain valid GATKReportTable characters */ - def getReportGroup = self.analysisName.replaceAll(GATKReportTable.INVALID_TABLE_NAME_REGEX, "_") + def getReportGroup = self.analysisName.replaceAll(GATKReportTableV2.INVALID_TABLE_NAME_REGEX, "_") def getReportFeatures = reportFeatures def getReportFeatureNames: Seq[String] = getReportFeatures.keys.toSeq @@ -139,13 +139,12 @@ object QJobReport { // create a table for each group of logs for ( (group, groupLogs) <- groupLogs(logs) ) { - report.addTable(group, "Job logs for " + group) - val table: GATKReportTable = report.getTable(group) - table.addPrimaryKey("jobName", false) val keys = logKeys(groupLogs) + report.addTable(group, "Job logs for " + group, keys.size) + val table: GATKReportTableV2 = report.getTable(group) 
// add the columns - keys.foreach(table.addColumn(_, 0)) + keys.foreach(table.addColumn(_)) for (log <- groupLogs) { for ( key <- keys ) table.set(log.getReportName, key, log.getReportFeature(key)) diff --git a/public/scala/test/org/broadinstitute/sting/queue/pipeline/PipelineTest.scala b/public/scala/test/org/broadinstitute/sting/queue/pipeline/PipelineTest.scala index 9d51b01a0..00b6b4cda 100644 --- a/public/scala/test/org/broadinstitute/sting/queue/pipeline/PipelineTest.scala +++ b/public/scala/test/org/broadinstitute/sting/queue/pipeline/PipelineTest.scala @@ -136,7 +136,7 @@ object PipelineTest extends BaseTest with Logging { println(" value (min,target,max) table key metric") for (validation <- evalSpec.validations) { val table = report.getTable(validation.table) - val key = table.getPrimaryKeyByData(validation.table +: validation.key.split('.') : _*) + val key = table.findRowByData(validation.table +: validation.key.split('.') : _*) val value = String.valueOf(table.get(key, validation.metric)) val inRange = if (value == null) false else validation.inRange(value) val flag = if (!inRange) "*" else " " diff --git a/public/testdata/exampleGRP.grp b/public/testdata/exampleGRP.grp index 67a39dc3a..619d6b212 100644 --- a/public/testdata/exampleGRP.grp +++ b/public/testdata/exampleGRP.grp @@ -1,5 +1,5 @@ -#:GATKReport.v1.0:5 -#:GATKTable:true:1:14::; +#:GATKReport.v1.1:5 +#:GATKTable:2:14::; #:GATKTable:Arguments:Recalibration argument collection values used in this run Argument Value covariate null @@ -17,7 +17,7 @@ solid_nocall_strategy THROW_EXCEPTION solid_recal_mode SET_Q_ZERO standard_covs true -#:GATKTable:true:2:94:::; +#:GATKTable:3:94:::; #:GATKTable:Quantized:Quality quantization map QualityScore Count QuantizedScore 0 20 3 @@ -115,14 +115,14 @@ QualityScore Count QuantizedScore 92 0 92 93 0 93 -#:GATKTable:false:6:3:%s:%s:%.4f:%.4f:%d:%d:; +#:GATKTable:6:3:%s:%s:%.4f:%.4f:%d:%d:; #:GATKTable:RecalTable0: ReadGroup EventType EmpiricalQuality 
EstimatedQReported Observations Errors exampleBAM.bam.bam D 25.8092 45.0000 380 0 exampleBAM.bam.bam M 14.0483 15.4820 380 14 exampleBAM.bam.bam I 25.8092 45.0000 380 0 -#:GATKTable:false:6:32:%s:%s:%s:%.4f:%d:%d:; +#:GATKTable:6:32:%s:%s:%s:%.4f:%d:%d:; #:GATKTable:RecalTable1: ReadGroup QualityScore EventType EmpiricalQuality Observations Errors exampleBAM.bam.bam 32 M 15.1851 32 0 @@ -158,7 +158,7 @@ exampleBAM.bam.bam 45 D 25.8092 380 exampleBAM.bam.bam 13 M 6.0206 3 0 exampleBAM.bam.bam 28 M 12.0412 15 0 -#:GATKTable:false:8:1354:%s:%s:%s:%s:%s:%.4f:%d:%d:; +#:GATKTable:8:1354:%s:%s:%s:%s:%s:%.4f:%d:%d:; #:GATKTable:RecalTable2: ReadGroup QualityScore CovariateValue CovariateName EventType EmpiricalQuality Observations Errors exampleBAM.bam.bam 45 TGAAAGTG Context D 3.0103 1 0