From 8f0e9d74cefbfe91e560d262180b56cd57e33326 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Wed, 28 Mar 2012 16:56:40 -0400 Subject: [PATCH] GATKReportTable output refactor writing out a GATKReportTable was O(n^2)!!!!! New implementation is O(n). What a difference, when N = 2^16... --- .../sting/gatk/report/GATKReportColumn.java | 65 +++++++++++-------- .../sting/gatk/report/GATKReportTable.java | 26 ++++---- 2 files changed, 50 insertions(+), 41 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumn.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumn.java index 2b611109f..0d969c989 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumn.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumn.java @@ -26,7 +26,9 @@ package org.broadinstitute.sting.gatk.report; import org.apache.commons.lang.math.NumberUtils; -import java.util.*; +import java.util.Arrays; +import java.util.Collection; +import java.util.LinkedHashMap; /** * Holds values for a column in a GATK report table @@ -38,6 +40,10 @@ public class GATKReportColumn extends LinkedHashMap { final private boolean display; final private GATKReportDataType dataType; + private GATKReportColumnFormat columnFormat; + private GATKReportColumnFormat.Alignment alignment = GATKReportColumnFormat.Alignment.RIGHT; // default alignment is to the right unless values added ask for a left alignment + private int maxWidth = 0; + /** * Construct the column object, specifying the column name, default value, whether or not the column should be * displayed, and the format string. This cannot be null. @@ -49,6 +55,7 @@ public class GATKReportColumn extends LinkedHashMap { */ public GATKReportColumn(String columnName, Object defaultValue, boolean display, String format) { this.columnName = columnName; + this.maxWidth = columnName.length(); this.display = display; if ( format.equals("") ) { this.format = "%s"; @@ -85,7 +92,8 @@ public class GATKReportColumn extends LinkedHashMap { /** * Return an object from the column, but if it doesn't exist, return the default value. This is useful when writing - * tables, as the table gets written properly without having to waste storage for the unset elements (usually the zero + * tables, as the table gets written properly without having to waste storage for the unset elements (usually the + * zero * values) in the table. * * @param primaryKey the primary key position in the column that should be retrieved @@ -120,32 +128,17 @@ public class GATKReportColumn extends LinkedHashMap { } /** - * Get the display width for this column. This allows the entire column to be displayed with the appropriate, fixed width. + * Get the display width for this column. This allows the entire column to be displayed with the appropriate, fixed + * width. * * @return the format string for this column */ public GATKReportColumnFormat getColumnFormat() { - int maxWidth = columnName.length(); - GATKReportColumnFormat.Alignment alignment = GATKReportColumnFormat.Alignment.RIGHT; + if (columnFormat != null) + return columnFormat; - for (Object obj : this.values()) { - if (obj != null) { - String formatted = formatValue(obj); - - int width = formatted.length(); - if (width > maxWidth) { - maxWidth = width; - } - - if (alignment == GATKReportColumnFormat.Alignment.RIGHT) { - if (!isRightAlign(formatted)) { - alignment = GATKReportColumnFormat.Alignment.LEFT; - } - } - } - } - - return new GATKReportColumnFormat(maxWidth, alignment); + columnFormat = new GATKReportColumnFormat(maxWidth, alignment); + return columnFormat; } private static final Collection RIGHT_ALIGN_STRINGS = Arrays.asList( @@ -176,10 +169,11 @@ public class GATKReportColumn extends LinkedHashMap { String value; if (obj == null) { value = "null"; - } else if ( dataType.equals(GATKReportDataType.Unknown) && - (obj instanceof Double || obj instanceof Float) ) { + } + else if ( dataType.equals(GATKReportDataType.Unknown) && (obj instanceof Double || obj instanceof Float) ) { value = String.format("%.8f", obj); - } else + } + else value = String.format(format, obj); return value; @@ -226,4 +220,23 @@ public class GATKReportColumn extends LinkedHashMap { else return format; } + + @Override + public Object put(Object key, Object value) { + if (value != null) { + String formatted = formatValue(value); + updateMaxWidth(formatted); + updateFormat(formatted); + } + return super.put(key, value); + } + + private void updateMaxWidth(String formatted) { + maxWidth = Math.max(formatted.length(), maxWidth); + } + + private void updateFormat(String formatted) { + if (!isRightAlign(formatted)) + alignment = GATKReportColumnFormat.Alignment.LEFT; + } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java index 1fe67154e..44d70ac4b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java @@ -405,7 +405,7 @@ public class GATKReportTable { public void set(Object primaryKey, String columnName, Object value) { verifyEntry(primaryKey, columnName); GATKReportColumn column = columns.get(columnName); - // Check if value is of same type as column + //todo -- Check if value is of same type as column // We do not accept internal null values if (value == null) @@ -422,17 +422,18 @@ public class GATKReportTable { try { newValue = Long.parseLong((String) value); } catch (Exception e) { + /** do nothing */ } } if (column.getDataType().equals(GATKReportDataType.Decimal)) { try { newValue = Double.parseDouble((String) value); } catch (Exception e) { + /** do nothing */ } } if (column.getDataType().equals(GATKReportDataType.Character) && ((String) value).length() == 1) { newValue = ((String) value).charAt(0); - } } @@ -900,13 +901,10 @@ public class GATKReportTable { public boolean isSameFormat(GATKReportTable table) { //Should we add the sortByPrimaryKey as a check? - if (!columns.isSameFormat(table.columns)) { - return false; - } - return (primaryKeyDisplay == table.primaryKeyDisplay && - primaryKeyName.equals(table.primaryKeyName) && - tableName.equals(table.tableName) && - tableDescription.equals(table.tableDescription)); + return columns.isSameFormat(table.columns) && + (primaryKeyDisplay == table.primaryKeyDisplay && primaryKeyName.equals(table.primaryKeyName) && + tableName.equals(table.tableName) && + tableDescription.equals(table.tableDescription)); } /** @@ -916,12 +914,10 @@ public class GATKReportTable { * @return true if all field in the reports, tables, and columns are equal. */ public boolean equals(GATKReportTable table) { - if (!isSameFormat(table)) { - return false; - } - return (columns.equals(table.columns) && - primaryKeyColumn.equals(table.primaryKeyColumn) && - sortByPrimaryKey == table.sortByPrimaryKey); + return isSameFormat(table) && + (columns.equals(table.columns) && + primaryKeyColumn.equals(table.primaryKeyColumn) && + sortByPrimaryKey == table.sortByPrimaryKey); } }