From c1ba12d71a7fa0879dcd4e5a8fcadbd6ece025de Mon Sep 17 00:00:00 2001 From: Tad Jordan Date: Thu, 3 Jan 2013 16:25:57 -0500 Subject: [PATCH] Added unit test for outputting sorted GATKReport Tables - Made a few small modifications to the code - Replaced the two boolean arguments in the GATKReportTable constructor with an enum that specifies how the table is sorted --- .../sting/gatk/report/GATKReport.java | 9 +- .../sting/gatk/report/GATKReportTable.java | 187 +++++++++--------- .../bqsr/RecalibrationArgumentCollection.java | 2 +- .../diagnostics/ErrorRatePerCycle.java | 2 +- .../varianteval/VariantEvalReportWriter.java | 2 +- .../utils/recalibration/QuantizationInfo.java | 2 +- .../sting/utils/recalibration/RecalUtils.java | 2 +- .../sting/gatk/report/GATKReportUnitTest.java | 88 ++++++++- 8 files changed, 185 insertions(+), 109 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java index e69924930..1451b8cde 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java @@ -117,7 +117,7 @@ public class GATKReport { * @param numColumns the number of columns in this table */ public void addTable(final String tableName, final String tableDescription, final int numColumns) { - addTable(tableName, tableDescription, numColumns, false, false); + addTable(tableName, tableDescription, numColumns, GATKReportTable.TableSortingWay.DO_NOT_SORT); } /** @@ -126,11 +126,10 @@ public class GATKReport { * @param tableName the name of the table * @param tableDescription the description of the table * @param numColumns the number of columns in this table - * @param sortByRowID whether to sort the rows by the row ID - * @param sortByAllColumns whether to sort the rows by all columns starting from leftmost column + * @param sortingWay way to sort table */ - public void addTable(final String 
tableName, final String tableDescription, final int numColumns, final boolean sortByRowID, final boolean sortByAllColumns) { - GATKReportTable table = new GATKReportTable(tableName, tableDescription, numColumns, sortByRowID, sortByAllColumns); + public void addTable(final String tableName, final String tableDescription, final int numColumns, final GATKReportTable.TableSortingWay sortingWay) { + GATKReportTable table = new GATKReportTable(tableName, tableDescription, numColumns, sortingWay); tables.put(tableName, table); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java index 2bf7c9609..226e50f81 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java @@ -46,8 +46,7 @@ public class GATKReportTable { private final String tableName; private final String tableDescription; - private final boolean sortByRowID; - private final boolean sortByAllColumns; + private final TableSortingWay sortingWay; private List underlyingData; private final List columnInfo; @@ -73,6 +72,12 @@ public class GATKReportTable { public int index() { return index; } } + public enum TableSortingWay { + SORT_BY_ROW, + SORT_BY_COLUMN, + DO_NOT_SORT + } + protected enum TableNameHeaderFields { NAME(2), DESCRIPTION(3); @@ -107,10 +112,7 @@ public class GATKReportTable { tableDescription = (tableNameData.length <= TableNameHeaderFields.DESCRIPTION.index()) ? "" : tableNameData[TableNameHeaderFields.DESCRIPTION.index()]; // table may have no description! 
(and that's okay) // when reading from a file, we do not re-sort the rows - sortByRowID = false; - - // when reading from a file, we do not re-sort the rows - sortByAllColumns = false; + sortingWay = TableSortingWay.DO_NOT_SORT; // initialize the data final int nColumns = Integer.parseInt(tableData[TableDataHeaderFields.COLS.index()]); @@ -181,7 +183,7 @@ public class GATKReportTable { * @param numColumns the number of columns in this table */ public GATKReportTable(final String tableName, final String tableDescription, final int numColumns) { - this(tableName, tableDescription, numColumns, true, false); + this(tableName, tableDescription, numColumns, TableSortingWay.SORT_BY_ROW); } /** @@ -190,10 +192,9 @@ public class GATKReportTable { * @param tableName the name of the table * @param tableDescription the description of the table * @param numColumns the number of columns in this table - * @param sortByRowID whether to sort rows by the row ID (instead of the order in which they were added) - * @param sortByAllColumns whether to sort rows by all columns (instead of the order in which they were added) + * @param sortingWay in what way to sort rows (instead of the order in which they were added) */ - public GATKReportTable(final String tableName, final String tableDescription, final int numColumns, final boolean sortByRowID, final boolean sortByAllColumns) { + public GATKReportTable(final String tableName, final String tableDescription, final int numColumns, final TableSortingWay sortingWay) { if ( !isValidName(tableName) ) { throw new ReviewedStingException("Attempted to set a GATKReportTable name of '" + tableName + "'. 
GATKReportTable names must be purely alphanumeric - no spaces or special characters are allowed."); } @@ -204,8 +205,7 @@ public class GATKReportTable { this.tableName = tableName; this.tableDescription = tableDescription; - this.sortByRowID = sortByRowID; - this.sortByAllColumns = sortByAllColumns; + this.sortingWay = sortingWay; underlyingData = new ArrayList(INITITAL_ARRAY_SIZE); columnInfo = new ArrayList(numColumns); @@ -218,7 +218,7 @@ public class GATKReportTable { * @param tableToCopy */ public GATKReportTable(final GATKReportTable tableToCopy, final boolean copyData) { - this(tableToCopy.getTableName(), tableToCopy.getTableDescription(), tableToCopy.getNumColumns(), tableToCopy.sortByRowID, tableToCopy.sortByAllColumns); + this(tableToCopy.getTableName(), tableToCopy.getTableDescription(), tableToCopy.getNumColumns(), tableToCopy.sortingWay); for ( final GATKReportColumn column : tableToCopy.getColumnInfo() ) addColumn(column.getColumnName(), column.getFormat()); if ( copyData ) @@ -569,56 +569,53 @@ public class GATKReportTable { out.println(); // write the table body - if ( sortByAllColumns ) { - Collections.sort(underlyingData, new Comparator() { - //INVARIANT the two arrays are of the same length and corresponding elements are of the same type - @Override - public int compare(Object[] objectArr1, Object[] objectArr2) { - final int EQUAL = 0; + switch (sortingWay) { + case SORT_BY_COLUMN: + Collections.sort(underlyingData, new Comparator() { + //INVARIANT the two arrays are of the same length and corresponding elements are of the same type + @Override + public int compare(Object[] objectArr1, Object[] objectArr2) { + final int EQUAL = 0; - int result = EQUAL; + int result = EQUAL; - int l = objectArr1.length; - for (int x = 0; x < l; x++) { - if (objectArr1[x] instanceof Integer) { - result = ((Integer)objectArr1[x]).compareTo((Integer)objectArr2[x]); + int l = objectArr1.length; + for (int x = 0; x < l; x++) { + if (objectArr1[x] instanceof Integer) { 
+ result = ((Integer)objectArr1[x]).compareTo((Integer)objectArr2[x]); + } else if (objectArr1[x] instanceof Double) { + result = ((Double)objectArr1[x]).compareTo((Double)objectArr2[x]); + } else { // default uses String comparison + result = objectArr1[x].toString().compareTo(objectArr2[x].toString()); + } if( result != EQUAL) { return result; } - } else if (objectArr1[x] instanceof Double) { - result = ((Double)objectArr1[x]).compareTo((Double)objectArr2[x]); - if( result != EQUAL) { - return result; - } - } else { // default uses String comparison - result = objectArr1[x].toString().compareTo(objectArr2[x].toString()); - if( result != EQUAL) { - return result; - } } + return result; } - return result; - } - }); - for ( final Object[] row : underlyingData ) - writeRow(out, row); - } else if ( sortByRowID ) { - // make sure that there are exactly the correct number of ID mappings - if ( rowIdToIndex.size() != underlyingData.size() ) - throw new ReviewedStingException("There isn't a 1-to-1 mapping from row ID to index; this can happen when rows are not created consistently"); + }); + for ( final Object[] row : underlyingData ) + writeRow(out, row); + break; + case SORT_BY_ROW: + // make sure that there are exactly the correct number of ID mappings + if ( rowIdToIndex.size() != underlyingData.size() ) + throw new ReviewedStingException("There isn't a 1-to-1 mapping from row ID to index; this can happen when rows are not created consistently"); - final TreeMap sortedMap; - try { - sortedMap = new TreeMap(rowIdToIndex); - } catch (ClassCastException e) { - throw new ReviewedStingException("Unable to sort the rows based on the row IDs because the ID Objects are of different types"); - } - for ( final Map.Entry rowKey : sortedMap.entrySet() ) - writeRow(out, underlyingData.get(rowKey.getValue())); - } else { - for ( final Object[] row : underlyingData ) - writeRow(out, row); - } + final TreeMap sortedMap; + try { + sortedMap = new TreeMap(rowIdToIndex); + } catch 
(ClassCastException e) { + throw new ReviewedStingException("Unable to sort the rows based on the row IDs because the ID Objects are of different types"); + } + for ( final Map.Entry rowKey : sortedMap.entrySet() ) + writeRow(out, underlyingData.get(rowKey.getValue())); + break; + case DO_NOT_SORT: + for ( final Object[] row : underlyingData ) + writeRow(out, row); + } out.println(); } @@ -735,53 +732,47 @@ public class GATKReportTable { } private List getOrderedRows() { - if ( sortByAllColumns ) { - Collections.sort(underlyingData, new Comparator() { - //INVARIANT the two arrays are of the same length and corresponding elements are of the same type - @Override - public int compare(Object[] objectArr1, Object[] objectArr2) { - final int EQUAL = 0; - int result = EQUAL; - - int l = objectArr1.length; - for (int x = 0; x < l; x++) { - if (objectArr1[x] instanceof Integer) { - result = ((Integer)objectArr1[x]).compareTo((Integer)objectArr2[x]); - if( result != EQUAL) { - return result; + switch (sortingWay) { + case SORT_BY_COLUMN: + Collections.sort(underlyingData, new Comparator() { + //INVARIANT the two arrays are of the same length and corresponding elements are of the same type + @Override + public int compare(Object[] objectArr1, Object[] objectArr2) { + final int EQUAL = 0; + int result = EQUAL; + int l = objectArr1.length; + for (int x = 0; x < l; x++) { + if (objectArr1[x] instanceof Integer) { + result = ((Integer)objectArr1[x]).compareTo((Integer)objectArr2[x]); + } else if (objectArr1[x] instanceof Double) { + result = ((Double)objectArr1[x]).compareTo((Double)objectArr2[x]); + } else { // default uses String comparison + result = objectArr1[x].toString().compareTo(objectArr2[x].toString()); + } + if( result != EQUAL) { + return result; + } } - } else if (objectArr1[x] instanceof Double) { - result = ((Double)objectArr1[x]).compareTo((Double)objectArr2[x]); - if( result != EQUAL) { - return result; - } - } else { // default uses String comparison - result 
= objectArr1[x].toString().compareTo(objectArr2[x].toString()); - if( result != EQUAL) { - return result; - } - } + return result; } - return result; + }); + return underlyingData; + case SORT_BY_ROW: + final TreeMap sortedMap; + try { + sortedMap = new TreeMap(rowIdToIndex); + } catch (ClassCastException e) { + return underlyingData; } - }); - return underlyingData; - } else if ( !sortByRowID ) { - return underlyingData; + + final List orderedData = new ArrayList(underlyingData.size()); + for ( final int rowKey : sortedMap.values() ) + orderedData.add(underlyingData.get(rowKey)); + + return orderedData; + default: + return underlyingData; } - - final TreeMap sortedMap; - try { - sortedMap = new TreeMap(rowIdToIndex); - } catch (ClassCastException e) { - return underlyingData; - } - - final List orderedData = new ArrayList(underlyingData.size()); - for ( final int rowKey : sortedMap.values() ) - orderedData.add(underlyingData.get(rowKey)); - - return orderedData; } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java index 2f0f976fa..622413b18 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java @@ -207,7 +207,7 @@ public class RecalibrationArgumentCollection { public GATKReportTable generateReportTable(final String covariateNames) { GATKReportTable argumentsTable; if(SORT_BY_ALL_COLUMNS) { - argumentsTable = new GATKReportTable("Arguments", "Recalibration argument collection values used in this run", 2, false, true); + argumentsTable = new GATKReportTable("Arguments", "Recalibration argument collection values used in this run", 2, GATKReportTable.TableSortingWay.SORT_BY_COLUMN); } else { argumentsTable = new GATKReportTable("Arguments", "Recalibration 
argument collection values used in this run", 2); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ErrorRatePerCycle.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ErrorRatePerCycle.java index b4e781e91..5972322f8 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ErrorRatePerCycle.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ErrorRatePerCycle.java @@ -124,7 +124,7 @@ public class ErrorRatePerCycle extends LocusWalker { public void initialize() { report = new GATKReport(); - report.addTable(reportName, reportDescription, 6, true, false); + report.addTable(reportName, reportDescription, 6, GATKReportTable.TableSortingWay.SORT_BY_ROW); table = report.getTable(reportName); table.addColumn("readgroup"); table.addColumn("cycle"); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalReportWriter.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalReportWriter.java index 6af70811f..6dad128fe 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalReportWriter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalReportWriter.java @@ -162,7 +162,7 @@ public class VariantEvalReportWriter { // create the table final String tableName = ve.getSimpleName(); final String tableDesc = ve.getClass().getAnnotation(Analysis.class).description(); - report.addTable(tableName, tableDesc, 1 + stratifiers.size() + (scanner.hasMoltenField() ? 2 : datamap.size()), true, false); + report.addTable(tableName, tableDesc, 1 + stratifiers.size() + (scanner.hasMoltenField() ? 
2 : datamap.size()), GATKReportTable.TableSortingWay.SORT_BY_ROW); // grab the table, and add the columns we need to it final GATKReportTable table = report.getTable(tableName); diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/QuantizationInfo.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/QuantizationInfo.java index e0c1261fe..fc942499c 100644 --- a/public/java/src/org/broadinstitute/sting/utils/recalibration/QuantizationInfo.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/QuantizationInfo.java @@ -70,7 +70,7 @@ public class QuantizationInfo { public GATKReportTable generateReportTable(boolean sortBycols) { GATKReportTable quantizedTable; if(sortBycols) { - quantizedTable = new GATKReportTable(RecalUtils.QUANTIZED_REPORT_TABLE_TITLE, "Quality quantization map", 3, false, true); + quantizedTable = new GATKReportTable(RecalUtils.QUANTIZED_REPORT_TABLE_TITLE, "Quality quantization map", 3, GATKReportTable.TableSortingWay.SORT_BY_COLUMN); } else { quantizedTable = new GATKReportTable(RecalUtils.QUANTIZED_REPORT_TABLE_TITLE, "Quality quantization map", 3); } diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalUtils.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalUtils.java index d4e781fdd..58327b924 100644 --- a/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalUtils.java @@ -287,7 +287,7 @@ public class RecalUtils { final GATKReportTable reportTable; if (tableIndex <= RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index) { if(sortByCols) { - reportTable = new GATKReportTable("RecalTable" + reportTableIndex++, "", columnNames.size(), false, true); + reportTable = new GATKReportTable("RecalTable" + reportTableIndex++, "", columnNames.size(), GATKReportTable.TableSortingWay.SORT_BY_COLUMN); } else { reportTable = new 
GATKReportTable("RecalTable" + reportTableIndex++, "", columnNames.size()); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/report/GATKReportUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/report/GATKReportUnitTest.java index d20b70b42..40d8d8ff9 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/report/GATKReportUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/report/GATKReportUnitTest.java @@ -32,6 +32,13 @@ import org.testng.annotations.Test; import java.io.File; import java.io.IOException; import java.io.PrintStream; +import java.util.Random; +import java.io.FileInputStream; +import java.io.DataInputStream; +import java.io.BufferedReader; +import java.io.InputStreamReader; +import java.util.ArrayList; + public class GATKReportUnitTest extends BaseTest { @Test @@ -77,6 +84,85 @@ public class GATKReportUnitTest extends BaseTest { Assert.assertEquals(GATKReportColumn.isRightAlign(value), expected, "right align of '" + value + "'"); } + private GATKReportTable getTableWithRandomValues() { + Random number = new Random(123L); + final int VALUESRANGE = 10; + + GATKReport report = GATKReport.newSimpleReport("TableName", "col1", "col2", "col3"); + GATKReportTable table = new GATKReportTable("testSortingTable", "table with random values sorted by columns", 3, GATKReportTable.TableSortingWay.SORT_BY_COLUMN ); + + final int NUMROWS = 100; + for (int x = 0; x < NUMROWS; x++) { + report.addRow(number.nextInt(VALUESRANGE), number.nextInt(VALUESRANGE), number.nextInt(VALUESRANGE)); + } + return table; + } + + @Test(enabled = true) + public void testSortingByColumn() { + Assert.assertEquals(isSorted(getTableWithRandomValues()), true); + } + + private boolean isSorted(GATKReportTable table) { + boolean result = true; + File testingSortingTableFile = new File("myFile.txt"); + + try { + // Connect print stream to the output stream + PrintStream ps = new PrintStream(testingSortingTableFile); + table.write(ps); + ps.close(); + 
} + catch (Exception e){ + System.err.println ("Error: " + e.getMessage()); + } + + ArrayList rows = new ArrayList(); + try { + // Open the file + FileInputStream fStream = new FileInputStream(testingSortingTableFile); + // Get the object of DataInputStream + DataInputStream in = new DataInputStream(fStream); + BufferedReader br = new BufferedReader(new InputStreamReader(in)); + String strLine; + //Read File Line By Line + while ((strLine = br.readLine()) != null) { + + String[] parts = strLine.split(" "); + int l = parts.length; + int[] row = new int[l]; + for(int n = 0; n < l; n++) { + row[n] = Integer.parseInt(parts[n]); + } + rows.add(row); + } + //Close the input stream + in.close(); + } catch (Exception e){//Catch exception if any + System.err.println("Error: " + e.getMessage()); + } + for (int x = 1; x < rows.size() && result; x++) { + result = checkRowOrder(rows.get(x - 1), rows.get(x)); + } + return result; + } + + private boolean checkRowOrder(int[] row1, int[] row2) { + int l = row1.length; + final int EQUAL = 0; + + int result = EQUAL; + + for(int x = 0; x < l && ( result <= EQUAL); x++) { + result = ((Integer)row1[x]).compareTo(row2[x]); + } + if (result <= EQUAL) { + return true; + } else { + return false; + } + } + private GATKReportTable makeBasicTable() { GATKReport report = GATKReport.newSimpleReport("TableName", "sample", "value"); GATKReportTable table = report.getTable("TableName"); @@ -168,7 +254,7 @@ public class GATKReportUnitTest extends BaseTest { table.set("RZ", "SomeFloat", 535646345.657453464576); table.set("RZ", "TrueFalse", true); - report1.addTable("Table3", "blah", 1, true, false); + report1.addTable("Table3", "blah", 1, GATKReportTable.TableSortingWay.SORT_BY_ROW); report1.getTable("Table3").addColumn("a"); report1.getTable("Table3").addRowIDMapping("q", 2); report1.getTable("Table3").addRowIDMapping("5", 3);