diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java index bee6dd69e..ff0c39f41 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java @@ -41,10 +41,10 @@ import java.util.TreeMap; public class GATKReport { public static final String GATKREPORT_HEADER_PREFIX = "#:GATKReport."; public static final GATKReportVersion LATEST_REPORT_VERSION = GATKReportVersion.V1_0; - public static final String SEPARATOR = ":"; + private static final String SEPARATOR = ":"; private GATKReportVersion version = LATEST_REPORT_VERSION; - private TreeMap tables = new TreeMap(); + private final TreeMap tables = new TreeMap(); /** * Create a new, empty GATKReport. @@ -70,6 +70,15 @@ public class GATKReport { loadReport(file); } + /** + * Create a new GATK report from GATK report tables + * @param tables Any number of tables that you want to add to the report + */ + public GATKReport(GATKReportTable... tables) { + for( GATKReportTable table: tables) + addTable(table); + } + /** * Load a GATKReport file from disk * @@ -202,10 +211,6 @@ public class GATKReport { return version; } - public void setVersion(GATKReportVersion version) { - this.version = version; - } - /** * Returns whether or not the two reports have the same format, from columns, to tables, to reports, and everything * in between. This does not check if the data inside is the same. 
This is the check to see if the two reports are diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumn.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumn.java index 7e64c8082..9a7c4ced0 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumn.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumn.java @@ -199,7 +199,7 @@ public class GATKReportColumn extends TreeMap { defaultValue.equals(that.defaultValue) ); } - protected boolean equals(GATKReportColumn that) { + boolean equals(GATKReportColumn that) { if ( !this.keySet().equals(that.keySet()) ) { return false; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumnFormat.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumnFormat.java index 6d19a83aa..79ae9b8bd 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumnFormat.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumnFormat.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011, The Broad Institute + * Copyright (c) 2012, The Broad Institute * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation @@ -29,8 +29,8 @@ package org.broadinstitute.sting.gatk.report; */ public class GATKReportColumnFormat { public static enum Alignment { LEFT, RIGHT } - public int width; - public Alignment alignment; + private final int width; + private final Alignment alignment; public GATKReportColumnFormat(int width, Alignment alignment) { this.width = width; diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumns.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumns.java index ca1de49f9..bb6e3a4f1 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumns.java +++ 
b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumns.java @@ -24,13 +24,15 @@ package org.broadinstitute.sting.gatk.report; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + import java.util.*; /** * Tracks a linked list of GATKReportColumn in order by name. */ public class GATKReportColumns extends LinkedHashMap implements Iterable { - private List columnNames = new ArrayList(); + private final List columnNames = new ArrayList(); /** * Returns the column by index @@ -43,9 +45,12 @@ public class GATKReportColumns extends LinkedHashMap i } @Override - public GATKReportColumn remove(Object key) { - columnNames.remove(key); - return super.remove(key); + public GATKReportColumn remove(Object columnName) { + if ( !(columnName instanceof String) ) { + throw new ReviewedStingException("The column name must be a String!"); + } + columnNames.remove(columnName.toString()); + return super.remove(columnName); } @Override @@ -85,7 +90,7 @@ public class GATKReportColumns extends LinkedHashMap i return true; } - protected boolean equals(GATKReportColumns that) { + boolean equals(GATKReportColumns that) { for (Map.Entry pair : entrySet()) { // Make sure that every column is the same, we know that the # of columns // is the same from isSameFormat() diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportDataType.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportDataType.java index 414102fec..d9bae19c7 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportDataType.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportDataType.java @@ -67,7 +67,7 @@ public enum GATKReportDataType { */ String("%[Ss]"); - public final String dataTypeString; + private final String dataTypeString; private GATKReportDataType(String dataTypeString) { this.dataTypeString = dataTypeString; @@ -189,7 +189,7 @@ public enum GATKReportDataType { * @param obj The input string * @return 
an object that matches the data type. */ - protected Object Parse(Object obj) { + Object Parse(Object obj) { if (obj instanceof String) { String str = obj.toString(); switch (this) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportGatherer.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportGatherer.java index 0d15971ae..ff1f9b90c 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportGatherer.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportGatherer.java @@ -1,3 +1,27 @@ +/* + * Copyright (c) 2012, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + package org.broadinstitute.sting.gatk.report; import org.broadinstitute.sting.commandline.Gatherer; @@ -8,13 +32,6 @@ import java.io.FileNotFoundException; import java.io.PrintStream; import java.util.List; -/** - * Created by IntelliJ IDEA. 
- * User: roger - * Date: 1/9/12 - * Time: 11:17 PM - * To change this template use File | Settings | File Templates. - */ public class GATKReportGatherer extends Gatherer { @Override public void gather(List inputs, File output) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java index 1b5273741..81d7d7710 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java @@ -34,97 +34,14 @@ import java.util.*; import java.util.regex.Matcher; import java.util.regex.Pattern; -/** - * A data structure that allows data to be collected over the course of a walker's computation, then have that data - * written to a PrintStream such that it's human-readable, AWK-able, and R-friendly (given that you load it using the - * GATKReport loader module). - *

- * The goal of this object is to use the same data structure for both accumulating data during a walker's computation - * and emitting that data to a file for easy analysis in R (or any other program/language that can take in a table of - * results). Thus, all of the infrastructure below is designed simply to make printing the following as easy as - * possible: - *

- * ##:GATKReport.v0.1 ErrorRatePerCycle : The error rate per sequenced position in the reads - * cycle errorrate.61PA8.7 qualavg.61PA8.7 - * 0 0.007451835696110506 25.474613284804366 - * 1 0.002362777171937477 29.844949954504095 - * 2 9.087604507451836E-4 32.87590975254731 - * 3 5.452562704471102E-4 34.498999090081895 - * 4 9.087604507451836E-4 35.14831665150137 - * 5 5.452562704471102E-4 36.07223435225619 - * 6 5.452562704471102E-4 36.1217248908297 - * 7 5.452562704471102E-4 36.1910480349345 - * 8 5.452562704471102E-4 36.00345705967977 - *

- * Here, we have a GATKReport table - a well-formatted, easy to read representation of some tabular data. Every single - * table has this same GATKReport.v0.1 header, which permits multiple files from different sources to be cat-ed - * together, which makes it very easy to pull tables from different programs into R via a single file. - *

- * ------------ - * Definitions: - *

- * Table info: - * The first line, structured as - * ##: :
- *

- * Table header: - * The second line, specifying a unique name for each column in the table. - *

- * The first column mentioned in the table header is the "primary key" column - a column that provides the unique - * identifier for each row in the table. Once this column is created, any element in the table can be referenced by - * the row-column coordinate, i.e. "primary key"-"column name" coordinate. - *

- * When a column is added to a table, a default value must be specified (usually 0). This is the initial value for - * an element in a column. This permits operations like increment() and decrement() to work properly on columns that - * are effectively counters for a particular event. - *

- * Finally, the display property for each column can be set during column creation. This is useful when a given - * column stores an intermediate result that will be used later on, perhaps to calculate the value of another column. - * In these cases, it's obviously necessary to store the value required for further computation, but it's not - * necessary to actually print the intermediate column. - *

- * Table body: - * The values of the table itself. - *

- * --------------- - * Implementation: - *

- * The implementation of this table has two components: - * 1. A TreeSet that stores all the values ever specified for the primary key. Any get() operation that - * refers to an element where the primary key object does not exist will result in its implicit creation. I - * haven't yet decided if this is a good idea... - *

- * 2. A HashMap that stores a mapping from column name to column contents. Each - * GATKReportColumn is effectively a map (in fact, GATKReportColumn extends TreeMap) between - * primary key and the column value. This means that, given N columns, the primary key information is stored - * N+1 times. This is obviously wasteful and can likely be handled much more elegantly in future implementations. - *

- * ------------------------------ - * Element and column operations: - *

- * In addition to simply getting and setting values, this object also permits some simple operations to be applied to - * individual elements or to whole columns. For instance, an element can be easily incremented without the hassle of - * calling get(), incrementing the obtained value by 1, and then calling set() with the new value. Also, some vector - * operations are supported. For instance, two whole columns can be divided and have the result be set to a third - * column. This is especially useful when aggregating counts in two intermediate columns that will eventually need to - * be manipulated row-by-row to compute the final column. - *

- * Note: I've made no attempt whatsoever to make these operations efficient. Right now, some of the methods check the - * type of the stored object using an instanceof call and attempt to do the right thing. Others cast the contents of - * the cell to a Number, call the Number.toDouble() method and compute a result. This is clearly not the ideal design, - * but at least the prototype contained herein works. - * - * @author Kiran Garimella - * @author Khalid Shakir - */ public class GATKReportTable { /** * REGEX that matches any table with an invalid name */ public static final String INVALID_TABLE_NAME_REGEX = "[^a-zA-Z0-9_\\-\\.]"; - public static final String GATKTABLE_HEADER_PREFIX = "#:GATKTable"; - public static final String SEPARATOR = ":"; - public static final String ENDLINE = ":;"; + private static final String GATKTABLE_HEADER_PREFIX = "#:GATKTable"; + private static final String SEPARATOR = ":"; + private static final String ENDLINE = ":;"; private String tableName; private String tableDescription; @@ -418,8 +335,8 @@ public class GATKReportTable { * output file), and the format string used to display the data. * * @param columnName the name of the column - * @param defaultValue if true - the column will be displayed; if false - the column will be hidden - * @param display + * @param defaultValue the default value of a blank cell + * @param display if true - the column will be displayed; if false - the column will be hidden * @param format the format string used to display data */ public void addColumn(String columnName, Object defaultValue, boolean display, String format) { @@ -429,12 +346,6 @@ public class GATKReportTable { columns.put(columnName, new GATKReportColumn(columnName, defaultValue, display, format)); } - - public GATKReportVersion getVersion() { - return GATKReport.LATEST_REPORT_VERSION; - } - - /** * Check if the requested element exists, and if not, create it. 
* @@ -508,8 +419,7 @@ public class GATKReportTable { value = newValue; if (column.getDataType().equals(GATKReportDataType.fromObject(value)) || - column.getDataType().equals(GATKReportDataType.Unknown) || - value == null) + column.getDataType().equals(GATKReportDataType.Unknown) ) columns.get(columnName).put(primaryKey, value); else throw new ReviewedStingException(String.format("Tried to add an object of type: %s to a column of type: %s", @@ -795,7 +705,7 @@ public class GATKReportTable { * * @return the width of the primary key column */ - public int getPrimaryKeyColumnWidth() { + int getPrimaryKeyColumnWidth() { int maxWidth = getPrimaryKeyName().length(); for (Object primaryKey : primaryKeyColumn) { @@ -814,7 +724,7 @@ public class GATKReportTable { * * @param out the PrintStream to which the table should be written */ - public void write(PrintStream out) { + void write(PrintStream out) { /* * Table header: @@ -912,7 +822,7 @@ public class GATKReportTable { * * @param input Another GATK table */ - protected void combineWith(GATKReportTable input) { + void combineWith(GATKReportTable input) { /* * This function is different from addRowsFrom because we will add the ability to sum,average, etc rows * TODO: Add other combining algorithms diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportVersion.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportVersion.java index caac79cb5..99381cc21 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportVersion.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportVersion.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011, The Broad Institute + * Copyright (c) 2012, The Broad Institute * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation @@ -50,7 +50,7 @@ public enum GATKReportVersion { */ V1_0("v1.0"); - public final String versionString; + private final String 
versionString; private GATKReportVersion(String versionString) { this.versionString = versionString; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java index 2159bc839..3f4b4805f 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011, The Broad Institute + * Copyright (c) 2012, The Broad Institute * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation @@ -244,7 +244,8 @@ public class DiffEngine { table.set(diff.getPath(), "NumberOfOccurrences", diff.getCount()); table.set(diff.getPath(), "ExampleDifference", diff.valueDiffString()); } - table.write(params.out); + GATKReport output = new GATKReport(table); + output.print(params.out); } protected static int longestCommonPostfix(String[] diffPath1, String[] diffPath2) { diff --git a/public/java/test/org/broadinstitute/sting/gatk/report/GATKReportUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/report/GATKReportUnitTest.java index 124bda7bc..90c92189e 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/report/GATKReportUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/report/GATKReportUnitTest.java @@ -80,11 +80,15 @@ public class GATKReportUnitTest extends BaseTest { @Test public void testSimpleGATKReport() { - GATKReport report = GATKReport.newSimpleReport("TableName", "a", "b", "Roger", "is", "Awesome"); - report.addRow("a", 'F', 12, 23.45, true); - report.addRow("ans", '3', 24.5, 456L, 2345); - report.addRow("hi", null, null, "", 2.3); + // Create a new simple GATK report named "TableName" with columns: Roger, is, and Awesome + GATKReport report = GATKReport.newSimpleReport("TableName", "Roger", "is", "Awesome"); + 
// Add data to simple GATK report + report.addRow( 12, 23.45, true); + report.addRow("ans", '3', 24.5); + report.addRow("hi", "", 2.3); + + // Print the report to console //report.print(System.out); try {