diff --git a/public/R/src/org/broadinstitute/sting/utils/R/gsalib/R/gsa.read.gatkreport.R b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/R/gsa.read.gatkreport.R index 876cf5cbc..64fbcc50a 100644 --- a/public/R/src/org/broadinstitute/sting/utils/R/gsalib/R/gsa.read.gatkreport.R +++ b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/R/gsa.read.gatkreport.R @@ -2,19 +2,19 @@ .gsa.assignGATKTableToEnvironment <- function(tableName, tableHeader, tableRows, tableEnv) { d = data.frame(tableRows, row.names=NULL, stringsAsFactors=FALSE); colnames(d) = tableHeader; - + for (i in 1:ncol(d)) { # use the general type.convert infrastructure of read.table to convert column data to R types v = type.convert(d[,i]) d[,i] = v; } - + usedNames = ls(envir=tableEnv, pattern=tableName); - + if (length(usedNames) > 0) { tableName = paste(tableName, ".", length(usedNames), sep=""); } - + assign(tableName, d, envir=tableEnv); } @@ -28,74 +28,155 @@ starts = c(1, columnStarts); stops = c(columnStarts - 1, nchar(line)); - + sapply(line, splitStartStop)[,1]; } +# Old implementaton for v0.* +gsa.read.gatkreportv0 <- function(lines) { + + tableEnv = new.env(); + + tableName = NA; + tableHeader = c(); + tableRows = c(); + version = NA; + + for (line in lines) { + if (length(grep("^##:GATKReport.v", line, ignore.case=TRUE)) > 0) { + headerFields = unlist(strsplit(line, "[[:space:]]+")); + + if (!is.na(tableName)) { + .gsa.assignGATKTableToEnvironment(tableName, tableHeader, tableRows, tableEnv); + } + + tableName = headerFields[2]; + tableHeader = c(); + tableRows = c(); + + # For differences in versions see + # $STING_HOME/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportVersion.java + if (length(grep("^##:GATKReport.v0.1[[:space:]]+", line, ignore.case=TRUE)) > 0) { + version = "v0.1"; + + } else if (length(grep("^##:GATKReport.v0.2[[:space:]]+", line, ignore.case=TRUE)) > 0) { + version = "v0.2"; + columnStarts = c(); + + } + + } else if (length(grep("^[[:space:]]*$", 
line)) > 0 | length(grep("^[[:space:]]*#", line)) > 0) { + # do nothing + } else if (!is.na(tableName)) { + + if (version == "v0.1") { + row = unlist(strsplit(line, "[[:space:]]+")); + + } else if (version == "v0.2") { + if (length(tableHeader) == 0) { + headerChars = unlist(strsplit(line, "")); + # Find the first position of non space characters, excluding the first character + columnStarts = intersect(grep("[[:space:]]", headerChars, invert=TRUE), grep("[[:space:]]", headerChars) + 1); + } + + row = .gsa.splitFixedWidth(line, columnStarts); + } + + if (length(tableHeader) == 0) { + tableHeader = row; + } else { + tableRows = rbind(tableRows, row); + } + } + } + + if (!is.na(tableName)) { + .gsa.assignGATKTableToEnvironment(tableName, tableHeader, tableRows, tableEnv); + } + + gatkreport = as.list(tableEnv, all.names=TRUE); +} + +# Load all GATKReport v1 tables from file +gsa.read.gatkreportv1 <- function(lines) { + + tableEnv = new.env(); + + tableName = NA; + tableHeader = c(); + tableRows = c(); + version = ""; + headerRowCount = -1; + + for (line in lines) { + + if (length(grep("^#:GATKReport.v1", line, ignore.case=TRUE)) > 0) { + version = "v1.0"; + headerRowCount = 0; + } + + if ( (headerRowCount %% 2 == 1) && (version == "v1.0") ) { + #print("Trying to start a table with line:"); + #print(line); + + #Get table header + headerFields = unlist(strsplit(line, ":")); + + if (!is.na(tableName)) { + .gsa.assignGATKTableToEnvironment(tableName, tableHeader, tableRows, tableEnv); + } + + tableName = headerFields[3]; + tableHeader = c(); + tableRows = c(); + + columnStarts = c(); + + } + + if (length(grep("^#:GATKTable", line, ignore.case=TRUE)) > 0) { + headerRowCount = headerRowCount+1; + #print("Header Row count is at:") + #print(headerRowCount); + } else if (!is.na(tableName)) { + if ( version == "v1.0") { + if (length(tableHeader) == 0) { + headerChars = unlist(strsplit(line, "")); + # Find the first position of non space characters, excluding the first 
character + columnStarts = intersect(grep("[[:space:]]", headerChars, invert=TRUE), grep("[[:space:]]", headerChars) + 1); + } + + row = .gsa.splitFixedWidth(line, columnStarts); + } + + if (length(tableHeader) == 0) { + tableHeader = row; + } else if ( nchar(line) > 0 ) { + tableRows = rbind(tableRows, row); + } + } + } + + if (!is.na(tableName)) { + .gsa.assignGATKTableToEnvironment(tableName, tableHeader, tableRows, tableEnv); + } + + gatkreport = as.list(tableEnv, all.names=TRUE); +} + # Load all GATKReport tables from a file gsa.read.gatkreport <- function(filename) { con = file(filename, "r", blocking = TRUE); lines = readLines(con); close(con); - - tableEnv = new.env(); - - tableName = NA; - tableHeader = c(); - tableRows = c(); - version = NA; - - for (line in lines) { - if (length(grep("^##:GATKReport.v", line, ignore.case=TRUE)) > 0) { - headerFields = unlist(strsplit(line, "[[:space:]]+")); - - if (!is.na(tableName)) { - .gsa.assignGATKTableToEnvironment(tableName, tableHeader, tableRows, tableEnv); - } - - tableName = headerFields[2]; - tableHeader = c(); - tableRows = c(); - - # For differences in versions see - # $STING_HOME/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportVersion.java - if (length(grep("^##:GATKReport.v0.1[[:space:]]+", line, ignore.case=TRUE)) > 0) { - version = "v0.1"; - - } else if (length(grep("^##:GATKReport.v0.2[[:space:]]+", line, ignore.case=TRUE)) > 0) { - version = "v0.2"; - columnStarts = c(); - - } - - } else if (length(grep("^[[:space:]]*$", line)) > 0 | length(grep("^[[:space:]]*#", line)) > 0) { - # do nothing - } else if (!is.na(tableName)) { - - if (version == "v0.1") { - row = unlist(strsplit(line, "[[:space:]]+")); - - } else if (version == "v0.2") { - if (length(tableHeader) == 0) { - headerChars = unlist(strsplit(line, "")); - # Find the first position of non space characters, excluding the first character - columnStarts = intersect(grep("[[:space:]]", headerChars, invert=TRUE), grep("[[:space:]]", 
headerChars) + 1); - } - - row = .gsa.splitFixedWidth(line, columnStarts); - } - - if (length(tableHeader) == 0) { - tableHeader = row; - } else { - tableRows = rbind(tableRows, row); - } - } + + # get first line + line = lines[1]; + + if (length(grep("^#:GATKReport.v1", line, ignore.case=TRUE)) > 0) { + gsa.read.gatkreportv1(lines) } - - if (!is.na(tableName)) { - .gsa.assignGATKTableToEnvironment(tableName, tableHeader, tableRows, tableEnv); + else if (length(grep("^##:GATKReport.v0", line, ignore.case=TRUE)) > 0) { + gsa.read.gatkreportv0(lines) } - - gatkreport = as.list(tableEnv, all.names=TRUE); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java index 608b5d1d0..bee6dd69e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java @@ -1,19 +1,49 @@ +/* + * Copyright (c) 2012, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + package org.broadinstitute.sting.gatk.report; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.StingException; -import org.broadinstitute.sting.utils.text.TextFormattingUtils; +import org.broadinstitute.sting.utils.exceptions.UserException; -import java.io.*; +import java.io.BufferedReader; +import java.io.File; +import java.io.FileReader; +import java.io.PrintStream; import java.util.Collection; -import java.util.List; import java.util.TreeMap; /** * Container class for GATK report tables */ public class GATKReport { - public static final String GATKREPORT_HEADER_PREFIX = "##:GATKReport.v"; + public static final String GATKREPORT_HEADER_PREFIX = "#:GATKReport."; + public static final GATKReportVersion LATEST_REPORT_VERSION = GATKReportVersion.V1_0; + public static final String SEPARATOR = ":"; + private GATKReportVersion version = LATEST_REPORT_VERSION; + private TreeMap tables = new TreeMap(); /** @@ -24,7 +54,8 @@ public class GATKReport { /** * Create a new GATKReport with the contents of a GATKReport on disk. - * @param filename the path to the file to load + * + * @param filename the path to the file to load */ public GATKReport(String filename) { this(new File(filename)); @@ -32,7 +63,8 @@ public class GATKReport { /** * Create a new GATKReport with the contents of a GATKReport on disk. 
- * @param file the file to load + * + * @param file the file to load */ public GATKReport(File file) { loadReport(file); @@ -40,106 +72,77 @@ public class GATKReport { /** * Load a GATKReport file from disk - * @param file the file to load + * + * @param file the file to load */ private void loadReport(File file) { try { BufferedReader reader = new BufferedReader(new FileReader(file)); - GATKReportTable table = null; - String[] header = null; - int id = 0; - GATKReportVersion version = null; - List columnStarts = null; + String reportHeader = reader.readLine(); - String line; - while ( (line = reader.readLine()) != null ) { + // Read the first line for the version and number of tables. + version = GATKReportVersion.fromHeader(reportHeader); + if (version.equals(GATKReportVersion.V0_1) || + version.equals(GATKReportVersion.V0_2)) + throw new UserException("The GATK no longer supports reading legacy GATK Reports. Please use v1.0 or newer."); - if (line.startsWith(GATKREPORT_HEADER_PREFIX)) { + int nTables = Integer.parseInt(reportHeader.split(":")[2]); - version = GATKReportVersion.fromHeader(line); + // Read each tables according ot the number of tables + for (int i = 0; i < nTables; i++) { + addTable(new GATKReportTable(reader, version)); - line = line.replaceFirst("##:GATKReport." 
+ version.versionString + " ", ""); - String[] pieces = line.split(" : "); - - String tableName = pieces[0]; - String tableDesc = pieces[1]; - - addTable(tableName, tableDesc); - table = getTable(tableName); - table.setVersion(version); - - header = null; - columnStarts = null; - } else if ( line.trim().isEmpty() ) { - // do nothing - } else { - if (table != null) { - - String[] splitLine; - - switch (version) { - case V0_1: - splitLine = TextFormattingUtils.splitWhiteSpace(line); - break; - - case V0_2: - if (header == null) { - columnStarts = TextFormattingUtils.getWordStarts(line); - } - splitLine = TextFormattingUtils.splitFixedWidth(line, columnStarts); - break; - - default: - throw new ReviewedStingException("GATK report version parsing not implemented for: " + line); - } - - if (header == null) { - header = splitLine; - - table.addPrimaryKey("id", false); - - for ( String columnName : header ) { - table.addColumn(columnName, ""); - } - - id = 0; - } else { - for (int columnIndex = 0; columnIndex < header.length; columnIndex++) { - table.set(id, header[columnIndex], splitLine[columnIndex]); - } - - id++; - } - } + /* + if ( !blankLine.equals("") ) { + throw new StingException("The GATK Report File is corrupted or not formatted correctly"); } + */ } - } catch (FileNotFoundException e) { - throw new StingException("Cannot read GATKReport: " + e); - } catch (IOException e) { - throw new StingException("Cannot read GATKReport: " + e); + + + } catch (Exception e) { + // todo - improve exception handling + //throw new StingException("Cannot read GATKReport: " + e); + e.printStackTrace(); } } /** - * Add a new table to the collection + * Add a new, empty table to the report * - * @param tableName the name of the table - * @param tableDescription the description of the table + * @param tableName the name of the table + * @param tableDescription the description of the table */ public void addTable(String tableName, String tableDescription) { addTable(tableName, 
tableDescription, true); } + /** + * Add a new, empty table to the report + * + * @param tableName the name of the table + * @param tableDescription the description of the table + * @param sortByPrimaryKey whether to sort the rows by the primary key + */ public void addTable(String tableName, String tableDescription, boolean sortByPrimaryKey) { GATKReportTable table = new GATKReportTable(tableName, tableDescription, sortByPrimaryKey); tables.put(tableName, table); } + /** + * Adds a table, empty or populated, to the report + * + * @param table the table to add + */ + public void addTable(GATKReportTable table) { + tables.put(table.getTableName(), table); + } + /** * Return true if table with a given name exists * - * @param tableName the name of the table + * @param tableName the name of the table * @return true if the table exists, false otherwise */ public boolean hasTable(String tableName) { @@ -149,8 +152,8 @@ public class GATKReport { /** * Return a table with a given name * - * @param tableName the name of the table - * @return the table object + * @param tableName the name of the table + * @return the table object */ public GATKReportTable getTable(String tableName) { GATKReportTable table = tables.get(tableName); @@ -162,9 +165,10 @@ public class GATKReport { /** * Print all tables contained within this container to a PrintStream * - * @param out the PrintStream to which the tables should be written + * @param out the PrintStream to which the tables should be written */ public void print(PrintStream out) { + out.println(GATKREPORT_HEADER_PREFIX + getVersion().toString() + SEPARATOR + getTables().size()); for (GATKReportTable table : tables.values()) { if (table.getNumRows() > 0) { table.write(out); @@ -175,4 +179,157 @@ public class GATKReport { public Collection getTables() { return tables.values(); } + + /** + * This is the main function is charge of gathering the reports. 
It checks that the reports are compatible and then + * calls the table gathering functions. + * + * @param input another GATKReport of the same format + */ + public void combineWith(GATKReport input) { + + if (!this.isSameFormat(input)) { + throw new ReviewedStingException("Failed to combine GATKReport, format doesn't match!"); + } + + for (String tableName : input.tables.keySet()) { + tables.get(tableName).combineWith(input.getTable(tableName)); + } + + } + + public GATKReportVersion getVersion() { + return version; + } + + public void setVersion(GATKReportVersion version) { + this.version = version; + } + + /** + * Returns whether or not the two reports have the same format, from columns, to tables, to reports, and everything + * in between. This does not check if the data inside is the same. This is the check to see if the two reports are + * gatherable or reduceable. + * + * @param report another GATK report + * @return true if the reports are gatherable + */ + public boolean isSameFormat(GATKReport report) { + if (!version.equals(report.version)) { + return false; + } + if (!tables.keySet().equals(report.tables.keySet())) { + return false; + } + for (String tableName : tables.keySet()) { + if (!getTable(tableName).isSameFormat(report.getTable(tableName))) + return false; + } + return true; + } + + /** + * Checks that the reports are exactly the same. + * + * @param report another GATK report + * @return true if all fields in the reports, tables, and columns are equal. + */ + public boolean equals(GATKReport report) { + if (!version.equals(report.version)) { + return false; + } + if (!tables.keySet().equals(report.tables.keySet())) { + return false; + } + for (String tableName : tables.keySet()) { + if (!getTable(tableName).equals(report.getTable(tableName))) + return false; + } + return true; + } + + /** + * The constructor for a simplified GATK Report.
Simplified GATK report are designed for reports that do not need + * the advanced functionality of a full GATK Report. + *

+ * A simple GATK Report consists of: + *

+ * - A single table + * - No primary key ( it is hidden ) + *

+ * Optional: + * - Only untyped columns. As long as the data is an Object, it will be accepted. + * - Default column values being empty strings. + *

+ * Limitations: + *

+ * - A simple GATK report cannot contain multiple tables. + * - It cannot contain typed columns, which prevents arithmetic gathering. + * + * @param tableName The name of your simple GATK report table + * @param columns The names of the columns in your table + * @return a simplified GATK report + */ + public static GATKReport newSimpleReport(String tableName, String... columns) { + GATKReportTable table = new GATKReportTable(tableName, "A simplified GATK table report"); + table.addPrimaryKey("id", false); + + for (String column : columns) { + table.addColumn(column, ""); + } + + GATKReport output = new GATKReport(); + output.addTable(table); + + return output; + } + + /** + * This method provides an efficient way to populate a simplified GATK report. This method will only work on reports + * that qualify as simplified GATK reports. See the newSimpleReport() constructor for more information. + * + * @param values the row of data to be added to the table. + * Note: the number of arguments must match the columns in the table. + */ + public void addRow(Object... 
values) { + // Must be a simplified GATK Report + if (isSimpleReport()) { + + GATKReportTable table = tables.firstEntry().getValue(); + if (table.getColumns().size() != values.length) { + throw new StingException("The number of arguments in addRow() must match the number of columns in the table"); + } + + int counter = table.getNumRows() + 1; + int i = 0; + + for (String columnName : table.getColumns().keySet()) { + table.set(counter, columnName, values[i]); + i++; + } + + } else { + throw new StingException("Cannot add a Row to a non-Simplified GATK Report"); + } + + + } + + /** + * Checks if the GATK report qualifies as a "simple" GATK report + * + * @return true if the report is a simplified GATK report + */ + private boolean isSimpleReport() { + if (tables.size() != 1) + return false; + + GATKReportTable table = tables.firstEntry().getValue(); + + if (!table.getPrimaryKeyName().equals("id")) + return false; + + return true; + + } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumn.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumn.java index 5a6490afe..7e64c8082 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumn.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumn.java @@ -1,8 +1,34 @@ +/* + * Copyright (c) 2012, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + package org.broadinstitute.sting.gatk.report; import org.apache.commons.lang.math.NumberUtils; -import java.util.*; +import java.util.Arrays; +import java.util.Collection; +import java.util.TreeMap; /** * Holds values for a column in a GATK report table @@ -12,27 +38,48 @@ public class GATKReportColumn extends TreeMap { final private Object defaultValue; final private String format; final private boolean display; + final private GATKReportDataType dataType; /** - * Construct the column object, specifying the column name, default value, and whether or not the column should be displayed + * Construct the column object, specifying the column name, default value, whether or not the column should be + * displayed, and the format string. This cannot be null. * - * @param columnName the name of the column - * @param defaultValue the default value of the column - * @param display if true, the column will be displayed in the final output - * @param format format string + * @param columnName the name of the column + * @param defaultValue the default value of the column + * @param display if true, the column will be displayed in the final output + * @param format format string */ public GATKReportColumn(String columnName, Object defaultValue, boolean display, String format) { this.columnName = columnName; - this.defaultValue = defaultValue; this.display = display; - this.format = format == null ? null : (format.equals("") ? 
null : format); + if ( format.equals("") ) { + this.format = "%s"; + this.dataType = GATKReportDataType.Unknown; + if ( defaultValue != null ) { + this.defaultValue = defaultValue; + //this.dataType = GATKReportDataType.fromObject(defaultValue); + } + else { + this.defaultValue = ""; + //this.dataType = GATKReportDataType.Unknown; + } + } + else { + this.format = format; + this.dataType = GATKReportDataType.fromFormatString(format); + if ( defaultValue == null ) { + this.defaultValue = dataType.getDefaultValue(); + } + else { + this.defaultValue = defaultValue; + } + } } - /** * Initialize an element in the column with a default value * - * @param primaryKey the primary key position in the column that should be set + * @param primaryKey the primary key position in the column that should be set */ public void initialize(Object primaryKey) { this.put(primaryKey, defaultValue); @@ -43,8 +90,8 @@ public class GATKReportColumn extends TreeMap { * tables, as the table gets written properly without having to waste storage for the unset elements (usually the zero * values) in the table. * - * @param primaryKey the primary key position in the column that should be retrieved - * @return the value at the specified position in the column, or the default value if the element is not set + * @param primaryKey the primary key position in the column that should be retrieved + * @return the value at the specified position in the column, or the default value if the element is not set */ private Object getWithoutSideEffects(Object primaryKey) { if (!this.containsKey(primaryKey)) { @@ -57,8 +104,8 @@ public class GATKReportColumn extends TreeMap { /** * Return an object from the column, but if it doesn't exist, return the default value. 
* - * @param primaryKey the primary key position in the column that should be retrieved - * @return the string value at the specified position in the column, or the default value if the element is not set + * @param primaryKey the primary key position in the column that should be retrieved + * @return the string value at the specified position in the column, or the default value if the element is not set */ public String getStringValue(Object primaryKey) { return formatValue(getWithoutSideEffects(primaryKey)); @@ -68,7 +115,7 @@ public class GATKReportColumn extends TreeMap { * Return the displayable property of the column. If true, the column will be displayed in the final output. * If not, printing will be suppressed for the contents of the table. * - * @return true if the column will be displayed, false if otherwise + * @return true if the column will be displayed, false if otherwise */ public boolean isDisplayable() { return display; @@ -76,6 +123,7 @@ public class GATKReportColumn extends TreeMap { /** * Get the display width for this column. This allows the entire column to be displayed with the appropriate, fixed width. + * * @return the format string for this column */ public GATKReportColumnFormat getColumnFormat() { @@ -112,6 +160,7 @@ public class GATKReportColumn extends TreeMap { /** * Check if the value can be right aligned. Does not trim the values before checking if numeric since it assumes * the spaces mean that the value is already padded. + * * @param value to check * @return true if the value is a right alignable */ @@ -121,6 +170,7 @@ public class GATKReportColumn extends TreeMap { /** * Returns a string version of the values. 
+ * * @param obj The object to convert to a string * @return The string representation of the column */ @@ -128,19 +178,54 @@ public class GATKReportColumn extends TreeMap { String value; if (obj == null) { value = "null"; - } else if ( format != null ) { + } else if ( dataType.equals(GATKReportDataType.Unknown) && + (obj instanceof Double || obj instanceof Float) ) { + value = String.format("%.8f", obj); + } else value = String.format(format, obj); - } else if (obj instanceof Float) { - value = String.format("%.8f", (Float) obj); - } else if (obj instanceof Double) { - value = String.format("%.8f", (Double) obj); - } else { - value = obj.toString(); - } + return value; } + public GATKReportDataType getDataType() { + return dataType; + } + + public boolean isSameFormat(GATKReportColumn that) { + return (dataType.equals(that.dataType) && + columnName.equals(that.columnName) && + display == that.display && + format.equals(that.format) && + defaultValue.equals(that.defaultValue) ); + } + + protected boolean equals(GATKReportColumn that) { + if ( !this.keySet().equals(that.keySet()) ) { + return false; + } + + for (Object key : keySet()) { + Object ValueA = this.get(key); + Object ValueB = that.get(key); + + //if the value is not equal, (use data type to get the right comparison) + if (!dataType.isEqual(ValueA, ValueB)) { + return false; + } + } + + return true; + } + public String getColumnName() { return columnName; } + + public String getFormat() { + if ( dataType.equals(GATKReportDataType.Unknown) ) { + return ""; + } + else + return format; + } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumns.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumns.java index a73123b6c..ca1de49f9 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumns.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumns.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011, The Broad 
Institute + * Copyright (c) 2012, The Broad Institute * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation @@ -24,8 +24,6 @@ package org.broadinstitute.sting.gatk.report; -import org.broadinstitute.sting.utils.collections.Pair; - import java.util.*; /** @@ -36,6 +34,7 @@ public class GATKReportColumns extends LinkedHashMap i /** * Returns the column by index + * * @param i the index * @return The column */ @@ -59,9 +58,44 @@ public class GATKReportColumns extends LinkedHashMap i public Iterator iterator() { return new Iterator() { int offset = 0; - public boolean hasNext() { return offset < columnNames.size() ; } - public GATKReportColumn next() { return getByIndex(offset++); } - public void remove() { throw new UnsupportedOperationException("Cannot remove from a GATKReportColumn iterator"); } + + public boolean hasNext() { + return offset < columnNames.size(); + } + + public GATKReportColumn next() { + return getByIndex(offset++); + } + + public void remove() { + throw new UnsupportedOperationException("Cannot remove from a GATKReportColumn iterator"); + } }; } + + public boolean isSameFormat(GATKReportColumns that) { + if (!columnNames.equals(that.columnNames)) { + return false; + } + for (String columnName : columnNames) { + if (!this.get(columnName).isSameFormat(that.get(columnName))) { + return false; + } + } + return true; + } + + protected boolean equals(GATKReportColumns that) { + for (Map.Entry pair : entrySet()) { + // Make sure that every column is the same, we know that the # of columns + // is the same from isSameFormat() + String key = pair.getKey(); + + if (!get(key).equals(that.get(key))) { + return false; + } + } + + return true; + } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportDataType.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportDataType.java new file mode 100644 index 000000000..414102fec --- /dev/null +++ 
b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportDataType.java @@ -0,0 +1,235 @@ +/* + * Copyright (c) 2012, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.report; + +import java.util.EnumSet; +import java.util.HashMap; +import java.util.Map; + +/** + * The gatherable data types acceptable in a GATK report column. + */ +public enum GATKReportDataType { + /** + * The null type should not be used. + */ + Null("Null"), + + /** + * The default value when a format string is not present + */ + Unknown("Unknown"), + + /** + * Used for boolean values. Will display as true or false in the table. + */ + Boolean("%[Bb]"), + + /** + * Used for byte and char value. Will display as a char so use printable values! + */ + Byte("%[Cc]"), + + /** + * Used for float and double values. Will output a decimal with format %.8f unless otherwise specified. 
+ */ + Decimal("%.*[EeFf]"), + + /** + * Used for int and long values. Will display the full number by default. + */ + Integer("%[Dd]"), + + /** + * Used for string values. Displays the string itself. + */ + String("%[Ss]"); + + public final String dataTypeString; + + private GATKReportDataType(String dataTypeString) { + this.dataTypeString = dataTypeString; + } + + private static final Map lookup = new HashMap(); + + static { + for (GATKReportDataType s : EnumSet.allOf(GATKReportDataType.class)) + lookup.put(s.dataTypeString, s); + } + + + @Override + public String toString() { + return this.dataTypeString; + } + + /** + * Returns a GATK report data type from the Object specified. It looks through the list of acceptable classes and + * returns the appropriate data type. + * + * @param object the object to derive the data type from + * @return the appropriate data type + */ + public static GATKReportDataType fromObject(Object object) { + GATKReportDataType value; + if (object instanceof Boolean) { + value = GATKReportDataType.Boolean; + } else if (object instanceof Byte || object instanceof Character) { + value = GATKReportDataType.Byte; + } else if (object instanceof Float || object instanceof Double) { + value = GATKReportDataType.Decimal; + } else if (object instanceof Integer || object instanceof Long) { + value = GATKReportDataType.Integer; + } else if (object instanceof String) { + value = GATKReportDataType.String; + } else { + value = GATKReportDataType.Unknown; + //throw new ReviewedStingException("GATKReport could not convert the data object into a GATKReportDataType. Acceptable data objects are found in the documentation."); + } + return value; + } + + /** + * Returns a GATK report data type from the format string specified. It uses regex matching from the enumerated + * Strings.
+ * + * @param format the format string to derive the data type from + * @return the appropriate data type + */ + public static GATKReportDataType fromFormatString(String format) { + if (format.equals("")) + return Unknown; + for (GATKReportDataType type : lookup.values()) { + if (format.matches(type.toString()) ) + return type; + } + return Unknown; + } + + /** + * Returns the default value of the data type. It returns an object that matches the class of the data type. + * + * @return an object that matches the data type + */ + public Object getDefaultValue() { + switch (this) { + case Decimal: + return 0.0D; + case Boolean: + return false; + case Byte: + return (byte) 0; + case Integer: + return 0L; + case String: + return ""; + default: + return null; + } + } + + /** + * Checks if the two objects are equal using the appropriate test for the data type. + * + * @param a an object + * @param b another object to check if equal + * @return true - the objects are equal, false - the objects are not equal + */ + public boolean isEqual(Object a, Object b) { + switch (this) { + case Null: + return true; + case Decimal: + case Boolean: + case Integer: + return a.toString().equals(b.toString()); + case Byte: + // A mess that checks if the bytes and characters contain the same value + if ((a instanceof Character && b instanceof Character) || + (a instanceof Byte && b instanceof Byte)) + return a.toString().equals(b.toString()); + else if (a instanceof Character && b instanceof Byte) { + return ((Character) a).charValue() == ((Byte) b).byteValue(); + } else if (a instanceof Byte && b instanceof Character) { + return ((Byte) a).byteValue() == ((Character) b).charValue(); + } + case String: + default: + return a.equals(b); + } + } + + /** + * Converts an input String to the appropriate type using the data type. Used when parsing a GATK report loaded from + * file. + * + * @param obj The input string + * @return an object that matches the data type.
+ */ + protected Object Parse(Object obj) { + if (obj instanceof String) { + String str = obj.toString(); + switch (this) { + case Decimal: + return Double.parseDouble(str); + case Boolean: + return java.lang.Boolean.parseBoolean(str); + case Integer: + return Long.parseLong(str); + case String: + return str; + case Byte: + return (byte) str.toCharArray()[0]; + default: + return str; + } + } else + return null; + } + + /** + * Returns a format string version of the value according to the data type. + * + * @return The printf string representation of the object according to data type. + */ + public String getDefaultFormatString() { + switch (this) { + case Decimal: + return "%.8f"; + case Boolean: + return "%b"; + case Integer: + return "%d"; + case String: + return "%s"; + case Byte: + return "%c"; + case Null: + default: + return "%s"; + } + } +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportGatherer.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportGatherer.java new file mode 100644 index 000000000..0d15971ae --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportGatherer.java @@ -0,0 +1,46 @@ +package org.broadinstitute.sting.gatk.report; + +import org.broadinstitute.sting.commandline.Gatherer; +import org.broadinstitute.sting.utils.exceptions.UserException; + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.PrintStream; +import java.util.List; + +/** + * Created by IntelliJ IDEA. + * User: roger + * Date: 1/9/12 + * Time: 11:17 PM + * To change this template use File | Settings | File Templates. 
+ */ +public class GATKReportGatherer extends Gatherer { + @Override + public void gather(List inputs, File output) { + //Combines inputs GATKReport to one output + + PrintStream o; + try { + o = new PrintStream(output); + } catch (FileNotFoundException e) { + throw new UserException("File to be output by CoverageByRG Gather function was not found"); + } + + GATKReport current = new GATKReport(); + boolean isFirst = true; + for (File input : inputs) { + + // If the table is empty + if (isFirst) { + current = new GATKReport(input); + isFirst = false; + } else { + GATKReport toAdd = new GATKReport(input); + current.combineWith(toAdd); + } + } + + current.print(o); + } +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java index b59b550e1..1b5273741 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java @@ -1,8 +1,34 @@ +/* + * Copyright (c) 2012, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + package org.broadinstitute.sting.gatk.report; import org.apache.commons.lang.ObjectUtils; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.text.TextFormattingUtils; +import java.io.BufferedReader; import java.io.PrintStream; import java.util.*; import java.util.regex.Matcher; @@ -12,12 +38,12 @@ import java.util.regex.Pattern; * A data structure that allows data to be collected over the course of a walker's computation, then have that data * written to a PrintStream such that it's human-readable, AWK-able, and R-friendly (given that you load it using the * GATKReport loader module). - * + *

* The goal of this object is to use the same data structure for both accumulating data during a walker's computation * and emitting that data to a file for easy analysis in R (or any other program/language that can take in a table of * results). Thus, all of the infrastructure below is designed simply to make printing the following as easy as * possible: - * + *

* ##:GATKReport.v0.1 ErrorRatePerCycle : The error rate per sequenced position in the reads * cycle errorrate.61PA8.7 qualavg.61PA8.7 * 0 0.007451835696110506 25.474613284804366 @@ -29,60 +55,60 @@ import java.util.regex.Pattern; * 6 5.452562704471102E-4 36.1217248908297 * 7 5.452562704471102E-4 36.1910480349345 * 8 5.452562704471102E-4 36.00345705967977 - * + *

* Here, we have a GATKReport table - a well-formatted, easy to read representation of some tabular data. Every single * table has this same GATKReport.v0.1 header, which permits multiple files from different sources to be cat-ed * together, which makes it very easy to pull tables from different programs into R via a single file. - * + *

* ------------ * Definitions: - * + *

* Table info: - * The first line, structured as - * ##: :
- * + * The first line, structured as + * ##:
:
+ *

* Table header: - * The second line, specifying a unique name for each column in the table. - * - * The first column mentioned in the table header is the "primary key" column - a column that provides the unique - * identifier for each row in the table. Once this column is created, any element in the table can be referenced by - * the row-column coordinate, i.e. "primary key"-"column name" coordinate. - * - * When a column is added to a table, a default value must be specified (usually 0). This is the initial value for - * an element in a column. This permits operations like increment() and decrement() to work properly on columns that - * are effectively counters for a particular event. - * - * Finally, the display property for each column can be set during column creation. This is useful when a given - * column stores an intermediate result that will be used later on, perhaps to calculate the value of another column. - * In these cases, it's obviously necessary to store the value required for further computation, but it's not - * necessary to actually print the intermediate column. - * + * The second line, specifying a unique name for each column in the table. + *

+ * The first column mentioned in the table header is the "primary key" column - a column that provides the unique + * identifier for each row in the table. Once this column is created, any element in the table can be referenced by + * the row-column coordinate, i.e. "primary key"-"column name" coordinate. + *

+ * When a column is added to a table, a default value must be specified (usually 0). This is the initial value for + * an element in a column. This permits operations like increment() and decrement() to work properly on columns that + * are effectively counters for a particular event. + *

+ * Finally, the display property for each column can be set during column creation. This is useful when a given + * column stores an intermediate result that will be used later on, perhaps to calculate the value of another column. + * In these cases, it's obviously necessary to store the value required for further computation, but it's not + * necessary to actually print the intermediate column. + *

* Table body: - * The values of the table itself. - * + * The values of the table itself. + *

* --------------- * Implementation: - * + *

* The implementation of this table has two components: - * 1. A TreeSet that stores all the values ever specified for the primary key. Any get() operation that - * refers to an element where the primary key object does not exist will result in its implicit creation. I - * haven't yet decided if this is a good idea... - * - * 2. A HashMap that stores a mapping from column name to column contents. Each - * GATKReportColumn is effectively a map (in fact, GATKReportColumn extends TreeMap) between - * primary key and the column value. This means that, given N columns, the primary key information is stored - * N+1 times. This is obviously wasteful and can likely be handled much more elegantly in future implementations. - * + * 1. A TreeSet that stores all the values ever specified for the primary key. Any get() operation that + * refers to an element where the primary key object does not exist will result in its implicit creation. I + * haven't yet decided if this is a good idea... + *

+ * 2. A HashMap that stores a mapping from column name to column contents. Each + * GATKReportColumn is effectively a map (in fact, GATKReportColumn extends TreeMap) between + * primary key and the column value. This means that, given N columns, the primary key information is stored + * N+1 times. This is obviously wasteful and can likely be handled much more elegantly in future implementations. + *

* ------------------------------ * Element and column operations: - * + *

* In addition to simply getting and setting values, this object also permits some simple operations to be applied to * individual elements or to whole columns. For instance, an element can be easily incremented without the hassle of * calling get(), incrementing the obtained value by 1, and then calling set() with the new value. Also, some vector * operations are supported. For instance, two whole columns can be divided and have the result be set to a third * column. This is especially useful when aggregating counts in two intermediate columns that will eventually need to * be manipulated row-by-row to compute the final column. - * + *

* Note: I've made no attempt whatsoever to make these operations efficient. Right now, some of the methods check the * type of the stored object using an instanceof call and attempt to do the right thing. Others cast the contents of * the cell to a Number, call the Number.toDouble() method and compute a result. This is clearly not the ideal design, @@ -92,12 +118,17 @@ import java.util.regex.Pattern; * @author Khalid Shakir */ public class GATKReportTable { - /** REGEX that matches any table with an invalid name */ - public final static String INVALID_TABLE_NAME_REGEX = "[^a-zA-Z0-9_\\-\\.]"; - private static final GATKReportVersion LATEST_REPORT_VERSION = GATKReportVersion.V0_2; + /** + * REGEX that matches any table with an invalid name + */ + public static final String INVALID_TABLE_NAME_REGEX = "[^a-zA-Z0-9_\\-\\.]"; + public static final String GATKTABLE_HEADER_PREFIX = "#:GATKTable"; + public static final String SEPARATOR = ":"; + public static final String ENDLINE = ":;"; + private String tableName; private String tableDescription; - private GATKReportVersion version = LATEST_REPORT_VERSION; + private String primaryKeyName; private Collection primaryKeyColumn; @@ -106,11 +137,94 @@ public class GATKReportTable { private GATKReportColumns columns; + public GATKReportTable(BufferedReader reader, GATKReportVersion version) { + try { + + int counter = 0; + + switch (version) { + case V1_0: + int nHeaders = 2; + String[] tableHeaders = new String[nHeaders]; + + // Read in the headers + for (int i = 0; i < nHeaders; i++) { + tableHeaders[i] = reader.readLine(); + } + String[] tableData = tableHeaders[0].split(":"); + String[] userData = tableHeaders[1].split(":"); + + // Fill in the fields + tableName = userData[2]; + tableDescription = userData[3]; + primaryKeyDisplay = Boolean.parseBoolean(tableData[2]); + columns = new GATKReportColumns(); + + int nColumns = Integer.parseInt(tableData[3]); + int nRows = Integer.parseInt(tableData[4]); + + + // Read column names 
+ String columnLine = reader.readLine(); + + List columnStarts = TextFormattingUtils.getWordStarts(columnLine); + String[] columnNames = TextFormattingUtils.splitFixedWidth(columnLine, columnStarts); + + if (primaryKeyDisplay) { + addPrimaryKey(columnNames[0]); + + } else { + sortByPrimaryKey = true; + addPrimaryKey("id", false); + counter = 1; + } + // Put in columns using the format string from the header + for (int i = 0; i < nColumns; i++) { + String format = tableData[5 + i]; + if (primaryKeyDisplay) + addColumn(columnNames[i + 1], true, format); + else + addColumn(columnNames[i], true, format); + } + + for (int i = 0; i < nRows; i++) { + // read line + List lineSplits = Arrays.asList(TextFormattingUtils.splitFixedWidth(reader.readLine(), columnStarts)); + + for (int columnIndex = 0; columnIndex < nColumns; columnIndex++) { + + //Input all the remaining values + GATKReportDataType type = getColumns().getByIndex(columnIndex).getDataType(); + + if (primaryKeyDisplay) { + String columnName = columnNames[columnIndex + 1]; + String primaryKey = lineSplits.get(0); + set(primaryKey, columnName, type.Parse(lineSplits.get(columnIndex + 1))); + } else { + String columnName = columnNames[columnIndex]; + set(counter, columnName, type.Parse(lineSplits.get(columnIndex))); + } + + } + counter++; + } + + + reader.readLine(); + // When you see empty line or null, quit out + } + } catch (Exception e) { + //throw new StingException("Cannot read GATKReport: " + e); + e.printStackTrace(); + } + } + + /** * Verifies that a table or column name has only alphanumeric characters - no spaces or special characters allowed * - * @param name the name of the table or column - * @return true if the name is valid, false if otherwise + * @param name the name of the table or column + * @return true if the name is valid, false if otherwise */ private boolean isValidName(String name) { Pattern p = Pattern.compile(INVALID_TABLE_NAME_REGEX); @@ -122,8 +236,8 @@ public class GATKReportTable { /** * 
Verifies that a table or column name has only alphanumeric characters - no spaces or special characters allowed * - * @param description the name of the table or column - * @return true if the name is valid, false if otherwise + * @param description the name of the table or column + * @return true if the name is valid, false if otherwise */ private boolean isValidDescription(String description) { Pattern p = Pattern.compile("\\r|\\n"); @@ -135,15 +249,23 @@ public class GATKReportTable { /** * Construct a new GATK report table with the specified name and description * - * @param tableName the name of the table - * @param tableDescription the description of the table + * @param tableName the name of the table + * @param tableDescription the description of the table */ public GATKReportTable(String tableName, String tableDescription) { this(tableName, tableDescription, true); } + /** + * Construct a new GATK report table with the specified name and description and whether to sort rows by the primary + * key + * + * @param tableName the name of the table + * @param tableDescription the description of the table + * @param sortByPrimaryKey whether to sort rows by the primary key (instead of order added) + */ public GATKReportTable(String tableName, String tableDescription, boolean sortByPrimaryKey) { - if (!isValidName(tableName)) { + if (!isValidName(tableName)) { throw new ReviewedStingException("Attempted to set a GATKReportTable name of '" + tableName + "'. GATKReportTable names must be purely alphanumeric - no spaces or special characters are allowed."); } @@ -158,28 +280,21 @@ public class GATKReportTable { columns = new GATKReportColumns(); } - public GATKReportVersion getVersion() { - return version; - } - - protected void setVersion(GATKReportVersion version) { - this.version = version; - } - /** * Add a primary key column. This becomes the unique identifier for every column in the table. 
* - * @param primaryKeyName the name of the primary key column + * @param primaryKeyName the name of the primary key column */ public void addPrimaryKey(String primaryKeyName) { addPrimaryKey(primaryKeyName, true); } /** - * Add an optionally visible primary key column. This becomes the unique identifier for every column in the table, and will always be printed as the first column. + * Add an optionally visible primary key column. This becomes the unique identifier for every column in the table, + * and will always be printed as the first column. * - * @param primaryKeyName the name of the primary key column - * @param display should this primary key be displayed? + * @param primaryKeyName the name of the primary key column + * @param display should this primary key be displayed? */ public void addPrimaryKey(String primaryKeyName, boolean display) { if (!isValidName(primaryKeyName)) { @@ -195,6 +310,7 @@ public class GATKReportTable { /** * Returns the first primary key matching the dotted column values. * Ex: dbsnp.eval.called.all.novel.all + * * @param dottedColumnValues Period concatenated values. * @return The first primary key matching the column values or throws an exception. */ @@ -208,6 +324,7 @@ public class GATKReportTable { /** * Returns true if there is at least on row with the dotted column values. * Ex: dbsnp.eval.called.all.novel.all + * * @param dottedColumnValues Period concatenated values. * @return true if there is at least one row matching the columns. */ @@ -218,6 +335,7 @@ public class GATKReportTable { /** * Returns the first primary key matching the dotted column values. * Ex: dbsnp.eval.called.all.novel.all + * * @param dottedColumnValues Period concatenated values. * @return The first primary key matching the column values or null. */ @@ -228,6 +346,7 @@ public class GATKReportTable { /** * Returns the first primary key matching the column values. 
* Ex: new String[] { "dbsnp", "eval", "called", "all", "novel", "all" } + * * @param columnValues column values. * @return The first primary key matching the column values. */ @@ -235,7 +354,7 @@ public class GATKReportTable { for (Object primaryKey : primaryKeyColumn) { boolean matching = true; for (int i = 0; matching && i < columnValues.length; i++) { - matching = ObjectUtils.equals(columnValues[i], get(primaryKey, i+1)); + matching = ObjectUtils.equals(columnValues[i], get(primaryKey, i + 1)); } if (matching) return primaryKey; @@ -244,29 +363,65 @@ public class GATKReportTable { } /** - * Add a column to the report and specify the default value that should be supplied if a given position in the table is never explicitly set. + * Add a column to the report and specify the default value that should be supplied if a given position in the table + * is never explicitly set. * - * @param columnName the name of the column - * @param defaultValue the default value for the column + * @param columnName the name of the column + * @param defaultValue the default value for the column */ public void addColumn(String columnName, Object defaultValue) { - addColumn(columnName, defaultValue, null); + addColumn(columnName, defaultValue, true); } + /** + * Add a column to the report, specify the default column value, and specify whether the column should be displayed + * in the final output (useful when intermediate columns are necessary for later calculations, but are not required + * to be in the output file. 
+ * + * @param columnName the name of the column + * @param defaultValue the default value of the column + * @param display if true - the column will be displayed; if false - the column will be hidden + */ + public void addColumn(String columnName, Object defaultValue, boolean display) { + addColumn(columnName, defaultValue, display, ""); + } + + /** + * Add a column to the report, specify the default column value, and specify the format string used to display the + * data. This overload takes no display flag: it passes true for display, so the column is always shown in the + * final output. + * + * @param columnName the name of the column + * @param defaultValue the default value of the column + * @param format the format string used to display data + */ + public void addColumn(String columnName, Object defaultValue, String format) { + addColumn(columnName, defaultValue, true, format); } + /** - * Add a column to the report, specify the default column value, and specify whether the column should be displayed in the final output (useful when intermediate columns are necessary for later calculations, but are not required to be in the output file. + * Add a column to the report, specify whether the column should be displayed in the final output (useful when + * intermediate columns are necessary for later calculations, but are not required to be in the output file), and the + * format string used to display the data.
* - * @param columnName the name of the column - * @param defaultValue the default value of the column - * @param display if true - the column will be displayed; if false - the column will be hidden + * @param columnName the name of the column + * @param display if true - the column will be displayed; if false - the column will be hidden + * @param format the format string used to display data */ - public void addColumn(String columnName, Object defaultValue, boolean display) { - addColumn(columnName, defaultValue, display, null); + public void addColumn(String columnName, boolean display, String format) { + addColumn(columnName, null, display, format); } + /** + * Add a column to the report, specify the default column value, whether the column should be displayed in the final + * output (useful when intermediate columns are necessary for later calculations, but are not required to be in the + * output file), and the format string used to display the data. + * + * @param columnName the name of the column + * @param defaultValue the default value of the column + * @param display if true - the column will be displayed; if false - the column will be hidden + * @param format the format string used to display data + */ public void addColumn(String columnName, Object defaultValue, boolean display, String format) { if (!isValidName(columnName)) { throw new ReviewedStingException("Attempted to set a GATKReportTable column name of '" + columnName + "'. GATKReportTable column names must be purely alphanumeric - no spaces or special characters are allowed."); @@ -274,11 +429,17 @@ public class GATKReportTable { columns.put(columnName, new GATKReportColumn(columnName, defaultValue, display, format)); } + + public GATKReportVersion getVersion() { + return GATKReport.LATEST_REPORT_VERSION; + } + + /** * Check if the requested element exists, and if not, create it.
* - * @param primaryKey the primary key value - * @param columnName the name of the column + * @param primaryKey the primary key value + * @param columnName the name of the column */ private void verifyEntry(Object primaryKey, String columnName) { if (!columns.containsKey(columnName)) { @@ -303,26 +464,68 @@ public class GATKReportTable { /** * Set the value for a given position in the table * - * @param primaryKey the primary key value - * @param columnName the name of the column - * @param value the value to set + * @param primaryKey the primary key value + * @param columnName the name of the column + * @param value the value to set */ public void set(Object primaryKey, String columnName, Object value) { verifyEntry(primaryKey, columnName); + GATKReportColumn column = columns.get(columnName); + // Check if value is of same type as column - columns.get(columnName).put(primaryKey, value); + // We do not accept internal null values + if (value == null) + value = "null"; + + // This code is bs. Why am do I have to conform to bad code + // Below is some ode to convert a string into its appropriate type. 
+ // This is just Roger ranting + + // If we got a string but the column is not a String type + Object newValue = null; + if (value instanceof String && !column.getDataType().equals(GATKReportDataType.String)) { + // Integer case + if (column.getDataType().equals(GATKReportDataType.Integer)) { + try { + newValue = Long.parseLong((String) value); + } catch (Exception e) { + } + } + if (column.getDataType().equals(GATKReportDataType.Decimal)) { + try { + newValue = Double.parseDouble((String) value); + } catch (Exception e) { + } + } + if (column.getDataType().equals(GATKReportDataType.Byte) && + ((String) value).length() == 1) { + newValue = ((String) value).charAt(0); + + } + } + + if (newValue != null) + value = newValue; + + if (column.getDataType().equals(GATKReportDataType.fromObject(value)) || + column.getDataType().equals(GATKReportDataType.Unknown) || + value == null) + columns.get(columnName).put(primaryKey, value); + else + throw new ReviewedStingException(String.format("Tried to add an object of type: %s to a column of type: %s", + GATKReportDataType.fromObject(value).name(), column.getDataType().name())); } /** * Get a value from the given position in the table * - * @param primaryKey the primary key value - * @param columnName the name of the column - * @return the value stored at the specified position in the table + * @param primaryKey the primary key value + * @param columnName the name of the column + * @return the value stored at the specified position in the table */ public Object get(Object primaryKey, String columnName) { verifyEntry(primaryKey, columnName); - + return columns.get(columnName).get(primaryKey); } @@ -331,7 +534,7 @@ public class GATKReportTable { * * @param primaryKey the primary key value * @param columnIndex the index of the column - * @return the value stored at the specified position in the table + * @return the value stored at the specified position in the table */ private Object get(Object primaryKey, int columnIndex) { 
return columns.getByIndex(columnIndex).get(primaryKey); @@ -340,8 +543,8 @@ public class GATKReportTable { /** * Increment an element in the table. This implementation is awful - a functor would probably be better. * - * @param primaryKey the primary key value - * @param columnName the name of the column + * @param primaryKey the primary key value + * @param columnName the name of the column */ public void increment(Object primaryKey, String columnName) { Object oldValue = get(primaryKey, columnName); @@ -369,8 +572,8 @@ public class GATKReportTable { /** * Decrement an element in the table. This implementation is awful - a functor would probably be better. * - * @param primaryKey the primary key value - * @param columnName the name of the column + * @param primaryKey the primary key value + * @param columnName the name of the column */ public void decrement(Object primaryKey, String columnName) { Object oldValue = get(primaryKey, columnName); @@ -398,9 +601,9 @@ public class GATKReportTable { /** * Add the specified value to an element in the table * - * @param primaryKey the primary key value - * @param columnName the name of the column - * @param valueToAdd the value to add + * @param primaryKey the primary key value + * @param columnName the name of the column + * @param valueToAdd the value to add */ public void add(Object primaryKey, String columnName, Object valueToAdd) { Object oldValue = get(primaryKey, columnName); @@ -428,8 +631,8 @@ public class GATKReportTable { /** * Subtract the specified value from an element in the table * - * @param primaryKey the primary key value - * @param columnName the name of the column + * @param primaryKey the primary key value + * @param columnName the name of the column * @param valueToSubtract the value to subtract */ public void subtract(Object primaryKey, String columnName, Object valueToSubtract) { @@ -458,9 +661,9 @@ public class GATKReportTable { /** * Multiply the specified value to an element in the table * - * 
@param primaryKey the primary key value - * @param columnName the name of the column - * @param valueToMultiply the value to multiply by + * @param primaryKey the primary key value + * @param columnName the name of the column + * @param valueToMultiply the value to multiply by */ public void multiply(Object primaryKey, String columnName, Object valueToMultiply) { Object oldValue = get(primaryKey, columnName); @@ -488,9 +691,9 @@ public class GATKReportTable { /** * Divide the specified value from an element in the table * - * @param primaryKey the primary key value - * @param columnName the name of the column - * @param valueToDivide the value to divide by + * @param primaryKey the primary key value + * @param columnName the name of the column + * @param valueToDivide the value to divide by */ public void divide(Object primaryKey, String columnName, Object valueToDivide) { Object oldValue = get(primaryKey, columnName); @@ -518,9 +721,9 @@ public class GATKReportTable { /** * Add two columns to each other and set the results to a third column * - * @param columnToSet the column that should hold the results - * @param augend the column that shall be the augend - * @param addend the column that shall be the addend + * @param columnToSet the column that should hold the results + * @param augend the column that shall be the augend + * @param addend the column that shall be the addend */ public void addColumns(String columnToSet, String augend, String addend) { for (Object primaryKey : primaryKeyColumn) { @@ -536,8 +739,8 @@ public class GATKReportTable { /** * Subtract one column from another and set the results to a third column * - * @param columnToSet the column that should hold the results - * @param minuend the column that shall be the minuend (the a in a - b) + * @param columnToSet the column that should hold the results + * @param minuend the column that shall be the minuend (the a in a - b) * @param subtrahend the column that shall be the subtrahend (the b in a 
- b) */ public void subtractColumns(String columnToSet, String minuend, String subtrahend) { @@ -555,8 +758,8 @@ public class GATKReportTable { * Multiply two columns by each other and set the results to a third column * * @param columnToSet the column that should hold the results - * @param multiplier the column that shall be the multiplier - * @param multiplicand the column that shall be the multiplicand + * @param multiplier the column that shall be the multiplier + * @param multiplicand the column that shall be the multiplicand */ public void multiplyColumns(String columnToSet, String multiplier, String multiplicand) { for (Object primaryKey : primaryKeyColumn) { @@ -572,9 +775,9 @@ public class GATKReportTable { /** * Divide two columns by each other and set the results to a third column * - * @param columnToSet the column that should hold the results - * @param numeratorColumn the column that shall be the numerator - * @param denominatorColumn the column that shall be the denominator + * @param columnToSet the column that should hold the results + * @param numeratorColumn the column that shall be the numerator + * @param denominatorColumn the column that shall be the denominator */ public void divideColumns(String columnToSet, String numeratorColumn, String denominatorColumn) { for (Object primaryKey : primaryKeyColumn) { @@ -589,10 +792,11 @@ public class GATKReportTable { /** * Return the print width of the primary key column - * @return the width of the primary key column + * + * @return the width of the primary key column */ public int getPrimaryKeyColumnWidth() { - int maxWidth = primaryKeyName.length(); + int maxWidth = getPrimaryKeyName().length(); for (Object primaryKey : primaryKeyColumn) { int width = primaryKey.toString().length(); @@ -608,9 +812,18 @@ public class GATKReportTable { /** * Write the table to the PrintStream, formatted nicely to be human-readable, AWK-able, and R-friendly. 
* - * @param out the PrintStream to which the table should be written + * @param out the PrintStream to which the table should be written */ public void write(PrintStream out) { + + /* + * Table header: + * #:GATKTable:nColumns:nRows:(DataType for each column):; + * #:GATKTable:TableName:Description :; + * key colA colB + * row1 xxxx xxxxx + */ + // Get the column widths for everything HashMap columnFormats = new HashMap(); for (String columnName : columns.keySet()) { @@ -619,18 +832,30 @@ public class GATKReportTable { String primaryKeyFormat = "%-" + getPrimaryKeyColumnWidth() + "s"; // Emit the table definition - out.printf("##:GATKReport.%s %s : %s%n", LATEST_REPORT_VERSION.versionString, tableName, tableDescription); + String formatHeader = String.format(GATKTABLE_HEADER_PREFIX + ":%b:%d:%d", primaryKeyDisplay, getColumns().size(), getNumRows()); + // Add all the formats for all the columns + for (GATKReportColumn column : getColumns()) { + if (column.isDisplayable()) + formatHeader += (SEPARATOR + column.getFormat()); + } + out.println(formatHeader + ENDLINE); + out.printf(GATKTABLE_HEADER_PREFIX + ":%s:%s\n", tableName, tableDescription); + + //out.printf("#:GATKTable:%s:%s", Algorithm); + // Emit the table header, taking into account the padding requirement if the primary key is a hidden column boolean needsPadding = false; if (primaryKeyDisplay) { - out.printf(primaryKeyFormat, primaryKeyName); + out.printf(primaryKeyFormat, getPrimaryKeyName()); needsPadding = true; } for (String columnName : columns.keySet()) { if (columns.get(columnName).isDisplayable()) { - if (needsPadding) { out.printf(" "); } + if (needsPadding) { + out.printf(" "); + } out.printf(columnFormats.get(columnName).getNameFormat(), columnName); needsPadding = true; @@ -649,7 +874,9 @@ public class GATKReportTable { for (String columnName : columns.keySet()) { if (columns.get(columnName).isDisplayable()) { - if (needsPadding) { out.printf(" "); } + if (needsPadding) { + out.printf(" "); + 
} String value = columns.get(columnName).getStringValue(primaryKey); out.printf(columnFormats.get(columnName).getValueFormat(), value); @@ -660,7 +887,6 @@ public class GATKReportTable { out.printf("%n"); } - // Close the table out.printf("%n"); } @@ -679,4 +905,97 @@ public class GATKReportTable { public GATKReportColumns getColumns() { return columns; } + + /** + * Combines two compatible GATK report tables. This is the general function which will call the different algorithms + * necessary to gather the tables. Every column's combine algorithm is read and treated accordingly. + * + * @param input Another GATK table + */ + protected void combineWith(GATKReportTable input) { + /* + * This function is different from addRowsFrom because we will add the ability to sum,average, etc rows + * TODO: Add other combining algorithms + */ + + // Make sure the columns match AND the Primary Key + if (input.getColumns().keySet().equals(this.getColumns().keySet()) && + input.getPrimaryKeyName().equals(this.getPrimaryKeyName())) { + this.addRowsFrom(input); + } else + throw new ReviewedStingException("Failed to combine GATKReportTable, columns don't match!"); + } + + /** + * A gather algorithm that simply takes the rows from the argument, and adds them to the current table. This is the + * default gather algorithm. + * + * @param input Another GATK table to add rows from. 
+ */ + private void addRowsFrom(GATKReportTable input) { + // add column by column + + // For every column + for (String columnKey : input.getColumns().keySet()) { + GATKReportColumn current = this.getColumns().get(columnKey); + GATKReportColumn toAdd = input.getColumns().get(columnKey); + // We want to take the current column and add all the values from input + + // The column is a map of values + for (Object rowKey : toAdd.keySet()) { + // We add every value from toAdd to the current + if (!current.containsKey(rowKey)) { + this.set(rowKey, columnKey, toAdd.get(rowKey)); + //System.out.printf("Putting row with PK: %s \n", rowKey); + } else { + + // TODO we should be able to handle combining data by adding, averaging, etc. + this.set(rowKey, columnKey, toAdd.get(rowKey)); + + System.out.printf("OVERWRITING Row with PK: %s \n", rowKey); + } + } + } + + } + + public String getPrimaryKeyName() { + return primaryKeyName; + } + + /** + * Returns whether or not the two tables have the same format including columns and everything in between. This does + * not check if the data inside is the same. This is the check to see if the two tables are gatherable or + * reduceable + * + * @param table another GATK table + * @return true if the the tables are gatherable + */ + public boolean isSameFormat(GATKReportTable table) { + //Should we add the sortByPrimaryKey as a check? + + if (!columns.isSameFormat(table.columns)) { + return false; + } + return (primaryKeyDisplay == table.primaryKeyDisplay && + primaryKeyName.equals(table.primaryKeyName) && + tableName.equals(table.tableName) && + tableDescription.equals(table.tableDescription)); + } + + /** + * Checks that the tables are exactly the same. + * + * @param table another GATK report + * @return true if all field in the reports, tables, and columns are equal. 
+ */ + public boolean equals(GATKReportTable table) { + if (!isSameFormat(table)) { + return false; + } + return (columns.equals(table.columns) && + primaryKeyColumn.equals(table.primaryKeyColumn) && + sortByPrimaryKey == table.sortByPrimaryKey); + + } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportVersion.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportVersion.java index 5f1159a43..caac79cb5 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportVersion.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportVersion.java @@ -31,7 +31,7 @@ public enum GATKReportVersion { * Differences between other versions: * - Does not allow spaces in cells. * - Mostly fixed width but has a bug where the string width of floating point - * values was not measured correctly leading to columns that aren't aligned + * values was not measured correctly leading to columns that aren't aligned */ V0_1("v0.1"), @@ -40,7 +40,15 @@ public enum GATKReportVersion { * - Spaces allowed in cells, for example in sample names with spaces in them ex: "C507/FG-CR 6". * - Fixed width fixed for floating point values */ - V0_2("v0.2"); + V0_2("v0.2"), + + /* + * Differences between v0.x + * - Added table and report headers + * - Headers changed format, include the numbe rof tables, rows, and metadata for gathering + * - IS GATHERABLE + */ + V1_0("v1.0"); public final String versionString; @@ -53,8 +61,13 @@ public enum GATKReportVersion { return versionString; } + public boolean equals(GATKReportVersion that) { + return (versionString.equals(that.versionString)); + } + /** * Returns the GATK Report Version from the file header. + * * @param header Header from the file starting with ##:GATKReport.v[version] * @return The version as an enum. 
*/ @@ -65,6 +78,9 @@ public enum GATKReportVersion { if (header.startsWith("##:GATKReport.v0.2 ")) return GATKReportVersion.V0_2; + if (header.startsWith("#:GATKReport.v1.0")) + return GATKReportVersion.V1_0; + throw new ReviewedStingException("Unknown GATK report version in header: " + header); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/GATKReportDiffableReader.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/GATKReportDiffableReader.java index 41b17cc7b..2fa566c09 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/GATKReportDiffableReader.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/GATKReportDiffableReader.java @@ -36,9 +36,14 @@ import java.io.IOException; /** * Class implementing diffnode reader for GATKReports */ + +// TODO Version check to be added at the report level + public class GATKReportDiffableReader implements DiffableReader { @Override - public String getName() { return "GATKReport"; } + public String getName() { + return "GATKReport"; + } @Override public DiffElement readFromFile(File file, int maxElementsToRead) { @@ -47,12 +52,12 @@ public class GATKReportDiffableReader implements DiffableReader { // one line reads the whole thing into memory GATKReport report = new GATKReport(file); - for (GATKReportTable table : report.getTables() ) { + for (GATKReportTable table : report.getTables()) { root.add(tableToNode(table, root)); } return root.getBinding(); - } catch ( Exception e ) { + } catch (Exception e) { return null; } } @@ -62,9 +67,8 @@ public class GATKReportDiffableReader implements DiffableReader { tableRoot.add("Description", table.getTableDescription()); tableRoot.add("NumberOfRows", table.getNumRows()); - tableRoot.add("Version", table.getVersion()); - for ( GATKReportColumn column : table.getColumns().values() ) { + for (GATKReportColumn column : table.getColumns().values()) { DiffNode columnRoot = 
DiffNode.empty(column.getColumnName(), tableRoot); columnRoot.add("Width", column.getColumnFormat().getWidth()); @@ -72,7 +76,7 @@ public class GATKReportDiffableReader implements DiffableReader { columnRoot.add("Displayable", column.isDisplayable()); int n = 1; - for ( Object elt : column.values() ) { + for (Object elt : column.values()) { String name = column.getColumnName() + n++; columnRoot.add(name, elt.toString()); } @@ -91,7 +95,7 @@ public class GATKReportDiffableReader implements DiffableReader { new FileReader(file).read(buff, 0, HEADER.length()); String firstLine = new String(buff); return firstLine.startsWith(HEADER); - } catch ( IOException e ) { + } catch (IOException e) { return false; } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java index 1106fcb52..0eb35d299 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java @@ -317,7 +317,7 @@ public class UnifiedGenotyper extends LocusWalker> knownCNVs = null; // basic counts on various rates found - @DataPoint(description = "Number of samples") + @DataPoint(description = "Number of samples", format = "%d") public long nSamples = 0; - @DataPoint(description = "Number of processed loci") + @DataPoint(description = "Number of processed loci", format = "%d") public long nProcessedLoci = 0; - @DataPoint(description = "Number of SNPs") + @DataPoint(description = "Number of SNPs", format = "%d") public long nSNPs = 0; @DataPoint(description = "Overall TiTv ratio", format = "%.2f") public double TiTvRatio = 0; - @DataPoint(description = "SNP Novelty Rate") + @DataPoint(description = "SNP Novelty Rate", format = "%s") public String SNPNoveltyRate = "NA"; - @DataPoint(description = "Mean number of SNPs per individual") + @DataPoint(description = 
"Mean number of SNPs per individual", format = "%d") public long nSNPsPerSample = 0; @DataPoint(description = "Mean TiTv ratio per individual", format = "%.2f") public double TiTvRatioPerSample = 0; @DataPoint(description = "Mean depth of coverage per sample at SNPs", format = "%.1f") public double SNPDPPerSample = 0; - @DataPoint(description = "Number of Indels") + @DataPoint(description = "Number of Indels", format = "%d") public long nIndels = 0; - @DataPoint(description = "Indel Novelty Rate") + @DataPoint(description = "Indel Novelty Rate", format = "%s") public String IndelNoveltyRate = "NA"; - @DataPoint(description = "Mean number of Indels per individual") + @DataPoint(description = "Mean number of Indels per individual", format = "%d") public long nIndelsPerSample = 0; @DataPoint(description = "Mean depth of coverage per sample at Indels", format = "%.1f") public double IndelDPPerSample = 0; - @DataPoint(description = "Number of SVs") + @DataPoint(description = "Number of SVs", format = "%d") public long nSVs = 0; - @DataPoint(description = "SV Novelty Rate") + @DataPoint(description = "SV Novelty Rate", format = "%s") public String SVNoveltyRate = "NA"; - @DataPoint(description = "Mean number of SVs per individual") + @DataPoint(description = "Mean number of SVs per individual", format = "%d") public long nSVsPerSample = 0; TypeSampleMap allVariantCounts, knownVariantCounts; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java index fdeb6919d..44af9f574 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java @@ -1,3 +1,27 @@ +/* + * Copyright (c) 2012, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of 
this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + package org.broadinstitute.sting.gatk.walkers.varianteval.util; import org.apache.log4j.Logger; @@ -35,8 +59,8 @@ public class VariantEvalUtils { * List all of the available evaluation modules, then exit successfully */ public void listModulesAndExit() { - List> vsClasses = new PluginManager( VariantStratifier.class ).getPlugins(); - List> veClasses = new PluginManager( VariantEvaluator.class ).getPlugins(); + List> vsClasses = new PluginManager(VariantStratifier.class).getPlugins(); + List> veClasses = new PluginManager(VariantEvaluator.class).getPlugins(); logger.info("Available stratification modules:"); logger.info("(Standard modules are starred)"); @@ -58,9 +82,9 @@ public class VariantEvalUtils { /** * Initialize required, standard and user-specified stratification objects * - * @param variantEvalWalker the parent walker - * @param noStandardStrats don't use the standard stratifications - * @param modulesToUse the list of stratification modules to use + * @param variantEvalWalker the parent walker + * @param noStandardStrats don't use the standard stratifications + * @param modulesToUse the list of stratification modules to use * @return set of stratifications to use */ public TreeSet initializeStratificationObjects(VariantEvalWalker variantEvalWalker, boolean noStandardStrats, String[] modulesToUse) { @@ -246,7 +270,8 @@ public class VariantEvalUtils { field.setAccessible(true); if (!(field.get(vei) instanceof TableType)) { - table.addColumn(field.getName(), 0.0, datamap.get(field).format()); + String format = datamap.get(field).format(); + table.addColumn(field.getName(), true, format); } } } catch (InstantiationException e) { @@ -297,7 +322,6 @@ public class VariantEvalUtils { * Additional variant contexts per sample are automatically generated and added to the map unless the sample name * matches the ALL_SAMPLE_NAME constant. 
* - * * @param tracker the metadata tracker * @param ref the reference context * @param tracks the list of tracks to process @@ -306,57 +330,56 @@ public class VariantEvalUtils { * @param subsetBySample if false, do not separate the track into per-sample VCs * @param trackPerSample if false, don't stratify per sample (and don't cut up the VariantContext like we would need * to do this) - * * @return the mapping of track to VC list that should be populated */ public HashMap, HashMap>> - bindVariantContexts(RefMetaDataTracker tracker, - ReferenceContext ref, - List> tracks, - boolean byFilter, - boolean subsetBySample, - boolean trackPerSample, - boolean mergeTracks) { - if ( tracker == null ) + bindVariantContexts(RefMetaDataTracker tracker, + ReferenceContext ref, + List> tracks, + boolean byFilter, + boolean subsetBySample, + boolean trackPerSample, + boolean mergeTracks) { + if (tracker == null) return null; HashMap, HashMap>> bindings = new HashMap, HashMap>>(); RodBinding firstTrack = tracks.isEmpty() ? 
null : tracks.get(0); - for ( RodBinding track : tracks ) { + for (RodBinding track : tracks) { HashMap> mapping = new HashMap>(); - for ( VariantContext vc : tracker.getValues(track, ref.getLocus()) ) { + for (VariantContext vc : tracker.getValues(track, ref.getLocus())) { // First, filter the VariantContext to represent only the samples for evaluation VariantContext vcsub = vc; - if ( subsetBySample && vc.hasGenotypes() && vc.hasGenotypes(variantEvalWalker.getSampleNamesForEvaluation()) ) { + if (subsetBySample && vc.hasGenotypes() && vc.hasGenotypes(variantEvalWalker.getSampleNamesForEvaluation())) { vcsub = getSubsetOfVariantContext(vc, variantEvalWalker.getSampleNamesForEvaluation()); } - if ( (byFilter || !vcsub.isFiltered()) ) { + if ((byFilter || !vcsub.isFiltered())) { addMapping(mapping, VariantEvalWalker.getAllSampleName(), vcsub); } // Now, if stratifying, split the subsetted vc per sample and add each as a new context - if ( vc.hasGenotypes() && trackPerSample ) { - for ( String sampleName : variantEvalWalker.getSampleNamesForEvaluation() ) { + if (vc.hasGenotypes() && trackPerSample) { + for (String sampleName : variantEvalWalker.getSampleNamesForEvaluation()) { VariantContext samplevc = getSubsetOfVariantContext(vc, sampleName); - if ( byFilter || !samplevc.isFiltered() ) { + if (byFilter || !samplevc.isFiltered()) { addMapping(mapping, sampleName, samplevc); } } } } - if ( mergeTracks && bindings.containsKey(firstTrack) ) { + if (mergeTracks && bindings.containsKey(firstTrack)) { // go through each binding of sample -> value and add all of the bindings from this entry HashMap> firstMapping = bindings.get(firstTrack); - for ( Map.Entry> elt : mapping.entrySet() ) { + for (Map.Entry> elt : mapping.entrySet()) { Collection firstMappingSet = firstMapping.get(elt.getKey()); - if ( firstMappingSet != null ) { + if (firstMappingSet != null) { firstMappingSet.addAll(elt.getValue()); } else { firstMapping.put(elt.getKey(), elt.getValue()); @@ -371,7 +394,7 
@@ public class VariantEvalUtils { } private void addMapping(HashMap> mappings, String sample, VariantContext vc) { - if ( !mappings.containsKey(sample) ) + if (!mappings.containsKey(sample)) mappings.put(sample, new ArrayList(1)); mappings.get(sample).add(vc); } diff --git a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java index a96cbffc5..90b5630b6 100644 --- a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java @@ -128,11 +128,11 @@ public class MathUtils { return big + MathUtils.jacobianLogTable[ind]; } - public static double sum(Collection numbers) { + public static double sum(Collection numbers) { return sum(numbers, false); } - public static double sum(Collection numbers, boolean ignoreNan) { + public static double sum(Collection numbers, boolean ignoreNan) { double sum = 0; for (Number n : numbers) { if (!ignoreNan || !Double.isNaN(n.doubleValue())) { @@ -152,8 +152,8 @@ public class MathUtils { return size; } - public static double average(Collection x) { - return (double) sum(x) / x.size(); + public static double average(Collection x) { + return sum(x) / x.size(); } public static double average(Collection numbers, boolean ignoreNan) { @@ -1100,13 +1100,6 @@ public class MathUtils { return getQScoreOrderStatistic(reads, offsets, (int) Math.floor(reads.size() / 2.)); } - public static long sum(Collection x) { - long sum = 0; - for (int v : x) - sum += v; - return sum; - } - /** * A utility class that computes on the fly average and standard deviation for a stream of numbers. 
* The number of observations does not have to be known in advance, and can be also very big (so that diff --git a/public/java/test/org/broadinstitute/sting/gatk/report/GATKReportUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/report/GATKReportUnitTest.java index b3b9ab555..124bda7bc 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/report/GATKReportUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/report/GATKReportUnitTest.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011, The Broad Institute + * Copyright (c) 2012, The Broad Institute * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation @@ -29,43 +29,47 @@ import org.testng.Assert; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; +import java.io.File; +import java.io.IOException; +import java.io.PrintStream; + public class GATKReportUnitTest extends BaseTest { @Test(enabled = false) public void testParse() throws Exception { - String reportPath = validationDataLocation + "exampleGATKReport.eval"; + String reportPath = validationDataLocation + "exampleGATKReportv1.tbl"; GATKReport report = new GATKReport(reportPath); GATKReportTable countVariants = report.getTable("CountVariants"); - Assert.assertEquals(countVariants.getVersion(), GATKReportVersion.V0_1); + //Assert.assertEquals(countVariants.getVersion(), GATKReportVersion.V0_1); Object countVariantsPK = countVariants.getPrimaryKey("none.eval.none.all"); Assert.assertEquals(countVariants.get(countVariantsPK, "nProcessedLoci"), "100000"); Assert.assertEquals(countVariants.get(countVariantsPK, "nNoCalls"), "99872"); GATKReportTable validationReport = report.getTable("ValidationReport"); - Assert.assertEquals(validationReport.getVersion(), GATKReportVersion.V0_1); + //Assert.assertEquals(validationReport.getVersion(), GATKReportVersion.V0_1); Object validationReportPK = countVariants.getPrimaryKey("none.eval.none.known"); 
Assert.assertEquals(validationReport.get(validationReportPK, "sensitivity"), "NaN"); } @DataProvider(name = "rightAlignValues") public Object[][] getRightAlignValues() { - return new Object[][] { - new Object[] {null, true}, - new Object[] {"null", true}, - new Object[] {"NA", true}, - new Object[] {"0", true}, - new Object[] {"0.0", true}, - new Object[] {"-0", true}, - new Object[] {"-0.0", true}, - new Object[] {String.valueOf(Long.MAX_VALUE), true}, - new Object[] {String.valueOf(Long.MIN_VALUE), true}, - new Object[] {String.valueOf(Float.MIN_NORMAL), true}, - new Object[] {String.valueOf(Double.MAX_VALUE), true}, - new Object[] {String.valueOf(Double.MIN_VALUE), true}, - new Object[] {String.valueOf(Double.POSITIVE_INFINITY), true}, - new Object[] {String.valueOf(Double.NEGATIVE_INFINITY), true}, - new Object[] {String.valueOf(Double.NaN), true}, - new Object[] {"hello", false} + return new Object[][]{ + new Object[]{null, true}, + new Object[]{"null", true}, + new Object[]{"NA", true}, + new Object[]{"0", true}, + new Object[]{"0.0", true}, + new Object[]{"-0", true}, + new Object[]{"-0.0", true}, + new Object[]{String.valueOf(Long.MAX_VALUE), true}, + new Object[]{String.valueOf(Long.MIN_VALUE), true}, + new Object[]{String.valueOf(Float.MIN_NORMAL), true}, + new Object[]{String.valueOf(Double.MAX_VALUE), true}, + new Object[]{String.valueOf(Double.MIN_VALUE), true}, + new Object[]{String.valueOf(Double.POSITIVE_INFINITY), true}, + new Object[]{String.valueOf(Double.NEGATIVE_INFINITY), true}, + new Object[]{String.valueOf(Double.NaN), true}, + new Object[]{"hello", false} }; } @@ -73,4 +77,109 @@ public class GATKReportUnitTest extends BaseTest { public void testIsRightAlign(String value, boolean expected) { Assert.assertEquals(GATKReportColumn.isRightAlign(value), expected, "right align of '" + value + "'"); } + + @Test + public void testSimpleGATKReport() { + GATKReport report = GATKReport.newSimpleReport("TableName", "a", "b", "Roger", "is", "Awesome"); 
+ report.addRow("a", 'F', 12, 23.45, true); + report.addRow("ans", '3', 24.5, 456L, 2345); + report.addRow("hi", null, null, "", 2.3); + + //report.print(System.out); + + try { + File file = createTempFile("GATKReportGatherer-UnitTest", ".tbl"); + //System.out.format("The temporary file" + " has been created: %s%n", file); + PrintStream ps = new PrintStream(file); + report.print(ps); + //System.out.println("File succesfully outputed!"); + GATKReport inputRead = new GATKReport(file); + //System.out.println("File succesfully read!"); + //inputRead.print(System.out); + Assert.assertTrue(report.isSameFormat(inputRead)); + + } catch (IOException x) { + System.err.format("IOException: %s%n", x); + } + + } + + @Test + public void testGATKReportGatherer() { + boolean displayPK = false; + + GATKReport report1, report2, report3; + report1 = new GATKReport(); + report1.addTable("TableName", "Description"); + report1.getTable("TableName").addPrimaryKey("id", displayPK); + report1.getTable("TableName").addColumn("colA", GATKReportDataType.String.getDefaultValue(), "%s"); + report1.getTable("TableName").addColumn("colB", GATKReportDataType.Byte.getDefaultValue(), "%c"); + report1.getTable("TableName").set(1, "colA", "NotNum"); + report1.getTable("TableName").set(1, "colB", (byte) 64); + + report2 = new GATKReport(); + report2.addTable("TableName", "Description"); + report2.getTable("TableName").addPrimaryKey("id", displayPK); + report2.getTable("TableName").addColumn("colA", GATKReportDataType.String.getDefaultValue(), "%s"); + report2.getTable("TableName").addColumn("colB", GATKReportDataType.Byte.getDefaultValue(), "%c"); + report2.getTable("TableName").set(2, "colA", "df3"); + report2.getTable("TableName").set(2, "colB", 'A'); + + report3 = new GATKReport(); + report3.addTable("TableName", "Description"); + report3.getTable("TableName").addPrimaryKey("id", displayPK); + report3.getTable("TableName").addColumn("colA", GATKReportDataType.String.getDefaultValue(), "%s"); + 
report3.getTable("TableName").addColumn("colB", GATKReportDataType.Byte.getDefaultValue(), "%c"); + report3.getTable("TableName").set(3, "colA", "df5f"); + report3.getTable("TableName").set(3, "colB", 'c'); + + report1.combineWith(report2); + report1.combineWith(report3); + + report1.addTable("Table2", "To contain some more data types"); + GATKReportTable table = report1.getTable("Table2"); + table.addPrimaryKey("KEY"); + table.addColumn("SomeInt", GATKReportDataType.Integer.getDefaultValue(), true, "%d"); + table.addColumn("SomeFloat", GATKReportDataType.Decimal.getDefaultValue(), true, "%.16E"); + table.addColumn("TrueFalse", false, true, "%B"); + table.set("12df", "SomeInt", 34); + table.set("12df", "SomeFloat", 34.0); + table.set("12df", "TrueFalse", true); + table.set("5f", "SomeInt", -1); + table.set("5f", "SomeFloat", 0.000003); + table.set("5f", "TrueFalse", false); + table.set("RZ", "SomeInt", 904948230958203958L); + table.set("RZ", "SomeFloat", 535646345.657453464576); + table.set("RZ", "TrueFalse", true); + + report1.addTable("Table3", "blah"); + report1.getTable("Table3").addPrimaryKey("HAI"); + report1.getTable("Table3").addColumn("a", true, GATKReportDataType.String.getDefaultFormatString()); + report1.getTable("Table3").set("q", "a", "34"); + report1.getTable("Table3").set("5", "a", "c4g34"); + report1.getTable("Table3").set("573s", "a", "fDlwueg"); + report1.getTable("Table3").set("ZZZ", "a", "Dfs"); + + //report1.print(System.out); + + + try { + File file = createTempFile("GATKReportGatherer-UnitTest", ".tbl"); + //System.out.format("The temporary file" + " has been created: %s%n", file); + PrintStream ps = new PrintStream(file); + report1.print(ps); + //System.out.println("File succesfully outputed!"); + GATKReport inputRead = new GATKReport(file); + //System.out.println("File succesfully read!"); + //inputRead.print(System.out); + Assert.assertTrue(report1.isSameFormat(inputRead)); + Assert.assertTrue(report1.equals(inputRead)); + + } catch 
(IOException x) { + System.err.format("IOException: %s%n", x); + } + + //Assert.assertEquals(1,1); + + } } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/ErrorRatePerCycleIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/ErrorRatePerCycleIntegrationTest.java index accb9c0cf..7c705de18 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/ErrorRatePerCycleIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/ErrorRatePerCycleIntegrationTest.java @@ -35,7 +35,7 @@ public class ErrorRatePerCycleIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( "-T ErrorRatePerCycle -R " + b37KGReference + " -I " + b37GoodBAM + " -L 20:10,000,000-10,100,000 -o %s", 1, - Arrays.asList("0cc212ecb6df300e321784039ff29f13")); + Arrays.asList("71685716c7dde64c51bbd908c06ea742")); executeTest("ErrorRatePerCycle:", spec); } } \ No newline at end of file diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/ReadGroupPropertiesIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/ReadGroupPropertiesIntegrationTest.java index 1a4c8db30..0f3750abd 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/ReadGroupPropertiesIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/ReadGroupPropertiesIntegrationTest.java @@ -38,7 +38,7 @@ public class ReadGroupPropertiesIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( "-T ReadGroupProperties -R " + b37KGReference + " -I " + b37GoodBAM + " -L 20:10,000,000-11,000,000 -o %s", 1, - Arrays.asList("6b8cce223af28cbadcfe87a3b841fc56")); + Arrays.asList("3f1f97a1d2c5fb552ed4f33ea30d136d")); executeTest("ReadGroupProperties:", spec); } } \ No newline at end of file diff --git 
a/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsIntegrationTest.java index 9b79653c6..408849c78 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsIntegrationTest.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011, The Broad Institute + * Copyright (c) 2012, The Broad Institute * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation @@ -50,8 +50,8 @@ public class DiffObjectsIntegrationTest extends WalkerTest { @DataProvider(name = "data") public Object[][] createData() { - new TestParams(testDir + "diffTestMaster.vcf", testDir + "diffTestTest.vcf", "da3dc85a0e35a9aade5520591891b4fa"); - new TestParams(testDir + "exampleBAM.bam", testDir + "exampleBAM.simple.bam", "7dc8200730313e6753237a696296fb73"); + new TestParams(testDir + "diffTestMaster.vcf", testDir + "diffTestTest.vcf", "dac62fcd25e1052bf18b5707700dda7e"); + new TestParams(testDir + "exampleBAM.bam", testDir + "exampleBAM.simple.bam", "e10c48dd294fb257802d4e73bb50580d"); return TestParams.getTests(TestParams.class); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java index 36c093e8f..454843859 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java @@ -1,3 +1,27 @@ +/* + * Copyright (c) 2012, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files 
(the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + package org.broadinstitute.sting.gatk.walkers.varianteval; import org.broadinstitute.sting.WalkerTest; @@ -30,7 +54,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("c8d8bffa5c572df9dec7364f71a1b943") + Arrays.asList("add8b2213c091a41f5d7a2c8dd68c03a") ); executeTest("testFunctionClassWithSnpeff", spec); } @@ -50,7 +74,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("081fcaa532c7ba8f23da739389e6f7c3") + Arrays.asList("621a712deb01e7fc7e5a13d3627b11ba") ); executeTest("testStratifySamplesAndExcludeMonomorphicSites", spec); } @@ -70,7 +94,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("b3852f84d07c270b8a12874083c3e31b") + Arrays.asList("94fb8cba9e236131c6fbf1d7fee738fe") ); executeTest("testFundamentalsCountVariantsSNPsandIndels", spec); } @@ -91,7 +115,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - 
Arrays.asList("cf70468b5ebaec408419da69b0a7fcb9") + Arrays.asList("670979268b05c3024297ba98d67d89ab") ); executeTest("testFundamentalsCountVariantsSNPsandIndelsWithNovelty", spec); } @@ -113,7 +137,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("5e3b8b85acfc41365c8208c23abf746b") + Arrays.asList("c38ce9c872a76ae7dd26c3e353bf0765") ); executeTest("testFundamentalsCountVariantsSNPsandIndelsWithNoveltyAndFilter", spec); } @@ -134,7 +158,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("ccdbc50d30ece6d0d3b199c397f03ed3") + Arrays.asList("2c37f23bf6114a2b27f21ed445806fd2") ); executeTest("testFundamentalsCountVariantsSNPsandIndelsWithCpG", spec); } @@ -155,7 +179,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("95c690d5af8ed51573eb2f0503dcd9c2") + Arrays.asList("206f0d629de9af0b97340cb22d34a81b") ); executeTest("testFundamentalsCountVariantsSNPsandIndelsWithFunctionalClass", spec); } @@ -176,7 +200,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("8e8547eb38b34bec0095b0500fd9641d") + Arrays.asList("bd869725429deae8f56175ba9a8ab390") ); executeTest("testFundamentalsCountVariantsSNPsandIndelsWithDegeneracy", spec); } @@ -197,7 +221,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("158a4651a656aea7f84c79548f6fe519") + Arrays.asList("9c7f6783a57ad681bb754b5e71de27dc") ); executeTest("testFundamentalsCountVariantsSNPsandIndelsWithSample", spec); } @@ -220,7 +244,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("76c8a0b28d2993644120f7afa5833ab2") + Arrays.asList("a2d280440aa3771937f3d2d10f1eea74") ); executeTest("testFundamentalsCountVariantsSNPsandIndelsWithJexlExpression", spec); } @@ -245,7 +269,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - 
Arrays.asList("34682193f458b93b39efac00b4fc6723") + Arrays.asList("2925d811dd521beb00059f8c8e818d83") ); executeTest("testFundamentalsCountVariantsSNPsandIndelsWithMultipleJexlExpressions", spec); } @@ -264,7 +288,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("52f6655f1532bcea24b402010d93ce73") + Arrays.asList("4b79bf2dfd73ddac0ceb0838a352bf9a") ); executeTest("testFundamentalsCountVariantsNoCompRod", spec); } @@ -277,7 +301,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { " --eval " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf" + " --comp:comp_genotypes,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.head.vcf"; WalkerTestSpec spec = new WalkerTestSpec(withSelect(tests, "DP < 50", "DP50") + " " + extraArgs + " -ST CpG -o %s", - 1, Arrays.asList("c49e239292704447a36e01ee9a71e729")); + 1, Arrays.asList("c2a4b0266c509944eafe6220fd8389da")); executeTestParallel("testSelect1", spec); } @@ -287,7 +311,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec(cmdRoot + " -ST CpG --eval:VCF3 " + validationDataLocation + vcfFile + " --comp:VCF3 " + validationDataLocation + "GenotypeConcordanceComp.vcf -noEV -EV GenotypeConcordance -o %s", 1, - Arrays.asList("9a56c20a7b9a554a7b530f2cb1dd776d")); + Arrays.asList("70da6a0f91a9f1052d68fc360cc99aed")); executeTestParallel("testVEGenotypeConcordance" + vcfFile, spec); } @@ -298,14 +322,14 @@ public class VariantEvalIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec("-T VariantEval -R "+b37KGReference+" --eval " + variantEvalTestDataRoot + vcfFile + " -ped "+ variantEvalTestDataRoot + pedFile +" -noEV -EV MendelianViolationEvaluator -L 1:10109-10315 -o %s -mvq 0 -noST", 1, - Arrays.asList("66e72c887124f40933d32254b2dd44a3")); + Arrays.asList("03581adcb4f2f7960662fc7ffd910f43")); executeTestParallel("testVEMendelianViolationEvaluator" + 
vcfFile, spec); } @Test public void testCompVsEvalAC() { String extraArgs = "-T VariantEval -R "+b36KGReference+" -o %s -ST CpG -EV GenotypeConcordance --eval:evalYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.very.few.lines.vcf --comp:compYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.fake.genotypes.ac.test.vcf"; - WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("fa13eb59892892c07711c6ffe31bf870")); + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("19cde5078dd7284c95be4797695d3200")); executeTestParallel("testCompVsEvalAC",spec); } @@ -323,7 +347,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { @Test public void testCompOverlap() { String extraArgs = "-T VariantEval -R " + b37KGReference + " -L " + validationDataLocation + "VariantEval/pacbio.hg19.intervals --comp:comphapmap " + comparisonDataLocation + "Validated/HapMap/3.3/genotypes_r27_nr.b37_fwd.vcf --eval " + validationDataLocation + "VariantEval/pacbio.ts.recalibrated.vcf -noEV -EV CompOverlap -sn NA12878 -noST -ST Novelty -o %s"; - WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("9002023b8aa8fc2c9aac58b8a79bca1e")); + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("00241ce70476187a2f910606b9242697")); executeTestParallel("testCompOverlap",spec); } @@ -335,7 +359,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { " --dbsnp " + b37dbSNP132 + " --eval:evalBI " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" + " -noST -ST Novelty -o %s"; - WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("e42cda858649a35eaa9d14ea2d70a956")); + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("5ac240e33082887264e07be7de0f095f")); executeTestParallel("testEvalTrackWithoutGenotypes",spec); } @@ -347,7 +371,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { " --eval:evalBI " + validationDataLocation + 
"VariantEval/ALL.20100201.chr20.bi.sites.vcf" + " --eval:evalBC " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bc.sites.vcf" + " -noST -ST Novelty -o %s"; - WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("9561cb4c7aa36dcf30ba253385299859")); + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("4dec767b6e7f2743eef89e586faab948")); executeTestParallel("testMultipleEvalTracksWithoutGenotypes",spec); } @@ -364,13 +388,13 @@ public class VariantEvalIntegrationTest extends WalkerTest { " -noST -noEV -ST Novelty -EV CompOverlap" + " -o %s"; - WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("61052c19211e7eb61fbbb62db5e40b56")); + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("15f6a6ba4f7fed49c617589ce9fdcbc5")); executeTestParallel("testMultipleCompTracks",spec); } @Test public void testPerSampleAndSubsettedSampleHaveSameResults1() { - String md5 = "0edded1cd578db62fa296c99c34a909d"; + String md5 = "bcf55537db0762b8fd68f7f02439c475"; WalkerTestSpec spec = new WalkerTestSpec( buildCommandLine( @@ -425,7 +449,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("ee22604616b3e9fc48a6dcbbf73a056d") + Arrays.asList("0c632b5be8a54e43afa576510b40c4da") ); executeTest("testAlleleCountStrat", spec); } @@ -446,7 +470,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("240369cd651c77e05e8a6659f4a6237e") + Arrays.asList("92404820a94e7cfb854ae73450a0fbd9") ); executeTest("testIntervalStrat", spec); } @@ -463,7 +487,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("397b0e77459b9b69d2e0dd1dac320c3c") + Arrays.asList("8cb8a393a0176e4df4290af7817c8647") ); executeTest("testModernVCFWithLargeIndels", spec); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VCFStreamingIntegrationTest.java 
b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VCFStreamingIntegrationTest.java index 16b6c97d0..4db2c7f6f 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VCFStreamingIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VCFStreamingIntegrationTest.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011, The Broad Institute + * Copyright (c) 2012, The Broad Institute * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation @@ -98,7 +98,7 @@ public class VCFStreamingIntegrationTest extends WalkerTest { " -EV CompOverlap -noEV -noST" + " -o %s", 1, - Arrays.asList("addf5f4596ddacef40808f6d3d281111") + Arrays.asList("666036d38f224d7c95b46a8d7197fe68") ); executeTest("testVCFStreamingChain", selectTestSpec); diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala index 7d4771c4b..3dc953361 100755 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala @@ -321,7 +321,7 @@ class GATKResourcesBundle extends QScript { } class LiftOverPerl(@Input val in: File, @Output val out: File, @Input val chain: File, oldRef: Reference, newRef: Reference) extends CommandLineFunction { - this.memoryLimit = 8 + this.memoryLimit = 12 def commandLine = ("%s -vcf %s -chain %s -out %s " + "-gatk ./ -newRef %s -oldRef %s -tmp %s").format(liftOverPerl, in.getAbsolutePath, chain, out.getAbsolutePath, newRef.file.replace(".fasta", ""),