GATKReport v0.2:

- Floating point column widths are measured correctly
- Using fixed-width columns instead of whitespace-separated columns, which allows spaces to be embedded in cell values
- Legacy support for parsing white space separated v0.1 tables where the columns may not be fixed width
- Enforcing that table descriptions do not contain newlines so that tables can be parsed correctly
Replaced GATKReportTableParser with existing functionality in GATKReport
This commit is contained in:
Khalid Shakir 2011-08-03 00:24:47 -04:00
parent d3437e62da
commit 5dcac7b064
16 changed files with 532 additions and 247 deletions

View File

@ -20,6 +20,20 @@
assign(tableName, d, envir=tableEnv);
}
# Split one fixed width line of text into its cells, trimming surrounding white space from each cell.
# columnStarts holds the 1-based start position of every column after the first one.
.gsa.splitFixedWidth <- function(line, columnStarts) {
    # Column i spans starts[i]..stops[i]; the last column runs to the end of the line.
    starts = c(1, columnStarts);
    stops = c(columnStarts - 1, nchar(line));
    trimCells <- function(text) {
        cells = substring(text, starts, stops);
        gsub("^[[:space:]]+|[[:space:]]+$", "", cells);
    }
    sapply(line, trimCells)[,1];
}
# Load all GATKReport tables from a file
gsa.read.gatkreport <- function(filename) {
con = file(filename, "r", blocking = TRUE);
@ -31,9 +45,10 @@ gsa.read.gatkreport <- function(filename) {
tableName = NA;
tableHeader = c();
tableRows = c();
version = NA;
for (line in lines) {
if (length(grep("^##:GATKReport.v0.1[[:space:]]+", line, ignore.case=TRUE)) > 0) {
if (length(grep("^##:GATKReport.v", line, ignore.case=TRUE)) > 0) {
headerFields = unlist(strsplit(line, "[[:space:]]+"));
if (!is.na(tableName)) {
@ -43,13 +58,37 @@ gsa.read.gatkreport <- function(filename) {
tableName = headerFields[2];
tableHeader = c();
tableRows = c();
# For differences in versions see
# $STING_HOME/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportVersion.java
if (length(grep("^##:GATKReport.v0.1[[:space:]]+", line, ignore.case=TRUE)) > 0) {
version = "v0.1";
} else if (length(grep("^##:GATKReport.v0.2[[:space:]]+", line, ignore.case=TRUE)) > 0) {
version = "v0.2";
columnStarts = c();
}
} else if (length(grep("^[[:space:]]*$", line)) > 0 | length(grep("^[[:space:]]*#", line)) > 0) {
# do nothing
} else if (!is.na(tableName)) {
row = unlist(strsplit(line, "[[:space:]]+"));
if (version == "v0.1") {
row = unlist(strsplit(line, "[[:space:]]+"));
} else if (version == "v0.2") {
if (length(tableHeader) == 0) {
headerChars = unlist(strsplit(line, ""));
# Find the first position of non space characters, excluding the first character
columnStarts = intersect(grep("[[:space:]]", headerChars, invert=TRUE), grep("[[:space:]]", headerChars) + 1);
}
row = .gsa.splitFixedWidth(line, columnStarts);
}
if (length(tableHeader) == 0) {
tableHeader = row;
tableHeader = row;
} else {
tableRows = rbind(tableRows, row);
}

View File

@ -1,21 +1,23 @@
package org.broadinstitute.sting.gatk.report;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.StingException;
import org.broadinstitute.sting.utils.text.TextFormattingUtils;
import java.io.*;
import java.util.List;
import java.util.TreeMap;
/**
* Container class for GATK report tables
*/
public class GATKReport {
private TreeMap<String, GATKReportTable> tables;
private TreeMap<String, GATKReportTable> tables = new TreeMap<String, GATKReportTable>();
/**
* Create a new, empty GATKReport.
*/
public GATKReport() {
tables = new TreeMap<String, GATKReportTable>();
}
/**
@ -23,7 +25,7 @@ public class GATKReport {
* @param filename the path to the file to load
*/
public GATKReport(String filename) {
loadReport(new File(filename));
this(new File(filename));
}
/**
@ -31,7 +33,6 @@ public class GATKReport {
* @param file the file to load
*/
public GATKReport(File file) {
tables = new TreeMap<String, GATKReportTable>();
loadReport(file);
}
@ -46,11 +47,17 @@ public class GATKReport {
GATKReportTable table = null;
String[] header = null;
int id = 0;
GATKReportVersion version = null;
List<Integer> columnStarts = null;
String line;
while ( (line = reader.readLine()) != null ) {
if (line.startsWith("##:GATKReport.v0.1 ")) {
line = line.replaceFirst("##:GATKReport.v0.1 ", "");
if (line.startsWith("##:GATKReport.v")) {
version = GATKReportVersion.fromHeader(line);
line = line.replaceFirst("##:GATKReport." + version.versionString + " ", "");
String[] pieces = line.split(" : ");
String tableName = pieces[0];
@ -58,14 +65,35 @@ public class GATKReport {
addTable(tableName, tableDesc);
table = getTable(tableName);
table.setVersion(version);
header = null;
} else if ( line.isEmpty() ) {
columnStarts = null;
} else if ( line.trim().isEmpty() ) {
// do nothing
} else {
if (table != null) {
String[] splitLine;
switch (version) {
case V0_1:
splitLine = TextFormattingUtils.splitWhiteSpace(line);
break;
case V0_2:
if (header == null) {
columnStarts = TextFormattingUtils.getWordStarts(line);
}
splitLine = TextFormattingUtils.splitFixedWidth(line, columnStarts);
break;
default:
throw new ReviewedStingException("GATK report version parsing not implemented for: " + line);
}
if (header == null) {
header = line.split("\\s+");
header = splitLine;
table.addPrimaryKey("id", false);
@ -75,10 +103,8 @@ public class GATKReport {
id = 0;
} else {
String[] entries = line.split("\\s+");
for (int columnIndex = 0; columnIndex < header.length; columnIndex++) {
table.set(id, header[columnIndex], entries[columnIndex]);
table.set(id, header[columnIndex], splitLine[columnIndex]);
}
id++;
@ -125,7 +151,10 @@ public class GATKReport {
* @return the table object
*/
public GATKReportTable getTable(String tableName) {
return tables.get(tableName);
GATKReportTable table = tables.get(tableName);
if (table == null)
throw new ReviewedStingException("Table is not in GATKReport: " + tableName);
return table;
}
/**

View File

@ -37,10 +37,10 @@ public class GATKReportColumn extends TreeMap<Object, Object> {
* tables, as the table gets written properly without having to waste storage for the unset elements (usually the zero
* values) in the table.
*
* @param primaryKey the primary key position in the column that should be set
* @param primaryKey the primary key position in the column that should be retrieved
* @return the value at the specified position in the column, or the default value if the element is not set
*/
public Object getWithoutSideEffects(Object primaryKey) {
private Object getWithoutSideEffects(Object primaryKey) {
if (!this.containsKey(primaryKey)) {
return defaultValue;
}
@ -48,6 +48,16 @@ public class GATKReportColumn extends TreeMap<Object, Object> {
return this.get(primaryKey);
}
/**
 * Looks up the value stored under the given primary key, falling back to the column default when the
 * key has never been set, and renders the result as a string.
 *
 * @param primaryKey the primary key position in the column that should be retrieved
 * @return the string form of the stored value, or of the default value if the element is not set
 */
public String getStringValue(Object primaryKey) {
    final Object storedOrDefault = getWithoutSideEffects(primaryKey);
    return toString(storedOrDefault);
}
/**
* Return the displayable property of the column. If true, the column will be displayed in the final output.
* If not, printing will be suppressed for the contents of the table.
@ -67,7 +77,7 @@ public class GATKReportColumn extends TreeMap<Object, Object> {
for (Object obj : this.values()) {
if (obj != null) {
int width = obj.toString().length();
int width = toString(obj).length();
if (width > maxWidth) {
maxWidth = width;
@ -77,4 +87,23 @@ public class GATKReportColumn extends TreeMap<Object, Object> {
return maxWidth;
}
/**
 * Converts a cell value to the text used when rendering the column.
 * Floats and doubles are formatted with eight decimal places; a null value becomes the literal "null".
 *
 * @param obj The object to convert to a string
 * @return The string representation of the value
 */
private static String toString(Object obj) {
    if (obj == null) {
        return "null";
    }
    if (obj instanceof Float || obj instanceof Double) {
        return String.format("%.8f", obj);
    }
    return obj.toString();
}
}

View File

@ -24,26 +24,32 @@
package org.broadinstitute.sting.gatk.report;
import org.broadinstitute.sting.BaseTest;
import org.testng.Assert;
import org.testng.annotations.Test;
import java.util.*;
import java.io.File;
/**
* Tracks GATKReportColumn objects by name, remembering the order in which names were added so that a column can also be retrieved by index.
*/
public class GATKReportColumns extends LinkedHashMap<String, GATKReportColumn> {
private List<String> columnNames = new ArrayList<String>();
public class GATKReportParserUnitTest extends BaseTest {
@Test
public void testParse() throws Exception {
GATKReportParser parser = new GATKReportParser();
parser.parse(new File(validationDataLocation + "exampleGATKReport.eval"));
/**
 * Looks up a column by its position in the list of recorded column names.
 * @param i the index into the recorded column names
 * @return The column registered under the name at that index
 */
public GATKReportColumn getByIndex(int i) {
    final String columnName = columnNames.get(i);
    return get(columnName);
}
Assert.assertEquals(parser.getValue("CountVariants", "none.eval.none.all", "nProcessedLoci"), "100000");
Assert.assertEquals(parser.getValue("CountVariants", "none.eval.none.all", "nNoCalls"), "99872");
@Override
public GATKReportColumn remove(Object key) {
    // Drop the column from the map and forget its recorded name as well.
    final GATKReportColumn removed = super.remove(key);
    columnNames.remove(key);
    return removed;
}
Assert.assertEquals(parser.getValue("SimpleMetricsByAC.metrics", "none.eval.none.novel.ac2", "AC"), "2");
Assert.assertNull(parser.getValue("SimpleMetricsByAC.metrics", "none.eval.none.novel.ac2.bad", "AC"));
Assert.assertNull(parser.getValue("SimpleMetricsByAC.metrics", "none.eval.none.novel.ac2", "AC.bad"));
Assert.assertNull(parser.getValue("SimpleMetricsByAC.metrics.bad", "none.eval.none.novel.ac2", "AC"));
Assert.assertEquals(parser.getValue("ValidationReport", "none.eval.none.known", "sensitivity"), "NaN");
/**
 * Stores a column under the given name, recording the name for index based lookup.
 * A name that is already present keeps its original index; only its column is replaced.
 *
 * @param key the column name
 * @param value the column to store
 * @return the column previously stored under the name, or null if there was none
 */
@Override
public GATKReportColumn put(String key, GATKReportColumn value) {
    // Only record names that are new: re-putting an existing name must not append a duplicate,
    // otherwise every later index shifts and getByIndex returns the wrong column.
    if (!containsKey(key)) {
        columnNames.add(key);
    }
    return super.put(key, value);
}
}

View File

@ -1,83 +0,0 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.report;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.broadinstitute.sting.utils.text.XReadLines;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
/**
 * Reads a GATK report and keeps one GATKReportTableParser per v0.1 table header found in it.
 */
public class GATKReportParser {
    private List<GATKReportTableParser> tables = new ArrayList<GATKReportTableParser>();

    /**
     * Parses the report stored in a file; the stream opened on the file is always closed.
     * @param file the report file
     * @throws IOException if the file cannot be opened or read
     */
    public void parse(File file) throws IOException {
        InputStream fileStream = FileUtils.openInputStream(file);
        try {
            parse(fileStream);
        } finally {
            IOUtils.closeQuietly(fileStream);
        }
    }

    /**
     * Parses a report from a stream, line by line.
     * @param input the report contents
     * @throws IOException if the stream cannot be read
     */
    public void parse(InputStream input) throws IOException {
        GATKReportTableParser current = null;
        for (String line: new XReadLines(input)) {
            if (line.startsWith("##:GATKReport.v0.1 ")) {
                // A table header opens a new table, and the header line itself is fed to it.
                current = newTableParser(line);
                tables.add(current);
                current.parse(line);
                continue;
            }
            if (current == null)
                continue;
            // A blank line closes the current table; any other line belongs to it.
            if (line.trim().length() == 0)
                current = null;
            else
                current.parse(line);
        }
    }

    /**
     * Returns the value of a column for the row matching the key, from the first table with the given name.
     * @return the value, or null if no table has that name or the table has no match
     */
    public String getValue(String tableName, String[] key, String column) {
        GATKReportTableParser match = findTable(tableName);
        return (match == null) ? null : match.getValue(key, column);
    }

    /**
     * Returns the value of a column for the row matching the key, from the first table with the given name.
     * @return the value, or null if no table has that name or the table has no match
     */
    public String getValue(String tableName, String key, String column) {
        GATKReportTableParser match = findTable(tableName);
        return (match == null) ? null : match.getValue(key, column);
    }

    /** Returns the first parsed table with the given name, or null if there is none. */
    private GATKReportTableParser findTable(String tableName) {
        for (GATKReportTableParser candidate: tables)
            if (candidate.getTableName().equals(tableName))
                return candidate;
        return null;
    }

    /** Creates the parser for a new table; the header argument is currently unused. */
    private GATKReportTableParser newTableParser(String header) {
        return new GATKReportTableParser();
    }
}

View File

@ -1,5 +1,6 @@
package org.broadinstitute.sting.gatk.report;
import org.apache.commons.lang.ObjectUtils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.io.PrintStream;
@ -88,17 +89,20 @@ import java.util.regex.Pattern;
* but at least the prototype contained herein works.
*
* @author Kiran Garimella
* @author Khalid Shakir
*/
public class GATKReportTable {
private static final GATKReportVersion LATEST_REPORT_VERSION = GATKReportVersion.V0_2;
private String tableName;
private String tableDescription;
private GATKReportVersion version = LATEST_REPORT_VERSION;
private String primaryKeyName;
private Collection<Object> primaryKeyColumn;
private boolean primaryKeyDisplay;
boolean sortByPrimaryKey = true;
private boolean sortByPrimaryKey = true;
private LinkedHashMap<String, GATKReportColumn> columns;
private GATKReportColumns columns;
/**
* Verifies that a table or column name has only alphanumeric characters - no spaces or special characters allowed
@ -113,6 +117,19 @@ public class GATKReportTable {
return !m.find();
}
/**
 * Verifies that a table description contains no carriage return or line feed characters.
 *
 * @param description the description of the table
 * @return true if the description is valid, false if otherwise
 */
private boolean isValidDescription(String description) {
    // Plain character searches give the same answer as the regex "\\r|\\n" without compiling a Pattern on every call.
    return description.indexOf('\r') < 0 && description.indexOf('\n') < 0;
}
/**
* Construct a new GATK report table with the specified name and description
*
@ -128,11 +145,23 @@ public class GATKReportTable {
throw new ReviewedStingException("Attempted to set a GATKReportTable name of '" + tableName + "'. GATKReportTable names must be purely alphanumeric - no spaces or special characters are allowed.");
}
if (!isValidDescription(tableDescription)) {
throw new ReviewedStingException("Attempted to set a GATKReportTable description of '" + tableDescription + "'. GATKReportTable descriptions must not contain newlines.");
}
this.tableName = tableName;
this.tableDescription = tableDescription;
this.sortByPrimaryKey = sortByPrimaryKey;
columns = new LinkedHashMap<String, GATKReportColumn>();
columns = new GATKReportColumns();
}
/**
 * Returns the report format version associated with this table.
 *
 * @return the version currently set on this table
 */
public GATKReportVersion getVersion() {
return version;
}
/**
 * Sets the report format version associated with this table.
 *
 * @param version the version to record for this table
 */
protected void setVersion(GATKReportVersion version) {
this.version = version;
}
/**
@ -161,6 +190,57 @@ public class GATKReportTable {
primaryKeyDisplay = display;
}
/**
 * Looks up the first primary key whose row matches the dotted column values, failing if none matches.
 * Ex: dbsnp.eval.called.all.novel.all
 * @param dottedColumnValues Period concatenated values.
 * @return The first primary key matching the column values.
 * @throws ReviewedStingException if no row matches the column values.
 */
public Object getPrimaryKey(String dottedColumnValues) {
    final Object match = findPrimaryKey(dottedColumnValues);
    if (match != null)
        return match;
    throw new ReviewedStingException("Attempted to get non-existent GATKReportTable key for values: " + dottedColumnValues);
}
/**
 * Reports whether at least one row matches the dotted column values.
 * Ex: dbsnp.eval.called.all.novel.all
 * @param dottedColumnValues Period concatenated values.
 * @return true if there is at least one row matching the columns.
 */
public boolean containsPrimaryKey(String dottedColumnValues) {
    final Object match = findPrimaryKey(dottedColumnValues);
    return match != null;
}
/**
 * Splits the dotted column values on periods and searches for the first matching primary key.
 * Ex: dbsnp.eval.called.all.novel.all
 * @param dottedColumnValues Period concatenated values.
 * @return The first primary key matching the column values or null.
 */
private Object findPrimaryKey(String dottedColumnValues) {
    final String[] columnValues = dottedColumnValues.split("\\.");
    return findPrimaryKey(columnValues);
}
/**
 * Scans the primary keys in order and returns the first one whose row matches every column value.
 * Ex: new String[] { "dbsnp", "eval", "called", "all", "novel", "all" }
 * @param columnValues column values.
 * @return The first primary key matching the column values, or null if no row matches.
 */
private Object findPrimaryKey(Object[] columnValues) {
    for (Object candidateKey : primaryKeyColumn) {
        // Value i is compared against the column at index i + 1; stop at the first mismatch.
        int matched = 0;
        while (matched < columnValues.length
                && ObjectUtils.equals(columnValues[matched], get(candidateKey, matched + 1))) {
            matched++;
        }
        if (matched == columnValues.length)
            return candidateKey;
    }
    return null;
}
/**
* Add a column to the report and specify the default value that should be supplied if a given position in the table is never explicitly set.
*
@ -230,6 +310,17 @@ public class GATKReportTable {
return columns.get(columnName).get(primaryKey);
}
/**
 * Reads the value stored for a primary key in the column found at the given index.
 *
 * @param primaryKey the primary key value
 * @param columnIndex the index of the column
 * @return the value stored at the specified position in the table
 */
private Object get(Object primaryKey, int columnIndex) {
    final GATKReportColumn column = columns.getByIndex(columnIndex);
    return column.get(primaryKey);
}
/**
* Increment an element in the table. This implementation is awful - a functor would probably be better.
*
@ -515,7 +606,7 @@ public class GATKReportTable {
String primaryKeyFormat = "%-" + getPrimaryKeyColumnWidth() + "s";
// Emit the table definition
out.printf("##:GATKReport.v0.1 %s : %s%n", tableName, tableDescription);
out.printf("##:GATKReport.%s %s : %s%n", LATEST_REPORT_VERSION.versionString, tableName, tableDescription);
// Emit the table header, taking into account the padding requirement if the primary key is a hidden column
boolean needsPadding = false;
@ -545,22 +636,8 @@ public class GATKReportTable {
for (String columnName : columns.keySet()) {
if (columns.get(columnName).isDisplayable()) {
Object obj = columns.get(columnName).getWithoutSideEffects(primaryKey);
if (needsPadding) { out.printf(" "); }
String value = "null";
if (obj != null) {
if (obj instanceof Float) {
value = String.format("%.8f", (Float) obj);
} else if (obj instanceof Double) {
value = String.format("%.8f", (Double) obj);
} else {
value = obj.toString();
}
}
//out.printf(columnWidths.get(columnName), obj == null ? "null" : obj.toString());
String value = columns.get(columnName).getStringValue(primaryKey);
out.printf(columnWidths.get(columnName), value);
needsPadding = true;

View File

@ -1,75 +0,0 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.report;
import org.apache.commons.lang.StringUtils;
import java.util.*;
/**
 * Accumulates the lines of a single report table, one line per call to parse,
 * and answers lookups of a column value for a row identified by its leading cell values.
 */
public class GATKReportTableParser {
// Number of lines passed to parse so far.
private int lineNum = 0;
// Tokens of the first line passed to parse.
private String[] descriptions;
// Maps a token to its position within the line it was read from.
private Map<String, Integer> headers = new HashMap<String, Integer>();
// Tokens of every line passed to parse.
private List<String[]> values = new ArrayList<String[]>();
/**
 * Consumes the next line of the table.
 *
 * @param line the next line of the table
 */
public void parse(String line) {
lineNum++;
// NOTE(review): none of the cases ends with a break, so the first line runs all three
// branches and the second line runs the last two. As a result the tokens of the first line
// are also registered in headers and both the first and second lines are stored in values.
// Confirm whether this fall-through is intentional.
switch (lineNum) {
case 1:
descriptions = parseLine(line);
case 2:
String[] columnHeaders = parseLine(line);
for (int i = 0; i < columnHeaders.length; i++)
headers.put(columnHeaders[i], i);
default:
values.add(parseLine(line));
}
}
/**
 * @return the second token of the first line passed to parse
 */
public String getTableName() {
return descriptions[1];
}
/**
 * Finds the first stored line whose tokens at positions 1 through key.length equal the key
 * and returns that line's token at the position registered for the column.
 *
 * @param key the cell values identifying the row
 * @param column the column name to read
 * @return the matching token, or null if the column is unknown or no line matches
 */
public String getValue(String[] key, String column) {
if (!headers.containsKey(column))
return null;
for (String[] row: values)
// Position 0 of each line is skipped when comparing against the key.
if (Arrays.equals(key, Arrays.copyOfRange(row, 1, key.length + 1)))
return row[headers.get(column)];
return null;
}
/**
 * Same lookup as the array version, with the key given as period separated values.
 *
 * @param key period separated cell values identifying the row
 * @param column the column name to read
 * @return the matching token, or null if the column is unknown or no line matches
 */
public String getValue(String key, String column) {
return getValue(key.split("\\."), column);
}
// Joins a range of the row's tokens with periods; not called from within this class.
private String generateKey(String[] row, int i) {
return StringUtils.join(row, ".", 0, i);
}
// Splits a line into tokens on runs of one or more space characters.
private String[] parseLine(String line) {
return line.split(" +");
}
}

View File

@ -0,0 +1,70 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.report;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
/**
 * The known versions of the GATK report text format, identified by the version string in a table header.
 */
public enum GATKReportVersion {
    /**
     * Compared with the other versions:
     * - Spaces are not allowed in cells.
     * - Mostly fixed width, but the string width of floating point values was not
     *   measured correctly, leading to columns that aren't aligned.
     */
    V0_1("v0.1"),

    /**
     * Compared with the other versions:
     * - Spaces are allowed in cells, for example in sample names such as "C507/FG-CR 6".
     * - Column widths are measured correctly for floating point values.
     */
    V0_2("v0.2");

    /** The version as it is written in a table header, for example "v0.2". */
    public final String versionString;

    private GATKReportVersion(String versionString) {
        this.versionString = versionString;
    }

    @Override
    public String toString() {
        return versionString;
    }

    /**
     * Returns the GATK Report Version from the file header.
     * @param header Header from the file starting with ##:GATKReport.v[version]
     * @return The version as an enum.
     * @throws ReviewedStingException if the header does not start with a known version prefix.
     */
    public static GATKReportVersion fromHeader(String header) {
        for (GATKReportVersion candidate : values()) {
            // The trailing space is part of the prefix, so the version string must be followed by a space.
            if (header.startsWith("##:GATKReport." + candidate.versionString + " "))
                return candidate;
        }
        throw new ReviewedStingException("Unknown GATK report version in header: " + header);
    }
}

View File

@ -235,7 +235,7 @@ public class DiffEngine {
// now that we have a specific list of values we want to show, display them
GATKReport report = new GATKReport();
final String tableName = "diffences";
report.addTable(tableName, "Summarized differences between the master and test files.\nSee http://www.broadinstitute.org/gsa/wiki/index.php/DiffEngine for more information", false);
report.addTable(tableName, "Summarized differences between the master and test files. See http://www.broadinstitute.org/gsa/wiki/index.php/DiffEngine for more information", false);
GATKReportTable table = report.getTable(tableName);
table.addPrimaryKey("Difference", true);
table.addColumn("NumberOfOccurrences", 0);

View File

@ -116,4 +116,57 @@ public class TextFormattingUtils {
return bundle;
}
/**
 * Finds every position where a non-whitespace character directly follows a whitespace character.
 * Position 0 is never reported. The returned list is compatible with splitFixedWidth.
 * @param line Text to parse.
 * @return the word starting positions within line, excluding the first position 0.
 */
public static List<Integer> getWordStarts(String line) {
    if (line == null)
        throw new ReviewedStingException("line is null");
    final List<Integer> wordStarts = new ArrayList<Integer>();
    // Remembers whether the previous character was whitespace; false before the first character.
    boolean afterWhitespace = false;
    for (int position = 0; position < line.length(); position++) {
        final boolean whitespace = Character.isWhitespace(line.charAt(position));
        if (afterWhitespace && !whitespace)
            wordStarts.add(position);
        afterWhitespace = whitespace;
    }
    return wordStarts;
}
/**
 * Parses a fixed width line of text.
 * A line that ends before one or more of the column starts yields empty strings for the cells
 * that lie beyond its end instead of failing.
 * @param line Text to parse.
 * @param columnStarts the column starting positions within line, excluding the first position 0.
 * @return The parsed string array with each entry trimmed; always columnStarts.size() + 1 entries.
 */
public static String[] splitFixedWidth(String line, List<Integer> columnStarts) {
    if (line == null)
        throw new ReviewedStingException("line is null");
    if (columnStarts == null)
        throw new ReviewedStingException("columnStarts is null");
    final int lineLength = line.length();
    final int startCount = columnStarts.size();
    final String[] row = new String[startCount + 1];
    int cellStart = 0;
    for (int i = 0; i <= startCount; i++) {
        // The last cell runs to the end of the line; every other cell ends where the next one starts.
        // Clamping to the line length keeps short lines from raising StringIndexOutOfBoundsException.
        final int cellEnd = (i < startCount) ? Math.min(columnStarts.get(i), lineLength) : lineLength;
        row[i] = line.substring(cellStart, cellEnd).trim();
        cellStart = cellEnd;
    }
    return row;
}
/**
 * Trims a line of text and splits it on runs of whitespace.
 * @param line Text to parse.
 * @return The parsed string array.
 */
public static String[] splitWhiteSpace(String line) {
    if (line == null)
        throw new ReviewedStingException("line is null");
    final String trimmed = line.trim();
    return trimmed.split("\\s+");
}
}

View File

@ -0,0 +1,55 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.report;
import org.broadinstitute.sting.BaseTest;
import org.testng.Assert;
import org.testng.annotations.Test;
/**
 * Checks that GATKReport loads the example v0.1 report and exposes its tables, versions and cell values.
 */
public class GATKReportUnitTest extends BaseTest {
    @Test
    public void testParse() throws Exception {
        String reportPath = validationDataLocation + "exampleGATKReport.eval";
        GATKReport report = new GATKReport(reportPath);

        GATKReportTable countVariants = report.getTable("CountVariants");
        Assert.assertEquals(countVariants.getVersion(), GATKReportVersion.V0_1);
        Object countVariantsPK = countVariants.getPrimaryKey("none.eval.none.all");
        Assert.assertEquals(countVariants.get(countVariantsPK, "nProcessedLoci"), "100000");
        Assert.assertEquals(countVariants.get(countVariantsPK, "nNoCalls"), "99872");

        GATKReportTable validationReport = report.getTable("ValidationReport");
        Assert.assertEquals(validationReport.getVersion(), GATKReportVersion.V0_1);
        // The key must be looked up in the table it is used with; a key taken from another
        // table only addresses the intended row when the two tables happen to line up.
        Object validationReportPK = validationReport.getPrimaryKey("none.eval.none.known");
        Assert.assertEquals(validationReport.get(validationReportPK, "sensitivity"), "NaN");

        GATKReportTable simpleMetricsByAC = report.getTable("SimpleMetricsByAC.metrics");
        Assert.assertEquals(simpleMetricsByAC.getVersion(), GATKReportVersion.V0_1);
        Object simpleMetricsByACPK = simpleMetricsByAC.getPrimaryKey("none.eval.none.novel.ac2");
        Assert.assertEquals(simpleMetricsByAC.get(simpleMetricsByACPK, "AC"), "2");

        Assert.assertFalse(simpleMetricsByAC.containsPrimaryKey("none.eval.none.novel.ac2.bad"));
    }
}

View File

@ -30,8 +30,6 @@ import org.testng.annotations.Test;
import java.io.File;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
public class DiffObjectsIntegrationTest extends WalkerTest {
private class TestParams extends TestDataProvider {
@ -52,8 +50,8 @@ public class DiffObjectsIntegrationTest extends WalkerTest {
@DataProvider(name = "data")
public Object[][] createData() {
new TestParams(testDir + "diffTestMaster.vcf", testDir + "diffTestTest.vcf", "4d9f4636de05b93c354d05011264546e");
new TestParams(testDir + "exampleBAM.bam", testDir + "exampleBAM.simple.bam", "37e6efd833b5cd6d860a9df3df9713fc");
new TestParams(testDir + "diffTestMaster.vcf", testDir + "diffTestTest.vcf", "92311de76dda3f38aac289d807ef23d0");
new TestParams(testDir + "exampleBAM.bam", testDir + "exampleBAM.simple.bam", "0c69412c385fda50210f2a612e1ffe4a");
return TestParams.getTests(TestParams.class);
}

View File

@ -4,8 +4,6 @@ import org.broadinstitute.sting.WalkerTest;
import org.testng.annotations.Test;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
public class VariantEvalIntegrationTest extends WalkerTest {
private static String variantEvalTestDataRoot = validationDataLocation + "/VariantEval";
@ -45,7 +43,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("48b8417c1f8bd74ff7b9808580abd2a2")
Arrays.asList("bced1842c78fbabb089dd12b7087050d")
);
executeTest("testFundamentalsCountVariantsSNPsandIndels", spec);
}
@ -66,7 +64,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("86d45ecefdf5849c55b3ca8f82a3d525")
Arrays.asList("06510bd37ffaa39e817ca0dcaf8f8ac2")
);
executeTest("testFundamentalsCountVariantsSNPsandIndelsWithNovelty", spec);
}
@ -88,7 +86,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("3d18901ec1766aa2e748eac913f5ddcd")
Arrays.asList("19c5b1b6396921c5b1059a2849ae4fcc")
);
executeTest("testFundamentalsCountVariantsSNPsandIndelsWithNoveltyAndFilter", spec);
}
@ -109,7 +107,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("677fe398643e62a10d6739d36a720a12")
Arrays.asList("a71f8d81cf166cd97ac628092650964a")
);
executeTest("testFundamentalsCountVariantsSNPsandIndelsWithCpG", spec);
}
@ -130,7 +128,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("5fb44fd7cb00941c986a9941e43e44cd")
Arrays.asList("4dabe0658232f6174188515db6dfe112")
);
executeTest("testFundamentalsCountVariantsSNPsandIndelsWithFunctionalClass", spec);
}
@ -151,7 +149,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("daaca7ef3b7313e5af217cbc6f37c9e2")
Arrays.asList("3340587f10ceff83e5567ddfd1a9a60e")
);
executeTest("testFundamentalsCountVariantsSNPsandIndelsWithDegeneracy", spec);
}
@ -172,7 +170,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("97c466f8ffd0fcf2c30ef08669d213d9")
Arrays.asList("c730c7ee31c8138cef6efd8dd04fbbfc")
);
executeTest("testFundamentalsCountVariantsSNPsandIndelsWithSample", spec);
}
@ -195,7 +193,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("df8cdfcf3d0c2fc795812c6eae6a76f8")
Arrays.asList("2559ca8f454b03e81561f6947f79df18")
);
executeTest("testFundamentalsCountVariantsSNPsandIndelsWithJexlExpression", spec);
}
@ -220,7 +218,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("c7aed12265e2b2311d17a0cc8a29f6aa")
Arrays.asList("23aa5f97641d2fd033095f21c51d2f37")
);
executeTest("testFundamentalsCountVariantsSNPsandIndelsWithMultipleJexlExpressions", spec);
}
@ -239,7 +237,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("d44c8f44384189a09eea85a8e89d7299")
Arrays.asList("a69dd3f06903b3f374c6d6f010c653e0")
);
executeTest("testFundamentalsCountVariantsNoCompRod", spec);
}
@ -249,7 +247,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
String extraArgs = "-L 1:1-10,000,000";
for (String tests : testsEnumerations) {
WalkerTestSpec spec = new WalkerTestSpec(withSelect(tests, "DP < 50", "DP50") + " " + extraArgs + " -ST CpG -o %s",
1, Arrays.asList("cdbe47ea01b9dd79ff1c5ce6f5fa8bec"));
1, Arrays.asList("db95c8af8ba549d38ca6741a59fd6892"));
executeTestParallel("testSelect1", spec);
}
}
@ -260,14 +258,14 @@ public class VariantEvalIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(cmdRoot + " -ST CpG -B:eval,VCF3 " + validationDataLocation + vcfFile + " -B:comp,VCF3 " + validationDataLocation + "GenotypeConcordanceComp.vcf -noEV -EV GenotypeConcordance -o %s",
1,
Arrays.asList("e4c981f7f5d78680c71310fc9be9a1c1"));
Arrays.asList("96f27163f16bb945f19c6623cd6db34e"));
executeTestParallel("testVEGenotypeConcordance" + vcfFile, spec);
}
@Test
public void testCompVsEvalAC() {
String extraArgs = "-T VariantEval -R "+b36KGReference+" -o %s -ST CpG -EV GenotypeConcordance -B:evalYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.very.few.lines.vcf -B:compYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.fake.genotypes.ac.test.vcf";
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("162daa5039e1965eb2423a8589339a69"));
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("d1932be3748fcf6da77dc51aec323710"));
executeTestParallel("testCompVsEvalAC",spec);
}
@ -278,14 +276,14 @@ public class VariantEvalIntegrationTest extends WalkerTest {
@Test
public void testTranches() {
String extraArgs = "-T VariantEval -R "+ hg18Reference +" -B:eval,vcf " + validationDataLocation + "GA2.WEx.cleaned.ug.snpfiltered.indelfiltered.optimized.vcf -o %s -EV TiTvVariantEvaluator -L chr1 -noEV -ST CpG -tf " + testDir + "tranches.6.txt";
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("90cd98044e754b80034a9f4e6d2c55b9"));
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("984df6e94a546294fc7e0846cbac2dfe"));
executeTestParallel("testTranches",spec);
}
@Test
public void testCompOverlap() {
String extraArgs = "-T VariantEval -R " + b37KGReference + " -L " + validationDataLocation + "VariantEval/pacbio.hg19.intervals -B:comphapmap,vcf " + comparisonDataLocation + "Validated/HapMap/3.3/genotypes_r27_nr.b37_fwd.vcf -B:eval,vcf " + validationDataLocation + "VariantEval/pacbio.ts.recalibrated.vcf -noEV -EV CompOverlap -sn NA12878 -noST -ST Novelty -o %s";
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("70aa420929de7f888a6f48c2d01bbcda"));
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("462d4784dd55294ef9d5118217b157a5"));
executeTestParallel("testCompOverlap",spec);
}
@ -299,7 +297,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
" -D " + dbsnp +
" -B:evalBI,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" +
" -noST -ST Novelty -o %s";
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("5b1fc9a4066aca61f1b5f7b933ad37d9"));
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("61c36fb6cc75172e2b22a44edeae85e0"));
executeTestParallel("testEvalTrackWithoutGenotypes",spec);
}
@ -313,7 +311,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
" -B:evalBI,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" +
" -B:evalBC,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bc.sites.vcf" +
" -noST -ST Novelty -o %s";
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("6d902d9d4d8fef5219a43e416a51cee6"));
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("79089484097614b7ab81bbc3ad3a892a"));
executeTestParallel("testMultipleEvalTracksWithoutGenotypes",spec);
}
@ -330,13 +328,13 @@ public class VariantEvalIntegrationTest extends WalkerTest {
" -noST -noEV -ST Novelty -EV CompOverlap" +
" -o %s";
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("55a1c53bced20701c56accfc3eb782a7"));
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("9f906c04a4553d649b51ae67e0a25113"));
executeTestParallel("testMultipleCompTracks",spec);
}
@Test
public void testPerSampleAndSubsettedSampleHaveSameResults() {
String md5 = "454a1750fd36525f24172b21af5f49de";
String md5 = "97a16a99a43d2384cfabc39d36647419";
WalkerTestSpec spec = new WalkerTestSpec(
buildCommandLine(
@ -391,7 +389,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("bf324e4c87fe0d21170fcd2a67a20371")
Arrays.asList("44464fe7c89a56cf128a932ef640f7da")
);
executeTest("testAlleleCountStrat", spec);
}

View File

@ -98,7 +98,7 @@ public class VCFStreamingIntegrationTest extends WalkerTest {
" -EV CompOverlap -noEV -noST" +
" -o %s",
1,
Arrays.asList("f60729c900bc8368717653b3fad80d1e") //"f60729c900bc8368717653b3fad80d1e"
Arrays.asList("ea09bf764adba9765b99921c5ba2c709")
);
executeTest("testVCFStreamingChain", selectTestSpec);

View File

@ -0,0 +1,88 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.text;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.testng.Assert;
import org.testng.annotations.Test;
import java.util.Arrays;
import java.util.Collections;
/**
 * Unit tests for the line-splitting helpers in {@link TextFormattingUtils}:
 * {@code splitWhiteSpace}, {@code getWordStarts} and {@code splitFixedWidth}.
 *
 * <p>The amount of whitespace inside the input literals is significant: several
 * cases differ from their neighbours only by the number of spaces between or
 * before words, so the literals must not be whitespace-normalized.
 */
public class TextFormattingUtilsUnitTest extends BaseTest {

    /** A null line is expected to be rejected with a ReviewedStingException. */
    @Test(expectedExceptions = ReviewedStingException.class)
    public void testSplitWhiteSpaceNullLine() {
        TextFormattingUtils.splitWhiteSpace(null);
    }

    /**
     * Splitting on whitespace is expected to yield the same three words regardless of
     * repeated, leading or trailing whitespace, for both spaces and tabs.
     */
    @Test
    public void testSplitWhiteSpace() {
        final String[] expected = { "foo", "bar", "baz" };

        Assert.assertEquals(TextFormattingUtils.splitWhiteSpace("foo bar baz"), expected);
        // Runs of more than one space between words.
        Assert.assertEquals(TextFormattingUtils.splitWhiteSpace("foo  bar  baz"), expected);
        Assert.assertEquals(TextFormattingUtils.splitWhiteSpace(" foo bar baz"), expected);
        Assert.assertEquals(TextFormattingUtils.splitWhiteSpace(" foo bar baz "), expected);
        Assert.assertEquals(TextFormattingUtils.splitWhiteSpace("foo bar baz "), expected);
        Assert.assertEquals(TextFormattingUtils.splitWhiteSpace("\tfoo\tbar\tbaz\t"), expected);
    }

    /** A null line is expected to be rejected with a ReviewedStingException. */
    @Test(expectedExceptions = ReviewedStingException.class)
    public void testGetWordStartsNullLine() {
        TextFormattingUtils.getWordStarts(null);
    }

    /**
     * Checks the zero-based offsets reported for words that follow whitespace.
     * A word beginning at offset 0 is not expected in the result, whereas a first
     * word preceded by whitespace is.
     */
    @Test
    public void testGetWordStarts() {
        Assert.assertEquals(TextFormattingUtils.getWordStarts("foo bar baz"), Arrays.asList(4, 8));
        // Two spaces between words push the later words out to offsets 5 and 10.
        Assert.assertEquals(TextFormattingUtils.getWordStarts("foo  bar  baz"), Arrays.asList(5, 10));
        Assert.assertEquals(TextFormattingUtils.getWordStarts(" foo bar baz"), Arrays.asList(1, 5, 9));
        Assert.assertEquals(TextFormattingUtils.getWordStarts(" foo bar baz "), Arrays.asList(1, 5, 9));
        Assert.assertEquals(TextFormattingUtils.getWordStarts("foo bar baz "), Arrays.asList(4, 8));
        Assert.assertEquals(TextFormattingUtils.getWordStarts("\tfoo\tbar\tbaz\t"), Arrays.asList(1, 5, 9));
    }

    /** A null line is expected to be rejected with a ReviewedStingException. */
    @Test(expectedExceptions = ReviewedStingException.class)
    public void testSplitFixedWidthNullLine() {
        TextFormattingUtils.splitFixedWidth(null, Collections.<Integer>emptyList());
    }

    /** A null list of column starts is expected to be rejected with a ReviewedStingException. */
    @Test(expectedExceptions = ReviewedStingException.class)
    public void testSplitFixedWidthNullColumnStarts() {
        TextFormattingUtils.splitFixedWidth("foo bar baz", null);
    }

    /**
     * Checks fixed width splitting: the line is cut at the given zero-based column
     * starts and each resulting cell is expected to come back trimmed, with any
     * embedded spaces preserved.
     */
    @Test
    public void testSplitFixedWidth() {
        final String[] words = { "foo", "bar", "baz" };

        Assert.assertEquals(TextFormattingUtils.splitFixedWidth("foo bar baz", Arrays.asList(4, 8)), words);
        // Two spaces between words so that the columns really start at offsets 5 and 10.
        Assert.assertEquals(TextFormattingUtils.splitFixedWidth("foo  bar  baz", Arrays.asList(5, 10)), words);
        Assert.assertEquals(TextFormattingUtils.splitFixedWidth(" foo bar baz", Arrays.asList(5, 9)), words);
        Assert.assertEquals(TextFormattingUtils.splitFixedWidth(" foo bar baz ", Arrays.asList(5, 9)), words);
        Assert.assertEquals(TextFormattingUtils.splitFixedWidth("foo bar baz ", Arrays.asList(4, 8)), words);
        Assert.assertEquals(TextFormattingUtils.splitFixedWidth("\tfoo\tbar\tbaz\t", Arrays.asList(5, 9)), words);

        // Cells containing embedded spaces; the same column starts are applied to
        // lines shifted right by zero, one and two characters.
        Assert.assertEquals(TextFormattingUtils.splitFixedWidth("f o b r b z", Arrays.asList(4, 8)), new String[] { "f o", "b r", "b z" });
        Assert.assertEquals(TextFormattingUtils.splitFixedWidth(" f o b r b z", Arrays.asList(4, 8)), new String[] { "f o", "b r", "b z" });
        // With two leading spaces the cuts at 4 and 8 fall inside the original cells.
        Assert.assertEquals(TextFormattingUtils.splitFixedWidth("  f o b r b z", Arrays.asList(4, 8)), new String[] { "f", "o b", "r b z" });
    }
}

View File

@ -34,8 +34,8 @@ import org.broadinstitute.sting.BaseTest
import org.broadinstitute.sting.MD5DB
import org.broadinstitute.sting.queue.QCommandLine
import org.broadinstitute.sting.queue.util.{Logging, ProcessController}
import java.io.{FileNotFoundException, File}
import org.broadinstitute.sting.gatk.report.GATKReportParser
import java.io.File
import org.broadinstitute.sting.gatk.report.GATKReport
import org.apache.commons.io.FileUtils
import org.broadinstitute.sting.queue.engine.CommandLinePluginManager
@ -118,12 +118,11 @@ object PipelineTest extends BaseTest with Logging {
// write the report to the shared validation data location
val formatter = new SimpleDateFormat("yyyy.MM.dd.HH.mm.ss")
val reportLocation = "%s%s/%s/validation.%s.eval".format(validationReportsDataLocation, jobRunner, name, formatter.format(new Date))
val report = new File(reportLocation)
val reportFile = new File(reportLocation)
FileUtils.copyFile(new File(runDir(name, jobRunner) + evalSpec.evalReport), report);
FileUtils.copyFile(new File(runDir(name, jobRunner) + evalSpec.evalReport), reportFile);
val parser = new GATKReportParser
parser.parse(report)
val report = new GATKReport(reportFile);
var allInRange = true
@ -131,7 +130,9 @@ object PipelineTest extends BaseTest with Logging {
println(name + " validation values:")
println(" value (min,target,max) table key metric")
for (validation <- evalSpec.validations) {
val value = parser.getValue(validation.table, validation.key, validation.metric)
val table = report.getTable(validation.table)
val key = table.getPrimaryKey(validation.key)
val value = String.valueOf(table.get(key, validation.metric))
val inRange = if (value == null) false else validation.inRange(value)
val flag = if (!inRange) "*" else " "
println(" %s %s (%s,%s,%s) %s %s %s".format(flag, value, validation.min, validation.target, validation.max, validation.table, validation.key, validation.metric))