Merge branch 'master' of ssh://gsa1.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Eric Banks 2012-03-13 09:28:16 -04:00
commit ed69f4ff7c
28 changed files with 1624 additions and 499 deletions

View File

@ -2,19 +2,19 @@
# Store one parsed GATKReport table in tableEnv as a data frame.
#
# Args:
#   tableName:   name under which the table is stored
#   tableHeader: character vector of column names
#   tableRows:   character matrix with one row per table row, or NULL when the
#                table has a header but no data rows
#   tableEnv:    environment collecting the tables of one report
#
# If tables with this name are already stored, the new one is stored as
# "<tableName>.<k>", where k is the number of tables already stored for it.
.gsa.assignGATKTableToEnvironment <- function(tableName, tableHeader, tableRows, tableEnv) {
  if (is.null(tableRows)) {
    # header-only table: build a zero-row frame with one column per header field
    # (data.frame(NULL) has no columns, so assigning the column names would fail)
    d <- as.data.frame(
      matrix(character(0), nrow = 0, ncol = length(tableHeader)),
      stringsAsFactors = FALSE
    )
  } else {
    d <- data.frame(tableRows, row.names = NULL, stringsAsFactors = FALSE)
  }
  colnames(d) <- tableHeader

  # seq_len() keeps the loop empty for a table without columns (1:0 would visit 1 and 0)
  for (i in seq_len(ncol(d))) {
    # use the general type.convert infrastructure of read.table to convert column data to R types;
    # as.is = TRUE keeps text columns as character and avoids the missing-as.is warning
    d[[i]] <- type.convert(d[[i]], as.is = TRUE)
  }

  # Count only this table and its numbered duplicates. The name is compared
  # literally, not as a regular expression, so "Comp" does not collide with
  # "CompOverlap" and names containing "." are matched exactly.
  existing <- ls(envir = tableEnv, all.names = TRUE)
  suffixes <- substring(existing, nchar(tableName) + 1)
  sameTable <- startsWith(existing, tableName) & grepl("^(\\.[0-9]+)?$", suffixes)
  nUsed <- sum(sameTable)
  if (nUsed > 0) {
    tableName <- paste0(tableName, ".", nUsed)
  }

  assign(tableName, d, envir = tableEnv)
}
@ -28,74 +28,155 @@
starts = c(1, columnStarts);
stops = c(columnStarts - 1, nchar(line));
sapply(line, splitStartStop)[,1];
}
# Legacy reader for v0.* reports, given as a character vector of lines.
# v0.1 rows are split on whitespace, v0.2 rows are split at fixed column starts
# taken from the table's header row. Returns (invisibly) a list of the tables.
gsa.read.gatkreportv0 <- function(lines) {
  tableEnv <- new.env()
  tableName <- NA
  tableHeader <- c()
  tableRows <- c()
  version <- NA

  for (line in lines) {
    if (grepl("^##:GATKReport.v", line, ignore.case = TRUE)) {
      # a report header line opens a new table: store the one collected so far
      if (!is.na(tableName)) {
        .gsa.assignGATKTableToEnvironment(tableName, tableHeader, tableRows, tableEnv)
      }
      tableName <- unlist(strsplit(line, "[[:space:]]+"))[2]
      tableHeader <- c()
      tableRows <- c()

      # For differences in versions see
      # $STING_HOME/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportVersion.java
      if (grepl("^##:GATKReport.v0.1[[:space:]]+", line, ignore.case = TRUE)) {
        version <- "v0.1"
      } else if (grepl("^##:GATKReport.v0.2[[:space:]]+", line, ignore.case = TRUE)) {
        version <- "v0.2"
        columnStarts <- c()
      }
      next
    }

    # blank lines and comment lines carry no table data
    if (grepl("^[[:space:]]*$", line) || grepl("^[[:space:]]*#", line)) {
      next
    }
    # data seen before any table header is ignored
    if (is.na(tableName)) {
      next
    }

    if (version == "v0.1") {
      row <- unlist(strsplit(line, "[[:space:]]+"))
    } else if (version == "v0.2") {
      if (length(tableHeader) == 0) {
        # column starts: non-space characters that directly follow a space character
        # (so the very first character of the line is never a start)
        isSpace <- grepl("[[:space:]]", unlist(strsplit(line, "")))
        columnStarts <- intersect(which(!isSpace), which(isSpace) + 1)
      }
      row <- .gsa.splitFixedWidth(line, columnStarts)
    }

    # the first row of a table is its header, every later row is data
    if (length(tableHeader) == 0) {
      tableHeader <- row
    } else {
      tableRows <- rbind(tableRows, row)
    }
  }

  # store the last table of the file
  if (!is.na(tableName)) {
    .gsa.assignGATKTableToEnvironment(tableName, tableHeader, tableRows, tableEnv)
  }

  invisible(as.list(tableEnv, all.names = TRUE))
}
# Reader for GATKReport v1 files, given as a character vector of lines.
# Each table is announced by two consecutive "#:GATKTable" lines; the table name
# is the third ":"-separated field of the second one. The first row after them
# is the column header, from which the fixed-width column starts are derived.
# Returns (invisibly) a list of the tables.
gsa.read.gatkreportv1 <- function(lines) {
  tableEnv <- new.env()
  tableName <- NA
  tableHeader <- c()
  tableRows <- c()
  version <- ""
  headerRowCount <- -1

  for (line in lines) {
    if (grepl("^#:GATKReport.v1", line, ignore.case = TRUE)) {
      version <- "v1.0"
      headerRowCount <- 0
    }

    # An odd count means exactly one "#:GATKTable" line of the current pair has
    # been seen, so this line is the second one and carries the table name.
    # The version test also guards the initial count of -1, which is odd under %%.
    if ((headerRowCount %% 2 == 1) && (version == "v1.0")) {
      if (!is.na(tableName)) {
        .gsa.assignGATKTableToEnvironment(tableName, tableHeader, tableRows, tableEnv)
      }
      tableName <- unlist(strsplit(line, ":"))[3]
      tableHeader <- c()
      tableRows <- c()
      columnStarts <- c()
    }

    if (grepl("^#:GATKTable", line, ignore.case = TRUE)) {
      headerRowCount <- headerRowCount + 1
      next
    }
    # nothing to collect until a table has been named
    if (is.na(tableName)) {
      next
    }

    if (version == "v1.0") {
      if (length(tableHeader) == 0) {
        # column starts: non-space characters that directly follow a space character
        # (so the very first character of the line is never a start)
        isSpace <- grepl("[[:space:]]", unlist(strsplit(line, "")))
        columnStarts <- intersect(which(!isSpace), which(isSpace) + 1)
      }
      row <- .gsa.splitFixedWidth(line, columnStarts)
    }

    if (length(tableHeader) == 0) {
      tableHeader <- row
    } else if (nchar(line) > 0) {
      # empty lines after the header are not data rows
      tableRows <- rbind(tableRows, row)
    }
  }

  # store the last table of the file
  if (!is.na(tableName)) {
    .gsa.assignGATKTableToEnvironment(tableName, tableHeader, tableRows, tableEnv)
  }

  invisible(as.list(tableEnv, all.names = TRUE))
}
# Load all GATKReport tables from a file
#
# Reads the file, looks at its first line and hands all lines to the parser for
# that report generation: gsa.read.gatkreportv1 for "#:GATKReport.v1..." and
# gsa.read.gatkreportv0 for "##:GATKReport.v0...".
#
# Args:
#   filename: path of the report file
#
# Returns the value of the selected parser (a list of tables), or NULL
# (invisibly) when the file is empty or its first line is not a recognised
# GATKReport header.
gsa.read.gatkreport <- function(filename) {
  con <- file(filename, "r", blocking = TRUE)
  # close the connection even when readLines() fails
  on.exit(close(con), add = TRUE)
  lines <- readLines(con)

  if (length(lines) == 0) {
    return(invisible(NULL))
  }

  # the first line identifies the report version
  firstLine <- lines[1]
  if (grepl("^#:GATKReport.v1", firstLine, ignore.case = TRUE)) {
    gsa.read.gatkreportv1(lines)
  } else if (grepl("^##:GATKReport.v0", firstLine, ignore.case = TRUE)) {
    gsa.read.gatkreportv0(lines)
  } else {
    invisible(NULL)
  }
}

View File

@ -1,19 +1,49 @@
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.report;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.StingException;
import org.broadinstitute.sting.utils.text.TextFormattingUtils;
import org.broadinstitute.sting.utils.exceptions.UserException;
import java.io.*;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.PrintStream;
import java.util.Collection;
import java.util.List;
import java.util.TreeMap;
/**
* Container class for GATK report tables
*/
public class GATKReport {
public static final String GATKREPORT_HEADER_PREFIX = "##:GATKReport.v";
public static final String GATKREPORT_HEADER_PREFIX = "#:GATKReport.";
public static final GATKReportVersion LATEST_REPORT_VERSION = GATKReportVersion.V1_0;
public static final String SEPARATOR = ":";
private GATKReportVersion version = LATEST_REPORT_VERSION;
private TreeMap<String, GATKReportTable> tables = new TreeMap<String, GATKReportTable>();
/**
@ -24,7 +54,8 @@ public class GATKReport {
/**
* Create a new GATKReport with the contents of a GATKReport on disk.
* @param filename the path to the file to load
*
* @param filename the path to the file to load
*/
public GATKReport(String filename) {
this(new File(filename));
@ -32,7 +63,8 @@ public class GATKReport {
/**
* Create a new GATKReport with the contents of a GATKReport on disk.
* @param file the file to load
*
* @param file the file to load
*/
public GATKReport(File file) {
loadReport(file);
@ -40,106 +72,77 @@ public class GATKReport {
/**
* Load a GATKReport file from disk
* @param file the file to load
*
* @param file the file to load
*/
private void loadReport(File file) {
try {
BufferedReader reader = new BufferedReader(new FileReader(file));
GATKReportTable table = null;
String[] header = null;
int id = 0;
GATKReportVersion version = null;
List<Integer> columnStarts = null;
String reportHeader = reader.readLine();
String line;
while ( (line = reader.readLine()) != null ) {
// Read the first line for the version and number of tables.
version = GATKReportVersion.fromHeader(reportHeader);
if (version.equals(GATKReportVersion.V0_1) ||
version.equals(GATKReportVersion.V0_2))
throw new UserException("The GATK no longer supports reading legacy GATK Reports. Please use v1.0 or newer.");
if (line.startsWith(GATKREPORT_HEADER_PREFIX)) {
int nTables = Integer.parseInt(reportHeader.split(":")[2]);
version = GATKReportVersion.fromHeader(line);
// Read each tables according ot the number of tables
for (int i = 0; i < nTables; i++) {
addTable(new GATKReportTable(reader, version));
line = line.replaceFirst("##:GATKReport." + version.versionString + " ", "");
String[] pieces = line.split(" : ");
String tableName = pieces[0];
String tableDesc = pieces[1];
addTable(tableName, tableDesc);
table = getTable(tableName);
table.setVersion(version);
header = null;
columnStarts = null;
} else if ( line.trim().isEmpty() ) {
// do nothing
} else {
if (table != null) {
String[] splitLine;
switch (version) {
case V0_1:
splitLine = TextFormattingUtils.splitWhiteSpace(line);
break;
case V0_2:
if (header == null) {
columnStarts = TextFormattingUtils.getWordStarts(line);
}
splitLine = TextFormattingUtils.splitFixedWidth(line, columnStarts);
break;
default:
throw new ReviewedStingException("GATK report version parsing not implemented for: " + line);
}
if (header == null) {
header = splitLine;
table.addPrimaryKey("id", false);
for ( String columnName : header ) {
table.addColumn(columnName, "");
}
id = 0;
} else {
for (int columnIndex = 0; columnIndex < header.length; columnIndex++) {
table.set(id, header[columnIndex], splitLine[columnIndex]);
}
id++;
}
}
/*
if ( !blankLine.equals("") ) {
throw new StingException("The GATK Report File is corrupted or not formatted correctly");
}
*/
}
} catch (FileNotFoundException e) {
throw new StingException("Cannot read GATKReport: " + e);
} catch (IOException e) {
throw new StingException("Cannot read GATKReport: " + e);
} catch (Exception e) {
// todo - improve exception handling
//throw new StingException("Cannot read GATKReport: " + e);
e.printStackTrace();
}
}
/**
* Add a new table to the collection
* Add a new, empty table to the report
*
* @param tableName the name of the table
* @param tableDescription the description of the table
* @param tableName the name of the table
* @param tableDescription the description of the table
*/
public void addTable(String tableName, String tableDescription) {
    // Two-argument form: delegates to the three-argument overload with sorting switched on.
    final boolean sortByPrimaryKey = true;
    addTable(tableName, tableDescription, sortByPrimaryKey);
}
/**
 * Creates a new, empty table and stores it in the report under its name. A table already stored
 * under the same name is replaced.
 *
 * @param tableName        the name of the table
 * @param tableDescription the description of the table
 * @param sortByPrimaryKey whether to sort the rows by the primary key
 */
public void addTable(String tableName, String tableDescription, boolean sortByPrimaryKey) {
    tables.put(tableName, new GATKReportTable(tableName, tableDescription, sortByPrimaryKey));
}
/**
 * Stores an existing table, empty or populated, in the report. The table is keyed by the name
 * it reports through getTableName().
 *
 * @param table the table to add
 */
public void addTable(GATKReportTable table) {
    final String key = table.getTableName();
    tables.put(key, table);
}
/**
* Return true if table with a given name exists
*
* @param tableName the name of the table
* @param tableName the name of the table
* @return true if the table exists, false otherwise
*/
public boolean hasTable(String tableName) {
@ -149,8 +152,8 @@ public class GATKReport {
/**
* Return a table with a given name
*
* @param tableName the name of the table
* @return the table object
* @param tableName the name of the table
* @return the table object
*/
public GATKReportTable getTable(String tableName) {
GATKReportTable table = tables.get(tableName);
@ -162,9 +165,10 @@ public class GATKReport {
/**
* Print all tables contained within this container to a PrintStream
*
* @param out the PrintStream to which the tables should be written
* @param out the PrintStream to which the tables should be written
*/
public void print(PrintStream out) {
out.println(GATKREPORT_HEADER_PREFIX + getVersion().toString() + SEPARATOR + getTables().size());
for (GATKReportTable table : tables.values()) {
if (table.getNumRows() > 0) {
table.write(out);
@ -175,4 +179,157 @@ public class GATKReport {
public Collection<GATKReportTable> getTables() {
    // The value view of the backing map: the tables of this report.
    return this.tables.values();
}
/**
 * Main entry point for gathering reports. Checks that the other report has the same format as
 * this one and then combines each of its tables into the table of the same name in this report.
 *
 * @param input another GATKReport of the same format
 * @throws ReviewedStingException if the formats of the two reports differ
 */
public void combineWith(GATKReport input) {
    final boolean compatible = this.isSameFormat(input);
    if (!compatible) {
        throw new ReviewedStingException("Failed to combine GATKReport, format doesn't match!");
    }

    for (String name : input.tables.keySet()) {
        GATKReportTable target = tables.get(name);
        target.combineWith(input.getTable(name));
    }
}
// Accessor for the report format version held by this report.
public GATKReportVersion getVersion() {
    return this.version;
}
// Replaces the report format version held by this report.
public void setVersion(GATKReportVersion version) {
    this.version = version;
}
/**
 * Tells whether two reports have the same format: the same version, the same set of table names,
 * and, for every table, a positive isSameFormat() answer from the table itself. The data held in
 * the tables is not compared. This is the check used to decide whether two reports can be
 * gathered or reduced.
 *
 * @param report another GATK report
 * @return true if the reports are gatherable
 */
public boolean isSameFormat(GATKReport report) {
    final boolean sameLayout = version.equals(report.version)
            && tables.keySet().equals(report.tables.keySet());
    if (!sameLayout) {
        return false;
    }

    for (String name : tables.keySet()) {
        GATKReportTable mine = getTable(name);
        GATKReportTable theirs = report.getTable(name);
        if (!mine.isSameFormat(theirs)) {
            return false;
        }
    }
    return true;
}
/**
 * Checks that two reports are exactly the same: same version, same set of table names, and
 * every table equal to the table of the same name in the other report.
 *
 * @param report another GATK report
 * @return true if all fields in the reports, tables, and columns are equal
 */
public boolean equals(GATKReport report) {
    final boolean sameLayout = version.equals(report.version)
            && tables.keySet().equals(report.tables.keySet());
    if (!sameLayout) {
        return false;
    }

    for (String name : tables.keySet()) {
        GATKReportTable mine = getTable(name);
        GATKReportTable theirs = report.getTable(name);
        if (!mine.equals(theirs)) {
            return false;
        }
    }
    return true;
}
/**
 * Factory for a simplified GATK report, meant for reports that do not need the advanced
 * functionality of a full GATK report.
 * <p/>
 * A simple GATK report consists of:
 * <p/>
 * - A single table
 * - No visible primary key (a primary key named "id" is added with its flag set to false)
 * <p/>
 * Optional:
 * - Only untyped columns. As long as the data is an Object, it will be accepted.
 * - Default column values being empty strings.
 * <p/>
 * Limitations:
 * <p/>
 * - A simple GATK report cannot contain multiple tables.
 * - It cannot contain typed columns, which prevents arithmetic gathering.
 *
 * @param tableName The name of your simple GATK report table
 * @param columns   The names of the columns in your table
 * @return a simplified GATK report
 */
public static GATKReport newSimpleReport(String tableName, String... columns) {
    final GATKReportTable simpleTable = new GATKReportTable(tableName, "A simplified GATK table report");
    simpleTable.addPrimaryKey("id", false);
    for (int c = 0; c < columns.length; c++) {
        // every column starts out with the empty string as its default value
        simpleTable.addColumn(columns[c], "");
    }

    final GATKReport report = new GATKReport();
    report.addTable(simpleTable);
    return report;
}
/**
 * Convenience method for filling a simplified GATK report one row at a time. It only works on
 * reports that qualify as simplified GATK reports; see newSimpleReport() for what that means.
 *
 * @param values the row of data to be added to the table.
 *               Note: the number of arguments must match the columns in the table.
 * @throws StingException if this is not a simplified report, or if the number of values does
 *                        not match the number of columns
 */
public void addRow(Object... values) {
    // Must be a simplified GATK Report
    if (!isSimpleReport()) {
        throw new StingException("Cannot add a Row to a non-Simplified GATK Report");
    }

    final GATKReportTable table = tables.firstEntry().getValue();
    if (table.getColumns().size() != values.length) {
        throw new StingException("The number of arguments in addRow() must match the number of columns in the table");
    }

    // the new row is keyed by the current row count plus one
    final int rowKey = table.getNumRows() + 1;
    int position = 0;
    for (String columnName : table.getColumns().keySet()) {
        table.set(rowKey, columnName, values[position++]);
    }
}
/**
 * Checks if the GATK report qualifies as a "simple" GATK report: exactly one table, whose
 * primary key is named "id".
 *
 * @return true if the report is a simplified GATK report
 */
private boolean isSimpleReport() {
    return tables.size() == 1
            && tables.firstEntry().getValue().getPrimaryKeyName().equals("id");
}
}

View File

@ -1,8 +1,34 @@
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.report;
import org.apache.commons.lang.math.NumberUtils;
import java.util.*;
import java.util.Arrays;
import java.util.Collection;
import java.util.TreeMap;
/**
* Holds values for a column in a GATK report table
@ -12,27 +38,48 @@ public class GATKReportColumn extends TreeMap<Object, Object> {
final private Object defaultValue;
final private String format;
final private boolean display;
final private GATKReportDataType dataType;
/**
* Construct the column object, specifying the column name, default value, and whether or not the column should be displayed
* Construct the column object, specifying the column name, default value, whether or not the column should be
* displayed, and the format string. This cannot be null.
*
* @param columnName the name of the column
* @param defaultValue the default value of the column
* @param display if true, the column will be displayed in the final output
* @param format format string
* @param columnName the name of the column
* @param defaultValue the default value of the column
* @param display if true, the column will be displayed in the final output
* @param format format string
*/
public GATKReportColumn(String columnName, Object defaultValue, boolean display, String format) {
this.columnName = columnName;
this.defaultValue = defaultValue;
this.display = display;
this.format = format == null ? null : (format.equals("") ? null : format);
if ( format.equals("") ) {
this.format = "%s";
this.dataType = GATKReportDataType.Unknown;
if ( defaultValue != null ) {
this.defaultValue = defaultValue;
//this.dataType = GATKReportDataType.fromObject(defaultValue);
}
else {
this.defaultValue = "";
//this.dataType = GATKReportDataType.Unknown;
}
}
else {
this.format = format;
this.dataType = GATKReportDataType.fromFormatString(format);
if ( defaultValue == null ) {
this.defaultValue = dataType.getDefaultValue();
}
else {
this.defaultValue = defaultValue;
}
}
}
/**
* Initialize an element in the column with a default value
*
* @param primaryKey the primary key position in the column that should be set
* @param primaryKey the primary key position in the column that should be set
*/
public void initialize(Object primaryKey) {
this.put(primaryKey, defaultValue);
@ -43,8 +90,8 @@ public class GATKReportColumn extends TreeMap<Object, Object> {
* tables, as the table gets written properly without having to waste storage for the unset elements (usually the zero
* values) in the table.
*
* @param primaryKey the primary key position in the column that should be retrieved
* @return the value at the specified position in the column, or the default value if the element is not set
* @param primaryKey the primary key position in the column that should be retrieved
* @return the value at the specified position in the column, or the default value if the element is not set
*/
private Object getWithoutSideEffects(Object primaryKey) {
if (!this.containsKey(primaryKey)) {
@ -57,8 +104,8 @@ public class GATKReportColumn extends TreeMap<Object, Object> {
/**
* Return an object from the column, but if it doesn't exist, return the default value.
*
* @param primaryKey the primary key position in the column that should be retrieved
* @return the string value at the specified position in the column, or the default value if the element is not set
* @param primaryKey the primary key position in the column that should be retrieved
* @return the string value at the specified position in the column, or the default value if the element is not set
*/
public String getStringValue(Object primaryKey) {
return formatValue(getWithoutSideEffects(primaryKey));
@ -68,7 +115,7 @@ public class GATKReportColumn extends TreeMap<Object, Object> {
* Return the displayable property of the column. If true, the column will be displayed in the final output.
* If not, printing will be suppressed for the contents of the table.
*
* @return true if the column will be displayed, false if otherwise
* @return true if the column will be displayed, false if otherwise
*/
public boolean isDisplayable() {
return display;
@ -76,6 +123,7 @@ public class GATKReportColumn extends TreeMap<Object, Object> {
/**
* Get the display width for this column. This allows the entire column to be displayed with the appropriate, fixed width.
*
* @return the format string for this column
*/
public GATKReportColumnFormat getColumnFormat() {
@ -112,6 +160,7 @@ public class GATKReportColumn extends TreeMap<Object, Object> {
/**
* Check if the value can be right aligned. Does not trim the values before checking if numeric since it assumes
* the spaces mean that the value is already padded.
*
* @param value to check
* @return true if the value is a right alignable
*/
@ -121,6 +170,7 @@ public class GATKReportColumn extends TreeMap<Object, Object> {
/**
* Returns a string version of the values.
*
* @param obj The object to convert to a string
* @return The string representation of the column
*/
@ -128,19 +178,54 @@ public class GATKReportColumn extends TreeMap<Object, Object> {
String value;
if (obj == null) {
value = "null";
} else if ( format != null ) {
} else if ( dataType.equals(GATKReportDataType.Unknown) &&
(obj instanceof Double || obj instanceof Float) ) {
value = String.format("%.8f", obj);
} else
value = String.format(format, obj);
} else if (obj instanceof Float) {
value = String.format("%.8f", (Float) obj);
} else if (obj instanceof Double) {
value = String.format("%.8f", (Double) obj);
} else {
value = obj.toString();
}
return value;
}
public GATKReportDataType getDataType() {
    // The data type assigned to this column when it was constructed.
    return this.dataType;
}
/**
 * Tells whether another column has the same format as this one: same data type, column name,
 * display flag, format string and default value. The values stored in the columns are not
 * compared.
 *
 * @param that the column to compare with
 * @return true if all format properties of the two columns match
 */
public boolean isSameFormat(GATKReportColumn that) {
    // The default value is null when the data type supplies no default (getDefaultValue()
    // returns null for its fallback case), so format and default are compared null-safely.
    final boolean sameFormatString =
            (format == null) ? that.format == null : format.equals(that.format);
    final boolean sameDefault =
            (defaultValue == null) ? that.defaultValue == null : defaultValue.equals(that.defaultValue);

    return dataType.equals(that.dataType) &&
            columnName.equals(that.columnName) &&
            display == that.display &&
            sameFormatString &&
            sameDefault;
}
protected boolean equals(GATKReportColumn that) {
    // Both columns must hold exactly the same primary keys ...
    final boolean sameKeys = this.keySet().equals(that.keySet());
    if (!sameKeys) {
        return false;
    }

    // ... and equal values under every key, compared with this column's data type.
    for (Object primaryKey : keySet()) {
        if (!dataType.isEqual(this.get(primaryKey), that.get(primaryKey))) {
            return false;
        }
    }
    return true;
}
public String getColumnName() {
    // The name given to this column at construction.
    return this.columnName;
}
public String getFormat() {
    // Columns of unknown data type report an empty format string instead of the stored one.
    return dataType.equals(GATKReportDataType.Unknown) ? "" : format;
}
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2011, The Broad Institute
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
@ -24,8 +24,6 @@
package org.broadinstitute.sting.gatk.report;
import org.broadinstitute.sting.utils.collections.Pair;
import java.util.*;
/**
@ -36,6 +34,7 @@ public class GATKReportColumns extends LinkedHashMap<String, GATKReportColumn> i
/**
* Returns the column by index
*
* @param i the index
* @return The column
*/
@ -59,9 +58,44 @@ public class GATKReportColumns extends LinkedHashMap<String, GATKReportColumn> i
public Iterator<GATKReportColumn> iterator() {
return new Iterator<GATKReportColumn>() {
int offset = 0;
public boolean hasNext() { return offset < columnNames.size() ; }
public GATKReportColumn next() { return getByIndex(offset++); }
public void remove() { throw new UnsupportedOperationException("Cannot remove from a GATKReportColumn iterator"); }
public boolean hasNext() {
return offset < columnNames.size();
}
public GATKReportColumn next() {
return getByIndex(offset++);
}
public void remove() {
throw new UnsupportedOperationException("Cannot remove from a GATKReportColumn iterator");
}
};
}
public boolean isSameFormat(GATKReportColumns that) {
    // The column names must match before the columns are compared one by one.
    final boolean sameNames = columnNames.equals(that.columnNames);
    if (!sameNames) {
        return false;
    }

    for (String name : columnNames) {
        GATKReportColumn mine = this.get(name);
        GATKReportColumn theirs = that.get(name);
        if (!mine.isSameFormat(theirs)) {
            return false;
        }
    }
    return true;
}
/**
 * Checks that two column collections hold the same columns with the same contents.
 *
 * @param that the column collection to compare with
 * @return true if both collections have the same column names and every column equals the
 *         column of the same name in the other collection
 */
protected boolean equals(GATKReportColumns that) {
    // Do not rely on the caller having run isSameFormat() first: a column missing from
    // 'that' would be passed on as null, and extra columns in 'that' would go unnoticed.
    if (!keySet().equals(that.keySet())) {
        return false;
    }

    for (Map.Entry<String, GATKReportColumn> pair : entrySet()) {
        String key = pair.getKey();
        if (!get(key).equals(that.get(key))) {
            return false;
        }
    }
    return true;
}
}

View File

@ -0,0 +1,235 @@
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.report;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.Map;
/**
* The gatherable data types acceptable in a GATK report column.
*/
public enum GATKReportDataType {
/**
* The null type should not be used.
*/
Null("Null"),
/**
* The default value when a format string is not present
*/
Unknown("Unknown"),
/**
* Used for boolean values. Will display as true or false in the table.
*/
Boolean("%[Bb]"),
/**
* Used for byte and char value. Will display as a char so use printable values!
*/
Byte("%[Cc]"),
/**
* Used for float and double values. Will output a decimal with format %.8f unless otherwise specified.
*/
Decimal("%.*[EeFf]"),
/**
* Used for int, and long values. Will display the full number by default.
*/
Integer("%[Dd]"),
/**
* Used for string values. Displays the string itself.
*/
String("%[Ss]");
public final String dataTypeString;
private GATKReportDataType(String dataTypeString) {
this.dataTypeString = dataTypeString;
}
private static final Map<String, GATKReportDataType> lookup = new HashMap<String, GATKReportDataType>();
static {
for (GATKReportDataType s : EnumSet.allOf(GATKReportDataType.class))
lookup.put(s.dataTypeString, s);
}
@Override
public String toString() {
return this.dataTypeString;
}
/**
* Returns a GATK report data type from the Object specified. It looks through the list of acceptable classes and
* returns the appropriate data type.
*
* @param object the object ot derive the data type from
* @return the appropriate data type
*/
public static GATKReportDataType fromObject(Object object) {
GATKReportDataType value;
if (object instanceof Boolean) {
value = GATKReportDataType.Boolean;
} else if (object instanceof Byte || object instanceof Character) {
value = GATKReportDataType.Byte;
} else if (object instanceof Float || object instanceof Double) {
value = GATKReportDataType.Decimal;
} else if (object instanceof Integer || object instanceof Long) {
value = GATKReportDataType.Integer;
} else if (object instanceof String) {
value = GATKReportDataType.String;
} else {
value = GATKReportDataType.Unknown;
//throw new ReviewedStingException("GATKReport could not convert the data object into a GATKReportDataType. Acceptable data objects are found in the documentation.");
}
return value;
}
/**
* Returns a GATK report data type from the format string specified. It uses regex matching from the enumerated
* Strings.
*
* @param format the format string to derive the data type from
* @return the appropriate data type
*/
public static GATKReportDataType fromFormatString(String format) {
if (format.equals(""))
return Unknown;
for (GATKReportDataType type : lookup.values()) {
if (format.matches(type.toString()) )
return type;
}
return Unknown;
}
/**
* Returns the default value of the data type. It returns an object that matches the class of the data type.
*
* @return an object that matches the data type
*/
public Object getDefaultValue() {
switch (this) {
case Decimal:
return 0.0D;
case Boolean:
return false;
case Byte:
return (byte) 0;
case Integer:
return 0L;
case String:
return "";
default:
return null;
}
}
/**
* Checks if the two objects are equal using the appropriate test form the data types.
*
* @param a an object
* @param b another object to check if equal
* @return true - the objects are equal, false - the objects are nto equal
*/
public boolean isEqual(Object a, Object b) {
    switch (this) {
        case Null:
            // Null-typed values are always treated as equal.
            return true;

        case Decimal:
        case Boolean:
        case Integer:
            // Compare by string form so that different wrapper classes holding the same
            // printed value are considered equal.
            return a.toString().equals(b.toString());

        case Byte: {
            // Bytes and characters are interchangeable for this type; work out which
            // combination of the two we have been given.
            final boolean aIsChar = a instanceof Character;
            final boolean bIsChar = b instanceof Character;
            final boolean aIsByte = a instanceof Byte;
            final boolean bIsByte = b instanceof Byte;

            if ((aIsChar && bIsChar) || (aIsByte && bIsByte))
                return a.toString().equals(b.toString());
            if (aIsChar && bIsByte)
                return ((Character) a).charValue() == ((Byte) b).byteValue();
            if (aIsByte && bIsChar)
                return ((Byte) a).byteValue() == ((Character) b).charValue();

            // Any other pairing is compared exactly like the String/default case.
            return a.equals(b);
        }

        case String:
        default:
            return a.equals(b);
    }
}
/**
* Converts an input String to the appropriate type using the data type. Used when loading a GATK report from a
* file.
*
* @param obj The input string
* @return an object that matches the data type.
*/
protected Object Parse(Object obj) {
    // Only strings can be parsed; any other input (including null) yields null.
    if (!(obj instanceof String))
        return null;

    final String text = obj.toString();
    switch (this) {
        case Boolean:
            // Fully qualified because the enum constant Boolean hides java.lang.Boolean here.
            return java.lang.Boolean.parseBoolean(text);
        case Byte:
            // The first character of the string, narrowed to a byte.
            return (byte) text.toCharArray()[0];
        case Decimal:
            return Double.parseDouble(text);
        case Integer:
            // Integers are always stored as Long.
            return Long.parseLong(text);
        case String:
        default:
            // Strings, and any type without a dedicated parser, are returned untouched.
            return text;
    }
}
/**
* Returns a format string version of the value according to the data type.
*
* @return The printf string representation of the object according to data type.
*/
public String getDefaultFormatString() {
    // Only the types that need a dedicated printf conversion are listed explicitly;
    // String, Null and every other type share the generic "%s" conversion.
    switch (this) {
        case Boolean:
            return "%b";
        case Byte:
            return "%c";
        case Decimal:
            return "%.8f";
        case Integer:
            return "%d";
        default:
            return "%s";
    }
}
}

View File

@ -0,0 +1,46 @@
package org.broadinstitute.sting.gatk.report;
import org.broadinstitute.sting.commandline.Gatherer;
import org.broadinstitute.sting.utils.exceptions.UserException;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.PrintStream;
import java.util.List;
/**
* Gathers several GATKReport files into a single GATKReport output file.
*
* User: roger
* Date: 1/9/12
* Time: 11:17 PM
*/
public class GATKReportGatherer extends Gatherer {
    /**
     * Combines the input GATKReport files into one report and writes it to the output file.
     *
     * The first input is loaded as the base report and every following input is merged into it with
     * combineWith(). If there are no inputs, a newly constructed GATKReport is printed instead.
     *
     * @param inputs the GATKReport files to combine, in the order they should be merged
     * @param output the file the combined report is written to
     * @throws UserException if the output file cannot be opened for writing
     */
    @Override
    public void gather(List<File> inputs, File output) {
        final PrintStream out;
        try {
            out = new PrintStream(output);
        } catch (FileNotFoundException e) {
            throw new UserException("Could not open GATKReport gather output file " + output + ": " + e.getMessage());
        }

        // Close the stream even if reading or combining a report fails, so the file handle is
        // released and everything written so far reaches the disk.
        try {
            GATKReport combined = null;
            for (File input : inputs) {
                final GATKReport report = new GATKReport(input);
                if (combined == null)
                    combined = report;             // the first report is the base for the merge
                else
                    combined.combineWith(report);
            }

            // With no inputs at all, fall back to a newly constructed report.
            if (combined == null)
                combined = new GATKReport();

            combined.print(out);
        } finally {
            out.close();
        }
    }
}

View File

@ -1,8 +1,34 @@
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.report;
import org.apache.commons.lang.ObjectUtils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.text.TextFormattingUtils;
import java.io.BufferedReader;
import java.io.PrintStream;
import java.util.*;
import java.util.regex.Matcher;
@ -12,12 +38,12 @@ import java.util.regex.Pattern;
* A data structure that allows data to be collected over the course of a walker's computation, then have that data
* written to a PrintStream such that it's human-readable, AWK-able, and R-friendly (given that you load it using the
* GATKReport loader module).
*
* <p/>
* The goal of this object is to use the same data structure for both accumulating data during a walker's computation
* and emitting that data to a file for easy analysis in R (or any other program/language that can take in a table of
* results). Thus, all of the infrastructure below is designed simply to make printing the following as easy as
* possible:
*
* <p/>
* ##:GATKReport.v0.1 ErrorRatePerCycle : The error rate per sequenced position in the reads
* cycle errorrate.61PA8.7 qualavg.61PA8.7
* 0 0.007451835696110506 25.474613284804366
@ -29,60 +55,60 @@ import java.util.regex.Pattern;
* 6 5.452562704471102E-4 36.1217248908297
* 7 5.452562704471102E-4 36.1910480349345
* 8 5.452562704471102E-4 36.00345705967977
*
* <p/>
* Here, we have a GATKReport table - a well-formatted, easy to read representation of some tabular data. Every single
* table has this same GATKReport.v0.1 header, which permits multiple files from different sources to be cat-ed
* together, which makes it very easy to pull tables from different programs into R via a single file.
*
* <p/>
* ------------
* Definitions:
*
* <p/>
* Table info:
* The first line, structured as
* ##:<report version> <table name> : <table description>
*
* The first line, structured as
* ##:<report version> <table name> : <table description>
* <p/>
* Table header:
* The second line, specifying a unique name for each column in the table.
*
* The first column mentioned in the table header is the "primary key" column - a column that provides the unique
* identifier for each row in the table. Once this column is created, any element in the table can be referenced by
* the row-column coordinate, i.e. "primary key"-"column name" coordinate.
*
* When a column is added to a table, a default value must be specified (usually 0). This is the initial value for
* an element in a column. This permits operations like increment() and decrement() to work properly on columns that
* are effectively counters for a particular event.
*
* Finally, the display property for each column can be set during column creation. This is useful when a given
* column stores an intermediate result that will be used later on, perhaps to calculate the value of another column.
* In these cases, it's obviously necessary to store the value required for further computation, but it's not
* necessary to actually print the intermediate column.
*
* The second line, specifying a unique name for each column in the table.
* <p/>
* The first column mentioned in the table header is the "primary key" column - a column that provides the unique
* identifier for each row in the table. Once this column is created, any element in the table can be referenced by
* the row-column coordinate, i.e. "primary key"-"column name" coordinate.
* <p/>
* When a column is added to a table, a default value must be specified (usually 0). This is the initial value for
* an element in a column. This permits operations like increment() and decrement() to work properly on columns that
* are effectively counters for a particular event.
* <p/>
* Finally, the display property for each column can be set during column creation. This is useful when a given
* column stores an intermediate result that will be used later on, perhaps to calculate the value of another column.
* In these cases, it's obviously necessary to store the value required for further computation, but it's not
* necessary to actually print the intermediate column.
* <p/>
* Table body:
* The values of the table itself.
*
* The values of the table itself.
* <p/>
* ---------------
* Implementation:
*
* <p/>
* The implementation of this table has two components:
* 1. A TreeSet<Object> that stores all the values ever specified for the primary key. Any get() operation that
* refers to an element where the primary key object does not exist will result in its implicit creation. I
* haven't yet decided if this is a good idea...
*
* 2. A HashMap<String, GATKReportColumn> that stores a mapping from column name to column contents. Each
* GATKReportColumn is effectively a map (in fact, GATKReportColumn extends TreeMap<Object, Object>) between
* primary key and the column value. This means that, given N columns, the primary key information is stored
* N+1 times. This is obviously wasteful and can likely be handled much more elegantly in future implementations.
*
* 1. A TreeSet<Object> that stores all the values ever specified for the primary key. Any get() operation that
* refers to an element where the primary key object does not exist will result in its implicit creation. I
* haven't yet decided if this is a good idea...
* <p/>
* 2. A HashMap<String, GATKReportColumn> that stores a mapping from column name to column contents. Each
* GATKReportColumn is effectively a map (in fact, GATKReportColumn extends TreeMap<Object, Object>) between
* primary key and the column value. This means that, given N columns, the primary key information is stored
* N+1 times. This is obviously wasteful and can likely be handled much more elegantly in future implementations.
* <p/>
* ------------------------------
* Element and column operations:
*
* <p/>
* In addition to simply getting and setting values, this object also permits some simple operations to be applied to
* individual elements or to whole columns. For instance, an element can be easily incremented without the hassle of
* calling get(), incrementing the obtained value by 1, and then calling set() with the new value. Also, some vector
* operations are supported. For instance, two whole columns can be divided and have the result be set to a third
* column. This is especially useful when aggregating counts in two intermediate columns that will eventually need to
* be manipulated row-by-row to compute the final column.
*
* <p/>
* Note: I've made no attempt whatsoever to make these operations efficient. Right now, some of the methods check the
* type of the stored object using an instanceof call and attempt to do the right thing. Others cast the contents of
* the cell to a Number, call the Number.toDouble() method and compute a result. This is clearly not the ideal design,
@ -92,12 +118,17 @@ import java.util.regex.Pattern;
* @author Khalid Shakir
*/
public class GATKReportTable {
/** REGEX that matches any table with an invalid name */
public final static String INVALID_TABLE_NAME_REGEX = "[^a-zA-Z0-9_\\-\\.]";
private static final GATKReportVersion LATEST_REPORT_VERSION = GATKReportVersion.V0_2;
/**
* REGEX that matches any table with an invalid name
*/
public static final String INVALID_TABLE_NAME_REGEX = "[^a-zA-Z0-9_\\-\\.]";
public static final String GATKTABLE_HEADER_PREFIX = "#:GATKTable";
public static final String SEPARATOR = ":";
public static final String ENDLINE = ":;";
private String tableName;
private String tableDescription;
private GATKReportVersion version = LATEST_REPORT_VERSION;
private String primaryKeyName;
private Collection<Object> primaryKeyColumn;
@ -106,11 +137,94 @@ public class GATKReportTable {
private GATKReportColumns columns;
/**
 * Reads a table from a GATK report stream. Only the V1_0 layout is handled:
 *
 *   #:GATKTable:primaryKeyDisplay:nColumns:nRows:format1:...:formatN:;
 *   #:GATKTable:tableName:tableDescription
 *   fixed-width column name line
 *   nRows fixed-width data lines
 *   one trailing line, which is consumed and ignored
 *
 * @param reader  the reader positioned at the first header line of the table
 * @param version the report version that determines the layout to parse
 */
public GATKReportTable(BufferedReader reader, GATKReportVersion version) {
    try {
        int counter = 0;

        switch (version) {
            case V1_0:
                int nHeaders = 2;
                String[] tableHeaders = new String[nHeaders];

                // Read in the headers
                for (int i = 0; i < nHeaders; i++) {
                    tableHeaders[i] = reader.readLine();
                }
                String[] tableData = tableHeaders[0].split(":");

                // Limit the split to four fields: with an unlimited split an empty description
                // disappears (trailing empty fields are dropped), and a description containing
                // ':' is cut at its first colon.
                String[] userData = tableHeaders[1].split(":", 4);

                // Fill in the fields
                tableName = userData[2];
                String description = userData.length > 3 ? userData[3] : "";
                // Tolerate a trailing end-of-line marker on the name/description header, which
                // the unlimited split used to discard as a separate field.
                if (description.endsWith(ENDLINE))
                    description = description.substring(0, description.length() - ENDLINE.length());
                tableDescription = description;

                primaryKeyDisplay = Boolean.parseBoolean(tableData[2]);
                columns = new GATKReportColumns();

                int nColumns = Integer.parseInt(tableData[3]);
                int nRows = Integer.parseInt(tableData[4]);

                // Read column names
                String columnLine = reader.readLine();
                List<Integer> columnStarts = TextFormattingUtils.getWordStarts(columnLine);
                String[] columnNames = TextFormattingUtils.splitFixedWidth(columnLine, columnStarts);

                if (primaryKeyDisplay) {
                    // The first printed column is the primary key.
                    addPrimaryKey(columnNames[0]);
                } else {
                    // No key was printed: use a hidden "id" key, numbered from 1 in file order.
                    sortByPrimaryKey = true;
                    addPrimaryKey("id", false);
                    counter = 1;
                }

                // Put in columns using the format string from the header
                for (int i = 0; i < nColumns; i++) {
                    String format = tableData[5 + i];
                    if (primaryKeyDisplay)
                        addColumn(columnNames[i + 1], true, format);
                    else
                        addColumn(columnNames[i], true, format);
                }

                for (int i = 0; i < nRows; i++) {
                    // read line
                    List<String> lineSplits = Arrays.asList(TextFormattingUtils.splitFixedWidth(reader.readLine(), columnStarts));

                    for (int columnIndex = 0; columnIndex < nColumns; columnIndex++) {
                        // Input all the remaining values
                        GATKReportDataType type = getColumns().getByIndex(columnIndex).getDataType();

                        if (primaryKeyDisplay) {
                            // Data columns are shifted right by one because of the key column.
                            String columnName = columnNames[columnIndex + 1];
                            String primaryKey = lineSplits.get(0);
                            set(primaryKey, columnName, type.Parse(lineSplits.get(columnIndex + 1)));
                        } else {
                            String columnName = columnNames[columnIndex];
                            set(counter, columnName, type.Parse(lineSplits.get(columnIndex)));
                        }
                    }
                    counter++;
                }

                // Consume the line that follows the table body.
                reader.readLine();
        }
    } catch (Exception e) {
        // NOTE(review): failures are only logged, so a malformed report leaves this table
        // partially initialised. Kept as-is because callers may rely on it not throwing.
        //throw new StingException("Cannot read GATKReport: " + e);
        e.printStackTrace();
    }
}
/**
* Verifies that a table or column name has only alphanumeric characters - no spaces or special characters allowed
*
* @param name the name of the table or column
* @return true if the name is valid, false if otherwise
* @param name the name of the table or column
* @return true if the name is valid, false if otherwise
*/
private boolean isValidName(String name) {
Pattern p = Pattern.compile(INVALID_TABLE_NAME_REGEX);
@ -122,8 +236,8 @@ public class GATKReportTable {
/**
* Verifies that a table or column name has only alphanumeric characters - no spaces or special characters allowed
*
* @param description the name of the table or column
* @return true if the name is valid, false if otherwise
* @param description the name of the table or column
* @return true if the name is valid, false if otherwise
*/
private boolean isValidDescription(String description) {
Pattern p = Pattern.compile("\\r|\\n");
@ -135,15 +249,23 @@ public class GATKReportTable {
/**
* Construct a new GATK report table with the specified name and description
*
* @param tableName the name of the table
* @param tableDescription the description of the table
* @param tableName the name of the table
* @param tableDescription the description of the table
*/
public GATKReportTable(String tableName, String tableDescription) {
// Delegates to the three-argument constructor with sortByPrimaryKey = true.
this(tableName, tableDescription, true);
}
/**
* Construct a new GATK report table with the specified name and description and whether to sort rows by the primary
* key
*
* @param tableName the name of the table
* @param tableDescription the description of the table
* @param sortByPrimaryKey whether to sort rows by the primary key (instead of order added)
*/
public GATKReportTable(String tableName, String tableDescription, boolean sortByPrimaryKey) {
if (!isValidName(tableName)) {
if (!isValidName(tableName)) {
throw new ReviewedStingException("Attempted to set a GATKReportTable name of '" + tableName + "'. GATKReportTable names must be purely alphanumeric - no spaces or special characters are allowed.");
}
@ -158,28 +280,21 @@ public class GATKReportTable {
columns = new GATKReportColumns();
}
public GATKReportVersion getVersion() {
return version;
}
protected void setVersion(GATKReportVersion version) {
this.version = version;
}
/**
* Add a primary key column. This becomes the unique identifier for every column in the table.
*
* @param primaryKeyName the name of the primary key column
* @param primaryKeyName the name of the primary key column
*/
public void addPrimaryKey(String primaryKeyName) {
// Delegates to the two-argument overload with display = true.
addPrimaryKey(primaryKeyName, true);
}
/**
* Add an optionally visible primary key column. This becomes the unique identifier for every column in the table, and will always be printed as the first column.
* Add an optionally visible primary key column. This becomes the unique identifier for every column in the table,
* and will always be printed as the first column.
*
* @param primaryKeyName the name of the primary key column
* @param display should this primary key be displayed?
* @param primaryKeyName the name of the primary key column
* @param display should this primary key be displayed?
*/
public void addPrimaryKey(String primaryKeyName, boolean display) {
if (!isValidName(primaryKeyName)) {
@ -195,6 +310,7 @@ public class GATKReportTable {
/**
* Returns the first primary key matching the dotted column values.
* Ex: dbsnp.eval.called.all.novel.all
*
* @param dottedColumnValues Period concatenated values.
* @return The first primary key matching the column values or throws an exception.
*/
@ -208,6 +324,7 @@ public class GATKReportTable {
/**
* Returns true if there is at least one row with the dotted column values.
* Ex: dbsnp.eval.called.all.novel.all
*
* @param dottedColumnValues Period concatenated values.
* @return true if there is at least one row matching the columns.
*/
@ -218,6 +335,7 @@ public class GATKReportTable {
/**
* Returns the first primary key matching the dotted column values.
* Ex: dbsnp.eval.called.all.novel.all
*
* @param dottedColumnValues Period concatenated values.
* @return The first primary key matching the column values or null.
*/
@ -228,6 +346,7 @@ public class GATKReportTable {
/**
* Returns the first primary key matching the column values.
* Ex: new String[] { "dbsnp", "eval", "called", "all", "novel", "all" }
*
* @param columnValues column values.
* @return The first primary key matching the column values.
*/
@ -235,7 +354,7 @@ public class GATKReportTable {
for (Object primaryKey : primaryKeyColumn) {
boolean matching = true;
for (int i = 0; matching && i < columnValues.length; i++) {
matching = ObjectUtils.equals(columnValues[i], get(primaryKey, i+1));
matching = ObjectUtils.equals(columnValues[i], get(primaryKey, i + 1));
}
if (matching)
return primaryKey;
@ -244,29 +363,65 @@ public class GATKReportTable {
}
/**
* Add a column to the report and specify the default value that should be supplied if a given position in the table is never explicitly set.
* Add a column to the report and specify the default value that should be supplied if a given position in the table
* is never explicitly set.
*
* @param columnName the name of the column
* @param defaultValue the default value for the column
* @param columnName the name of the column
* @param defaultValue the default value for the column
*/
public void addColumn(String columnName, Object defaultValue) {
addColumn(columnName, defaultValue, null);
addColumn(columnName, defaultValue, true);
}
/**
* Add a column to the report, specify the default column value, and specify whether the column should be displayed
* in the final output (useful when intermediate columns are necessary for later calculations, but are not required
* to be in the output file.
*
* @param columnName the name of the column
* @param defaultValue the default value of the column
* @param display if true - the column will be displayed; if false - the column will be hidden
*/
public void addColumn(String columnName, Object defaultValue, boolean display) {
// Delegates to the four-argument overload, passing an empty format string.
addColumn(columnName, defaultValue, display, "");
}
/**
* Add a column to the report, specify the default column value, and specify whether the column should be displayed
* in the final output (useful when intermediate columns are necessary for later calculations, but are not required
* to be in the output file.
*
* @param columnName the name of the column
* @param defaultValue the default value of the column
* @param format the format string used to display data
*/
public void addColumn(String columnName, Object defaultValue, String format) {
// Delegates to the four-argument overload with display = true.
addColumn(columnName, defaultValue, true, format);
}
/**
* Add a column to the report, specify the default column value, and specify whether the column should be displayed in the final output (useful when intermediate columns are necessary for later calculations, but are not required to be in the output file.
* Add a column to the report, specify whether the column should be displayed in the final output (useful when
* intermediate columns are necessary for later calculations, but are not required to be in the output file), and the
* format string used to display the data.
*
* @param columnName the name of the column
* @param defaultValue the default value of the column
* @param display if true - the column will be displayed; if false - the column will be hidden
* @param columnName the name of the column
* @param display if true - the column will be displayed; if false - the column will be hidden
* @param format the format string used to display data
*/
public void addColumn(String columnName, Object defaultValue, boolean display) {
addColumn(columnName, defaultValue, display, null);
public void addColumn(String columnName, boolean display, String format) {
addColumn(columnName, null, display, format);
}
/**
* Add a column to the report, specify the default column value, whether the column should be displayed in the final
* output (useful when intermediate columns are necessary for later calculations, but are not required to be in the
* output file), and the format string used to display the data.
*
* @param columnName the name of the column
* @param defaultValue the default value of the column
* @param display if true - the column will be displayed; if false - the column will be hidden
* @param format the format string used to display data
*/
public void addColumn(String columnName, Object defaultValue, boolean display, String format) {
if (!isValidName(columnName)) {
throw new ReviewedStingException("Attempted to set a GATKReportTable column name of '" + columnName + "'. GATKReportTable column names must be purely alphanumeric - no spaces or special characters are allowed.");
@ -274,11 +429,17 @@ public class GATKReportTable {
columns.put(columnName, new GATKReportColumn(columnName, defaultValue, display, format));
}
public GATKReportVersion getVersion() {
// Returns the GATKReport.LATEST_REPORT_VERSION constant; no per-table state is consulted here.
return GATKReport.LATEST_REPORT_VERSION;
}
/**
* Check if the requested element exists, and if not, create it.
*
* @param primaryKey the primary key value
* @param columnName the name of the column
* @param primaryKey the primary key value
* @param columnName the name of the column
*/
private void verifyEntry(Object primaryKey, String columnName) {
if (!columns.containsKey(columnName)) {
@ -303,26 +464,68 @@ public class GATKReportTable {
/**
* Set the value for a given position in the table
*
* @param primaryKey the primary key value
* @param columnName the name of the column
* @param value the value to set
* @param primaryKey the primary key value
* @param columnName the name of the column
* @param value the value to set
*/
public void set(Object primaryKey, String columnName, Object value) {
verifyEntry(primaryKey, columnName);
GATKReportColumn column = columns.get(columnName);
// NOTE(review): the put() below stores the value before any of the checks that follow run.
// It looks like the pre-change statement from the diff rather than intended logic; confirm.
columns.get(columnName).put(primaryKey, value);
// Null values are not stored: they are replaced by the literal string "null".
if (value == null)
value = "null";
// If the value is a String but the column is not a String column, try to convert the text
// into the column's type. newValue stays null when no conversion applies or parsing fails.
Object newValue = null;
if (value instanceof String && !column.getDataType().equals(GATKReportDataType.String)) {
// Integer columns: parse as a Long.
if (column.getDataType().equals(GATKReportDataType.Integer)) {
try {
newValue = Long.parseLong((String) value);
} catch (Exception e) {
// Not parseable as a Long: keep the original string so the type check below decides.
}
}
// Decimal columns: parse as a Double.
if (column.getDataType().equals(GATKReportDataType.Decimal)) {
try {
newValue = Double.parseDouble((String) value);
} catch (Exception e) {
// Not parseable as a Double: keep the original string so the type check below decides.
}
}
// Byte columns: a one-character string is converted to that character.
if (column.getDataType().equals(GATKReportDataType.Byte) &&
((String) value).length() == 1) {
newValue = ((String) value).charAt(0);
}
}
// Use the converted value only when a conversion succeeded.
if (newValue != null)
value = newValue;
// Store the value when its detected type matches the column type or the column type is
// Unknown; otherwise reject it. The value == null test cannot be true here because null
// was replaced by "null" above.
if (column.getDataType().equals(GATKReportDataType.fromObject(value)) ||
column.getDataType().equals(GATKReportDataType.Unknown) ||
value == null)
columns.get(columnName).put(primaryKey, value);
else
throw new ReviewedStingException(String.format("Tried to add an object of type: %s to a column of type: %s",
GATKReportDataType.fromObject(value).name(), column.getDataType().name()));
}
/**
* Get a value from the given position in the table
*
* @param primaryKey the primary key value
* @param columnName the name of the column
* @return the value stored at the specified position in the table
* @param primaryKey the primary key value
* @param columnName the name of the column
* @return the value stored at the specified position in the table
*/
public Object get(Object primaryKey, String columnName) {
// verifyEntry runs first, so the lookup below is made against a validated entry.
verifyEntry(primaryKey, columnName);
return columns.get(columnName).get(primaryKey);
}
@ -331,7 +534,7 @@ public class GATKReportTable {
*
* @param primaryKey the primary key value
* @param columnIndex the index of the column
* @return the value stored at the specified position in the table
* @return the value stored at the specified position in the table
*/
private Object get(Object primaryKey, int columnIndex) {
return columns.getByIndex(columnIndex).get(primaryKey);
@ -340,8 +543,8 @@ public class GATKReportTable {
/**
* Increment an element in the table. This implementation is awful - a functor would probably be better.
*
* @param primaryKey the primary key value
* @param columnName the name of the column
* @param primaryKey the primary key value
* @param columnName the name of the column
*/
public void increment(Object primaryKey, String columnName) {
Object oldValue = get(primaryKey, columnName);
@ -369,8 +572,8 @@ public class GATKReportTable {
/**
* Decrement an element in the table. This implementation is awful - a functor would probably be better.
*
* @param primaryKey the primary key value
* @param columnName the name of the column
* @param primaryKey the primary key value
* @param columnName the name of the column
*/
public void decrement(Object primaryKey, String columnName) {
Object oldValue = get(primaryKey, columnName);
@ -398,9 +601,9 @@ public class GATKReportTable {
/**
* Add the specified value to an element in the table
*
* @param primaryKey the primary key value
* @param columnName the name of the column
* @param valueToAdd the value to add
* @param primaryKey the primary key value
* @param columnName the name of the column
* @param valueToAdd the value to add
*/
public void add(Object primaryKey, String columnName, Object valueToAdd) {
Object oldValue = get(primaryKey, columnName);
@ -428,8 +631,8 @@ public class GATKReportTable {
/**
* Subtract the specified value from an element in the table
*
* @param primaryKey the primary key value
* @param columnName the name of the column
* @param primaryKey the primary key value
* @param columnName the name of the column
* @param valueToSubtract the value to subtract
*/
public void subtract(Object primaryKey, String columnName, Object valueToSubtract) {
@ -458,9 +661,9 @@ public class GATKReportTable {
/**
* Multiply the specified value to an element in the table
*
* @param primaryKey the primary key value
* @param columnName the name of the column
* @param valueToMultiply the value to multiply by
* @param primaryKey the primary key value
* @param columnName the name of the column
* @param valueToMultiply the value to multiply by
*/
public void multiply(Object primaryKey, String columnName, Object valueToMultiply) {
Object oldValue = get(primaryKey, columnName);
@ -488,9 +691,9 @@ public class GATKReportTable {
/**
* Divide the specified value from an element in the table
*
* @param primaryKey the primary key value
* @param columnName the name of the column
* @param valueToDivide the value to divide by
* @param primaryKey the primary key value
* @param columnName the name of the column
* @param valueToDivide the value to divide by
*/
public void divide(Object primaryKey, String columnName, Object valueToDivide) {
Object oldValue = get(primaryKey, columnName);
@ -518,9 +721,9 @@ public class GATKReportTable {
/**
* Add two columns to each other and set the results to a third column
*
* @param columnToSet the column that should hold the results
* @param augend the column that shall be the augend
* @param addend the column that shall be the addend
* @param columnToSet the column that should hold the results
* @param augend the column that shall be the augend
* @param addend the column that shall be the addend
*/
public void addColumns(String columnToSet, String augend, String addend) {
for (Object primaryKey : primaryKeyColumn) {
@ -536,8 +739,8 @@ public class GATKReportTable {
/**
* Subtract one column from another and set the results to a third column
*
* @param columnToSet the column that should hold the results
* @param minuend the column that shall be the minuend (the a in a - b)
* @param columnToSet the column that should hold the results
* @param minuend the column that shall be the minuend (the a in a - b)
* @param subtrahend the column that shall be the subtrahend (the b in a - b)
*/
public void subtractColumns(String columnToSet, String minuend, String subtrahend) {
@ -555,8 +758,8 @@ public class GATKReportTable {
* Multiply two columns by each other and set the results to a third column
*
* @param columnToSet the column that should hold the results
* @param multiplier the column that shall be the multiplier
* @param multiplicand the column that shall be the multiplicand
* @param multiplier the column that shall be the multiplier
* @param multiplicand the column that shall be the multiplicand
*/
public void multiplyColumns(String columnToSet, String multiplier, String multiplicand) {
for (Object primaryKey : primaryKeyColumn) {
@ -572,9 +775,9 @@ public class GATKReportTable {
/**
* Divide two columns by each other and set the results to a third column
*
* @param columnToSet the column that should hold the results
* @param numeratorColumn the column that shall be the numerator
* @param denominatorColumn the column that shall be the denominator
* @param columnToSet the column that should hold the results
* @param numeratorColumn the column that shall be the numerator
* @param denominatorColumn the column that shall be the denominator
*/
public void divideColumns(String columnToSet, String numeratorColumn, String denominatorColumn) {
for (Object primaryKey : primaryKeyColumn) {
@ -589,10 +792,11 @@ public class GATKReportTable {
/**
* Return the print width of the primary key column
* @return the width of the primary key column
*
* @return the width of the primary key column
*/
public int getPrimaryKeyColumnWidth() {
int maxWidth = primaryKeyName.length();
int maxWidth = getPrimaryKeyName().length();
for (Object primaryKey : primaryKeyColumn) {
int width = primaryKey.toString().length();
@ -608,9 +812,18 @@ public class GATKReportTable {
/**
* Write the table to the PrintStream, formatted nicely to be human-readable, AWK-able, and R-friendly.
*
* @param out the PrintStream to which the table should be written
* @param out the PrintStream to which the table should be written
*/
public void write(PrintStream out) {
/*
* Table header:
* #:GATKTable:nColumns:nRows:(DataType for each column):;
* #:GATKTable:TableName:Description :;
* key colA colB
* row1 xxxx xxxxx
*/
// Get the column widths for everything
HashMap<String, GATKReportColumnFormat> columnFormats = new HashMap<String, GATKReportColumnFormat>();
for (String columnName : columns.keySet()) {
@ -619,18 +832,30 @@ public class GATKReportTable {
String primaryKeyFormat = "%-" + getPrimaryKeyColumnWidth() + "s";
// Emit the table definition
out.printf("##:GATKReport.%s %s : %s%n", LATEST_REPORT_VERSION.versionString, tableName, tableDescription);
String formatHeader = String.format(GATKTABLE_HEADER_PREFIX + ":%b:%d:%d", primaryKeyDisplay, getColumns().size(), getNumRows());
// Add all the formats for all the columns
for (GATKReportColumn column : getColumns()) {
if (column.isDisplayable())
formatHeader += (SEPARATOR + column.getFormat());
}
out.println(formatHeader + ENDLINE);
out.printf(GATKTABLE_HEADER_PREFIX + ":%s:%s\n", tableName, tableDescription);
//out.printf("#:GATKTable:%s:%s", Algorithm);
// Emit the table header, taking into account the padding requirement if the primary key is a hidden column
boolean needsPadding = false;
if (primaryKeyDisplay) {
out.printf(primaryKeyFormat, primaryKeyName);
out.printf(primaryKeyFormat, getPrimaryKeyName());
needsPadding = true;
}
for (String columnName : columns.keySet()) {
if (columns.get(columnName).isDisplayable()) {
if (needsPadding) { out.printf(" "); }
if (needsPadding) {
out.printf(" ");
}
out.printf(columnFormats.get(columnName).getNameFormat(), columnName);
needsPadding = true;
@ -649,7 +874,9 @@ public class GATKReportTable {
for (String columnName : columns.keySet()) {
if (columns.get(columnName).isDisplayable()) {
if (needsPadding) { out.printf(" "); }
if (needsPadding) {
out.printf(" ");
}
String value = columns.get(columnName).getStringValue(primaryKey);
out.printf(columnFormats.get(columnName).getValueFormat(), value);
@ -660,7 +887,6 @@ public class GATKReportTable {
out.printf("%n");
}
// Close the table
out.printf("%n");
}
@ -679,4 +905,97 @@ public class GATKReportTable {
public GATKReportColumns getColumns() {
    // Hands back the table's column collection itself (no copy is made here)
    return this.columns;
}
/**
 * Merges another GATK report table into this one. This is the general entry point for gathering tables; the
 * only strategy currently applied is to take over the rows of the other table via addRowsFrom.
 *
 * @param input the table whose contents are merged into this table
 * @throws ReviewedStingException if the column key sets or the primary key names of the two tables differ
 */
protected void combineWith(GATKReportTable input) {
    // This method is kept separate from addRowsFrom because it is meant to gain the ability to
    // sum, average, etc. rows.
    // TODO: Add other combining algorithms

    // Both the column keys AND the primary key name have to agree before anything is merged
    boolean sameColumnKeys = input.getColumns().keySet().equals(this.getColumns().keySet());
    if (!sameColumnKeys || !input.getPrimaryKeyName().equals(this.getPrimaryKeyName())) {
        throw new ReviewedStingException("Failed to combine GATKReportTable, columns don't match!");
    }

    this.addRowsFrom(input);
}
/**
 * The default gather algorithm: copies every value of every column of the argument into this table. When a
 * row key is already present in the matching column of this table, the value is replaced and a notice is
 * printed to standard output.
 *
 * @param input the GATK table whose rows are copied into this one
 */
private void addRowsFrom(GATKReportTable input) {
    // Walk the incoming table column by column
    for (String columnKey : input.getColumns().keySet()) {
        GATKReportColumn existing = this.getColumns().get(columnKey);
        GATKReportColumn incoming = input.getColumns().get(columnKey);

        // Each column maps a row key to a value; transfer every entry of the incoming column
        for (Object rowKey : incoming.keySet()) {
            // Membership is checked before calling set(), since set() may modify the column
            boolean replacesExisting = existing.containsKey(rowKey);

            // TODO we should be able to handle combining data by adding, averaging, etc.
            this.set(rowKey, columnKey, incoming.get(rowKey));

            if (replacesExisting) {
                System.out.printf("OVERWRITING Row with PK: %s \n", rowKey);
            }
        }
    }
}
/**
 * Accessor for the name of the primary key column.
 *
 * @return the primary key name of this table
 */
public String getPrimaryKeyName() {
    return this.primaryKeyName;
}
/**
 * Tells whether the given table has the same format as this one: the columns must report the same format, and
 * the primary key display flag, primary key name, table name and table description must all match. The row
 * data itself is not compared here. This is the check used to decide whether two tables are gatherable or
 * reduceable.
 *
 * @param table another GATK table
 * @return true if the tables are gatherable
 */
public boolean isSameFormat(GATKReportTable table) {
    // Should we add the sortByPrimaryKey as a check?
    if (!columns.isSameFormat(table.columns)) {
        return false;
    }
    if (primaryKeyDisplay != table.primaryKeyDisplay) {
        return false;
    }
    if (!primaryKeyName.equals(table.primaryKeyName)) {
        return false;
    }
    return tableName.equals(table.tableName)
            && tableDescription.equals(table.tableDescription);
}
/**
 * Checks that the tables are exactly the same: same format (see isSameFormat), equal columns, equal primary
 * key column and the same sortByPrimaryKey setting.
 *
 * Note that this method takes a GATKReportTable parameter, so it overloads rather than overrides
 * Object.equals(Object).
 *
 * @param table another GATK report table; may be null
 * @return true if all fields in the tables and columns are equal, false otherwise (including for null)
 */
public boolean equals(GATKReportTable table) {
    // A null table can never be equal; without this guard isSameFormat would dereference null
    if (table == null) {
        return false;
    }
    if (!isSameFormat(table)) {
        return false;
    }
    return (columns.equals(table.columns) &&
            primaryKeyColumn.equals(table.primaryKeyColumn) &&
            sortByPrimaryKey == table.sortByPrimaryKey);
}
}

View File

@ -31,7 +31,7 @@ public enum GATKReportVersion {
* Differences between other versions:
* - Does not allow spaces in cells.
* - Mostly fixed width but has a bug where the string width of floating point
* values was not measured correctly leading to columns that aren't aligned
* values was not measured correctly leading to columns that aren't aligned
*/
V0_1("v0.1"),
@ -40,7 +40,15 @@ public enum GATKReportVersion {
* - Spaces allowed in cells, for example in sample names with spaces in them ex: "C507/FG-CR 6".
* - Fixed width fixed for floating point values
*/
V0_2("v0.2");
V0_2("v0.2"),
/*
* Differences between v0.x
* - Added table and report headers
* - Headers changed format, include the number of tables, rows, and metadata for gathering
* - IS GATHERABLE
*/
V1_0("v1.0");
public final String versionString;
@ -53,8 +61,13 @@ public enum GATKReportVersion {
return versionString;
}
/**
 * Compares two report versions by their version strings.
 *
 * @param that the version to compare against; may be null
 * @return true if both versions carry the same version string, false otherwise (including for null)
 */
public boolean equals(GATKReportVersion that) {
    // Guard against null so the comparison reports inequality instead of throwing
    if (that == null) {
        return false;
    }
    return (versionString.equals(that.versionString));
}
/**
* Returns the GATK Report Version from the file header.
*
* @param header Header from the file starting with ##:GATKReport.v[version]
* @return The version as an enum.
*/
@ -65,6 +78,9 @@ public enum GATKReportVersion {
if (header.startsWith("##:GATKReport.v0.2 "))
return GATKReportVersion.V0_2;
if (header.startsWith("#:GATKReport.v1.0"))
return GATKReportVersion.V1_0;
throw new ReviewedStingException("Unknown GATK report version in header: " + header);
}
}

View File

@ -36,9 +36,14 @@ import java.io.IOException;
/**
* Class implementing diffnode reader for GATKReports
*/
// TODO Version check to be added at the report level
public class GATKReportDiffableReader implements DiffableReader {
@Override
public String getName() { return "GATKReport"; }
public String getName() {
return "GATKReport";
}
@Override
public DiffElement readFromFile(File file, int maxElementsToRead) {
@ -47,12 +52,12 @@ public class GATKReportDiffableReader implements DiffableReader {
// one line reads the whole thing into memory
GATKReport report = new GATKReport(file);
for (GATKReportTable table : report.getTables() ) {
for (GATKReportTable table : report.getTables()) {
root.add(tableToNode(table, root));
}
return root.getBinding();
} catch ( Exception e ) {
} catch (Exception e) {
return null;
}
}
@ -62,9 +67,8 @@ public class GATKReportDiffableReader implements DiffableReader {
tableRoot.add("Description", table.getTableDescription());
tableRoot.add("NumberOfRows", table.getNumRows());
tableRoot.add("Version", table.getVersion());
for ( GATKReportColumn column : table.getColumns().values() ) {
for (GATKReportColumn column : table.getColumns().values()) {
DiffNode columnRoot = DiffNode.empty(column.getColumnName(), tableRoot);
columnRoot.add("Width", column.getColumnFormat().getWidth());
@ -72,7 +76,7 @@ public class GATKReportDiffableReader implements DiffableReader {
columnRoot.add("Displayable", column.isDisplayable());
int n = 1;
for ( Object elt : column.values() ) {
for (Object elt : column.values()) {
String name = column.getColumnName() + n++;
columnRoot.add(name, elt.toString());
}
@ -91,7 +95,7 @@ public class GATKReportDiffableReader implements DiffableReader {
new FileReader(file).read(buff, 0, HEADER.length());
String firstLine = new String(buff);
return firstLine.startsWith(HEADER);
} catch ( IOException e ) {
} catch (IOException e) {
return false;
}
}

View File

@ -317,7 +317,7 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
sum.nCallsMade++;
writer.add(value);
} catch (IllegalArgumentException e) {
throw new IllegalArgumentException(e.getMessage() + "; this is often caused by using the --assume_single_sample_reads argument with the wrong sample name");
throw new IllegalArgumentException(e.getMessage());
}
return sum;

View File

@ -19,19 +19,19 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
*/
@Analysis(description = "The overlap between eval and comp sites")
public class CompOverlap extends VariantEvaluator implements StandardEval {
@DataPoint(description = "number of eval SNP sites")
@DataPoint(description = "number of eval SNP sites", format = "%d")
long nEvalVariants = 0;
@DataPoint(description = "number of eval sites outside of comp sites")
@DataPoint(description = "number of eval sites outside of comp sites", format = "%d")
long novelSites = 0;
@DataPoint(description = "number of eval sites at comp sites")
@DataPoint(description = "number of eval sites at comp sites", format = "%d")
long nVariantsAtComp = 0;
@DataPoint(description = "percentage of eval sites at comp sites", format = "%.2f" )
double compRate = 0.0;
@DataPoint(description = "number of concordant sites")
@DataPoint(description = "number of concordant sites", format = "%d")
long nConcordant = 0;
@DataPoint(description = "the concordance rate", format = "%.2f")

View File

@ -15,50 +15,50 @@ public class CountVariants extends VariantEvaluator implements StandardEval {
// the following fields are in output order:
// basic counts on various rates found
@DataPoint(description = "Number of processed loci")
@DataPoint(description = "Number of processed loci", format = "%d")
public long nProcessedLoci = 0;
@DataPoint(description = "Number of called loci")
@DataPoint(description = "Number of called loci", format = "%d")
public long nCalledLoci = 0;
@DataPoint(description = "Number of reference loci")
@DataPoint(description = "Number of reference loci", format = "%d")
public long nRefLoci = 0;
@DataPoint(description = "Number of variant loci")
@DataPoint(description = "Number of variant loci", format = "%d")
public long nVariantLoci = 0;
// the following two calculations get set in the finalizeEvaluation
@DataPoint(description = "Variants per loci rate")
@DataPoint(description = "Variants per loci rate", format = "%.8f")
public double variantRate = 0;
@DataPoint(description = "Number of variants per base")
@DataPoint(description = "Number of variants per base", format = "%.8f")
public double variantRatePerBp = 0;
@DataPoint(description = "Number of snp loci")
@DataPoint(description = "Number of snp loci", format = "%d")
public long nSNPs = 0;
@DataPoint(description = "Number of mnp loci")
@DataPoint(description = "Number of mnp loci", format = "%d")
public long nMNPs = 0;
@DataPoint(description = "Number of insertions")
@DataPoint(description = "Number of insertions", format = "%d")
public long nInsertions = 0;
@DataPoint(description = "Number of deletions")
@DataPoint(description = "Number of deletions", format = "%d")
public long nDeletions = 0;
@DataPoint(description = "Number of complex indels")
@DataPoint(description = "Number of complex indels", format = "%d")
public long nComplex = 0;
@DataPoint(description = "Number of symbolic events")
@DataPoint(description = "Number of symbolic events", format = "%d")
public long nSymbolic = 0;
@DataPoint(description = "Number of mixed loci (loci that can't be classified as a SNP, Indel or MNP)")
@DataPoint(description = "Number of mixed loci (loci that can't be classified as a SNP, Indel or MNP)", format = "%d")
public long nMixed = 0;
@DataPoint(description = "Number of no calls loci")
@DataPoint(description = "Number of no calls loci", format = "%d")
public long nNoCalls = 0;
@DataPoint(description = "Number of het loci")
@DataPoint(description = "Number of het loci", format = "%d")
public long nHets = 0;
@DataPoint(description = "Number of hom ref loci")
@DataPoint(description = "Number of hom ref loci", format = "%d")
public long nHomRef = 0;
@DataPoint(description = "Number of hom var loci")
@DataPoint(description = "Number of hom var loci", format = "%d")
public long nHomVar = 0;
@DataPoint(description = "Number of singletons")
@DataPoint(description = "Number of singletons", format = "%d")
public long nSingletons = 0;
@DataPoint(description = "Number of derived homozygotes")
@DataPoint(description = "Number of derived homozygotes", format = "%d")
public long nHomDerived = 0;
// calculations that get set in the finalizeEvaluation method

View File

@ -1,17 +1,16 @@
package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators;
import org.broadinstitute.sting.gatk.samples.Sample;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.samples.Sample;
import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker;
import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis;
import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint;
import org.broadinstitute.sting.utils.MendelianViolation;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Map;
import java.util.Set;
@ -44,80 +43,80 @@ import java.util.Set;
@Analysis(name = "Mendelian Violation Evaluator", description = "Mendelian Violation Evaluator")
public class MendelianViolationEvaluator extends VariantEvaluator {
@DataPoint(description = "Number of variants found with at least one family having genotypes")
@DataPoint(description = "Number of variants found with at least one family having genotypes", format = "%d")
long nVariants;
@DataPoint(description = "Number of variants found with no family having genotypes -- these sites do not count in the nNoCall")
@DataPoint(description = "Number of variants found with no family having genotypes -- these sites do not count in the nNoCall", format = "%d")
long nSkipped;
@DataPoint(description="Number of variants x families called (no missing genotype or lowqual)")
@DataPoint(description="Number of variants x families called (no missing genotype or lowqual)", format = "%d")
long nFamCalled;
@DataPoint(description="Number of variants x families called (no missing genotype or lowqual) that contain at least one var allele.")
@DataPoint(description="Number of variants x families called (no missing genotype or lowqual) that contain at least one var allele.", format = "%d")
long nVarFamCalled;
@DataPoint(description="Number of variants x families discarded as low quality")
@DataPoint(description="Number of variants x families discarded as low quality", format = "%d")
long nLowQual;
@DataPoint(description="Number of variants x families discarded as no call")
@DataPoint(description="Number of variants x families discarded as no call", format = "%d")
long nNoCall;
@DataPoint(description="Number of loci with mendelian violations")
@DataPoint(description="Number of loci with mendelian violations", format = "%d")
long nLociViolations;
@DataPoint(description = "Number of mendelian violations found")
@DataPoint(description = "Number of mendelian violations found", format = "%d")
long nViolations;
/*@DataPoint(description = "number of child hom ref calls where the parent was hom variant")
/*@DataPoint(description = "number of child hom ref calls where the parent was hom variant", format = "%d")
long KidHomRef_ParentHomVar;
@DataPoint(description = "number of child het calls where the parent was hom ref")
@DataPoint(description = "number of child het calls where the parent was hom ref", format = "%d")
long KidHet_ParentsHomRef;
@DataPoint(description = "number of child het calls where the parent was hom variant")
@DataPoint(description = "number of child het calls where the parent was hom variant", format = "%d")
long KidHet_ParentsHomVar;
@DataPoint(description = "number of child hom variant calls where the parent was hom ref")
@DataPoint(description = "number of child hom variant calls where the parent was hom ref", format = "%d")
long KidHomVar_ParentHomRef;
*/
@DataPoint(description="Number of mendelian violations of the type HOM_REF/HOM_REF -> HOM_VAR")
@DataPoint(description="Number of mendelian violations of the type HOM_REF/HOM_REF -> HOM_VAR", format = "%d")
long mvRefRef_Var;
@DataPoint(description="Number of mendelian violations of the type HOM_REF/HOM_REF -> HET")
@DataPoint(description="Number of mendelian violations of the type HOM_REF/HOM_REF -> HET", format = "%d")
long mvRefRef_Het;
@DataPoint(description="Number of mendelian violations of the type HOM_REF/HET -> HOM_VAR")
@DataPoint(description="Number of mendelian violations of the type HOM_REF/HET -> HOM_VAR", format = "%d")
long mvRefHet_Var;
@DataPoint(description="Number of mendelian violations of the type HOM_REF/HOM_VAR -> HOM_VAR")
@DataPoint(description="Number of mendelian violations of the type HOM_REF/HOM_VAR -> HOM_VAR", format = "%d")
long mvRefVar_Var;
@DataPoint(description="Number of mendelian violations of the type HOM_REF/HOM_VAR -> HOM_REF")
@DataPoint(description="Number of mendelian violations of the type HOM_REF/HOM_VAR -> HOM_REF", format = "%d")
long mvRefVar_Ref;
@DataPoint(description="Number of mendelian violations of the type HOM_VAR/HET -> HOM_REF")
@DataPoint(description="Number of mendelian violations of the type HOM_VAR/HET -> HOM_REF", format = "%d")
long mvVarHet_Ref;
@DataPoint(description="Number of mendelian violations of the type HOM_VAR/HOM_VAR -> HOM_REF")
@DataPoint(description="Number of mendelian violations of the type HOM_VAR/HOM_VAR -> HOM_REF", format = "%d")
long mvVarVar_Ref;
@DataPoint(description="Number of mendelian violations of the type HOM_VAR/HOM_VAR -> HET")
@DataPoint(description="Number of mendelian violations of the type HOM_VAR/HOM_VAR -> HET", format = "%d")
long mvVarVar_Het;
/*@DataPoint(description ="Number of inherited var alleles from het parents")
/*@DataPoint(description ="Number of inherited var alleles from het parents", format = "%d")
long nInheritedVar;
@DataPoint(description ="Number of inherited ref alleles from het parents")
@DataPoint(description ="Number of inherited ref alleles from het parents", format = "%d")
long nInheritedRef;*/
@DataPoint(description="Number of HomRef/HomRef/HomRef trios")
@DataPoint(description="Number of HomRef/HomRef/HomRef trios", format = "%d")
long HomRefHomRef_HomRef;
@DataPoint(description="Number of Het/Het/Het trios")
@DataPoint(description="Number of Het/Het/Het trios", format = "%d")
long HetHet_Het;
@DataPoint(description="Number of Het/Het/HomRef trios")
@DataPoint(description="Number of Het/Het/HomRef trios", format = "%d")
long HetHet_HomRef;
@DataPoint(description="Number of Het/Het/HomVar trios")
@DataPoint(description="Number of Het/Het/HomVar trios", format = "%d")
long HetHet_HomVar;
@DataPoint(description="Number of HomVar/HomVar/HomVar trios")
@DataPoint(description="Number of HomVar/HomVar/HomVar trios", format = "%d")
long HomVarHomVar_HomVar;
@DataPoint(description="Number of HomRef/HomVar/Het trios")
@DataPoint(description="Number of HomRef/HomVar/Het trios", format = "%d")
long HomRefHomVAR_Het;
@DataPoint(description="Number of ref alleles inherited from het/het parents")
@DataPoint(description="Number of ref alleles inherited from het/het parents", format = "%d")
long HetHet_inheritedRef;
@DataPoint(description="Number of var alleles inherited from het/het parents")
@DataPoint(description="Number of var alleles inherited from het/het parents", format = "%d")
long HetHet_inheritedVar;
@DataPoint(description="Number of ref alleles inherited from homRef/het parents")
@DataPoint(description="Number of ref alleles inherited from homRef/het parents", format = "%d")
long HomRefHet_inheritedRef;
@DataPoint(description="Number of var alleles inherited from homRef/het parents")
@DataPoint(description="Number of var alleles inherited from homRef/het parents", format = "%d")
long HomRefHet_inheritedVar;
@DataPoint(description="Number of ref alleles inherited from homVar/het parents")
@DataPoint(description="Number of ref alleles inherited from homVar/het parents", format = "%d")
long HomVarHet_inheritedRef;
@DataPoint(description="Number of var alleles inherited from homVar/het parents")
@DataPoint(description="Number of var alleles inherited from homVar/het parents", format = "%d")
long HomVarHet_inheritedVar;
MendelianViolation mv;

View File

@ -48,45 +48,45 @@ public class MultiallelicSummary extends VariantEvaluator { // implements Standa
}
// basic counts on various rates found
@DataPoint(description = "Number of processed loci")
@DataPoint(description = "Number of processed loci", format = "%d")
public long nProcessedLoci = 0;
@DataPoint(description = "Number of SNPs")
@DataPoint(description = "Number of SNPs", format = "%d")
public int nSNPs = 0;
@DataPoint(description = "Number of multi-allelic SNPs")
@DataPoint(description = "Number of multi-allelic SNPs", format = "%d")
public int nMultiSNPs = 0;
@DataPoint(description = "% processed sites that are multi-allelic SNPs", format = "%.5f")
public double processedMultiSnpRatio = 0;
@DataPoint(description = "% SNP sites that are multi-allelic", format = "%.3f")
public double variantMultiSnpRatio = 0;
@DataPoint(description = "Number of Indels")
@DataPoint(description = "Number of Indels", format = "%d")
public int nIndels = 0;
@DataPoint(description = "Number of multi-allelic Indels")
@DataPoint(description = "Number of multi-allelic Indels", format = "%d")
public int nMultiIndels = 0;
@DataPoint(description = "% processed sites that are multi-allelic Indels", format = "%.5f")
public double processedMultiIndelRatio = 0;
@DataPoint(description = "% Indel sites that are multi-allelic", format = "%.3f")
public double variantMultiIndelRatio = 0;
@DataPoint(description = "Number of Transitions")
@DataPoint(description = "Number of Transitions", format = "%d")
public int nTi = 0;
@DataPoint(description = "Number of Transversions")
@DataPoint(description = "Number of Transversions", format = "%d")
public int nTv = 0;
@DataPoint(description = "Overall TiTv ratio", format = "%.2f")
public double TiTvRatio = 0;
@DataPoint(description = "Multi-allelic SNPs partially known")
@DataPoint(description = "Multi-allelic SNPs partially known", format = "%d")
public int knownSNPsPartial = 0;
@DataPoint(description = "Multi-allelic SNPs completely known")
@DataPoint(description = "Multi-allelic SNPs completely known", format = "%d")
public int knownSNPsComplete = 0;
@DataPoint(description = "Multi-allelic SNP Novelty Rate")
public String SNPNoveltyRate = "NA";
//TODO -- implement me
//@DataPoint(description = "Multi-allelic Indels partially known")
//@DataPoint(description = "Multi-allelic Indels partially known", format = "%d")
public int knownIndelsPartial = 0;
//@DataPoint(description = "Multi-allelic Indels completely known")
//@DataPoint(description = "Multi-allelic Indels completely known", format = "%d")
public int knownIndelsComplete = 0;
//@DataPoint(description = "Multi-allelic Indel Novelty Rate")
public String indelNoveltyRate = "NA";

View File

@ -33,7 +33,7 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
@Analysis(name = "PrintMissingComp", description = "the overlap between eval and comp sites")
public class PrintMissingComp extends VariantEvaluator {
@DataPoint(description = "number of eval sites outside of comp sites")
@DataPoint(description = "number of eval sites outside of comp sites", format = "%d")
long nMissing = 0;
//public PrintMissingComp(VariantEvalWalker parent) {

View File

@ -14,15 +14,15 @@ import java.util.concurrent.ConcurrentMap;
@Analysis(description = "Computes different estimates of theta based on variant sites and genotypes")
public class ThetaVariantEvaluator extends VariantEvaluator {
@DataPoint(description = "Average heterozygosity at variant sites; note that missing genotypes are ignored when computing this value")
@DataPoint(description = "Average heterozygosity at variant sites; note that missing genotypes are ignored when computing this value", format = "%.8f")
double avgHet = 0.0;
@DataPoint(description = "Average pairwise differences at aligned sequences; averaged over both number of sequeneces and number of variant sites; note that missing genotypes are ignored when computing this value")
@DataPoint(description = "Average pairwise differences at aligned sequences; averaged over both number of sequeneces and number of variant sites; note that missing genotypes are ignored when computing this value", format = "%.8f")
double avgAvgDiffs = 0.0;
@DataPoint(description = "Sum of heterozygosity over all variant sites; divide this by total target to get estimate of per base theta")
@DataPoint(description = "Sum of heterozygosity over all variant sites; divide this by total target to get estimate of per base theta", format = "%.8f")
double totalHet = 0.0;
@DataPoint(description = "Sum of pairwise diffs over all variant sites; divide this by total target to get estimate of per base theta")
@DataPoint(description = "Sum of pairwise diffs over all variant sites; divide this by total target to get estimate of per base theta", format = "%.8f")
double totalAvgDiffs = 0.0;
@DataPoint(description = "Theta for entire region estimated based on number of segregating sites; divide ths by total target to get estimate of per base theta")
@DataPoint(description = "Theta for entire region estimated based on number of segregating sites; divide ths by total target to get estimate of per base theta", format = "%.8f")
double thetaRegionNumSites = 0.0;
//helper variables

View File

@ -12,21 +12,21 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
@Analysis(description = "Ti/Tv Variant Evaluator")
public class TiTvVariantEvaluator extends VariantEvaluator implements StandardEval {
@DataPoint(description = "number of transition loci")
@DataPoint(description = "number of transition loci", format = "%d")
long nTi = 0;
@DataPoint(description = "number of transversion loci")
@DataPoint(description = "number of transversion loci", format = "%d")
long nTv = 0;
@DataPoint(description = "the transition to transversion ratio", format = "%.2f")
double tiTvRatio = 0.0;
@DataPoint(description = "number of comp transition sites")
@DataPoint(description = "number of comp transition sites", format = "%d")
long nTiInComp = 0;
@DataPoint(description = "number of comp transversion sites")
@DataPoint(description = "number of comp transversion sites", format = "%d")
long nTvInComp = 0;
@DataPoint(description = "the transition to transversion ratio for comp sites", format = "%.2f")
double TiTvRatioStandard = 0.0;
@DataPoint(description = "number of derived transition loci")
@DataPoint(description = "number of derived transition loci", format = "%d")
long nTiDerived = 0;
@DataPoint(description = "number of derived transversion loci")
@DataPoint(description = "number of derived transversion loci", format = "%d")
long nTvDerived = 0;
@DataPoint(description = "the derived transition to transversion ratio", format = "%.2f")
double tiTvDerivedRatio = 0.0;

View File

@ -24,29 +24,29 @@ import java.util.Collection;
@Analysis(description = "Assess site accuracy and sensitivity of callset against follow-up validation assay")
public class ValidationReport extends VariantEvaluator implements StandardEval {
// todo -- note this isn't strictly allele aware. It's really focused on sites. A/T call at a validated A/G site is currently counted as a TP
@DataPoint(description = "nComp") int nComp = 0;
@DataPoint(description = "TP") int TP = 0;
@DataPoint(description = "FP") int FP = 0;
@DataPoint(description = "FN") int FN = 0;
@DataPoint(description = "TN") int TN = 0;
@DataPoint(description = "nComp", format = "%d") int nComp = 0;
@DataPoint(description = "TP", format = "%d") int TP = 0;
@DataPoint(description = "FP", format = "%d") int FP = 0;
@DataPoint(description = "FN", format = "%d") int FN = 0;
@DataPoint(description = "TN", format = "%d") int TN = 0;
@DataPoint(description = "Sensitivity", format = "%.2f") double sensitivity = 0;
@DataPoint(description = "Specificity", format = "%.2f") double specificity = 0;
@DataPoint(description = "PPV", format = "%.2f") double PPV = 0;
@DataPoint(description = "FDR", format = "%.2f") double FDR = 0;
@DataPoint(description = "CompMonoEvalNoCall") int CompMonoEvalNoCall = 0;
@DataPoint(description = "CompMonoEvalFiltered") int CompMonoEvalFiltered = 0;
@DataPoint(description = "CompMonoEvalMono") int CompMonoEvalMono = 0;
@DataPoint(description = "CompMonoEvalPoly") int CompMonoEvalPoly = 0;
@DataPoint(description = "CompMonoEvalNoCall", format = "%d") int CompMonoEvalNoCall = 0;
@DataPoint(description = "CompMonoEvalFiltered", format = "%d") int CompMonoEvalFiltered = 0;
@DataPoint(description = "CompMonoEvalMono", format = "%d") int CompMonoEvalMono = 0;
@DataPoint(description = "CompMonoEvalPoly", format = "%d") int CompMonoEvalPoly = 0;
@DataPoint(description = "CompPolyEvalNoCall") int CompPolyEvalNoCall = 0;
@DataPoint(description = "CompPolyEvalFiltered") int CompPolyEvalFiltered = 0;
@DataPoint(description = "CompPolyEvalMono") int CompPolyEvalMono = 0;
@DataPoint(description = "CompPolyEvalPoly") int CompPolyEvalPoly = 0;
@DataPoint(description = "CompPolyEvalNoCall", format = "%d") int CompPolyEvalNoCall = 0;
@DataPoint(description = "CompPolyEvalFiltered", format = "%d") int CompPolyEvalFiltered = 0;
@DataPoint(description = "CompPolyEvalMono", format = "%d") int CompPolyEvalMono = 0;
@DataPoint(description = "CompPolyEvalPoly", format = "%d") int CompPolyEvalPoly = 0;
@DataPoint(description = "CompFiltered") int CompFiltered = 0;
@DataPoint(description = "Eval and comp have different alleles") int nDifferentAlleleSites = 0;
@DataPoint(description = "CompFiltered", format = "%d") int CompFiltered = 0;
@DataPoint(description = "Eval and comp have different alleles", format = "%d") int nDifferentAlleleSites = 0;
private static final boolean TREAT_ALL_SITES_IN_EVAL_VCF_AS_CALLED = true;
private static final boolean REQUIRE_IDENTICAL_ALLELES = false;

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2011, The Broad Institute
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
@ -58,39 +58,39 @@ public class VariantSummary extends VariantEvaluator implements StandardEval {
Map<String, IntervalTree<GenomeLoc>> knownCNVs = null;
// basic counts on various rates found
@DataPoint(description = "Number of samples")
@DataPoint(description = "Number of samples", format = "%d")
public long nSamples = 0;
@DataPoint(description = "Number of processed loci")
@DataPoint(description = "Number of processed loci", format = "%d")
public long nProcessedLoci = 0;
@DataPoint(description = "Number of SNPs")
@DataPoint(description = "Number of SNPs", format = "%d")
public long nSNPs = 0;
@DataPoint(description = "Overall TiTv ratio", format = "%.2f")
public double TiTvRatio = 0;
@DataPoint(description = "SNP Novelty Rate")
@DataPoint(description = "SNP Novelty Rate", format = "%s")
public String SNPNoveltyRate = "NA";
@DataPoint(description = "Mean number of SNPs per individual")
@DataPoint(description = "Mean number of SNPs per individual", format = "%d")
public long nSNPsPerSample = 0;
@DataPoint(description = "Mean TiTv ratio per individual", format = "%.2f")
public double TiTvRatioPerSample = 0;
@DataPoint(description = "Mean depth of coverage per sample at SNPs", format = "%.1f")
public double SNPDPPerSample = 0;
@DataPoint(description = "Number of Indels")
@DataPoint(description = "Number of Indels", format = "%d")
public long nIndels = 0;
@DataPoint(description = "Indel Novelty Rate")
@DataPoint(description = "Indel Novelty Rate", format = "%s")
public String IndelNoveltyRate = "NA";
@DataPoint(description = "Mean number of Indels per individual")
@DataPoint(description = "Mean number of Indels per individual", format = "%d")
public long nIndelsPerSample = 0;
@DataPoint(description = "Mean depth of coverage per sample at Indels", format = "%.1f")
public double IndelDPPerSample = 0;
@DataPoint(description = "Number of SVs")
@DataPoint(description = "Number of SVs", format = "%d")
public long nSVs = 0;
@DataPoint(description = "SV Novelty Rate")
@DataPoint(description = "SV Novelty Rate", format = "%s")
public String SVNoveltyRate = "NA";
@DataPoint(description = "Mean number of SVs per individual")
@DataPoint(description = "Mean number of SVs per individual", format = "%d")
public long nSVsPerSample = 0;
TypeSampleMap allVariantCounts, knownVariantCounts;

View File

@ -1,3 +1,27 @@
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.varianteval.util;
import org.apache.log4j.Logger;
@ -35,8 +59,8 @@ public class VariantEvalUtils {
* List all of the available evaluation modules, then exit successfully
*/
public void listModulesAndExit() {
List<Class<? extends VariantStratifier>> vsClasses = new PluginManager<VariantStratifier>( VariantStratifier.class ).getPlugins();
List<Class<? extends VariantEvaluator>> veClasses = new PluginManager<VariantEvaluator>( VariantEvaluator.class ).getPlugins();
List<Class<? extends VariantStratifier>> vsClasses = new PluginManager<VariantStratifier>(VariantStratifier.class).getPlugins();
List<Class<? extends VariantEvaluator>> veClasses = new PluginManager<VariantEvaluator>(VariantEvaluator.class).getPlugins();
logger.info("Available stratification modules:");
logger.info("(Standard modules are starred)");
@ -58,9 +82,9 @@ public class VariantEvalUtils {
/**
* Initialize required, standard and user-specified stratification objects
*
* @param variantEvalWalker the parent walker
* @param noStandardStrats don't use the standard stratifications
* @param modulesToUse the list of stratification modules to use
* @param variantEvalWalker the parent walker
* @param noStandardStrats don't use the standard stratifications
* @param modulesToUse the list of stratification modules to use
* @return set of stratifications to use
*/
public TreeSet<VariantStratifier> initializeStratificationObjects(VariantEvalWalker variantEvalWalker, boolean noStandardStrats, String[] modulesToUse) {
@ -246,7 +270,8 @@ public class VariantEvalUtils {
field.setAccessible(true);
if (!(field.get(vei) instanceof TableType)) {
table.addColumn(field.getName(), 0.0, datamap.get(field).format());
String format = datamap.get(field).format();
table.addColumn(field.getName(), true, format);
}
}
} catch (InstantiationException e) {
@ -297,7 +322,6 @@ public class VariantEvalUtils {
* Additional variant contexts per sample are automatically generated and added to the map unless the sample name
* matches the ALL_SAMPLE_NAME constant.
*
*
* @param tracker the metadata tracker
* @param ref the reference context
* @param tracks the list of tracks to process
@ -306,57 +330,56 @@ public class VariantEvalUtils {
* @param subsetBySample if false, do not separate the track into per-sample VCs
* @param trackPerSample if false, don't stratify per sample (and don't cut up the VariantContext like we would need
* to do this)
*
* @return the mapping of track to VC list that should be populated
*/
public HashMap<RodBinding<VariantContext>, HashMap<String, Collection<VariantContext>>>
bindVariantContexts(RefMetaDataTracker tracker,
ReferenceContext ref,
List<RodBinding<VariantContext>> tracks,
boolean byFilter,
boolean subsetBySample,
boolean trackPerSample,
boolean mergeTracks) {
if ( tracker == null )
bindVariantContexts(RefMetaDataTracker tracker,
ReferenceContext ref,
List<RodBinding<VariantContext>> tracks,
boolean byFilter,
boolean subsetBySample,
boolean trackPerSample,
boolean mergeTracks) {
if (tracker == null)
return null;
HashMap<RodBinding<VariantContext>, HashMap<String, Collection<VariantContext>>> bindings = new HashMap<RodBinding<VariantContext>, HashMap<String, Collection<VariantContext>>>();
RodBinding<VariantContext> firstTrack = tracks.isEmpty() ? null : tracks.get(0);
for ( RodBinding<VariantContext> track : tracks ) {
for (RodBinding<VariantContext> track : tracks) {
HashMap<String, Collection<VariantContext>> mapping = new HashMap<String, Collection<VariantContext>>();
for ( VariantContext vc : tracker.getValues(track, ref.getLocus()) ) {
for (VariantContext vc : tracker.getValues(track, ref.getLocus())) {
// First, filter the VariantContext to represent only the samples for evaluation
VariantContext vcsub = vc;
if ( subsetBySample && vc.hasGenotypes() && vc.hasGenotypes(variantEvalWalker.getSampleNamesForEvaluation()) ) {
if (subsetBySample && vc.hasGenotypes() && vc.hasGenotypes(variantEvalWalker.getSampleNamesForEvaluation())) {
vcsub = getSubsetOfVariantContext(vc, variantEvalWalker.getSampleNamesForEvaluation());
}
if ( (byFilter || !vcsub.isFiltered()) ) {
if ((byFilter || !vcsub.isFiltered())) {
addMapping(mapping, VariantEvalWalker.getAllSampleName(), vcsub);
}
// Now, if stratifying, split the subsetted vc per sample and add each as a new context
if ( vc.hasGenotypes() && trackPerSample ) {
for ( String sampleName : variantEvalWalker.getSampleNamesForEvaluation() ) {
if (vc.hasGenotypes() && trackPerSample) {
for (String sampleName : variantEvalWalker.getSampleNamesForEvaluation()) {
VariantContext samplevc = getSubsetOfVariantContext(vc, sampleName);
if ( byFilter || !samplevc.isFiltered() ) {
if (byFilter || !samplevc.isFiltered()) {
addMapping(mapping, sampleName, samplevc);
}
}
}
}
if ( mergeTracks && bindings.containsKey(firstTrack) ) {
if (mergeTracks && bindings.containsKey(firstTrack)) {
// go through each binding of sample -> value and add all of the bindings from this entry
HashMap<String, Collection<VariantContext>> firstMapping = bindings.get(firstTrack);
for ( Map.Entry<String, Collection<VariantContext>> elt : mapping.entrySet() ) {
for (Map.Entry<String, Collection<VariantContext>> elt : mapping.entrySet()) {
Collection<VariantContext> firstMappingSet = firstMapping.get(elt.getKey());
if ( firstMappingSet != null ) {
if (firstMappingSet != null) {
firstMappingSet.addAll(elt.getValue());
} else {
firstMapping.put(elt.getKey(), elt.getValue());
@ -371,7 +394,7 @@ public class VariantEvalUtils {
}
private void addMapping(HashMap<String, Collection<VariantContext>> mappings, String sample, VariantContext vc) {
if ( !mappings.containsKey(sample) )
if (!mappings.containsKey(sample))
mappings.put(sample, new ArrayList<VariantContext>(1));
mappings.get(sample).add(vc);
}

View File

@ -128,11 +128,11 @@ public class MathUtils {
return big + MathUtils.jacobianLogTable[ind];
}
public static double sum(Collection<Number> numbers) {
public static double sum(Collection<? extends Number> numbers) {
return sum(numbers, false);
}
public static double sum(Collection<Number> numbers, boolean ignoreNan) {
public static double sum(Collection<? extends Number> numbers, boolean ignoreNan) {
double sum = 0;
for (Number n : numbers) {
if (!ignoreNan || !Double.isNaN(n.doubleValue())) {
@ -152,8 +152,8 @@ public class MathUtils {
return size;
}
public static double average(Collection<Integer> x) {
return (double) sum(x) / x.size();
public static double average(Collection<? extends Number> x) {
return sum(x) / x.size();
}
public static double average(Collection<Number> numbers, boolean ignoreNan) {
@ -1100,13 +1100,6 @@ public class MathUtils {
return getQScoreOrderStatistic(reads, offsets, (int) Math.floor(reads.size() / 2.));
}
public static long sum(Collection<Integer> x) {
long sum = 0;
for (int v : x)
sum += v;
return sum;
}
/**
* A utility class that computes on the fly average and standard deviation for a stream of numbers.
* The number of observations does not have to be known in advance, and can be also very big (so that

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2011, The Broad Institute
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
@ -29,43 +29,47 @@ import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.io.File;
import java.io.IOException;
import java.io.PrintStream;
public class GATKReportUnitTest extends BaseTest {
@Test(enabled = false)
public void testParse() throws Exception {
String reportPath = validationDataLocation + "exampleGATKReport.eval";
String reportPath = validationDataLocation + "exampleGATKReportv1.tbl";
GATKReport report = new GATKReport(reportPath);
GATKReportTable countVariants = report.getTable("CountVariants");
Assert.assertEquals(countVariants.getVersion(), GATKReportVersion.V0_1);
//Assert.assertEquals(countVariants.getVersion(), GATKReportVersion.V0_1);
Object countVariantsPK = countVariants.getPrimaryKey("none.eval.none.all");
Assert.assertEquals(countVariants.get(countVariantsPK, "nProcessedLoci"), "100000");
Assert.assertEquals(countVariants.get(countVariantsPK, "nNoCalls"), "99872");
GATKReportTable validationReport = report.getTable("ValidationReport");
Assert.assertEquals(validationReport.getVersion(), GATKReportVersion.V0_1);
//Assert.assertEquals(validationReport.getVersion(), GATKReportVersion.V0_1);
Object validationReportPK = countVariants.getPrimaryKey("none.eval.none.known");
Assert.assertEquals(validationReport.get(validationReportPK, "sensitivity"), "NaN");
}
@DataProvider(name = "rightAlignValues")
public Object[][] getRightAlignValues() {
return new Object[][] {
new Object[] {null, true},
new Object[] {"null", true},
new Object[] {"NA", true},
new Object[] {"0", true},
new Object[] {"0.0", true},
new Object[] {"-0", true},
new Object[] {"-0.0", true},
new Object[] {String.valueOf(Long.MAX_VALUE), true},
new Object[] {String.valueOf(Long.MIN_VALUE), true},
new Object[] {String.valueOf(Float.MIN_NORMAL), true},
new Object[] {String.valueOf(Double.MAX_VALUE), true},
new Object[] {String.valueOf(Double.MIN_VALUE), true},
new Object[] {String.valueOf(Double.POSITIVE_INFINITY), true},
new Object[] {String.valueOf(Double.NEGATIVE_INFINITY), true},
new Object[] {String.valueOf(Double.NaN), true},
new Object[] {"hello", false}
return new Object[][]{
new Object[]{null, true},
new Object[]{"null", true},
new Object[]{"NA", true},
new Object[]{"0", true},
new Object[]{"0.0", true},
new Object[]{"-0", true},
new Object[]{"-0.0", true},
new Object[]{String.valueOf(Long.MAX_VALUE), true},
new Object[]{String.valueOf(Long.MIN_VALUE), true},
new Object[]{String.valueOf(Float.MIN_NORMAL), true},
new Object[]{String.valueOf(Double.MAX_VALUE), true},
new Object[]{String.valueOf(Double.MIN_VALUE), true},
new Object[]{String.valueOf(Double.POSITIVE_INFINITY), true},
new Object[]{String.valueOf(Double.NEGATIVE_INFINITY), true},
new Object[]{String.valueOf(Double.NaN), true},
new Object[]{"hello", false}
};
}
@ -73,4 +77,109 @@ public class GATKReportUnitTest extends BaseTest {
public void testIsRightAlign(String value, boolean expected) {
Assert.assertEquals(GATKReportColumn.isRightAlign(value), expected, "right align of '" + value + "'");
}
/**
 * Writes a simple report (built with GATKReport.newSimpleReport) to a temporary file,
 * reads it back, and checks that the re-read report has the same format as the original.
 */
@Test
public void testSimpleGATKReport() {
    GATKReport report = GATKReport.newSimpleReport("TableName", "a", "b", "Roger", "is", "Awesome");
    report.addRow("a", 'F', 12, 23.45, true);
    report.addRow("ans", '3', 24.5, 456L, 2345);
    report.addRow("hi", null, null, "", 2.3);

    try {
        File file = createTempFile("GATKReportGatherer-UnitTest", ".tbl");
        PrintStream ps = new PrintStream(file);
        try {
            report.print(ps);
        } finally {
            // Always close the stream so the file handle is released, even if print() throws.
            ps.close();
        }

        GATKReport inputRead = new GATKReport(file);
        Assert.assertTrue(report.isSameFormat(inputRead));
    } catch (IOException x) {
        // An I/O problem means the round trip was never verified; fail instead of passing silently.
        Assert.fail("IOException while round-tripping the report: " + x, x);
    }
}
/**
 * Builds three reports that each hold one row of an identically laid-out table, combines
 * them into the first report via combineWith, adds two more tables with further column
 * types, then writes the result to a temporary file and checks that the report read back
 * has the same format as, and is equal to, the report that was written.
 */
@Test
public void testGATKReportGatherer() {
    boolean displayPK = false;
    GATKReport report1, report2, report3;

    report1 = new GATKReport();
    report1.addTable("TableName", "Description");
    report1.getTable("TableName").addPrimaryKey("id", displayPK);
    report1.getTable("TableName").addColumn("colA", GATKReportDataType.String.getDefaultValue(), "%s");
    report1.getTable("TableName").addColumn("colB", GATKReportDataType.Byte.getDefaultValue(), "%c");
    report1.getTable("TableName").set(1, "colA", "NotNum");
    report1.getTable("TableName").set(1, "colB", (byte) 64);

    report2 = new GATKReport();
    report2.addTable("TableName", "Description");
    report2.getTable("TableName").addPrimaryKey("id", displayPK);
    report2.getTable("TableName").addColumn("colA", GATKReportDataType.String.getDefaultValue(), "%s");
    report2.getTable("TableName").addColumn("colB", GATKReportDataType.Byte.getDefaultValue(), "%c");
    report2.getTable("TableName").set(2, "colA", "df3");
    report2.getTable("TableName").set(2, "colB", 'A');

    report3 = new GATKReport();
    report3.addTable("TableName", "Description");
    report3.getTable("TableName").addPrimaryKey("id", displayPK);
    report3.getTable("TableName").addColumn("colA", GATKReportDataType.String.getDefaultValue(), "%s");
    report3.getTable("TableName").addColumn("colB", GATKReportDataType.Byte.getDefaultValue(), "%c");
    report3.getTable("TableName").set(3, "colA", "df5f");
    report3.getTable("TableName").set(3, "colB", 'c');

    report1.combineWith(report2);
    report1.combineWith(report3);

    // A second table exercising integer, decimal and boolean columns.
    report1.addTable("Table2", "To contain some more data types");
    GATKReportTable table = report1.getTable("Table2");
    table.addPrimaryKey("KEY");
    table.addColumn("SomeInt", GATKReportDataType.Integer.getDefaultValue(), true, "%d");
    table.addColumn("SomeFloat", GATKReportDataType.Decimal.getDefaultValue(), true, "%.16E");
    table.addColumn("TrueFalse", false, true, "%B");
    table.set("12df", "SomeInt", 34);
    table.set("12df", "SomeFloat", 34.0);
    table.set("12df", "TrueFalse", true);
    table.set("5f", "SomeInt", -1);
    table.set("5f", "SomeFloat", 0.000003);
    table.set("5f", "TrueFalse", false);
    table.set("RZ", "SomeInt", 904948230958203958L);
    table.set("RZ", "SomeFloat", 535646345.657453464576);
    table.set("RZ", "TrueFalse", true);

    // A third table with a single string column.
    report1.addTable("Table3", "blah");
    report1.getTable("Table3").addPrimaryKey("HAI");
    report1.getTable("Table3").addColumn("a", true, GATKReportDataType.String.getDefaultFormatString());
    report1.getTable("Table3").set("q", "a", "34");
    report1.getTable("Table3").set("5", "a", "c4g34");
    report1.getTable("Table3").set("573s", "a", "fDlwueg");
    report1.getTable("Table3").set("ZZZ", "a", "Dfs");

    try {
        File file = createTempFile("GATKReportGatherer-UnitTest", ".tbl");
        PrintStream ps = new PrintStream(file);
        try {
            report1.print(ps);
        } finally {
            // Always close the stream so the file handle is released, even if print() throws.
            ps.close();
        }

        GATKReport inputRead = new GATKReport(file);
        Assert.assertTrue(report1.isSameFormat(inputRead));
        Assert.assertTrue(report1.equals(inputRead));
    } catch (IOException x) {
        // An I/O problem means the round trip was never verified; fail instead of passing silently.
        Assert.fail("IOException while round-tripping the report: " + x, x);
    }
}
}

View File

@ -35,7 +35,7 @@ public class ErrorRatePerCycleIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
"-T ErrorRatePerCycle -R " + b37KGReference + " -I " + b37GoodBAM + " -L 20:10,000,000-10,100,000 -o %s",
1,
Arrays.asList("0cc212ecb6df300e321784039ff29f13"));
Arrays.asList("71685716c7dde64c51bbd908c06ea742"));
executeTest("ErrorRatePerCycle:", spec);
}
}

View File

@ -38,7 +38,7 @@ public class ReadGroupPropertiesIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
"-T ReadGroupProperties -R " + b37KGReference + " -I " + b37GoodBAM + " -L 20:10,000,000-11,000,000 -o %s",
1,
Arrays.asList("6b8cce223af28cbadcfe87a3b841fc56"));
Arrays.asList("3f1f97a1d2c5fb552ed4f33ea30d136d"));
executeTest("ReadGroupProperties:", spec);
}
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2011, The Broad Institute
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
@ -50,8 +50,8 @@ public class DiffObjectsIntegrationTest extends WalkerTest {
@DataProvider(name = "data")
public Object[][] createData() {
new TestParams(testDir + "diffTestMaster.vcf", testDir + "diffTestTest.vcf", "da3dc85a0e35a9aade5520591891b4fa");
new TestParams(testDir + "exampleBAM.bam", testDir + "exampleBAM.simple.bam", "7dc8200730313e6753237a696296fb73");
new TestParams(testDir + "diffTestMaster.vcf", testDir + "diffTestTest.vcf", "dac62fcd25e1052bf18b5707700dda7e");
new TestParams(testDir + "exampleBAM.bam", testDir + "exampleBAM.simple.bam", "e10c48dd294fb257802d4e73bb50580d");
return TestParams.getTests(TestParams.class);
}

View File

@ -1,3 +1,27 @@
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.varianteval;
import org.broadinstitute.sting.WalkerTest;
@ -30,7 +54,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("c8d8bffa5c572df9dec7364f71a1b943")
Arrays.asList("add8b2213c091a41f5d7a2c8dd68c03a")
);
executeTest("testFunctionClassWithSnpeff", spec);
}
@ -50,7 +74,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("081fcaa532c7ba8f23da739389e6f7c3")
Arrays.asList("621a712deb01e7fc7e5a13d3627b11ba")
);
executeTest("testStratifySamplesAndExcludeMonomorphicSites", spec);
}
@ -70,7 +94,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("b3852f84d07c270b8a12874083c3e31b")
Arrays.asList("94fb8cba9e236131c6fbf1d7fee738fe")
);
executeTest("testFundamentalsCountVariantsSNPsandIndels", spec);
}
@ -91,7 +115,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("cf70468b5ebaec408419da69b0a7fcb9")
Arrays.asList("670979268b05c3024297ba98d67d89ab")
);
executeTest("testFundamentalsCountVariantsSNPsandIndelsWithNovelty", spec);
}
@ -113,7 +137,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("5e3b8b85acfc41365c8208c23abf746b")
Arrays.asList("c38ce9c872a76ae7dd26c3e353bf0765")
);
executeTest("testFundamentalsCountVariantsSNPsandIndelsWithNoveltyAndFilter", spec);
}
@ -134,7 +158,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("ccdbc50d30ece6d0d3b199c397f03ed3")
Arrays.asList("2c37f23bf6114a2b27f21ed445806fd2")
);
executeTest("testFundamentalsCountVariantsSNPsandIndelsWithCpG", spec);
}
@ -155,7 +179,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("95c690d5af8ed51573eb2f0503dcd9c2")
Arrays.asList("206f0d629de9af0b97340cb22d34a81b")
);
executeTest("testFundamentalsCountVariantsSNPsandIndelsWithFunctionalClass", spec);
}
@ -176,7 +200,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("8e8547eb38b34bec0095b0500fd9641d")
Arrays.asList("bd869725429deae8f56175ba9a8ab390")
);
executeTest("testFundamentalsCountVariantsSNPsandIndelsWithDegeneracy", spec);
}
@ -197,7 +221,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("158a4651a656aea7f84c79548f6fe519")
Arrays.asList("9c7f6783a57ad681bb754b5e71de27dc")
);
executeTest("testFundamentalsCountVariantsSNPsandIndelsWithSample", spec);
}
@ -220,7 +244,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("76c8a0b28d2993644120f7afa5833ab2")
Arrays.asList("a2d280440aa3771937f3d2d10f1eea74")
);
executeTest("testFundamentalsCountVariantsSNPsandIndelsWithJexlExpression", spec);
}
@ -245,7 +269,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("34682193f458b93b39efac00b4fc6723")
Arrays.asList("2925d811dd521beb00059f8c8e818d83")
);
executeTest("testFundamentalsCountVariantsSNPsandIndelsWithMultipleJexlExpressions", spec);
}
@ -264,7 +288,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("52f6655f1532bcea24b402010d93ce73")
Arrays.asList("4b79bf2dfd73ddac0ceb0838a352bf9a")
);
executeTest("testFundamentalsCountVariantsNoCompRod", spec);
}
@ -277,7 +301,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
" --eval " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf" +
" --comp:comp_genotypes,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.head.vcf";
WalkerTestSpec spec = new WalkerTestSpec(withSelect(tests, "DP < 50", "DP50") + " " + extraArgs + " -ST CpG -o %s",
1, Arrays.asList("c49e239292704447a36e01ee9a71e729"));
1, Arrays.asList("c2a4b0266c509944eafe6220fd8389da"));
executeTestParallel("testSelect1", spec);
}
@ -287,7 +311,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(cmdRoot + " -ST CpG --eval:VCF3 " + validationDataLocation + vcfFile + " --comp:VCF3 " + validationDataLocation + "GenotypeConcordanceComp.vcf -noEV -EV GenotypeConcordance -o %s",
1,
Arrays.asList("9a56c20a7b9a554a7b530f2cb1dd776d"));
Arrays.asList("70da6a0f91a9f1052d68fc360cc99aed"));
executeTestParallel("testVEGenotypeConcordance" + vcfFile, spec);
}
@ -298,14 +322,14 @@ public class VariantEvalIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec("-T VariantEval -R "+b37KGReference+" --eval " + variantEvalTestDataRoot + vcfFile + " -ped "+ variantEvalTestDataRoot + pedFile +" -noEV -EV MendelianViolationEvaluator -L 1:10109-10315 -o %s -mvq 0 -noST",
1,
Arrays.asList("66e72c887124f40933d32254b2dd44a3"));
Arrays.asList("03581adcb4f2f7960662fc7ffd910f43"));
executeTestParallel("testVEMendelianViolationEvaluator" + vcfFile, spec);
}
@Test
public void testCompVsEvalAC() {
String extraArgs = "-T VariantEval -R "+b36KGReference+" -o %s -ST CpG -EV GenotypeConcordance --eval:evalYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.very.few.lines.vcf --comp:compYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.fake.genotypes.ac.test.vcf";
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("fa13eb59892892c07711c6ffe31bf870"));
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("19cde5078dd7284c95be4797695d3200"));
executeTestParallel("testCompVsEvalAC",spec);
}
@ -323,7 +347,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
@Test
public void testCompOverlap() {
String extraArgs = "-T VariantEval -R " + b37KGReference + " -L " + validationDataLocation + "VariantEval/pacbio.hg19.intervals --comp:comphapmap " + comparisonDataLocation + "Validated/HapMap/3.3/genotypes_r27_nr.b37_fwd.vcf --eval " + validationDataLocation + "VariantEval/pacbio.ts.recalibrated.vcf -noEV -EV CompOverlap -sn NA12878 -noST -ST Novelty -o %s";
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("9002023b8aa8fc2c9aac58b8a79bca1e"));
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("00241ce70476187a2f910606b9242697"));
executeTestParallel("testCompOverlap",spec);
}
@ -335,7 +359,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
" --dbsnp " + b37dbSNP132 +
" --eval:evalBI " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" +
" -noST -ST Novelty -o %s";
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("e42cda858649a35eaa9d14ea2d70a956"));
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("5ac240e33082887264e07be7de0f095f"));
executeTestParallel("testEvalTrackWithoutGenotypes",spec);
}
@ -347,7 +371,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
" --eval:evalBI " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" +
" --eval:evalBC " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bc.sites.vcf" +
" -noST -ST Novelty -o %s";
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("9561cb4c7aa36dcf30ba253385299859"));
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("4dec767b6e7f2743eef89e586faab948"));
executeTestParallel("testMultipleEvalTracksWithoutGenotypes",spec);
}
@ -364,13 +388,13 @@ public class VariantEvalIntegrationTest extends WalkerTest {
" -noST -noEV -ST Novelty -EV CompOverlap" +
" -o %s";
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("61052c19211e7eb61fbbb62db5e40b56"));
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("15f6a6ba4f7fed49c617589ce9fdcbc5"));
executeTestParallel("testMultipleCompTracks",spec);
}
@Test
public void testPerSampleAndSubsettedSampleHaveSameResults1() {
String md5 = "0edded1cd578db62fa296c99c34a909d";
String md5 = "bcf55537db0762b8fd68f7f02439c475";
WalkerTestSpec spec = new WalkerTestSpec(
buildCommandLine(
@ -425,7 +449,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("ee22604616b3e9fc48a6dcbbf73a056d")
Arrays.asList("0c632b5be8a54e43afa576510b40c4da")
);
executeTest("testAlleleCountStrat", spec);
}
@ -446,7 +470,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("240369cd651c77e05e8a6659f4a6237e")
Arrays.asList("92404820a94e7cfb854ae73450a0fbd9")
);
executeTest("testIntervalStrat", spec);
}
@ -463,7 +487,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("397b0e77459b9b69d2e0dd1dac320c3c")
Arrays.asList("8cb8a393a0176e4df4290af7817c8647")
);
executeTest("testModernVCFWithLargeIndels", spec);
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2011, The Broad Institute
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
@ -98,7 +98,7 @@ public class VCFStreamingIntegrationTest extends WalkerTest {
" -EV CompOverlap -noEV -noST" +
" -o %s",
1,
Arrays.asList("addf5f4596ddacef40808f6d3d281111")
Arrays.asList("666036d38f224d7c95b46a8d7197fe68")
);
executeTest("testVCFStreamingChain", selectTestSpec);

View File

@ -321,7 +321,7 @@ class GATKResourcesBundle extends QScript {
}
class LiftOverPerl(@Input val in: File, @Output val out: File, @Input val chain: File, oldRef: Reference, newRef: Reference) extends CommandLineFunction {
this.memoryLimit = 8
this.memoryLimit = 12
def commandLine = ("%s -vcf %s -chain %s -out %s " +
"-gatk ./ -newRef %s -oldRef %s -tmp %s").format(liftOverPerl, in.getAbsolutePath, chain,
out.getAbsolutePath, newRef.file.replace(".fasta", ""),