Conflicts:
	private/java/src/org/broadinstitute/sting/gatk/walkers/qc/TestVariantContextWalker.java
	public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java
	public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java
	public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java
	public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java
	public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java
	public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersPerformanceTest.java
	public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java
	public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextIntegrationTest.java
This commit is contained in:
Mark DePristo 2011-08-03 15:09:47 -04:00
commit 79e4a8f6d3
59 changed files with 2852 additions and 2528 deletions

View File

@ -20,6 +20,20 @@
assign(tableName, d, envir=tableEnv);
}
# Read a fixed width line of text into a list.
# Splits `line` at the 1-based offsets in `columnStarts` and strips leading and
# trailing whitespace from each resulting field.
.gsa.splitFixedWidth <- function(line, columnStarts) {
# Helper: cut x into the [starts[i], stops[i]] substrings and trim whitespace.
# NOTE(review): reads `starts`/`stops` from the enclosing environment; they are
# assigned below this definition but exist by the time the closure is called.
splitStartStop <- function(x) {
x = substring(x, starts, stops);
x = gsub("^[[:space:]]+|[[:space:]]+$", "", x);
x;
}
# Field boundaries: field i runs from starts[i] to stops[i]; the first field
# starts at column 1 and the last ends at the end of the line.
starts = c(1, columnStarts);
stops = c(columnStarts - 1, nchar(line));
# sapply yields a matrix with one column per element of `line`; a single line
# is expected here, so keep only the first column.
sapply(line, splitStartStop)[,1];
}
# Load all GATKReport tables from a file
gsa.read.gatkreport <- function(filename) {
con = file(filename, "r", blocking = TRUE);
@ -31,9 +45,10 @@ gsa.read.gatkreport <- function(filename) {
tableName = NA;
tableHeader = c();
tableRows = c();
version = NA;
for (line in lines) {
if (length(grep("^##:GATKReport.v0.1[[:space:]]+", line, ignore.case=TRUE)) > 0) {
if (length(grep("^##:GATKReport.v", line, ignore.case=TRUE)) > 0) {
headerFields = unlist(strsplit(line, "[[:space:]]+"));
if (!is.na(tableName)) {
@ -43,13 +58,37 @@ gsa.read.gatkreport <- function(filename) {
tableName = headerFields[2];
tableHeader = c();
tableRows = c();
# For differences in versions see
# $STING_HOME/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportVersion.java
if (length(grep("^##:GATKReport.v0.1[[:space:]]+", line, ignore.case=TRUE)) > 0) {
version = "v0.1";
} else if (length(grep("^##:GATKReport.v0.2[[:space:]]+", line, ignore.case=TRUE)) > 0) {
version = "v0.2";
columnStarts = c();
}
} else if (length(grep("^[[:space:]]*$", line)) > 0 | length(grep("^[[:space:]]*#", line)) > 0) {
# do nothing
} else if (!is.na(tableName)) {
row = unlist(strsplit(line, "[[:space:]]+"));
if (version == "v0.1") {
row = unlist(strsplit(line, "[[:space:]]+"));
} else if (version == "v0.2") {
if (length(tableHeader) == 0) {
headerChars = unlist(strsplit(line, ""));
# Find the first position of non space characters, excluding the first character
columnStarts = intersect(grep("[[:space:]]", headerChars, invert=TRUE), grep("[[:space:]]", headerChars) + 1);
}
row = .gsa.splitFixedWidth(line, columnStarts);
}
if (length(tableHeader) == 0) {
tableHeader = row;
tableHeader = row;
} else {
tableRows = rbind(tableRows, row);
}

View File

@ -87,8 +87,8 @@ public class VCFWriterStorage implements Storage<VCFWriterStorage>, VCFWriter {
writer.writeHeader(stub.getVCFHeader());
}
public void add(VariantContext vc, byte ref) {
writer.add(vc, ref);
public void add(VariantContext vc) {
writer.add(vc);
}
/**
@ -117,7 +117,7 @@ public class VCFWriterStorage implements Storage<VCFWriterStorage>, VCFWriter {
BasicFeatureSource<VariantContext> source = BasicFeatureSource.getFeatureSource(file.getAbsolutePath(), new VCFCodec(), false);
for ( VariantContext vc : source.iterator() ) {
target.writer.add(vc, vc.getReferenceBaseForIndel());
target.writer.add(vc);
}
source.close();

View File

@ -192,8 +192,8 @@ public class VCFWriterStub implements Stub<VCFWriter>, VCFWriter {
/**
* @{inheritDoc}
*/
public void add(VariantContext vc, byte ref) {
outputTracker.getStorage(this).add(vc,ref);
public void add(VariantContext vc) {
outputTracker.getStorage(this).add(vc);
}
/**

View File

@ -127,14 +127,13 @@ public class VariantContextAdaptors {
Map<String, Object> attributes = new HashMap<String, Object>();
attributes.put(VariantContext.ID_KEY, dbsnp.getRsID());
if ( sawNullAllele ) {
int index = dbsnp.getStart() - ref.getWindow().getStart() - 1;
if ( index < 0 )
return null; // we weren't given enough reference context to create the VariantContext
attributes.put(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY, new Byte(ref.getBases()[index]));
}
Collection<Genotype> genotypes = null;
VariantContext vc = new VariantContext(name, dbsnp.getChr(), dbsnp.getStart() - (sawNullAllele ? 1 : 0),dbsnp.getEnd(), alleles, genotypes, VariantContext.NO_NEG_LOG_10PERROR, null, attributes);
int index = dbsnp.getStart() - ref.getWindow().getStart() - 1;
if ( index < 0 )
return null; // we weren't given enough reference context to create the VariantContext
Byte refBaseForIndel = new Byte(ref.getBases()[index]);
Map<String, Genotype> genotypes = null;
VariantContext vc = new VariantContext(name, dbsnp.getChr(), dbsnp.getStart() - (sawNullAllele ? 1 : 0), dbsnp.getEnd(), alleles, genotypes, VariantContext.NO_NEG_LOG_10PERROR, null, attributes, refBaseForIndel);
return vc;
} else
return null; // can't handle anything else

View File

@ -1,21 +1,23 @@
package org.broadinstitute.sting.gatk.report;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.StingException;
import org.broadinstitute.sting.utils.text.TextFormattingUtils;
import java.io.*;
import java.util.List;
import java.util.TreeMap;
/**
* Container class for GATK report tables
*/
public class GATKReport {
private TreeMap<String, GATKReportTable> tables;
private TreeMap<String, GATKReportTable> tables = new TreeMap<String, GATKReportTable>();
/**
* Create a new, empty GATKReport.
*/
public GATKReport() {
tables = new TreeMap<String, GATKReportTable>();
}
/**
@ -23,7 +25,7 @@ public class GATKReport {
* @param filename the path to the file to load
*/
public GATKReport(String filename) {
loadReport(new File(filename));
this(new File(filename));
}
/**
@ -31,7 +33,6 @@ public class GATKReport {
* @param file the file to load
*/
public GATKReport(File file) {
tables = new TreeMap<String, GATKReportTable>();
loadReport(file);
}
@ -46,11 +47,17 @@ public class GATKReport {
GATKReportTable table = null;
String[] header = null;
int id = 0;
GATKReportVersion version = null;
List<Integer> columnStarts = null;
String line;
while ( (line = reader.readLine()) != null ) {
if (line.startsWith("##:GATKReport.v0.1 ")) {
line = line.replaceFirst("##:GATKReport.v0.1 ", "");
if (line.startsWith("##:GATKReport.v")) {
version = GATKReportVersion.fromHeader(line);
line = line.replaceFirst("##:GATKReport." + version.versionString + " ", "");
String[] pieces = line.split(" : ");
String tableName = pieces[0];
@ -58,14 +65,35 @@ public class GATKReport {
addTable(tableName, tableDesc);
table = getTable(tableName);
table.setVersion(version);
header = null;
} else if ( line.isEmpty() ) {
columnStarts = null;
} else if ( line.trim().isEmpty() ) {
// do nothing
} else {
if (table != null) {
String[] splitLine;
switch (version) {
case V0_1:
splitLine = TextFormattingUtils.splitWhiteSpace(line);
break;
case V0_2:
if (header == null) {
columnStarts = TextFormattingUtils.getWordStarts(line);
}
splitLine = TextFormattingUtils.splitFixedWidth(line, columnStarts);
break;
default:
throw new ReviewedStingException("GATK report version parsing not implemented for: " + line);
}
if (header == null) {
header = line.split("\\s+");
header = splitLine;
table.addPrimaryKey("id", false);
@ -75,10 +103,8 @@ public class GATKReport {
id = 0;
} else {
String[] entries = line.split("\\s+");
for (int columnIndex = 0; columnIndex < header.length; columnIndex++) {
table.set(id, header[columnIndex], entries[columnIndex]);
table.set(id, header[columnIndex], splitLine[columnIndex]);
}
id++;
@ -125,7 +151,10 @@ public class GATKReport {
* @return the table object
*/
public GATKReportTable getTable(String tableName) {
return tables.get(tableName);
GATKReportTable table = tables.get(tableName);
if (table == null)
throw new ReviewedStingException("Table is not in GATKReport: " + tableName);
return table;
}
/**

View File

@ -37,10 +37,10 @@ public class GATKReportColumn extends TreeMap<Object, Object> {
* tables, as the table gets written properly without having to waste storage for the unset elements (usually the zero
* values) in the table.
*
* @param primaryKey the primary key position in the column that should be set
* @param primaryKey the primary key position in the column that should be retrieved
* @return the value at the specified position in the column, or the default value if the element is not set
*/
public Object getWithoutSideEffects(Object primaryKey) {
private Object getWithoutSideEffects(Object primaryKey) {
if (!this.containsKey(primaryKey)) {
return defaultValue;
}
@ -48,6 +48,16 @@ public class GATKReportColumn extends TreeMap<Object, Object> {
return this.get(primaryKey);
}
/**
 * Looks up the value stored for the given primary key and renders it as a
 * string, falling back to the column's default when the position was never set.
 *
 * @param primaryKey the primary key position in the column that should be retrieved
 * @return the string value at the specified position in the column, or the default value if the element is not set
 */
public String getStringValue(Object primaryKey) {
    final Object value = getWithoutSideEffects(primaryKey);
    return toString(value);
}
/**
* Return the displayable property of the column. If true, the column will be displayed in the final output.
* If not, printing will be suppressed for the contents of the table.
@ -67,7 +77,7 @@ public class GATKReportColumn extends TreeMap<Object, Object> {
for (Object obj : this.values()) {
if (obj != null) {
int width = obj.toString().length();
int width = toString(obj).length();
if (width > maxWidth) {
maxWidth = width;
@ -77,4 +87,23 @@ public class GATKReportColumn extends TreeMap<Object, Object> {
return maxWidth;
}
/**
 * Renders a cell value for display. Float and Double values are printed with
 * eight decimal places so fixed-width columns stay aligned; a null value is
 * rendered as the literal string "null".
 *
 * @param obj The object to convert to a string
 * @return The string representation of the column
 */
private static String toString(Object obj) {
    if (obj == null)
        return "null";
    if (obj instanceof Float)
        return String.format("%.8f", (Float) obj);
    if (obj instanceof Double)
        return String.format("%.8f", (Double) obj);
    return obj.toString();
}
}

View File

@ -24,26 +24,32 @@
package org.broadinstitute.sting.gatk.report;
import org.broadinstitute.sting.BaseTest;
import org.testng.Assert;
import org.testng.annotations.Test;
import java.util.*;
import java.io.File;
/**
* Tracks a linked list of GATKReportColumn in order by name.
*/
public class GATKReportColumns extends LinkedHashMap<String, GATKReportColumn> {
private List<String> columnNames = new ArrayList<String>();
public class GATKReportParserUnitTest extends BaseTest {
@Test
public void testParse() throws Exception {
GATKReportParser parser = new GATKReportParser();
parser.parse(new File(validationDataLocation + "exampleGATKReport.eval"));
/**
* Returns the column by index
* @param i the index
* @return The column
*/
public GATKReportColumn getByIndex(int i) {
return get(columnNames.get(i));
}
Assert.assertEquals(parser.getValue("CountVariants", "none.eval.none.all", "nProcessedLoci"), "100000");
Assert.assertEquals(parser.getValue("CountVariants", "none.eval.none.all", "nNoCalls"), "99872");
@Override
public GATKReportColumn remove(Object key) {
columnNames.remove(key);
return super.remove(key);
}
Assert.assertEquals(parser.getValue("SimpleMetricsByAC.metrics", "none.eval.none.novel.ac2", "AC"), "2");
Assert.assertNull(parser.getValue("SimpleMetricsByAC.metrics", "none.eval.none.novel.ac2.bad", "AC"));
Assert.assertNull(parser.getValue("SimpleMetricsByAC.metrics", "none.eval.none.novel.ac2", "AC.bad"));
Assert.assertNull(parser.getValue("SimpleMetricsByAC.metrics.bad", "none.eval.none.novel.ac2", "AC"));
Assert.assertEquals(parser.getValue("ValidationReport", "none.eval.none.known", "sensitivity"), "NaN");
@Override
public GATKReportColumn put(String key, GATKReportColumn value) {
columnNames.add(key);
return super.put(key, value);
}
}

View File

@ -1,83 +0,0 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.report;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.broadinstitute.sting.utils.text.XReadLines;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
/**
 * Parses a GATKReport v0.1 file into a list of table parsers and exposes
 * cell lookups by table name, row key, and column name.
 */
public class GATKReportParser {
    // All tables parsed so far, in file order.
    private List<GATKReportTableParser> tables = new ArrayList<GATKReportTableParser>();

    /**
     * Parses the GATK report in the given file.
     *
     * @param file the report file to read
     * @throws IOException if the file cannot be opened or read
     */
    public void parse(File file) throws IOException {
        InputStream stream = FileUtils.openInputStream(file);
        try {
            parse(stream);
        } finally {
            IOUtils.closeQuietly(stream);
        }
    }

    /**
     * Parses a GATK report from the stream. A "##:GATKReport.v0.1 " header
     * starts a new table; a blank line ends the current one.
     *
     * @param input the stream to read the report from
     * @throws IOException if the stream cannot be read
     */
    public void parse(InputStream input) throws IOException {
        GATKReportTableParser table = null;
        for (String line : new XReadLines(input)) {
            if (line.startsWith("##:GATKReport.v0.1 ")) {
                table = newTableParser(line);
                tables.add(table);
                table.parse(line); // the header is also line 1 of the table
            } else if (table != null) {
                if (line.trim().length() == 0)
                    table = null; // blank line terminates the current table
                else
                    table.parse(line);
            }
        }
    }

    /**
     * Returns the value of a cell, or null if the table, key, or column is not found.
     *
     * @param tableName the name of the table to search
     * @param key the row key components
     * @param column the column name
     * @return the cell value or null
     */
    public String getValue(String tableName, String[] key, String column) {
        GATKReportTableParser table = findTable(tableName);
        return table == null ? null : table.getValue(key, column);
    }

    /**
     * Returns the value of a cell, or null if the table, key, or column is not found.
     *
     * @param tableName the name of the table to search
     * @param key period-concatenated row key, e.g. "none.eval.none.all"
     * @param column the column name
     * @return the cell value or null
     */
    public String getValue(String tableName, String key, String column) {
        GATKReportTableParser table = findTable(tableName);
        return table == null ? null : table.getValue(key, column);
    }

    // Returns the first table with the given name, or null if none matches.
    // Consolidates the lookup loop previously duplicated in both getValue overloads.
    private GATKReportTableParser findTable(String tableName) {
        for (GATKReportTableParser table : tables)
            if (table.getTableName().equals(tableName))
                return table;
        return null;
    }

    // NOTE(review): the header argument is currently unused; this looks like a
    // hook for dispatching on report version — confirm before removing.
    private GATKReportTableParser newTableParser(String header) {
        return new GATKReportTableParser();
    }
}

View File

@ -1,5 +1,6 @@
package org.broadinstitute.sting.gatk.report;
import org.apache.commons.lang.ObjectUtils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.io.PrintStream;
@ -88,17 +89,20 @@ import java.util.regex.Pattern;
* but at least the prototype contained herein works.
*
* @author Kiran Garimella
* @author Khalid Shakir
*/
public class GATKReportTable {
private static final GATKReportVersion LATEST_REPORT_VERSION = GATKReportVersion.V0_2;
private String tableName;
private String tableDescription;
private GATKReportVersion version = LATEST_REPORT_VERSION;
private String primaryKeyName;
private Collection<Object> primaryKeyColumn;
private boolean primaryKeyDisplay;
boolean sortByPrimaryKey = true;
private boolean sortByPrimaryKey = true;
private LinkedHashMap<String, GATKReportColumn> columns;
private GATKReportColumns columns;
/**
* Verifies that a table or column name has only alphanumeric characters - no spaces or special characters allowed
@ -113,6 +117,19 @@ public class GATKReportTable {
return !m.find();
}
/**
 * Verifies that a table or column description contains no line breaks, since
 * the description must fit on the single table-definition line of the report.
 * (Unlike names, descriptions may contain spaces and punctuation.)
 *
 * @param description the description of the table or column
 * @return true if the description contains no \r or \n, false otherwise
 */
private boolean isValidDescription(String description) {
    Pattern p = Pattern.compile("\\r|\\n");
    Matcher m = p.matcher(description);
    return !m.find();
}
/**
* Construct a new GATK report table with the specified name and description
*
@ -128,11 +145,23 @@ public class GATKReportTable {
throw new ReviewedStingException("Attempted to set a GATKReportTable name of '" + tableName + "'. GATKReportTable names must be purely alphanumeric - no spaces or special characters are allowed.");
}
if (!isValidDescription(tableDescription)) {
throw new ReviewedStingException("Attempted to set a GATKReportTable description of '" + tableDescription + "'. GATKReportTable descriptions must not contain newlines.");
}
this.tableName = tableName;
this.tableDescription = tableDescription;
this.sortByPrimaryKey = sortByPrimaryKey;
columns = new LinkedHashMap<String, GATKReportColumn>();
columns = new GATKReportColumns();
}
public GATKReportVersion getVersion() {
return version;
}
protected void setVersion(GATKReportVersion version) {
this.version = version;
}
/**
@ -161,6 +190,57 @@ public class GATKReportTable {
primaryKeyDisplay = display;
}
/**
 * Returns the first primary key whose row matches the period-separated
 * column values, e.g. {@code dbsnp.eval.called.all.novel.all}.
 *
 * @param dottedColumnValues Period concatenated values.
 * @return The first matching primary key; never null.
 * @throws ReviewedStingException if no row matches the given values.
 */
public Object getPrimaryKey(String dottedColumnValues) {
    final Object match = findPrimaryKey(dottedColumnValues);
    if (match != null)
        return match;
    throw new ReviewedStingException("Attempted to get non-existent GATKReportTable key for values: " + dottedColumnValues);
}
/**
 * Returns true if there is at least one row with the dotted column values.
 * Ex: dbsnp.eval.called.all.novel.all
 * @param dottedColumnValues Period concatenated values.
 * @return true if there is at least one row matching the columns.
 */
public boolean containsPrimaryKey(String dottedColumnValues) {
    return findPrimaryKey(dottedColumnValues) != null;
}
/**
 * Finds the first primary key whose row matches the period-separated column
 * values, e.g. {@code dbsnp.eval.called.all.novel.all}.
 *
 * @param dottedColumnValues Period concatenated values.
 * @return The first primary key matching the column values or null.
 */
private Object findPrimaryKey(String dottedColumnValues) {
    final String[] columnValues = dottedColumnValues.split("\\.");
    return findPrimaryKey(columnValues);
}
/**
 * Finds the first primary key whose row's columns 1..N equal the given values.
 * Ex: new String[] { "dbsnp", "eval", "called", "all", "novel", "all" }
 *
 * @param columnValues column values.
 * @return The first primary key matching the column values, or null if none does.
 */
private Object findPrimaryKey(Object[] columnValues) {
    for (Object primaryKey : primaryKeyColumn) {
        boolean allMatch = true;
        int i = 0;
        // Column 0 is the primary key itself, so value i is compared to column i+1.
        while (allMatch && i < columnValues.length) {
            allMatch = ObjectUtils.equals(columnValues[i], get(primaryKey, i + 1));
            i++;
        }
        if (allMatch)
            return primaryKey;
    }
    return null;
}
/**
* Add a column to the report and specify the default value that should be supplied if a given position in the table is never explicitly set.
*
@ -230,6 +310,17 @@ public class GATKReportTable {
return columns.get(columnName).get(primaryKey);
}
/**
 * Get a value from the given position in the table.
 *
 * @param primaryKey the primary key value
 * @param columnIndex the index of the column
 * @return the value stored at the specified position in the table
 */
private Object get(Object primaryKey, int columnIndex) {
    final GATKReportColumn column = columns.getByIndex(columnIndex);
    return column.get(primaryKey);
}
/**
* Increment an element in the table. This implementation is awful - a functor would probably be better.
*
@ -515,7 +606,7 @@ public class GATKReportTable {
String primaryKeyFormat = "%-" + getPrimaryKeyColumnWidth() + "s";
// Emit the table definition
out.printf("##:GATKReport.v0.1 %s : %s%n", tableName, tableDescription);
out.printf("##:GATKReport.%s %s : %s%n", LATEST_REPORT_VERSION.versionString, tableName, tableDescription);
// Emit the table header, taking into account the padding requirement if the primary key is a hidden column
boolean needsPadding = false;
@ -545,22 +636,8 @@ public class GATKReportTable {
for (String columnName : columns.keySet()) {
if (columns.get(columnName).isDisplayable()) {
Object obj = columns.get(columnName).getWithoutSideEffects(primaryKey);
if (needsPadding) { out.printf(" "); }
String value = "null";
if (obj != null) {
if (obj instanceof Float) {
value = String.format("%.8f", (Float) obj);
} else if (obj instanceof Double) {
value = String.format("%.8f", (Double) obj);
} else {
value = obj.toString();
}
}
//out.printf(columnWidths.get(columnName), obj == null ? "null" : obj.toString());
String value = columns.get(columnName).getStringValue(primaryKey);
out.printf(columnWidths.get(columnName), value);
needsPadding = true;

View File

@ -1,75 +0,0 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.report;
import org.apache.commons.lang.StringUtils;
import java.util.*;
/**
 * Parses a single GATKReport v0.1 table: the table-definition line, the
 * column-header line, and the whitespace-separated data rows that follow.
 */
public class GATKReportTableParser {
    // Number of lines seen so far; determines how the next line is interpreted.
    private int lineNum = 0;
    // Tokens of the table definition line; descriptions[1] is the table name.
    private String[] descriptions;
    // Column name -> column index, from the header line.
    private Map<String, Integer> headers = new HashMap<String, Integer>();
    // Data rows, one array of cell tokens per row.
    private List<String[]> values = new ArrayList<String[]>();

    /**
     * Consumes the next line of the table. Line 1 is the table definition,
     * line 2 the column headers, and all later lines are data rows.
     *
     * @param line the next line of the table
     */
    public void parse(String line) {
        lineNum++;
        switch (lineNum) {
            case 1:
                descriptions = parseLine(line);
                break; // BUGFIX: missing break let the definition line fall through into headers and values
            case 2:
                String[] columnHeaders = parseLine(line);
                for (int i = 0; i < columnHeaders.length; i++)
                    headers.put(columnHeaders[i], i);
                break; // BUGFIX: missing break let the header line be recorded as a data row
            default:
                values.add(parseLine(line));
        }
    }

    /**
     * @return the table name taken from the definition line
     */
    public String getTableName() {
        return descriptions[1];
    }

    /**
     * Returns the value of the given column for the first row whose columns
     * 1..key.length equal the key components.
     *
     * @param key the row key components (the row's leading columns after column 0)
     * @param column the column name
     * @return the matching cell value, or null if no row matches or the column is unknown
     */
    public String getValue(String[] key, String column) {
        if (!headers.containsKey(column))
            return null;
        for (String[] row : values)
            if (Arrays.equals(key, Arrays.copyOfRange(row, 1, key.length + 1)))
                return row[headers.get(column)];
        return null;
    }

    /**
     * Convenience overload: splits a period-concatenated key and looks it up.
     *
     * @param key period-concatenated row key, e.g. "none.eval.none.all"
     * @param column the column name
     * @return the matching cell value or null
     */
    public String getValue(String key, String column) {
        return getValue(key.split("\\."), column);
    }

    // Splits a line into cell tokens on runs of spaces.
    // (The unused private generateKey helper was removed; it was the only
    // user of org.apache.commons.lang.StringUtils.)
    private String[] parseLine(String line) {
        return line.split(" +");
    }
}

View File

@ -0,0 +1,70 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.report;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
/**
 * Versions of the GATKReport file format.
 */
public enum GATKReportVersion {
    /**
     * Differences between other versions:
     * - Does not allow spaces in cells.
     * - Mostly fixed width but has a bug where the string width of floating point
     *   values was not measured correctly leading to columns that aren't aligned
     */
    V0_1("v0.1"),

    /**
     * Differences between other versions:
     * - Spaces allowed in cells, for example in sample names with spaces in them ex: "C507/FG-CR 6".
     * - Fixed width fixed for floating point values
     */
    V0_2("v0.2");

    /** The version identifier as it appears in report headers, e.g. "v0.1". */
    public final String versionString;

    private GATKReportVersion(String versionString) {
        this.versionString = versionString;
    }

    @Override
    public String toString() {
        return versionString;
    }

    /**
     * Returns the GATK Report Version from the file header.
     *
     * Generalized to check every declared constant instead of hard-coding each
     * version string, so adding a new enum value makes it parseable automatically.
     *
     * @param header Header from the file starting with ##:GATKReport.v[version]
     * @return The version as an enum.
     * @throws ReviewedStingException if the header does not match any known version.
     */
    public static GATKReportVersion fromHeader(String header) {
        for (GATKReportVersion version : values()) {
            if (header.startsWith("##:GATKReport." + version.versionString + " "))
                return version;
        }
        throw new ReviewedStingException("Unknown GATK report version in header: " + header);
    }
}

View File

@ -224,12 +224,12 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> {
if ( ! indelsOnly ) {
for ( VariantContext annotatedVC : annotatedVCs )
vcfWriter.add(annotatedVC, ref.getBase());
vcfWriter.add(annotatedVC);
} else {
// check to see if the buffered context is different (in location) this context
if ( indelBufferContext != null && ! VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(),indelBufferContext.iterator().next()).equals(VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(),annotatedVCs.iterator().next())) ) {
for ( VariantContext annotatedVC : indelBufferContext )
vcfWriter.add(annotatedVC, ref.getBase());
vcfWriter.add(annotatedVC);
indelBufferContext = annotatedVCs;
} else {
indelBufferContext = annotatedVCs;

View File

@ -135,7 +135,7 @@ public class BeagleOutputToVCFWalker extends RodWalker<Integer, Integer> {
return 0;
if (vc_input.isFiltered()) {
vcfWriter.add(vc_input, ref.getBase());
vcfWriter.add(vc_input);
return 1;
}
@ -335,7 +335,7 @@ public class BeagleOutputToVCFWalker extends RodWalker<Integer, Integer> {
}
vcfWriter.add(VariantContext.modifyAttributes(filteredVC,attributes), ref.getBase());
vcfWriter.add(VariantContext.modifyAttributes(filteredVC,attributes));
return 1;

View File

@ -170,20 +170,20 @@ public class ProduceBeagleInputWalker extends RodWalker<Integer, Integer> {
logger.debug(String.format("boot: %d, test: %d, total: %d", bootstrapSetSize, testSetSize, bootstrapSetSize+testSetSize+1));
if ( (bootstrapSetSize+1.0)/(1.0+bootstrapSetSize+testSetSize) <= bootstrap ) {
if ( bootstrapVCFOutput != null ) {
bootstrapVCFOutput.add(VariantContext.modifyFilters(validation, BOOTSTRAP_FILTER), ref.getBase() );
bootstrapVCFOutput.add(VariantContext.modifyFilters(validation, BOOTSTRAP_FILTER));
}
bootstrapSetSize++;
return true;
} else {
if ( bootstrapVCFOutput != null ) {
bootstrapVCFOutput.add(validation,ref.getBase());
bootstrapVCFOutput.add(validation);
}
testSetSize++;
return false;
}
} else {
if ( validation != null && bootstrapVCFOutput != null ) {
bootstrapVCFOutput.add(validation,ref.getBase());
bootstrapVCFOutput.add(validation);
}
return false;
}

View File

@ -112,7 +112,7 @@ public class VariantsToBeagleUnphasedWalker extends RodWalker<Integer, Integer>
// if we are holding it back and we are writing a bootstrap VCF, write it out
if ( makeMissing && bootstrapVCFOutput != null ) {
bootstrapVCFOutput.add(vc, ref.getBase());
bootstrapVCFOutput.add(vc);
}
// regardless, all sites are written to the unphased genotypes file, marked as missing if appropriate

View File

@ -235,7 +235,7 @@ public class DiffEngine {
// now that we have a specific list of values we want to show, display them
GATKReport report = new GATKReport();
final String tableName = "diffences";
report.addTable(tableName, "Summarized differences between the master and test files.\nSee http://www.broadinstitute.org/gsa/wiki/index.php/DiffEngine for more information", false);
report.addTable(tableName, "Summarized differences between the master and test files. See http://www.broadinstitute.org/gsa/wiki/index.php/DiffEngine for more information", false);
GATKReportTable table = report.getTable(tableName);
table.addPrimaryKey("Difference", true);
table.addColumn("NumberOfOccurrences", 0);

View File

@ -278,7 +278,7 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
else
filteredVC = new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), genotypes, vc.getNegLog10PError(), filters, vc.getAttributes());
writer.add( filteredVC, context.getReferenceContext().getBase() );
writer.add(filteredVC);
}
public Integer reduce(Integer value, Integer sum) {

View File

@ -93,7 +93,7 @@ public class UGCalcLikelihoods extends LocusWalker<VariantCallContext, Integer>
public VariantCallContext map(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext) {
VariantContext call = UG_engine.calculateLikelihoods(tracker, refContext, rawContext);
return call == null ? null : new VariantCallContext(call, refContext.getBase(), true);
return call == null ? null : new VariantCallContext(call, true);
}
public Integer reduceInit() { return 0; }
@ -107,7 +107,7 @@ public class UGCalcLikelihoods extends LocusWalker<VariantCallContext, Integer>
return sum;
try {
writer.add(value, value.refBase);
writer.add(value);
} catch (IllegalArgumentException e) {
throw new IllegalArgumentException(e.getMessage() + "; this is often caused by using the --assume_single_sample_reads argument with the wrong sample name");
}

View File

@ -115,7 +115,7 @@ public class UGCallVariants extends RodWalker<VariantCallContext, Integer> {
try {
Map<String, Object> attrs = new HashMap<String, Object>(value.getAttributes());
VariantContextUtils.calculateChromosomeCounts(value, attrs, true);
writer.add(VariantContext.modifyAttributes(value, attrs), value.refBase);
writer.add(VariantContext.modifyAttributes(value, attrs));
} catch (IllegalArgumentException e) {
throw new IllegalArgumentException(e.getMessage() + "; this is often caused by using the --assume_single_sample_reads argument with the wrong sample name");
}

View File

@ -227,7 +227,7 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
try {
// we are actually making a call
sum.nCallsMade++;
writer.add(value, value.refBase);
writer.add(value);
} catch (IllegalArgumentException e) {
throw new IllegalArgumentException(e.getMessage() + "; this is often caused by using the --assume_single_sample_reads argument with the wrong sample name");
}

View File

@ -247,7 +247,7 @@ public class UnifiedGenotyperEngine {
}
if ( annotationEngine != null ) {
// we want to use the *unfiltered* and *unBAQed* context for the annotations
// Note: we want to use the *unfiltered* and *unBAQed* context for the annotations
ReadBackedPileup pileup = null;
if (rawContext.hasExtendedEventPileup())
pileup = rawContext.getExtendedEventPileup();
@ -258,7 +258,7 @@ public class UnifiedGenotyperEngine {
vc = annotationEngine.annotateContext(tracker, ref, stratifiedContexts, vc);
}
return new VariantCallContext(vc, ref.getBase(), false);
return new VariantCallContext(vc, false);
}
private VariantContext createVariantContextFromLikelihoods(ReferenceContext refContext, Allele refAllele, Map<String, MultiallelicGenotypeLikelihoods> GLs) {
@ -300,7 +300,8 @@ public class UnifiedGenotyperEngine {
genotypes,
VariantContext.NO_NEG_LOG_10PERROR,
null,
null);
null,
refContext.getBase());
}
// private method called by both UnifiedGenotyper and UGCallVariants entry points into the engine
@ -425,10 +426,10 @@ public class UnifiedGenotyperEngine {
myAlleles.add(vc.getReference());
}
VariantContext vcCall = new VariantContext("UG_call", loc.getContig(), loc.getStart(), endLoc,
myAlleles, genotypes, phredScaledConfidence/10.0, passesCallThreshold(phredScaledConfidence) ? null : filter, attributes);
myAlleles, genotypes, phredScaledConfidence/10.0, passesCallThreshold(phredScaledConfidence) ? null : filter, attributes, refContext.getBase());
if ( annotationEngine != null ) {
// first off, we want to use the *unfiltered* and *unBAQed* context for the annotations
// Note: we want to use the *unfiltered* and *unBAQed* context for the annotations
ReadBackedPileup pileup = null;
if (rawContext.hasExtendedEventPileup())
pileup = rawContext.getExtendedEventPileup();
@ -439,9 +440,7 @@ public class UnifiedGenotyperEngine {
vcCall = annotationEngine.annotateContext(tracker, refContext, stratifiedContexts, vcCall);
}
VariantCallContext call = new VariantCallContext(vcCall, confidentlyCalled(phredScaledConfidence, PofF));
call.setRefBase(refContext.getBase());
return call;
return new VariantCallContext(vcCall, confidentlyCalled(phredScaledConfidence, PofF));
}
private int calculateEndPos(Set<Allele> alleles, Allele refAllele, GenomeLoc loc) {

View File

@ -36,7 +36,6 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
* Useful helper class to communicate the results of calculateGenotype to framework
*/
public class VariantCallContext extends VariantContext {
public byte refBase;
// Was the site called confidently, either reference or variant?
public boolean confidentlyCalled = false;
@ -55,16 +54,6 @@ public class VariantCallContext extends VariantContext {
this.shouldEmit = shouldEmit;
}
VariantCallContext(VariantContext vc, byte ref, boolean confidentlyCalledP) {
super(vc);
this.refBase = ref;
this.confidentlyCalled = confidentlyCalledP;
}
public void setRefBase(byte ref) {
this.refBase = ref;
}
/* these methods are only implemented for GENOTYPE_GIVEN_ALLELES MODE */
//todo -- expand these methods to all modes

View File

@ -91,7 +91,7 @@ public class MergeAndMatchHaplotypes extends RodWalker<Integer, Integer> {
}
VariantContext newvc = new VariantContext(SOURCE_NAME, pbt.getChr(), pbt.getStart(), pbt.getStart(), pbt.getAlleles(), genotypes, pbt.getNegLog10PError(), pbt.getFilters(), pbt.getAttributes());
vcfWriter.add(newvc, ref.getBase());
vcfWriter.add(newvc);
}
}

View File

@ -118,7 +118,7 @@ public class MergeSegregatingAlternateAllelesVCFWriter implements VCFWriter {
innerWriter.close();
}
public void add(VariantContext vc, byte refBase) {
public void add(VariantContext vc) {
if (useSingleSample != null) { // only want to output context for one sample
Genotype sampGt = vc.getGenotype(useSingleSample);
if (sampGt != null) // TODO: subContextFromGenotypes() does not handle any INFO fields [AB, HaplotypeScore, MQ, etc.]. Note that even SelectVariants.subsetRecord() only handles AC,AN,AF, and DP!
@ -138,11 +138,11 @@ public class MergeSegregatingAlternateAllelesVCFWriter implements VCFWriter {
if (curVcIsNotFiltered) { // still need to wait before can release vc
logger.debug("Waiting for new variant " + VariantContextUtils.getLocation(genomeLocParser, vc));
vcfrWaitingToMerge = new VCFRecord(vc, refBase, false);
vcfrWaitingToMerge = new VCFRecord(vc, false);
}
else if (!emitOnlyMergedRecords) { // filtered records are never merged
logger.debug("DIRECTLY output " + VariantContextUtils.getLocation(genomeLocParser, vc));
innerWriter.add(vc, refBase);
innerWriter.add(vc);
}
}
else { // waiting to merge vcfrWaitingToMerge
@ -151,7 +151,7 @@ public class MergeSegregatingAlternateAllelesVCFWriter implements VCFWriter {
if (!curVcIsNotFiltered) {
if (!emitOnlyMergedRecords) { // filtered records are never merged
logger.debug("Caching unprocessed output " + VariantContextUtils.getLocation(genomeLocParser, vc));
filteredVcfrList.add(new VCFRecord(vc, refBase, false));
filteredVcfrList.add(new VCFRecord(vc, false));
}
}
else { // waiting to merge vcfrWaitingToMerge, and curVcIsNotFiltered. So, attempt to merge them:
@ -188,14 +188,14 @@ public class MergeSegregatingAlternateAllelesVCFWriter implements VCFWriter {
addedAttribs.putAll(mergedVc.getAttributes());
mergedVc = VariantContext.modifyAttributes(mergedVc, addedAttribs);
vcfrWaitingToMerge = new VCFRecord(mergedVc, vcfrWaitingToMerge.refBase, true);
vcfrWaitingToMerge = new VCFRecord(mergedVc, true);
numMergedRecords++;
}
}
if (!mergedRecords) {
stopWaitingToMerge();
vcfrWaitingToMerge = new VCFRecord(vc, refBase, false);
vcfrWaitingToMerge = new VCFRecord(vc, false);
}
logger.debug("Merged? = " + mergedRecords);
}
@ -210,11 +210,11 @@ public class MergeSegregatingAlternateAllelesVCFWriter implements VCFWriter {
}
if (!emitOnlyMergedRecords || vcfrWaitingToMerge.resultedFromMerge)
innerWriter.add(vcfrWaitingToMerge.vc, vcfrWaitingToMerge.refBase);
innerWriter.add(vcfrWaitingToMerge.vc);
vcfrWaitingToMerge = null;
for (VCFRecord vcfr : filteredVcfrList)
innerWriter.add(vcfr.vc, vcfr.refBase);
innerWriter.add(vcfr.vc);
filteredVcfrList.clear();
}
@ -257,12 +257,10 @@ public class MergeSegregatingAlternateAllelesVCFWriter implements VCFWriter {
private static class VCFRecord {
public VariantContext vc;
public byte refBase;
public boolean resultedFromMerge;
public VCFRecord(VariantContext vc, byte refBase, boolean resultedFromMerge) {
public VCFRecord(VariantContext vc, boolean resultedFromMerge) {
this.vc = vc;
this.refBase = refBase;
this.resultedFromMerge = resultedFromMerge;
}
}

View File

@ -311,7 +311,8 @@ public class PhaseByTransmission extends RodWalker<Integer, Integer> {
VariantContext newvc = VariantContext.modifyGenotypes(vc, genotypeMap);
vcfWriter.add(newvc, ref.getBase());
vcfWriter.add(newvc);
}
}
return null;

View File

@ -25,20 +25,10 @@ package org.broadinstitute.sting.gatk.walkers.phasing;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
public class WriteVCF {
public static void writeVCF(VariantContext vc, VCFWriter writer, Logger logger) {
byte refBase;
if (!vc.isIndel()) {
Allele refAllele = vc.getReference();
refBase = SNPallelePair.getSingleBase(refAllele);
}
else {
refBase = vc.getReferenceBaseForIndel();
}
writer.add(vc, refBase);
writer.add(vc);
}
}

View File

@ -204,9 +204,9 @@ public class ApplyRecalibration extends RodWalker<Integer, Integer> {
filters.add(filterString);
vc = VariantContext.modifyFilters(vc, filters);
}
vcfWriter.add( VariantContext.modifyPErrorFiltersAndAttributes(vc, vc.getNegLog10PError(), vc.getFilters(), attrs), ref.getBase() );
vcfWriter.add( VariantContext.modifyPErrorFiltersAndAttributes(vc, vc.getNegLog10PError(), vc.getFilters(), attrs) );
} else { // valid VC but not compatible with this mode, so just emit the variant untouched
vcfWriter.add( vc, ref.getBase() );
vcfWriter.add( vc );
}
}
}

View File

@ -258,7 +258,7 @@ public class VariantDataManager {
datum.consensusCount = 0;
for( final TrainingSet trainingSet : trainingSets ) {
for( final VariantContext trainVC : tracker.getValues(VariantContext.class, trainingSet.name) ) {
for( final VariantContext trainVC : tracker.getValues(VariantContext.class, trainingSet.name, ref.getLocus()) ) {
if( trainVC != null && trainVC.isNotFiltered() && trainVC.isVariant() &&
((evalVC.isSNP() && trainVC.isSNP()) || ((evalVC.isIndel()||evalVC.isMixed()) && (trainVC.isIndel()||trainVC.isMixed()))) &&
(TRUST_ALL_POLYMORPHIC || !trainVC.hasGenotypes() || trainVC.isPolymorphic()) ) {

View File

@ -156,7 +156,7 @@ public class CombineVariants extends RodWalker<Integer, Integer> {
if ( ASSUME_IDENTICAL_SAMPLES ) {
for ( final VariantContext vc : vcs ) {
vcfWriter.add( vc, ref.getBase() );
vcfWriter.add(vc);
}
return vcs.isEmpty() ? 0 : 1;
@ -181,7 +181,7 @@ public class CombineVariants extends RodWalker<Integer, Integer> {
if ( VCsByType.containsKey(type) )
mergedVCs.add(VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(), VCsByType.get(type),
priority, filteredRecordsMergeType, genotypeMergeOption, true, printComplexMerges,
ref.getBase(), SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC));
SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC));
}
}
@ -196,7 +196,7 @@ public class CombineVariants extends RodWalker<Integer, Integer> {
VariantContext annotatedMergedVC = VariantContext.modifyAttributes(mergedVC, attributes);
if ( minimalVCF )
annotatedMergedVC = VariantContextUtils.pruneVariantContext(annotatedMergedVC, Arrays.asList(SET_KEY));
vcfWriter.add(annotatedMergedVC, ref.getBase());
vcfWriter.add(annotatedMergedVC);
}
return vcs.isEmpty() ? 0 : 1;

View File

@ -82,7 +82,7 @@ public class FilterLiftedVariants extends RodWalker<Integer, Integer> {
if ( failed )
failedLocs++;
else
writer.add(vc, ref[0]);
writer.add(vc);
}
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {

View File

@ -94,10 +94,10 @@ public class LeftAlignVariants extends RodWalker<Integer, Integer> {
private int alignAndWrite(VariantContext vc, final ReferenceContext ref) {
if ( vc.isBiallelic() && vc.isIndel() )
if ( vc.isBiallelic() && vc.isIndel() && !vc.isComplexIndel() )
return writeLeftAlignedIndel(vc, ref);
else {
writer.add(vc, ref.getBase());
writer.add(vc);
return 0;
}
}
@ -113,7 +113,7 @@ public class LeftAlignVariants extends RodWalker<Integer, Integer> {
indelLength = vc.getAlternateAllele(0).length();
if ( indelLength > 200 ) {
writer.add(vc, ref.getBase());
writer.add(vc);
return 0;
}
@ -141,17 +141,12 @@ public class LeftAlignVariants extends RodWalker<Integer, Integer> {
byte[] newBases = new byte[indelLength];
System.arraycopy((vc.isDeletion() ? refSeq : originalIndel), indelIndex, newBases, 0, indelLength);
Allele newAllele = Allele.create(newBases, vc.isDeletion());
newVC = updateAllele(newVC, newAllele);
newVC = updateAllele(newVC, newAllele, refSeq[indelIndex-1]);
// we need to update the reference base just in case it changed
Map<String, Object> attrs = new HashMap<String, Object>(newVC.getAttributes());
attrs.put(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY, refSeq[indelIndex-1]);
newVC = VariantContext.modifyAttributes(newVC, attrs);
writer.add(newVC, refSeq[indelIndex-1]);
writer.add(newVC);
return 1;
} else {
writer.add(vc, ref.getBase());
writer.add(vc);
return 0;
}
}
@ -177,7 +172,7 @@ public class LeftAlignVariants extends RodWalker<Integer, Integer> {
return hap;
}
public static VariantContext updateAllele(VariantContext vc, Allele newAllele) {
public static VariantContext updateAllele(VariantContext vc, Allele newAllele, Byte refBaseForIndel) {
// create a mapping from original allele to new allele
HashMap<Allele, Allele> alleleMap = new HashMap<Allele, Allele>(vc.getAlleles().size());
if ( newAllele.isReference() ) {
@ -201,6 +196,6 @@ public class LeftAlignVariants extends RodWalker<Integer, Integer> {
newGenotypes.put(genotype.getKey(), Genotype.modifyAlleles(genotype.getValue(), newAlleles));
}
return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), alleleMap.values(), newGenotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, vc.getAttributes());
return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), alleleMap.values(), newGenotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, vc.getAttributes(), refBaseForIndel);
}
}

View File

@ -128,14 +128,14 @@ public class LiftoverVariants extends RodWalker<Integer, Integer> {
vc = VariantContext.modifyAttributes(vc, attrs);
}
VariantContext newVC = VariantContext.createVariantContextWithPaddedAlleles(vc, ref.getBase(), false);
VariantContext newVC = VariantContext.createVariantContextWithPaddedAlleles(vc, false);
if ( originalVC.isSNP() && originalVC.isBiallelic() && VariantContextUtils.getSNPSubstitutionType(originalVC) != VariantContextUtils.getSNPSubstitutionType(newVC) ) {
logger.warn(String.format("VCF at %s / %d => %s / %d is switching substitution type %s/%s to %s/%s",
originalVC.getChr(), originalVC.getStart(), newVC.getChr(), newVC.getStart(),
originalVC.getReference(), originalVC.getAlternateAllele(0), newVC.getReference(), newVC.getAlternateAllele(0)));
}
writer.add(vc, ref.getBase());
writer.add(vc);
successfulIntervals++;
} else {
failedIntervals++;

View File

@ -98,9 +98,9 @@ public class RandomlySplitVariants extends RodWalker<Integer, Integer> {
for ( VariantContext vc : vcs ) {
int random = GenomeAnalysisEngine.getRandomGenerator().nextInt(1000);
if ( random < iFraction )
vcfWriter1.add(vc, ref.getBase());
vcfWriter1.add(vc);
else
vcfWriter2.add(vc, ref.getBase());
vcfWriter2.add(vc);
}
return 1;

View File

@ -32,6 +32,8 @@ import org.broadinstitute.sting.utils.text.XReadLines;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.MendelianViolation;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -126,16 +128,13 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
/* Private class used to store the intermediate variants in the integer random selection process */
private class RandomVariantStructure {
private VariantContext vc;
private byte refBase;
RandomVariantStructure(VariantContext vcP, byte refBaseP) {
RandomVariantStructure(VariantContext vcP) {
vc = vcP;
refBase = refBaseP;
}
public void set (VariantContext vcP, byte refBaseP) {
public void set (VariantContext vcP) {
vc = vcP;
refBase = refBaseP;
}
}
@ -356,7 +355,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
randomlyAddVariant(++variantNumber, sub, ref.getBase());
}
else if (!SELECT_RANDOM_FRACTION || (!KEEP_AF_SPECTRUM && GenomeAnalysisEngine.getRandomGenerator().nextDouble() < fractionRandom)) {
vcfWriter.add(sub, ref.getBase());
vcfWriter.add(sub);
}
else {
if (SELECT_RANDOM_FRACTION && KEEP_AF_SPECTRUM ) {
@ -404,7 +403,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
//System.out.format("%s .. %4.4f\n",afo.toString(), af);
if (GenomeAnalysisEngine.getRandomGenerator().nextDouble() < fractionRandom * afBoost * afBoost)
vcfWriter.add(sub, ref.getBase());
vcfWriter.add(sub);
}
@ -511,7 +510,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
if (SELECT_RANDOM_NUMBER) {
int positionToPrint = positionToAdd;
for (int i=0; i<numRandom; i++) {
vcfWriter.add(variantArray[positionToPrint].vc, variantArray[positionToPrint].refBase);
vcfWriter.add(variantArray[positionToPrint].vc);
positionToPrint = nextCircularPosition(positionToPrint);
}
}
@ -574,13 +573,13 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
private void randomlyAddVariant(int rank, VariantContext vc, byte refBase) {
if (nVariantsAdded < numRandom)
variantArray[nVariantsAdded++] = new RandomVariantStructure(vc, refBase);
variantArray[nVariantsAdded++] = new RandomVariantStructure(vc);
else {
double v = GenomeAnalysisEngine.getRandomGenerator().nextDouble();
double t = (1.0/(rank-numRandom+1));
if ( v < t) {
variantArray[positionToAdd].set(vc, refBase);
variantArray[positionToAdd].set(vc);
nVariantsAdded++;
positionToAdd = nextCircularPosition(positionToAdd);
}

View File

@ -70,7 +70,7 @@ public class VariantValidationAssessor extends RodWalker<Pair<VariantContext, By
private TreeSet<String> sampleNames = null;
// variant context records
private ArrayList<Pair<VariantContext, Byte>> records = new ArrayList<Pair<VariantContext, Byte>>();
private ArrayList<VariantContext> records = new ArrayList<VariantContext>();
// statistics
private int numRecords = 0;
@ -91,7 +91,7 @@ public class VariantValidationAssessor extends RodWalker<Pair<VariantContext, By
return 0;
}
public Pair<VariantContext, Byte> map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
public VariantContext map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
if ( tracker == null )
return null;
@ -106,7 +106,7 @@ public class VariantValidationAssessor extends RodWalker<Pair<VariantContext, By
return addVariantInformationToCall(ref, vc);
}
public Integer reduce(Pair<VariantContext, Byte> call, Integer numVariants) {
public Integer reduce(VariantContext call, Integer numVariants) {
if ( call != null ) {
numVariants++;
records.add(call);
@ -156,12 +156,12 @@ public class VariantValidationAssessor extends RodWalker<Pair<VariantContext, By
vcfwriter.writeHeader(new VCFHeader(hInfo, SampleUtils.getUniqueSamplesFromRods(getToolkit(), inputNames)));
for ( Pair<VariantContext, Byte> record : records )
vcfwriter.add(record.first, record.second);
for ( VariantContext record : records )
vcfwriter.add(record);
}
private Pair<VariantContext, Byte> addVariantInformationToCall(ReferenceContext ref, VariantContext vContext) {
private VariantContext addVariantInformationToCall(ReferenceContext ref, VariantContext vContext) {
// check possible filters
double hwPvalue = hardyWeinbergCalculation(vContext);
@ -203,9 +203,7 @@ public class VariantValidationAssessor extends RodWalker<Pair<VariantContext, By
infoMap.put(VCFConstants.ALLELE_COUNT_KEY, String.format("%d", altAlleleCount));
infoMap.put(VCFConstants.ALLELE_NUMBER_KEY, String.format("%d", vContext.getChromosomeCount()));
vContext = VariantContext.modifyAttributes(vContext, infoMap);
return new Pair<VariantContext, Byte>(vContext, ref.getBase());
return VariantContext.modifyAttributes(vContext, infoMap);
}
private double hardyWeinbergCalculation(VariantContext vc) {

View File

@ -83,8 +83,8 @@ public class VariantsToTable extends RodWalker<Integer, Integer> {
getters.put("REF", new Getter() {
public String get(VariantContext vc) {
String x = "";
if (vc.hasAttribute(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY)) {
Byte refByte = (Byte)(vc.getAttribute(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY));
if ( vc.hasReferenceBaseForIndel() ) {
Byte refByte = vc.getReferenceBaseForIndel();
x=x+new String(new byte[]{refByte});
}
return x+vc.getReference().getDisplayString();
@ -95,8 +95,8 @@ public class VariantsToTable extends RodWalker<Integer, Integer> {
StringBuilder x = new StringBuilder();
int n = vc.getAlternateAlleles().size();
if ( n == 0 ) return ".";
if (vc.hasAttribute(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY)) {
Byte refByte = (Byte)(vc.getAttribute(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY));
if ( vc.hasReferenceBaseForIndel() ) {
Byte refByte = vc.getReferenceBaseForIndel();
x.append(new String(new byte[]{refByte}));
}

View File

@ -154,9 +154,10 @@ public class VariantsToVCF extends RodWalker<Integer, Integer> {
VariantContext vc = VariantContextAdaptors.toVariantContext(variants.getName(), hapmap, ref);
if ( vc != null ) {
if ( refBase != null ) {
Map<String, Object> attrs = new HashMap<String, Object>(vc.getAttributes());
attrs.put(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY, refBase);
vc = VariantContext.modifyAttributes(vc, attrs);
// TODO -- fix me
//Map<String, Object> attrs = new HashMap<String, Object>(vc.getAttributes());
//attrs.put(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY, refBase);
//vc = VariantContext.modifyAttributes(vc, attrs);
}
hapmapVCs.add(vc);
}
@ -238,7 +239,7 @@ public class VariantsToVCF extends RodWalker<Integer, Integer> {
}
vc = VariantContextUtils.purgeUnallowedGenotypeAttributes(vc, allowedGenotypeFormatStrings);
vcfwriter.add(vc, ref);
vcfwriter.add(vc);
}
public Integer reduceInit() {

View File

@ -42,6 +42,21 @@ public class Utils {
/** our log, which we want to capture anything from this class */
private static Logger logger = Logger.getLogger(Utils.class);
public static final float JAVA_DEFAULT_HASH_LOAD_FACTOR = 0.75f;
/**
* Calculates the optimum initial size for a hash table given the maximum number
* of elements it will need to hold. The optimum size is the smallest size that
* is guaranteed not to result in any rehash/table-resize operations.
*
* @param maxElements The maximum number of elements you expect the hash table
* will need to hold
* @return The optimum initial size for the table, given maxElements
*/
public static int optimumHashSize ( int maxElements ) {
return (int)(maxElements / JAVA_DEFAULT_HASH_LOAD_FACTOR) + 2;
}
public static String getClassName(Class c) {
String FQClassName = c.getName();
int firstChar;

View File

@ -567,7 +567,6 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
// set the reference base for indels in the attributes
Map<String,Object> attributes = new TreeMap<String,Object>(inputVC.getAttributes());
attributes.put(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY, new Byte(inputVC.getReference().getBases()[0]));
Map<Allele, Allele> originalToTrimmedAlleleMap = new HashMap<Allele, Allele>();
@ -611,7 +610,7 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
genotypes.put(sample.getKey(), Genotype.modifyAlleles(sample.getValue(), trimmedAlleles));
}
return new VariantContext(inputVC.getSource(), inputVC.getChr(), inputVC.getStart(), inputVC.getEnd(), alleles, genotypes, inputVC.getNegLog10PError(), inputVC.filtersWereApplied() ? inputVC.getFilters() : null, attributes);
return new VariantContext(inputVC.getSource(), inputVC.getChr(), inputVC.getStart(), inputVC.getEnd(), alleles, genotypes, inputVC.getNegLog10PError(), inputVC.filtersWereApplied() ? inputVC.getFilters() : null, attributes, new Byte(inputVC.getReference().getBases()[0]));
}

View File

@ -105,9 +105,8 @@ public abstract class SortingVCFWriterBase implements VCFWriter {
* add a record to the file
*
* @param vc the Variant Context object
* @param refBase the ref base
*/
public void add(VariantContext vc, byte refBase) {
public void add(VariantContext vc) {
/* Note that the code below does not prevent the successive add()-ing of: (chr1, 10), (chr20, 200), (chr15, 100)
since there is no implicit ordering of chromosomes:
*/
@ -122,7 +121,7 @@ public abstract class SortingVCFWriterBase implements VCFWriter {
noteCurrentRecord(vc); // possibly overwritten
queue.add(new VCFRecord(vc, refBase));
queue.add(new VCFRecord(vc));
emitSafeRecords();
}
@ -133,7 +132,7 @@ public abstract class SortingVCFWriterBase implements VCFWriter {
// No need to wait, waiting for nothing, or before what we're waiting for:
if (emitUnsafe || mostUpstreamWritableLoc == null || firstRec.vc.getStart() <= mostUpstreamWritableLoc) {
queue.poll();
innerWriter.add(firstRec.vc, firstRec.refBase);
innerWriter.add(firstRec.vc);
}
else {
break;
@ -143,7 +142,7 @@ public abstract class SortingVCFWriterBase implements VCFWriter {
/**
* Gets a string representation of this object.
* @return
* @return a string representation of this object
*/
@Override
public String toString() {
@ -158,11 +157,9 @@ public abstract class SortingVCFWriterBase implements VCFWriter {
private static class VCFRecord {
public VariantContext vc;
public byte refBase;
public VCFRecord(VariantContext vc, byte refBase) {
public VCFRecord(VariantContext vc) {
this.vc = vc;
this.refBase = refBase;
}
}
}

View File

@ -202,20 +202,18 @@ public class StandardVCFWriter implements VCFWriter {
* add a record to the file
*
* @param vc the Variant Context object
* @param refBase the ref base used for indels
*/
public void add(VariantContext vc, byte refBase) {
add(vc, refBase, false);
public void add(VariantContext vc) {
add(vc, false);
}
/**
* add a record to the file
*
* @param vc the Variant Context object
* @param refBase the ref base used for indels
* @param refBaseShouldBeAppliedToEndOfAlleles *** THIS SHOULD BE FALSE EXCEPT FOR AN INDEL AT THE EXTREME BEGINNING OF A CONTIG (WHERE THERE IS NO PREVIOUS BASE, SO WE USE THE BASE AFTER THE EVENT INSTEAD)
*/
public void add(VariantContext vc, byte refBase, boolean refBaseShouldBeAppliedToEndOfAlleles) {
public void add(VariantContext vc, boolean refBaseShouldBeAppliedToEndOfAlleles) {
if ( mHeader == null )
throw new IllegalStateException("The VCF Header must be written before records can be added: " + locationString());
@ -223,7 +221,7 @@ public class StandardVCFWriter implements VCFWriter {
vc = VariantContext.modifyGenotypes(vc, null);
try {
vc = VariantContext.createVariantContextWithPaddedAlleles(vc, refBase, refBaseShouldBeAppliedToEndOfAlleles);
vc = VariantContext.createVariantContextWithPaddedAlleles(vc, refBaseShouldBeAppliedToEndOfAlleles);
// if we are doing on the fly indexing, add the record ***before*** we write any bytes
if ( indexer != null ) indexer.addFeature(vc, positionalStream.getPosition());
@ -285,7 +283,7 @@ public class StandardVCFWriter implements VCFWriter {
Map<String, String> infoFields = new TreeMap<String, String>();
for ( Map.Entry<String, Object> field : vc.getAttributes().entrySet() ) {
String key = field.getKey();
if ( key.equals(VariantContext.ID_KEY) || key.equals(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY) || key.equals(VariantContext.UNPARSED_GENOTYPE_MAP_KEY) || key.equals(VariantContext.UNPARSED_GENOTYPE_PARSER_KEY) )
if ( key.equals(VariantContext.ID_KEY) || key.equals(VariantContext.UNPARSED_GENOTYPE_MAP_KEY) || key.equals(VariantContext.UNPARSED_GENOTYPE_PARSER_KEY) )
continue;
String outputValue = formatVCFField(field.getValue());

View File

@ -14,5 +14,5 @@ public interface VCFWriter {
*/
public void close();
public void add(VariantContext vc, byte refBase);
public void add(VariantContext vc);
}

View File

@ -116,4 +116,57 @@ public class TextFormattingUtils {
return bundle;
}
/**
* Returns the word starting positions within line, excluding the first position 0.
* The returned list is compatible with splitFixedWidth.
* @param line Text to parse.
* @return the word starting positions within line, excluding the first position 0.
*/
public static List<Integer> getWordStarts(String line) {
if (line == null)
throw new ReviewedStingException("line is null");
List<Integer> starts = new ArrayList<Integer>();
int stop = line.length();
for (int i = 1; i < stop; i++)
if (Character.isWhitespace(line.charAt(i-1)))
if(!Character.isWhitespace(line.charAt(i)))
starts.add(i);
return starts;
}
/**
* Parses a fixed width line of text.
* @param line Text to parse.
* @param columnStarts the column starting positions within line, excluding the first position 0.
* @return The parsed string array with each entry trimmed.
*/
public static String[] splitFixedWidth(String line, List<Integer> columnStarts) {
if (line == null)
throw new ReviewedStingException("line is null");
if (columnStarts == null)
throw new ReviewedStingException("columnStarts is null");
int startCount = columnStarts.size();
String[] row = new String[startCount + 1];
if (startCount == 0) {
row[0] = line.trim();
} else {
row[0] = line.substring(0, columnStarts.get(0)).trim();
for (int i = 1; i < startCount; i++)
row[i] = line.substring(columnStarts.get(i - 1), columnStarts.get(i)).trim();
row[startCount] = line.substring(columnStarts.get(startCount - 1)).trim();
}
return row;
}
/**
* Parses a line of text by whitespace.
* @param line Text to parse.
* @return The parsed string array.
*/
public static String[] splitWhiteSpace(String line) {
if (line == null)
throw new ReviewedStingException("line is null");
return line.trim().split("\\s+");
}
}

View File

@ -27,15 +27,15 @@ public class MutableVariantContext extends VariantContext {
}
public MutableVariantContext(String source, String contig, long start, long stop, Collection<Allele> alleles) {
this(source, contig, start, stop, alleles, NO_GENOTYPES, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null);
super(source, contig, start, stop, alleles, NO_GENOTYPES, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null);
}
public MutableVariantContext(String source, String contig, long start, long stop, Collection<Allele> alleles, Collection<Genotype> genotypes) {
this(source, contig, start, stop, alleles, genotypes, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null);
super(source, contig, start, stop, alleles, genotypes, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null);
}
public MutableVariantContext(VariantContext parent) {
this(parent.getSource(), parent.contig, parent.start, parent.stop, parent.getAlleles(), parent.getGenotypes(), parent.getNegLog10PError(), parent.getFilters(), parent.getAttributes());
super(parent.getSource(), parent.contig, parent.start, parent.stop, parent.getAlleles(), parent.getGenotypes(), parent.getNegLog10PError(), parent.getFilters(), parent.getAttributes(), parent.getReferenceBaseForIndel());
}
/**

View File

@ -5,6 +5,7 @@ import org.broad.tribble.TribbleException;
import org.broad.tribble.util.ParsingUtils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.codecs.vcf.VCFParser;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.util.*;
@ -163,11 +164,12 @@ import java.util.*;
public class VariantContext implements Feature { // to enable tribble intergration
protected InferredGeneticContext commonInfo = null;
public final static double NO_NEG_LOG_10PERROR = InferredGeneticContext.NO_NEG_LOG_10PERROR;
public final static String REFERENCE_BASE_FOR_INDEL_KEY = "_REFERENCE_BASE_FOR_INDEL_";
public final static String UNPARSED_GENOTYPE_MAP_KEY = "_UNPARSED_GENOTYPE_MAP_";
public final static String UNPARSED_GENOTYPE_PARSER_KEY = "_UNPARSED_GENOTYPE_PARSER_";
public final static String ID_KEY = "ID";
private final Byte REFERENCE_BASE_FOR_INDEL;
public final static Set<String> PASSES_FILTERS = Collections.unmodifiableSet(new LinkedHashSet<String>());
/** The location of this VariantContext */
@ -205,6 +207,24 @@ public class VariantContext implements Feature { // to enable tribble intergrati
// ---------------------------------------------------------------------------------------------------------
/**
* the complete constructor. Makes a complete VariantContext from its arguments
*
* @param source source
* @param contig the contig
* @param start the start base (one based)
* @param stop the stop reference base (one based)
* @param alleles alleles
* @param genotypes genotypes map
* @param negLog10PError qual
* @param filters filters: use null for unfiltered and empty set for passes filters
* @param attributes attributes
* @param referenceBaseForIndel padded reference base
*/
public VariantContext(String source, String contig, long start, long stop, Collection<Allele> alleles, Map<String, Genotype> genotypes, double negLog10PError, Set<String> filters, Map<String, ?> attributes, Byte referenceBaseForIndel) {
this(source, contig, start, stop, alleles, genotypes, negLog10PError, filters, attributes, referenceBaseForIndel, false);
}
/**
* the complete constructor. Makes a complete VariantContext from its arguments
*
@ -219,7 +239,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
* @param attributes attributes
*/
public VariantContext(String source, String contig, long start, long stop, Collection<Allele> alleles, Map<String, Genotype> genotypes, double negLog10PError, Set<String> filters, Map<String, ?> attributes) {
this(source, contig, start, stop, alleles, genotypes, negLog10PError, filters, attributes, false);
this(source, contig, start, stop, alleles, genotypes, negLog10PError, filters, attributes, null, false);
}
/**
@ -239,7 +259,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
* @param attributes attributes
*/
public VariantContext(String source, String contig, long start, long stop, Collection<Allele> alleles, double negLog10PError, Set<String> filters, Map<String, ?> attributes) {
this(source, contig, start, stop, alleles, NO_GENOTYPES, negLog10PError, filters, attributes, true);
this(source, contig, start, stop, alleles, NO_GENOTYPES, negLog10PError, filters, attributes, null, true);
}
/**
@ -256,7 +276,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
* @param attributes attributes
*/
public VariantContext(String source, String contig, long start, long stop, Collection<Allele> alleles, Collection<Genotype> genotypes, double negLog10PError, Set<String> filters, Map<String, ?> attributes) {
this(source, contig, start, stop, alleles, genotypes != null ? genotypeCollectionToMap(new TreeMap<String, Genotype>(), genotypes) : null, negLog10PError, filters, attributes, false);
this(source, contig, start, stop, alleles, genotypes != null ? genotypeCollectionToMap(new TreeMap<String, Genotype>(), genotypes) : null, negLog10PError, filters, attributes, null, false);
}
/**
@ -269,7 +289,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
* @param alleles alleles
*/
public VariantContext(String source, String contig, long start, long stop, Collection<Allele> alleles) {
this(source, contig, start, stop, alleles, NO_GENOTYPES, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null, false);
this(source, contig, start, stop, alleles, NO_GENOTYPES, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null, null, false);
}
/**
@ -292,7 +312,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
* @param other the VariantContext to copy
*/
public VariantContext(VariantContext other) {
this(other.getSource(), other.getChr(), other.getStart(), other.getEnd() , other.getAlleles(), other.getGenotypes(), other.getNegLog10PError(), other.filtersWereApplied() ? other.getFilters() : null, other.getAttributes(), false);
this(other.getSource(), other.getChr(), other.getStart(), other.getEnd() , other.getAlleles(), other.getGenotypes(), other.getNegLog10PError(), other.filtersWereApplied() ? other.getFilters() : null, other.getAttributes(), other.REFERENCE_BASE_FOR_INDEL, false);
}
/**
@ -307,8 +327,13 @@ public class VariantContext implements Feature { // to enable tribble intergrati
* @param negLog10PError qual
* @param filters filters: use null for unfiltered and empty set for passes filters
* @param attributes attributes
* @param referenceBaseForIndel padded reference base
* @param genotypesAreUnparsed true if the genotypes have not yet been parsed
*/
private VariantContext(String source, String contig, long start, long stop, Collection<Allele> alleles, Map<String, Genotype> genotypes, double negLog10PError, Set<String> filters, Map<String, ?> attributes, boolean genotypesAreUnparsed) {
private VariantContext(String source, String contig, long start, long stop,
Collection<Allele> alleles, Map<String, Genotype> genotypes,
double negLog10PError, Set<String> filters, Map<String, ?> attributes,
Byte referenceBaseForIndel, boolean genotypesAreUnparsed) {
if ( contig == null ) { throw new IllegalArgumentException("Contig cannot be null"); }
this.contig = contig;
this.start = start;
@ -323,6 +348,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
this.commonInfo = new InferredGeneticContext(source, negLog10PError, filters, attributes);
filtersWereAppliedToContext = filters != null;
REFERENCE_BASE_FOR_INDEL = referenceBaseForIndel;
if ( alleles == null ) { throw new IllegalArgumentException("Alleles cannot be null"); }
@ -355,23 +381,23 @@ public class VariantContext implements Feature { // to enable tribble intergrati
// ---------------------------------------------------------------------------------------------------------
public static VariantContext modifyGenotypes(VariantContext vc, Map<String, Genotype> genotypes) {
return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, new HashMap<String, Object>(vc.getAttributes()), false);
return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, new HashMap<String, Object>(vc.getAttributes()), vc.getReferenceBaseForIndel(), false);
}
public static VariantContext modifyLocation(VariantContext vc, String chr, int start, int end) {
return new VariantContext(vc.getSource(), chr, start, end, vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, new HashMap<String, Object>(vc.getAttributes()), true);
return new VariantContext(vc.getSource(), chr, start, end, vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, new HashMap<String, Object>(vc.getAttributes()), vc.getReferenceBaseForIndel(), true);
}
public static VariantContext modifyFilters(VariantContext vc, Set<String> filters) {
return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd() , vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), filters, new HashMap<String, Object>(vc.getAttributes()), true);
return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd() , vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), filters, new HashMap<String, Object>(vc.getAttributes()), vc.getReferenceBaseForIndel(), true);
}
public static VariantContext modifyAttributes(VariantContext vc, Map<String, Object> attributes) {
return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, attributes, true);
return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, attributes, vc.getReferenceBaseForIndel(), true);
}
public static VariantContext modifyPErrorFiltersAndAttributes(VariantContext vc, double negLog10PError, Set<String> filters, Map<String, Object> attributes) {
return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), vc.genotypes, negLog10PError, filters, attributes, true);
return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), vc.genotypes, negLog10PError, filters, attributes, vc.getReferenceBaseForIndel(), true);
}
// ---------------------------------------------------------------------------------------------------------
@ -414,7 +440,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
* @return vc subcontext
*/
public VariantContext subContextFromGenotypes(Collection<Genotype> genotypes, Set<Allele> alleles) {
return new VariantContext(getSource(), contig, start, stop, alleles, genotypes, getNegLog10PError(), filtersWereApplied() ? getFilters() : null, getAttributes());
return new VariantContext(getSource(), contig, start, stop, alleles, genotypes != null ? genotypeCollectionToMap(new TreeMap<String, Genotype>(), genotypes) : null, getNegLog10PError(), filtersWereApplied() ? getFilters() : null, getAttributes(), getReferenceBaseForIndel());
}
@ -603,6 +629,15 @@ public class VariantContext implements Feature { // to enable tribble intergrati
return (String)commonInfo.getAttribute(ID_KEY);
}
public boolean hasReferenceBaseForIndel() {
return REFERENCE_BASE_FOR_INDEL != null;
}
// the indel base that gets stripped off for indels
public Byte getReferenceBaseForIndel() {
return REFERENCE_BASE_FOR_INDEL;
}
// ---------------------------------------------------------------------------------------------------------
//
// get routines to access context info fields
@ -1151,6 +1186,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
private boolean validate(boolean throwException) {
try {
validateReferencePadding();
validateAlleles();
validateGenotypes();
} catch ( IllegalArgumentException e ) {
@ -1163,6 +1199,13 @@ public class VariantContext implements Feature { // to enable tribble intergrati
return true;
}
private void validateReferencePadding() {
boolean needsPadding = hasSymbolicAlleles() || (getReference().length() == getEnd() - getStart()); // off by one because padded base was removed
if ( needsPadding && !hasReferenceBaseForIndel() )
throw new ReviewedStingException("Badly formed variant context at location " + getChr() + ":" + getStart() + "; no padded reference base was provided.");
}
private void validateAlleles() {
// check alleles
boolean alreadySeenRef = false, alreadySeenNull = false;
@ -1221,16 +1264,6 @@ public class VariantContext implements Feature { // to enable tribble intergrati
//
// ---------------------------------------------------------------------------------------------------------
// the indel base that gets stripped off for indels
public boolean hasReferenceBaseForIndel() {
return hasAttribute(REFERENCE_BASE_FOR_INDEL_KEY);
}
// the indel base that gets stripped off for indels
public byte getReferenceBaseForIndel() {
return hasReferenceBaseForIndel() ? (Byte)getAttribute(REFERENCE_BASE_FOR_INDEL_KEY) : (byte)'N';
}
private void determineType() {
if ( type == null ) {
switch ( getNAlleles() ) {
@ -1357,8 +1390,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
return false;
}
public static VariantContext createVariantContextWithPaddedAlleles(VariantContext inputVC, byte inputRefBase, boolean refBaseShouldBeAppliedToEndOfAlleles) {
Allele refAllele = inputVC.getReference();
public static VariantContext createVariantContextWithPaddedAlleles(VariantContext inputVC, boolean refBaseShouldBeAppliedToEndOfAlleles) {
// see if we need to pad common reference base from all alleles
boolean padVC;
@ -1368,31 +1400,20 @@ public class VariantContext implements Feature { // to enable tribble intergrati
long locLength = (inputVC.getEnd() - inputVC.getStart()) + 1;
if (inputVC.hasSymbolicAlleles())
padVC = true;
else if (refAllele.length() == locLength)
else if (inputVC.getReference().length() == locLength)
padVC = false;
else if (refAllele.length() == locLength-1)
else if (inputVC.getReference().length() == locLength-1)
padVC = true;
else throw new IllegalArgumentException("Badly formed variant context at location " + String.valueOf(inputVC.getStart()) +
" in contig " + inputVC.getChr() + ". Reference length must be at most one base shorter than location size");
// nothing to do if we don't need to pad bases
if (padVC) {
Byte refByte;
Map<String,Object> attributes = inputVC.getAttributes();
if ( !inputVC.hasReferenceBaseForIndel() )
throw new ReviewedStingException("Badly formed variant context at location " + inputVC.getChr() + ":" + inputVC.getStart() + "; no padded reference base is available.");
// upper-case for consistency; note that we can safely make these casts because the input is constrained to be a byte
inputRefBase = (byte)Character.toUpperCase((char)inputRefBase);
if (attributes.containsKey(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY))
refByte = (Byte)attributes.get(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY);
else if (inputRefBase == 'A' || inputRefBase == 'T' || inputRefBase == 'C' || inputRefBase == 'G' || inputRefBase == 'N')
refByte = inputRefBase;
else
throw new IllegalArgumentException("Error when trying to pad Variant Context at location " + String.valueOf(inputVC.getStart())
+ " in contig " + inputVC.getChr() +
". Either input reference base ("+(char)inputRefBase+
", ascii code="+inputRefBase+") must be a regular base, or input VC must contain reference base key");
Byte refByte = inputVC.getReferenceBaseForIndel();
List<Allele> alleles = new ArrayList<Allele>();
Map<String, Genotype> genotypes = new TreeMap<String, Genotype>();
@ -1444,11 +1465,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
// Do not change the filter state if filters were not applied to this context
Set<String> inputVCFilters = inputVC.filtersWereAppliedToContext ? inputVC.getFilters() : null;
return new VariantContext(inputVC.getSource(), inputVC.getChr(), inputVC.getStart(), inputVC.getEnd(), alleles, genotypes, inputVC.getNegLog10PError(),
inputVCFilters, attributes);
return new VariantContext(inputVC.getSource(), inputVC.getChr(), inputVC.getStart(), inputVC.getEnd(), alleles, genotypes, inputVC.getNegLog10PError(), inputVCFilters, inputVC.getAttributes());
}
else
return inputVC;

View File

@ -295,10 +295,7 @@ public class VariantContextUtils {
@Requires("vc != null")
@Ensures("result != null")
public static VariantContext sitesOnlyVariantContext(VariantContext vc) {
return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(),
vc.getAlleles(), vc.getNegLog10PError(),
vc.filtersWereApplied() ? vc.getFilters() : null,
vc.getAttributes());
return VariantContext.modifyGenotypes(vc, null);
}
/**
@ -449,7 +446,7 @@ public class VariantContextUtils {
FilteredRecordMergeType filteredRecordMergeType, GenotypeMergeType genotypeMergeOptions,
boolean annotateOrigin, boolean printMessages, byte inputRefBase ) {
return simpleMerge(genomeLocParser, unsortedVCs, priorityListOfVCs, filteredRecordMergeType, genotypeMergeOptions, annotateOrigin, printMessages, inputRefBase, "set", false, false);
return simpleMerge(genomeLocParser, unsortedVCs, priorityListOfVCs, filteredRecordMergeType, genotypeMergeOptions, annotateOrigin, printMessages, "set", false, false);
}
/**
@ -464,7 +461,6 @@ public class VariantContextUtils {
* @param genotypeMergeOptions merge option for genotypes
* @param annotateOrigin should we annotate the set it came from?
* @param printMessages should we print messages?
* @param inputRefBase the ref base
* @param setKey the key name of the set
* @param filteredAreUncalled are filtered records uncalled?
* @param mergeInfoWithMaxAC should we merge in info from the VC with maximum allele count?
@ -472,7 +468,7 @@ public class VariantContextUtils {
*/
public static VariantContext simpleMerge(GenomeLocParser genomeLocParser, Collection<VariantContext> unsortedVCs, List<String> priorityListOfVCs,
FilteredRecordMergeType filteredRecordMergeType, GenotypeMergeType genotypeMergeOptions,
boolean annotateOrigin, boolean printMessages, byte inputRefBase, String setKey,
boolean annotateOrigin, boolean printMessages, String setKey,
boolean filteredAreUncalled, boolean mergeInfoWithMaxAC ) {
if ( unsortedVCs == null || unsortedVCs.size() == 0 )
return null;
@ -490,7 +486,7 @@ public class VariantContextUtils {
for (VariantContext vc : prepaddedVCs) {
// also a reasonable place to remove filtered calls, if needed
if ( ! filteredAreUncalled || vc.isNotFiltered() )
VCs.add(VariantContext.createVariantContextWithPaddedAlleles(vc,inputRefBase,false));
VCs.add(VariantContext.createVariantContextWithPaddedAlleles(vc, false));
}
if ( VCs.size() == 0 ) // everything is filtered out and we're filteredAreUncalled
return null;

View File

@ -0,0 +1,55 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.report;
import org.broadinstitute.sting.BaseTest;
import org.testng.Assert;
import org.testng.annotations.Test;
public class GATKReportUnitTest extends BaseTest {
@Test
public void testParse() throws Exception {
String reportPath = validationDataLocation + "exampleGATKReport.eval";
GATKReport report = new GATKReport(reportPath);
GATKReportTable countVariants = report.getTable("CountVariants");
Assert.assertEquals(countVariants.getVersion(), GATKReportVersion.V0_1);
Object countVariantsPK = countVariants.getPrimaryKey("none.eval.none.all");
Assert.assertEquals(countVariants.get(countVariantsPK, "nProcessedLoci"), "100000");
Assert.assertEquals(countVariants.get(countVariantsPK, "nNoCalls"), "99872");
GATKReportTable validationReport = report.getTable("ValidationReport");
Assert.assertEquals(validationReport.getVersion(), GATKReportVersion.V0_1);
Object validationReportPK = countVariants.getPrimaryKey("none.eval.none.known");
Assert.assertEquals(validationReport.get(validationReportPK, "sensitivity"), "NaN");
GATKReportTable simpleMetricsByAC = report.getTable("SimpleMetricsByAC.metrics");
Assert.assertEquals(simpleMetricsByAC.getVersion(), GATKReportVersion.V0_1);
Object simpleMetricsByACPK = simpleMetricsByAC.getPrimaryKey("none.eval.none.novel.ac2");
Assert.assertEquals(simpleMetricsByAC.get(simpleMetricsByACPK, "AC"), "2");
Assert.assertFalse(simpleMetricsByAC.containsPrimaryKey("none.eval.none.novel.ac2.bad"));
}
}

View File

@ -30,8 +30,6 @@ import org.testng.annotations.Test;
import java.io.File;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
public class DiffObjectsIntegrationTest extends WalkerTest {
private class TestParams extends TestDataProvider {
@ -52,8 +50,8 @@ public class DiffObjectsIntegrationTest extends WalkerTest {
@DataProvider(name = "data")
public Object[][] createData() {
new TestParams(testDir + "diffTestMaster.vcf", testDir + "diffTestTest.vcf", "4d9f4636de05b93c354d05011264546e");
new TestParams(testDir + "exampleBAM.bam", testDir + "exampleBAM.simple.bam", "37e6efd833b5cd6d860a9df3df9713fc");
new TestParams(testDir + "diffTestMaster.vcf", testDir + "diffTestTest.vcf", "92311de76dda3f38aac289d807ef23d0");
new TestParams(testDir + "exampleBAM.bam", testDir + "exampleBAM.simple.bam", "0c69412c385fda50210f2a612e1ffe4a");
return TestParams.getTests(TestParams.class);
}

View File

@ -16,7 +16,7 @@ public class RecalibrationWalkersPerformanceTest extends WalkerTest {
" -L chr1:1-50,000,000" +
" -standard" +
" -OQ" +
" -B:dbsnp,vcf " + GATKDataLocation + "dbsnp_132.b36.excluding_sites_after_129.vcf" +
" -B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_hg18.vcf" +
" -recalFile /dev/null" + moreArgs,
0,
new ArrayList<String>(0));
@ -31,7 +31,7 @@ public class RecalibrationWalkersPerformanceTest extends WalkerTest {
" -L " + evaluationDataLocation + "whole_exome_agilent_designed_120.targets.chr1.interval_list" +
" -standard" +
" -OQ" +
" -B:dbsnp,vcf " + GATKDataLocation + "dbsnp_132.b36.excluding_sites_after_129.vcf" +
" -B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132.hg18.vcf" +
" -recalFile /dev/null" + moreArgs,
0,
new ArrayList<String>(0));

View File

@ -120,6 +120,6 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
@Test public void complexTestFull() { combineComplexSites("", "b5a53ee92bdaacd2bb3327e9004ae058"); }
@Test public void complexTestMinimal() { combineComplexSites(" -minimalVCF", "df96cb3beb2dbb5e02f80abec7d3571e"); }
@Test public void complexTestSitesOnly() { combineComplexSites(" -sites_only", "f72a178137e25dbe0b931934cdc0079d"); }
@Test public void complexTestSitesOnly() { combineComplexSites(" -sites_only", "f704caeaaaed6711943014b847fe381a"); }
@Test public void complexTestSitesOnlyMinimal() { combineComplexSites(" -sites_only -minimalVCF", "f704caeaaaed6711943014b847fe381a"); }
}

View File

@ -0,0 +1,46 @@
/*
* Copyright (c) 2010.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.variantutils;
import org.broadinstitute.sting.WalkerTest;
import org.testng.annotations.Test;
import java.util.Arrays;
/**
* Tests LeftAlignVariants
*/
public class LeftAlignVariantsIntegrationTest extends WalkerTest {
@Test
public void testLeftAlignment() {
WalkerTestSpec spec = new WalkerTestSpec(
"-T LeftAlignVariants -o %s -R " + b37KGReference + " -B:variant,vcf " + validationDataLocation + "forLeftAlignVariantsTest.vcf -NO_HEADER",
1,
Arrays.asList("158b1d71b28c52e2789f164500b53732"));
executeTest("test left alignment", spec);
}
}

View File

@ -98,7 +98,7 @@ public class VCFStreamingIntegrationTest extends WalkerTest {
" -EV CompOverlap -noEV -noST" +
" -o %s",
1,
Arrays.asList("f60729c900bc8368717653b3fad80d1e") //"f60729c900bc8368717653b3fad80d1e"
Arrays.asList("ea09bf764adba9765b99921c5ba2c709")
);
executeTest("testVCFStreamingChain", selectTestSpec);

View File

@ -70,7 +70,7 @@ public class IndexFactoryUnitTest {
CloseableTribbleIterator<VariantContext> it = source.iterator();
while (it.hasNext() && (counter++ < maxRecords || maxRecords == -1) ) {
VariantContext vc = it.next();
writer.add(vc, vc.getReferenceBaseForIndel());
writer.add(vc);
}
writer.close();

View File

@ -57,8 +57,8 @@ public class VCFWriterUnitTest extends BaseTest {
VCFHeader header = createFakeHeader(metaData,additionalColumns);
VCFWriter writer = new StandardVCFWriter(fakeVCFFile);
writer.writeHeader(header);
writer.add(createVC(header),"A".getBytes()[0]);
writer.add(createVC(header),"A".getBytes()[0]);
writer.add(createVC(header));
writer.add(createVC(header));
writer.close();
VCFCodec reader = new VCFCodec();
AsciiLineReader lineReader;
@ -135,7 +135,7 @@ public class VCFWriterUnitTest extends BaseTest {
genotypes.put(name,gt);
}
return new VariantContext("RANDOM",loc.getContig(), loc.getStart(), loc.getStop(), alleles, genotypes, 0, filters, attributes);
return new VariantContext("RANDOM",loc.getContig(), loc.getStart(), loc.getStop(), alleles, genotypes, 0, filters, attributes, (byte)'A');
}

View File

@ -0,0 +1,88 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.text;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.testng.Assert;
import org.testng.annotations.Test;
import java.util.Arrays;
import java.util.Collections;
public class TextFormattingUtilsUnitTest extends BaseTest {
@Test(expectedExceptions = ReviewedStingException.class)
public void testSplitWhiteSpaceNullLine() {
TextFormattingUtils.splitWhiteSpace(null);
}
@Test
public void testSplitWhiteSpace() {
Assert.assertEquals(TextFormattingUtils.splitWhiteSpace("foo bar baz"), new String[] { "foo", "bar", "baz" });
Assert.assertEquals(TextFormattingUtils.splitWhiteSpace("foo bar baz"), new String[] { "foo", "bar", "baz" });
Assert.assertEquals(TextFormattingUtils.splitWhiteSpace(" foo bar baz"), new String[] { "foo", "bar", "baz" });
Assert.assertEquals(TextFormattingUtils.splitWhiteSpace(" foo bar baz "), new String[] { "foo", "bar", "baz" });
Assert.assertEquals(TextFormattingUtils.splitWhiteSpace("foo bar baz "), new String[] { "foo", "bar", "baz" });
Assert.assertEquals(TextFormattingUtils.splitWhiteSpace("\tfoo\tbar\tbaz\t"), new String[]{"foo", "bar", "baz"});
}
@Test(expectedExceptions = ReviewedStingException.class)
public void testGetWordStartsNullLine() {
TextFormattingUtils.getWordStarts(null);
}
@Test
public void testGetWordStarts() {
Assert.assertEquals(TextFormattingUtils.getWordStarts("foo bar baz"), Arrays.asList(4, 8));
Assert.assertEquals(TextFormattingUtils.getWordStarts("foo bar baz"), Arrays.asList(5, 10));
Assert.assertEquals(TextFormattingUtils.getWordStarts(" foo bar baz"), Arrays.asList(1, 5, 9));
Assert.assertEquals(TextFormattingUtils.getWordStarts(" foo bar baz "), Arrays.asList(1, 5, 9));
Assert.assertEquals(TextFormattingUtils.getWordStarts("foo bar baz "), Arrays.asList(4, 8));
Assert.assertEquals(TextFormattingUtils.getWordStarts("\tfoo\tbar\tbaz\t"), Arrays.asList(1, 5, 9));
}
@Test(expectedExceptions = ReviewedStingException.class)
public void testSplitFixedWidthNullLine() {
TextFormattingUtils.splitFixedWidth(null, Collections.<Integer>emptyList());
}
@Test(expectedExceptions = ReviewedStingException.class)
public void testSplitFixedWidthNullColumnStarts() {
TextFormattingUtils.splitFixedWidth("foo bar baz", null);
}
@Test
public void testSplitFixedWidth() {
Assert.assertEquals(TextFormattingUtils.splitFixedWidth("foo bar baz", Arrays.asList(4, 8)), new String[] { "foo", "bar", "baz" });
Assert.assertEquals(TextFormattingUtils.splitFixedWidth("foo bar baz", Arrays.asList(5, 10)), new String[] { "foo", "bar", "baz" });
Assert.assertEquals(TextFormattingUtils.splitFixedWidth(" foo bar baz", Arrays.asList(5, 9)), new String[] { "foo", "bar", "baz" });
Assert.assertEquals(TextFormattingUtils.splitFixedWidth(" foo bar baz ", Arrays.asList(5, 9)), new String[] { "foo", "bar", "baz" });
Assert.assertEquals(TextFormattingUtils.splitFixedWidth("foo bar baz ", Arrays.asList(4, 8)), new String[] { "foo", "bar", "baz" });
Assert.assertEquals(TextFormattingUtils.splitFixedWidth("\tfoo\tbar\tbaz\t", Arrays.asList(5, 9)), new String[] { "foo", "bar", "baz" });
Assert.assertEquals(TextFormattingUtils.splitFixedWidth("f o b r b z", Arrays.asList(4, 8)), new String[] { "f o", "b r", "b z" });
Assert.assertEquals(TextFormattingUtils.splitFixedWidth(" f o b r b z", Arrays.asList(4, 8)), new String[] { "f o", "b r", "b z" });
Assert.assertEquals(TextFormattingUtils.splitFixedWidth(" f o b r b z", Arrays.asList(4, 8)), new String[] { "f", "o b", "r b z" });
}
}

View File

@ -92,45 +92,45 @@ public class VariantContextUnitTest {
// test INDELs
alleles = Arrays.asList(Aref, ATC);
vc = new VariantContext("test", snpLoc,snpLocStart, snpLocStop, alleles);
vc = new VariantContext("test", snpLoc,snpLocStart, snpLocStop, alleles, null, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null, (byte)'A');
Assert.assertEquals(vc.getType(), VariantContext.Type.INDEL);
alleles = Arrays.asList(ATCref, A);
vc = new VariantContext("test", snpLoc,snpLocStart, snpLocStop+2, alleles);
vc = new VariantContext("test", snpLoc,snpLocStart, snpLocStop+2, alleles, null, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null, (byte)'A');
Assert.assertEquals(vc.getType(), VariantContext.Type.INDEL);
alleles = Arrays.asList(Tref, TA, TC);
vc = new VariantContext("test", snpLoc,snpLocStart, snpLocStop, alleles);
vc = new VariantContext("test", snpLoc,snpLocStart, snpLocStop, alleles, null, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null, (byte)'A');
Assert.assertEquals(vc.getType(), VariantContext.Type.INDEL);
alleles = Arrays.asList(ATCref, A, AC);
vc = new VariantContext("test", snpLoc,snpLocStart, snpLocStop+2, alleles);
vc = new VariantContext("test", snpLoc,snpLocStart, snpLocStop+2, alleles, null, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null, (byte)'A');
Assert.assertEquals(vc.getType(), VariantContext.Type.INDEL);
alleles = Arrays.asList(ATCref, A, Allele.create("ATCTC"));
vc = new VariantContext("test", snpLoc,snpLocStart, snpLocStop+2, alleles);
vc = new VariantContext("test", snpLoc,snpLocStart, snpLocStop+2, alleles, null, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null, (byte)'A');
Assert.assertEquals(vc.getType(), VariantContext.Type.INDEL);
// test MIXED
alleles = Arrays.asList(TAref, T, TC);
vc = new VariantContext("test", snpLoc,snpLocStart, snpLocStop+1, alleles);
vc = new VariantContext("test", snpLoc,snpLocStart, snpLocStop+1, alleles, null, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null, (byte)'A');
Assert.assertEquals(vc.getType(), VariantContext.Type.MIXED);
alleles = Arrays.asList(TAref, T, AC);
vc = new VariantContext("test", snpLoc,snpLocStart, snpLocStop+1, alleles);
vc = new VariantContext("test", snpLoc,snpLocStart, snpLocStop+1, alleles, null, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null, (byte)'A');
Assert.assertEquals(vc.getType(), VariantContext.Type.MIXED);
alleles = Arrays.asList(ACref, ATC, AT);
vc = new VariantContext("test", snpLoc,snpLocStart, snpLocStop+1, alleles);
vc = new VariantContext("test", snpLoc,snpLocStart, snpLocStop+1, alleles, null, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null, (byte)'A');
Assert.assertEquals(vc.getType(), VariantContext.Type.MIXED);
alleles = Arrays.asList(Aref, T, symbolic);
vc = new VariantContext("test", snpLoc,snpLocStart, snpLocStop, alleles);
vc = new VariantContext("test", snpLoc,snpLocStart, snpLocStop, alleles, null, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null, (byte)'A');
Assert.assertEquals(vc.getType(), VariantContext.Type.MIXED);
// test SYMBOLIC
alleles = Arrays.asList(Tref, symbolic);
vc = new VariantContext("test", snpLoc,snpLocStart, snpLocStop, alleles);
vc = new VariantContext("test", snpLoc,snpLocStart, snpLocStop, alleles, null, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null, (byte)'A');
Assert.assertEquals(vc.getType(), VariantContext.Type.SYMBOLIC);
}
@ -191,7 +191,7 @@ public class VariantContextUnitTest {
@Test
public void testCreatingDeletionVariantContext() {
List<Allele> alleles = Arrays.asList(ATCref, del);
VariantContext vc = new VariantContext("test", delLoc, delLocStart, delLocStop, alleles);
VariantContext vc = new VariantContext("test", delLoc, delLocStart, delLocStop, alleles, null, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null, (byte)'A');
Assert.assertEquals(vc.getChr(), delLoc);
Assert.assertEquals(vc.getStart(), delLocStart);
@ -218,7 +218,7 @@ public class VariantContextUnitTest {
@Test
public void testCreatingInsertionVariantContext() {
List<Allele> alleles = Arrays.asList(delRef, ATC);
VariantContext vc = new VariantContext("test", insLoc, insLocStart, insLocStop, alleles);
VariantContext vc = new VariantContext("test", insLoc, insLocStart, insLocStop, alleles, null, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null, (byte)'A');
Assert.assertEquals(vc.getChr(), insLoc);
Assert.assertEquals(vc.getStart(), insLocStart);
@ -251,7 +251,7 @@ public class VariantContextUnitTest {
new VariantContext("test", insLoc, insLocStart, insLocStop, Arrays.asList(delRef, del));
}
@Test (expectedExceptions = IllegalArgumentException.class)
@Test (expectedExceptions = IllegalStateException.class)
public void testBadConstructorArgs3() {
new VariantContext("test", insLoc, insLocStart, insLocStop, Arrays.asList(del));
}

View File

@ -59,10 +59,10 @@ class ExampleUnifiedGenotyper extends QScript {
evalUnfiltered.rodBind :+= RodBind("eval", "VCF", genotyper.out)
evalUnfiltered.out = swapExt(genotyper.out, "vcf", "eval")
variantFilter.rodBind :+= RodBind("vcf", "VCF", genotyper.out)
variantFilter.rodBind :+= RodBind("variant", "VCF", genotyper.out)
variantFilter.out = swapExt(qscript.bamFile, "bam", "filtered.vcf")
variantFilter.filterName = filterNames
variantFilter.filterExpression = filterExpressions
variantFilter.filterExpression = filterExpressions.map("\"" + _ + "\"")
evalFiltered.rodBind :+= RodBind("eval", "VCF", variantFilter.out)
evalFiltered.out = swapExt(variantFilter.out, "vcf", "eval")

View File

@ -34,8 +34,8 @@ import org.broadinstitute.sting.BaseTest
import org.broadinstitute.sting.MD5DB
import org.broadinstitute.sting.queue.QCommandLine
import org.broadinstitute.sting.queue.util.{Logging, ProcessController}
import java.io.{FileNotFoundException, File}
import org.broadinstitute.sting.gatk.report.GATKReportParser
import java.io.File
import org.broadinstitute.sting.gatk.report.GATKReport
import org.apache.commons.io.FileUtils
import org.broadinstitute.sting.queue.engine.CommandLinePluginManager
@ -118,12 +118,11 @@ object PipelineTest extends BaseTest with Logging {
// write the report to the shared validation data location
val formatter = new SimpleDateFormat("yyyy.MM.dd.HH.mm.ss")
val reportLocation = "%s%s/%s/validation.%s.eval".format(validationReportsDataLocation, jobRunner, name, formatter.format(new Date))
val report = new File(reportLocation)
val reportFile = new File(reportLocation)
FileUtils.copyFile(new File(runDir(name, jobRunner) + evalSpec.evalReport), report);
FileUtils.copyFile(new File(runDir(name, jobRunner) + evalSpec.evalReport), reportFile);
val parser = new GATKReportParser
parser.parse(report)
val report = new GATKReport(reportFile);
var allInRange = true
@ -131,7 +130,9 @@ object PipelineTest extends BaseTest with Logging {
println(name + " validation values:")
println(" value (min,target,max) table key metric")
for (validation <- evalSpec.validations) {
val value = parser.getValue(validation.table, validation.key, validation.metric)
val table = report.getTable(validation.table)
val key = table.getPrimaryKey(validation.key)
val value = String.valueOf(table.get(key, validation.metric))
val inRange = if (value == null) false else validation.inRange(value)
val flag = if (!inRange) "*" else " "
println(" %s %s (%s,%s,%s) %s %s %s".format(flag, value, validation.min, validation.target, validation.max, validation.table, validation.key, validation.metric))