Merge
Conflicts: private/java/src/org/broadinstitute/sting/gatk/walkers/qc/TestVariantContextWalker.java public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersPerformanceTest.java public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextIntegrationTest.java
This commit is contained in:
commit
79e4a8f6d3
|
|
@ -20,6 +20,20 @@
|
|||
assign(tableName, d, envir=tableEnv);
|
||||
}
|
||||
|
||||
# Read a fixed width line of text into a list.
|
||||
.gsa.splitFixedWidth <- function(line, columnStarts) {
|
||||
splitStartStop <- function(x) {
|
||||
x = substring(x, starts, stops);
|
||||
x = gsub("^[[:space:]]+|[[:space:]]+$", "", x);
|
||||
x;
|
||||
}
|
||||
|
||||
starts = c(1, columnStarts);
|
||||
stops = c(columnStarts - 1, nchar(line));
|
||||
|
||||
sapply(line, splitStartStop)[,1];
|
||||
}
|
||||
|
||||
# Load all GATKReport tables from a file
|
||||
gsa.read.gatkreport <- function(filename) {
|
||||
con = file(filename, "r", blocking = TRUE);
|
||||
|
|
@ -31,9 +45,10 @@ gsa.read.gatkreport <- function(filename) {
|
|||
tableName = NA;
|
||||
tableHeader = c();
|
||||
tableRows = c();
|
||||
version = NA;
|
||||
|
||||
for (line in lines) {
|
||||
if (length(grep("^##:GATKReport.v0.1[[:space:]]+", line, ignore.case=TRUE)) > 0) {
|
||||
if (length(grep("^##:GATKReport.v", line, ignore.case=TRUE)) > 0) {
|
||||
headerFields = unlist(strsplit(line, "[[:space:]]+"));
|
||||
|
||||
if (!is.na(tableName)) {
|
||||
|
|
@ -43,13 +58,37 @@ gsa.read.gatkreport <- function(filename) {
|
|||
tableName = headerFields[2];
|
||||
tableHeader = c();
|
||||
tableRows = c();
|
||||
|
||||
# For differences in versions see
|
||||
# $STING_HOME/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportVersion.java
|
||||
if (length(grep("^##:GATKReport.v0.1[[:space:]]+", line, ignore.case=TRUE)) > 0) {
|
||||
version = "v0.1";
|
||||
|
||||
} else if (length(grep("^##:GATKReport.v0.2[[:space:]]+", line, ignore.case=TRUE)) > 0) {
|
||||
version = "v0.2";
|
||||
columnStarts = c();
|
||||
|
||||
}
|
||||
|
||||
} else if (length(grep("^[[:space:]]*$", line)) > 0 | length(grep("^[[:space:]]*#", line)) > 0) {
|
||||
# do nothing
|
||||
} else if (!is.na(tableName)) {
|
||||
row = unlist(strsplit(line, "[[:space:]]+"));
|
||||
|
||||
if (version == "v0.1") {
|
||||
row = unlist(strsplit(line, "[[:space:]]+"));
|
||||
|
||||
} else if (version == "v0.2") {
|
||||
if (length(tableHeader) == 0) {
|
||||
headerChars = unlist(strsplit(line, ""));
|
||||
# Find the first position of non space characters, excluding the first character
|
||||
columnStarts = intersect(grep("[[:space:]]", headerChars, invert=TRUE), grep("[[:space:]]", headerChars) + 1);
|
||||
}
|
||||
|
||||
row = .gsa.splitFixedWidth(line, columnStarts);
|
||||
}
|
||||
|
||||
if (length(tableHeader) == 0) {
|
||||
tableHeader = row;
|
||||
tableHeader = row;
|
||||
} else {
|
||||
tableRows = rbind(tableRows, row);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -87,8 +87,8 @@ public class VCFWriterStorage implements Storage<VCFWriterStorage>, VCFWriter {
|
|||
writer.writeHeader(stub.getVCFHeader());
|
||||
}
|
||||
|
||||
public void add(VariantContext vc, byte ref) {
|
||||
writer.add(vc, ref);
|
||||
public void add(VariantContext vc) {
|
||||
writer.add(vc);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -117,7 +117,7 @@ public class VCFWriterStorage implements Storage<VCFWriterStorage>, VCFWriter {
|
|||
BasicFeatureSource<VariantContext> source = BasicFeatureSource.getFeatureSource(file.getAbsolutePath(), new VCFCodec(), false);
|
||||
|
||||
for ( VariantContext vc : source.iterator() ) {
|
||||
target.writer.add(vc, vc.getReferenceBaseForIndel());
|
||||
target.writer.add(vc);
|
||||
}
|
||||
|
||||
source.close();
|
||||
|
|
|
|||
|
|
@ -192,8 +192,8 @@ public class VCFWriterStub implements Stub<VCFWriter>, VCFWriter {
|
|||
/**
|
||||
* {@inheritDoc}
|
||||
*/
|
||||
public void add(VariantContext vc, byte ref) {
|
||||
outputTracker.getStorage(this).add(vc,ref);
|
||||
public void add(VariantContext vc) {
|
||||
outputTracker.getStorage(this).add(vc);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -127,14 +127,13 @@ public class VariantContextAdaptors {
|
|||
Map<String, Object> attributes = new HashMap<String, Object>();
|
||||
attributes.put(VariantContext.ID_KEY, dbsnp.getRsID());
|
||||
|
||||
if ( sawNullAllele ) {
|
||||
int index = dbsnp.getStart() - ref.getWindow().getStart() - 1;
|
||||
if ( index < 0 )
|
||||
return null; // we weren't given enough reference context to create the VariantContext
|
||||
attributes.put(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY, new Byte(ref.getBases()[index]));
|
||||
}
|
||||
Collection<Genotype> genotypes = null;
|
||||
VariantContext vc = new VariantContext(name, dbsnp.getChr(), dbsnp.getStart() - (sawNullAllele ? 1 : 0),dbsnp.getEnd(), alleles, genotypes, VariantContext.NO_NEG_LOG_10PERROR, null, attributes);
|
||||
int index = dbsnp.getStart() - ref.getWindow().getStart() - 1;
|
||||
if ( index < 0 )
|
||||
return null; // we weren't given enough reference context to create the VariantContext
|
||||
Byte refBaseForIndel = new Byte(ref.getBases()[index]);
|
||||
|
||||
Map<String, Genotype> genotypes = null;
|
||||
VariantContext vc = new VariantContext(name, dbsnp.getChr(), dbsnp.getStart() - (sawNullAllele ? 1 : 0), dbsnp.getEnd(), alleles, genotypes, VariantContext.NO_NEG_LOG_10PERROR, null, attributes, refBaseForIndel);
|
||||
return vc;
|
||||
} else
|
||||
return null; // can't handle anything else
|
||||
|
|
|
|||
|
|
@ -1,21 +1,23 @@
|
|||
package org.broadinstitute.sting.gatk.report;
|
||||
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||
import org.broadinstitute.sting.utils.text.TextFormattingUtils;
|
||||
|
||||
import java.io.*;
|
||||
import java.util.List;
|
||||
import java.util.TreeMap;
|
||||
|
||||
/**
|
||||
* Container class for GATK report tables
|
||||
*/
|
||||
public class GATKReport {
|
||||
private TreeMap<String, GATKReportTable> tables;
|
||||
private TreeMap<String, GATKReportTable> tables = new TreeMap<String, GATKReportTable>();
|
||||
|
||||
/**
|
||||
* Create a new, empty GATKReport.
|
||||
*/
|
||||
public GATKReport() {
|
||||
tables = new TreeMap<String, GATKReportTable>();
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -23,7 +25,7 @@ public class GATKReport {
|
|||
* @param filename the path to the file to load
|
||||
*/
|
||||
public GATKReport(String filename) {
|
||||
loadReport(new File(filename));
|
||||
this(new File(filename));
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -31,7 +33,6 @@ public class GATKReport {
|
|||
* @param file the file to load
|
||||
*/
|
||||
public GATKReport(File file) {
|
||||
tables = new TreeMap<String, GATKReportTable>();
|
||||
loadReport(file);
|
||||
}
|
||||
|
||||
|
|
@ -46,11 +47,17 @@ public class GATKReport {
|
|||
GATKReportTable table = null;
|
||||
String[] header = null;
|
||||
int id = 0;
|
||||
GATKReportVersion version = null;
|
||||
List<Integer> columnStarts = null;
|
||||
|
||||
String line;
|
||||
while ( (line = reader.readLine()) != null ) {
|
||||
if (line.startsWith("##:GATKReport.v0.1 ")) {
|
||||
line = line.replaceFirst("##:GATKReport.v0.1 ", "");
|
||||
|
||||
if (line.startsWith("##:GATKReport.v")) {
|
||||
|
||||
version = GATKReportVersion.fromHeader(line);
|
||||
|
||||
line = line.replaceFirst("##:GATKReport." + version.versionString + " ", "");
|
||||
String[] pieces = line.split(" : ");
|
||||
|
||||
String tableName = pieces[0];
|
||||
|
|
@ -58,14 +65,35 @@ public class GATKReport {
|
|||
|
||||
addTable(tableName, tableDesc);
|
||||
table = getTable(tableName);
|
||||
table.setVersion(version);
|
||||
|
||||
header = null;
|
||||
} else if ( line.isEmpty() ) {
|
||||
columnStarts = null;
|
||||
} else if ( line.trim().isEmpty() ) {
|
||||
// do nothing
|
||||
} else {
|
||||
if (table != null) {
|
||||
|
||||
String[] splitLine;
|
||||
|
||||
switch (version) {
|
||||
case V0_1:
|
||||
splitLine = TextFormattingUtils.splitWhiteSpace(line);
|
||||
break;
|
||||
|
||||
case V0_2:
|
||||
if (header == null) {
|
||||
columnStarts = TextFormattingUtils.getWordStarts(line);
|
||||
}
|
||||
splitLine = TextFormattingUtils.splitFixedWidth(line, columnStarts);
|
||||
break;
|
||||
|
||||
default:
|
||||
throw new ReviewedStingException("GATK report version parsing not implemented for: " + line);
|
||||
}
|
||||
|
||||
if (header == null) {
|
||||
header = line.split("\\s+");
|
||||
header = splitLine;
|
||||
|
||||
table.addPrimaryKey("id", false);
|
||||
|
||||
|
|
@ -75,10 +103,8 @@ public class GATKReport {
|
|||
|
||||
id = 0;
|
||||
} else {
|
||||
String[] entries = line.split("\\s+");
|
||||
|
||||
for (int columnIndex = 0; columnIndex < header.length; columnIndex++) {
|
||||
table.set(id, header[columnIndex], entries[columnIndex]);
|
||||
table.set(id, header[columnIndex], splitLine[columnIndex]);
|
||||
}
|
||||
|
||||
id++;
|
||||
|
|
@ -125,7 +151,10 @@ public class GATKReport {
|
|||
* @return the table object
|
||||
*/
|
||||
public GATKReportTable getTable(String tableName) {
|
||||
return tables.get(tableName);
|
||||
GATKReportTable table = tables.get(tableName);
|
||||
if (table == null)
|
||||
throw new ReviewedStingException("Table is not in GATKReport: " + tableName);
|
||||
return table;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -37,10 +37,10 @@ public class GATKReportColumn extends TreeMap<Object, Object> {
|
|||
* tables, as the table gets written properly without having to waste storage for the unset elements (usually the zero
|
||||
* values) in the table.
|
||||
*
|
||||
* @param primaryKey the primary key position in the column that should be set
|
||||
* @param primaryKey the primary key position in the column that should be retrieved
|
||||
* @return the value at the specified position in the column, or the default value if the element is not set
|
||||
*/
|
||||
public Object getWithoutSideEffects(Object primaryKey) {
|
||||
private Object getWithoutSideEffects(Object primaryKey) {
|
||||
if (!this.containsKey(primaryKey)) {
|
||||
return defaultValue;
|
||||
}
|
||||
|
|
@ -48,6 +48,16 @@ public class GATKReportColumn extends TreeMap<Object, Object> {
|
|||
return this.get(primaryKey);
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the string form of an object from the column; if it doesn't exist, the string form of the default value is returned.
|
||||
*
|
||||
* @param primaryKey the primary key position in the column that should be retrieved
|
||||
* @return the string value at the specified position in the column, or the default value if the element is not set
|
||||
*/
|
||||
public String getStringValue(Object primaryKey) {
|
||||
return toString(getWithoutSideEffects(primaryKey));
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the displayable property of the column. If true, the column will be displayed in the final output.
|
||||
* If not, printing will be suppressed for the contents of the table.
|
||||
|
|
@ -67,7 +77,7 @@ public class GATKReportColumn extends TreeMap<Object, Object> {
|
|||
|
||||
for (Object obj : this.values()) {
|
||||
if (obj != null) {
|
||||
int width = obj.toString().length();
|
||||
int width = toString(obj).length();
|
||||
|
||||
if (width > maxWidth) {
|
||||
maxWidth = width;
|
||||
|
|
@ -77,4 +87,23 @@ public class GATKReportColumn extends TreeMap<Object, Object> {
|
|||
|
||||
return maxWidth;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a string version of the values.
|
||||
* @param obj The object to convert to a string
|
||||
* @return The string representation of the object ("null" for a null object; Float and Double values are formatted with "%.8f")
|
||||
*/
|
||||
private static String toString(Object obj) {
|
||||
String value;
|
||||
if (obj == null) {
|
||||
value = "null";
|
||||
} else if (obj instanceof Float) {
|
||||
value = String.format("%.8f", (Float) obj);
|
||||
} else if (obj instanceof Double) {
|
||||
value = String.format("%.8f", (Double) obj);
|
||||
} else {
|
||||
value = obj.toString();
|
||||
}
|
||||
return value;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -24,26 +24,32 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.report;
|
||||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.Test;
|
||||
import java.util.*;
|
||||
|
||||
import java.io.File;
|
||||
/**
|
||||
* Tracks GATKReportColumn objects by name in insertion order, and also allows lookup of a column by index.
|
||||
*/
|
||||
public class GATKReportColumns extends LinkedHashMap<String, GATKReportColumn> {
|
||||
private List<String> columnNames = new ArrayList<String>();
|
||||
|
||||
public class GATKReportParserUnitTest extends BaseTest {
|
||||
@Test
|
||||
public void testParse() throws Exception {
|
||||
GATKReportParser parser = new GATKReportParser();
|
||||
parser.parse(new File(validationDataLocation + "exampleGATKReport.eval"));
|
||||
/**
|
||||
* Returns the column by index
|
||||
* @param i the index
|
||||
* @return The column
|
||||
*/
|
||||
public GATKReportColumn getByIndex(int i) {
|
||||
return get(columnNames.get(i));
|
||||
}
|
||||
|
||||
Assert.assertEquals(parser.getValue("CountVariants", "none.eval.none.all", "nProcessedLoci"), "100000");
|
||||
Assert.assertEquals(parser.getValue("CountVariants", "none.eval.none.all", "nNoCalls"), "99872");
|
||||
@Override
|
||||
public GATKReportColumn remove(Object key) {
|
||||
columnNames.remove(key);
|
||||
return super.remove(key);
|
||||
}
|
||||
|
||||
Assert.assertEquals(parser.getValue("SimpleMetricsByAC.metrics", "none.eval.none.novel.ac2", "AC"), "2");
|
||||
Assert.assertNull(parser.getValue("SimpleMetricsByAC.metrics", "none.eval.none.novel.ac2.bad", "AC"));
|
||||
Assert.assertNull(parser.getValue("SimpleMetricsByAC.metrics", "none.eval.none.novel.ac2", "AC.bad"));
|
||||
Assert.assertNull(parser.getValue("SimpleMetricsByAC.metrics.bad", "none.eval.none.novel.ac2", "AC"));
|
||||
|
||||
Assert.assertEquals(parser.getValue("ValidationReport", "none.eval.none.known", "sensitivity"), "NaN");
|
||||
@Override
|
||||
public GATKReportColumn put(String key, GATKReportColumn value) {
|
||||
columnNames.add(key);
|
||||
return super.put(key, value);
|
||||
}
|
||||
}
|
||||
|
|
@ -1,83 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.report;
|
||||
|
||||
import org.apache.commons.io.FileUtils;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.broadinstitute.sting.utils.text.XReadLines;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
public class GATKReportParser {
|
||||
private List<GATKReportTableParser> tables = new ArrayList<GATKReportTableParser>();
|
||||
|
||||
public void parse(File file) throws IOException {
|
||||
InputStream stream = FileUtils.openInputStream(file);
|
||||
try {
|
||||
parse(stream);
|
||||
} finally {
|
||||
IOUtils.closeQuietly(stream);
|
||||
}
|
||||
}
|
||||
|
||||
public void parse(InputStream input) throws IOException {
|
||||
GATKReportTableParser table = null;
|
||||
|
||||
for (String line: new XReadLines(input)) {
|
||||
if (line.startsWith("##:GATKReport.v0.1 ")) {
|
||||
table = newTableParser(line);
|
||||
tables.add(table);
|
||||
table.parse(line);
|
||||
} else if (table != null) {
|
||||
if (line.trim().length() == 0)
|
||||
table = null;
|
||||
else
|
||||
table.parse(line);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public String getValue(String tableName, String[] key, String column) {
|
||||
for (GATKReportTableParser table: tables)
|
||||
if (table.getTableName().equals(tableName))
|
||||
return table.getValue(key, column);
|
||||
return null;
|
||||
}
|
||||
|
||||
public String getValue(String tableName, String key, String column) {
|
||||
for (GATKReportTableParser table: tables)
|
||||
if (table.getTableName().equals(tableName))
|
||||
return table.getValue(key, column);
|
||||
return null;
|
||||
}
|
||||
|
||||
private GATKReportTableParser newTableParser(String header) {
|
||||
return new GATKReportTableParser();
|
||||
}
|
||||
}
|
||||
|
|
@ -1,5 +1,6 @@
|
|||
package org.broadinstitute.sting.gatk.report;
|
||||
|
||||
import org.apache.commons.lang.ObjectUtils;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.io.PrintStream;
|
||||
|
|
@ -88,17 +89,20 @@ import java.util.regex.Pattern;
|
|||
* but at least the prototype contained herein works.
|
||||
*
|
||||
* @author Kiran Garimella
|
||||
* @author Khalid Shakir
|
||||
*/
|
||||
public class GATKReportTable {
|
||||
private static final GATKReportVersion LATEST_REPORT_VERSION = GATKReportVersion.V0_2;
|
||||
private String tableName;
|
||||
private String tableDescription;
|
||||
private GATKReportVersion version = LATEST_REPORT_VERSION;
|
||||
|
||||
private String primaryKeyName;
|
||||
private Collection<Object> primaryKeyColumn;
|
||||
private boolean primaryKeyDisplay;
|
||||
boolean sortByPrimaryKey = true;
|
||||
private boolean sortByPrimaryKey = true;
|
||||
|
||||
private LinkedHashMap<String, GATKReportColumn> columns;
|
||||
private GATKReportColumns columns;
|
||||
|
||||
/**
|
||||
* Verifies that a table or column name has only alphanumeric characters - no spaces or special characters allowed
|
||||
|
|
@ -113,6 +117,19 @@ public class GATKReportTable {
|
|||
return !m.find();
|
||||
}
|
||||
|
||||
/**
|
||||
* Verifies that a description does not contain carriage returns or newlines
|
||||
*
|
||||
* @param description the description to check
|
||||
* @return true if the description is valid, false otherwise
|
||||
*/
|
||||
private boolean isValidDescription(String description) {
|
||||
Pattern p = Pattern.compile("\\r|\\n");
|
||||
Matcher m = p.matcher(description);
|
||||
|
||||
return !m.find();
|
||||
}
|
||||
|
||||
/**
|
||||
* Construct a new GATK report table with the specified name and description
|
||||
*
|
||||
|
|
@ -128,11 +145,23 @@ public class GATKReportTable {
|
|||
throw new ReviewedStingException("Attempted to set a GATKReportTable name of '" + tableName + "'. GATKReportTable names must be purely alphanumeric - no spaces or special characters are allowed.");
|
||||
}
|
||||
|
||||
if (!isValidDescription(tableDescription)) {
|
||||
throw new ReviewedStingException("Attempted to set a GATKReportTable description of '" + tableDescription + "'. GATKReportTable descriptions must not contain newlines.");
|
||||
}
|
||||
|
||||
this.tableName = tableName;
|
||||
this.tableDescription = tableDescription;
|
||||
this.sortByPrimaryKey = sortByPrimaryKey;
|
||||
|
||||
columns = new LinkedHashMap<String, GATKReportColumn>();
|
||||
columns = new GATKReportColumns();
|
||||
}
|
||||
|
||||
public GATKReportVersion getVersion() {
|
||||
return version;
|
||||
}
|
||||
|
||||
protected void setVersion(GATKReportVersion version) {
|
||||
this.version = version;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -161,6 +190,57 @@ public class GATKReportTable {
|
|||
primaryKeyDisplay = display;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the first primary key matching the dotted column values.
|
||||
* Ex: dbsnp.eval.called.all.novel.all
|
||||
* @param dottedColumnValues Period concatenated values.
|
||||
* @return The first primary key matching the column values or throws an exception.
|
||||
*/
|
||||
public Object getPrimaryKey(String dottedColumnValues) {
|
||||
Object key = findPrimaryKey(dottedColumnValues);
|
||||
if (key == null)
|
||||
throw new ReviewedStingException("Attempted to get non-existent GATKReportTable key for values: " + dottedColumnValues);
|
||||
return key;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if there is at least one row with the dotted column values.
|
||||
* Ex: dbsnp.eval.called.all.novel.all
|
||||
* @param dottedColumnValues Period concatenated values.
|
||||
* @return true if there is at least one row matching the columns.
|
||||
*/
|
||||
public boolean containsPrimaryKey(String dottedColumnValues) {
|
||||
return findPrimaryKey(dottedColumnValues) != null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the first primary key matching the dotted column values.
|
||||
* Ex: dbsnp.eval.called.all.novel.all
|
||||
* @param dottedColumnValues Period concatenated values.
|
||||
* @return The first primary key matching the column values or null.
|
||||
*/
|
||||
private Object findPrimaryKey(String dottedColumnValues) {
|
||||
return findPrimaryKey(dottedColumnValues.split("\\."));
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the first primary key matching the column values.
|
||||
* Ex: new String[] { "dbsnp", "eval", "called", "all", "novel", "all" }
|
||||
* @param columnValues column values.
|
||||
* @return The first primary key matching the column values, or null if no row matches.
|
||||
*/
|
||||
private Object findPrimaryKey(Object[] columnValues) {
|
||||
for (Object primaryKey : primaryKeyColumn) {
|
||||
boolean matching = true;
|
||||
for (int i = 0; matching && i < columnValues.length; i++) {
|
||||
matching = ObjectUtils.equals(columnValues[i], get(primaryKey, i+1));
|
||||
}
|
||||
if (matching)
|
||||
return primaryKey;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a column to the report and specify the default value that should be supplied if a given position in the table is never explicitly set.
|
||||
*
|
||||
|
|
@ -230,6 +310,17 @@ public class GATKReportTable {
|
|||
return columns.get(columnName).get(primaryKey);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a value from the given position in the table
|
||||
*
|
||||
* @param primaryKey the primary key value
|
||||
* @param columnIndex the index of the column
|
||||
* @return the value stored at the specified position in the table
|
||||
*/
|
||||
private Object get(Object primaryKey, int columnIndex) {
|
||||
return columns.getByIndex(columnIndex).get(primaryKey);
|
||||
}
|
||||
|
||||
/**
|
||||
* Increment an element in the table. This implementation is awful - a functor would probably be better.
|
||||
*
|
||||
|
|
@ -515,7 +606,7 @@ public class GATKReportTable {
|
|||
String primaryKeyFormat = "%-" + getPrimaryKeyColumnWidth() + "s";
|
||||
|
||||
// Emit the table definition
|
||||
out.printf("##:GATKReport.v0.1 %s : %s%n", tableName, tableDescription);
|
||||
out.printf("##:GATKReport.%s %s : %s%n", LATEST_REPORT_VERSION.versionString, tableName, tableDescription);
|
||||
|
||||
// Emit the table header, taking into account the padding requirement if the primary key is a hidden column
|
||||
boolean needsPadding = false;
|
||||
|
|
@ -545,22 +636,8 @@ public class GATKReportTable {
|
|||
|
||||
for (String columnName : columns.keySet()) {
|
||||
if (columns.get(columnName).isDisplayable()) {
|
||||
Object obj = columns.get(columnName).getWithoutSideEffects(primaryKey);
|
||||
|
||||
if (needsPadding) { out.printf(" "); }
|
||||
|
||||
String value = "null";
|
||||
if (obj != null) {
|
||||
if (obj instanceof Float) {
|
||||
value = String.format("%.8f", (Float) obj);
|
||||
} else if (obj instanceof Double) {
|
||||
value = String.format("%.8f", (Double) obj);
|
||||
} else {
|
||||
value = obj.toString();
|
||||
}
|
||||
}
|
||||
|
||||
//out.printf(columnWidths.get(columnName), obj == null ? "null" : obj.toString());
|
||||
String value = columns.get(columnName).getStringValue(primaryKey);
|
||||
out.printf(columnWidths.get(columnName), value);
|
||||
|
||||
needsPadding = true;
|
||||
|
|
|
|||
|
|
@ -1,75 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.report;
|
||||
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
public class GATKReportTableParser {
|
||||
private int lineNum = 0;
|
||||
private String[] descriptions;
|
||||
private Map<String, Integer> headers = new HashMap<String, Integer>();
|
||||
private List<String[]> values = new ArrayList<String[]>();
|
||||
|
||||
public void parse(String line) {
|
||||
lineNum++;
|
||||
switch (lineNum) {
|
||||
case 1:
|
||||
descriptions = parseLine(line);
|
||||
case 2:
|
||||
String[] columnHeaders = parseLine(line);
|
||||
for (int i = 0; i < columnHeaders.length; i++)
|
||||
headers.put(columnHeaders[i], i);
|
||||
default:
|
||||
values.add(parseLine(line));
|
||||
}
|
||||
}
|
||||
|
||||
public String getTableName() {
|
||||
return descriptions[1];
|
||||
}
|
||||
|
||||
public String getValue(String[] key, String column) {
|
||||
if (!headers.containsKey(column))
|
||||
return null;
|
||||
for (String[] row: values)
|
||||
if (Arrays.equals(key, Arrays.copyOfRange(row, 1, key.length + 1)))
|
||||
return row[headers.get(column)];
|
||||
return null;
|
||||
}
|
||||
|
||||
public String getValue(String key, String column) {
|
||||
return getValue(key.split("\\."), column);
|
||||
}
|
||||
|
||||
private String generateKey(String[] row, int i) {
|
||||
return StringUtils.join(row, ".", 0, i);
|
||||
}
|
||||
|
||||
private String[] parseLine(String line) {
|
||||
return line.split(" +");
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,70 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.report;
|
||||
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
public enum GATKReportVersion {
|
||||
/**
|
||||
* Differences from other versions:
|
||||
* - Does not allow spaces in cells.
|
||||
* - Mostly fixed width but has a bug where the string width of floating point
|
||||
* values was not measured correctly leading to columns that aren't aligned
|
||||
*/
|
||||
V0_1("v0.1"),
|
||||
|
||||
/**
|
||||
* Differences from other versions:
|
||||
* - Spaces allowed in cells, for example in sample names with spaces in them ex: "C507/FG-CR 6".
|
||||
* - Fixed width fixed for floating point values
|
||||
*/
|
||||
V0_2("v0.2");
|
||||
|
||||
public final String versionString;
|
||||
|
||||
private GATKReportVersion(String versionString) {
|
||||
this.versionString = versionString;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return versionString;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the GATK Report Version from the file header.
|
||||
* @param header Header from the file starting with ##:GATKReport.v[version]
|
||||
* @return The version as an enum.
|
||||
*/
|
||||
public static GATKReportVersion fromHeader(String header) {
|
||||
if (header.startsWith("##:GATKReport.v0.1 "))
|
||||
return GATKReportVersion.V0_1;
|
||||
|
||||
if (header.startsWith("##:GATKReport.v0.2 "))
|
||||
return GATKReportVersion.V0_2;
|
||||
|
||||
throw new ReviewedStingException("Unknown GATK report version in header: " + header);
|
||||
}
|
||||
}
|
||||
|
|
@ -224,12 +224,12 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> {
|
|||
|
||||
if ( ! indelsOnly ) {
|
||||
for ( VariantContext annotatedVC : annotatedVCs )
|
||||
vcfWriter.add(annotatedVC, ref.getBase());
|
||||
vcfWriter.add(annotatedVC);
|
||||
} else {
|
||||
// check to see if the buffered context is different (in location) from this context
|
||||
if ( indelBufferContext != null && ! VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(),indelBufferContext.iterator().next()).equals(VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(),annotatedVCs.iterator().next())) ) {
|
||||
for ( VariantContext annotatedVC : indelBufferContext )
|
||||
vcfWriter.add(annotatedVC, ref.getBase());
|
||||
vcfWriter.add(annotatedVC);
|
||||
indelBufferContext = annotatedVCs;
|
||||
} else {
|
||||
indelBufferContext = annotatedVCs;
|
||||
|
|
|
|||
|
|
@ -135,7 +135,7 @@ public class BeagleOutputToVCFWalker extends RodWalker<Integer, Integer> {
|
|||
return 0;
|
||||
|
||||
if (vc_input.isFiltered()) {
|
||||
vcfWriter.add(vc_input, ref.getBase());
|
||||
vcfWriter.add(vc_input);
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
|
@ -335,7 +335,7 @@ public class BeagleOutputToVCFWalker extends RodWalker<Integer, Integer> {
|
|||
}
|
||||
|
||||
|
||||
vcfWriter.add(VariantContext.modifyAttributes(filteredVC,attributes), ref.getBase());
|
||||
vcfWriter.add(VariantContext.modifyAttributes(filteredVC,attributes));
|
||||
|
||||
|
||||
return 1;
|
||||
|
|
|
|||
|
|
@ -170,20 +170,20 @@ public class ProduceBeagleInputWalker extends RodWalker<Integer, Integer> {
|
|||
logger.debug(String.format("boot: %d, test: %d, total: %d", bootstrapSetSize, testSetSize, bootstrapSetSize+testSetSize+1));
|
||||
if ( (bootstrapSetSize+1.0)/(1.0+bootstrapSetSize+testSetSize) <= bootstrap ) {
|
||||
if ( bootstrapVCFOutput != null ) {
|
||||
bootstrapVCFOutput.add(VariantContext.modifyFilters(validation, BOOTSTRAP_FILTER), ref.getBase() );
|
||||
bootstrapVCFOutput.add(VariantContext.modifyFilters(validation, BOOTSTRAP_FILTER));
|
||||
}
|
||||
bootstrapSetSize++;
|
||||
return true;
|
||||
} else {
|
||||
if ( bootstrapVCFOutput != null ) {
|
||||
bootstrapVCFOutput.add(validation,ref.getBase());
|
||||
bootstrapVCFOutput.add(validation);
|
||||
}
|
||||
testSetSize++;
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
if ( validation != null && bootstrapVCFOutput != null ) {
|
||||
bootstrapVCFOutput.add(validation,ref.getBase());
|
||||
bootstrapVCFOutput.add(validation);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -112,7 +112,7 @@ public class VariantsToBeagleUnphasedWalker extends RodWalker<Integer, Integer>
|
|||
|
||||
// if we are holding it back and we are writing a bootstrap VCF, write it out
|
||||
if ( makeMissing && bootstrapVCFOutput != null ) {
|
||||
bootstrapVCFOutput.add(vc, ref.getBase());
|
||||
bootstrapVCFOutput.add(vc);
|
||||
}
|
||||
|
||||
// regardless, all sites are written to the unphased genotypes file, marked as missing if appropriate
|
||||
|
|
|
|||
|
|
@ -235,7 +235,7 @@ public class DiffEngine {
|
|||
// now that we have a specific list of values we want to show, display them
|
||||
GATKReport report = new GATKReport();
|
||||
final String tableName = "diffences";
|
||||
report.addTable(tableName, "Summarized differences between the master and test files.\nSee http://www.broadinstitute.org/gsa/wiki/index.php/DiffEngine for more information", false);
|
||||
report.addTable(tableName, "Summarized differences between the master and test files. See http://www.broadinstitute.org/gsa/wiki/index.php/DiffEngine for more information", false);
|
||||
GATKReportTable table = report.getTable(tableName);
|
||||
table.addPrimaryKey("Difference", true);
|
||||
table.addColumn("NumberOfOccurrences", 0);
|
||||
|
|
|
|||
|
|
@ -278,7 +278,7 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
|
|||
else
|
||||
filteredVC = new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), genotypes, vc.getNegLog10PError(), filters, vc.getAttributes());
|
||||
|
||||
writer.add( filteredVC, context.getReferenceContext().getBase() );
|
||||
writer.add(filteredVC);
|
||||
}
|
||||
|
||||
public Integer reduce(Integer value, Integer sum) {
|
||||
|
|
|
|||
|
|
@ -93,7 +93,7 @@ public class UGCalcLikelihoods extends LocusWalker<VariantCallContext, Integer>
|
|||
|
||||
public VariantCallContext map(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext) {
|
||||
VariantContext call = UG_engine.calculateLikelihoods(tracker, refContext, rawContext);
|
||||
return call == null ? null : new VariantCallContext(call, refContext.getBase(), true);
|
||||
return call == null ? null : new VariantCallContext(call, true);
|
||||
}
|
||||
|
||||
public Integer reduceInit() { return 0; }
|
||||
|
|
@ -107,7 +107,7 @@ public class UGCalcLikelihoods extends LocusWalker<VariantCallContext, Integer>
|
|||
return sum;
|
||||
|
||||
try {
|
||||
writer.add(value, value.refBase);
|
||||
writer.add(value);
|
||||
} catch (IllegalArgumentException e) {
|
||||
throw new IllegalArgumentException(e.getMessage() + "; this is often caused by using the --assume_single_sample_reads argument with the wrong sample name");
|
||||
}
|
||||
|
|
|
|||
|
|
@ -115,7 +115,7 @@ public class UGCallVariants extends RodWalker<VariantCallContext, Integer> {
|
|||
try {
|
||||
Map<String, Object> attrs = new HashMap<String, Object>(value.getAttributes());
|
||||
VariantContextUtils.calculateChromosomeCounts(value, attrs, true);
|
||||
writer.add(VariantContext.modifyAttributes(value, attrs), value.refBase);
|
||||
writer.add(VariantContext.modifyAttributes(value, attrs));
|
||||
} catch (IllegalArgumentException e) {
|
||||
throw new IllegalArgumentException(e.getMessage() + "; this is often caused by using the --assume_single_sample_reads argument with the wrong sample name");
|
||||
}
|
||||
|
|
|
|||
|
|
@ -227,7 +227,7 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
|
|||
try {
|
||||
// we are actually making a call
|
||||
sum.nCallsMade++;
|
||||
writer.add(value, value.refBase);
|
||||
writer.add(value);
|
||||
} catch (IllegalArgumentException e) {
|
||||
throw new IllegalArgumentException(e.getMessage() + "; this is often caused by using the --assume_single_sample_reads argument with the wrong sample name");
|
||||
}
|
||||
|
|
|
|||
|
|
@ -247,7 +247,7 @@ public class UnifiedGenotyperEngine {
|
|||
}
|
||||
|
||||
if ( annotationEngine != null ) {
|
||||
// we want to use the *unfiltered* and *unBAQed* context for the annotations
|
||||
// Note: we want to use the *unfiltered* and *unBAQed* context for the annotations
|
||||
ReadBackedPileup pileup = null;
|
||||
if (rawContext.hasExtendedEventPileup())
|
||||
pileup = rawContext.getExtendedEventPileup();
|
||||
|
|
@ -258,7 +258,7 @@ public class UnifiedGenotyperEngine {
|
|||
vc = annotationEngine.annotateContext(tracker, ref, stratifiedContexts, vc);
|
||||
}
|
||||
|
||||
return new VariantCallContext(vc, ref.getBase(), false);
|
||||
return new VariantCallContext(vc, false);
|
||||
}
|
||||
|
||||
private VariantContext createVariantContextFromLikelihoods(ReferenceContext refContext, Allele refAllele, Map<String, MultiallelicGenotypeLikelihoods> GLs) {
|
||||
|
|
@ -300,7 +300,8 @@ public class UnifiedGenotyperEngine {
|
|||
genotypes,
|
||||
VariantContext.NO_NEG_LOG_10PERROR,
|
||||
null,
|
||||
null);
|
||||
null,
|
||||
refContext.getBase());
|
||||
}
|
||||
|
||||
// private method called by both UnifiedGenotyper and UGCallVariants entry points into the engine
|
||||
|
|
@ -425,10 +426,10 @@ public class UnifiedGenotyperEngine {
|
|||
myAlleles.add(vc.getReference());
|
||||
}
|
||||
VariantContext vcCall = new VariantContext("UG_call", loc.getContig(), loc.getStart(), endLoc,
|
||||
myAlleles, genotypes, phredScaledConfidence/10.0, passesCallThreshold(phredScaledConfidence) ? null : filter, attributes);
|
||||
myAlleles, genotypes, phredScaledConfidence/10.0, passesCallThreshold(phredScaledConfidence) ? null : filter, attributes, refContext.getBase());
|
||||
|
||||
if ( annotationEngine != null ) {
|
||||
// first off, we want to use the *unfiltered* and *unBAQed* context for the annotations
|
||||
// Note: we want to use the *unfiltered* and *unBAQed* context for the annotations
|
||||
ReadBackedPileup pileup = null;
|
||||
if (rawContext.hasExtendedEventPileup())
|
||||
pileup = rawContext.getExtendedEventPileup();
|
||||
|
|
@ -439,9 +440,7 @@ public class UnifiedGenotyperEngine {
|
|||
vcCall = annotationEngine.annotateContext(tracker, refContext, stratifiedContexts, vcCall);
|
||||
}
|
||||
|
||||
VariantCallContext call = new VariantCallContext(vcCall, confidentlyCalled(phredScaledConfidence, PofF));
|
||||
call.setRefBase(refContext.getBase());
|
||||
return call;
|
||||
return new VariantCallContext(vcCall, confidentlyCalled(phredScaledConfidence, PofF));
|
||||
}
|
||||
|
||||
private int calculateEndPos(Set<Allele> alleles, Allele refAllele, GenomeLoc loc) {
|
||||
|
|
|
|||
|
|
@ -36,7 +36,6 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
|||
* Useful helper class to communicate the results of calculateGenotype to framework
|
||||
*/
|
||||
public class VariantCallContext extends VariantContext {
|
||||
public byte refBase;
|
||||
|
||||
// Was the site called confidently, either reference or variant?
|
||||
public boolean confidentlyCalled = false;
|
||||
|
|
@ -55,16 +54,6 @@ public class VariantCallContext extends VariantContext {
|
|||
this.shouldEmit = shouldEmit;
|
||||
}
|
||||
|
||||
VariantCallContext(VariantContext vc, byte ref, boolean confidentlyCalledP) {
|
||||
super(vc);
|
||||
this.refBase = ref;
|
||||
this.confidentlyCalled = confidentlyCalledP;
|
||||
}
|
||||
|
||||
public void setRefBase(byte ref) {
|
||||
this.refBase = ref;
|
||||
}
|
||||
|
||||
/* these methods are only implemented for GENOTYPE_GIVEN_ALLELES MODE */
|
||||
//todo -- expand these methods to all modes
|
||||
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load Diff
|
|
@ -91,7 +91,7 @@ public class MergeAndMatchHaplotypes extends RodWalker<Integer, Integer> {
|
|||
}
|
||||
|
||||
VariantContext newvc = new VariantContext(SOURCE_NAME, pbt.getChr(), pbt.getStart(), pbt.getStart(), pbt.getAlleles(), genotypes, pbt.getNegLog10PError(), pbt.getFilters(), pbt.getAttributes());
|
||||
vcfWriter.add(newvc, ref.getBase());
|
||||
vcfWriter.add(newvc);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -118,7 +118,7 @@ public class MergeSegregatingAlternateAllelesVCFWriter implements VCFWriter {
|
|||
innerWriter.close();
|
||||
}
|
||||
|
||||
public void add(VariantContext vc, byte refBase) {
|
||||
public void add(VariantContext vc) {
|
||||
if (useSingleSample != null) { // only want to output context for one sample
|
||||
Genotype sampGt = vc.getGenotype(useSingleSample);
|
||||
if (sampGt != null) // TODO: subContextFromGenotypes() does not handle any INFO fields [AB, HaplotypeScore, MQ, etc.]. Note that even SelectVariants.subsetRecord() only handles AC,AN,AF, and DP!
|
||||
|
|
@ -138,11 +138,11 @@ public class MergeSegregatingAlternateAllelesVCFWriter implements VCFWriter {
|
|||
|
||||
if (curVcIsNotFiltered) { // still need to wait before can release vc
|
||||
logger.debug("Waiting for new variant " + VariantContextUtils.getLocation(genomeLocParser, vc));
|
||||
vcfrWaitingToMerge = new VCFRecord(vc, refBase, false);
|
||||
vcfrWaitingToMerge = new VCFRecord(vc, false);
|
||||
}
|
||||
else if (!emitOnlyMergedRecords) { // filtered records are never merged
|
||||
logger.debug("DIRECTLY output " + VariantContextUtils.getLocation(genomeLocParser, vc));
|
||||
innerWriter.add(vc, refBase);
|
||||
innerWriter.add(vc);
|
||||
}
|
||||
}
|
||||
else { // waiting to merge vcfrWaitingToMerge
|
||||
|
|
@ -151,7 +151,7 @@ public class MergeSegregatingAlternateAllelesVCFWriter implements VCFWriter {
|
|||
if (!curVcIsNotFiltered) {
|
||||
if (!emitOnlyMergedRecords) { // filtered records are never merged
|
||||
logger.debug("Caching unprocessed output " + VariantContextUtils.getLocation(genomeLocParser, vc));
|
||||
filteredVcfrList.add(new VCFRecord(vc, refBase, false));
|
||||
filteredVcfrList.add(new VCFRecord(vc, false));
|
||||
}
|
||||
}
|
||||
else { // waiting to merge vcfrWaitingToMerge, and curVcIsNotFiltered. So, attempt to merge them:
|
||||
|
|
@ -188,14 +188,14 @@ public class MergeSegregatingAlternateAllelesVCFWriter implements VCFWriter {
|
|||
addedAttribs.putAll(mergedVc.getAttributes());
|
||||
mergedVc = VariantContext.modifyAttributes(mergedVc, addedAttribs);
|
||||
|
||||
vcfrWaitingToMerge = new VCFRecord(mergedVc, vcfrWaitingToMerge.refBase, true);
|
||||
vcfrWaitingToMerge = new VCFRecord(mergedVc, true);
|
||||
numMergedRecords++;
|
||||
}
|
||||
}
|
||||
|
||||
if (!mergedRecords) {
|
||||
stopWaitingToMerge();
|
||||
vcfrWaitingToMerge = new VCFRecord(vc, refBase, false);
|
||||
vcfrWaitingToMerge = new VCFRecord(vc, false);
|
||||
}
|
||||
logger.debug("Merged? = " + mergedRecords);
|
||||
}
|
||||
|
|
@ -210,11 +210,11 @@ public class MergeSegregatingAlternateAllelesVCFWriter implements VCFWriter {
|
|||
}
|
||||
|
||||
if (!emitOnlyMergedRecords || vcfrWaitingToMerge.resultedFromMerge)
|
||||
innerWriter.add(vcfrWaitingToMerge.vc, vcfrWaitingToMerge.refBase);
|
||||
innerWriter.add(vcfrWaitingToMerge.vc);
|
||||
vcfrWaitingToMerge = null;
|
||||
|
||||
for (VCFRecord vcfr : filteredVcfrList)
|
||||
innerWriter.add(vcfr.vc, vcfr.refBase);
|
||||
innerWriter.add(vcfr.vc);
|
||||
filteredVcfrList.clear();
|
||||
}
|
||||
|
||||
|
|
@ -257,12 +257,10 @@ public class MergeSegregatingAlternateAllelesVCFWriter implements VCFWriter {
|
|||
|
||||
private static class VCFRecord {
|
||||
public VariantContext vc;
|
||||
public byte refBase;
|
||||
public boolean resultedFromMerge;
|
||||
|
||||
public VCFRecord(VariantContext vc, byte refBase, boolean resultedFromMerge) {
|
||||
public VCFRecord(VariantContext vc, boolean resultedFromMerge) {
|
||||
this.vc = vc;
|
||||
this.refBase = refBase;
|
||||
this.resultedFromMerge = resultedFromMerge;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -311,7 +311,8 @@ public class PhaseByTransmission extends RodWalker<Integer, Integer> {
|
|||
|
||||
VariantContext newvc = VariantContext.modifyGenotypes(vc, genotypeMap);
|
||||
|
||||
vcfWriter.add(newvc, ref.getBase());
|
||||
vcfWriter.add(newvc);
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
|
|
|
|||
|
|
@ -25,20 +25,10 @@ package org.broadinstitute.sting.gatk.walkers.phasing;
|
|||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
public class WriteVCF {
|
||||
public static void writeVCF(VariantContext vc, VCFWriter writer, Logger logger) {
|
||||
byte refBase;
|
||||
if (!vc.isIndel()) {
|
||||
Allele refAllele = vc.getReference();
|
||||
refBase = SNPallelePair.getSingleBase(refAllele);
|
||||
}
|
||||
else {
|
||||
refBase = vc.getReferenceBaseForIndel();
|
||||
}
|
||||
|
||||
writer.add(vc, refBase);
|
||||
writer.add(vc);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -204,9 +204,9 @@ public class ApplyRecalibration extends RodWalker<Integer, Integer> {
|
|||
filters.add(filterString);
|
||||
vc = VariantContext.modifyFilters(vc, filters);
|
||||
}
|
||||
vcfWriter.add( VariantContext.modifyPErrorFiltersAndAttributes(vc, vc.getNegLog10PError(), vc.getFilters(), attrs), ref.getBase() );
|
||||
vcfWriter.add( VariantContext.modifyPErrorFiltersAndAttributes(vc, vc.getNegLog10PError(), vc.getFilters(), attrs) );
|
||||
} else { // valid VC but not compatible with this mode, so just emit the variant untouched
|
||||
vcfWriter.add( vc, ref.getBase() );
|
||||
vcfWriter.add( vc );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -258,7 +258,7 @@ public class VariantDataManager {
|
|||
datum.consensusCount = 0;
|
||||
|
||||
for( final TrainingSet trainingSet : trainingSets ) {
|
||||
for( final VariantContext trainVC : tracker.getValues(VariantContext.class, trainingSet.name) ) {
|
||||
for( final VariantContext trainVC : tracker.getValues(VariantContext.class, trainingSet.name, ref.getLocus()) ) {
|
||||
if( trainVC != null && trainVC.isNotFiltered() && trainVC.isVariant() &&
|
||||
((evalVC.isSNP() && trainVC.isSNP()) || ((evalVC.isIndel()||evalVC.isMixed()) && (trainVC.isIndel()||trainVC.isMixed()))) &&
|
||||
(TRUST_ALL_POLYMORPHIC || !trainVC.hasGenotypes() || trainVC.isPolymorphic()) ) {
|
||||
|
|
|
|||
|
|
@ -156,7 +156,7 @@ public class CombineVariants extends RodWalker<Integer, Integer> {
|
|||
|
||||
if ( ASSUME_IDENTICAL_SAMPLES ) {
|
||||
for ( final VariantContext vc : vcs ) {
|
||||
vcfWriter.add( vc, ref.getBase() );
|
||||
vcfWriter.add(vc);
|
||||
}
|
||||
|
||||
return vcs.isEmpty() ? 0 : 1;
|
||||
|
|
@ -181,7 +181,7 @@ public class CombineVariants extends RodWalker<Integer, Integer> {
|
|||
if ( VCsByType.containsKey(type) )
|
||||
mergedVCs.add(VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(), VCsByType.get(type),
|
||||
priority, filteredRecordsMergeType, genotypeMergeOption, true, printComplexMerges,
|
||||
ref.getBase(), SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC));
|
||||
SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -196,7 +196,7 @@ public class CombineVariants extends RodWalker<Integer, Integer> {
|
|||
VariantContext annotatedMergedVC = VariantContext.modifyAttributes(mergedVC, attributes);
|
||||
if ( minimalVCF )
|
||||
annotatedMergedVC = VariantContextUtils.pruneVariantContext(annotatedMergedVC, Arrays.asList(SET_KEY));
|
||||
vcfWriter.add(annotatedMergedVC, ref.getBase());
|
||||
vcfWriter.add(annotatedMergedVC);
|
||||
}
|
||||
|
||||
return vcs.isEmpty() ? 0 : 1;
|
||||
|
|
|
|||
|
|
@ -82,7 +82,7 @@ public class FilterLiftedVariants extends RodWalker<Integer, Integer> {
|
|||
if ( failed )
|
||||
failedLocs++;
|
||||
else
|
||||
writer.add(vc, ref[0]);
|
||||
writer.add(vc);
|
||||
}
|
||||
|
||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
|
|
|
|||
|
|
@ -94,10 +94,10 @@ public class LeftAlignVariants extends RodWalker<Integer, Integer> {
|
|||
|
||||
|
||||
private int alignAndWrite(VariantContext vc, final ReferenceContext ref) {
|
||||
if ( vc.isBiallelic() && vc.isIndel() )
|
||||
if ( vc.isBiallelic() && vc.isIndel() && !vc.isComplexIndel() )
|
||||
return writeLeftAlignedIndel(vc, ref);
|
||||
else {
|
||||
writer.add(vc, ref.getBase());
|
||||
writer.add(vc);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
|
@ -113,7 +113,7 @@ public class LeftAlignVariants extends RodWalker<Integer, Integer> {
|
|||
indelLength = vc.getAlternateAllele(0).length();
|
||||
|
||||
if ( indelLength > 200 ) {
|
||||
writer.add(vc, ref.getBase());
|
||||
writer.add(vc);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
@ -141,17 +141,12 @@ public class LeftAlignVariants extends RodWalker<Integer, Integer> {
|
|||
byte[] newBases = new byte[indelLength];
|
||||
System.arraycopy((vc.isDeletion() ? refSeq : originalIndel), indelIndex, newBases, 0, indelLength);
|
||||
Allele newAllele = Allele.create(newBases, vc.isDeletion());
|
||||
newVC = updateAllele(newVC, newAllele);
|
||||
newVC = updateAllele(newVC, newAllele, refSeq[indelIndex-1]);
|
||||
|
||||
// we need to update the reference base just in case it changed
|
||||
Map<String, Object> attrs = new HashMap<String, Object>(newVC.getAttributes());
|
||||
attrs.put(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY, refSeq[indelIndex-1]);
|
||||
newVC = VariantContext.modifyAttributes(newVC, attrs);
|
||||
|
||||
writer.add(newVC, refSeq[indelIndex-1]);
|
||||
writer.add(newVC);
|
||||
return 1;
|
||||
} else {
|
||||
writer.add(vc, ref.getBase());
|
||||
writer.add(vc);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
|
@ -177,7 +172,7 @@ public class LeftAlignVariants extends RodWalker<Integer, Integer> {
|
|||
return hap;
|
||||
}
|
||||
|
||||
public static VariantContext updateAllele(VariantContext vc, Allele newAllele) {
|
||||
public static VariantContext updateAllele(VariantContext vc, Allele newAllele, Byte refBaseForIndel) {
|
||||
// create a mapping from original allele to new allele
|
||||
HashMap<Allele, Allele> alleleMap = new HashMap<Allele, Allele>(vc.getAlleles().size());
|
||||
if ( newAllele.isReference() ) {
|
||||
|
|
@ -201,6 +196,6 @@ public class LeftAlignVariants extends RodWalker<Integer, Integer> {
|
|||
newGenotypes.put(genotype.getKey(), Genotype.modifyAlleles(genotype.getValue(), newAlleles));
|
||||
}
|
||||
|
||||
return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), alleleMap.values(), newGenotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, vc.getAttributes());
|
||||
return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), alleleMap.values(), newGenotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, vc.getAttributes(), refBaseForIndel);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -128,14 +128,14 @@ public class LiftoverVariants extends RodWalker<Integer, Integer> {
|
|||
vc = VariantContext.modifyAttributes(vc, attrs);
|
||||
}
|
||||
|
||||
VariantContext newVC = VariantContext.createVariantContextWithPaddedAlleles(vc, ref.getBase(), false);
|
||||
VariantContext newVC = VariantContext.createVariantContextWithPaddedAlleles(vc, false);
|
||||
if ( originalVC.isSNP() && originalVC.isBiallelic() && VariantContextUtils.getSNPSubstitutionType(originalVC) != VariantContextUtils.getSNPSubstitutionType(newVC) ) {
|
||||
logger.warn(String.format("VCF at %s / %d => %s / %d is switching substitution type %s/%s to %s/%s",
|
||||
originalVC.getChr(), originalVC.getStart(), newVC.getChr(), newVC.getStart(),
|
||||
originalVC.getReference(), originalVC.getAlternateAllele(0), newVC.getReference(), newVC.getAlternateAllele(0)));
|
||||
}
|
||||
|
||||
writer.add(vc, ref.getBase());
|
||||
writer.add(vc);
|
||||
successfulIntervals++;
|
||||
} else {
|
||||
failedIntervals++;
|
||||
|
|
|
|||
|
|
@ -98,9 +98,9 @@ public class RandomlySplitVariants extends RodWalker<Integer, Integer> {
|
|||
for ( VariantContext vc : vcs ) {
|
||||
int random = GenomeAnalysisEngine.getRandomGenerator().nextInt(1000);
|
||||
if ( random < iFraction )
|
||||
vcfWriter1.add(vc, ref.getBase());
|
||||
vcfWriter1.add(vc);
|
||||
else
|
||||
vcfWriter2.add(vc, ref.getBase());
|
||||
vcfWriter2.add(vc);
|
||||
}
|
||||
|
||||
return 1;
|
||||
|
|
|
|||
|
|
@ -32,6 +32,8 @@ import org.broadinstitute.sting.utils.text.XReadLines;
|
|||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.utils.MendelianViolation;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
|
|
@ -126,16 +128,13 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
|||
/* Private class used to store the intermediate variants in the integer random selection process */
|
||||
private class RandomVariantStructure {
|
||||
private VariantContext vc;
|
||||
private byte refBase;
|
||||
|
||||
RandomVariantStructure(VariantContext vcP, byte refBaseP) {
|
||||
RandomVariantStructure(VariantContext vcP) {
|
||||
vc = vcP;
|
||||
refBase = refBaseP;
|
||||
}
|
||||
|
||||
public void set (VariantContext vcP, byte refBaseP) {
|
||||
public void set (VariantContext vcP) {
|
||||
vc = vcP;
|
||||
refBase = refBaseP;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -356,7 +355,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
|||
randomlyAddVariant(++variantNumber, sub, ref.getBase());
|
||||
}
|
||||
else if (!SELECT_RANDOM_FRACTION || (!KEEP_AF_SPECTRUM && GenomeAnalysisEngine.getRandomGenerator().nextDouble() < fractionRandom)) {
|
||||
vcfWriter.add(sub, ref.getBase());
|
||||
vcfWriter.add(sub);
|
||||
}
|
||||
else {
|
||||
if (SELECT_RANDOM_FRACTION && KEEP_AF_SPECTRUM ) {
|
||||
|
|
@ -404,7 +403,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
|||
|
||||
//System.out.format("%s .. %4.4f\n",afo.toString(), af);
|
||||
if (GenomeAnalysisEngine.getRandomGenerator().nextDouble() < fractionRandom * afBoost * afBoost)
|
||||
vcfWriter.add(sub, ref.getBase());
|
||||
vcfWriter.add(sub);
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -511,7 +510,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
|||
if (SELECT_RANDOM_NUMBER) {
|
||||
int positionToPrint = positionToAdd;
|
||||
for (int i=0; i<numRandom; i++) {
|
||||
vcfWriter.add(variantArray[positionToPrint].vc, variantArray[positionToPrint].refBase);
|
||||
vcfWriter.add(variantArray[positionToPrint].vc);
|
||||
positionToPrint = nextCircularPosition(positionToPrint);
|
||||
}
|
||||
}
|
||||
|
|
@ -574,13 +573,13 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
|||
|
||||
private void randomlyAddVariant(int rank, VariantContext vc, byte refBase) {
|
||||
if (nVariantsAdded < numRandom)
|
||||
variantArray[nVariantsAdded++] = new RandomVariantStructure(vc, refBase);
|
||||
variantArray[nVariantsAdded++] = new RandomVariantStructure(vc);
|
||||
|
||||
else {
|
||||
double v = GenomeAnalysisEngine.getRandomGenerator().nextDouble();
|
||||
double t = (1.0/(rank-numRandom+1));
|
||||
if ( v < t) {
|
||||
variantArray[positionToAdd].set(vc, refBase);
|
||||
variantArray[positionToAdd].set(vc);
|
||||
nVariantsAdded++;
|
||||
positionToAdd = nextCircularPosition(positionToAdd);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -70,7 +70,7 @@ public class VariantValidationAssessor extends RodWalker<Pair<VariantContext, By
|
|||
private TreeSet<String> sampleNames = null;
|
||||
|
||||
// variant context records
|
||||
private ArrayList<Pair<VariantContext, Byte>> records = new ArrayList<Pair<VariantContext, Byte>>();
|
||||
private ArrayList<VariantContext> records = new ArrayList<VariantContext>();
|
||||
|
||||
// statistics
|
||||
private int numRecords = 0;
|
||||
|
|
@ -91,7 +91,7 @@ public class VariantValidationAssessor extends RodWalker<Pair<VariantContext, By
|
|||
return 0;
|
||||
}
|
||||
|
||||
public Pair<VariantContext, Byte> map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
public VariantContext map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
if ( tracker == null )
|
||||
return null;
|
||||
|
||||
|
|
@ -106,7 +106,7 @@ public class VariantValidationAssessor extends RodWalker<Pair<VariantContext, By
|
|||
return addVariantInformationToCall(ref, vc);
|
||||
}
|
||||
|
||||
public Integer reduce(Pair<VariantContext, Byte> call, Integer numVariants) {
|
||||
public Integer reduce(VariantContext call, Integer numVariants) {
|
||||
if ( call != null ) {
|
||||
numVariants++;
|
||||
records.add(call);
|
||||
|
|
@ -156,12 +156,12 @@ public class VariantValidationAssessor extends RodWalker<Pair<VariantContext, By
|
|||
|
||||
vcfwriter.writeHeader(new VCFHeader(hInfo, SampleUtils.getUniqueSamplesFromRods(getToolkit(), inputNames)));
|
||||
|
||||
for ( Pair<VariantContext, Byte> record : records )
|
||||
vcfwriter.add(record.first, record.second);
|
||||
for ( VariantContext record : records )
|
||||
vcfwriter.add(record);
|
||||
}
|
||||
|
||||
|
||||
private Pair<VariantContext, Byte> addVariantInformationToCall(ReferenceContext ref, VariantContext vContext) {
|
||||
private VariantContext addVariantInformationToCall(ReferenceContext ref, VariantContext vContext) {
|
||||
|
||||
// check possible filters
|
||||
double hwPvalue = hardyWeinbergCalculation(vContext);
|
||||
|
|
@ -203,9 +203,7 @@ public class VariantValidationAssessor extends RodWalker<Pair<VariantContext, By
|
|||
infoMap.put(VCFConstants.ALLELE_COUNT_KEY, String.format("%d", altAlleleCount));
|
||||
infoMap.put(VCFConstants.ALLELE_NUMBER_KEY, String.format("%d", vContext.getChromosomeCount()));
|
||||
|
||||
vContext = VariantContext.modifyAttributes(vContext, infoMap);
|
||||
|
||||
return new Pair<VariantContext, Byte>(vContext, ref.getBase());
|
||||
return VariantContext.modifyAttributes(vContext, infoMap);
|
||||
}
|
||||
|
||||
private double hardyWeinbergCalculation(VariantContext vc) {
|
||||
|
|
|
|||
|
|
@ -83,8 +83,8 @@ public class VariantsToTable extends RodWalker<Integer, Integer> {
|
|||
getters.put("REF", new Getter() {
|
||||
public String get(VariantContext vc) {
|
||||
String x = "";
|
||||
if (vc.hasAttribute(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY)) {
|
||||
Byte refByte = (Byte)(vc.getAttribute(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY));
|
||||
if ( vc.hasReferenceBaseForIndel() ) {
|
||||
Byte refByte = vc.getReferenceBaseForIndel();
|
||||
x=x+new String(new byte[]{refByte});
|
||||
}
|
||||
return x+vc.getReference().getDisplayString();
|
||||
|
|
@ -95,8 +95,8 @@ public class VariantsToTable extends RodWalker<Integer, Integer> {
|
|||
StringBuilder x = new StringBuilder();
|
||||
int n = vc.getAlternateAlleles().size();
|
||||
if ( n == 0 ) return ".";
|
||||
if (vc.hasAttribute(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY)) {
|
||||
Byte refByte = (Byte)(vc.getAttribute(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY));
|
||||
if ( vc.hasReferenceBaseForIndel() ) {
|
||||
Byte refByte = vc.getReferenceBaseForIndel();
|
||||
x.append(new String(new byte[]{refByte}));
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -154,9 +154,10 @@ public class VariantsToVCF extends RodWalker<Integer, Integer> {
|
|||
VariantContext vc = VariantContextAdaptors.toVariantContext(variants.getName(), hapmap, ref);
|
||||
if ( vc != null ) {
|
||||
if ( refBase != null ) {
|
||||
Map<String, Object> attrs = new HashMap<String, Object>(vc.getAttributes());
|
||||
attrs.put(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY, refBase);
|
||||
vc = VariantContext.modifyAttributes(vc, attrs);
|
||||
// TODO -- fix me
|
||||
//Map<String, Object> attrs = new HashMap<String, Object>(vc.getAttributes());
|
||||
//attrs.put(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY, refBase);
|
||||
//vc = VariantContext.modifyAttributes(vc, attrs);
|
||||
}
|
||||
hapmapVCs.add(vc);
|
||||
}
|
||||
|
|
@ -238,7 +239,7 @@ public class VariantsToVCF extends RodWalker<Integer, Integer> {
|
|||
}
|
||||
|
||||
vc = VariantContextUtils.purgeUnallowedGenotypeAttributes(vc, allowedGenotypeFormatStrings);
|
||||
vcfwriter.add(vc, ref);
|
||||
vcfwriter.add(vc);
|
||||
}
|
||||
|
||||
public Integer reduceInit() {
|
||||
|
|
|
|||
|
|
@ -42,6 +42,21 @@ public class Utils {
|
|||
/** our log, which we want to capture anything from this class */
|
||||
private static Logger logger = Logger.getLogger(Utils.class);
|
||||
|
||||
public static final float JAVA_DEFAULT_HASH_LOAD_FACTOR = 0.75f;
|
||||
|
||||
/**
|
||||
* Calculates the optimum initial size for a hash table given the maximum number
|
||||
* of elements it will need to hold. The optimum size is the smallest size that
|
||||
* is guaranteed not to result in any rehash/table-resize operations.
|
||||
*
|
||||
* @param maxElements The maximum number of elements you expect the hash table
|
||||
* will need to hold
|
||||
* @return The optimum initial size for the table, given maxElements
|
||||
*/
|
||||
public static int optimumHashSize ( int maxElements ) {
|
||||
return (int)(maxElements / JAVA_DEFAULT_HASH_LOAD_FACTOR) + 2;
|
||||
}
|
||||
|
||||
public static String getClassName(Class c) {
|
||||
String FQClassName = c.getName();
|
||||
int firstChar;
|
||||
|
|
|
|||
|
|
@ -567,7 +567,6 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
|
|||
|
||||
// set the reference base for indels in the attributes
|
||||
Map<String,Object> attributes = new TreeMap<String,Object>(inputVC.getAttributes());
|
||||
attributes.put(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY, new Byte(inputVC.getReference().getBases()[0]));
|
||||
|
||||
Map<Allele, Allele> originalToTrimmedAlleleMap = new HashMap<Allele, Allele>();
|
||||
|
||||
|
|
@ -611,7 +610,7 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
|
|||
genotypes.put(sample.getKey(), Genotype.modifyAlleles(sample.getValue(), trimmedAlleles));
|
||||
|
||||
}
|
||||
return new VariantContext(inputVC.getSource(), inputVC.getChr(), inputVC.getStart(), inputVC.getEnd(), alleles, genotypes, inputVC.getNegLog10PError(), inputVC.filtersWereApplied() ? inputVC.getFilters() : null, attributes);
|
||||
return new VariantContext(inputVC.getSource(), inputVC.getChr(), inputVC.getStart(), inputVC.getEnd(), alleles, genotypes, inputVC.getNegLog10PError(), inputVC.filtersWereApplied() ? inputVC.getFilters() : null, attributes, new Byte(inputVC.getReference().getBases()[0]));
|
||||
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -105,9 +105,8 @@ public abstract class SortingVCFWriterBase implements VCFWriter {
|
|||
* add a record to the file
|
||||
*
|
||||
* @param vc the Variant Context object
|
||||
* @param refBase the ref base
|
||||
*/
|
||||
public void add(VariantContext vc, byte refBase) {
|
||||
public void add(VariantContext vc) {
|
||||
/* Note that the code below does not prevent the successive add()-ing of: (chr1, 10), (chr20, 200), (chr15, 100)
|
||||
since there is no implicit ordering of chromosomes:
|
||||
*/
|
||||
|
|
@ -122,7 +121,7 @@ public abstract class SortingVCFWriterBase implements VCFWriter {
|
|||
|
||||
noteCurrentRecord(vc); // possibly overwritten
|
||||
|
||||
queue.add(new VCFRecord(vc, refBase));
|
||||
queue.add(new VCFRecord(vc));
|
||||
emitSafeRecords();
|
||||
}
|
||||
|
||||
|
|
@ -133,7 +132,7 @@ public abstract class SortingVCFWriterBase implements VCFWriter {
|
|||
// No need to wait, waiting for nothing, or before what we're waiting for:
|
||||
if (emitUnsafe || mostUpstreamWritableLoc == null || firstRec.vc.getStart() <= mostUpstreamWritableLoc) {
|
||||
queue.poll();
|
||||
innerWriter.add(firstRec.vc, firstRec.refBase);
|
||||
innerWriter.add(firstRec.vc);
|
||||
}
|
||||
else {
|
||||
break;
|
||||
|
|
@ -143,7 +142,7 @@ public abstract class SortingVCFWriterBase implements VCFWriter {
|
|||
|
||||
/**
|
||||
* Gets a string representation of this object.
|
||||
* @return
|
||||
* @return a string representation of this object
|
||||
*/
|
||||
@Override
|
||||
public String toString() {
|
||||
|
|
@ -158,11 +157,9 @@ public abstract class SortingVCFWriterBase implements VCFWriter {
|
|||
|
||||
private static class VCFRecord {
|
||||
public VariantContext vc;
|
||||
public byte refBase;
|
||||
|
||||
public VCFRecord(VariantContext vc, byte refBase) {
|
||||
public VCFRecord(VariantContext vc) {
|
||||
this.vc = vc;
|
||||
this.refBase = refBase;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -202,20 +202,18 @@ public class StandardVCFWriter implements VCFWriter {
|
|||
* add a record to the file
|
||||
*
|
||||
* @param vc the Variant Context object
|
||||
* @param refBase the ref base used for indels
|
||||
*/
|
||||
public void add(VariantContext vc, byte refBase) {
|
||||
add(vc, refBase, false);
|
||||
public void add(VariantContext vc) {
|
||||
add(vc, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* add a record to the file
|
||||
*
|
||||
* @param vc the Variant Context object
|
||||
* @param refBase the ref base used for indels
|
||||
* @param refBaseShouldBeAppliedToEndOfAlleles *** THIS SHOULD BE FALSE EXCEPT FOR AN INDEL AT THE EXTREME BEGINNING OF A CONTIG (WHERE THERE IS NO PREVIOUS BASE, SO WE USE THE BASE AFTER THE EVENT INSTEAD)
|
||||
*/
|
||||
public void add(VariantContext vc, byte refBase, boolean refBaseShouldBeAppliedToEndOfAlleles) {
|
||||
public void add(VariantContext vc, boolean refBaseShouldBeAppliedToEndOfAlleles) {
|
||||
if ( mHeader == null )
|
||||
throw new IllegalStateException("The VCF Header must be written before records can be added: " + locationString());
|
||||
|
||||
|
|
@ -223,7 +221,7 @@ public class StandardVCFWriter implements VCFWriter {
|
|||
vc = VariantContext.modifyGenotypes(vc, null);
|
||||
|
||||
try {
|
||||
vc = VariantContext.createVariantContextWithPaddedAlleles(vc, refBase, refBaseShouldBeAppliedToEndOfAlleles);
|
||||
vc = VariantContext.createVariantContextWithPaddedAlleles(vc, refBaseShouldBeAppliedToEndOfAlleles);
|
||||
|
||||
// if we are doing on the fly indexing, add the record ***before*** we write any bytes
|
||||
if ( indexer != null ) indexer.addFeature(vc, positionalStream.getPosition());
|
||||
|
|
@ -285,7 +283,7 @@ public class StandardVCFWriter implements VCFWriter {
|
|||
Map<String, String> infoFields = new TreeMap<String, String>();
|
||||
for ( Map.Entry<String, Object> field : vc.getAttributes().entrySet() ) {
|
||||
String key = field.getKey();
|
||||
if ( key.equals(VariantContext.ID_KEY) || key.equals(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY) || key.equals(VariantContext.UNPARSED_GENOTYPE_MAP_KEY) || key.equals(VariantContext.UNPARSED_GENOTYPE_PARSER_KEY) )
|
||||
if ( key.equals(VariantContext.ID_KEY) || key.equals(VariantContext.UNPARSED_GENOTYPE_MAP_KEY) || key.equals(VariantContext.UNPARSED_GENOTYPE_PARSER_KEY) )
|
||||
continue;
|
||||
|
||||
String outputValue = formatVCFField(field.getValue());
|
||||
|
|
|
|||
|
|
@ -14,5 +14,5 @@ public interface VCFWriter {
|
|||
*/
|
||||
public void close();
|
||||
|
||||
public void add(VariantContext vc, byte refBase);
|
||||
public void add(VariantContext vc);
|
||||
}
|
||||
|
|
@ -116,4 +116,57 @@ public class TextFormattingUtils {
|
|||
return bundle;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns the word starting positions within line, excluding the first position 0.
|
||||
* The returned list is compatible with splitFixedWidth.
|
||||
* @param line Text to parse.
|
||||
* @return the word starting positions within line, excluding the first position 0.
|
||||
*/
|
||||
public static List<Integer> getWordStarts(String line) {
|
||||
if (line == null)
|
||||
throw new ReviewedStingException("line is null");
|
||||
List<Integer> starts = new ArrayList<Integer>();
|
||||
int stop = line.length();
|
||||
for (int i = 1; i < stop; i++)
|
||||
if (Character.isWhitespace(line.charAt(i-1)))
|
||||
if(!Character.isWhitespace(line.charAt(i)))
|
||||
starts.add(i);
|
||||
return starts;
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses a fixed width line of text.
|
||||
* @param line Text to parse.
|
||||
* @param columnStarts the column starting positions within line, excluding the first position 0.
|
||||
* @return The parsed string array with each entry trimmed.
|
||||
*/
|
||||
public static String[] splitFixedWidth(String line, List<Integer> columnStarts) {
|
||||
if (line == null)
|
||||
throw new ReviewedStingException("line is null");
|
||||
if (columnStarts == null)
|
||||
throw new ReviewedStingException("columnStarts is null");
|
||||
int startCount = columnStarts.size();
|
||||
String[] row = new String[startCount + 1];
|
||||
if (startCount == 0) {
|
||||
row[0] = line.trim();
|
||||
} else {
|
||||
row[0] = line.substring(0, columnStarts.get(0)).trim();
|
||||
for (int i = 1; i < startCount; i++)
|
||||
row[i] = line.substring(columnStarts.get(i - 1), columnStarts.get(i)).trim();
|
||||
row[startCount] = line.substring(columnStarts.get(startCount - 1)).trim();
|
||||
}
|
||||
return row;
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses a line of text by whitespace.
|
||||
* @param line Text to parse.
|
||||
* @return The parsed string array.
|
||||
*/
|
||||
public static String[] splitWhiteSpace(String line) {
|
||||
if (line == null)
|
||||
throw new ReviewedStingException("line is null");
|
||||
return line.trim().split("\\s+");
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -27,15 +27,15 @@ public class MutableVariantContext extends VariantContext {
|
|||
}
|
||||
|
||||
public MutableVariantContext(String source, String contig, long start, long stop, Collection<Allele> alleles) {
|
||||
this(source, contig, start, stop, alleles, NO_GENOTYPES, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null);
|
||||
super(source, contig, start, stop, alleles, NO_GENOTYPES, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null);
|
||||
}
|
||||
|
||||
public MutableVariantContext(String source, String contig, long start, long stop, Collection<Allele> alleles, Collection<Genotype> genotypes) {
|
||||
this(source, contig, start, stop, alleles, genotypes, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null);
|
||||
super(source, contig, start, stop, alleles, genotypes, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null);
|
||||
}
|
||||
|
||||
public MutableVariantContext(VariantContext parent) {
|
||||
this(parent.getSource(), parent.contig, parent.start, parent.stop, parent.getAlleles(), parent.getGenotypes(), parent.getNegLog10PError(), parent.getFilters(), parent.getAttributes());
|
||||
super(parent.getSource(), parent.contig, parent.start, parent.stop, parent.getAlleles(), parent.getGenotypes(), parent.getNegLog10PError(), parent.getFilters(), parent.getAttributes(), parent.getReferenceBaseForIndel());
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@ import org.broad.tribble.TribbleException;
|
|||
import org.broad.tribble.util.ParsingUtils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFParser;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -163,11 +164,12 @@ import java.util.*;
|
|||
public class VariantContext implements Feature { // to enable tribble intergration
|
||||
protected InferredGeneticContext commonInfo = null;
|
||||
public final static double NO_NEG_LOG_10PERROR = InferredGeneticContext.NO_NEG_LOG_10PERROR;
|
||||
public final static String REFERENCE_BASE_FOR_INDEL_KEY = "_REFERENCE_BASE_FOR_INDEL_";
|
||||
public final static String UNPARSED_GENOTYPE_MAP_KEY = "_UNPARSED_GENOTYPE_MAP_";
|
||||
public final static String UNPARSED_GENOTYPE_PARSER_KEY = "_UNPARSED_GENOTYPE_PARSER_";
|
||||
public final static String ID_KEY = "ID";
|
||||
|
||||
private final Byte REFERENCE_BASE_FOR_INDEL;
|
||||
|
||||
public final static Set<String> PASSES_FILTERS = Collections.unmodifiableSet(new LinkedHashSet<String>());
|
||||
|
||||
/** The location of this VariantContext */
|
||||
|
|
@ -205,6 +207,24 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
// ---------------------------------------------------------------------------------------------------------
|
||||
|
||||
|
||||
/**
|
||||
* the complete constructor. Makes a complete VariantContext from its arguments
|
||||
*
|
||||
* @param source source
|
||||
* @param contig the contig
|
||||
* @param start the start base (one based)
|
||||
* @param stop the stop reference base (one based)
|
||||
* @param alleles alleles
|
||||
* @param genotypes genotypes map
|
||||
* @param negLog10PError qual
|
||||
* @param filters filters: use null for unfiltered and empty set for passes filters
|
||||
* @param attributes attributes
|
||||
* @param referenceBaseForIndel padded reference base
|
||||
*/
|
||||
public VariantContext(String source, String contig, long start, long stop, Collection<Allele> alleles, Map<String, Genotype> genotypes, double negLog10PError, Set<String> filters, Map<String, ?> attributes, Byte referenceBaseForIndel) {
|
||||
this(source, contig, start, stop, alleles, genotypes, negLog10PError, filters, attributes, referenceBaseForIndel, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* the complete constructor. Makes a complete VariantContext from its arguments
|
||||
*
|
||||
|
|
@ -219,7 +239,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
* @param attributes attributes
|
||||
*/
|
||||
public VariantContext(String source, String contig, long start, long stop, Collection<Allele> alleles, Map<String, Genotype> genotypes, double negLog10PError, Set<String> filters, Map<String, ?> attributes) {
|
||||
this(source, contig, start, stop, alleles, genotypes, negLog10PError, filters, attributes, false);
|
||||
this(source, contig, start, stop, alleles, genotypes, negLog10PError, filters, attributes, null, false);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -239,7 +259,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
* @param attributes attributes
|
||||
*/
|
||||
public VariantContext(String source, String contig, long start, long stop, Collection<Allele> alleles, double negLog10PError, Set<String> filters, Map<String, ?> attributes) {
|
||||
this(source, contig, start, stop, alleles, NO_GENOTYPES, negLog10PError, filters, attributes, true);
|
||||
this(source, contig, start, stop, alleles, NO_GENOTYPES, negLog10PError, filters, attributes, null, true);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -256,7 +276,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
* @param attributes attributes
|
||||
*/
|
||||
public VariantContext(String source, String contig, long start, long stop, Collection<Allele> alleles, Collection<Genotype> genotypes, double negLog10PError, Set<String> filters, Map<String, ?> attributes) {
|
||||
this(source, contig, start, stop, alleles, genotypes != null ? genotypeCollectionToMap(new TreeMap<String, Genotype>(), genotypes) : null, negLog10PError, filters, attributes, false);
|
||||
this(source, contig, start, stop, alleles, genotypes != null ? genotypeCollectionToMap(new TreeMap<String, Genotype>(), genotypes) : null, negLog10PError, filters, attributes, null, false);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -269,7 +289,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
* @param alleles alleles
|
||||
*/
|
||||
public VariantContext(String source, String contig, long start, long stop, Collection<Allele> alleles) {
|
||||
this(source, contig, start, stop, alleles, NO_GENOTYPES, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null, false);
|
||||
this(source, contig, start, stop, alleles, NO_GENOTYPES, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null, null, false);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -292,7 +312,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
* @param other the VariantContext to copy
|
||||
*/
|
||||
public VariantContext(VariantContext other) {
|
||||
this(other.getSource(), other.getChr(), other.getStart(), other.getEnd() , other.getAlleles(), other.getGenotypes(), other.getNegLog10PError(), other.filtersWereApplied() ? other.getFilters() : null, other.getAttributes(), false);
|
||||
this(other.getSource(), other.getChr(), other.getStart(), other.getEnd() , other.getAlleles(), other.getGenotypes(), other.getNegLog10PError(), other.filtersWereApplied() ? other.getFilters() : null, other.getAttributes(), other.REFERENCE_BASE_FOR_INDEL, false);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -307,8 +327,13 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
* @param negLog10PError qual
|
||||
* @param filters filters: use null for unfiltered and empty set for passes filters
|
||||
* @param attributes attributes
|
||||
* @param referenceBaseForIndel padded reference base
|
||||
* @param genotypesAreUnparsed true if the genotypes have not yet been parsed
|
||||
*/
|
||||
private VariantContext(String source, String contig, long start, long stop, Collection<Allele> alleles, Map<String, Genotype> genotypes, double negLog10PError, Set<String> filters, Map<String, ?> attributes, boolean genotypesAreUnparsed) {
|
||||
private VariantContext(String source, String contig, long start, long stop,
|
||||
Collection<Allele> alleles, Map<String, Genotype> genotypes,
|
||||
double negLog10PError, Set<String> filters, Map<String, ?> attributes,
|
||||
Byte referenceBaseForIndel, boolean genotypesAreUnparsed) {
|
||||
if ( contig == null ) { throw new IllegalArgumentException("Contig cannot be null"); }
|
||||
this.contig = contig;
|
||||
this.start = start;
|
||||
|
|
@ -323,6 +348,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
|
||||
this.commonInfo = new InferredGeneticContext(source, negLog10PError, filters, attributes);
|
||||
filtersWereAppliedToContext = filters != null;
|
||||
REFERENCE_BASE_FOR_INDEL = referenceBaseForIndel;
|
||||
|
||||
if ( alleles == null ) { throw new IllegalArgumentException("Alleles cannot be null"); }
|
||||
|
||||
|
|
@ -355,23 +381,23 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
// ---------------------------------------------------------------------------------------------------------
|
||||
|
||||
public static VariantContext modifyGenotypes(VariantContext vc, Map<String, Genotype> genotypes) {
|
||||
return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, new HashMap<String, Object>(vc.getAttributes()), false);
|
||||
return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, new HashMap<String, Object>(vc.getAttributes()), vc.getReferenceBaseForIndel(), false);
|
||||
}
|
||||
|
||||
public static VariantContext modifyLocation(VariantContext vc, String chr, int start, int end) {
|
||||
return new VariantContext(vc.getSource(), chr, start, end, vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, new HashMap<String, Object>(vc.getAttributes()), true);
|
||||
return new VariantContext(vc.getSource(), chr, start, end, vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, new HashMap<String, Object>(vc.getAttributes()), vc.getReferenceBaseForIndel(), true);
|
||||
}
|
||||
|
||||
public static VariantContext modifyFilters(VariantContext vc, Set<String> filters) {
|
||||
return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd() , vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), filters, new HashMap<String, Object>(vc.getAttributes()), true);
|
||||
return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd() , vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), filters, new HashMap<String, Object>(vc.getAttributes()), vc.getReferenceBaseForIndel(), true);
|
||||
}
|
||||
|
||||
public static VariantContext modifyAttributes(VariantContext vc, Map<String, Object> attributes) {
|
||||
return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, attributes, true);
|
||||
return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, attributes, vc.getReferenceBaseForIndel(), true);
|
||||
}
|
||||
|
||||
public static VariantContext modifyPErrorFiltersAndAttributes(VariantContext vc, double negLog10PError, Set<String> filters, Map<String, Object> attributes) {
|
||||
return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), vc.genotypes, negLog10PError, filters, attributes, true);
|
||||
return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), vc.genotypes, negLog10PError, filters, attributes, vc.getReferenceBaseForIndel(), true);
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
|
|
@ -414,7 +440,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
* @return vc subcontext
|
||||
*/
|
||||
public VariantContext subContextFromGenotypes(Collection<Genotype> genotypes, Set<Allele> alleles) {
|
||||
return new VariantContext(getSource(), contig, start, stop, alleles, genotypes, getNegLog10PError(), filtersWereApplied() ? getFilters() : null, getAttributes());
|
||||
return new VariantContext(getSource(), contig, start, stop, alleles, genotypes != null ? genotypeCollectionToMap(new TreeMap<String, Genotype>(), genotypes) : null, getNegLog10PError(), filtersWereApplied() ? getFilters() : null, getAttributes(), getReferenceBaseForIndel());
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -603,6 +629,15 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
return (String)commonInfo.getAttribute(ID_KEY);
|
||||
}
|
||||
|
||||
public boolean hasReferenceBaseForIndel() {
|
||||
return REFERENCE_BASE_FOR_INDEL != null;
|
||||
}
|
||||
|
||||
// the indel base that gets stripped off for indels
|
||||
public Byte getReferenceBaseForIndel() {
|
||||
return REFERENCE_BASE_FOR_INDEL;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// get routines to access context info fields
|
||||
|
|
@ -1151,6 +1186,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
|
||||
private boolean validate(boolean throwException) {
|
||||
try {
|
||||
validateReferencePadding();
|
||||
validateAlleles();
|
||||
validateGenotypes();
|
||||
} catch ( IllegalArgumentException e ) {
|
||||
|
|
@ -1163,6 +1199,13 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
return true;
|
||||
}
|
||||
|
||||
private void validateReferencePadding() {
|
||||
boolean needsPadding = hasSymbolicAlleles() || (getReference().length() == getEnd() - getStart()); // off by one because padded base was removed
|
||||
|
||||
if ( needsPadding && !hasReferenceBaseForIndel() )
|
||||
throw new ReviewedStingException("Badly formed variant context at location " + getChr() + ":" + getStart() + "; no padded reference base was provided.");
|
||||
}
|
||||
|
||||
private void validateAlleles() {
|
||||
// check alleles
|
||||
boolean alreadySeenRef = false, alreadySeenNull = false;
|
||||
|
|
@ -1221,16 +1264,6 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
//
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
|
||||
// the indel base that gets stripped off for indels
|
||||
public boolean hasReferenceBaseForIndel() {
|
||||
return hasAttribute(REFERENCE_BASE_FOR_INDEL_KEY);
|
||||
}
|
||||
|
||||
// the indel base that gets stripped off for indels
|
||||
public byte getReferenceBaseForIndel() {
|
||||
return hasReferenceBaseForIndel() ? (Byte)getAttribute(REFERENCE_BASE_FOR_INDEL_KEY) : (byte)'N';
|
||||
}
|
||||
|
||||
private void determineType() {
|
||||
if ( type == null ) {
|
||||
switch ( getNAlleles() ) {
|
||||
|
|
@ -1357,8 +1390,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
return false;
|
||||
}
|
||||
|
||||
public static VariantContext createVariantContextWithPaddedAlleles(VariantContext inputVC, byte inputRefBase, boolean refBaseShouldBeAppliedToEndOfAlleles) {
|
||||
Allele refAllele = inputVC.getReference();
|
||||
public static VariantContext createVariantContextWithPaddedAlleles(VariantContext inputVC, boolean refBaseShouldBeAppliedToEndOfAlleles) {
|
||||
|
||||
// see if we need to pad common reference base from all alleles
|
||||
boolean padVC;
|
||||
|
|
@ -1368,31 +1400,20 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
long locLength = (inputVC.getEnd() - inputVC.getStart()) + 1;
|
||||
if (inputVC.hasSymbolicAlleles())
|
||||
padVC = true;
|
||||
else if (refAllele.length() == locLength)
|
||||
else if (inputVC.getReference().length() == locLength)
|
||||
padVC = false;
|
||||
else if (refAllele.length() == locLength-1)
|
||||
else if (inputVC.getReference().length() == locLength-1)
|
||||
padVC = true;
|
||||
else throw new IllegalArgumentException("Badly formed variant context at location " + String.valueOf(inputVC.getStart()) +
|
||||
" in contig " + inputVC.getChr() + ". Reference length must be at most one base shorter than location size");
|
||||
|
||||
|
||||
// nothing to do if we don't need to pad bases
|
||||
if (padVC) {
|
||||
Byte refByte;
|
||||
|
||||
Map<String,Object> attributes = inputVC.getAttributes();
|
||||
if ( !inputVC.hasReferenceBaseForIndel() )
|
||||
throw new ReviewedStingException("Badly formed variant context at location " + inputVC.getChr() + ":" + inputVC.getStart() + "; no padded reference base is available.");
|
||||
|
||||
// upper-case for consistency; note that we can safely make these casts because the input is constrained to be a byte
|
||||
inputRefBase = (byte)Character.toUpperCase((char)inputRefBase);
|
||||
if (attributes.containsKey(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY))
|
||||
refByte = (Byte)attributes.get(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY);
|
||||
else if (inputRefBase == 'A' || inputRefBase == 'T' || inputRefBase == 'C' || inputRefBase == 'G' || inputRefBase == 'N')
|
||||
refByte = inputRefBase;
|
||||
else
|
||||
throw new IllegalArgumentException("Error when trying to pad Variant Context at location " + String.valueOf(inputVC.getStart())
|
||||
+ " in contig " + inputVC.getChr() +
|
||||
". Either input reference base ("+(char)inputRefBase+
|
||||
", ascii code="+inputRefBase+") must be a regular base, or input VC must contain reference base key");
|
||||
Byte refByte = inputVC.getReferenceBaseForIndel();
|
||||
|
||||
List<Allele> alleles = new ArrayList<Allele>();
|
||||
Map<String, Genotype> genotypes = new TreeMap<String, Genotype>();
|
||||
|
|
@ -1444,11 +1465,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
|
||||
// Do not change the filter state if filters were not applied to this context
|
||||
Set<String> inputVCFilters = inputVC.filtersWereAppliedToContext ? inputVC.getFilters() : null;
|
||||
return new VariantContext(inputVC.getSource(), inputVC.getChr(), inputVC.getStart(), inputVC.getEnd(), alleles, genotypes, inputVC.getNegLog10PError(),
|
||||
inputVCFilters, attributes);
|
||||
|
||||
|
||||
|
||||
return new VariantContext(inputVC.getSource(), inputVC.getChr(), inputVC.getStart(), inputVC.getEnd(), alleles, genotypes, inputVC.getNegLog10PError(), inputVCFilters, inputVC.getAttributes());
|
||||
}
|
||||
else
|
||||
return inputVC;
|
||||
|
|
|
|||
|
|
@ -295,10 +295,7 @@ public class VariantContextUtils {
|
|||
@Requires("vc != null")
|
||||
@Ensures("result != null")
|
||||
public static VariantContext sitesOnlyVariantContext(VariantContext vc) {
|
||||
return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(),
|
||||
vc.getAlleles(), vc.getNegLog10PError(),
|
||||
vc.filtersWereApplied() ? vc.getFilters() : null,
|
||||
vc.getAttributes());
|
||||
return VariantContext.modifyGenotypes(vc, null);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -449,7 +446,7 @@ public class VariantContextUtils {
|
|||
FilteredRecordMergeType filteredRecordMergeType, GenotypeMergeType genotypeMergeOptions,
|
||||
boolean annotateOrigin, boolean printMessages, byte inputRefBase ) {
|
||||
|
||||
return simpleMerge(genomeLocParser, unsortedVCs, priorityListOfVCs, filteredRecordMergeType, genotypeMergeOptions, annotateOrigin, printMessages, inputRefBase, "set", false, false);
|
||||
return simpleMerge(genomeLocParser, unsortedVCs, priorityListOfVCs, filteredRecordMergeType, genotypeMergeOptions, annotateOrigin, printMessages, "set", false, false);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -464,7 +461,6 @@ public class VariantContextUtils {
|
|||
* @param genotypeMergeOptions merge option for genotypes
|
||||
* @param annotateOrigin should we annotate the set it came from?
|
||||
* @param printMessages should we print messages?
|
||||
* @param inputRefBase the ref base
|
||||
* @param setKey the key name of the set
|
||||
* @param filteredAreUncalled are filtered records uncalled?
|
||||
* @param mergeInfoWithMaxAC should we merge in info from the VC with maximum allele count?
|
||||
|
|
@ -472,7 +468,7 @@ public class VariantContextUtils {
|
|||
*/
|
||||
public static VariantContext simpleMerge(GenomeLocParser genomeLocParser, Collection<VariantContext> unsortedVCs, List<String> priorityListOfVCs,
|
||||
FilteredRecordMergeType filteredRecordMergeType, GenotypeMergeType genotypeMergeOptions,
|
||||
boolean annotateOrigin, boolean printMessages, byte inputRefBase, String setKey,
|
||||
boolean annotateOrigin, boolean printMessages, String setKey,
|
||||
boolean filteredAreUncalled, boolean mergeInfoWithMaxAC ) {
|
||||
if ( unsortedVCs == null || unsortedVCs.size() == 0 )
|
||||
return null;
|
||||
|
|
@ -490,7 +486,7 @@ public class VariantContextUtils {
|
|||
for (VariantContext vc : prepaddedVCs) {
|
||||
// also a reasonable place to remove filtered calls, if needed
|
||||
if ( ! filteredAreUncalled || vc.isNotFiltered() )
|
||||
VCs.add(VariantContext.createVariantContextWithPaddedAlleles(vc,inputRefBase,false));
|
||||
VCs.add(VariantContext.createVariantContextWithPaddedAlleles(vc, false));
|
||||
}
|
||||
if ( VCs.size() == 0 ) // everything is filtered out and we're filteredAreUncalled
|
||||
return null;
|
||||
|
|
|
|||
|
|
@ -0,0 +1,55 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.report;
|
||||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
public class GATKReportUnitTest extends BaseTest {
|
||||
@Test
|
||||
public void testParse() throws Exception {
|
||||
String reportPath = validationDataLocation + "exampleGATKReport.eval";
|
||||
GATKReport report = new GATKReport(reportPath);
|
||||
|
||||
GATKReportTable countVariants = report.getTable("CountVariants");
|
||||
Assert.assertEquals(countVariants.getVersion(), GATKReportVersion.V0_1);
|
||||
Object countVariantsPK = countVariants.getPrimaryKey("none.eval.none.all");
|
||||
Assert.assertEquals(countVariants.get(countVariantsPK, "nProcessedLoci"), "100000");
|
||||
Assert.assertEquals(countVariants.get(countVariantsPK, "nNoCalls"), "99872");
|
||||
|
||||
GATKReportTable validationReport = report.getTable("ValidationReport");
|
||||
Assert.assertEquals(validationReport.getVersion(), GATKReportVersion.V0_1);
|
||||
Object validationReportPK = countVariants.getPrimaryKey("none.eval.none.known");
|
||||
Assert.assertEquals(validationReport.get(validationReportPK, "sensitivity"), "NaN");
|
||||
|
||||
GATKReportTable simpleMetricsByAC = report.getTable("SimpleMetricsByAC.metrics");
|
||||
Assert.assertEquals(simpleMetricsByAC.getVersion(), GATKReportVersion.V0_1);
|
||||
Object simpleMetricsByACPK = simpleMetricsByAC.getPrimaryKey("none.eval.none.novel.ac2");
|
||||
Assert.assertEquals(simpleMetricsByAC.get(simpleMetricsByACPK, "AC"), "2");
|
||||
|
||||
Assert.assertFalse(simpleMetricsByAC.containsPrimaryKey("none.eval.none.novel.ac2.bad"));
|
||||
}
|
||||
}
|
||||
|
|
@ -30,8 +30,6 @@ import org.testng.annotations.Test;
|
|||
|
||||
import java.io.File;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
public class DiffObjectsIntegrationTest extends WalkerTest {
|
||||
private class TestParams extends TestDataProvider {
|
||||
|
|
@ -52,8 +50,8 @@ public class DiffObjectsIntegrationTest extends WalkerTest {
|
|||
|
||||
@DataProvider(name = "data")
|
||||
public Object[][] createData() {
|
||||
new TestParams(testDir + "diffTestMaster.vcf", testDir + "diffTestTest.vcf", "4d9f4636de05b93c354d05011264546e");
|
||||
new TestParams(testDir + "exampleBAM.bam", testDir + "exampleBAM.simple.bam", "37e6efd833b5cd6d860a9df3df9713fc");
|
||||
new TestParams(testDir + "diffTestMaster.vcf", testDir + "diffTestTest.vcf", "92311de76dda3f38aac289d807ef23d0");
|
||||
new TestParams(testDir + "exampleBAM.bam", testDir + "exampleBAM.simple.bam", "0c69412c385fda50210f2a612e1ffe4a");
|
||||
return TestParams.getTests(TestParams.class);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -16,7 +16,7 @@ public class RecalibrationWalkersPerformanceTest extends WalkerTest {
|
|||
" -L chr1:1-50,000,000" +
|
||||
" -standard" +
|
||||
" -OQ" +
|
||||
" -B:dbsnp,vcf " + GATKDataLocation + "dbsnp_132.b36.excluding_sites_after_129.vcf" +
|
||||
" -B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_hg18.vcf" +
|
||||
" -recalFile /dev/null" + moreArgs,
|
||||
0,
|
||||
new ArrayList<String>(0));
|
||||
|
|
@ -31,7 +31,7 @@ public class RecalibrationWalkersPerformanceTest extends WalkerTest {
|
|||
" -L " + evaluationDataLocation + "whole_exome_agilent_designed_120.targets.chr1.interval_list" +
|
||||
" -standard" +
|
||||
" -OQ" +
|
||||
" -B:dbsnp,vcf " + GATKDataLocation + "dbsnp_132.b36.excluding_sites_after_129.vcf" +
|
||||
" -B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132.hg18.vcf" +
|
||||
" -recalFile /dev/null" + moreArgs,
|
||||
0,
|
||||
new ArrayList<String>(0));
|
||||
|
|
|
|||
|
|
@ -120,6 +120,6 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test public void complexTestFull() { combineComplexSites("", "b5a53ee92bdaacd2bb3327e9004ae058"); }
|
||||
@Test public void complexTestMinimal() { combineComplexSites(" -minimalVCF", "df96cb3beb2dbb5e02f80abec7d3571e"); }
|
||||
@Test public void complexTestSitesOnly() { combineComplexSites(" -sites_only", "f72a178137e25dbe0b931934cdc0079d"); }
|
||||
@Test public void complexTestSitesOnly() { combineComplexSites(" -sites_only", "f704caeaaaed6711943014b847fe381a"); }
|
||||
@Test public void complexTestSitesOnlyMinimal() { combineComplexSites(" -sites_only -minimalVCF", "f704caeaaaed6711943014b847fe381a"); }
|
||||
}
|
||||
|
|
@ -0,0 +1,46 @@
|
|||
/*
|
||||
* Copyright (c) 2010.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.variantutils;
|
||||
|
||||
import org.broadinstitute.sting.WalkerTest;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
/**
|
||||
* Tests LeftAlignVariants
|
||||
*/
|
||||
public class LeftAlignVariantsIntegrationTest extends WalkerTest {
|
||||
|
||||
@Test
|
||||
public void testLeftAlignment() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T LeftAlignVariants -o %s -R " + b37KGReference + " -B:variant,vcf " + validationDataLocation + "forLeftAlignVariantsTest.vcf -NO_HEADER",
|
||||
1,
|
||||
Arrays.asList("158b1d71b28c52e2789f164500b53732"));
|
||||
executeTest("test left alignment", spec);
|
||||
}
|
||||
}
|
||||
|
|
@ -98,7 +98,7 @@ public class VCFStreamingIntegrationTest extends WalkerTest {
|
|||
" -EV CompOverlap -noEV -noST" +
|
||||
" -o %s",
|
||||
1,
|
||||
Arrays.asList("f60729c900bc8368717653b3fad80d1e") //"f60729c900bc8368717653b3fad80d1e"
|
||||
Arrays.asList("ea09bf764adba9765b99921c5ba2c709")
|
||||
);
|
||||
executeTest("testVCFStreamingChain", selectTestSpec);
|
||||
|
||||
|
|
|
|||
|
|
@ -70,7 +70,7 @@ public class IndexFactoryUnitTest {
|
|||
CloseableTribbleIterator<VariantContext> it = source.iterator();
|
||||
while (it.hasNext() && (counter++ < maxRecords || maxRecords == -1) ) {
|
||||
VariantContext vc = it.next();
|
||||
writer.add(vc, vc.getReferenceBaseForIndel());
|
||||
writer.add(vc);
|
||||
}
|
||||
writer.close();
|
||||
|
||||
|
|
|
|||
|
|
@ -57,8 +57,8 @@ public class VCFWriterUnitTest extends BaseTest {
|
|||
VCFHeader header = createFakeHeader(metaData,additionalColumns);
|
||||
VCFWriter writer = new StandardVCFWriter(fakeVCFFile);
|
||||
writer.writeHeader(header);
|
||||
writer.add(createVC(header),"A".getBytes()[0]);
|
||||
writer.add(createVC(header),"A".getBytes()[0]);
|
||||
writer.add(createVC(header));
|
||||
writer.add(createVC(header));
|
||||
writer.close();
|
||||
VCFCodec reader = new VCFCodec();
|
||||
AsciiLineReader lineReader;
|
||||
|
|
@ -135,7 +135,7 @@ public class VCFWriterUnitTest extends BaseTest {
|
|||
genotypes.put(name,gt);
|
||||
|
||||
}
|
||||
return new VariantContext("RANDOM",loc.getContig(), loc.getStart(), loc.getStop(), alleles, genotypes, 0, filters, attributes);
|
||||
return new VariantContext("RANDOM",loc.getContig(), loc.getStart(), loc.getStop(), alleles, genotypes, 0, filters, attributes, (byte)'A');
|
||||
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,88 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.text;
|
||||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
|
||||
public class TextFormattingUtilsUnitTest extends BaseTest {
|
||||
@Test(expectedExceptions = ReviewedStingException.class)
|
||||
public void testSplitWhiteSpaceNullLine() {
|
||||
TextFormattingUtils.splitWhiteSpace(null);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSplitWhiteSpace() {
|
||||
Assert.assertEquals(TextFormattingUtils.splitWhiteSpace("foo bar baz"), new String[] { "foo", "bar", "baz" });
|
||||
Assert.assertEquals(TextFormattingUtils.splitWhiteSpace("foo bar baz"), new String[] { "foo", "bar", "baz" });
|
||||
Assert.assertEquals(TextFormattingUtils.splitWhiteSpace(" foo bar baz"), new String[] { "foo", "bar", "baz" });
|
||||
Assert.assertEquals(TextFormattingUtils.splitWhiteSpace(" foo bar baz "), new String[] { "foo", "bar", "baz" });
|
||||
Assert.assertEquals(TextFormattingUtils.splitWhiteSpace("foo bar baz "), new String[] { "foo", "bar", "baz" });
|
||||
Assert.assertEquals(TextFormattingUtils.splitWhiteSpace("\tfoo\tbar\tbaz\t"), new String[]{"foo", "bar", "baz"});
|
||||
}
|
||||
|
||||
@Test(expectedExceptions = ReviewedStingException.class)
|
||||
public void testGetWordStartsNullLine() {
|
||||
TextFormattingUtils.getWordStarts(null);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetWordStarts() {
|
||||
Assert.assertEquals(TextFormattingUtils.getWordStarts("foo bar baz"), Arrays.asList(4, 8));
|
||||
Assert.assertEquals(TextFormattingUtils.getWordStarts("foo bar baz"), Arrays.asList(5, 10));
|
||||
Assert.assertEquals(TextFormattingUtils.getWordStarts(" foo bar baz"), Arrays.asList(1, 5, 9));
|
||||
Assert.assertEquals(TextFormattingUtils.getWordStarts(" foo bar baz "), Arrays.asList(1, 5, 9));
|
||||
Assert.assertEquals(TextFormattingUtils.getWordStarts("foo bar baz "), Arrays.asList(4, 8));
|
||||
Assert.assertEquals(TextFormattingUtils.getWordStarts("\tfoo\tbar\tbaz\t"), Arrays.asList(1, 5, 9));
|
||||
}
|
||||
|
||||
@Test(expectedExceptions = ReviewedStingException.class)
|
||||
public void testSplitFixedWidthNullLine() {
|
||||
TextFormattingUtils.splitFixedWidth(null, Collections.<Integer>emptyList());
|
||||
}
|
||||
|
||||
@Test(expectedExceptions = ReviewedStingException.class)
|
||||
public void testSplitFixedWidthNullColumnStarts() {
|
||||
TextFormattingUtils.splitFixedWidth("foo bar baz", null);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSplitFixedWidth() {
|
||||
Assert.assertEquals(TextFormattingUtils.splitFixedWidth("foo bar baz", Arrays.asList(4, 8)), new String[] { "foo", "bar", "baz" });
|
||||
Assert.assertEquals(TextFormattingUtils.splitFixedWidth("foo bar baz", Arrays.asList(5, 10)), new String[] { "foo", "bar", "baz" });
|
||||
Assert.assertEquals(TextFormattingUtils.splitFixedWidth(" foo bar baz", Arrays.asList(5, 9)), new String[] { "foo", "bar", "baz" });
|
||||
Assert.assertEquals(TextFormattingUtils.splitFixedWidth(" foo bar baz ", Arrays.asList(5, 9)), new String[] { "foo", "bar", "baz" });
|
||||
Assert.assertEquals(TextFormattingUtils.splitFixedWidth("foo bar baz ", Arrays.asList(4, 8)), new String[] { "foo", "bar", "baz" });
|
||||
Assert.assertEquals(TextFormattingUtils.splitFixedWidth("\tfoo\tbar\tbaz\t", Arrays.asList(5, 9)), new String[] { "foo", "bar", "baz" });
|
||||
Assert.assertEquals(TextFormattingUtils.splitFixedWidth("f o b r b z", Arrays.asList(4, 8)), new String[] { "f o", "b r", "b z" });
|
||||
Assert.assertEquals(TextFormattingUtils.splitFixedWidth(" f o b r b z", Arrays.asList(4, 8)), new String[] { "f o", "b r", "b z" });
|
||||
Assert.assertEquals(TextFormattingUtils.splitFixedWidth(" f o b r b z", Arrays.asList(4, 8)), new String[] { "f", "o b", "r b z" });
|
||||
}
|
||||
}
|
||||
|
|
@ -92,45 +92,45 @@ public class VariantContextUnitTest {
|
|||
|
||||
// test INDELs
|
||||
alleles = Arrays.asList(Aref, ATC);
|
||||
vc = new VariantContext("test", snpLoc,snpLocStart, snpLocStop, alleles);
|
||||
vc = new VariantContext("test", snpLoc,snpLocStart, snpLocStop, alleles, null, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null, (byte)'A');
|
||||
Assert.assertEquals(vc.getType(), VariantContext.Type.INDEL);
|
||||
|
||||
alleles = Arrays.asList(ATCref, A);
|
||||
vc = new VariantContext("test", snpLoc,snpLocStart, snpLocStop+2, alleles);
|
||||
vc = new VariantContext("test", snpLoc,snpLocStart, snpLocStop+2, alleles, null, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null, (byte)'A');
|
||||
Assert.assertEquals(vc.getType(), VariantContext.Type.INDEL);
|
||||
|
||||
alleles = Arrays.asList(Tref, TA, TC);
|
||||
vc = new VariantContext("test", snpLoc,snpLocStart, snpLocStop, alleles);
|
||||
vc = new VariantContext("test", snpLoc,snpLocStart, snpLocStop, alleles, null, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null, (byte)'A');
|
||||
Assert.assertEquals(vc.getType(), VariantContext.Type.INDEL);
|
||||
|
||||
alleles = Arrays.asList(ATCref, A, AC);
|
||||
vc = new VariantContext("test", snpLoc,snpLocStart, snpLocStop+2, alleles);
|
||||
vc = new VariantContext("test", snpLoc,snpLocStart, snpLocStop+2, alleles, null, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null, (byte)'A');
|
||||
Assert.assertEquals(vc.getType(), VariantContext.Type.INDEL);
|
||||
|
||||
alleles = Arrays.asList(ATCref, A, Allele.create("ATCTC"));
|
||||
vc = new VariantContext("test", snpLoc,snpLocStart, snpLocStop+2, alleles);
|
||||
vc = new VariantContext("test", snpLoc,snpLocStart, snpLocStop+2, alleles, null, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null, (byte)'A');
|
||||
Assert.assertEquals(vc.getType(), VariantContext.Type.INDEL);
|
||||
|
||||
// test MIXED
|
||||
alleles = Arrays.asList(TAref, T, TC);
|
||||
vc = new VariantContext("test", snpLoc,snpLocStart, snpLocStop+1, alleles);
|
||||
vc = new VariantContext("test", snpLoc,snpLocStart, snpLocStop+1, alleles, null, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null, (byte)'A');
|
||||
Assert.assertEquals(vc.getType(), VariantContext.Type.MIXED);
|
||||
|
||||
alleles = Arrays.asList(TAref, T, AC);
|
||||
vc = new VariantContext("test", snpLoc,snpLocStart, snpLocStop+1, alleles);
|
||||
vc = new VariantContext("test", snpLoc,snpLocStart, snpLocStop+1, alleles, null, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null, (byte)'A');
|
||||
Assert.assertEquals(vc.getType(), VariantContext.Type.MIXED);
|
||||
|
||||
alleles = Arrays.asList(ACref, ATC, AT);
|
||||
vc = new VariantContext("test", snpLoc,snpLocStart, snpLocStop+1, alleles);
|
||||
vc = new VariantContext("test", snpLoc,snpLocStart, snpLocStop+1, alleles, null, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null, (byte)'A');
|
||||
Assert.assertEquals(vc.getType(), VariantContext.Type.MIXED);
|
||||
|
||||
alleles = Arrays.asList(Aref, T, symbolic);
|
||||
vc = new VariantContext("test", snpLoc,snpLocStart, snpLocStop, alleles);
|
||||
vc = new VariantContext("test", snpLoc,snpLocStart, snpLocStop, alleles, null, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null, (byte)'A');
|
||||
Assert.assertEquals(vc.getType(), VariantContext.Type.MIXED);
|
||||
|
||||
// test SYMBOLIC
|
||||
alleles = Arrays.asList(Tref, symbolic);
|
||||
vc = new VariantContext("test", snpLoc,snpLocStart, snpLocStop, alleles);
|
||||
vc = new VariantContext("test", snpLoc,snpLocStart, snpLocStop, alleles, null, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null, (byte)'A');
|
||||
Assert.assertEquals(vc.getType(), VariantContext.Type.SYMBOLIC);
|
||||
}
|
||||
|
||||
|
|
@ -191,7 +191,7 @@ public class VariantContextUnitTest {
|
|||
@Test
|
||||
public void testCreatingDeletionVariantContext() {
|
||||
List<Allele> alleles = Arrays.asList(ATCref, del);
|
||||
VariantContext vc = new VariantContext("test", delLoc, delLocStart, delLocStop, alleles);
|
||||
VariantContext vc = new VariantContext("test", delLoc, delLocStart, delLocStop, alleles, null, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null, (byte)'A');
|
||||
|
||||
Assert.assertEquals(vc.getChr(), delLoc);
|
||||
Assert.assertEquals(vc.getStart(), delLocStart);
|
||||
|
|
@ -218,7 +218,7 @@ public class VariantContextUnitTest {
|
|||
@Test
|
||||
public void testCreatingInsertionVariantContext() {
|
||||
List<Allele> alleles = Arrays.asList(delRef, ATC);
|
||||
VariantContext vc = new VariantContext("test", insLoc, insLocStart, insLocStop, alleles);
|
||||
VariantContext vc = new VariantContext("test", insLoc, insLocStart, insLocStop, alleles, null, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null, (byte)'A');
|
||||
|
||||
Assert.assertEquals(vc.getChr(), insLoc);
|
||||
Assert.assertEquals(vc.getStart(), insLocStart);
|
||||
|
|
@ -251,7 +251,7 @@ public class VariantContextUnitTest {
|
|||
new VariantContext("test", insLoc, insLocStart, insLocStop, Arrays.asList(delRef, del));
|
||||
}
|
||||
|
||||
@Test (expectedExceptions = IllegalArgumentException.class)
|
||||
@Test (expectedExceptions = IllegalStateException.class)
|
||||
public void testBadConstructorArgs3() {
|
||||
new VariantContext("test", insLoc, insLocStart, insLocStop, Arrays.asList(del));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -59,10 +59,10 @@ class ExampleUnifiedGenotyper extends QScript {
|
|||
evalUnfiltered.rodBind :+= RodBind("eval", "VCF", genotyper.out)
|
||||
evalUnfiltered.out = swapExt(genotyper.out, "vcf", "eval")
|
||||
|
||||
variantFilter.rodBind :+= RodBind("vcf", "VCF", genotyper.out)
|
||||
variantFilter.rodBind :+= RodBind("variant", "VCF", genotyper.out)
|
||||
variantFilter.out = swapExt(qscript.bamFile, "bam", "filtered.vcf")
|
||||
variantFilter.filterName = filterNames
|
||||
variantFilter.filterExpression = filterExpressions
|
||||
variantFilter.filterExpression = filterExpressions.map("\"" + _ + "\"")
|
||||
|
||||
evalFiltered.rodBind :+= RodBind("eval", "VCF", variantFilter.out)
|
||||
evalFiltered.out = swapExt(variantFilter.out, "vcf", "eval")
|
||||
|
|
|
|||
|
|
@ -34,8 +34,8 @@ import org.broadinstitute.sting.BaseTest
|
|||
import org.broadinstitute.sting.MD5DB
|
||||
import org.broadinstitute.sting.queue.QCommandLine
|
||||
import org.broadinstitute.sting.queue.util.{Logging, ProcessController}
|
||||
import java.io.{FileNotFoundException, File}
|
||||
import org.broadinstitute.sting.gatk.report.GATKReportParser
|
||||
import java.io.File
|
||||
import org.broadinstitute.sting.gatk.report.GATKReport
|
||||
import org.apache.commons.io.FileUtils
|
||||
import org.broadinstitute.sting.queue.engine.CommandLinePluginManager
|
||||
|
||||
|
|
@ -118,12 +118,11 @@ object PipelineTest extends BaseTest with Logging {
|
|||
// write the report to the shared validation data location
|
||||
val formatter = new SimpleDateFormat("yyyy.MM.dd.HH.mm.ss")
|
||||
val reportLocation = "%s%s/%s/validation.%s.eval".format(validationReportsDataLocation, jobRunner, name, formatter.format(new Date))
|
||||
val report = new File(reportLocation)
|
||||
val reportFile = new File(reportLocation)
|
||||
|
||||
FileUtils.copyFile(new File(runDir(name, jobRunner) + evalSpec.evalReport), report);
|
||||
FileUtils.copyFile(new File(runDir(name, jobRunner) + evalSpec.evalReport), reportFile);
|
||||
|
||||
val parser = new GATKReportParser
|
||||
parser.parse(report)
|
||||
val report = new GATKReport(reportFile);
|
||||
|
||||
var allInRange = true
|
||||
|
||||
|
|
@ -131,7 +130,9 @@ object PipelineTest extends BaseTest with Logging {
|
|||
println(name + " validation values:")
|
||||
println(" value (min,target,max) table key metric")
|
||||
for (validation <- evalSpec.validations) {
|
||||
val value = parser.getValue(validation.table, validation.key, validation.metric)
|
||||
val table = report.getTable(validation.table)
|
||||
val key = table.getPrimaryKey(validation.key)
|
||||
val value = String.valueOf(table.get(key, validation.metric))
|
||||
val inRange = if (value == null) false else validation.inRange(value)
|
||||
val flag = if (!inRange) "*" else " "
|
||||
println(" %s %s (%s,%s,%s) %s %s %s".format(flag, value, validation.min, validation.target, validation.max, validation.table, validation.key, validation.metric))
|
||||
|
|
|
|||
Loading…
Reference in New Issue