Merge branch 'master' of ssh://nickel.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Ryan Poplin 2012-03-25 23:04:47 -04:00
commit 1fa66f76c9
53 changed files with 1577 additions and 2901 deletions

View File

@ -99,69 +99,77 @@ gsa.read.gatkreportv0 <- function(lines) {
# Load all GATKReport v1 tables from file
gsa.read.gatkreportv1 <- function(lines) {
#print("loading with optimized v1 reader")
nLines = length(lines)
tableEnv = new.env();
tableName = NA;
tableHeader = c();
tableRows = NULL;
version = "";
rowCount = 0
headerRowCount = -1;
finishTable <- function() {
.gsa.assignGATKTableToEnvironment(tableName, tableHeader, tableRows[1:rowCount,], tableEnv);
}
for (line in lines) {
tableEnv = new.env();
tableName = NA;
tableHeader = c();
tableRows = c();
version = "";
headerRowCount = -1;
for (line in lines) {
if (length(grep("^#:GATKReport.v1", line, ignore.case=TRUE)) > 0) {
version = "v1.0";
headerRowCount = 0;
}
if ( (headerRowCount %% 2 == 1) && (version == "v1.0") ) {
#print("Trying to start a table with line:");
#print(line);
#Get table header
headerFields = unlist(strsplit(line, ":"));
if (!is.na(tableName)) {
.gsa.assignGATKTableToEnvironment(tableName, tableHeader, tableRows, tableEnv);
}
tableName = headerFields[3];
tableHeader = c();
tableRows = c();
columnStarts = c();
}
if (length(grep("^#:GATKTable", line, ignore.case=TRUE)) > 0) {
headerRowCount = headerRowCount+1;
#print("Header Row count is at:")
#print(headerRowCount);
} else if (!is.na(tableName)) {
if ( version == "v1.0") {
if (length(tableHeader) == 0) {
headerChars = unlist(strsplit(line, ""));
# Find the first position of non space characters, excluding the first character
columnStarts = intersect(grep("[[:space:]]", headerChars, invert=TRUE), grep("[[:space:]]", headerChars) + 1);
}
row = .gsa.splitFixedWidth(line, columnStarts);
}
if (length(tableHeader) == 0) {
tableHeader = row;
} else if ( nchar(line) > 0 ) {
tableRows = rbind(tableRows, row);
}
}
if (length(grep("^#:GATKReport.v1", line, ignore.case=TRUE)) > 0) {
version = "v1.0";
headerRowCount = 0;
}
if (!is.na(tableName)) {
.gsa.assignGATKTableToEnvironment(tableName, tableHeader, tableRows, tableEnv);
if ( (headerRowCount %% 2 == 1) && (version == "v1.0") ) {
#print("Trying to start a table with line:");
#print(line);
#Get table header
headerFields = unlist(strsplit(line, ":"));
if (!is.na(tableName)) {
finishTable()
}
tableName = headerFields[3];
tableHeader = c();
tableRows = NULL
rowCount = 0
columnStarts = c();
}
gatkreport = as.list(tableEnv, all.names=TRUE);
if (length(grep("^#:GATKTable", line, ignore.case=TRUE)) > 0) {
headerRowCount = headerRowCount+1;
#print("Header Row count is at:")
#print(headerRowCount);
} else if (!is.na(tableName)) {
if ( version == "v1.0") {
if (length(tableHeader) == 0) {
headerChars = unlist(strsplit(line, ""));
# Find the first position of non space characters, excluding the first character
columnStarts = intersect(grep("[[:space:]]", headerChars, invert=TRUE), grep("[[:space:]]", headerChars) + 1);
tableRows = matrix(nrow=nLines, ncol=length(columnStarts)+1);
}
row = .gsa.splitFixedWidth(line, columnStarts);
}
if (length(tableHeader) == 0) {
tableHeader = row;
} else if ( nchar(line) > 0 ) {
rowCount = rowCount + 1
tableRows[rowCount,] <- row
}
}
}
if (!is.na(tableName)) {
finishTable()
}
gatkreport = as.list(tableEnv, all.names=TRUE);
}
# Load all GATKReport tables from a file

View File

@ -1,138 +0,0 @@
titvFPEst <- function(titvExpected, titvObserved) { max(min(1 - (titvObserved - 0.5) / (titvExpected - 0.5), 1), 0.001) }
titvFPEstV <- function(titvExpected, titvs) {
sapply(titvs, function(x) titvFPEst(titvExpected, x))
}
calcHet <- function(nknown, knownTiTv, nnovel, novelTiTv, callable) {
TP <- nknown + (1-titvFPEst(knownTiTv, novelTiTv)) * nnovel
2 * TP / 3 / callable
}
marginalTiTv <- function( nx, titvx, ny, titvy ) {
tvx = nx / (titvx + 1)
tix = nx - tvx
tvy = ny / (titvy + 1)
tiy = ny - tvy
tiz = tix - tiy
tvz = tvx - tvy
return(tiz / tvz)
}
marginaldbSNPRate <- function( nx, dbx, ny, dby ) {
knownx = nx * dbx / 100
novelx = nx - knownx
knowny = ny * dby / 100
novely = ny - knowny
knownz = knownx - knowny
novelz = novelx - novely
return(knownz / ( knownz + novelz ) * 100)
}
numExpectedCalls <- function(L, theta, calledFractionOfRegion, nIndividuals, dbSNPRate) {
nCalls <- L * theta * calledFractionOfRegion * sum(1 / seq(1, 2 * nIndividuals))
return(list(nCalls = nCalls, nKnown = dbSNPRate * nCalls, nNovel = (1-dbSNPRate) * nCalls))
}
normalize <- function(x) {
x / sum(x)
}
normcumsum <- function(x) {
cumsum(normalize(x))
}
cumhist <- function(d, ...) {
plot(d[order(d)], type="b", col="orange", lwd=2, ...)
}
revcumsum <- function(x) {
return(rev(cumsum(rev(x))))
}
phred <- function(x) {
log10(max(x,10^(-9.9)))*-10
}
pOfB <- function(b, B, Q) {
#print(paste(b, B, Q))
p = 1 - 10^(-Q/10)
if ( b == B )
return(p)
else
return(1 - p)
}
pOfG <- function(bs, qs, G) {
a1 = G[1]
a2 = G[2]
log10p = 0
for ( i in 1:length(bs) ) {
b = bs[i]
q = qs[i]
p1 = pOfB(b, a1, q) / 2 + pOfB(b, a2, q) / 2
log10p = log10p + log10(p1)
}
return(log10p)
}
pOfGs <- function(nAs, nBs, Q) {
bs = c(rep("a", nAs), rep("t", nBs))
qs = rep(Q, nAs + nBs)
G1 = c("a", "a")
G2 = c("a", "t")
G3 = c("t", "t")
log10p1 = pOfG(bs, qs, G1)
log10p2 = pOfG(bs, qs, G2)
log10p3 = pOfG(bs, qs, G3)
Qsample = phred(1 - 10^log10p2 / sum(10^(c(log10p1, log10p2, log10p3))))
return(list(p1=log10p1, p2=log10p2, p3=log10p3, Qsample=Qsample))
}
QsampleExpected <- function(depth, Q) {
weightedAvg = 0
for ( d in 1:(depth*3) ) {
Qsample = 0
pOfD = dpois(d, depth)
for ( nBs in 0:d ) {
pOfnB = dbinom(nBs, d, 0.5)
nAs = d - nBs
Qsample = pOfGs(nAs, nBs, Q)$Qsample
#Qsample = 1
weightedAvg = weightedAvg + Qsample * pOfD * pOfnB
print(as.data.frame(list(d=d, nBs = nBs, pOfD=pOfD, pOfnB = pOfnB, Qsample=Qsample, weightedAvg = weightedAvg)))
}
}
return(weightedAvg)
}
plotQsamples <- function(depths, Qs, Qmax) {
cols = rainbow(length(Qs))
plot(depths, rep(Qmax, length(depths)), type="n", ylim=c(0,Qmax), xlab="Average sequencing coverage", ylab="Qsample", main = "Expected Qsample values, including depth and allele sampling")
for ( i in 1:length(Qs) ) {
Q = Qs[i]
y = as.numeric(lapply(depths, function(x) QsampleExpected(x, Q)))
points(depths, y, col=cols[i], type="b")
}
legend("topleft", paste("Q", Qs), fill=cols)
}
pCallHetGivenDepth <- function(depth, nallelesToCall) {
depths = 0:(2*depth)
pNoAllelesToCall = apply(as.matrix(depths),1,function(d) sum(dbinom(0:nallelesToCall,d,0.5)))
dpois(depths,depth)*(1-pNoAllelesToCall)
}
pCallHets <- function(depth, nallelesToCall) {
sum(pCallHetGivenDepth(depth,nallelesToCall))
}
pCallHetMultiSample <- function(depth, nallelesToCall, nsamples) {
1-(1-pCallHets(depth,nallelesToCall))^nsamples
}

View File

@ -47,7 +47,7 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
/**
* An exception that's occurred in this traversal. If null, no exception has occurred.
*/
private Throwable error = null;
private RuntimeException error = null;
/**
* Queue of incoming shards.
@ -345,22 +345,22 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
return error != null;
}
private synchronized StingException getTraversalError() {
private synchronized RuntimeException getTraversalError() {
if(!hasTraversalErrorOccurred())
throw new ReviewedStingException("User has attempted to retrieve a traversal error when none exists");
// If the error is already a StingException, pass it along as is. Otherwise, wrap it.
if(error instanceof StingException)
return (StingException)error;
else
return new ReviewedStingException("An error occurred during the traversal.",error);
return error;
}
/**
* Allows other threads to notify of an error during traversal.
*/
protected synchronized void notifyOfTraversalError(Throwable error) {
this.error = error;
// If the error is already a Runtime, pass it along as is. Otherwise, wrap it.
if (error instanceof RuntimeException)
this.error = (RuntimeException)error;
else
this.error = new ReviewedStingException("An error occurred during the traversal.", error);
}

View File

@ -1,6 +1,5 @@
package org.broadinstitute.sting.gatk.iterators;
import net.sf.samtools.SAMFormatException;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.util.CloseableIterator;
import org.broadinstitute.sting.utils.exceptions.UserException;
@ -23,7 +22,7 @@ public class MalformedBAMErrorReformatingIterator implements CloseableIterator<S
public boolean hasNext() {
try {
return this.it.hasNext();
} catch ( SAMFormatException e ) {
} catch ( RuntimeException e ) { // we need to catch RuntimeExceptions here because the Picard code is throwing them (among SAMFormatExceptions) sometimes
throw new UserException.MalformedBAM(source, e.getMessage());
}
}
@ -31,7 +30,7 @@ public class MalformedBAMErrorReformatingIterator implements CloseableIterator<S
public SAMRecord next() {
try {
return it.next();
} catch ( SAMFormatException e ) {
} catch ( RuntimeException e ) { // we need to catch RuntimeExceptions here because the Picard code is throwing them (among SAMFormatExceptions) sometimes
throw new UserException.MalformedBAM(source, e.getMessage());
}
}

View File

@ -28,10 +28,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.StingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.PrintStream;
import java.io.*;
import java.util.Collection;
import java.util.TreeMap;
@ -85,36 +82,32 @@ public class GATKReport {
* @param file the file to load
*/
private void loadReport(File file) {
BufferedReader reader;
String reportHeader;
try {
BufferedReader reader = new BufferedReader(new FileReader(file));
String reportHeader = reader.readLine();
// Read the first line for the version and number of tables.
version = GATKReportVersion.fromHeader(reportHeader);
if (version.equals(GATKReportVersion.V0_1) ||
version.equals(GATKReportVersion.V0_2))
throw new UserException("The GATK no longer supports reading legacy GATK Reports. Please use v1.0 or newer.");
int nTables = Integer.parseInt(reportHeader.split(":")[2]);
// Read each tables according ot the number of tables
for (int i = 0; i < nTables; i++) {
addTable(new GATKReportTable(reader, version));
/*
if ( !blankLine.equals("") ) {
throw new StingException("The GATK Report File is corrupted or not formatted correctly");
}
*/
}
reader = new BufferedReader(new FileReader(file));
reportHeader = reader.readLine();
} catch (FileNotFoundException e) {
throw new ReviewedStingException("Could not open file : " + file);
} catch (IOException e) {
throw new ReviewedStingException("Could not read file : " + file);
}
} catch (Exception e) {
// todo - improve exception handling
//throw new StingException("Cannot read GATKReport: " + e);
e.printStackTrace();
// Read the first line for the version and number of tables.
version = GATKReportVersion.fromHeader(reportHeader);
if (version.equals(GATKReportVersion.V0_1) ||
version.equals(GATKReportVersion.V0_2))
throw new UserException("The GATK no longer supports reading legacy GATK Reports. Please use v1.0 or newer.");
int nTables = Integer.parseInt(reportHeader.split(":")[2]);
// Read each tables according ot the number of tables
for (int i = 0; i < nTables; i++) {
addTable(new GATKReportTable(reader, version));
}
}
/**

View File

@ -29,6 +29,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.text.TextFormattingUtils;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.PrintStream;
import java.util.*;
import java.util.regex.Matcher;
@ -54,85 +55,107 @@ public class GATKReportTable {
private GATKReportColumns columns;
private static final String COULD_NOT_READ_HEADER = "Could not read the header of this file -- ";
private static final String COULD_NOT_READ_COLUMN_NAMES = "Could not read the column names of this file -- ";
private static final String COULD_NOT_READ_DATA_LINE = "Could not read a data line of this table -- ";
private static final String COULD_NOT_READ_EMPTY_LINE = "Could not read the last empty line of this table -- ";
private static final String OLD_GATK_TABLE_VERSION = "We no longer support older versions of the GATK Tables";
public GATKReportTable(BufferedReader reader, GATKReportVersion version) {
try {
int counter = 0;
int counter = 0;
switch (version) {
case V1_0:
int nHeaders = 2;
String[] tableHeaders = new String[nHeaders];
// Read in the headers
for (int i = 0; i < nHeaders; i++) {
switch (version) {
case V1_0:
int nHeaders = 2;
String[] tableHeaders = new String[nHeaders];
// Read in the headers
for (int i = 0; i < nHeaders; i++) {
try {
tableHeaders[i] = reader.readLine();
} catch (IOException e) {
throw new ReviewedStingException(COULD_NOT_READ_HEADER + e.getMessage());
}
String[] tableData = tableHeaders[0].split(":");
String[] userData = tableHeaders[1].split(":");
// Fill in the fields
tableName = userData[2];
tableDescription = userData[3];
primaryKeyDisplay = Boolean.parseBoolean(tableData[2]);
columns = new GATKReportColumns();
int nColumns = Integer.parseInt(tableData[3]);
int nRows = Integer.parseInt(tableData[4]);
// Read column names
String columnLine = reader.readLine();
List<Integer> columnStarts = TextFormattingUtils.getWordStarts(columnLine);
String[] columnNames = TextFormattingUtils.splitFixedWidth(columnLine, columnStarts);
if (primaryKeyDisplay) {
addPrimaryKey(columnNames[0]);
} else {
sortByPrimaryKey = true;
addPrimaryKey("id", false);
counter = 1;
}
String[] tableData = tableHeaders[0].split(":");
String[] userData = tableHeaders[1].split(":");
// Fill in the fields
tableName = userData[2];
tableDescription = (userData.length <= 3) ? "" : userData[3]; // table may have no description! (and that's okay)
primaryKeyDisplay = Boolean.parseBoolean(tableData[2]);
columns = new GATKReportColumns();
int nColumns = Integer.parseInt(tableData[3]);
int nRows = Integer.parseInt(tableData[4]);
// Read column names
String columnLine;
try {
columnLine = reader.readLine();
} catch (IOException e) {
throw new ReviewedStingException(COULD_NOT_READ_COLUMN_NAMES);
}
List<Integer> columnStarts = TextFormattingUtils.getWordStarts(columnLine);
String[] columnNames = TextFormattingUtils.splitFixedWidth(columnLine, columnStarts);
if (primaryKeyDisplay) {
addPrimaryKey(columnNames[0]);
} else {
sortByPrimaryKey = true;
addPrimaryKey("id", false);
counter = 1;
}
// Put in columns using the format string from the header
for (int i = 0; i < nColumns; i++) {
String format = tableData[5 + i];
if (primaryKeyDisplay)
addColumn(columnNames[i + 1], true, format);
else
addColumn(columnNames[i], true, format);
}
for (int i = 0; i < nRows; i++) {
// read line
String dataLine;
try {
dataLine = reader.readLine();
} catch (IOException e) {
throw new ReviewedStingException(COULD_NOT_READ_DATA_LINE + e.getMessage());
}
// Put in columns using the format string from the header
for (int i = 0; i < nColumns; i++) {
String format = tableData[5 + i];
if (primaryKeyDisplay)
addColumn(columnNames[i + 1], true, format);
else
addColumn(columnNames[i], true, format);
}
for (int i = 0; i < nRows; i++) {
// read line
List<String> lineSplits = Arrays.asList(TextFormattingUtils.splitFixedWidth(reader.readLine(), columnStarts));
for (int columnIndex = 0; columnIndex < nColumns; columnIndex++) {
//Input all the remaining values
GATKReportDataType type = getColumns().getByIndex(columnIndex).getDataType();
if (primaryKeyDisplay) {
String columnName = columnNames[columnIndex + 1];
String primaryKey = lineSplits.get(0);
set(primaryKey, columnName, type.Parse(lineSplits.get(columnIndex + 1)));
} else {
String columnName = columnNames[columnIndex];
set(counter, columnName, type.Parse(lineSplits.get(columnIndex)));
}
List<String> lineSplits = Arrays.asList(TextFormattingUtils.splitFixedWidth(dataLine, columnStarts));
for (int columnIndex = 0; columnIndex < nColumns; columnIndex++) {
//Input all the remaining values
GATKReportDataType type = getColumns().getByIndex(columnIndex).getDataType();
if (primaryKeyDisplay) {
String columnName = columnNames[columnIndex + 1];
String primaryKey = lineSplits.get(0);
set(primaryKey, columnName, type.Parse(lineSplits.get(columnIndex + 1)));
} else {
String columnName = columnNames[columnIndex];
set(counter, columnName, type.Parse(lineSplits.get(columnIndex)));
}
counter++;
}
counter++;
}
try {
reader.readLine();
// When you see empty line or null, quit out
}
} catch (Exception e) {
//throw new StingException("Cannot read GATKReport: " + e);
e.printStackTrace();
} catch (IOException e) {
throw new ReviewedStingException(COULD_NOT_READ_EMPTY_LINE + e.getMessage());
}
break;
default:
throw new ReviewedStingException(OLD_GATK_TABLE_VERSION);
}
}
@ -408,8 +431,7 @@ public class GATKReportTable {
} catch (Exception e) {
}
}
if (column.getDataType().equals(GATKReportDataType.Byte) &&
((String) value).length() == 1) {
if (column.getDataType().equals(GATKReportDataType.Byte) && ((String) value).length() == 1) {
newValue = ((String) value).charAt(0);
}
@ -418,12 +440,11 @@ public class GATKReportTable {
if (newValue != null)
value = newValue;
if (column.getDataType().equals(GATKReportDataType.fromObject(value)) ||
column.getDataType().equals(GATKReportDataType.Unknown) )
// todo -- Types have to be more flexible. For example, %d should accept Integers, Shorts and Bytes.
if (column.getDataType().equals(GATKReportDataType.fromObject(value)) || column.getDataType().equals(GATKReportDataType.Unknown) )
columns.get(columnName).put(primaryKey, value);
else
throw new ReviewedStingException(String.format("Tried to add an object of type: %s to a column of type: %s",
GATKReportDataType.fromObject(value).name(), column.getDataType().name()));
throw new ReviewedStingException(String.format("Tried to add an object of type: %s to a column of type: %s", GATKReportDataType.fromObject(value).name(), column.getDataType().name()));
}
/**

View File

@ -31,8 +31,10 @@ public class LowMQ extends InfoFieldAnnotation {
double total = 0;
for ( Map.Entry<String, AlignmentContext> sample : stratifiedContexts.entrySet() )
{
ReadBackedPileup pileup = sample.getValue().getBasePileup();
for (PileupElement p : pileup )
if ( !sample.getValue().hasBasePileup() )
continue;
for ( PileupElement p : sample.getValue().getBasePileup() )
{
if ( p.getMappingQual() == 0 ) { mq0 += 1; }
if ( p.getMappingQual() <= 10 ) { mq10 += 1; }

View File

@ -2,10 +2,7 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
import org.broadinstitute.sting.utils.BitSetUtils;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.LinkedList;
import java.util.List;
import java.util.*;
/**
* This class provides all the functionality for the BitSet representation of the keys to the hash table of BQSR
@ -30,6 +27,7 @@ import java.util.List;
public class BQSRKeyManager {
private List<RequiredCovariateInfo> requiredCovariates;
private List<OptionalCovariateInfo> optionalCovariates;
private Map<String, Short> covariateNameToIDMap;
private int nRequiredBits; // Number of bits used to represent the required covariates
private int nOptionalBits; // Number of bits used to represent the standard covaraites
@ -48,6 +46,7 @@ public class BQSRKeyManager {
public BQSRKeyManager(List<Covariate> requiredCovariates, List<Covariate> optionalCovariates) {
this.requiredCovariates = new ArrayList<RequiredCovariateInfo>(requiredCovariates.size()); // initialize the required covariates list
this.optionalCovariates = new ArrayList<OptionalCovariateInfo>(optionalCovariates.size()); // initialize the optional covariates list (size may be 0, it's okay)
this.covariateNameToIDMap = new HashMap<String, Short>(optionalCovariates.size()*2); // the map from covariate name to covariate id (when reading GATK Reports, we get the IDs as names of covariates)
nRequiredBits = 0;
for (Covariate required : requiredCovariates) { // create a list of required covariates with the extra information for key management
@ -57,14 +56,16 @@ public class BQSRKeyManager {
nRequiredBits += nBits;
}
short i = 0;
short id = 0;
nOptionalBits = 0;
for (Covariate optional : optionalCovariates) {
int nBits = optional.numberOfBits(); // number of bits used by this covariate
nOptionalBits = Math.max(nOptionalBits, nBits); // optional covariates are represented by the number of bits needed by biggest covariate
BitSet optionalID = BitSetUtils.bitSetFrom(i); // calculate the optional covariate ID for this covariate
BitSet optionalID = BitSetUtils.bitSetFrom(id); // calculate the optional covariate ID for this covariate
this.optionalCovariates.add(new OptionalCovariateInfo(optionalID, optional)); // optional covariates have standardized mask and number of bits, so no need to store in the RequiredCovariateInfo object
i++;
String covariateName = optional.getClass().getSimpleName().split("Covariate")[0]; // get the name of the covariate (without the "covariate" part of it) so we can match with the GATKReport
this.covariateNameToIDMap.put(covariateName, id);
id++;
}
nOptionalIDBits = BitSetUtils.numberOfBitsToRepresent(optionalCovariates.size()); // number of bits used to represent the covariate ID
@ -92,7 +93,7 @@ public class BQSRKeyManager {
* @return one key in bitset representation per covariate
*/
public List<BitSet> bitSetsFromAllKeys(BitSet[] allKeys, EventType eventType) {
List<BitSet> allBitSets = new LinkedList<BitSet>(); // Generate one key per optional covariate
List<BitSet> allBitSets = new LinkedList<BitSet>(); // Generate one key per optional covariate
BitSet eventBitSet = BitSetUtils.bitSetFrom(eventType.index); // create a bitset with the event type
int eventTypeBitIndex = nRequiredBits + nOptionalBits + nOptionalIDBits; // Location in the bit set to add the event type bits
@ -147,7 +148,7 @@ public class BQSRKeyManager {
if (optionalCovariates.size() > 0) {
int optionalCovariate = requiredCovariates.size(); // the optional covariate index in the key array
int covariateIDIndex = optionalCovariate + 1; // the optional covariate ID index is right after the optional covariate's
int covariateID = (Short) key[covariateIDIndex]; // get the optional covariate id
int covariateID = parseCovariateID(key[covariateIDIndex]); // when reading the GATK Report the ID may come in a String instead of an index
OptionalCovariateInfo infoOptional = optionalCovariates.get(covariateID); // so we can get the optional covariate information
BitSet covariateBitSet = infoOptional.covariate.bitSetFromKey(key[optionalCovariate]); // convert the optional covariate key into a bitset using the covariate's interface
@ -162,7 +163,17 @@ public class BQSRKeyManager {
return bitSetKey;
}
/**
* Covariate id can be either the covariate name (String) or the actual id (short). This method
* finds it's type and converts accordingly to the short notation.
*
* @param id the string or short representation of the optional covariate id
* @return the short representation of the optional covariate id.
*/
private short parseCovariateID(Object id) {
return (id instanceof String) ? covariateNameToIDMap.get(id.toString()) : (Short) id;
}
/**
* Generates a key set of objects from a combined bitset key.
@ -185,13 +196,27 @@ public class BQSRKeyManager {
short id = BitSetUtils.shortFrom(idbs); // covert the id bitset into a short
Covariate covariate = optionalCovariates.get(id).covariate; // get the corresponding optional covariate object
objectKeys.add(covariate.keyFromBitSet(covBitSet)); // add the optional covariate to the key set
objectKeys.add(id); // add the covariate id
objectKeys.add(covariate.getClass().getSimpleName().split("Covariate")[0]); // add the covariate name using the id
}
objectKeys.add(eventFromBitSet(key)); // add the event type object to the key set
return objectKeys;
}
public List<Covariate> getRequiredCovariates() {
ArrayList<Covariate> list = new ArrayList<Covariate>(requiredCovariates.size());
for (RequiredCovariateInfo info : requiredCovariates)
list.add(info.covariate);
return list;
}
public List<Covariate> getOptionalCovariates() {
ArrayList<Covariate> list = new ArrayList<Covariate>(optionalCovariates.size());
for (OptionalCovariateInfo info : optionalCovariates)
list.add(info.covariate);
return list;
}
/**
* Translates a masked bitset into a bitset starting at 0
*
@ -253,7 +278,6 @@ public class BQSRKeyManager {
return chopNBitsFrom(bitSet, leadingBits);
}
/**
* Aggregate information for each Covariate
*/

View File

@ -188,7 +188,7 @@ public class CycleCovariate implements StandardCovariate {
@Override
public BitSet bitSetFromKey(Object key) {
return BitSetUtils.bitSetFrom((Short) key);
return (key instanceof String) ? BitSetUtils.bitSetFrom(Short.parseShort((String) key)) : BitSetUtils.bitSetFrom((Short) key);
}
@Override

View File

@ -79,8 +79,8 @@ public class QualityScoreCovariate implements RequiredCovariate {
}
@Override
public BitSet bitSetFromKey(Object key) {
return BitSetUtils.bitSetFrom((Byte) key);
public BitSet bitSetFromKey(Object key) {
return (key instanceof String) ? BitSetUtils.bitSetFrom(Byte.parseByte((String) key)) : BitSetUtils.bitSetFrom((Byte) key);
}
@Override

View File

@ -1,11 +1,13 @@
package org.broadinstitute.sting.gatk.walkers.bqsr;
import org.broadinstitute.sting.utils.BitSetUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.util.Arrays;
import java.util.BitSet;
import java.util.HashMap;
import java.util.regex.Pattern;
/*
* Copyright (c) 2009 The Broad Institute
@ -45,6 +47,10 @@ public class ReadGroupCovariate implements RequiredCovariate {
private final HashMap<String, Short> readGroupLookupTable = new HashMap<String, Short>();
private final HashMap<Short, String> readGroupReverseLookupTable = new HashMap<Short, String>();
private short nextId = 0;
private static final String LANE_TAG = "LN";
private static final String SAMPLE_TAG = "SM";
// Initialize any member variables using the command-line arguments passed to the walkers
@Override
@ -54,14 +60,13 @@ public class ReadGroupCovariate implements RequiredCovariate {
@Override
public CovariateValues getValues(final GATKSAMRecord read) {
final int l = read.getReadLength();
final String readGroupId = read.getReadGroup().getReadGroupId();
BitSet rg = bitSetForReadGroup(readGroupId); // All objects must output a BitSet, so we convert the "compressed" representation of the Read Group into a bitset
final String readGroupId = readGroupValueFromRG(read.getReadGroup());
BitSet rg = bitSetForReadGroup(readGroupId); // All objects must output a BitSet, so we convert the "compressed" representation of the Read Group into a bitset
BitSet[] readGroups = new BitSet[l];
Arrays.fill(readGroups, rg);
return new CovariateValues(readGroups, readGroups, readGroups);
}
// Used to get the covariate's value from input csv file during on-the-fly recalibration
@Override
public final Object getValue(final String str) {
return str;
@ -77,15 +82,15 @@ public class ReadGroupCovariate implements RequiredCovariate {
return bitSetForReadGroup((String) key);
}
public final String decodeReadGroup(final short id) {
return readGroupReverseLookupTable.get(id);
}
@Override
public int numberOfBits() {
return BitSetUtils.numberOfBitsToRepresent(Short.MAX_VALUE);
}
private String decodeReadGroup(final short id) {
return readGroupReverseLookupTable.get(id);
}
private BitSet bitSetForReadGroup(String readGroupId) {
short shortId;
if (readGroupLookupTable.containsKey(readGroupId))
@ -98,6 +103,35 @@ public class ReadGroupCovariate implements RequiredCovariate {
}
return BitSetUtils.bitSetFrom(shortId);
}
/**
* Gather the sample and lane information from the read group record and return sample.lane
*
* If the bam file is missing the lane information, it tries to use the id regex standardized
* by the Broad Institute to extract the lane information
*
* If it fails to find either of the two pieces of information, will return the read group id instead.
*
* @param rg the read group record
* @return sample.lane or id if information is missing.
*/
private String readGroupValueFromRG(GATKSAMReadGroupRecord rg) {
String lane = rg.getLane(); // take the sample's lane from the read group lane tag
String sample = rg.getSample(); // take the sample's name from the read group sample tag
String value = rg.getId(); // initialize the return value with the read group ID in case we can't find the sample or the lane
if (lane == null) { // if this bam doesn't have the lane annotation in the read group try to take it from the read group id
String [] splitID = rg.getId().split(Pattern.quote("."));
if (splitID.length > 1) // if the id doesn't follow the BROAD defined regex (PU.LANE), fall back to the read group id
lane = splitID[splitID.length - 1]; // take the lane from the readgroup id
}
if (sample != null && lane != null)
value = sample + "." + lane; // the read group covariate is sample.lane (where the inforamtion is available)
return value;
}
}

View File

@ -26,10 +26,15 @@
package org.broadinstitute.sting.gatk.walkers.bqsr;
import net.sf.samtools.SAMUtils;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.walkers.recalibration.EmpiricalQual;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.collections.NestedHashMap;
import org.broadinstitute.sting.utils.classloader.PluginManager;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
@ -37,10 +42,7 @@ import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.*;
/**
* Created by IntelliJ IDEA.
@ -53,18 +55,28 @@ import java.util.Map;
*/
public class RecalDataManager {
public final NestedHashMap nestedHashMap; // The full dataset
private final HashMap<EventType, NestedHashMap> dataCollapsedReadGroup; // Table where everything except read group has been collapsed
private final HashMap<EventType, NestedHashMap> dataCollapsedQualityScore; // Table where everything except read group and quality score has been collapsed
private final HashMap<EventType, ArrayList<NestedHashMap>> dataCollapsedByCovariate; // Tables where everything except read group, quality score, and given covariate has been collapsed
public final static String ARGUMENT_REPORT_TABLE_TITLE = "Arguments";
public final static String QUANTIZED_REPORT_TABLE_TITLE = "Quantized";
public final static String READGROUP_REPORT_TABLE_TITLE = "RecalTable0";
public final static String QUALITY_SCORE_REPORT_TABLE_TITLE = "RecalTable1";
public final static String ALL_COVARIATES_REPORT_TABLE_TITLE = "RecalTable2";
public final static String ARGUMENT_VALUE_COLUMN_NAME = "Value";
public final static String QUANTIZED_VALUE_COLUMN_NAME = "QuantizedScore";
public static final String QUANTIZED_COUNT_COLUMN_NAME = "Count";
public final static String READGROUP_COLUMN_NAME = "ReadGroup";
public final static String EVENT_TYPE_COLUMN_NAME = "EventType";
public final static String EMPIRICAL_QUALITY_COLUMN_NAME = "EmpiricalQuality";
public final static String ESTIMATED_Q_REPORTED_COLUMN_NAME = "EstimatedQReported";
public final static String QUALITY_SCORE_COLUMN_NAME = "QualityScore";
public final static String COVARIATE_VALUE_COLUMN_NAME = "CovariateValue";
public final static String COVARIATE_NAME_COLUMN_NAME = "CovariateName";
public final static String ORIGINAL_QUAL_ATTRIBUTE_TAG = "OQ"; // The tag that holds the original quality scores
public final static String COLOR_SPACE_QUAL_ATTRIBUTE_TAG = "CQ"; // The tag that holds the color space quality scores for SOLID bams
public final static String COLOR_SPACE_ATTRIBUTE_TAG = "CS"; // The tag that holds the color space for SOLID bams
public final static String COLOR_SPACE_INCONSISTENCY_TAG = "ZC"; // A new tag made up for the recalibrator which will hold an array of ints which say if this base is inconsistent with its color
private static boolean warnUserNullPlatform = false;
private static final String COVARS_ATTRIBUTE = "COVARS"; // used to store covariates array as a temporary attribute inside GATKSAMRecord.\
public enum SOLID_RECAL_MODE {
/**
@ -82,7 +94,20 @@ public class RecalDataManager {
/**
* Look at the color quality scores and probabilistically decide to change the reference inserted base to be the base which is implied by the original color space instead of the reference.
*/
REMOVE_REF_BIAS
REMOVE_REF_BIAS;
public static SOLID_RECAL_MODE recalModeFromString(String recalMode) {
if (recalMode.equals("DO_NOTHING"))
return SOLID_RECAL_MODE.DO_NOTHING;
if (recalMode.equals("SET_Q_ZERO"))
return SOLID_RECAL_MODE.SET_Q_ZERO;
if (recalMode.equals("SET_Q_ZERO_BASE_N"))
return SOLID_RECAL_MODE.SET_Q_ZERO_BASE_N;
if (recalMode.equals("REMOVE_REF_BIAS"))
return SOLID_RECAL_MODE.REMOVE_REF_BIAS;
throw new UserException.BadArgumentValue(recalMode, "is not a valid SOLID_RECAL_MODE value");
}
}
public enum SOLID_NOCALL_STRATEGY {
@ -97,78 +122,125 @@ public class RecalDataManager {
/**
* Mark these reads as failing vendor quality checks so they can be filtered out by downstream analyses.
*/
PURGE_READ
}
PURGE_READ;
public RecalDataManager() {
nestedHashMap = new NestedHashMap();
dataCollapsedReadGroup = null;
dataCollapsedQualityScore = null;
dataCollapsedByCovariate = null;
}
public static SOLID_NOCALL_STRATEGY nocallStrategyFromString(String nocallStrategy) {
if (nocallStrategy.equals("THROW_EXCEPTION"))
return SOLID_NOCALL_STRATEGY.THROW_EXCEPTION;
if (nocallStrategy.equals("LEAVE_READ_UNRECALIBRATED"))
return SOLID_NOCALL_STRATEGY.LEAVE_READ_UNRECALIBRATED;
if (nocallStrategy.equals("PURGE_READ"))
return SOLID_NOCALL_STRATEGY.PURGE_READ;
public RecalDataManager(final boolean createCollapsedTables, final int numCovariates) {
if (createCollapsedTables) { // Initialize all the collapsed tables, only used by on-the-fly recalibration
nestedHashMap = null;
dataCollapsedReadGroup = new HashMap<EventType, NestedHashMap>();
dataCollapsedQualityScore = new HashMap<EventType, NestedHashMap>();
dataCollapsedByCovariate = new HashMap<EventType, ArrayList<NestedHashMap>>();
for (final EventType errorModel : EventType.values()) {
dataCollapsedReadGroup.put(errorModel, new NestedHashMap());
dataCollapsedQualityScore.put(errorModel, new NestedHashMap());
dataCollapsedByCovariate.put(errorModel, new ArrayList<NestedHashMap>());
for (int iii = 0; iii < numCovariates - 2; iii++) { // readGroup and QualityScore aren't counted here, their tables are separate
dataCollapsedByCovariate.get(errorModel).add(new NestedHashMap());
}
}
}
else {
nestedHashMap = new NestedHashMap();
dataCollapsedReadGroup = null;
dataCollapsedQualityScore = null;
dataCollapsedByCovariate = null;
throw new UserException.BadArgumentValue(nocallStrategy, "is not a valid SOLID_NOCALL_STRATEGY value");
}
}
public static ReadCovariates covariateKeySetFrom(GATKSAMRecord read) {
return (ReadCovariates) read.getTemporaryAttribute(COVARS_ATTRIBUTE);
}
public static void listAvailableCovariates(Logger logger) {
// Get a list of all available covariates
final List<Class<? extends Covariate>> covariateClasses = new PluginManager<Covariate>(Covariate.class).getPlugins();
private void checkForSingletons(final Map data) {
// todo -- this looks like it's better just as a data.valueSet() call?
for (Object comp : data.keySet()) {
final Object val = data.get(comp);
if (val instanceof RecalDatum) { // We are at the end of the nested hash maps
if (data.keySet().size() == 1) {
data.clear(); // don't TableRecalibrate a non-required covariate if it only has one element because that correction has already been done ...
// in a previous step of the sequential calculation model
}
}
else { // Another layer in the nested hash map
checkForSingletons((Map) val);
}
}
// Print and exit if that's what was requested
logger.info("Available covariates:");
for (Class<?> covClass : covariateClasses)
logger.info(covClass.getSimpleName());
logger.info("");
}
/**
* Get the appropriate collapsed table out of the set of all the tables held by this Object
*
* @param covariate Which covariate indexes the desired collapsed HashMap
* @return The desired collapsed HashMap
* Generates two lists : required covariates and optional covariates based on the user's requests.
*
* Performs the following tasks in order:
* 1. Adds all requierd covariates in order
* 2. Check if the user asked to use the standard covariates and adds them all if that's the case
* 3. Adds all covariates requested by the user that were not already added by the two previous steps
*
* @param argumentCollection the argument collection object for the recalibration walker
* @return a pair of ordered lists : required covariates (first) and optional covariates (second)
*/
public final NestedHashMap getCollapsedTable(final int covariate, final EventType errorModel) {
if (covariate == 0) {
return dataCollapsedReadGroup.get(errorModel); // Table where everything except read group has been collapsed
}
else if (covariate == 1) {
return dataCollapsedQualityScore.get(errorModel); // Table where everything except read group and quality score has been collapsed
}
else {
return dataCollapsedByCovariate.get(errorModel).get(covariate - 2); // Table where everything except read group, quality score, and given covariate has been collapsed
public static Pair<ArrayList<Covariate>, ArrayList<Covariate>> initializeCovariates(RecalibrationArgumentCollection argumentCollection) {
final List<Class<? extends Covariate>> covariateClasses = new PluginManager<Covariate>(Covariate.class).getPlugins();
final List<Class<? extends RequiredCovariate>> requiredClasses = new PluginManager<RequiredCovariate>(RequiredCovariate.class).getPlugins();
final List<Class<? extends StandardCovariate>> standardClasses = new PluginManager<StandardCovariate>(StandardCovariate.class).getPlugins();
ArrayList<Covariate> requiredCovariates = addRequiredCovariatesToList(requiredClasses); // add the required covariates
ArrayList<Covariate> optionalCovariates = new ArrayList<Covariate>();
if (argumentCollection.USE_STANDARD_COVARIATES)
optionalCovariates = addStandardCovariatesToList(standardClasses); // add the standard covariates if -standard was specified by the user
if (argumentCollection.COVARIATES != null) { // parse the -cov arguments that were provided, skipping over the ones already specified
for (String requestedCovariateString : argumentCollection.COVARIATES) {
boolean foundClass = false;
for (Class<? extends Covariate> covClass : covariateClasses) {
if (requestedCovariateString.equalsIgnoreCase(covClass.getSimpleName())) { // -cov argument matches the class name for an implementing class
foundClass = true;
if (!requiredClasses.contains(covClass) &&
(!argumentCollection.USE_STANDARD_COVARIATES || !standardClasses.contains(covClass))) {
try {
final Covariate covariate = covClass.newInstance(); // now that we've found a matching class, try to instantiate it
optionalCovariates.add(covariate);
} catch (Exception e) {
throw new DynamicClassResolutionException(covClass, e);
}
}
}
}
if (!foundClass) {
throw new UserException.CommandLineException("The requested covariate type (" + requestedCovariateString + ") isn't a valid covariate option. Use --list to see possible covariates.");
}
}
}
return new Pair<ArrayList<Covariate>, ArrayList<Covariate>>(requiredCovariates, optionalCovariates);
}
/**
* Initializes the recalibration table -> key manager map
*
* @param requiredCovariates list of required covariates (in order)
* @param optionalCovariates list of optional covariates (in order)
* @return a map with each key manager and it's corresponding recalibration table properly initialized
*/
public static LinkedHashMap<BQSRKeyManager, Map<BitSet, RecalDatum>> initializeTables(ArrayList<Covariate> requiredCovariates, ArrayList<Covariate> optionalCovariates) {
final LinkedHashMap<BQSRKeyManager, Map<BitSet, RecalDatum>> tablesAndKeysMap = new LinkedHashMap<BQSRKeyManager, Map<BitSet, RecalDatum>>();
ArrayList<Covariate> requiredCovariatesToAdd = new ArrayList<Covariate>(requiredCovariates.size() + 1); // incrementally add the covariates to create the recal tables with 1, 2 and 3 covariates.
ArrayList<Covariate> optionalCovariatesToAdd = new ArrayList<Covariate>(); // initialize an empty array of optional covariates to create the first few tables
for (Covariate covariate : requiredCovariates) {
requiredCovariatesToAdd.add(covariate);
final Map<BitSet, RecalDatum> recalTable = new HashMap<BitSet, RecalDatum>(QualityUtils.MAX_QUAL_SCORE); // initializing a new recal table for each required covariate (cumulatively)
final BQSRKeyManager keyManager = new BQSRKeyManager(requiredCovariatesToAdd, optionalCovariatesToAdd); // initializing it's corresponding key manager
tablesAndKeysMap.put(keyManager, recalTable); // adding the pair table+key to the map
}
final Map<BitSet, RecalDatum> recalTable = new HashMap<BitSet, RecalDatum>(Short.MAX_VALUE); // initializing a new recal table to hold all optional covariates
final BQSRKeyManager keyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates); // initializing it's corresponding key manager
tablesAndKeysMap.put(keyManager, recalTable); // adding the pair table+key to the map
return tablesAndKeysMap;
}
/**
* Initializes the table -> key manager map (unfortunate copy of the above code with minor modifications to accomodate the different return types (RecalDatum vs EmpiricalQual objects)
*
* @param requiredCovariates list of required covariates (in order)
* @param optionalCovariates list of optional covariates (in order)
* @return a map with each key manager and it's corresponding recalibration table properly initialized
*/
public static LinkedHashMap<BQSRKeyManager, Map<BitSet, EmpiricalQual>> initializeEmpiricalTables(ArrayList<Covariate> requiredCovariates, ArrayList<Covariate> optionalCovariates) {
final LinkedHashMap<BQSRKeyManager, Map<BitSet, EmpiricalQual>> tablesAndKeysMap = new LinkedHashMap<BQSRKeyManager, Map<BitSet, EmpiricalQual>>();
ArrayList<Covariate> requiredCovariatesToAdd = new ArrayList<Covariate>(requiredCovariates.size() + 1); // incrementally add the covariates to create the recal tables with 1, 2 and 3 covariates.
ArrayList<Covariate> optionalCovariatesToAdd = new ArrayList<Covariate>(); // initialize an empty array of optional covariates to create the first few tables
for (Covariate covariate : requiredCovariates) {
requiredCovariatesToAdd.add(covariate);
final Map<BitSet, EmpiricalQual> recalTable = new HashMap<BitSet, EmpiricalQual>(QualityUtils.MAX_QUAL_SCORE); // initializing a new recal table for each required covariate (cumulatively)
final BQSRKeyManager keyManager = new BQSRKeyManager(requiredCovariatesToAdd, optionalCovariatesToAdd); // initializing it's corresponding key manager
tablesAndKeysMap.put(keyManager, recalTable); // adding the pair table+key to the map
}
final Map<BitSet, EmpiricalQual> recalTable = new HashMap<BitSet, EmpiricalQual>(Short.MAX_VALUE); // initializing a new recal table to hold all optional covariates
final BQSRKeyManager keyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates); // initializing it's corresponding key manager
tablesAndKeysMap.put(keyManager, recalTable); // adding the pair table+key to the map
return tablesAndKeysMap;
}
/**
* Section of code shared between the two recalibration walkers which uses the command line arguments to adjust attributes of the read such as quals or platform string
*
@ -526,8 +598,9 @@ public class RecalDataManager {
*
* @param read The read for which to compute covariate values.
* @param requestedCovariates The list of requested covariates.
* @return a matrix with all the covariates calculated for every base in the read
*/
public static void computeCovariates(final GATKSAMRecord read, final List<Covariate> requestedCovariates) {
public static ReadCovariates computeCovariates(final GATKSAMRecord read, final List<Covariate> requestedCovariates) {
final int numRequestedCovariates = requestedCovariates.size();
final int readLength = read.getReadLength();
final ReadCovariates readCovariates = new ReadCovariates(readLength, numRequestedCovariates);
@ -536,7 +609,7 @@ public class RecalDataManager {
for (Covariate covariate : requestedCovariates)
readCovariates.addCovariate(covariate.getValues(read));
read.setTemporaryAttribute(COVARS_ATTRIBUTE, readCovariates);
return readCovariates;
}
/**
@ -613,4 +686,42 @@ public class RecalDataManager {
return base;
}
}
/**
* Adds the required covariates to a covariate list
*
* Note: this method really only checks if the classes object has the expected number of required covariates, then add them by hand.
*
* @param classes list of classes to add to the covariate list
* @return the covariate list
*/
private static ArrayList<Covariate> addRequiredCovariatesToList(List<Class<? extends RequiredCovariate>> classes) {
ArrayList<Covariate> dest = new ArrayList<Covariate>(classes.size());
if (classes.size() != 2)
throw new ReviewedStingException("The number of required covariates has changed, this is a hard change in the code and needs to be inspected");
dest.add(new ReadGroupCovariate()); // enforce the order with RG first and QS next.
dest.add(new QualityScoreCovariate());
return dest;
}
/**
* Adds the standard covariates to a covariate list
*
* @param classes list of classes to add to the covariate list
* @return the covariate list
*/
private static ArrayList<Covariate> addStandardCovariatesToList(List<Class<? extends StandardCovariate>> classes) {
ArrayList<Covariate> dest = new ArrayList<Covariate>(classes.size());
for (Class<?> covClass : classes) {
try {
final Covariate covariate = (Covariate) covClass.newInstance();
dest.add(covariate);
} catch (Exception e) {
throw new DynamicClassResolutionException(covClass, e);
}
}
return dest;
}
}

View File

@ -87,6 +87,10 @@ public class RecalDatum extends RecalDatumOptimized {
public final void calcCombinedEmpiricalQuality(final int smoothing, final int maxQual) {
this.empiricalQuality = empiricalQualDouble(smoothing, maxQual); // cache the value so we don't call log over and over again
}
public final void calcEstimatedReportedQuality() {
this.estimatedQReported = -10 * Math.log10(calcExpectedErrors() / (double) numObservations);
}
//---------------------------------------------------------------------------------------------------------------
//

View File

@ -50,7 +50,7 @@ public class RecalibrationArgumentCollection {
* Please note however that the statistics reported by the tool will not accurately reflected those sites skipped by the -XL argument.
*/
@Input(fullName = "knownSites", shortName = "knownSites", doc = "A database of known polymorphic sites to skip over in the recalibration algorithm", required = false)
protected List<RodBinding<Feature>> knownSites = Collections.emptyList();
public List<RodBinding<Feature>> knownSites = Collections.emptyList();
/**
* After the header, data records occur one per line until the end of the file. The first several items on a line are the
@ -60,25 +60,25 @@ public class RecalibrationArgumentCollection {
*/
@Gather(BQSRGatherer.class)
@Output
protected PrintStream RECAL_FILE;
public PrintStream RECAL_FILE;
/**
* List all implemented covariates.
*/
@Argument(fullName = "list", shortName = "ls", doc = "List the available covariates and exit", required = false)
protected boolean LIST_ONLY = false;
public boolean LIST_ONLY = false;
/**
* Covariates to be used in the recalibration. Each covariate is given as a separate cov parameter. ReadGroup and ReportedQuality are required covariates and are already added for you. See the list of covariates with -list.
*/
@Argument(fullName = "covariate", shortName = "cov", doc = "Covariates to be used in the recalibration. Each covariate is given as a separate cov parameter. ReadGroup and ReportedQuality are required covariates and are already added for you.", required = false)
protected String[] COVARIATES = null;
public String[] COVARIATES = null;
/*
* Use the standard set of covariates in addition to the ones listed using the -cov argument
*/
@Argument(fullName = "standard_covs", shortName = "standard", doc = "Use the standard set of covariates in addition to the ones listed using the -cov argument", required = false)
protected boolean USE_STANDARD_COVARIATES = true;
public boolean USE_STANDARD_COVARIATES = true;
/////////////////////////////
// Debugging-only Arguments
@ -88,7 +88,7 @@ public class RecalibrationArgumentCollection {
*/
@Hidden
@Argument(fullName = "run_without_dbsnp_potentially_ruining_quality", shortName = "run_without_dbsnp_potentially_ruining_quality", required = false, doc = "If specified, allows the recalibrator to be used without a dbsnp rod. Very unsafe and for expert users only.")
protected boolean RUN_WITHOUT_DBSNP = false;
public boolean RUN_WITHOUT_DBSNP = false;
/**
* CountCovariates and TableRecalibration accept a --solid_recal_mode <MODE> flag which governs how the recalibrator handles the
@ -152,6 +152,9 @@ public class RecalibrationArgumentCollection {
@Hidden
@Argument(fullName = "force_platform", shortName = "fP", required = false, doc = "If provided, the platform of EVERY read will be forced to be the provided String. Valid options are illumina, 454, and solid.")
public String FORCE_PLATFORM = null;
@Hidden
@Argument(fullName = "quantizing_levels", shortName = "ql", required = false, doc = "number of distinct quality scores in the quantized output")
public int QUANTIZING_LEVELS = 16;
}

View File

@ -240,6 +240,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
refAllele = Allele.create(refBases, true);
altAllele = Allele.create(Allele.NULL_ALLELE_STRING, false);
}
else continue; // don't go on with this allele if refBases are non-standard
} else {
// insertion case
if (Allele.acceptableAlleleBases(s)) {
@ -247,6 +248,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
altAllele = Allele.create(s, false);
stop = loc.getStart();
}
else continue; // go on to next allele if consensus insertion has any non-standard base.
}

View File

@ -36,8 +36,10 @@ import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.HasGenomeLocation;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.variantcontext.*;
@ -121,8 +123,14 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
public int MIN_MAPPING_QUALITY_SCORE = 20;
@Argument(fullName = "sampleToPhase", shortName = "sampleToPhase", doc = "Only include these samples when phasing", required = false)
protected Set
<String> samplesToPhase = null;
protected Set<String> samplesToPhase = null;
@Hidden
@Argument(fullName = "permitNoSampleOverlap", shortName = "permitNoSampleOverlap", doc = "Don't exit (just WARN) when the VCF and BAMs do not overlap in samples", required = false)
private boolean permitNoSampleOverlap = false;
@Argument(fullName = "respectPhaseInInput", shortName = "respectPhaseInInput", doc = "Will only phase genotypes in cases where the resulting output will necessarily be consistent with any existing phase (for example, from trios)", required = false)
private boolean respectPhaseInInput = false;
private GenomeLoc mostDownstreamLocusReached = null;
@ -205,8 +213,18 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
// todo -- fix samplesToPhase
String trackName = variantCollection.variants.getName();
Map<String, VCFHeader> rodNameToHeader = getVCFHeadersFromRods(getToolkit(), Arrays.asList(trackName));
Set<String> samples = new TreeSet<String>(samplesToPhase == null ? rodNameToHeader.get(trackName).getGenotypeSamples() : samplesToPhase);
writer.writeHeader(new VCFHeader(hInfo, samples));
Set<String> vcfSamples = new TreeSet<String>(samplesToPhase == null ? rodNameToHeader.get(trackName).getGenotypeSamples() : samplesToPhase);
writer.writeHeader(new VCFHeader(hInfo, vcfSamples));
Set<String> readSamples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader());
readSamples.retainAll(vcfSamples);
if (readSamples.isEmpty()) {
String noPhaseString = "No common samples in VCF and BAM headers" + (samplesToPhase == null ? "" : " (limited to sampleToPhase parameters)") + ", so nothing could possibly be phased!";
if (permitNoSampleOverlap)
logger.warn(noPhaseString);
else
throw new UserException(noPhaseString);
}
}
public boolean generateExtendedEvents() {
@ -472,6 +490,13 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
private int phasingSiteIndex = -1;
private Map<String, PhasingRead> readsAtHetSites = null;
private void clearFields() {
hetGenotypes = null;
prevHetAndInteriorIt = null;
phasingSiteIndex = -1;
readsAtHetSites = null;
}
public boolean hasPreviousHets() {
return phasingSiteIndex > 0;
}
@ -498,12 +523,20 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
}
phasingSiteIndex = listHetGenotypes.size();
if (phasingSiteIndex == 0) { // no previous sites against which to phase
hetGenotypes = null;
prevHetAndInteriorIt = null;
clearFields();
return;
}
prevHetAndInteriorIt.previous(); // so that it points to the previous het site [and NOT one after it, due to the last call to next()]
if (respectPhaseInInput) {
Genotype prevHetGenotype = prevHetAndInteriorIt.clone().next().unfinishedVariant.getGenotype(sample);
if (!prevHetGenotype.isPhased()) {
// Make this genotype unphaseable, since its previous het is not already phased [as required by respectPhaseInInput]:
clearFields();
return;
}
}
// Add the (het) position to be phased:
GenomeLoc phaseLocus = VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(), vr.variant);
GenotypeAndReadBases grbPhase = new GenotypeAndReadBases(vr.variant.getGenotype(sample), vr.sampleReadBases.get(sample), phaseLocus);

View File

@ -0,0 +1,72 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.qc;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
/**
* a walker that simply throws errors. Allows us to test that the engine is behaving as expected with error handling
*/
public class ErrorThrowingWalker extends RodWalker<Integer,Integer> implements TreeReducible<Integer> {
@Input(fullName="exception", shortName = "E", doc="Java class of exception to throw", required=true)
public String exceptionToThrow;
//
// Template code to allow us to build the walker, doesn't actually do anything
//
@Override
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
if ( exceptionToThrow.equals("UserException") ) {
throw new UserException("UserException");
} else if ( exceptionToThrow.equals("NullPointerException") ) {
throw new NullPointerException();
} else if ( exceptionToThrow.equals("ReviewedStingException") ) {
throw new ReviewedStingException("ReviewedStingException");
} else {
throw new UserException.BadArgumentValue("exception", "exception isn't a recognized value " + exceptionToThrow);
}
}
@Override
public Integer reduceInit() {
return 0;
}
@Override
public Integer reduce(Integer value, Integer sum) {
return value + sum;
}
public Integer treeReduce(final Integer lhs, final Integer rhs) {
return lhs + rhs;
}
}

View File

@ -0,0 +1,55 @@
package org.broadinstitute.sting.gatk.walkers.recalibration;
/*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
/**
* Created by IntelliJ IDEA.
* User: carneiro
* Date: Mar 22, 2012
*
* Object that holds the empirical quality and estimated reported quality values for on-the-fly recalibration. This is a simplification of the RecalDatum object
*/
public class EmpiricalQual {
private double estimatedQReported; // estimated reported quality score based on combined data's individual q-reporteds and number of observations
private double empiricalQuality; // the empirical quality for datums that have been collapsed together (by read group and reported quality, for example)
private EmpiricalQual() {}
public EmpiricalQual(final double estimatedQReported, final double empiricalQuality) {
this.estimatedQReported = estimatedQReported;
this.empiricalQuality = empiricalQuality;
}
public final double getEstimatedQReported() {
return estimatedQReported;
}
public final double getEmpiricalQuality() {
return empiricalQuality;
}
}

View File

@ -93,6 +93,7 @@ import java.util.*;
*/
@Reference(window=@Window(start=-50, stop=50))
public class VariantEvalWalker extends RodWalker<Integer, Integer> implements TreeReducible<Integer> {
public static final String IS_SINGLETON_KEY = "ISSINGLETON";
@Output
protected PrintStream out;
@ -213,6 +214,9 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
// Public constants
private static String ALL_SAMPLE_NAME = "all";
// the number of processed bp for this walker
long nProcessedLoci = 0;
// Utility class
private final VariantEvalUtils variantEvalUtils = new VariantEvalUtils(this);
@ -325,10 +329,10 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
*/
@Override
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
for ( NewEvaluationContext nec : evaluationContexts.values() ) {
synchronized (nec) {
nec.update0(tracker, ref, context);
}
// we track the processed bp and expose this for modules instead of wasting CPU power on calculating
// the same thing over and over in evals that want the processed bp
synchronized (this) {
nProcessedLoci += context.getSkippedBases() + (ref == null ? 0 : 1);
}
if (tracker != null) {
@ -454,7 +458,7 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
if ( lenientMatch == null ) lenientMatch = comp;
break;
case NO_MATCH:
;
// do nothing
}
}
@ -494,7 +498,7 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
if (field.get(ve) instanceof TableType) {
TableType t = (TableType) field.get(ve);
String subTableName = ve.getClass().getSimpleName() + "." + field.getName();
final String subTableName = ve.getClass().getSimpleName() + "." + field.getName();
final DataPoint dataPointAnn = datamap.get(field);
GATKReportTable table;
@ -509,17 +513,10 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
table.addColumn(vs.getName(), "unknown");
}
table.addColumn("row", "unknown");
for ( Object o : t.getColumnKeys() ) {
String c;
if (o instanceof String) {
c = (String) o;
} else {
c = o.toString();
}
table.addColumn(t.getRowName(), "unknown");
for ( final Object o : t.getColumnKeys() ) {
final String c = o.toString();
table.addColumn(c, 0.0);
}
} else {
@ -527,7 +524,7 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
}
for (int row = 0; row < t.getRowKeys().length; row++) {
String r = (String) t.getRowKeys()[row];
final String r = t.getRowKeys()[row].toString();
for ( VariantStratifier vs : stratificationObjects ) {
final String columnName = vs.getName();
@ -535,17 +532,10 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
}
for (int col = 0; col < t.getColumnKeys().length; col++) {
String c;
if (t.getColumnKeys()[col] instanceof String) {
c = (String) t.getColumnKeys()[col];
} else {
c = t.getColumnKeys()[col].toString();
}
String newStateKey = stateKey.toString() + r;
final String c = t.getColumnKeys()[col].toString();
final String newStateKey = stateKey.toString() + r;
table.set(newStateKey, c, t.getCell(row, col));
table.set(newStateKey, "row", r);
table.set(newStateKey, t.getRowName(), r);
}
}
} else {
@ -594,6 +584,10 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
public Set<SortableJexlVCMatchExp> getJexlExpressions() { return jexlExpressions; }
public long getnProcessedLoci() {
return nProcessedLoci;
}
public Set<String> getContigNames() {
final TreeSet<String> contigs = new TreeSet<String>();
for( final SAMSequenceRecord r : getToolkit().getReferenceDataSource().getReference().getSequenceDictionary().getSequences()) {

View File

@ -30,7 +30,6 @@ public class CountVariants extends VariantEvaluator implements StandardEval {
@DataPoint(description = "Number of variants per base", format = "%.8f")
public double variantRatePerBp = 0;
@DataPoint(description = "Number of snp loci", format = "%d")
public long nSNPs = 0;
@DataPoint(description = "Number of mnp loci", format = "%d")
@ -47,7 +46,6 @@ public class CountVariants extends VariantEvaluator implements StandardEval {
@DataPoint(description = "Number of mixed loci (loci that can't be classified as a SNP, Indel or MNP)", format = "%d")
public long nMixed = 0;
@DataPoint(description = "Number of no calls loci", format = "%d")
public long nNoCalls = 0;
@DataPoint(description = "Number of het loci", format = "%d")
@ -72,8 +70,8 @@ public class CountVariants extends VariantEvaluator implements StandardEval {
public double indelRate = 0;
@DataPoint(description = "indel rate per base pair", format = "%.2f")
public double indelRatePerBp = 0;
@DataPoint(description = "deletion to insertion ratio", format = "%.2f")
public double deletionInsertionRatio = 0;
@DataPoint(description = "insertion to deletion ratio", format = "%.2f")
public double insertionDeletionRatio = 0;
private double perLocusRate(long n) {
return rate(n, nProcessedLoci);
@ -91,10 +89,6 @@ public class CountVariants extends VariantEvaluator implements StandardEval {
return 1; // we only need to see each eval track
}
public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
nProcessedLoci += context.getSkippedBases() + (ref == null ? 0 : 1);
}
public String update1(VariantContext vc1, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
nCalledLoci++;
@ -113,12 +107,12 @@ public class CountVariants extends VariantEvaluator implements StandardEval {
case SNP:
nVariantLoci++;
nSNPs++;
if (vc1.getAttributeAsBoolean("ISSINGLETON", false)) nSingletons++;
if (variantWasSingleton(vc1)) nSingletons++;
break;
case MNP:
nVariantLoci++;
nMNPs++;
if (vc1.getAttributeAsBoolean("ISSINGLETON", false)) nSingletons++;
if (variantWasSingleton(vc1)) nSingletons++;
break;
case INDEL:
nVariantLoci++;
@ -194,6 +188,7 @@ public class CountVariants extends VariantEvaluator implements StandardEval {
}
public void finalizeEvaluation() {
nProcessedLoci = getWalker().getnProcessedLoci();
variantRate = perLocusRate(nVariantLoci);
variantRatePerBp = perLocusRInverseRate(nVariantLoci);
heterozygosity = perLocusRate(nHets);
@ -201,6 +196,6 @@ public class CountVariants extends VariantEvaluator implements StandardEval {
hetHomRatio = ratio(nHets, nHomVar);
indelRate = perLocusRate(nDeletions + nInsertions + nComplex);
indelRatePerBp = perLocusRInverseRate(nDeletions + nInsertions + nComplex);
deletionInsertionRatio = ratio(nDeletions, nInsertions);
insertionDeletionRatio = ratio(nInsertions, nDeletions);
}
}

View File

@ -59,7 +59,7 @@ public class GenotypeConcordance extends VariantEvaluator {
private boolean discordantInteresting = false;
static class FrequencyStats implements TableType {
static class FrequencyStats extends TableType {
class Stats {
public Stats(int found, int missed) { nFound = found; nMissed = missed; }
public long nFound = 0;
@ -103,7 +103,7 @@ public class GenotypeConcordance extends VariantEvaluator {
}
}
static class QualityScoreHistograms implements TableType {
static class QualityScoreHistograms extends TableType {
final static int NUM_BINS = 20;
final HashMap<Integer,Integer> truePositiveQualityScoreMap = new HashMap<Integer,Integer>(); // A HashMap holds all the quality scores until we are able to bin them appropriately
final HashMap<Integer,Integer> falsePositiveQualityScoreMap = new HashMap<Integer,Integer>();
@ -362,7 +362,7 @@ public class GenotypeConcordance extends VariantEvaluator {
/**
* a table of sample names to genotype concordance figures
*/
class SampleStats implements TableType {
class SampleStats extends TableType {
private final int nGenotypeTypes;
// sample to concordance stats object
@ -448,7 +448,7 @@ class SampleStats implements TableType {
/**
* a table of sample names to genotype concordance summary statistics
*/
class SampleSummaryStats implements TableType {
class SampleSummaryStats extends TableType {
protected final static String ALL_SAMPLES_KEY = "allSamples";
protected final static String[] COLUMN_KEYS = new String[]{
"percent_comp_ref_called_ref",

View File

@ -60,6 +60,7 @@ public class GenotypePhasingEvaluator extends VariantEvaluator {
double minPhaseQuality = 10.0;
public void initialize(VariantEvalWalker walker) {
super.initialize(walker);
this.samplePhasingStatistics = new SamplePhasingStatistics(walker.getMinPhaseQuality());
this.samplePrevGenotypes = new SamplePreviousGenotypes();
}
@ -294,14 +295,6 @@ class CompEvalGenotypes {
public Genotype getEvalGenotype() {
return evalGt;
}
public void setCompGenotype(Genotype compGt) {
this.compGt = compGt;
}
public void setEvalGenotype(Genotype evalGt) {
this.evalGt = evalGt;
}
}
class SamplePreviousGenotypes {
@ -376,7 +369,7 @@ class PhaseStats {
/**
* a table of sample names to genotype phasing statistics
*/
class SamplePhasingStatistics implements TableType {
class SamplePhasingStatistics extends TableType {
private HashMap<String, PhaseStats> sampleStats = null;
private double minPhaseQuality;

View File

@ -1,111 +0,0 @@
package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis;
import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint;
import org.broadinstitute.sting.gatk.walkers.varianteval.util.TableType;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
/**
* IF THERE IS NO JAVADOC RIGHT HERE, YELL AT chartl
*
* @Author chartl
* @Date May 26, 2010
*/
@Analysis(name = "Indel length histograms", description = "Shows the distribution of insertion/deletion event lengths (negative for deletion, positive for insertion)")
public class IndelLengthHistogram extends VariantEvaluator {
private static final int SIZE_LIMIT = 100;
@DataPoint(description="Histogram of indel lengths")
IndelHistogram indelHistogram = new IndelHistogram(SIZE_LIMIT);
/*
* Indel length histogram table object
*/
static class IndelHistogram implements TableType {
private Integer[] colKeys;
private int limit;
private String[] rowKeys = {"EventLength"};
private Integer[] indelHistogram;
public IndelHistogram(int limit) {
colKeys = initColKeys(limit);
indelHistogram = initHistogram(limit);
this.limit = limit;
}
public Object[] getColumnKeys() {
return colKeys;
}
public Object[] getRowKeys() {
return rowKeys;
}
public Object getCell(int row, int col) {
return indelHistogram[col];
}
private Integer[] initColKeys(int size) {
Integer[] cK = new Integer[size*2+1];
int index = 0;
for ( int i = -size; i <= size; i ++ ) {
cK[index] = i;
index++;
}
return cK;
}
private Integer[] initHistogram(int size) {
Integer[] hist = new Integer[size*2+1];
for ( int i = 0; i < 2*size+1; i ++ ) {
hist[i] = 0;
}
return hist;
}
public String getName() { return "indelHistTable"; }
public void update(int eLength) {
indelHistogram[len2index(eLength)]++;
}
private int len2index(int len) {
if ( len > limit || len < -limit ) {
throw new ReviewedStingException("Indel length exceeds limit of "+limit+" please increase indel limit size");
}
return len + limit;
}
}
public boolean enabled() { return true; }
public String getName() { return "IndelLengthHistogram"; }
public int getComparisonOrder() { return 1; } // need only the evals
public String update1(VariantContext vc1, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
if ( vc1.isIndel() && vc1.isPolymorphicInSamples() ) {
if ( ! vc1.isBiallelic() ) {
//veWalker.getLogger().warn("[IndelLengthHistogram] Non-biallelic indel at "+ref.getLocus()+" ignored.");
return vc1.toString(); // biallelic sites are output
}
// only count simple insertions/deletions, not complex indels
if ( vc1.isSimpleInsertion() ) {
indelHistogram.update(vc1.getAlternateAllele(0).length());
} else if ( vc1.isSimpleDeletion() ) {
indelHistogram.update(-vc1.getReference().length());
}
}
return null;
}
}

View File

@ -1,295 +0,0 @@
package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker;
import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis;
import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint;
import org.broadinstitute.sting.gatk.walkers.varianteval.util.TableType;
import org.broadinstitute.sting.utils.IndelUtils;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.ArrayList;
/*
* Copyright (c) 2010 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
@Analysis(name = "IndelStatistics", description = "Shows various indel metrics and statistics")
public class IndelStatistics extends VariantEvaluator {
@DataPoint(description = "Indel Statistics")
IndelStats indelStats = null;
// @DataPoint(description = "Indel Classification")
IndelClasses indelClasses = null;
int numSamples = 0;
public void initialize(VariantEvalWalker walker) {
numSamples = walker.getNumSamples();
}
private static final int INDEL_SIZE_LIMIT = 100;
private static final int IND_HET = 0;
private static final int IND_INS = 1;
private static final int IND_DEL = 2;
private static final int IND_COMPLEX = 3;
private static final int IND_HET_INS = 4;
private static final int IND_HOM_INS = 5;
private static final int IND_HET_DEL = 6;
private static final int IND_HOM_DEL = 7;
private static final int IND_HOM_REF = 8;
private static final int IND_MIXED = 9;
private static final int IND_LONG = 10;
private static final int IND_AT_EXP = 11;
private static final int IND_CG_EXP = 12;
private static final int IND_FRAMESHIFT = 13;
private static final int NUM_SCALAR_COLUMNS = 14;
static int len2Index(int ind) {
return ind+INDEL_SIZE_LIMIT+NUM_SCALAR_COLUMNS;
}
static int index2len(int ind) {
return ind-INDEL_SIZE_LIMIT-NUM_SCALAR_COLUMNS;
}
static class IndelStats implements TableType {
protected final static String[] COLUMN_KEYS;
static {
COLUMN_KEYS= new String[NUM_SCALAR_COLUMNS+2*INDEL_SIZE_LIMIT+1];
COLUMN_KEYS[0] = "heterozygosity";
COLUMN_KEYS[1] = "insertions";
COLUMN_KEYS[2] = "deletions";
COLUMN_KEYS[3] = "complex";
COLUMN_KEYS[4] = "het_insertions";
COLUMN_KEYS[5] = "homozygous_insertions";
COLUMN_KEYS[6] = "het_deletions";
COLUMN_KEYS[7] = "homozygous_deletions";
COLUMN_KEYS[8] = "homozygous_reference_sites";
COLUMN_KEYS[9] = "complex_events";
COLUMN_KEYS[10] = "long_indels";
COLUMN_KEYS[11] = "AT_expansions";
COLUMN_KEYS[12] = "CG_expansions";
COLUMN_KEYS[13] = "frameshift_indels";
for (int k=NUM_SCALAR_COLUMNS; k < NUM_SCALAR_COLUMNS+ 2*INDEL_SIZE_LIMIT+1; k++)
COLUMN_KEYS[k] = "indel_size_len"+Integer.valueOf(index2len(k));
}
// map of sample to statistics
protected final int[] indelSummary;
public IndelStats(final VariantContext vc) {
indelSummary = new int[COLUMN_KEYS.length];
}
/**
*
* @return one row per sample
*/
public Object[] getRowKeys() {
return new String[]{"all"};
}
public Object getCell(int x, int y) {
return String.format("%d",indelSummary[y]);
}
/**
* get the column keys
* @return a list of objects, in this case strings, that are the column names
*/
public Object[] getColumnKeys() {
return COLUMN_KEYS;
}
public String getName() {
return "IndelStats";
}
public int getComparisonOrder() {
return 1; // we only need to see each eval track
}
public String toString() {
return getName();
}
/*
* increment the specified value
*/
public void incrValue(VariantContext vc, ReferenceContext ref) {
int eventLength = 0;
boolean isInsertion = false, isDeletion = false;
if ( vc.isSimpleInsertion() ) {
eventLength = vc.getAlternateAllele(0).length();
indelSummary[IND_INS]++;
isInsertion = true;
} else if ( vc.isSimpleDeletion() ) {
indelSummary[IND_DEL]++;
eventLength = -vc.getReference().length();
isDeletion = true;
}
else if (vc.isComplexIndel()) {
indelSummary[IND_COMPLEX]++;
}
else if (vc.isMixed())
indelSummary[IND_MIXED]++;
if (IndelUtils.isATExpansion(vc,ref))
indelSummary[IND_AT_EXP]++;
if (IndelUtils.isCGExpansion(vc,ref))
indelSummary[IND_CG_EXP]++;
// make sure event doesn't overstep array boundaries
if (vc.isSimpleDeletion() || vc.isSimpleInsertion()) {
if (Math.abs(eventLength) < INDEL_SIZE_LIMIT) {
indelSummary[len2Index(eventLength)]++;
if (eventLength % 3 != 0)
indelSummary[IND_FRAMESHIFT]++;
}
else
indelSummary[IND_LONG]++;
}
}
}
static class IndelClasses implements TableType {
protected final static String[] columnNames = IndelUtils.getIndelClassificationNames();
// map of sample to statistics
protected final int[] indelClassSummary;
public IndelClasses(final VariantContext vc) {
indelClassSummary = new int[columnNames.length];
}
/**
*
* @return one row per sample
*/
public Object[] getRowKeys() {
return new String[]{"all"};
}
public Object getCell(int x, int y) {
return String.format("%d",indelClassSummary[y]);
}
/**
* get the column keys
* @return a list of objects, in this case strings, that are the column names
*/
public Object[] getColumnKeys() {
return columnNames;
}
public String getName() {
return "IndelClasses";
}
public int getComparisonOrder() {
return 1; // we only need to see each eval track
}
public String toString() {
return getName();
}
private void incrementSampleStat(VariantContext vc, int index) {
indelClassSummary[index]++;
}
/*
* increment the specified value
*/
public void incrValue(VariantContext vc, ReferenceContext ref) {
ArrayList<Integer> indices = IndelUtils.findEventClassificationIndex(vc,ref);
//System.out.format("pos:%d \nREF: %s, ALT: %s\n",vc.getStart(), vc.getReference().getDisplayString(),
// vc.getAlternateAllele(0).getDisplayString());
byte[] refBases = ref.getBases();
//System.out.format("ref bef:%s\n",new String(Arrays.copyOfRange(refBases,0,refBases.length/2+1) ));
//System.out.format("ref aft:%s\n",new String(Arrays.copyOfRange(refBases,refBases.length/2+1,refBases.length) ));
for (int index: indices) {
incrementSampleStat(vc, index);
// System.out.println(IndelUtils.getIndelClassificationName(index));
}
}
}
//public IndelStatistics(VariantEvalWalker parent) {
//super(parent);
// don't do anything
//}
public String getName() {
return "IndelStatistics";
}
public int getComparisonOrder() {
return 1; // we only need to see each eval track
}
public boolean enabled() {
return true;
}
public String toString() {
return getName();
}
public String update1(VariantContext eval, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
if (eval != null && eval.isPolymorphicInSamples()) {
if ( indelStats == null ) {
indelStats = new IndelStats(eval);
}
if ( indelClasses == null ) {
indelClasses = new IndelClasses(eval);
}
if ( eval.isIndel() || eval.isMixed() ) {
if (indelStats != null )
indelStats.incrValue(eval, ref);
if (indelClasses != null)
indelClasses.incrValue(eval, ref);
}
}
return null; // This module doesn't capture any interesting sites, so return null
}
public void finalizeEvaluation() {
int k=0;
}
}

View File

@ -0,0 +1,230 @@
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis;
import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint;
import org.broadinstitute.sting.gatk.walkers.varianteval.util.IndelHistogram;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
@Analysis(description = "Evaluation summary for indels")
public class IndelSummary extends VariantEvaluator implements StandardEval {
final protected static Logger logger = Logger.getLogger(IndelSummary.class);
@DataPoint(description = "Number of SNPs", format = "%d")
public int n_SNPs = 0;
@DataPoint(description = "Number of singleton SNPs", format = "%d")
public int n_singleton_SNPs = 0;
@DataPoint(description = "Number of Indels", format = "%d")
public int n_indels = 0;
// Number of Indels Sites (counts one for any number of alleles at site)
public int nIndelSites = 0;
@DataPoint(description = "Number of singleton Indels", format = "%d")
public int n_singleton_indels = 0;
// counts 1 for each site where the number of alleles > 2
public int nMultiIndelSites = 0;
@DataPoint(description = "Percent of indel sites that are multi-allelic")
public String percent_of_sites_with_more_than_2_alleles;
@DataPoint(description = "SNP to indel ratio")
public String SNP_to_indel_ratio;
@DataPoint(description = "Singleton SNP to indel ratio")
public String SNP_to_indel_ratio_for_singletons;
@DataPoint(description = "Indel novelty rate")
public String indel_novelty_rate;
@DataPoint(description = "1 to 2 bp indel ratio")
public String ratio_of_1_to_2_bp_indels;
@DataPoint(description = "1 to 3 bp indel ratio")
public String ratio_of_1_to_3_bp_indels;
@DataPoint(description = "2 to 3 bp indel ratio")
public String ratio_of_2_to_3_bp_indels;
@DataPoint(description = "1 and 2 to 3 bp indel ratio")
public String ratio_of_1_and_2_to_3_bp_indels;
@DataPoint(description = "Frameshift percent")
public String frameshift_rate_for_coding_indels;
//
// insertions to deletions
//
@DataPoint(description = "Insertion to deletion ratio")
public String insertion_to_deletion_ratio;
@DataPoint(description = "Insertion to deletion ratio for 1 bp events")
public String insertion_to_deletion_ratio_for_1bp_indels;
//
// Frameshifts
//
@DataPoint(description = "Number of indels in protein-coding regions labeled as frameshift")
public int n_coding_indels_frameshifting = 0;
@DataPoint(description = "Number of indels in protein-coding regions not labeled as frameshift")
public int n_coding_indels_in_frame = 0;
//
// Het : hom ratios
//
@DataPoint(description = "Het to hom ratio for SNPs")
public String SNP_het_to_hom_ratio;
@DataPoint(description = "Het to hom ratio for indels")
public String indel_het_to_hom_ratio;
int nSNPHets = 0, nSNPHoms = 0, nIndelHets = 0, nIndelHoms = 0;
int nKnownIndels = 0, nInsertions = 0;
int n1bpInsertions = 0, n1bpDeletions = 0;
int[] countByLength = new int[]{0, 0, 0, 0}; // note that the first element isn't used
public final static int MAX_SIZE_FOR_HISTOGRAM = 10;
@DataPoint(description = "Histogram of indel lengths")
IndelHistogram lengthHistogram = new IndelHistogram(MAX_SIZE_FOR_HISTOGRAM, true);
@DataPoint(description = "Number of large (>10 bp) deletions")
public int n_large_deletions = 0;
@DataPoint(description = "Number of large (>10 bp) insertions")
public int n_large_insertions = 0;
@DataPoint(description = "Ratio of large (>10 bp) insertions to deletions")
public String insertion_to_deletion_ratio_for_large_indels;
@Override public boolean enabled() { return true; }
@Override public int getComparisonOrder() { return 2; }
public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
if ( eval == null || eval.isMonomorphicInSamples() )
return null;
// update counts
switch ( eval.getType() ) {
case SNP:
n_SNPs += eval.getNAlleles() - 1; // -1 for ref
if ( variantWasSingleton(eval) ) n_singleton_SNPs++;
// collect information about het / hom ratio
for ( final Genotype g : eval.getGenotypes() ) {
if ( g.isHet() ) nSNPHets++;
if ( g.isHomVar() ) nSNPHoms++;
}
break;
case INDEL:
if ( eval.isComplexIndel() ) break; // don't count complex substitutions
nIndelSites++;
if ( ! eval.isBiallelic() ) nMultiIndelSites++;
if ( variantWasSingleton(eval) ) n_singleton_indels++;
// collect information about het / hom ratio
for ( final Genotype g : eval.getGenotypes() ) {
if ( g.isHet() ) nIndelHets++;
if ( g.isHomVar() ) nIndelHoms++;
}
for ( Allele alt : eval.getAlternateAlleles() ) {
n_indels++; // +1 for each alt allele
if ( comp != null ) nKnownIndels++; // TODO -- make this test allele specific?
// ins : del ratios
final int alleleSize = alt.length() - eval.getReference().length();
if ( alleleSize == 0 ) throw new ReviewedStingException("Allele size not expected to be zero for indel: alt = " + alt + " ref = " + eval.getReference());
if ( alleleSize > 0 ) nInsertions++;
if ( alleleSize == 1 ) n1bpInsertions++;
if ( alleleSize == -1 ) n1bpDeletions++;
// update the length histogram
lengthHistogram.update(eval.getReference(), alt);
// requires snpEFF annotations
if ( eval.getAttributeAsString("SNPEFF_GENE_BIOTYPE", "missing").equals("protein_coding") ) {
final String effect = eval.getAttributeAsString("SNPEFF_EFFECT", "missing");
if ( effect.equals("missing") )
throw new ReviewedStingException("Saw SNPEFF_GENE_BIOTYPE but unexpected no SNPEFF_EFFECT at " + eval);
if ( effect.equals("FRAME_SHIFT") )
n_coding_indels_frameshifting++;
else if ( effect.startsWith("CODON") )
n_coding_indels_in_frame++;
else
; // lots of protein coding effects that shouldn't be counted, such as INTRON
}
// update the baby histogram
final int absSize = Math.abs(alleleSize);
if ( absSize < countByLength.length ) countByLength[absSize]++;
}
break;
default:
throw new UserException.BadInput("Unexpected variant context type: " + eval);
}
return null; // we don't capture any interesting sites
}
public void finalizeEvaluation() {
percent_of_sites_with_more_than_2_alleles = formattedRatio(nMultiIndelSites, nIndelSites);
SNP_to_indel_ratio = formattedRatio(n_SNPs, n_indels);
SNP_to_indel_ratio_for_singletons = formattedRatio(n_singleton_SNPs, n_singleton_indels);
indel_novelty_rate = formattedNoveltyRate(nKnownIndels, n_indels);
ratio_of_1_to_2_bp_indels = formattedRatio(countByLength[1], countByLength[2]);
ratio_of_1_to_3_bp_indels = formattedRatio(countByLength[1], countByLength[3]);
ratio_of_2_to_3_bp_indels = formattedRatio(countByLength[2], countByLength[3]);
ratio_of_1_and_2_to_3_bp_indels = formattedRatio(countByLength[1] + countByLength[2], countByLength[3]);
frameshift_rate_for_coding_indels = formattedPercent(n_coding_indels_frameshifting, n_coding_indels_in_frame + n_coding_indels_frameshifting);
SNP_het_to_hom_ratio = formattedRatio(nSNPHets, nSNPHoms);
indel_het_to_hom_ratio = formattedRatio(nIndelHets, nIndelHoms);
n_large_deletions = lengthHistogram.getnTooBigDeletions();
n_large_insertions = lengthHistogram.getnTooBigInsertions();
insertion_to_deletion_ratio = formattedRatio(nInsertions, n_indels - nInsertions);
insertion_to_deletion_ratio_for_1bp_indels = formattedRatio(n1bpInsertions, n1bpDeletions);
insertion_to_deletion_ratio_for_large_indels = formattedRatio(n_large_insertions, n_large_deletions);
}
}

View File

@ -10,7 +10,6 @@ import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint;
import org.broadinstitute.sting.utils.MendelianViolation;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.io.PrintStream;
import java.util.Map;
import java.util.Set;
@ -60,17 +59,6 @@ public class MendelianViolationEvaluator extends VariantEvaluator {
@DataPoint(description = "Number of mendelian violations found", format = "%d")
long nViolations;
/*@DataPoint(description = "number of child hom ref calls where the parent was hom variant", format = "%d")
long KidHomRef_ParentHomVar;
@DataPoint(description = "number of child het calls where the parent was hom ref", format = "%d")
long KidHet_ParentsHomRef;
@DataPoint(description = "number of child het calls where the parent was hom variant", format = "%d")
long KidHet_ParentsHomVar;
@DataPoint(description = "number of child hom variant calls where the parent was hom ref", format = "%d")
long KidHomVar_ParentHomRef;
*/
@DataPoint(description="Number of mendelian violations of the type HOM_REF/HOM_REF -> HOM_VAR", format = "%d")
long mvRefRef_Var;
@DataPoint(description="Number of mendelian violations of the type HOM_REF/HOM_REF -> HET", format = "%d")
@ -88,12 +76,6 @@ public class MendelianViolationEvaluator extends VariantEvaluator {
@DataPoint(description="Number of mendelian violations of the type HOM_VAR/HOM_VAR -> HET", format = "%d")
long mvVarVar_Het;
/*@DataPoint(description ="Number of inherited var alleles from het parents", format = "%d")
long nInheritedVar;
@DataPoint(description ="Number of inherited ref alleles from het parents", format = "%d")
long nInheritedRef;*/
@DataPoint(description="Number of HomRef/HomRef/HomRef trios", format = "%d")
long HomRefHomRef_HomRef;
@DataPoint(description="Number of Het/Het/Het trios", format = "%d")
@ -120,18 +102,15 @@ public class MendelianViolationEvaluator extends VariantEvaluator {
long HomVarHet_inheritedVar;
MendelianViolation mv;
PrintStream mvFile;
Map<String,Set<Sample>> families;
public void initialize(VariantEvalWalker walker) {
//Changed by Laurent Francioli - 2011-06-07
//mv = new MendelianViolation(walker.getFamilyStructure(), walker.getMendelianViolationQualThreshold());
super.initialize(walker);
mv = new MendelianViolation(walker.getMendelianViolationQualThreshold(),false);
families = walker.getSampleDB().getFamilies();
}
public boolean enabled() {
//return getVEWalker().FAMILY_STRUCTURE != null;
return true;
}

View File

@ -1,154 +0,0 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker;
import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis;
import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint;
import org.broadinstitute.sting.gatk.walkers.varianteval.util.TableType;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.variantcontext.*;
import java.util.*;
@Analysis(description = "Evaluation summary for multi-allelic variants")
public class MultiallelicAFs extends VariantEvaluator {
final protected static Logger logger = Logger.getLogger(MultiallelicAFs.class);
public enum Type {
SNP, INDEL
}
@DataPoint(description="Histogram of allele frequencies for most common SNP alternate allele")
AFHistogram AFhistogramMaxSnp = new AFHistogram();
@DataPoint(description="Histogram of allele frequencies for less common SNP alternate alleles")
AFHistogram AFhistogramMinSnp = new AFHistogram();
@DataPoint(description="Histogram of allele frequencies for most common Indel alternate allele")
AFHistogram AFhistogramMaxIndel = new AFHistogram();
@DataPoint(description="Histogram of allele frequencies for less common Indel alternate alleles")
AFHistogram AFhistogramMinIndel = new AFHistogram();
/*
* AF histogram table object
*/
static class AFHistogram implements TableType {
private Object[] rowKeys, colKeys = {"count"};
private int[] AFhistogram;
private static final double AFincrement = 0.01;
private static final int numBins = (int)(1.00 / AFincrement);
public AFHistogram() {
rowKeys = initRowKeys();
AFhistogram = new int[rowKeys.length];
}
public Object[] getColumnKeys() {
return colKeys;
}
public Object[] getRowKeys() {
return rowKeys;
}
public Object getCell(int row, int col) {
return AFhistogram[row];
}
private static Object[] initRowKeys() {
ArrayList<String> keyList = new ArrayList<String>(numBins + 1);
for ( double a = 0.00; a <= 1.01; a += AFincrement ) {
keyList.add(String.format("%.2f", a));
}
return keyList.toArray();
}
public String getName() { return "AFHistTable"; }
public void update(final double AF) {
final int bin = (int)(numBins * MathUtils.round(AF, 2));
AFhistogram[bin]++;
}
}
public void initialize(VariantEvalWalker walker) {}
@Override public boolean enabled() { return true; }
public int getComparisonOrder() {
return 2;
}
public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {}
public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
if ( eval == null || eval.isMonomorphicInSamples() )
return null;
if ( !eval.isBiallelic() )
return null;
// update counts
switch ( eval.getType() ) {
case SNP:
updateAFhistogram(eval, AFhistogramMaxSnp, AFhistogramMinSnp);
break;
case INDEL:
updateAFhistogram(eval, AFhistogramMaxIndel, AFhistogramMinIndel);
break;
default:
throw new UserException.BadInput("Unexpected variant context type: " + eval);
}
return null; // we don't capture any interesting sites
}
private void updateAFhistogram(VariantContext vc, AFHistogram max, AFHistogram min) {
final Object obj = vc.getAttribute(VCFConstants.ALLELE_FREQUENCY_KEY, null);
if ( obj == null || !(obj instanceof List) )
return;
List<String> list = (List<String>)obj;
ArrayList<Double> AFs = new ArrayList<Double>(list.size());
for ( String str : list ) {
AFs.add(Double.valueOf(str));
}
Collections.sort(AFs);
max.update(AFs.get(AFs.size()-1));
for ( int i = 0; i < AFs.size() - 1; i++ )
min.update(AFs.get(i));
}
}

View File

@ -28,11 +28,12 @@ import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker;
import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis;
import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.variantcontext.*;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
@Analysis(description = "Evaluation summary for multi-allelic variants")
public class MultiallelicSummary extends VariantEvaluator implements StandardEval {
@ -87,17 +88,8 @@ public class MultiallelicSummary extends VariantEvaluator implements StandardEva
public String indelNoveltyRate = "NA";
public void initialize(VariantEvalWalker walker) {}
@Override public boolean enabled() { return true; }
public int getComparisonOrder() {
return 2;
}
public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
nProcessedLoci += context.getSkippedBases() + (ref == null ? 0 : 1);
}
@Override public int getComparisonOrder() { return 2; }
public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
if ( eval == null || eval.isMonomorphicInSamples() )
@ -156,13 +148,8 @@ public class MultiallelicSummary extends VariantEvaluator implements StandardEva
// TODO -- implement me
}
private final String noveltyRate(final int all, final int known) {
final int novel = all - known;
final double rate = (novel / (1.0 * all));
return all == 0 ? "NA" : String.format("%.2f", rate);
}
public void finalizeEvaluation() {
nProcessedLoci = getWalker().getnProcessedLoci();
processedMultiSnpRatio = (double)nMultiSNPs / (double)nProcessedLoci;
variantMultiSnpRatio = (double)nMultiSNPs / (double)nSNPs;
processedMultiIndelRatio = (double)nMultiIndels / (double)nProcessedLoci;
@ -170,7 +157,7 @@ public class MultiallelicSummary extends VariantEvaluator implements StandardEva
TiTvRatio = (double)nTi / (double)nTv;
SNPNoveltyRate = noveltyRate(nMultiSNPs, knownSNPsPartial + knownSNPsComplete);
indelNoveltyRate = noveltyRate(nMultiSNPs, knownIndelsPartial + knownIndelsComplete);
SNPNoveltyRate = formattedNoveltyRate(knownSNPsPartial + knownSNPsComplete, nMultiSNPs);
indelNoveltyRate = formattedNoveltyRate(knownIndelsPartial + knownIndelsComplete, nMultiSNPs);
}
}

View File

@ -1,54 +0,0 @@
package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators;
/**
* Created by IntelliJ IDEA. User: kiran Date: Nov 29, 2010 Time: 3:25:59 PM To change this template use File | Settings
* | File Templates.
*/
class NewPhaseStats {
public int neitherPhased;
public int onlyCompPhased;
public int onlyEvalPhased;
public int phasesAgree;
public int phasesDisagree;
public NewPhaseStats() {
this.neitherPhased = 0;
this.onlyCompPhased = 0;
this.onlyEvalPhased = 0;
this.phasesAgree = 0;
this.phasesDisagree = 0;
}
public String toString() {
StringBuilder sb = new StringBuilder();
sb.append("Neither phased: " + neitherPhased + "\tOnly Comp: " + onlyCompPhased + "\tOnly Eval: " + onlyEvalPhased + "\tSame phase: " + phasesAgree + "\tOpposite phase: " + phasesDisagree);
return sb.toString();
}
public static String[] getFieldNamesArray() {
return new String[]{"total", "neither", "only_comp", "only_eval", "both", "match", "switch", "switch_rate"};
}
public Object getField(int index) {
switch (index) {
case (0):
return (neitherPhased + onlyCompPhased + onlyEvalPhased + phasesAgree + phasesDisagree);
case (1):
return neitherPhased;
case (2):
return onlyCompPhased;
case (3):
return onlyEvalPhased;
case (4):
return (phasesAgree + phasesDisagree);
case (5):
return phasesAgree;
case (6):
return phasesDisagree;
case (7):
return ((phasesDisagree == 0) ? 0 : ((double) phasesDisagree) / (phasesAgree + phasesDisagree));
default:
return -1;
}
}
}

View File

@ -1,30 +0,0 @@
package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import java.util.HashMap;
/**
* Created by IntelliJ IDEA. User: kiran Date: Nov 29, 2010 Time: 3:25:59 PM To change this template use File | Settings
* | File Templates.
*/
class NewSamplePreviousGenotypes {
private HashMap<String, CompEvalGenotypes> sampleGenotypes = null;
public NewSamplePreviousGenotypes() {
this.sampleGenotypes = new HashMap<String, CompEvalGenotypes>();
}
public CompEvalGenotypes get(String sample) {
return sampleGenotypes.get(sample);
}
public void put(String sample, CompEvalGenotypes compEvalGts) {
sampleGenotypes.put(sample, compEvalGts);
}
public void put(String sample, GenomeLoc locus, Genotype compGt, Genotype evalGt) {
sampleGenotypes.put(sample, new CompEvalGenotypes(locus, compGt, evalGt));
}
}

View File

@ -4,14 +4,18 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker;
import org.broadinstitute.sting.gatk.walkers.varianteval.util.NewEvaluationContext;
import org.broadinstitute.sting.gatk.walkers.varianteval.util.StateKey;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.Collection;
public abstract class VariantEvaluator {
public void initialize(VariantEvalWalker walker) {}
private VariantEvalWalker walker;
public void initialize(VariantEvalWalker walker) {
this.walker = walker;
}
public VariantEvalWalker getWalker() {
return walker;
}
public abstract boolean enabled();
@ -19,9 +23,8 @@ public abstract class VariantEvaluator {
public abstract int getComparisonOrder();
// called at all sites, regardless of eval context itself; useful for counting processed bases
public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
}
// No longer available. The processed bp is kept in VEW itself for performance reasons
// public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
public String update1(VariantContext eval, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
return null;
@ -45,8 +48,46 @@ public abstract class VariantEvaluator {
return ((double)num) / (Math.max(denom, 1));
}
public boolean stateIsApplicable(StateKey stateKey) {
return true;
/**
* Returns true if the variant in vc was a singleton in the original input evaluation
* set, regardless of variant context subsetting that has occurred.
* @param eval the VariantContext being assessed for this previous status as a singleton
* @return true if eval was originally a singleton site
*/
protected static boolean variantWasSingleton(final VariantContext eval) {
return eval.getAttributeAsBoolean(VariantEvalWalker.IS_SINGLETON_KEY, false);
}
/**
* Convenience function that formats the novelty rate as a %.2f string
*
* @param known number of variants from all that are known
* @param all number of all variants
* @return a String novelty rate, or NA if all == 0
*/
protected static String formattedNoveltyRate(final int known, final int all) {
return formattedPercent(all - known, all);
}
/**
* Convenience function that formats the novelty rate as a %.2f string
*
* @param x number of objects part of total that meet some criteria
* @param total count of all objects, including x
* @return a String percent rate, or NA if total == 0
*/
protected static String formattedPercent(final int x, final int total) {
return total == 0 ? "NA" : String.format("%.2f", x / (1.0*total));
}
/**
* Convenience function that formats a ratio as a %.2f string
*
* @param num number of observations in the numerator
* @param denom number of observations in the denumerator
* @return a String formatted ratio, or NA if all == 0
*/
protected static String formattedRatio(final int num, final int denom) {
return denom == 0 ? "NA" : String.format("%.2f", num / (1.0 * denom));
}
}

View File

@ -54,7 +54,7 @@ public class VariantQualityScore extends VariantEvaluator {
@DataPoint(description = "average variant quality for each allele count")
AlleleCountStats alleleCountStats = null;
static class TiTvStats implements TableType {
static class TiTvStats extends TableType {
final static int NUM_BINS = 20;
final HashMap<Integer, Pair<Long,Long>> qualByIsTransition = new HashMap<Integer, Pair<Long,Long>>(); // A hashMap holds all the qualities until we are able to bin them appropriately
final long transitionByQuality[] = new long[NUM_BINS];
@ -73,10 +73,6 @@ public class VariantQualityScore extends VariantEvaluator {
return columnKeys;
}
public String getName() {
return "TiTvStats";
}
public String getCell(int x, int y) {
return String.valueOf(titvByQuality[y]);
}
@ -143,7 +139,7 @@ public class VariantQualityScore extends VariantEvaluator {
}
}
class AlleleCountStats implements TableType {
class AlleleCountStats extends TableType {
final HashMap<Integer, ArrayList<Double>> qualityListMap = new HashMap<Integer, ArrayList<Double>>();
final HashMap<Integer, Double> qualityMap = new HashMap<Integer, Double>();
@ -163,10 +159,6 @@ public class VariantQualityScore extends VariantEvaluator {
return new String[]{"alleleCount","avgQual"};
}
public String getName() {
return "AlleleCountStats";
}
public String getCell(int x, int y) {
int iii = 0;
for( final Integer key : qualityListMap.keySet() ) {

View File

@ -49,7 +49,6 @@ public class VariantSummary extends VariantEvaluator implements StandardEval {
/** Indels with size greater than this value are tallied in the CNV column */
private final static int MAX_INDEL_LENGTH = 50;
private final static double MIN_CNV_OVERLAP = 0.5;
private VariantEvalWalker walker;
public enum Type {
SNP, INDEL, CNV
@ -152,7 +151,7 @@ public class VariantSummary extends VariantEvaluator implements StandardEval {
public void initialize(VariantEvalWalker walker) {
this.walker = walker;
super.initialize(walker);
nSamples = walker.getSampleNamesForEvaluation().size();
countsPerSample = new TypeSampleMap(walker.getSampleNamesForEvaluation());
@ -176,11 +175,7 @@ public class VariantSummary extends VariantEvaluator implements StandardEval {
return 2; // we only need to see each eval track
}
public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
nProcessedLoci += context.getSkippedBases() + (ref == null ? 0 : 1);
}
private final Type getType(VariantContext vc) {
private Type getType(VariantContext vc) {
switch (vc.getType()) {
case SNP:
return Type.SNP;
@ -196,9 +191,9 @@ public class VariantSummary extends VariantEvaluator implements StandardEval {
}
}
private final boolean overlapsKnownCNV(VariantContext cnv) {
private boolean overlapsKnownCNV(VariantContext cnv) {
if ( knownCNVs != null ) {
final GenomeLoc loc = walker.getGenomeLocParser().createGenomeLoc(cnv, true);
final GenomeLoc loc = getWalker().getGenomeLocParser().createGenomeLoc(cnv, true);
IntervalTree<GenomeLoc> intervalTree = knownCNVs.get(loc.getContig());
final Iterator<IntervalTree.Node<GenomeLoc>> nodeIt = intervalTree.overlappers(loc.getStart(), loc.getStop());
@ -252,15 +247,14 @@ public class VariantSummary extends VariantEvaluator implements StandardEval {
return null; // we don't capture any interesting sites
}
private final String noveltyRate(Type type) {
private String noveltyRate(Type type) {
final int all = allVariantCounts.all(type);
final int known = knownVariantCounts.all(type);
final int novel = all - known;
final double rate = (novel / (1.0 * all));
return all == 0 ? "NA" : String.format("%.2f", rate);
return formattedNoveltyRate(known, all);
}
public void finalizeEvaluation() {
nProcessedLoci = getWalker().getnProcessedLoci();
nSNPs = allVariantCounts.all(Type.SNP);
nIndels = allVariantCounts.all(Type.INDEL);
nSVs = allVariantCounts.all(Type.CNV);

View File

@ -0,0 +1,113 @@
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.varianteval.util;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import java.util.*;
/**
* Simple utility for histogramming indel lengths
*
* Based on code from chartl
*
* @author Mark DePristo
* @since 3/21/12
*/
public class IndelHistogram extends TableType {
private final boolean asFrequencies;
int nIndels = 0, nTooBigDeletions = 0, nTooBigInsertions = 0;
private final Integer[] rowKeys;
private Map<Integer, Double> frequencies = null;
private final Map<Integer, Integer> counts = new HashMap<Integer, Integer>();
public IndelHistogram(int maxSize, boolean asFrequencies) {
this.asFrequencies = asFrequencies;
initializeCounts(maxSize);
this.rowKeys = new ArrayList<Integer>(counts.keySet()).toArray(new Integer[maxSize]);
}
private void initializeCounts(int size) {
for ( int i = -size; i <= size; i++ ) {
if ( i != 0 ) counts.put(i, 0);
}
}
@Override
public String getRowName() {
return "Length";
}
@Override
public Object[] getColumnKeys() {
return new String[]{"Count"};
}
@Override
public Object[] getRowKeys() {
return rowKeys;
}
@Override
public Object getCell(int row, int col) {
final int key = (Integer)getRowKeys()[row];
if ( asFrequencies ) {
if ( frequencies == null ) {
frequencies = new HashMap<Integer, Double>();
for ( final int len : counts.keySet() ) {
final double value = nIndels == 0 ? 0.0 : counts.get(len) / (1.0 * nIndels);
frequencies.put(len, value);
}
}
return frequencies.get(key);
}
return counts.get(key);
}
public int getnTooBigDeletions() {
return nTooBigDeletions;
}
public int getnTooBigInsertions() {
return nTooBigInsertions;
}
public void update(final Allele ref, final Allele alt) {
final int alleleSize = alt.length() - ref.length();
update(alleleSize);
}
public void update(int len) {
if ( counts.containsKey(len) ) {
nIndels++;
counts.put(len, counts.get(len) + 1);
} else if ( len < 0 ) {
nTooBigDeletions++;
} else {
nTooBigInsertions++;
}
}
}

View File

@ -10,34 +10,20 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.StingException;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.HashMap;
import java.util.Set;
import java.util.TreeMap;
import java.util.*;
public class NewEvaluationContext extends HashMap<VariantStratifier, String> {
public TreeMap<String, VariantEvaluator> evaluationInstances;
public String toString() {
String value = "";
for ( VariantStratifier key : this.keySet() ) {
value += "\t" + key.getName() + ":" + this.get(key) + "\n";
}
return value;
}
private Map<String, VariantEvaluator> evaluationInstances;
public void addEvaluationClassList(VariantEvalWalker walker, StateKey stateKey, Set<Class<? extends VariantEvaluator>> evaluationClasses) {
evaluationInstances = new TreeMap<String, VariantEvaluator>();
evaluationInstances = new LinkedHashMap<String, VariantEvaluator>(evaluationClasses.size());
for ( Class<? extends VariantEvaluator> c : evaluationClasses ) {
for ( final Class<? extends VariantEvaluator> c : evaluationClasses ) {
try {
VariantEvaluator eval = c.newInstance();
final VariantEvaluator eval = c.newInstance();
eval.initialize(walker);
if (eval.stateIsApplicable(stateKey)) {
evaluationInstances.put(c.getSimpleName(), eval);
}
evaluationInstances.put(c.getSimpleName(), eval);
} catch (InstantiationException e) {
throw new StingException("Unable to instantiate eval module '" + c.getSimpleName() + "'");
} catch (IllegalAccessException e) {
@ -47,13 +33,11 @@ public class NewEvaluationContext extends HashMap<VariantStratifier, String> {
}
public TreeMap<String, VariantEvaluator> getEvaluationClassList() {
return evaluationInstances;
return new TreeMap<String, VariantEvaluator>(evaluationInstances);
}
public void apply(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context, VariantContext comp, VariantContext eval) {
for ( VariantEvaluator evaluation : evaluationInstances.values() ) {
// we always call update0 in case the evaluation tracks things like number of bases covered
for ( final VariantEvaluator evaluation : evaluationInstances.values() ) {
// the other updateN methods don't see a null context
if ( tracker == null )
continue;
@ -77,10 +61,4 @@ public class NewEvaluationContext extends HashMap<VariantStratifier, String> {
}
}
}
public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
for ( VariantEvaluator evaluation : evaluationInstances.values() ) {
evaluation.update0(tracker, ref, context);
}
}
}

View File

@ -9,9 +9,11 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.util;
*
* an interface for turning arbritary objects into tables
*/
public interface TableType {
public Object[] getRowKeys();
public Object[] getColumnKeys();
public Object getCell(int x, int y);
public String getName();
public abstract class TableType {
public abstract Object[] getRowKeys();
public abstract Object[] getColumnKeys();
public abstract Object getCell(int x, int y);
public String getName() { return getClass().getSimpleName(); }
public String getRowName() { return "row"; }
}

View File

@ -191,7 +191,7 @@ public class VariantEvalUtils {
* @return a map of all the evaluation contexts
*/
public HashMap<StateKey, NewEvaluationContext> initializeEvaluationContexts(Set<VariantStratifier> stratificationObjects, Set<Class<? extends VariantEvaluator>> evaluationObjects, Stack<VariantStratifier> stratStack, NewEvaluationContext ec) {
HashMap<StateKey, NewEvaluationContext> ecs = new HashMap<StateKey, NewEvaluationContext>();
HashMap<StateKey, NewEvaluationContext> ecs = new LinkedHashMap<StateKey, NewEvaluationContext>();
if (stratStack == null) {
stratStack = new Stack<VariantStratifier>();
@ -310,7 +310,7 @@ public class VariantEvalUtils {
final int newAlleleCount = vcsub.getHetCount() + 2 * vcsub.getHomVarCount();
if (originalAlleleCount == newAlleleCount && newAlleleCount == 1) {
builder.attribute("ISSINGLETON", true);
builder.attribute(VariantEvalWalker.IS_SINGLETON_KEY, true);
}
VariantContextUtils.calculateChromosomeCounts(builder, true);

View File

@ -184,6 +184,8 @@ public class ApplyRecalibration extends RodWalker<Integer, Integer> {
}
} catch ( FileNotFoundException e ) {
throw new UserException.CouldNotReadInputFile(RECAL_FILE, e);
} catch ( Exception e ) {
throw new UserException.MalformedFile(RECAL_FILE, "Could not parse LOD and annotation information in input recal file. File is somehow malformed.");
}
}

View File

@ -243,6 +243,19 @@ public class CombineVariants extends RodWalker<Integer, Integer> {
if (multipleAllelesMergeType == VariantContextUtils.MultipleAllelesMergeType.BY_TYPE) {
Map<VariantContext.Type, List<VariantContext>> VCsByType = VariantContextUtils.separateVariantContextsByType(vcs);
// TODO -- clean this up in a refactoring
// merge NO_VARIATION into another type of variant (based on the ordering in VariantContext.Type)
if ( VCsByType.containsKey(VariantContext.Type.NO_VARIATION) && VCsByType.size() > 1 ) {
final List<VariantContext> refs = VCsByType.remove(VariantContext.Type.NO_VARIATION);
for ( VariantContext.Type type : VariantContext.Type.values() ) {
if ( VCsByType.containsKey(type) ) {
VCsByType.get(type).addAll(refs);
break;
}
}
}
// iterate over the types so that it's deterministic
for (VariantContext.Type type : VariantContext.Type.values()) {
if (VCsByType.containsKey(type))

View File

@ -25,248 +25,267 @@
package org.broadinstitute.sting.utils.recalibration;
import org.broadinstitute.sting.gatk.report.GATKReport;
import org.broadinstitute.sting.gatk.report.GATKReportTable;
import org.broadinstitute.sting.gatk.walkers.bqsr.*;
import org.broadinstitute.sting.gatk.walkers.recalibration.EmpiricalQual;
import org.broadinstitute.sting.utils.BitSetUtils;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.classloader.PluginManager;
import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.text.XReadLines;
import java.io.File;
import java.io.FileNotFoundException;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.HashMap;
import java.util.List;
import java.util.regex.Pattern;
import java.util.*;
/**
* Utility methods to facilitate on-the-fly base quality score recalibration.
*
* User: rpoplin
* User: carneiro and rpoplin
* Date: 2/4/12
*/
public class BaseRecalibration {
private List<Byte> qualQuantizationMap; // histogram containing the map for qual quantization (calculated after recalibration is done)
private LinkedHashMap<BQSRKeyManager, Map<BitSet, EmpiricalQual>> keysAndTablesMap; // quick access reference to the read group table and its key manager
private ArrayList<HashMap<BitSet, RecalDatum>> collapsedHashes = new ArrayList<HashMap<BitSet, RecalDatum>> (); // All the collapsed data tables
private ArrayList<Covariate> requestedCovariates = new ArrayList<Covariate>(); // list of all covariates to be used in this calculation
private final ArrayList<Covariate> requestedCovariates = new ArrayList<Covariate>(); // List of all covariates to be used in this calculation
private final ArrayList<Covariate> requiredCovariates = new ArrayList<Covariate>(); // List of required covariates to be used in this calculation
private final ArrayList<Covariate> optionalCovariates = new ArrayList<Covariate>(); // List of optional covariates to be used in this calculation
public static final Pattern REQUIRED_COVARIATE_PATTERN = Pattern.compile("^# Required Covariates.*");
public static final Pattern OPTIONAL_COVARIATE_PATTERN = Pattern.compile("^# Optional Covariates.*");
public static final String EOF_MARKER = "EOF";
private static String UNRECOGNIZED_REPORT_TABLE_EXCEPTION = "Unrecognized table. Did you add an extra required covariate? This is a hard check that needs propagate through the code";
private static String TOO_MANY_KEYS_EXCEPTION = "There should only be one key for the RG collapsed table, something went wrong here";
private static final byte SMOOTHING_CONSTANT = 1;
ArrayList<BQSRKeyManager> keyManagers = new ArrayList<BQSRKeyManager>();
/**
* Should ALWAYS use the constructor with the GATK Report file
*/
private BaseRecalibration() {}
/**
* Constructor using a GATK Report file
*
* @param RECAL_FILE a GATK Report file containing the recalibration information
*/
public BaseRecalibration(final File RECAL_FILE) {
// Get a list of all available covariates
final List<Class<? extends Covariate>> classes = new PluginManager<Covariate>(Covariate.class).getPlugins();
RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection(); // todo -- initialize with the parameters from the csv file!
GATKReport report = new GATKReport(RECAL_FILE);
int lineNumber = 0;
GATKReportTable argumentTable = report.getTable(RecalDataManager.ARGUMENT_REPORT_TABLE_TITLE);
RecalibrationArgumentCollection RAC = initializeArgumentCollectionTable(argumentTable);
boolean foundRequiredCovariates = false;
boolean foundOptionalCovariates = false;
boolean initializedKeyManagers = false;
GATKReportTable quantizedTable = report.getTable(RecalDataManager.QUANTIZED_REPORT_TABLE_TITLE);
qualQuantizationMap = initializeQuantizationTable(quantizedTable);
// Read in the data from the csv file and populate the data map and covariates list
boolean sawEOF = false;
try {
for (String line : new XReadLines(RECAL_FILE)) {
lineNumber++;
Pair<ArrayList<Covariate>, ArrayList<Covariate>> covariates = RecalDataManager.initializeCovariates(RAC); // initialize the required and optional covariates
ArrayList<Covariate> requiredCovariates = covariates.getFirst();
ArrayList<Covariate> optionalCovariates = covariates.getSecond();
requestedCovariates.addAll(requiredCovariates); // add all required covariates to the list of requested covariates
requestedCovariates.addAll(optionalCovariates); // add all optional covariates to the list of requested covariates
sawEOF = EOF_MARKER.equals(line);
if (sawEOF)
break;
boolean requiredCovariatesLine = REQUIRED_COVARIATE_PATTERN.matcher(line).matches();
boolean optionalCovariatesLine = OPTIONAL_COVARIATE_PATTERN.matcher(line).matches();
if (requiredCovariatesLine && foundRequiredCovariates)
throw new UserException.MalformedFile(RECAL_FILE, "Malformed input recalibration csv file. Duplicate required covariates line");
if (optionalCovariatesLine && foundOptionalCovariates)
throw new UserException.MalformedFile(RECAL_FILE, "Malformed input recalibration csv file. Duplicate optional covariates line");
if (optionalCovariatesLine && !foundRequiredCovariates)
throw new UserException.MalformedFile(RECAL_FILE, "Malformed input recalibration csv file. Optional covariates reported before Required covariates");
if (requiredCovariatesLine || optionalCovariatesLine) {
String [] covariateNames = line.split(": ")[1].split(","); // take the second half of the string (past the ":") and split it by "," to get the list of required covariates
List<Covariate> covariateList = requiredCovariatesLine ? requiredCovariates : optionalCovariates; // set the appropriate covariate list to update
for (String covariateName : covariateNames) {
boolean foundClass = false;
for (Class<?> covClass : classes) {
if ((covariateName + "Covariate").equalsIgnoreCase(covClass.getSimpleName())) {
foundClass = true;
try {
Covariate covariate = (Covariate) covClass.newInstance();
covariate.initialize(RAC);
requestedCovariates.add(covariate);
covariateList.add(covariate);
} catch (Exception e) {
throw new DynamicClassResolutionException(covClass, e);
}
}
}
if (!foundClass)
throw new UserException.MalformedFile(RECAL_FILE, "Malformed input recalibration file. The requested covariate type (" + (covariateName + "Covariate") + ") isn't a valid covariate option.");
}
foundRequiredCovariates = foundRequiredCovariates || requiredCovariatesLine;
foundOptionalCovariates = foundOptionalCovariates || optionalCovariatesLine;
}
else if (!line.startsWith("#")) { // if this is not a comment line that we don't care about, it is DATA!
if (!foundRequiredCovariates || !foundOptionalCovariates) // At this point all the covariates should have been found and initialized
throw new UserException.MalformedFile(RECAL_FILE, "Malformed input recalibration csv file. Covariate names can't be found in file: " + RECAL_FILE);
if (!initializedKeyManagers) {
ArrayList<Covariate> emptyList = new ArrayList<Covariate>(0);
ArrayList<Covariate> requiredCovariatesUpToThis = new ArrayList<Covariate>(); // Initialize one key manager for each table of required covariate
for (Covariate covariate : requiredCovariates) { // Every required covariate table includes all preceding required covariates (e.g. RG ; RG,Q )
requiredCovariatesUpToThis.add(covariate);
keyManagers.add(new BQSRKeyManager(requiredCovariatesUpToThis, emptyList));
}
keyManagers.add(new BQSRKeyManager(requiredCovariates, optionalCovariates)); // One master key manager for the collapsed tables
initializedKeyManagers = true;
}
addCSVData(RECAL_FILE, line); // Parse the line and add the data to the HashMap
}
}
} catch (FileNotFoundException e) {
throw new UserException.CouldNotReadInputFile(RECAL_FILE, "Can not find input file", e);
} catch (NumberFormatException e) {
throw new UserException.MalformedFile(RECAL_FILE, "Error parsing recalibration data at line " + lineNumber + ". Perhaps your table was generated by an older version of CovariateCounterWalker.");
}
if (!sawEOF) {
final String errorMessage = "No EOF marker was present in the recal covariates table; this could mean that the file is corrupted or was generated with an old version of the CountCovariates tool.";
throw new UserException.MalformedFile(RECAL_FILE, errorMessage);
}
generateEmpiricalQualities(SMOOTHING_CONSTANT);
}
/**
* For each covariate read in a value and parse it. Associate those values with the data itself (num observation and num mismatches)
*
* @param file The CSV file we read the line from (for exception throwing purposes)
* @param line A line of CSV data read from the recalibration table data file
*/
private void addCSVData(final File file, final String line) {
final String[] vals = line.split(",");
boolean hasOptionalCovariates = optionalCovariates.size() > 0; // Do we have optional covariates in this key?
int addOptionalCovariates = hasOptionalCovariates ? 2 : 0; // If we have optional covariates at all, add two to the size of the array (to acommodate the covariate and the id)
final Object[] key = new Object[requiredCovariates.size() + addOptionalCovariates + 1]; // Reserve enough space for the required covariates, optional covariate, id and eventType
int indexCovariateValue = key.length - 3; // In the order of keys, the optional covariate comes right after the required covariates
int indexCovariateID = key.length - 2; // followed by the covariate ID
int indexEventType = key.length - 1; // and the event type
addKeysToArray(key, vals, requiredCovariates, 0); // Add the required covariates keys
if (hasOptionalCovariates) {
key[indexCovariateID] = Short.parseShort(vals[indexCovariateID]); // Add the optional covariate ID
Covariate covariate = optionalCovariates.get((Short) key[indexCovariateID]); // Get the covariate object for this ID
key[indexCovariateValue] = covariate.getValue(vals[indexCovariateValue]); // Add the optional covariate value, given the ID
}
key[indexEventType] = EventType.eventFrom(vals[indexEventType]); // Add the event type
int datumIndex = key.length; // The recal datum starts at the end of the key (after the event type)
long count = Long.parseLong(vals[datumIndex]); // Number of observations
long errors = Long.parseLong(vals[datumIndex + 1]); // Number of errors observed
double reportedQual = Double.parseDouble(vals[1]); // The reported Q score --> todo -- I don't like having the Q score hard coded in vals[1]. Generalize it!
final RecalDatum datum = new RecalDatum(count, errors, reportedQual, 0.0); // Create a new datum using the number of observations, number of mismatches, and reported quality score
addToAllTables(key, datum); // Add that datum to all the collapsed tables which will be used in the sequential calculation
}
/**
* Add the given mapping to all of the collapsed hash tables
*
* @param key The list of comparables that is the key for this mapping
* @param fullDatum The RecalDatum which is the data for this mapping
*/
private void addToAllTables(final Object[] key, final RecalDatum fullDatum) {
int nHashes = requiredCovariates.size(); // We will always need one hash per required covariate
if (optionalCovariates.size() > 0) // If we do have optional covariates
nHashes += 1; // we will need one extra hash table with the optional covariate encoded in the key set on top of the required covariates
for (Covariate cov : requestedCovariates)
cov.initialize(RAC); // initialize any covariate member variables using the shared argument collection
for (int hashIndex = 0; hashIndex < nHashes; hashIndex++) {
HashMap<BitSet, RecalDatum> table; // object to hold the hash table we are going to manipulate
if (hashIndex >= collapsedHashes.size()) { // if we haven't yet created the collapsed hash table for this index, create it now!
table = new HashMap<BitSet, RecalDatum>();
collapsedHashes.add(table); // Because this is the only place where we add tables to the ArrayList, they will always be in the order we want.
}
else
table = collapsedHashes.get(hashIndex); // if the table has been previously created, just assign it to the "table" object for manipulation
int copyTo = hashIndex + 1; // this will copy the covariates up to the index of the one we are including now (1 for RG, 2 for QS,...)
if (copyTo > requiredCovariates.size()) // only in the case where we have optional covariates we need to increase the size of the array
copyTo = requiredCovariates.size() + 2; // if we have optional covarites, add the optional covariate and it's id to the size of the key
Object[] tableKey = new Object[copyTo + 1]; // create a new array that will hold as many keys as hashIndex (1 for RG hash, 2 for QualityScore hash, 3 for covariate hash plus the event type
System.arraycopy(key, 0, tableKey, 0, copyTo); // copy the keys for the corresponding covariates into the tableKey.
tableKey[tableKey.length-1] = key[key.length - 1]; // add the event type. The event type is always the last key, on both key sets.
keysAndTablesMap = new LinkedHashMap<BQSRKeyManager, Map<BitSet, EmpiricalQual>>();
ArrayList<Covariate> requiredCovariatesToAdd = new ArrayList<Covariate>(requiredCovariates.size()); // incrementally add the covariates to create the recal tables with 1, 2 and 3 covariates.
ArrayList<Covariate> optionalCovariatesToAdd = new ArrayList<Covariate>(); // initialize an empty array of optional covariates to create the first few tables
for (Covariate covariate : requiredCovariates) {
requiredCovariatesToAdd.add(covariate);
final Map<BitSet, EmpiricalQual> table; // initializing a new recal table for each required covariate (cumulatively)
final BQSRKeyManager keyManager = new BQSRKeyManager(requiredCovariatesToAdd, optionalCovariatesToAdd); // initializing it's corresponding key manager
BitSet hashKey = keyManagers.get(hashIndex).bitSetFromKey(tableKey); // Add bitset key with fullDatum to the appropriate hash
RecalDatum datum = table.get(hashKey);
if (datum == null)
datum = fullDatum;
else if (hashIndex == 0) // Special case for the ReadGroup covariate
datum.combine(fullDatum);
int nRequiredCovariates = requiredCovariatesToAdd.size(); // the number of required covariates defines which table we are looking at (RG, QUAL or ALL_COVARIATES)
if (nRequiredCovariates == 1) { // if there is only one required covariate, this is the read group table
final GATKReportTable reportTable = report.getTable(RecalDataManager.READGROUP_REPORT_TABLE_TITLE);
table = parseReadGroupTable(keyManager, reportTable);
}
else if (nRequiredCovariates == 2 && optionalCovariatesToAdd.isEmpty()) { // when we have both required covariates and no optional covariates we're at the QUAL table
final GATKReportTable reportTable = report.getTable(RecalDataManager.QUALITY_SCORE_REPORT_TABLE_TITLE);
table = parseQualityScoreTable(keyManager, reportTable);
}
else
datum.increment(fullDatum);
table.put(hashKey, datum);
throw new ReviewedStingException(UNRECOGNIZED_REPORT_TABLE_EXCEPTION);
keysAndTablesMap.put(keyManager, table); // adding the pair key+table to the map
}
final BQSRKeyManager keyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates); // initializing it's corresponding key manager
final GATKReportTable reportTable = report.getTable(RecalDataManager.ALL_COVARIATES_REPORT_TABLE_TITLE);
final Map<BitSet, EmpiricalQual> table = parseAllCovariatesTable(keyManager, reportTable);
keysAndTablesMap.put(keyManager, table); // adding the pair table+key to the map
}
/**
* Loop over all the collapsed tables and turn the recalDatums found there into an empirical quality score
* that will be used in the sequential calculation in TableRecalibrationWalker
* Compiles the list of keys for the Covariates table and uses the shared parsing utility to produce the actual table
*
* @param smoothing The smoothing parameter that goes into empirical quality score calculation
* @param keyManager the key manager for this table
* @param reportTable the GATKReport table containing data for this table
* @return a lookup table indexed by bitsets containing the empirical quality and estimated quality reported for every key.
*/
private void generateEmpiricalQualities(final int smoothing) {
for (final HashMap<BitSet, RecalDatum> table : collapsedHashes)
for (final RecalDatum datum : table.values())
datum.calcCombinedEmpiricalQuality(smoothing, QualityUtils.MAX_QUAL_SCORE);
private Map<BitSet, EmpiricalQual> parseAllCovariatesTable(BQSRKeyManager keyManager, GATKReportTable reportTable) {
ArrayList<String> columnNamesOrderedList = new ArrayList<String>(5);
columnNamesOrderedList.add(RecalDataManager.READGROUP_COLUMN_NAME);
columnNamesOrderedList.add(RecalDataManager.QUALITY_SCORE_COLUMN_NAME);
columnNamesOrderedList.add(RecalDataManager.COVARIATE_VALUE_COLUMN_NAME);
columnNamesOrderedList.add(RecalDataManager.COVARIATE_NAME_COLUMN_NAME);
columnNamesOrderedList.add(RecalDataManager.EVENT_TYPE_COLUMN_NAME);
return genericRecalTableParsing(keyManager, reportTable, columnNamesOrderedList);
}
/**
*
* Compiles the list of keys for the QualityScore table and uses the shared parsing utility to produce the actual table
* @param keyManager the key manager for this table
* @param reportTable the GATKReport table containing data for this table
* @return a lookup table indexed by bitsets containing the empirical quality and estimated quality reported for every key.
*/
private Map<BitSet, EmpiricalQual> parseQualityScoreTable(BQSRKeyManager keyManager, GATKReportTable reportTable) {
ArrayList<String> columnNamesOrderedList = new ArrayList<String>(3);
columnNamesOrderedList.add(RecalDataManager.READGROUP_COLUMN_NAME);
columnNamesOrderedList.add(RecalDataManager.QUALITY_SCORE_COLUMN_NAME);
columnNamesOrderedList.add(RecalDataManager.EVENT_TYPE_COLUMN_NAME);
return genericRecalTableParsing(keyManager, reportTable, columnNamesOrderedList);
}
/**
* Compiles the list of keys for the ReadGroup table and uses the shared parsing utility to produce the actual table
*
* @param keyManager the key manager for this table
* @param reportTable the GATKReport table containing data for this table
* @return a lookup table indexed by bitsets containing the empirical quality and estimated quality reported for every key.
*/
private Map<BitSet, EmpiricalQual> parseReadGroupTable(BQSRKeyManager keyManager, GATKReportTable reportTable) {
ArrayList<String> columnNamesOrderedList = new ArrayList<String>(2);
columnNamesOrderedList.add(RecalDataManager.READGROUP_COLUMN_NAME);
columnNamesOrderedList.add(RecalDataManager.EVENT_TYPE_COLUMN_NAME);
return genericRecalTableParsing(keyManager, reportTable, columnNamesOrderedList);
}
/**
* Shared parsing functionality for all tables.
*
* @param keyManager the key manager for this table
* @param reportTable the GATKReport table containing data for this table
* @param columnNamesOrderedList a list of columns to read from the report table and build as key for this particular table
* @return a lookup table indexed by bitsets containing the empirical quality and estimated quality reported for every key.
*/
private Map<BitSet, EmpiricalQual> genericRecalTableParsing(BQSRKeyManager keyManager, GATKReportTable reportTable, ArrayList<String> columnNamesOrderedList) {
Map<BitSet, EmpiricalQual> result = new HashMap<BitSet, EmpiricalQual>(reportTable.getNumRows()*2);
public void recalibrateRead(final GATKSAMRecord read) {
//compute all covariate values for this read
RecalDataManager.computeCovariates(read, requestedCovariates);
final ReadCovariates readCovariates = RecalDataManager.covariateKeySetFrom(read);
for (Object primaryKey : reportTable.getPrimaryKeys()) {
int nKeys = columnNamesOrderedList.size();
Object [] keySet = new Object[nKeys];
for (int i = 0; i < nKeys; i++)
keySet[i] = reportTable.get(primaryKey, columnNamesOrderedList.get(i)); // all these objects are okay in String format, the key manager will handle them correctly (except for the event type (see below)
keySet[keySet.length-1] = EventType.eventFrom((String) keySet[keySet.length-1]); // the last key is always the event type. We convert the string ("M", "I" or "D") to an enum object (necessary for the key manager).
BitSet bitKey = keyManager.bitSetFromKey(keySet);
for (final EventType errorModel : EventType.values()) {
double estimatedQReported = (Double) reportTable.get(primaryKey, RecalDataManager.ESTIMATED_Q_REPORTED_COLUMN_NAME);
double empiricalQuality = (Double) reportTable.get(primaryKey, RecalDataManager.EMPIRICAL_QUALITY_COLUMN_NAME);
EmpiricalQual empiricalQual = new EmpiricalQual(estimatedQReported, empiricalQuality);
result.put(bitKey, empiricalQual);
}
return result;
}
/**
* Parses the quantization table from the GATK Report and turns it into a map of original => quantized quality scores
*
* @param table the GATKReportTable containing the quantization mappings
* @return an ArrayList with the quantization mappings from 0 to MAX_QUAL_SCORE
*/
private List<Byte> initializeQuantizationTable(GATKReportTable table) {
Byte[] result = new Byte[QualityUtils.MAX_QUAL_SCORE + 1];
for (Object primaryKey : table.getPrimaryKeys()) {
Object value = table.get(primaryKey, RecalDataManager.QUANTIZED_VALUE_COLUMN_NAME);
byte originalQual = Byte.parseByte(primaryKey.toString());
byte quantizedQual = Byte.parseByte(value.toString());
result[originalQual] = quantizedQual;
}
return Arrays.asList(result);
}
/**
* Parses the arguments table from the GATK Report and creates a RAC object with the proper initialization values
*
* @param table the GATKReportTable containing the arguments and its corresponding values
* @return a RAC object properly initialized with all the objects in the table
*/
private RecalibrationArgumentCollection initializeArgumentCollectionTable(GATKReportTable table) {
RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection();
for (Object primaryKey : table.getPrimaryKeys()) {
Object value = table.get(primaryKey, RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME);
if (value.equals("null"))
value = null; // generic translation of null values that were printed out as strings | todo -- add this capability to the GATKReport
if (primaryKey.equals("covariate") && value != null)
RAC.COVARIATES = value.toString().split(",");
else if (primaryKey.equals("standard_covs"))
RAC.USE_STANDARD_COVARIATES = Boolean.parseBoolean((String) value);
else if (primaryKey.equals("solid_recal_mode"))
RAC.SOLID_RECAL_MODE = RecalDataManager.SOLID_RECAL_MODE.recalModeFromString((String) value);
else if (primaryKey.equals("solid_nocall_strategy"))
RAC.SOLID_NOCALL_STRATEGY = RecalDataManager.SOLID_NOCALL_STRATEGY.nocallStrategyFromString((String) value);
else if (primaryKey.equals("mismatches_context_size"))
RAC.MISMATCHES_CONTEXT_SIZE = Integer.parseInt((String) value);
else if (primaryKey.equals("insertions_context_size"))
RAC.INSERTIONS_CONTEXT_SIZE = Integer.parseInt((String) value);
else if (primaryKey.equals("deletions_context_size"))
RAC.DELETIONS_CONTEXT_SIZE = Integer.parseInt((String) value);
else if (primaryKey.equals("mismatches_default_quality"))
RAC.MISMATCHES_DEFAULT_QUALITY = Byte.parseByte((String) value);
else if (primaryKey.equals("insertions_default_quality"))
RAC.INSERTIONS_DEFAULT_QUALITY = Byte.parseByte((String) value);
else if (primaryKey.equals("deletions_default_quality"))
RAC.DELETIONS_DEFAULT_QUALITY = Byte.parseByte((String) value);
else if (primaryKey.equals("low_quality_tail"))
RAC.LOW_QUAL_TAIL = Byte.parseByte((String) value);
else if (primaryKey.equals("default_platform"))
RAC.DEFAULT_PLATFORM = (String) value;
else if (primaryKey.equals("force_platform"))
RAC.FORCE_PLATFORM = (String) value;
else if (primaryKey.equals("quantizing_levels"))
RAC.QUANTIZING_LEVELS = Integer.parseInt((String) value);
}
return RAC;
}
/**
* Recalibrates the base qualities of a read
*
* It updates the base qualities of the read with the new recalibrated qualities (for all event types)
*
* @param read the read to recalibrate
*/
public void recalibrateRead(final GATKSAMRecord read) {
final ReadCovariates readCovariates = RecalDataManager.computeCovariates(read, requestedCovariates); // compute all covariates for the read
for (final EventType errorModel : EventType.values()) { // recalibrate all three quality strings
final byte[] originalQuals = read.getBaseQualities(errorModel);
final byte[] recalQuals = originalQuals.clone();
// For each base in the read
for (int offset = 0; offset < read.getReadLength(); offset++) {
final BitSet[] keySet = readCovariates.getKeySet(offset, errorModel);
final byte qualityScore = performSequentialQualityCalculation(keySet, errorModel);
for (int offset = 0; offset < read.getReadLength(); offset++) { // recalibrate all bases in the read
byte qualityScore = originalQuals[offset];
if (qualityScore > QualityUtils.MIN_USABLE_Q_SCORE) { // only recalibrate usable qualities (the original quality will come from the instrument -- reported quality)
final BitSet[] keySet = readCovariates.getKeySet(offset, errorModel); // get the keyset for this base using the error model
qualityScore = performSequentialQualityCalculation(keySet, errorModel); // recalibrate the base
}
recalQuals[offset] = qualityScore;
}
preserveQScores(originalQuals, recalQuals); // Overwrite the work done if original quality score is too low
read.setBaseQualities(recalQuals, errorModel);
}
}
@ -286,86 +305,66 @@ public class BaseRecalibration {
*
* Qrecal = Qreported + DeltaQ + DeltaQ(pos) + DeltaQ(dinuc) + DeltaQ( ... any other covariate ... )
*
* todo -- I extremely dislike the way all this math is hardcoded... should rethink the data structures for this method in particular.
*
* @param key The list of Comparables that were calculated from the covariates
* @param key The list of Comparables that were calculated from the covariates
* @param errorModel the event type
* @return A recalibrated quality score as a byte
*/
private byte performSequentialQualityCalculation(BitSet[] key, EventType errorModel) {
final byte qualFromRead = (byte) BitSetUtils.shortFrom(key[1]);
final int readGroupKeyIndex = 0;
final int qualKeyIndex = 1;
final int covariatesKeyIndex = 2;
// The global quality shift (over the read group only)
List<BitSet> bitKeys = keyManagers.get(readGroupKeyIndex).bitSetsFromAllKeys(key, errorModel);
if (bitKeys.size() > 1)
throw new ReviewedStingException("There should only be one key for the RG collapsed table, something went wrong here");
final RecalDatum globalRecalDatum = collapsedHashes.get(readGroupKeyIndex).get(bitKeys.get(0));
double globalDeltaQ = 0.0;
if (globalRecalDatum != null) {
final double globalDeltaQEmpirical = globalRecalDatum.getEmpiricalQuality();
final double aggregrateQReported = globalRecalDatum.getEstimatedQReported();
globalDeltaQ = globalDeltaQEmpirical - aggregrateQReported;
}
// The shift in quality between reported and empirical
bitKeys = keyManagers.get(qualKeyIndex).bitSetsFromAllKeys(key, errorModel);
if (bitKeys.size() > 1)
throw new ReviewedStingException("There should only be one key for the Qual collapsed table, something went wrong here");
final RecalDatum qReportedRecalDatum = collapsedHashes.get(qualKeyIndex).get(bitKeys.get(0));
double deltaQReported = 0.0;
if (qReportedRecalDatum != null) {
final double deltaQReportedEmpirical = qReportedRecalDatum.getEmpiricalQuality();
deltaQReported = deltaQReportedEmpirical - qualFromRead - globalDeltaQ;
}
// The shift in quality due to each covariate by itself in turn
bitKeys = keyManagers.get(covariatesKeyIndex).bitSetsFromAllKeys(key, errorModel);
double deltaQCovariates = 0.0;
double deltaQCovariateEmpirical;
for (BitSet k : bitKeys) {
final RecalDatum covariateRecalDatum = collapsedHashes.get(covariatesKeyIndex).get(k);
if (covariateRecalDatum != null) {
deltaQCovariateEmpirical = covariateRecalDatum.getEmpiricalQuality();
deltaQCovariates += (deltaQCovariateEmpirical - qualFromRead - (globalDeltaQ + deltaQReported));
for (Map.Entry<BQSRKeyManager, Map<BitSet, EmpiricalQual>> mapEntry : keysAndTablesMap.entrySet()) {
BQSRKeyManager keyManager = mapEntry.getKey();
Map<BitSet, EmpiricalQual> table = mapEntry.getValue();
switch(keyManager.getRequiredCovariates().size()) {
case 1: // this is the ReadGroup table
List<BitSet> bitKeys = keyManager.bitSetsFromAllKeys(key, errorModel); // calculate the shift in quality due to the read group
if (bitKeys.size() > 1)
throw new ReviewedStingException(TOO_MANY_KEYS_EXCEPTION);
final EmpiricalQual empiricalQualRG = table.get(bitKeys.get(0));
if (empiricalQualRG != null) {
final double globalDeltaQEmpirical = empiricalQualRG.getEmpiricalQuality();
final double aggregrateQReported = empiricalQualRG.getEstimatedQReported();
globalDeltaQ = globalDeltaQEmpirical - aggregrateQReported;
}
break;
case 2:
if (keyManager.getOptionalCovariates().isEmpty()) { // this is the QualityScore table
bitKeys = keyManager.bitSetsFromAllKeys(key, errorModel); // calculate the shift in quality due to the reported quality score
if (bitKeys.size() > 1)
throw new ReviewedStingException(TOO_MANY_KEYS_EXCEPTION);
final EmpiricalQual empiricalQualQS = table.get(bitKeys.get(0));
if (empiricalQualQS != null) {
final double deltaQReportedEmpirical = empiricalQualQS.getEmpiricalQuality();
deltaQReported = deltaQReportedEmpirical - qualFromRead - globalDeltaQ;
}
}
else { // this is the table with all the covariates
bitKeys = keyManager.bitSetsFromAllKeys(key, errorModel); // calculate the shift in quality due to each covariate by itself in turn
for (BitSet k : bitKeys) {
final EmpiricalQual empiricalQualCO = table.get(k);
if (empiricalQualCO != null) {
double deltaQCovariateEmpirical = empiricalQualCO.getEmpiricalQuality();
deltaQCovariates += (deltaQCovariateEmpirical - qualFromRead - (globalDeltaQ + deltaQReported));
}
}
}
break;
default:
throw new ReviewedStingException(UNRECOGNIZED_REPORT_TABLE_EXCEPTION);
}
}
final double newQuality = qualFromRead + globalDeltaQ + deltaQReported + deltaQCovariates;
return QualityUtils.boundQual((int) Math.round(newQuality), QualityUtils.MAX_QUAL_SCORE);
double recalibratedQual = qualFromRead + globalDeltaQ + deltaQReported + deltaQCovariates; // calculate the recalibrated qual using the BQSR formula
recalibratedQual = QualityUtils.boundQual((int) Math.round(recalibratedQual), QualityUtils.MAX_QUAL_SCORE); // recalibrated quality is bound between 1 and MAX_QUAL
return qualQuantizationMap.get((int) recalibratedQual); // return the quantized version of the recalibrated quality
}
/**
* Loop over the list of qualities and overwrite the newly recalibrated score to be the original score if it was less than some threshold
*
* @param originalQuals The list of original base quality scores
* @param recalQuals A list of the new recalibrated quality scores
*/
private void preserveQScores(final byte[] originalQuals, final byte[] recalQuals) {
for (int iii = 0; iii < recalQuals.length; iii++) {
if (originalQuals[iii] < QualityUtils.MIN_USABLE_Q_SCORE) { //BUGBUG: used to be Q5 now is Q6, probably doesn't matter
recalQuals[iii] = originalQuals[iii];
}
}
}
/**
* Shared functionality to add keys
*
* @param array the target array we are creating the keys in
* @param keys the actual keys we're using as a source
* @param covariateList the covariate list to loop through
* @param keyIndex the index in the keys and the arrays objects to run from
*/
private void addKeysToArray(final Object[] array, final String[] keys, List<Covariate> covariateList, int keyIndex) {
for (Covariate covariate : covariateList) {
array[keyIndex] = covariate.getValue(keys[keyIndex]);
keyIndex++;
}
}
}

View File

@ -13,6 +13,8 @@ import org.broadinstitute.sting.utils.NGSPlatform;
*/
public class GATKSAMReadGroupRecord extends SAMReadGroupRecord {
public static String LANE_TAG = "LN";
// the SAMReadGroupRecord data we're caching
private String mSample = null;
private String mPlatform = null;
@ -79,4 +81,12 @@ public class GATKSAMReadGroupRecord extends SAMReadGroupRecord {
return mNGSPlatform;
}
public String getLane() {
return this.getAttribute(LANE_TAG);
}
public void setLane(String lane) {
this.setAttribute(LANE_TAG, lane);
}
}

View File

@ -52,8 +52,8 @@ public class GATKSAMRecord extends BAMRecord {
public static final String REDUCED_READ_ORIGINAL_ALIGNMENT_END_SHIFT = "OE"; // reads that are clipped may use this attribute to keep track of their original alignment end
// Base Quality Score Recalibrator specific attribute tags
public static final String BQSR_BASE_INSERTION_QUALITIES = "BI";
public static final String BQSR_BASE_DELETION_QUALITIES = "BD";
public static final String BQSR_BASE_INSERTION_QUALITIES = "BI"; // base qualities for insertions
public static final String BQSR_BASE_DELETION_QUALITIES = "BD"; // base qualities for deletions
// the SAMRecord data we're caching
private String mReadString = null;

View File

@ -698,6 +698,13 @@ public class ReadUtils {
return bases;
}
public static GATKSAMRecord createRandomRead(int length) {
byte[] quals = ReadUtils.createRandomReadQuals(length);
byte[] bbases = ReadUtils.createRandomReadBases(length, true);
return ArtificialSAMUtils.createArtificialRead(bbases, quals, bbases.length + "M");
}
public static String prettyPrintSequenceRecords ( SAMSequenceDictionary sequenceDictionary ) {
String[] sequenceRecordNames = new String[sequenceDictionary.size()];
int sequenceRecordIndex = 0;

View File

@ -222,7 +222,10 @@ public class GenotypeLikelihoods {
/*
* a cache of the PL index to the 2 alleles it represents over all possible numbers of alternate alleles
*/
private static GenotypeLikelihoodsAllelePair[] PLIndexToAlleleIndex = new GenotypeLikelihoodsAllelePair[]{ new GenotypeLikelihoodsAllelePair(0, 0) };
private static GenotypeLikelihoodsAllelePair[] PLIndexToAlleleIndex;
static {
calculatePLcache(65); // start with data for 10 alternate alleles
}
private static void calculatePLcache(final int minIndex) {
// how many alternate alleles do we need to calculate for?
@ -266,6 +269,7 @@ public class GenotypeLikelihoods {
*/
public static GenotypeLikelihoodsAllelePair getAllelePair(final int PLindex) {
// make sure that we've cached enough data
// TODO -- this is not thread-safe!
if ( PLindex >= PLIndexToAlleleIndex.length )
calculatePLcache(PLindex);

View File

@ -25,9 +25,13 @@
package org.broadinstitute.sting.gatk;
import org.broadinstitute.sting.WalkerTest;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.Arrays;
/**
*
*/
@ -73,4 +77,45 @@ public class EngineFeaturesIntegrationTest extends WalkerTest {
@Test() private void testMissingInterval() {
testMissingFile("missing interval", "-T UnifiedGenotyper -L missing.interval_list -I " + b37GoodBAM);
}
// --------------------------------------------------------------------------------
//
// Test that our exceptions are coming back as we expect
//
// --------------------------------------------------------------------------------
private class EngineErrorHandlingTestProvider extends TestDataProvider {
Class expectedException;
boolean multiThreaded;
public EngineErrorHandlingTestProvider(Class exceptedException, final boolean multiThreaded) {
super(EngineErrorHandlingTestProvider.class);
this.expectedException = exceptedException;
this.multiThreaded = multiThreaded;
setName(String.format("Engine error handling: expected %s, is-multithreaded %b", exceptedException, multiThreaded));
}
}
@DataProvider(name = "EngineErrorHandlingTestProvider")
public Object[][] makeEngineErrorHandlingTestProvider() {
for ( final boolean multiThreaded : Arrays.asList(true, false)) {
new EngineErrorHandlingTestProvider(NullPointerException.class, multiThreaded);
new EngineErrorHandlingTestProvider(UserException.class, multiThreaded);
new EngineErrorHandlingTestProvider(ReviewedStingException.class, multiThreaded);
}
return EngineErrorHandlingTestProvider.getTests(EngineErrorHandlingTestProvider.class);
}
//
// Loop over errors to throw, make sure they are the errors we get back from the engine, regardless of NT type
//
@Test(dataProvider = "EngineErrorHandlingTestProvider")
public void testEngineErrorHandlingTestProvider(EngineErrorHandlingTestProvider cfg) {
final String root = "-T ErrorThrowing -R " + b37KGReference;
final String args = root + (cfg.multiThreaded ? " -nt 2" : "") + " -E " + cfg.expectedException.getSimpleName();
WalkerTestSpec spec = new WalkerTestSpec(args, 0, cfg.expectedException);
executeTest(cfg.toString(), spec);
}
}

View File

@ -1,9 +1,7 @@
package org.broadinstitute.sting.gatk.walkers.bqsr;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.clipping.ClippingRepresentation;
import org.broadinstitute.sting.utils.clipping.ReadClipper;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import org.testng.Assert;
@ -11,7 +9,6 @@ import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
import java.util.BitSet;
import java.util.Random;
/**
* @author Mauricio Carneiro
@ -20,22 +17,18 @@ import java.util.Random;
public class ContextCovariateUnitTest {
ContextCovariate covariate;
RecalibrationArgumentCollection RAC;
Random random;
@BeforeClass
public void init() {
RAC = new RecalibrationArgumentCollection();
covariate = new ContextCovariate();
random = GenomeAnalysisEngine.getRandomGenerator();
covariate.initialize(RAC);
}
@Test(enabled = true)
public void testSimpleContexts() {
byte[] quals = ReadUtils.createRandomReadQuals(10000);
byte[] bbases = ReadUtils.createRandomReadBases(10000, true);
GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(bbases, quals, bbases.length + "M");
GATKSAMRecord read = ReadUtils.createRandomRead(1000);
GATKSAMRecord clippedRead = ReadClipper.clipLowQualEnds(read, RAC.LOW_QUAL_TAIL, ClippingRepresentation.WRITE_NS);
CovariateValues values = covariate.getValues(read);
verifyCovariateArray(values.getMismatches(), RAC.MISMATCHES_CONTEXT_SIZE, stringFrom(clippedRead.getReadBases()));

View File

@ -1,7 +1,5 @@
package org.broadinstitute.sting.gatk.walkers.bqsr;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
@ -10,7 +8,6 @@ import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
import java.util.BitSet;
import java.util.Random;
/**
* @author Mauricio Carneiro
@ -19,22 +16,18 @@ import java.util.Random;
public class CycleCovariateUnitTest {
CycleCovariate covariate;
RecalibrationArgumentCollection RAC;
Random random;
@BeforeClass
public void init() {
RAC = new RecalibrationArgumentCollection();
covariate = new CycleCovariate();
random = GenomeAnalysisEngine.getRandomGenerator();
covariate.initialize(RAC);
}
@Test(enabled = true)
public void testSimpleCycles() {
short readLength = 10;
byte[] quals = ReadUtils.createRandomReadQuals(readLength);
byte[] bbases = ReadUtils.createRandomReadBases(readLength, true);
GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(bbases, quals, bbases.length + "M");
short readLength = 10;
GATKSAMRecord read = ReadUtils.createRandomRead(readLength);
read.setReadGroup(new GATKSAMReadGroupRecord("MY.ID"));
read.getReadGroup().setPlatform("illumina");

View File

@ -0,0 +1,67 @@
package org.broadinstitute.sting.gatk.walkers.bqsr;
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
import java.util.BitSet;
/**
* @author Mauricio Carneiro
* @since 3/1/12
*/
public class ReadGroupCovariateUnitTest {
ReadGroupCovariate covariate;
RecalibrationArgumentCollection RAC;
@BeforeClass
public void init() {
RAC = new RecalibrationArgumentCollection();
covariate = new ReadGroupCovariate();
covariate.initialize(RAC);
}
@Test(enabled = true)
public void testSingleRecord() {
final String expected = "SAMPLE.1";
GATKSAMReadGroupRecord rg = new GATKSAMReadGroupRecord("MY.ID");
rg.setSample("SAMPLE");
rg.setLane("1");
runTest(rg, expected);
}
@Test(enabled = true)
public void testMissingLane() {
final String expected = "SAMPLE.7";
GATKSAMReadGroupRecord rg = new GATKSAMReadGroupRecord("MY.7");
rg.setSample("SAMPLE");
runTest(rg, expected);
}
@Test(enabled = true)
public void testMissingSample() {
final String expected = "MY.ID";
GATKSAMReadGroupRecord rg = new GATKSAMReadGroupRecord("MY.ID");
rg.setLane("1");
runTest(rg, expected);
}
private void runTest(GATKSAMReadGroupRecord rg, String expected) {
GATKSAMRecord read = ReadUtils.createRandomRead(10);
read.setReadGroup(rg);
CovariateValues values = covariate.getValues(read);
verifyCovariateArray(values.getMismatches(), expected);
}
private void verifyCovariateArray(BitSet[] values, String expected) {
for (BitSet value : values) {
String actual = covariate.keyFromBitSet(value);
Assert.assertEquals(actual, expected);
}
}
}

View File

@ -80,4 +80,24 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest {
executeTest("MAX 10 het sites [TEST SIX]; require PQ >= 10; cacheWindow = 20000; has inconsistent sites", spec);
}
@Test
public void test7() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "CEU.trio.2010_03.genotypes.hg18.vcf", 20000, 10, 10)
+ " -L chr20:332341-802503",
1,
Arrays.asList("c37548b333b65f58d0edfc5c2a62a28a"));
executeTest("Use trio-phased VCF, but ignore its phasing [TEST SEVEN]", spec);
}
@Test
public void test8() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "CEU.trio.2010_03.genotypes.hg18.vcf", 20000, 10, 10)
+ " -L chr20:332341-802503" + " -respectPhaseInInput",
1,
Arrays.asList("dfc7cdddd702e63d46d04f61a3ecd720"));
executeTest("Use trio-phased VCF, and respect its phasing [TEST EIGHT]", spec);
}
}

View File

@ -94,7 +94,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("94fb8cba9e236131c6fbf1d7fee738fe")
Arrays.asList("7a726ecbedd722fa7cd4de3e023b7a82")
);
executeTest("testFundamentalsCountVariantsSNPsandIndels", spec);
}
@ -115,7 +115,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("670979268b05c3024297ba98d67d89ab")
Arrays.asList("95bb4a4267a8f29dd7a8169561499f20")
);
executeTest("testFundamentalsCountVariantsSNPsandIndelsWithNovelty", spec);
}
@ -137,7 +137,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("c38ce9c872a76ae7dd26c3e353bf0765")
Arrays.asList("9b51029083495935823fb0447a2857b9")
);
executeTest("testFundamentalsCountVariantsSNPsandIndelsWithNoveltyAndFilter", spec);
}
@ -158,7 +158,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("2c37f23bf6114a2b27f21ed445806fd2")
Arrays.asList("318b5fbbc61e2fc11d49369359812edd")
);
executeTest("testFundamentalsCountVariantsSNPsandIndelsWithCpG", spec);
}
@ -179,7 +179,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("206f0d629de9af0b97340cb22d34a81b")
Arrays.asList("74c02df2ef69dda231a2aec2a948747b")
);
executeTest("testFundamentalsCountVariantsSNPsandIndelsWithFunctionalClass", spec);
}
@ -200,7 +200,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("bd869725429deae8f56175ba9a8ab390")
Arrays.asList("2d97b1fe15e532e89803ba7ba347ff20")
);
executeTest("testFundamentalsCountVariantsSNPsandIndelsWithDegeneracy", spec);
}
@ -221,7 +221,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("9c7f6783a57ad681bb754b5e71de27dc")
Arrays.asList("474cbc231ddbc4ba299ffe61a17405b6")
);
executeTest("testFundamentalsCountVariantsSNPsandIndelsWithSample", spec);
}
@ -244,7 +244,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("a2d280440aa3771937f3d2d10f1eea74")
Arrays.asList("2cc9bc4bbe8b4edb6dc27642ec41f66e")
);
executeTest("testFundamentalsCountVariantsSNPsandIndelsWithJexlExpression", spec);
}
@ -269,7 +269,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("2925d811dd521beb00059f8c8e818d83")
Arrays.asList("00c94cf3e14bc2855d39bbefa27f9bb2")
);
executeTest("testFundamentalsCountVariantsSNPsandIndelsWithMultipleJexlExpressions", spec);
}
@ -288,7 +288,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("4b79bf2dfd73ddac0ceb0838a352bf9a")
Arrays.asList("a0c0d4805db1245aa30a306aa506096f")
);
executeTest("testFundamentalsCountVariantsNoCompRod", spec);
}
@ -301,7 +301,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
" --eval " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf" +
" --comp:comp_genotypes,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.head.vcf";
WalkerTestSpec spec = new WalkerTestSpec(withSelect(tests, "DP < 50", "DP50") + " " + extraArgs + " -ST CpG -o %s",
1, Arrays.asList("1739654de350541edf429888b708ae01"));
1, Arrays.asList("2192418a70a8e018a1675d4f425155f3"));
executeTestParallel("testSelect1", spec);
}
@ -329,7 +329,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
@Test
public void testCompVsEvalAC() {
String extraArgs = "-T VariantEval -R "+b36KGReference+" -o %s -ST CpG -EV GenotypeConcordance --eval:evalYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.very.few.lines.vcf --comp:compYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.fake.genotypes.ac.test.vcf";
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("d57cf846bc26d338edcf181fb0c85535"));
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("2282523336c24d434d1cc0eb1697b4f9"));
executeTestParallel("testCompVsEvalAC",spec);
}
@ -359,7 +359,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
" --dbsnp " + b37dbSNP132 +
" --eval:evalBI " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" +
" -noST -ST Novelty -o %s";
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("b663745a39f62bfa5b5d486811cf57ec"));
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("ec321fcc424fbad74a4a74e739173d03"));
executeTestParallel("testEvalTrackWithoutGenotypes",spec);
}
@ -371,7 +371,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
" --eval:evalBI " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" +
" --eval:evalBC " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bc.sites.vcf" +
" -noST -ST Novelty -o %s";
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("f1e1b1469dca86d72ae79a2d3e10612c"));
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("ccaea6245086552cd63f828eabddfaf3"));
executeTestParallel("testMultipleEvalTracksWithoutGenotypes",spec);
}
@ -449,7 +449,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("0c632b5be8a54e43afa576510b40c4da")
Arrays.asList("9954c769ef37c47d3b61481ab0807be0")
);
executeTest("testAlleleCountStrat", spec);
}
@ -470,7 +470,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("92404820a94e7cfb854ae73450a0fbd9")
Arrays.asList("c0d69ce7647a575d166d8bab5aa16299")
);
executeTest("testIntervalStrat", spec);
}
@ -487,7 +487,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("417875ab1924b7e7950fa10daee393d2")
Arrays.asList("9a8ffb506118c1bde6f7bfadc4fb6f10")
);
executeTest("testModernVCFWithLargeIndels", spec);
}

View File

@ -110,7 +110,7 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
" -priority NA19240_BGI,NA19240_ILLUMINA,NA19240_WUGSC,denovoInfo" +
" -genotypeMergeOptions UNIQUIFY -L 1"),
1,
Arrays.asList("ab72f4bfb16d3894942149173a087647"));
Arrays.asList("ee43a558fd3faeaa447acab89f0001d5"));
executeTest("threeWayWithRefs", spec);
}

View File

@ -1,5 +1,10 @@
package org.broadinstitute.sting.utils.recalibration;
import net.sf.samtools.SAMReadGroupRecord;
import org.broadinstitute.sting.utils.NGSPlatform;
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import org.testng.annotations.Test;
import java.io.File;
@ -12,10 +17,13 @@ import java.io.File;
*/
public class BaseRecalibrationUnitTest {
@Test(enabled=true)
public void testReadingCSV() {
File csv = new File("public/testdata/exampleCSV.csv");
@Test(enabled=false)
public void testReadingReport() {
File csv = new File("public/testdata/exampleGATKREPORT.grp");
BaseRecalibration baseRecalibration = new BaseRecalibration(csv);
GATKSAMRecord read = ReadUtils.createRandomRead(1000);
read.setReadGroup(new GATKSAMReadGroupRecord(new SAMReadGroupRecord("exampleBAM.bam.bam"), NGSPlatform.ILLUMINA));
baseRecalibration.recalibrateRead(read);
System.out.println("Success");
}
}

File diff suppressed because it is too large Load Diff