Merge branches to get new RodBinding framework
Conflicts: private/java/src/org/broadinstitute/sting/gatk/walkers/replication_validation/ReplicationValidationWalker.java
This commit is contained in:
commit
bb557266ca
10
build.xml
10
build.xml
|
|
@ -468,6 +468,10 @@
|
|||
</javadoc>
|
||||
</target>
|
||||
|
||||
<target name="clean.gatkdocs">
|
||||
<delete dir="gatkdocs"/>
|
||||
</target>
|
||||
|
||||
<target name="gatkdocs" depends="gatk.compile"
|
||||
description="Extract help key/value pair file from the JavaDoc tags.">
|
||||
<path id="doclet.classpath">
|
||||
|
|
@ -520,6 +524,8 @@
|
|||
<fileset dir="${java.classes}">
|
||||
<include name="**/utils/codecs/**/*.class"/>
|
||||
<include name="**/utils/variantcontext/**/*.class"/>
|
||||
<include name="org/broadinstitute/sting/utils/exceptions/**"/>
|
||||
<include name="org/broadinstitute/sting/utils/help/DocumentedGATKFeature.class"/>
|
||||
</fileset>
|
||||
</jar>
|
||||
</target>
|
||||
|
|
@ -1074,7 +1080,7 @@
|
|||
</findbugs>
|
||||
</target>
|
||||
|
||||
<target name="clean" description="clean up" depends="tribble.clean,clean.javadoc">
|
||||
<target name="clean" description="clean up" depends="tribble.clean,clean.javadoc,clean.gatkdocs">
|
||||
<delete dir="out"/>
|
||||
<delete dir="${build.dir}"/>
|
||||
<delete dir="${lib.dir}"/>
|
||||
|
|
@ -1087,7 +1093,7 @@
|
|||
<!-- Build gsalib R module -->
|
||||
<target name="gsalib">
|
||||
<exec executable="R" failonerror="true">
|
||||
<arg line="R CMD INSTALL -l private/R/ private/R/src/gsalib/" />
|
||||
<arg line="R CMD INSTALL -l public/R/ public/R/src/gsalib/" />
|
||||
</exec>
|
||||
</target>
|
||||
</project>
|
||||
|
|
|
|||
|
|
@ -0,0 +1,10 @@
|
|||
Package: gsalib
|
||||
Type: Package
|
||||
Title: Utility functions
|
||||
Version: 1.0
|
||||
Date: 2010-10-02
|
||||
Author: Kiran Garimella
|
||||
Maintainer: Kiran Garimella <kiran@broadinstitute.org>
|
||||
Description: Utility functions for GATK NGS analyses
|
||||
License: BSD
|
||||
LazyLoad: yes
|
||||
|
|
@ -0,0 +1,12 @@
|
|||
# Print a framed error banner (via gsa.message), show a traceback for
# debugging context, and abort the script without echoing the call.
gsa.error <- function(message) {
    frame = "Error: **********";

    message("");
    gsa.message(frame);
    gsa.message(sprintf("Error: %s", message));
    gsa.message(frame);
    message("");

    # Show where we came from before bailing out.
    traceback();

    message("");
    stop(message, call. = FALSE);
}
|
||||
|
|
@ -0,0 +1,116 @@
|
|||
# Print a usage banner plus one documentation line per declared argument,
# then halt the script quietly.  Invoked when a script receives no
# arguments or the user asks for -h / -help.
.gsa.getargs.usage <- function(argspec, doc) {
    cmdline = commandArgs();

    usage = "Usage:";

    # When run under Rscript, recover the script path from --file=... so the
    # usage line names the actual program.
    scriptIdx = grep("--file=", cmdline);
    if (length(scriptIdx) > 0) {
        scriptName = gsub("--file=", "", cmdline[scriptIdx[1]]);

        usage = sprintf("Usage: Rscript %s [arguments]", scriptName);

        # Show the one-line script description when one was provided.
        if (!is.na(doc)) {
            message(sprintf("%s: %s\n", scriptName, doc));
        }
    }

    message(usage);

    # One line per argument: name, default value, documentation.
    for (argName in names(argspec)) {
        spec = argspec[[argName]];

        if (is.list(spec)) {
            argDefault = spec$value;
            argDoc = spec$doc;
        } else {
            argDefault = 0;
            argDoc = "";
        }

        message(sprintf(" -%-10s\t[default: %s]\t%s", argName, argDefault, argDoc));
    }

    message("");

    # Abort without printing an additional error message.
    stop(call. = FALSE);
}
|
||||
|
||||
# Resolve script arguments from 'argspec', working in both interactive and
# command-line (Rscript) modes.
#
# argspec: named list; each element is either a bare default value or a
#          list(value=..., doc=...).  A value of NA marks the argument as
#          required.
# doc:     optional one-line description of the script, shown in usage output.
#
# Returns a named list of resolved argument values.  Missing required
# arguments are reported via gsa.warn and then abort via gsa.error.
gsa.getargs <- function(argspec, doc = NA) {
    argsenv = new.env();

    # Seed the environment with the declared defaults.
    for (argname in names(argspec)) {
        value = 0;
        if (is.list(argspec[[argname]])) {
            value = argspec[[argname]]$value;
        } else {
            value = argspec[[argname]];
        }

        assign(argname, value, envir=argsenv);
    }

    if (interactive()) {
        # Interactive mode: fill required (NA/NULL) values from a 'cmdargs'
        # list left over from a previous invocation, or prompt the user.
        for (argname in names(argspec)) {
            value = get(argname, envir=argsenv);

            # is.null() must be tested first: is.na(NULL) is logical(0),
            # which makes a plain if() fail with a zero-length condition.
            if (is.null(value) || any(is.na(value))) {
                if (exists("cmdargs")) {
                    assign(argname, cmdargs[[argname]], envir=argsenv);
                } else {
                    assign(argname, readline(sprintf("Please enter a value for '%s': ", argname)), envir=argsenv);
                }
            } else {
                assign(argname, value, envir=argsenv);
            }
        }
    } else {
        # Command-line mode: parse "-key value" pairs.
        cargs = commandArgs(TRUE);

        if (length(cargs) == 0) {
            .gsa.getargs.usage(argspec, doc);
        }

        for (i in seq_along(cargs)) {
            if (length(grep("^-", cargs[i], ignore.case=TRUE)) > 0) {
                # Strip only the LEADING dashes so argument names that contain
                # hyphens survive intact (gsub("-", "", ...) mangled them).
                key = gsub("^-+", "", cargs[i]);
                # If the flag is the last token, cargs[i+1] is NA; the
                # missing-argument check below will then report it.
                value = cargs[i+1];

                if (key == "h" || key == "help") {
                    .gsa.getargs.usage(argspec, doc);
                }

                # Coerce numeric-looking values (digits, '.', 'e', '+', '-').
                if (length(grep("^[\\d\\.e\\+\\-]+$", value, perl=TRUE, ignore.case=TRUE)) > 0) {
                    value = as.numeric(value);
                }

                assign(key, value, envir=argsenv);
            }
        }
    }

    args = as.list(argsenv);

    # Report every required argument that is still unset.
    isMissingArgs = 0;
    missingArgs = c();

    for (arg in names(argspec)) {
        # NULL-safe, vector-safe check (same form as the interactive branch).
        if (is.null(args[[arg]]) || any(is.na(args[[arg]]))) {
            gsa.warn(sprintf("Value for required argument '-%s' was not specified", arg));

            isMissingArgs = 1;
            missingArgs = c(missingArgs, arg);
        }
    }

    if (isMissingArgs) {
        gsa.error(
            paste(
                "Missing required arguments: -",
                paste(missingArgs, collapse=" -"),
                ". Specify -h or -help to this script for a list of available arguments.",
                sep=""
            )
        );
    }

    args;
}
|
||||
|
|
@ -0,0 +1,3 @@
|
|||
# Emit a line to stderr tagged with the library prefix "[gsalib]".
gsa.message <- function(message) {
    tagged = sprintf("[gsalib] %s", message);
    message(tagged);
}
|
||||
|
|
@ -0,0 +1,50 @@
|
|||
# Plot a proportional two- or three-way Venn diagram.
#
# a, b, c          : circle sizes (c == 0 means a two-way venn)
# a_and_b/_c, b_and_c : pairwise overlap sizes
# col              : fill colors for the three circles
# pos              : raster placement (x0, y0, x1, y1) in plot coordinates
# debug            : if 1, print the download command
#
# The diagram is rendered remotely by the Google Chart API (requires network
# access and the 'wget' binary), fetched to a temp file, and drawn into the
# current plot device.
gsa.plot.venn <-
function(a, b, c=0, a_and_b, a_and_c=0, b_and_c=0,
         col=c("#FF6342", "#63C6DE", "#ADDE63"),
         pos=c(0.20, 0.20, 0.80, 0.82),
         debug=0
) {
    library(png);
    library(graphics);

    # Convert each color to the bare RRGGBB hex form the chart API expects.
    for (i in 1:length(col)) {
        rgbcol = col2rgb(col[i]);
        col[i] = sprintf("%02X%02X%02X", rgbcol[1], rgbcol[2], rgbcol[3]);
    }

    chco = paste(col[1], col[2], col[3], sep=",");
    chd = paste(a, b, c, a_and_b, a_and_c, b_and_c, sep=",");

    props = c(
        'cht=v',
        'chs=525x525',
        'chds=0,10000000000',
        paste('chco=', chco, sep=""),
        paste('chd=t:', chd, sep="")
    );
    proplist = paste(props[1], props[2], props[3], props[4], props[5], sep='&');

    # Fetch the rendered venn diagram into a temporary png file.
    filename = tempfile("venn");
    cmd = paste("wget -O ", filename, " 'http://chart.apis.google.com/chart?", proplist, "' > /dev/null 2>&1", sep="");

    if (debug == 1) {
        print(cmd);
    }
    system(cmd);

    # BUG FIX: read the image into its own variable.  Previously it was read
    # into 'a', clobbering the circle size used by the orientation test below
    # (the comparison then operated on a pixel array, not a count).
    img = readPNG(filename);

    # Render the temp png file into a plotting frame.
    plot(0, 0, type="n", xaxt="n", yaxt="n", bty="n", xlim=c(0, 1), ylim=c(0, 1), xlab="", ylab="");
    if (c == 0 || a >= b) {
        rasterImage(img, pos[1], pos[2], pos[3], pos[4]);
    } else {
        # Smaller first circle: flip the raster so the bigger set leads.
        rasterImage(img, 0.37+pos[1], 0.37+pos[2], 0.37+pos[3], 0.37+pos[4], angle=180);
    }

    # Clean up the temporary download.
    unlink(filename);
}
|
||||
|
||||
|
|
@ -0,0 +1,83 @@
|
|||
# Read a GATK-produced CSV into a data.frame, or return NA when the file is
# absent or too small to contain real data.
#
# The 500-byte floor filters out header-only / placeholder outputs.
# NOTE(review): the threshold is heuristic — confirm against the smallest
# valid table before relying on it.
.gsa.attemptToLoadFile <- function(filename) {
    file = NA;

    # '&&' short-circuits, so file.info() is only consulted for paths that
    # actually exist (a plain '&' evaluates both sides unconditionally).
    if (file.exists(filename) && file.info(filename)$size > 500) {
        file = read.csv(filename, header=TRUE, comment.char="#");
    }

    file;
}
|
||||
|
||||
# Load the per-module CSV outputs of a VariantEval run into one list.
#
# evalRoot: common filename prefix; each module's table is expected at
#           "<evalRoot>.<ModuleName>.csv".
#
# Returns a list with one entry per module (NA when the corresponding file is
# missing or undersized — see .gsa.attemptToLoadFile) plus derived callset
# name vectors computed from the TiTv table's jexl_expression column.
gsa.read.eval <-
function(evalRoot) {
    # Expected CSV path for each VariantEval module.
    fileAlleleCountStats = paste(evalRoot, ".AlleleCountStats.csv", sep="");
    fileCompOverlap = paste(evalRoot, ".Comp_Overlap.csv", sep="");
    fileCountVariants = paste(evalRoot, ".Count_Variants.csv", sep="");
    fileGenotypeConcordance = paste(evalRoot, ".Genotype_Concordance.csv", sep="");
    fileMetricsByAc = paste(evalRoot, ".MetricsByAc.csv", sep="");
    fileMetricsBySample = paste(evalRoot, ".MetricsBySample.csv", sep="");
    fileQuality_Metrics_by_allele_count = paste(evalRoot, ".Quality_Metrics_by_allele_count.csv", sep="");
    fileQualityScoreHistogram = paste(evalRoot, ".QualityScoreHistogram.csv", sep="");
    fileSampleStatistics = paste(evalRoot, ".Sample_Statistics.csv", sep="");
    fileSampleSummaryStatistics = paste(evalRoot, ".Sample_Summary_Statistics.csv", sep="");
    fileSimpleMetricsBySample = paste(evalRoot, ".SimpleMetricsBySample.csv", sep="");
    fileTi_slash_Tv_Variant_Evaluator = paste(evalRoot, ".Ti_slash_Tv_Variant_Evaluator.csv", sep="");
    fileTiTvStats = paste(evalRoot, ".TiTvStats.csv", sep="");
    fileVariant_Quality_Score = paste(evalRoot, ".Variant_Quality_Score.csv", sep="");

    # Result skeleton: every slot defaults to NA so callers can test for
    # missing modules uniformly.
    eval = list(
        AlleleCountStats = NA,
        CompOverlap = NA,
        CountVariants = NA,
        GenotypeConcordance = NA,
        MetricsByAc = NA,
        MetricsBySample = NA,
        Quality_Metrics_by_allele_count = NA,
        QualityScoreHistogram = NA,
        SampleStatistics = NA,
        SampleSummaryStatistics = NA,
        SimpleMetricsBySample = NA,
        TiTv = NA,
        TiTvStats = NA,
        Variant_Quality_Score = NA,

        CallsetNames = c(),
        CallsetOnlyNames = c(),
        CallsetFilteredNames = c()
    );

    # Best-effort load of every module table (NA on failure).
    eval$AlleleCountStats = .gsa.attemptToLoadFile(fileAlleleCountStats);
    eval$CompOverlap = .gsa.attemptToLoadFile(fileCompOverlap);
    eval$CountVariants = .gsa.attemptToLoadFile(fileCountVariants);
    eval$GenotypeConcordance = .gsa.attemptToLoadFile(fileGenotypeConcordance);
    eval$MetricsByAc = .gsa.attemptToLoadFile(fileMetricsByAc);
    eval$MetricsBySample = .gsa.attemptToLoadFile(fileMetricsBySample);
    eval$Quality_Metrics_by_allele_count = .gsa.attemptToLoadFile(fileQuality_Metrics_by_allele_count);
    eval$QualityScoreHistogram = .gsa.attemptToLoadFile(fileQualityScoreHistogram);
    eval$SampleStatistics = .gsa.attemptToLoadFile(fileSampleStatistics);
    eval$SampleSummaryStatistics = .gsa.attemptToLoadFile(fileSampleSummaryStatistics);
    eval$SimpleMetricsBySample = .gsa.attemptToLoadFile(fileSimpleMetricsBySample);
    eval$TiTv = .gsa.attemptToLoadFile(fileTi_slash_Tv_Variant_Evaluator);
    eval$TiTvStats = .gsa.attemptToLoadFile(fileTiTvStats);
    eval$Variant_Quality_Score = .gsa.attemptToLoadFile(fileVariant_Quality_Score);

    # Derive the callset names from the TiTv table's jexl expressions.
    # "<name>-only" rows identify the individual callsets; the
    # FilteredIn/Intersection/none rows are excluded.
    uniqueJexlExpressions = unique(eval$TiTv$jexl_expression);
    eval$CallsetOnlyNames = as.vector(uniqueJexlExpressions[grep("FilteredIn|Intersection|none", uniqueJexlExpressions, invert=TRUE, ignore.case=TRUE)]);
    eval$CallsetNames = as.vector(gsub("-only", "", eval$CallsetOnlyNames));
    # Build names of the form "In<Name1>-FilteredIn<Name2>" — the perl \U in
    # the replacement uppercases the first letter of each callset name.
    eval$CallsetFilteredNames = as.vector(c(
        paste(gsub("^(\\w)", "In\\U\\1", eval$CallsetNames[1], perl=TRUE), "-Filtered", gsub("^(\\w)", "In\\U\\1", eval$CallsetNames[2], perl=TRUE), sep=""),
        paste(gsub("^(\\w)", "In\\U\\1", eval$CallsetNames[2], perl=TRUE), "-Filtered", gsub("^(\\w)", "In\\U\\1", eval$CallsetNames[1], perl=TRUE), sep=""))
    );

    # If the capitalized form isn't present in the table, fall back to the
    # uncapitalized "In<name>-FilteredIn<name>" spelling.
    if (!(eval$CallsetFilteredNames[1] %in% unique(eval$TiTv$jexl_expression))) {
        eval$CallsetFilteredNames[1] = paste("In", eval$CallsetNames[1], "-FilteredIn", eval$CallsetNames[2], sep="");
    }

    if (!(eval$CallsetFilteredNames[2] %in% unique(eval$TiTv$jexl_expression))) {
        eval$CallsetFilteredNames[2] = paste("In", eval$CallsetNames[2], "-FilteredIn", eval$CallsetNames[1], sep="");
        #eval$CallsetFilteredNames[2] = paste(gsub("^(\\w)", "In", eval$CallsetNames[2], perl=TRUE), "-Filtered", gsub("^(\\w)", "In", eval$CallsetNames[1], perl=TRUE), sep="");
    }

    eval;
}
|
||||
|
||||
|
|
@ -0,0 +1,103 @@
|
|||
# Store one parsed GATKReport table as a data.frame inside tableEnv.
# Columns whose values are all numeric are promoted from character to
# numeric.  A table name that is already present gets a ".<count>" suffix so
# concatenated reports reusing the same table name never clobber each other.
.gsa.assignGATKTableToEnvironment <- function(tableName, tableHeader, tableRows, tableEnv) {
    tbl = data.frame(tableRows, row.names=NULL, stringsAsFactors=FALSE);
    colnames(tbl) = tableHeader;

    # Promote all-numeric character columns to numeric.
    for (colIdx in seq_len(ncol(tbl))) {
        asNum = suppressWarnings(as.numeric(tbl[,colIdx]));
        if (!any(is.na(asNum))) {
            tbl[,colIdx] = asNum;
        }
    }

    # Disambiguate repeated table names with a numeric suffix.
    priorNames = ls(envir=tableEnv, pattern=tableName);
    if (length(priorNames) > 0) {
        tableName = paste(tableName, ".", length(priorNames), sep="");
    }

    assign(tableName, tbl, envir=tableEnv);
}
|
||||
|
||||
# Split one fixed-width line into trimmed cell values.
#
# line:         a single line of text.
# columnStarts: 1-based start positions of every column after the first
#               (may be empty for a single-column table).
#
# Returns a character vector with one trimmed value per column.
#
# BUG FIX: the previous sapply(line, ...)[,1] form errored on single-column
# lines — sapply simplifies a length-1 result to a plain vector, on which
# [,1] fails with "incorrect number of dimensions".  Operating directly on
# the line avoids the simplification entirely.
.gsa.splitFixedWidth <- function(line, columnStarts) {
    colFrom = c(1, columnStarts);
    colTo = c(columnStarts - 1, nchar(line));

    cells = substring(line, colFrom, colTo);

    # Strip leading/trailing whitespace from each cell.
    gsub("^[[:space:]]+|[[:space:]]+$", "", cells);
}
|
||||
|
||||
# Load all GATKReport tables from a file.
#
# filename: path to a GATKReport document (possibly containing several
#           tables, each introduced by a "##:GATKReport.vX.Y Name ..." line).
#
# Returns a list mapping each table name to its data.frame (duplicate names
# are suffixed — see .gsa.assignGATKTableToEnvironment).  The final
# assignment's value is what the function returns (invisibly).
gsa.read.gatkreport <- function(filename) {
    con = file(filename, "r", blocking = TRUE);
    lines = readLines(con);
    close(con);

    # Parsed tables accumulate in this environment.
    tableEnv = new.env();

    # Parser state for the table currently being read.
    tableName = NA;
    tableHeader = c();
    tableRows = c();
    version = NA;

    for (line in lines) {
        if (length(grep("^##:GATKReport.v", line, ignore.case=TRUE)) > 0) {
            # New table header: flush the previous table, then reset state.
            headerFields = unlist(strsplit(line, "[[:space:]]+"));

            if (!is.na(tableName)) {
                .gsa.assignGATKTableToEnvironment(tableName, tableHeader, tableRows, tableEnv);
            }

            tableName = headerFields[2];
            tableHeader = c();
            tableRows = c();

            # For differences in versions see
            # $STING_HOME/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportVersion.java
            # v0.1 is whitespace-delimited; v0.2 is fixed-width.
            if (length(grep("^##:GATKReport.v0.1[[:space:]]+", line, ignore.case=TRUE)) > 0) {
                version = "v0.1";

            } else if (length(grep("^##:GATKReport.v0.2[[:space:]]+", line, ignore.case=TRUE)) > 0) {
                version = "v0.2";
                columnStarts = c();

            }

        } else if (length(grep("^[[:space:]]*$", line)) > 0 | length(grep("^[[:space:]]*#", line)) > 0) {
            # do nothing: blank lines and comment lines are skipped.
        } else if (!is.na(tableName)) {

            if (version == "v0.1") {
                # v0.1: columns are simply whitespace-separated.
                row = unlist(strsplit(line, "[[:space:]]+"));

            } else if (version == "v0.2") {
                # v0.2: derive fixed-width column boundaries from the header
                # row, then slice every subsequent row at those positions.
                if (length(tableHeader) == 0) {
                    headerChars = unlist(strsplit(line, ""));
                    # Find the first position of non space characters, excluding the first character
                    columnStarts = intersect(grep("[[:space:]]", headerChars, invert=TRUE), grep("[[:space:]]", headerChars) + 1);
                }

                row = .gsa.splitFixedWidth(line, columnStarts);
            }

            # First data-bearing line of a table is its column header.
            if (length(tableHeader) == 0) {
                tableHeader = row;
            } else {
                tableRows = rbind(tableRows, row);
            }
        }
    }

    # Flush the final table (the loop only flushes on seeing the NEXT header).
    if (!is.na(tableName)) {
        .gsa.assignGATKTableToEnvironment(tableName, tableHeader, tableRows, tableEnv);
    }

    gatkreport = as.list(tableEnv);
}
|
||||
|
|
@ -0,0 +1,28 @@
|
|||
# Fetch SQUID metrics for a project from the Broad reporting database.
#
# project: SQUID project identifier (e.g. "C315").
# bylane:  if TRUE, return per-lane metrics; otherwise per-sample metrics.
#
# Returns a data.frame of the matching rows.  Only works inside the Broad
# internal network (hardcoded Oracle DSN).
gsa.read.squidmetrics = function(project, bylane = FALSE) {
    suppressMessages(library(ROracle));

    drv = dbDriver("Oracle");
    con = dbConnect(drv, "REPORTING/REPORTING@ora01:1521/SEQPROD");

    # Lane- and sample-level metrics live in different views; the two queries
    # differ only by table name, so build one statement instead of two
    # duplicated branches.
    table = if (bylane) "ILLUMINA_PICARD_METRICS" else "ILLUMINA_SAMPLE_STATUS_AGG";

    # SECURITY NOTE(review): 'project' is concatenated directly into SQL.
    # Fine for trusted internal callers, but a parameterized query would be
    # safer if this ever sees external input.
    statement = paste("SELECT * FROM ", table, " WHERE \"Project\" = '", project, "'", sep="");
    print(statement);

    rs = dbSendQuery(con, statement = statement);
    d = fetch(rs, n=-1);
    dbHasCompleted(rs);
    dbClearResult(rs);

    # Close the connection before unloading the driver (the connection was
    # previously leaked — only the driver was closed).
    dbDisconnect(con);
    oraCloseDriver(drv);

    subset(d, Project == project);
}
|
||||
|
|
@ -0,0 +1,23 @@
|
|||
# Read a VCF file into a data.frame.
#
# vcffile:              path to the VCF.
# skip:                 lines to skip before reading data (passed to read.table).
# nrows:                maximum number of data rows to read (-1 = all).
# expandGenotypeFields: if TRUE, add a "<sample>.GT" column per sample
#                       containing just the GT portion of each genotype field.
#
# Returns the data.frame with columns named from the #CHROM header line.
gsa.read.vcf <- function(vcffile, skip=0, nrows=-1, expandGenotypeFields = FALSE) {
    # Locate the #CHROM header line.
    # NOTE(review): assumes the meta-header fits in the first 100 lines —
    # confirm for VCFs with very long headers.
    headers = readLines(vcffile, n=100);
    headerline = headers[grep("#CHROM", headers)];
    header = unlist(strsplit(gsub("#", "", headerline), "\t"))

    # read.table's default comment.char="#" skips the meta and header lines.
    d = read.table(vcffile, header=FALSE, skip=skip, nrows=nrows, stringsAsFactors=FALSE);
    colnames(d) = header;

    if (expandGenotypeFields) {
        columns = ncol(d);

        # Sample genotype columns start at column 10.  Guard against
        # sites-only VCFs (< 10 columns), where 10:columns would count
        # BACKWARDS and index past the data.frame.
        if (columns >= 10) {
            offset = columns + 1;
            for (sampleIndex in 10:columns) {
                # GT is the first ':'-separated field of each genotype entry.
                gt = unlist(lapply(strsplit(d[,sampleIndex], ":"), function(x) x[1]));
                d[,offset] = gt;
                colnames(d)[offset] = sprintf("%s.GT", colnames(d)[sampleIndex]);

                offset = offset + 1;
            }
        }
    }

    return(d);
}
|
||||
|
|
@ -0,0 +1,3 @@
|
|||
# Emit a "[gsalib] Warning: ..." line via gsa.message.
gsa.warn <- function(message) {
    prefixed = sprintf("Warning: %s", message);
    gsa.message(prefixed);
}
|
||||
|
|
@ -0,0 +1,9 @@
|
|||
* Edit the help file skeletons in 'man', possibly combining help files
|
||||
for multiple functions.
|
||||
* Put any C/C++/Fortran code in 'src'.
|
||||
* If you have compiled code, add a .First.lib() function in 'R' to load
|
||||
the shared library.
|
||||
* Run R CMD build to build the package tarball.
|
||||
* Run R CMD check to check the package tarball.
|
||||
|
||||
Read "Writing R Extensions" for more information.
|
||||
Binary file not shown.
|
After Width: | Height: | Size: 49 KiB |
|
|
@ -0,0 +1,49 @@
|
|||
\name{gsa.error}
|
||||
\alias{gsa.error}
|
||||
\title{
|
||||
GSA error
|
||||
}
|
||||
\description{
|
||||
Write an error message to standard out with the prefix '[gsalib] Error:', print a traceback, and exit.
|
||||
}
|
||||
\usage{
|
||||
gsa.error(message)
|
||||
}
|
||||
%- maybe also 'usage' for other objects documented here.
|
||||
\arguments{
|
||||
\item{message}{
|
||||
The error message to write.
|
||||
}
|
||||
}
|
||||
\details{
|
||||
%% ~~ If necessary, more details than the description above ~~
|
||||
}
|
||||
\value{
|
||||
%% ~Describe the value returned
|
||||
%% If it is a LIST, use
|
||||
%% \item{comp1 }{Description of 'comp1'}
|
||||
%% \item{comp2 }{Description of 'comp2'}
|
||||
%% ...
|
||||
}
|
||||
\references{
|
||||
%% ~put references to the literature/web site here ~
|
||||
}
|
||||
\author{
|
||||
Kiran Garimella
|
||||
}
|
||||
\note{
|
||||
%% ~~further notes~~
|
||||
}
|
||||
|
||||
%% ~Make other sections like Warning with \section{Warning }{....} ~
|
||||
|
||||
\seealso{
|
||||
%% ~~objects to See Also as \code{\link{help}}, ~~~
|
||||
}
|
||||
\examples{
|
||||
gsa.error("This is a message");
|
||||
}
|
||||
% Add one or more standard keywords, see file 'KEYWORDS' in the
|
||||
% R documentation directory.
|
||||
\keyword{ ~kwd1 }
|
||||
\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
|
||||
|
|
@ -0,0 +1,57 @@
|
|||
\name{gsa.getargs}
|
||||
\alias{gsa.getargs}
|
||||
\title{
|
||||
Get script arguments
|
||||
}
|
||||
\description{
|
||||
Get script arguments given a list object specifying arguments and documentation. Can be used in command-line or interactive mode. This is helpful when developing scripts in interactive mode that will eventually become command-line programs. If no arguments are specified or help is requested in command-line mode, the script will print out a usage statement with available arguments and exit.
|
||||
}
|
||||
\usage{
|
||||
gsa.getargs(argspec, doc = NA)
|
||||
}
|
||||
\arguments{
|
||||
\item{argspec}{
|
||||
A list object. Each key is an argument name. The value is another list object with a 'value' and 'doc' keys. For example:
|
||||
\preformatted{argspec = list(
|
||||
arg1 = list(value=10, doc="Info for optional arg1"),
|
||||
arg2 = list(value=NA, doc="Info for required arg2")
|
||||
);
|
||||
}
|
||||
|
||||
If the value provided is NA, the argument is considered required and must be specified when the script is invoked. For command-line mode, this means the argument must be specified on the command-line. In interactive mode, there are two ways of specifying these arguments. First, if a properly formatted list argument called 'cmdargs' is present in the current environment (i.e. the object returned by gsa.getargs() from a previous invocation), the value is taken from this object. Otherwise, the argument is prompted for.
|
||||
}
|
||||
|
||||
\item{doc}{
|
||||
An optional string succinctly documenting the purpose of the script.
|
||||
}
|
||||
}
|
||||
\details{
|
||||
Interactive scripts typically make use of hardcoded filepaths and parameter settings. This makes testing easy, but generalization to non-interactive mode more difficult. This utility provides a mechanism for writing scripts that work properly in both interactive and command-line modes.
|
||||
|
||||
To use this method, specify a list with key-value pairs representing the arguments as specified above. In command-line mode, if no arguments are specified or the user specifies '-h' or '-help' anywhere on the command string, a help message indicating available arguments, their default values, and some documentation about the argument are provided.
|
||||
}
|
||||
\value{
|
||||
Returns a list with keys matching the argspec and values representing the specified arguments.
|
||||
|
||||
\item{arg1 }{Value for argument 1}
|
||||
\item{arg2 }{Value for argument 2}
|
||||
...etc.
|
||||
}
|
||||
\references{
|
||||
%% ~put references to the literature/web site here ~
|
||||
}
|
||||
\author{
|
||||
Kiran Garimella
|
||||
}
|
||||
\examples{
|
||||
argspec = list(
|
||||
file = list(value="/my/test.vcf", doc="VCF file"),
|
||||
verbose = list(value=0, doc="If 1, set verbose mode"),
|
||||
test2 = list(value=2.3e9, doc="Another argument that does stuff")
|
||||
);
|
||||
|
||||
cmdargs = gsa.getargs(argspec, doc="My test program");
|
||||
|
||||
print(cmdargs$file); # will print '[1] "/my/test.vcf"'
|
||||
}
|
||||
\keyword{ ~kwd1 }
|
||||
|
|
@ -0,0 +1,44 @@
|
|||
\name{gsa.message}
|
||||
\alias{gsa.message}
|
||||
\title{
|
||||
GSA message
|
||||
}
|
||||
\description{
|
||||
Write a message to standard out with the prefix '[gsalib]'.
|
||||
}
|
||||
\usage{
|
||||
gsa.message(message)
|
||||
}
|
||||
\arguments{
|
||||
\item{message}{
|
||||
The message to write.
|
||||
}
|
||||
}
|
||||
\details{
|
||||
%% ~~ If necessary, more details than the description above ~~
|
||||
}
|
||||
\value{
|
||||
%% ~Describe the value returned
|
||||
%% If it is a LIST, use
|
||||
%% \item{comp1 }{Description of 'comp1'}
|
||||
%% \item{comp2 }{Description of 'comp2'}
|
||||
%% ...
|
||||
}
|
||||
\references{
|
||||
%% ~put references to the literature/web site here ~
|
||||
}
|
||||
\author{
|
||||
Kiran Garimella
|
||||
}
|
||||
\note{
|
||||
%% ~~further notes~~
|
||||
}
|
||||
|
||||
\seealso{
|
||||
%% ~~objects to See Also as \code{\link{help}}, ~~~
|
||||
}
|
||||
\examples{
|
||||
## Write message to stdout
|
||||
gsa.message("This is a message");
|
||||
}
|
||||
\keyword{ ~kwd1 }
|
||||
|
|
@ -0,0 +1,75 @@
|
|||
\name{gsa.plot.venn}
|
||||
\alias{gsa.plot.venn}
|
||||
\title{
|
||||
Plot a proportional venn diagram
|
||||
}
|
||||
\description{
|
||||
Plot a proportional venn diagram (two or three-way venns allowed)
|
||||
}
|
||||
\usage{
|
||||
gsa.plot.venn(a, b, c = 0, a_and_b, a_and_c = 0, b_and_c = 0, col = c("#FF6342", "#63C6DE", "#ADDE63"), pos = c(0.2, 0.2, 0.8, 0.82), debug = 0)
|
||||
}
|
||||
\arguments{
|
||||
\item{a}{
|
||||
size of 'a' circle
|
||||
}
|
||||
\item{b}{
|
||||
size of 'b' circle
|
||||
}
|
||||
\item{c}{
|
||||
size of 'c' circle
|
||||
}
|
||||
\item{a_and_b}{
|
||||
size of a and b overlap
|
||||
}
|
||||
\item{a_and_c}{
|
||||
size of a and c overlap
|
||||
}
|
||||
\item{b_and_c}{
|
||||
size of b and c overlap
|
||||
}
|
||||
\item{col}{
|
||||
vector of colors for each venn piece
|
||||
}
|
||||
\item{pos}{
|
||||
vector of positional elements
|
||||
}
|
||||
\item{debug}{
|
||||
if 1, set debug mode and print useful information
|
||||
}
|
||||
}
|
||||
\details{
|
||||
Plots a two-way or three-way proportional Venn diagram. Internally, this method uses the Google Chart API to generate the diagram, then renders it into the plot window where it can be annotated in interesting ways.
|
||||
}
|
||||
\value{
|
||||
%% ~Describe the value returned
|
||||
%% If it is a LIST, use
|
||||
%% \item{comp1 }{Description of 'comp1'}
|
||||
%% \item{comp2 }{Description of 'comp2'}
|
||||
%% ...
|
||||
}
|
||||
\references{
|
||||
}
|
||||
\author{
|
||||
Kiran Garimella
|
||||
}
|
||||
\note{
|
||||
%% ~~further notes~~
|
||||
}
|
||||
|
||||
%% ~Make other sections like Warning with \section{Warning }{....} ~
|
||||
|
||||
\seealso{
|
||||
%% ~~objects to See Also as \code{\link{help}}, ~~~
|
||||
}
|
||||
\examples{
|
||||
## Plot a two-way Venn diagram
|
||||
gsa.plot.venn(1000, 750, 0, 400);
|
||||
|
||||
## Plot a three-way Venn diagram
|
||||
gsa.plot.venn(1000, 750, 900, 400, 650, 500);
|
||||
}
|
||||
% Add one or more standard keywords, see file 'KEYWORDS' in the
|
||||
% R documentation directory.
|
||||
\keyword{ ~kwd1 }
|
||||
\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
|
||||
|
|
@ -0,0 +1,111 @@
|
|||
\name{gsa.read.eval}
|
||||
\alias{gsa.read.eval}
|
||||
\title{
|
||||
Read a VariantEval file
|
||||
}
|
||||
\description{
|
||||
Read a VariantEval file that's output in R format.
|
||||
}
|
||||
\usage{
|
||||
gsa.read.eval(evalRoot)
|
||||
}
|
||||
%- maybe also 'usage' for other objects documented here.
|
||||
\arguments{
|
||||
\item{evalRoot}{
|
||||
%% ~~Describe \code{evalRoot} here~~
|
||||
}
|
||||
}
|
||||
\details{
|
||||
%% ~~ If necessary, more details than the description above ~~
|
||||
}
|
||||
\value{
|
||||
%% ~Describe the value returned
|
||||
%% If it is a LIST, use
|
||||
%% \item{comp1 }{Description of 'comp1'}
|
||||
%% \item{comp2 }{Description of 'comp2'}
|
||||
%% ...
|
||||
}
|
||||
\references{
|
||||
%% ~put references to the literature/web site here ~
|
||||
}
|
||||
\author{
|
||||
%% ~~who you are~~
|
||||
}
|
||||
\note{
|
||||
%% ~~further notes~~
|
||||
}
|
||||
|
||||
%% ~Make other sections like Warning with \section{Warning }{....} ~
|
||||
|
||||
\seealso{
|
||||
%% ~~objects to See Also as \code{\link{help}}, ~~~
|
||||
}
|
||||
\examples{
|
||||
##---- Should be DIRECTLY executable !! ----
|
||||
##-- ==> Define data, use random,
|
||||
##-- or do help(data=index) for the standard data sets.
|
||||
|
||||
## The function is currently defined as
|
||||
function(evalRoot) {
|
||||
fileAlleleCountStats = paste(evalRoot, ".AlleleCountStats.csv", sep="");
|
||||
fileCompOverlap = paste(evalRoot, ".Comp_Overlap.csv", sep="");
|
||||
fileCountVariants = paste(evalRoot, ".Count_Variants.csv", sep="");
|
||||
fileGenotypeConcordance = paste(evalRoot, ".Genotype_Concordance.csv", sep="");
|
||||
fileMetricsByAc = paste(evalRoot, ".MetricsByAc.csv", sep="");
|
||||
fileMetricsBySample = paste(evalRoot, ".MetricsBySample.csv", sep="");
|
||||
fileQuality_Metrics_by_allele_count = paste(evalRoot, ".Quality_Metrics_by_allele_count.csv", sep="");
|
||||
fileQualityScoreHistogram = paste(evalRoot, ".QualityScoreHistogram.csv", sep="");
|
||||
fileSampleStatistics = paste(evalRoot, ".Sample_Statistics.csv", sep="");
|
||||
fileSampleSummaryStatistics = paste(evalRoot, ".Sample_Summary_Statistics.csv", sep="");
|
||||
fileSimpleMetricsBySample = paste(evalRoot, ".SimpleMetricsBySample.csv", sep="");
|
||||
fileTi_slash_Tv_Variant_Evaluator = paste(evalRoot, ".Ti_slash_Tv_Variant_Evaluator.csv", sep="");
|
||||
fileTiTvStats = paste(evalRoot, ".TiTvStats.csv", sep="");
|
||||
fileVariant_Quality_Score = paste(evalRoot, ".Variant_Quality_Score.csv", sep="");
|
||||
|
||||
eval = list(
|
||||
AlleleCountStats = NA,
|
||||
CompOverlap = NA,
|
||||
CountVariants = NA,
|
||||
GenotypeConcordance = NA,
|
||||
MetricsByAc = NA,
|
||||
MetricsBySample = NA,
|
||||
Quality_Metrics_by_allele_count = NA,
|
||||
QualityScoreHistogram = NA,
|
||||
SampleStatistics = NA,
|
||||
SampleSummaryStatistics = NA,
|
||||
SimpleMetricsBySample = NA,
|
||||
TiTv = NA,
|
||||
TiTvStats = NA,
|
||||
Variant_Quality_Score = NA,
|
||||
|
||||
CallsetNames = c(),
|
||||
CallsetOnlyNames = c(),
|
||||
CallsetFilteredNames = c()
|
||||
);
|
||||
|
||||
eval$AlleleCountStats = .attemptToLoadFile(fileAlleleCountStats);
|
||||
eval$CompOverlap = .attemptToLoadFile(fileCompOverlap);
|
||||
eval$CountVariants = .attemptToLoadFile(fileCountVariants);
|
||||
eval$GenotypeConcordance = .attemptToLoadFile(fileGenotypeConcordance);
|
||||
eval$MetricsByAc = .attemptToLoadFile(fileMetricsByAc);
|
||||
eval$MetricsBySample = .attemptToLoadFile(fileMetricsBySample);
|
||||
eval$Quality_Metrics_by_allele_count = .attemptToLoadFile(fileQuality_Metrics_by_allele_count);
|
||||
eval$QualityScoreHistogram = .attemptToLoadFile(fileQualityScoreHistogram);
|
||||
eval$SampleStatistics = .attemptToLoadFile(fileSampleStatistics);
|
||||
eval$SampleSummaryStatistics = .attemptToLoadFile(fileSampleSummaryStatistics);
|
||||
eval$SimpleMetricsBySample = .attemptToLoadFile(fileSimpleMetricsBySample);
|
||||
eval$TiTv = .attemptToLoadFile(fileTi_slash_Tv_Variant_Evaluator);
|
||||
eval$TiTvStats = .attemptToLoadFile(fileTiTvStats);
|
||||
eval$Variant_Quality_Score = .attemptToLoadFile(fileVariant_Quality_Score);
|
||||
|
||||
uniqueJexlExpressions = unique(eval$TiTv$jexl_expression);
|
||||
eval$CallsetOnlyNames = as.vector(uniqueJexlExpressions[grep("FilteredIn|Intersection|none", uniqueJexlExpressions, invert=TRUE, ignore.case=TRUE)]);
|
||||
eval$CallsetNames = as.vector(gsub("-only", "", eval$CallsetOnlyNames));
|
||||
eval$CallsetFilteredNames = as.vector(c());
|
||||
eval;
|
||||
}
|
||||
}
|
||||
% Add one or more standard keywords, see file 'KEYWORDS' in the
|
||||
% R documentation directory.
|
||||
\keyword{ ~kwd1 }
|
||||
\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
|
||||
|
|
@ -0,0 +1,55 @@
|
|||
\name{gsa.read.gatkreport}
|
||||
\alias{gsa.read.gatkreport}
|
||||
\title{
|
||||
gsa.read.gatkreport
|
||||
}
|
||||
\description{
|
||||
Reads a GATKReport file - a multi-table document - and loads each table as a separate data.frame object in a list.
|
||||
}
|
||||
\usage{
|
||||
gsa.read.gatkreport(filename)
|
||||
}
|
||||
\arguments{
|
||||
\item{filename}{
|
||||
The path to the GATKReport file.
|
||||
}
|
||||
}
|
||||
\details{
|
||||
The GATKReport format replaces the multi-file output format used by many GATK tools and provides a single, consolidated file format. This format accommodates multiple tables and is still R-loadable - through this function.
|
||||
|
||||
The file format looks like this:
|
||||
\preformatted{##:GATKReport.v0.1 TableName : The description of the table
|
||||
col1 col2 col3
|
||||
0 0.007451835696110506 25.474613284804366
|
||||
1 0.002362777171937477 29.844949954504095
|
||||
2 9.087604507451836E-4 32.87590975254731
|
||||
3 5.452562704471102E-4 34.498999090081895
|
||||
4 9.087604507451836E-4 35.14831665150137
|
||||
}
|
||||
|
||||
}
|
||||
\value{
|
||||
Returns a list object, where each key is the TableName and the value is the data.frame object with the contents of the table. If multiple tables with the same name exist, each one after the first will be given names of "TableName.v1", "TableName.v2", ..., "TableName.vN".
|
||||
%% ~Describe the value returned
|
||||
%% If it is a LIST, use
|
||||
%% \item{comp1 }{Description of 'comp1'}
|
||||
%% \item{comp2 }{Description of 'comp2'}
|
||||
%% ...
|
||||
}
|
||||
\references{
|
||||
%% ~put references to the literature/web site here ~
|
||||
}
|
||||
\author{
|
||||
Kiran Garimella
|
||||
}
|
||||
\note{
|
||||
%% ~~further notes~~
|
||||
}
|
||||
|
||||
\seealso{
|
||||
%% ~~objects to See Also as \code{\link{help}}, ~~~
|
||||
}
|
||||
\examples{
|
||||
report = gsa.read.gatkreport("/path/to/my/output.gatkreport");
|
||||
}
|
||||
\keyword{ ~kwd1 }
|
||||
|
|
@ -0,0 +1,48 @@
|
|||
\name{gsa.read.squidmetrics}
|
||||
\alias{gsa.read.squidmetrics}
|
||||
\title{
|
||||
gsa.read.squidmetrics
|
||||
}
|
||||
\description{
|
||||
Reads metrics for a specified SQUID project into a dataframe.
|
||||
}
|
||||
\usage{
|
||||
gsa.read.squidmetrics("C315")
|
||||
}
|
||||
\arguments{
|
||||
\item{project}{
|
||||
The project for which metrics should be obtained.
|
||||
}
|
||||
\item{bylane}{
|
||||
If TRUE, obtains per-lane metrics rather than the default per-sample metrics.
|
||||
}
|
||||
}
|
||||
\details{
|
||||
%% ~~ If necessary, more details than the description above ~~
|
||||
}
|
||||
\value{
|
||||
%% ~Describe the value returned
|
||||
%% If it is a LIST, use
|
||||
%% \item{comp1 }{Description of 'comp1'}
|
||||
%% \item{comp2 }{Description of 'comp2'}
|
||||
%% ...
|
||||
Returns a data frame with samples (or lanes) as the row and the metric as the column.
|
||||
}
|
||||
\references{
|
||||
%% ~put references to the literature/web site here ~
|
||||
}
|
||||
\author{
|
||||
Kiran Garimella
|
||||
}
|
||||
\note{
|
||||
This method will only work within the Broad Institute internal network.
|
||||
}
|
||||
|
||||
\seealso{
|
||||
%% ~~objects to See Also as \code{\link{help}}, ~~~
|
||||
}
|
||||
\examples{
|
||||
## Obtain metrics for project C315.
|
||||
d = gsa.read.squidmetrics("C315");
|
||||
}
|
||||
\keyword{ ~kwd1 }
|
||||
|
|
@ -0,0 +1,53 @@
|
|||
\name{gsa.read.vcf}
|
||||
\alias{gsa.read.vcf}
|
||||
\title{
|
||||
gsa.read.vcf
|
||||
}
|
||||
\description{
|
||||
Reads a VCF file into a table. Optionally expands genotype columns into separate columns containing the genotype, separate from the other fields specified in the FORMAT field.
|
||||
}
|
||||
\usage{
|
||||
gsa.read.vcf(vcffile, skip=0, nrows=-1, expandGenotypeFields = FALSE)
|
||||
}
|
||||
\arguments{
|
||||
\item{vcffile}{
|
||||
The path to the vcf file.
|
||||
}
|
||||
\item{skip}{
|
||||
The number of lines of the data file to skip before beginning to read data.
|
||||
}
|
||||
\item{nrows}{
|
||||
The maximum number of rows to read in. Negative and other invalid values are ignored.
|
||||
}
|
||||
\item{expandGenotypeFields}{
|
||||
If TRUE, adds an additional column per sample containing just the genotype.
|
||||
}
|
||||
}
|
||||
\details{
|
||||
The VCF format is the standard variant call file format used in the GATK. This function reads that data in as a table for easy analysis.
|
||||
}
|
||||
\value{
|
||||
Returns a data.frame object, where each column corresponds to the columns in the VCF file.
|
||||
%% ~Describe the value returned
|
||||
%% If it is a LIST, use
|
||||
%% \item{comp1 }{Description of 'comp1'}
|
||||
%% \item{comp2 }{Description of 'comp2'}
|
||||
%% ...
|
||||
}
|
||||
\references{
|
||||
%% ~put references to the literature/web site here ~
|
||||
}
|
||||
\author{
|
||||
Kiran Garimella
|
||||
}
|
||||
\note{
|
||||
%% ~~further notes~~
|
||||
}
|
||||
|
||||
\seealso{
|
||||
%% ~~objects to See Also as \code{\link{help}}, ~~~
|
||||
}
|
||||
\examples{
|
||||
vcf = gsa.read.vcf("/path/to/my/output.vcf");
|
||||
}
|
||||
\keyword{ ~kwd1 }
|
||||
|
|
@ -0,0 +1,46 @@
|
|||
\name{gsa.warn}
|
||||
\alias{gsa.warn}
|
||||
\title{
|
||||
GSA warn
|
||||
}
|
||||
\description{
|
||||
Write a warning message to standard out with the prefix '[gsalib] Warning:'.
|
||||
}
|
||||
\usage{
|
||||
gsa.warn(message)
|
||||
}
|
||||
%- maybe also 'usage' for other objects documented here.
|
||||
\arguments{
|
||||
\item{message}{
|
||||
The warning message to write.
|
||||
}
|
||||
}
|
||||
\details{
|
||||
%% ~~ If necessary, more details than the description above ~~
|
||||
}
|
||||
\value{
|
||||
%% ~Describe the value returned
|
||||
%% If it is a LIST, use
|
||||
%% \item{comp1 }{Description of 'comp1'}
|
||||
%% \item{comp2 }{Description of 'comp2'}
|
||||
%% ...
|
||||
}
|
||||
\references{
|
||||
%% ~put references to the literature/web site here ~
|
||||
}
|
||||
\author{
|
||||
Kiran Garimella
|
||||
}
|
||||
\note{
|
||||
%% ~~further notes~~
|
||||
}
|
||||
|
||||
\seealso{
|
||||
%% ~~objects to See Also as \code{\link{help}}, ~~~
|
||||
}
|
||||
\examples{
|
||||
## Write message to stdout
|
||||
gsa.warn("This is a warning message");
|
||||
}
|
||||
\keyword{ ~kwd1 }
|
||||
\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
|
||||
|
|
@ -0,0 +1,68 @@
|
|||
\name{gsalib-package}
|
||||
\alias{gsalib-package}
|
||||
\alias{gsalib}
|
||||
\docType{package}
|
||||
\title{
|
||||
GATK utility analysis functions
|
||||
}
|
||||
\description{
|
||||
Utility functions for analyzing GATK-processed NGS data
|
||||
}
|
||||
\details{
|
||||
This package contains functions for working with GATK-processed NGS data. These functions include a command-line parser that also allows a script to be used in interactive mode (good for developing scripts that will eventually be automated), a proportional Venn diagram generator, convenience methods for parsing VariantEval output, and more.
|
||||
}
|
||||
\author{
|
||||
Genome Sequencing and Analysis Group
|
||||
|
||||
Medical and Population Genetics Program
|
||||
|
||||
Maintainer: Kiran Garimella
|
||||
}
|
||||
\references{
|
||||
GSA wiki page: http://www.broadinstitute.org/gsa/wiki
|
||||
|
||||
GATK help forum: http://www.getsatisfaction.com/gsa
|
||||
}
|
||||
\examples{
|
||||
## get script arguments in interactive and non-interactive mode
|
||||
cmdargs = gsa.getargs( list(
|
||||
requiredArg1 = list(
|
||||
value = NA,
|
||||
doc = "Documentation for requiredArg1"
|
||||
),
|
||||
|
||||
optionalArg1 = list(
|
||||
value = 3e9,
|
||||
doc = "Documentation for optionalArg1"
|
||||
)
|
||||
) );
|
||||
|
||||
## plot a proportional Venn diagram
|
||||
gsa.plot.venn(500, 250, 0, 100);
|
||||
|
||||
## read a GATKReport file
|
||||
report = gsa.read.gatkreport("/path/to/my/output.gatkreport");
|
||||
|
||||
## emit a message
|
||||
gsa.message("This is a message");
|
||||
|
||||
## emit a warning message
|
||||
gsa.warn("This is a warning message");
|
||||
|
||||
## emit an error message
|
||||
gsa.error("This is an error message");
|
||||
|
||||
## read the SQUID metrics for a given sequencing project (internal to the Broad only)
|
||||
s = gsa.read.squidmetrics("C427");
|
||||
|
||||
## read command-line arguments
|
||||
cmdargs = gsa.getargs(
|
||||
list(
|
||||
file = list(value="/my/test.vcf", doc="VCF file"),
|
||||
verbose = list(value=0, doc="If 1, set verbose mode"),
|
||||
test2 = list(value=2.3e9, doc="Another argument that does stuff")
|
||||
),
|
||||
doc="My test program"
|
||||
);
|
||||
}
|
||||
\keyword{ package }
|
||||
|
|
@ -25,7 +25,6 @@
|
|||
|
||||
package net.sf.picard.reference;
|
||||
|
||||
import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSourceProgressListener;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import static net.sf.picard.reference.FastaSequenceIndexBuilder.Status.*;
|
||||
|
|
@ -39,8 +38,8 @@ import org.broadinstitute.sting.utils.exceptions.UserException;
|
|||
* Produces fai file with same output as samtools faidx
|
||||
*/
|
||||
public class FastaSequenceIndexBuilder {
|
||||
public File fastaFile;
|
||||
ReferenceDataSourceProgressListener progress; // interface that provides a method for updating user on progress of reading file
|
||||
final public File fastaFile;
|
||||
final boolean printProgress;
|
||||
|
||||
// keep track of location in file
|
||||
long bytesRead, endOfLastLine, lastTimestamp, fileLength; // initialized to -1 to keep 0-indexed position in file;
|
||||
|
|
@ -55,10 +54,10 @@ public class FastaSequenceIndexBuilder {
|
|||
public enum Status { NONE, CONTIG, FIRST_SEQ_LINE, SEQ_LINE, COMMENT }
|
||||
Status status = Status.NONE; // keeps state of what is currently being read. better to use int instead of enum?
|
||||
|
||||
public FastaSequenceIndexBuilder(File fastaFile, ReferenceDataSourceProgressListener progress) {
|
||||
this.progress = progress;
|
||||
public FastaSequenceIndexBuilder(File fastaFile, boolean printProgress) {
|
||||
this.fastaFile = fastaFile;
|
||||
fileLength = fastaFile.length();
|
||||
this.printProgress = printProgress;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -252,8 +251,8 @@ public class FastaSequenceIndexBuilder {
|
|||
|
||||
if (System.currentTimeMillis() - lastTimestamp > 10000) {
|
||||
int percentProgress = (int) (100*bytesRead/fileLength);
|
||||
if (progress != null)
|
||||
progress.percentProgress(percentProgress);
|
||||
if (printProgress)
|
||||
System.out.println(String.format("PROGRESS UPDATE: file is %d percent complete", percentProgress));
|
||||
lastTimestamp = System.currentTimeMillis();
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -174,7 +174,8 @@ public class ArgumentDefinitions implements Iterable<ArgumentDefinition> {
|
|||
|
||||
static DefinitionMatcher VerifiableDefinitionMatcher = new DefinitionMatcher() {
|
||||
public boolean matches( ArgumentDefinition definition, Object key ) {
|
||||
return definition.validation != null;
|
||||
// We can perform some sort of validation for anything that isn't a flag.
|
||||
return !definition.isFlag;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
|
|
|||
|
|
@ -44,7 +44,7 @@ public class ArgumentMatch implements Iterable<ArgumentMatch> {
|
|||
public final String label;
|
||||
|
||||
/**
|
||||
* Maps indicies of command line arguments to values paired with that argument.
|
||||
* Maps indices of command line arguments to values paired with that argument.
|
||||
*/
|
||||
public final SortedMap<Integer,List<String>> indices = new TreeMap<Integer,List<String>>();
|
||||
|
||||
|
|
|
|||
|
|
@ -26,6 +26,8 @@
|
|||
package org.broadinstitute.sting.commandline;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.FeatureManager;
|
||||
import org.broadinstitute.sting.gatk.walkers.Multiplex;
|
||||
import org.broadinstitute.sting.gatk.walkers.Multiplexer;
|
||||
import org.broadinstitute.sting.utils.classloader.JVMUtils;
|
||||
|
|
@ -33,6 +35,7 @@ import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException
|
|||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
||||
import java.io.File;
|
||||
import java.lang.annotation.Annotation;
|
||||
import java.lang.reflect.*;
|
||||
import java.util.*;
|
||||
|
|
@ -109,7 +112,7 @@ public abstract class ArgumentTypeDescriptor {
|
|||
* @return The parsed object.
|
||||
*/
|
||||
public Object parse(ParsingEngine parsingEngine, ArgumentSource source, ArgumentMatches matches) {
|
||||
return parse(parsingEngine, source, source.field.getType(), matches);
|
||||
return parse(parsingEngine, source, source.field.getGenericType(), matches);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -131,18 +134,18 @@ public abstract class ArgumentTypeDescriptor {
|
|||
protected ArgumentDefinition createDefaultArgumentDefinition( ArgumentSource source ) {
|
||||
Annotation argumentAnnotation = getArgumentAnnotation(source);
|
||||
return new ArgumentDefinition( ArgumentIOType.getIOType(argumentAnnotation),
|
||||
source.field.getType(),
|
||||
ArgumentDefinition.getFullName(argumentAnnotation, source.field.getName()),
|
||||
ArgumentDefinition.getShortName(argumentAnnotation),
|
||||
ArgumentDefinition.getDoc(argumentAnnotation),
|
||||
source.isRequired() && !createsTypeDefault(source) && !source.isFlag() && !source.isDeprecated(),
|
||||
source.isFlag(),
|
||||
source.isMultiValued(),
|
||||
source.isHidden(),
|
||||
getCollectionComponentType(source.field),
|
||||
ArgumentDefinition.getExclusiveOf(argumentAnnotation),
|
||||
ArgumentDefinition.getValidationRegex(argumentAnnotation),
|
||||
getValidOptions(source) );
|
||||
source.field.getType(),
|
||||
ArgumentDefinition.getFullName(argumentAnnotation, source.field.getName()),
|
||||
ArgumentDefinition.getShortName(argumentAnnotation),
|
||||
ArgumentDefinition.getDoc(argumentAnnotation),
|
||||
source.isRequired() && !createsTypeDefault(source) && !source.isFlag() && !source.isDeprecated(),
|
||||
source.isFlag(),
|
||||
source.isMultiValued(),
|
||||
source.isHidden(),
|
||||
makeRawTypeIfNecessary(getCollectionComponentType(source.field)),
|
||||
ArgumentDefinition.getExclusiveOf(argumentAnnotation),
|
||||
ArgumentDefinition.getValidationRegex(argumentAnnotation),
|
||||
getValidOptions(source) );
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -151,7 +154,7 @@ public abstract class ArgumentTypeDescriptor {
|
|||
* @return The parameterized component type, or String.class if the parameterized type could not be found.
|
||||
* @throws IllegalArgumentException If more than one parameterized type is found on the field.
|
||||
*/
|
||||
protected Class getCollectionComponentType( Field field ) {
|
||||
protected Type getCollectionComponentType( Field field ) {
|
||||
return null;
|
||||
}
|
||||
|
||||
|
|
@ -162,7 +165,7 @@ public abstract class ArgumentTypeDescriptor {
|
|||
* @param matches The argument matches for the argument source, or the individual argument match for a scalar if this is being called to help parse a collection.
|
||||
* @return The individual parsed object matching the argument match with Class type.
|
||||
*/
|
||||
public abstract Object parse( ParsingEngine parsingEngine, ArgumentSource source, Class type, ArgumentMatches matches );
|
||||
public abstract Object parse( ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches );
|
||||
|
||||
/**
|
||||
* If the argument source only accepts a small set of options, populate the returned list with
|
||||
|
|
@ -273,6 +276,113 @@ public abstract class ArgumentTypeDescriptor {
|
|||
public static boolean isArgumentHidden(Field field) {
|
||||
return field.isAnnotationPresent(Hidden.class);
|
||||
}
|
||||
|
||||
public Class makeRawTypeIfNecessary(Type t) {
|
||||
if ( t == null )
|
||||
return null;
|
||||
else if ( t instanceof ParameterizedType )
|
||||
return (Class)((ParameterizedType) t).getRawType();
|
||||
else if ( t instanceof Class ) {
|
||||
return (Class)t;
|
||||
} else {
|
||||
throw new IllegalArgumentException("Unable to determine Class-derived component type of field: " + t);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Parser for RodBinding objects
|
||||
*/
|
||||
class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
||||
/**
|
||||
* We only want RodBinding class objects
|
||||
* @param type The type to check.
|
||||
* @return true if the provided class is a RodBinding.class
|
||||
*/
|
||||
@Override
|
||||
public boolean supports( Class type ) {
|
||||
return isRodBinding(type);
|
||||
}
|
||||
|
||||
public static boolean isRodBinding( Class type ) {
|
||||
return RodBinding.class.isAssignableFrom(type);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches) {
|
||||
ArgumentDefinition defaultDefinition = createDefaultArgumentDefinition(source);
|
||||
String value = getArgumentValue( defaultDefinition, matches );
|
||||
try {
|
||||
String name = defaultDefinition.fullName;
|
||||
String tribbleType = null;
|
||||
Tags tags = getArgumentTags(matches);
|
||||
// must have one or two tag values here
|
||||
if ( tags.getPositionalTags().size() > 2 ) {
|
||||
throw new UserException.CommandLineException(
|
||||
String.format("Unexpected number of positional tags for argument %s : %s. " +
|
||||
"Rod bindings only suport -X:type and -X:name,type argument styles",
|
||||
value, source.field.getName()));
|
||||
} if ( tags.getPositionalTags().size() == 2 ) {
|
||||
// -X:name,type style
|
||||
name = tags.getPositionalTags().get(0);
|
||||
tribbleType = tags.getPositionalTags().get(1);
|
||||
} else {
|
||||
// case with 0 or 1 positional tags
|
||||
FeatureManager manager = new FeatureManager();
|
||||
|
||||
// -X:type style is a type when we cannot determine the type dynamically
|
||||
String tag1 = tags.getPositionalTags().size() == 1 ? tags.getPositionalTags().get(0) : null;
|
||||
if ( tag1 != null ) {
|
||||
if ( manager.getByName(tag1) != null ) // this a type
|
||||
tribbleType = tag1;
|
||||
else
|
||||
name = tag1;
|
||||
}
|
||||
|
||||
if ( tribbleType == null ) {
|
||||
// try to determine the file type dynamically
|
||||
File file = new File(value);
|
||||
if ( file.canRead() && file.isFile() ) {
|
||||
FeatureManager.FeatureDescriptor featureDescriptor = manager.getByFiletype(file);
|
||||
if ( featureDescriptor != null ) {
|
||||
tribbleType = featureDescriptor.getName();
|
||||
logger.warn("Dynamically determined type of " + file + " to be " + tribbleType);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if ( tribbleType == null ) // error handling
|
||||
throw new UserException.CommandLineException(
|
||||
String.format("Could not parse argument %s with value %s",
|
||||
defaultDefinition.fullName, value));
|
||||
|
||||
Constructor ctor = (makeRawTypeIfNecessary(type)).getConstructor(Class.class, String.class, String.class, String.class, Tags.class);
|
||||
Class parameterType = getParameterizedTypeClass(type);
|
||||
RodBinding result = (RodBinding)ctor.newInstance(parameterType, name, value, tribbleType, tags);
|
||||
parsingEngine.addTags(result,tags);
|
||||
parsingEngine.addRodBinding(result);
|
||||
return result;
|
||||
} catch (InvocationTargetException e) {
|
||||
throw new UserException.CommandLineException(
|
||||
String.format("Failed to parse value %s for argument %s.",
|
||||
value, source.field.getName()));
|
||||
} catch (Exception e) {
|
||||
throw new UserException.CommandLineException(
|
||||
String.format("Failed to parse value %s for argument %s.",
|
||||
value, source.field.getName()));
|
||||
}
|
||||
}
|
||||
|
||||
private Class getParameterizedTypeClass(Type t) {
|
||||
if ( t instanceof ParameterizedType ) {
|
||||
ParameterizedType parameterizedType = (ParameterizedType)t;
|
||||
if ( parameterizedType.getActualTypeArguments().length != 1 )
|
||||
throw new ReviewedStingException("BUG: more than 1 generic type found on class" + t);
|
||||
return (Class)parameterizedType.getActualTypeArguments()[0];
|
||||
} else
|
||||
throw new ReviewedStingException("BUG: could not find generic type on class " + t);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -282,9 +392,10 @@ public abstract class ArgumentTypeDescriptor {
|
|||
class SimpleArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
||||
@Override
|
||||
public boolean supports( Class type ) {
|
||||
if( type.isPrimitive() ) return true;
|
||||
if( type.isEnum() ) return true;
|
||||
if( primitiveToWrapperMap.containsValue(type) ) return true;
|
||||
if ( RodBindingArgumentTypeDescriptor.isRodBinding(type) ) return false;
|
||||
if ( type.isPrimitive() ) return true;
|
||||
if ( type.isEnum() ) return true;
|
||||
if ( primitiveToWrapperMap.containsValue(type) ) return true;
|
||||
|
||||
try {
|
||||
type.getConstructor(String.class);
|
||||
|
|
@ -298,7 +409,8 @@ class SimpleArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
|||
}
|
||||
|
||||
@Override
|
||||
public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Class type, ArgumentMatches matches) {
|
||||
public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Type fulltype, ArgumentMatches matches) {
|
||||
Class type = makeRawTypeIfNecessary(fulltype);
|
||||
if (source.isFlag())
|
||||
return true;
|
||||
|
||||
|
|
@ -339,7 +451,7 @@ class SimpleArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
|||
throw e;
|
||||
} catch (InvocationTargetException e) {
|
||||
throw new UserException.CommandLineException(String.format("Failed to parse value %s for argument %s. This is most commonly caused by providing an incorrect data type (e.g. a double when an int is required)",
|
||||
value, source.field.getName()));
|
||||
value, source.field.getName()));
|
||||
} catch (Exception e) {
|
||||
throw new DynamicClassResolutionException(String.class, e);
|
||||
}
|
||||
|
|
@ -351,7 +463,7 @@ class SimpleArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
|||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* A mapping of the primitive types to their associated wrapper classes. Is there really no way to infer
|
||||
|
|
@ -382,10 +494,10 @@ class CompoundArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
|||
|
||||
@Override
|
||||
@SuppressWarnings("unchecked")
|
||||
public Object parse(ParsingEngine parsingEngine,ArgumentSource source, Class type, ArgumentMatches matches) {
|
||||
Class componentType;
|
||||
public Object parse(ParsingEngine parsingEngine,ArgumentSource source, Type fulltype, ArgumentMatches matches) {
|
||||
Class type = makeRawTypeIfNecessary(fulltype);
|
||||
Type componentType;
|
||||
Object result;
|
||||
Tags tags;
|
||||
|
||||
if( Collection.class.isAssignableFrom(type) ) {
|
||||
|
||||
|
|
@ -399,7 +511,7 @@ class CompoundArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
|||
}
|
||||
|
||||
componentType = getCollectionComponentType( source.field );
|
||||
ArgumentTypeDescriptor componentArgumentParser = parsingEngine.selectBestTypeDescriptor(componentType);
|
||||
ArgumentTypeDescriptor componentArgumentParser = parsingEngine.selectBestTypeDescriptor(makeRawTypeIfNecessary(componentType));
|
||||
|
||||
Collection collection;
|
||||
try {
|
||||
|
|
@ -428,7 +540,7 @@ class CompoundArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
|||
}
|
||||
else if( type.isArray() ) {
|
||||
componentType = type.getComponentType();
|
||||
ArgumentTypeDescriptor componentArgumentParser = parsingEngine.selectBestTypeDescriptor(componentType);
|
||||
ArgumentTypeDescriptor componentArgumentParser = parsingEngine.selectBestTypeDescriptor(makeRawTypeIfNecessary(componentType));
|
||||
|
||||
// Assemble a collection of individual values used in this computation.
|
||||
Collection<ArgumentMatch> values = new ArrayList<ArgumentMatch>();
|
||||
|
|
@ -436,7 +548,7 @@ class CompoundArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
|||
for( ArgumentMatch value: match )
|
||||
values.add(value);
|
||||
|
||||
result = Array.newInstance(componentType,values.size());
|
||||
result = Array.newInstance(makeRawTypeIfNecessary(componentType),values.size());
|
||||
|
||||
int i = 0;
|
||||
for( ArgumentMatch value: values ) {
|
||||
|
|
@ -459,16 +571,16 @@ class CompoundArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
|||
* @throws IllegalArgumentException If more than one parameterized type is found on the field.
|
||||
*/
|
||||
@Override
|
||||
protected Class getCollectionComponentType( Field field ) {
|
||||
// If this is a parameterized collection, find the contained type. If blow up if more than one type exists.
|
||||
if( field.getGenericType() instanceof ParameterizedType) {
|
||||
ParameterizedType parameterizedType = (ParameterizedType)field.getGenericType();
|
||||
if( parameterizedType.getActualTypeArguments().length > 1 )
|
||||
throw new IllegalArgumentException("Unable to determine collection type of field: " + field.toString());
|
||||
return (Class)parameterizedType.getActualTypeArguments()[0];
|
||||
}
|
||||
else
|
||||
return String.class;
|
||||
protected Type getCollectionComponentType( Field field ) {
|
||||
// If this is a parameterized collection, find the contained type. If blow up if more than one type exists.
|
||||
if( field.getGenericType() instanceof ParameterizedType) {
|
||||
ParameterizedType parameterizedType = (ParameterizedType)field.getGenericType();
|
||||
if( parameterizedType.getActualTypeArguments().length > 1 )
|
||||
throw new IllegalArgumentException("Unable to determine collection type of field: " + field.toString());
|
||||
return parameterizedType.getActualTypeArguments()[0];
|
||||
}
|
||||
else
|
||||
return String.class;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -515,7 +627,7 @@ class MultiplexArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
|||
throw new ReviewedStingException("No multiplexed ids available");
|
||||
|
||||
Map<Object,Object> multiplexedMapping = new HashMap<Object,Object>();
|
||||
Class componentType = getCollectionComponentType(source.field);
|
||||
Class componentType = makeRawTypeIfNecessary(getCollectionComponentType(source.field));
|
||||
ArgumentTypeDescriptor componentTypeDescriptor = parsingEngine.selectBestTypeDescriptor(componentType);
|
||||
|
||||
for(Object id: multiplexedIds) {
|
||||
|
|
@ -529,13 +641,13 @@ class MultiplexArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
|||
|
||||
|
||||
@Override
|
||||
public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Class type, ArgumentMatches matches) {
|
||||
public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches) {
|
||||
if(multiplexedIds == null)
|
||||
throw new ReviewedStingException("Cannot directly parse a MultiplexArgumentTypeDescriptor; must create a derivative type descriptor first.");
|
||||
|
||||
Map<Object,Object> multiplexedMapping = new HashMap<Object,Object>();
|
||||
|
||||
Class componentType = getCollectionComponentType(source.field);
|
||||
Class componentType = makeRawTypeIfNecessary(getCollectionComponentType(source.field));
|
||||
|
||||
|
||||
for(Object id: multiplexedIds) {
|
||||
|
|
@ -606,7 +718,7 @@ class MultiplexArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
|||
* @throws IllegalArgumentException If more than one parameterized type is found on the field.
|
||||
*/
|
||||
@Override
|
||||
protected Class getCollectionComponentType( Field field ) {
|
||||
protected Type getCollectionComponentType( Field field ) {
|
||||
// Multiplex arguments must resolve to maps from which the clp should extract the second type.
|
||||
if( field.getGenericType() instanceof ParameterizedType) {
|
||||
ParameterizedType parameterizedType = (ParameterizedType)field.getGenericType();
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@
|
|||
|
||||
package org.broadinstitute.sting.commandline;
|
||||
|
||||
import com.google.java.contract.Requires;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.classloader.JVMUtils;
|
||||
|
|
@ -41,6 +42,11 @@ import java.util.*;
|
|||
* A parser for Sting command-line arguments.
|
||||
*/
|
||||
public class ParsingEngine {
|
||||
/**
|
||||
* The loaded argument sources along with their back definitions.
|
||||
*/
|
||||
private Map<ArgumentDefinition,ArgumentSource> argumentSourcesByDefinition = new HashMap<ArgumentDefinition,ArgumentSource>();
|
||||
|
||||
/**
|
||||
* A list of defined arguments against which command lines are matched.
|
||||
* Package protected for testing access.
|
||||
|
|
@ -59,11 +65,17 @@ public class ParsingEngine {
|
|||
*/
|
||||
private List<ParsingMethod> parsingMethods = new ArrayList<ParsingMethod>();
|
||||
|
||||
/**
|
||||
* All of the RodBinding objects we've seen while parsing
|
||||
*/
|
||||
private List<RodBinding> rodBindings = new ArrayList<RodBinding>();
|
||||
|
||||
/**
|
||||
* Class reference to the different types of descriptors that the create method can create.
|
||||
* The type of set used must be ordered (but not necessarily sorted).
|
||||
*/
|
||||
private static final Set<ArgumentTypeDescriptor> STANDARD_ARGUMENT_TYPE_DESCRIPTORS = new LinkedHashSet<ArgumentTypeDescriptor>( Arrays.asList(new SimpleArgumentTypeDescriptor(),
|
||||
new RodBindingArgumentTypeDescriptor(),
|
||||
new CompoundArgumentTypeDescriptor(),
|
||||
new MultiplexArgumentTypeDescriptor()) );
|
||||
|
||||
|
|
@ -80,6 +92,7 @@ public class ParsingEngine {
|
|||
protected static Logger logger = Logger.getLogger(ParsingEngine.class);
|
||||
|
||||
public ParsingEngine( CommandLineProgram clp ) {
|
||||
RodBinding.resetNameCounter();
|
||||
parsingMethods.add( ParsingMethod.FullNameParsingMethod );
|
||||
parsingMethods.add( ParsingMethod.ShortNameParsingMethod );
|
||||
|
||||
|
|
@ -107,8 +120,13 @@ public class ParsingEngine {
|
|||
*/
|
||||
public void addArgumentSource( String sourceName, Class sourceClass ) {
|
||||
List<ArgumentDefinition> argumentsFromSource = new ArrayList<ArgumentDefinition>();
|
||||
for( ArgumentSource argumentSource: extractArgumentSources(sourceClass) )
|
||||
argumentsFromSource.addAll( argumentSource.createArgumentDefinitions() );
|
||||
for( ArgumentSource argumentSource: extractArgumentSources(sourceClass) ) {
|
||||
List<ArgumentDefinition> argumentDefinitions = argumentSource.createArgumentDefinitions();
|
||||
for(ArgumentDefinition argumentDefinition: argumentDefinitions) {
|
||||
argumentSourcesByDefinition.put(argumentDefinition,argumentSource);
|
||||
argumentsFromSource.add( argumentDefinition );
|
||||
}
|
||||
}
|
||||
argumentDefinitions.add( new ArgumentDefinitionGroup(sourceName, argumentsFromSource) );
|
||||
}
|
||||
|
||||
|
|
@ -199,16 +217,25 @@ public class ParsingEngine {
|
|||
throw new InvalidArgumentException( invalidArguments );
|
||||
}
|
||||
|
||||
// Find invalid argument values (arguments that fail the regexp test.
|
||||
// Find invalid argument values -- invalid arguments are either completely missing or fail the specified 'validation' regular expression.
|
||||
if( !skipValidationOf.contains(ValidationType.InvalidArgumentValue) ) {
|
||||
Collection<ArgumentDefinition> verifiableArguments =
|
||||
argumentDefinitions.findArgumentDefinitions( null, ArgumentDefinitions.VerifiableDefinitionMatcher );
|
||||
Collection<Pair<ArgumentDefinition,String>> invalidValues = new ArrayList<Pair<ArgumentDefinition,String>>();
|
||||
for( ArgumentDefinition verifiableArgument: verifiableArguments ) {
|
||||
ArgumentMatches verifiableMatches = argumentMatches.findMatches( verifiableArgument );
|
||||
// Check to see whether an argument value was specified. Argument values must be provided
|
||||
// when the argument name is specified and the argument is not a flag type.
|
||||
for(ArgumentMatch verifiableMatch: verifiableMatches) {
|
||||
ArgumentSource argumentSource = argumentSourcesByDefinition.get(verifiableArgument);
|
||||
if(verifiableMatch.values().size() == 0 && !verifiableArgument.isFlag && argumentSource.createsTypeDefault())
|
||||
invalidValues.add(new Pair<ArgumentDefinition,String>(verifiableArgument,null));
|
||||
}
|
||||
|
||||
// Ensure that the field contents meet the validation criteria specified by the regular expression.
|
||||
for( ArgumentMatch verifiableMatch: verifiableMatches ) {
|
||||
for( String value: verifiableMatch.values() ) {
|
||||
if( !value.matches(verifiableArgument.validation) )
|
||||
if( verifiableArgument.validation != null && !value.matches(verifiableArgument.validation) )
|
||||
invalidValues.add( new Pair<ArgumentDefinition,String>(verifiableArgument, value) );
|
||||
}
|
||||
}
|
||||
|
|
@ -304,7 +331,17 @@ public class ParsingEngine {
|
|||
if(!tags.containsKey(key))
|
||||
return new Tags();
|
||||
return tags.get(key);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a RodBinding type argument to this parser. Called during parsing to allow
|
||||
* us to track all of the RodBindings discovered in the command line.
|
||||
* @param rodBinding the rodbinding to add. Must not be added twice
|
||||
*/
|
||||
@Requires("rodBinding != null")
|
||||
public void addRodBinding(final RodBinding rodBinding) {
|
||||
rodBindings.add(rodBinding);
|
||||
}
|
||||
|
||||
/**
|
||||
* Notify the user that a deprecated command-line argument has been used.
|
||||
|
|
@ -344,6 +381,10 @@ public class ParsingEngine {
|
|||
}
|
||||
}
|
||||
|
||||
public Collection<RodBinding> getRodBindings() {
|
||||
return Collections.unmodifiableCollection(rodBindings);
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a collection of the container instances of the given type stored within the given target.
|
||||
* @param source Argument source.
|
||||
|
|
@ -390,7 +431,6 @@ public class ParsingEngine {
|
|||
return ArgumentTypeDescriptor.selectBest(argumentTypeDescriptors,type);
|
||||
}
|
||||
|
||||
|
||||
private List<ArgumentSource> extractArgumentSources(Class sourceClass, Field[] parentFields) {
|
||||
// now simply call into the truly general routine extract argument bindings but with a null
|
||||
// object so bindings aren't computed
|
||||
|
|
@ -515,10 +555,14 @@ class InvalidArgumentValueException extends ArgumentException {
|
|||
private static String formatArguments( Collection<Pair<ArgumentDefinition,String>> invalidArgumentValues ) {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
for( Pair<ArgumentDefinition,String> invalidValue: invalidArgumentValues ) {
|
||||
sb.append( String.format("%nArgument '--%s' has value of incorrect format: %s (should match %s)",
|
||||
invalidValue.first.fullName,
|
||||
invalidValue.second,
|
||||
invalidValue.first.validation) );
|
||||
if(invalidValue.getSecond() == null)
|
||||
sb.append( String.format("%nArgument '--%s' requires a value but none was provided",
|
||||
invalidValue.first.fullName) );
|
||||
else
|
||||
sb.append( String.format("%nArgument '--%s' has value of incorrect format: %s (should match %s)",
|
||||
invalidValue.first.fullName,
|
||||
invalidValue.second,
|
||||
invalidValue.first.validation) );
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,187 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.commandline;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import org.broad.tribble.Feature;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* A RodBinding representing a walker argument that gets bound to a ROD track.
|
||||
*
|
||||
* The RodBinding<T> is a formal GATK argument that bridges between a walker and
|
||||
* the RefMetaDataTracker to obtain data about this rod track at runtime. The RodBinding
|
||||
* is explicitly typed with type of the Tribble.Feature expected to be produced by this
|
||||
* argument. The GATK Engine takes care of initializing the binding and connecting it
|
||||
* to the RMD system.
|
||||
*
|
||||
* It is recommended that optional RodBindings be initialized to the value returned
|
||||
* by the static method makeUnbound().
|
||||
*
|
||||
* Note that this class is immutable.
|
||||
*/
|
||||
public final class RodBinding<T extends Feature> {
|
||||
protected final static String UNBOUND_VARIABLE_NAME = "";
|
||||
protected final static String UNBOUND_SOURCE = "UNBOUND";
|
||||
protected final static String UNBOUND_TRIBBLE_TYPE = "";
|
||||
|
||||
/**
|
||||
* Create an unbound Rodbinding of type. This is the correct programming
|
||||
* style for an optional RodBinding<T>
|
||||
*
|
||||
* At Input()
|
||||
* RodBinding<T> x = RodBinding.makeUnbound(T.class)
|
||||
*
|
||||
* The unbound binding is guaranteed to never match any binding. It uniquely
|
||||
* returns false to isBound().
|
||||
*
|
||||
* @param type the Class type produced by this unbound object
|
||||
* @param <T> any class extending Tribble Feature
|
||||
* @return the UNBOUND RodBinding producing objects of type T
|
||||
*/
|
||||
@Requires("type != null")
|
||||
public final static <T extends Feature> RodBinding<T> makeUnbound(Class<T> type) {
|
||||
return new RodBinding<T>(type);
|
||||
}
|
||||
|
||||
/** The name of this binding. Often the name of the field itself, but can be overridden on cmdline */
|
||||
final private String name;
|
||||
/** where the data for this ROD is coming from. A file or special value if coming from stdin */
|
||||
final private String source;
|
||||
/** the string name of the tribble type, such as vcf, bed, etc. */
|
||||
final private String tribbleType;
|
||||
/** The command line tags associated with this RodBinding */
|
||||
final private Tags tags;
|
||||
/** The Java class expected for this RodBinding. Must correspond to the type emited by Tribble */
|
||||
final private Class<T> type;
|
||||
/** True for all RodBindings except the special UNBOUND binding, which is the default for optional arguments */
|
||||
final private boolean bound;
|
||||
|
||||
/**
|
||||
* The name counter. This is how we create unique names for collections of RodBindings
|
||||
* on the command line. If you have provide the GATK with -X file1 and -X file2 to a
|
||||
* RodBinding argument as List<RodBinding<T>> then each binding will receive automatically
|
||||
* the name of X and X2.
|
||||
*/
|
||||
final private static Map<String, Integer> nameCounter = new HashMap<String, Integer>();
|
||||
|
||||
/** for UnitTests */
|
||||
final public static void resetNameCounter() {
|
||||
nameCounter.clear();
|
||||
}
|
||||
|
||||
@Requires("rawName != null")
|
||||
@Ensures("result != null")
|
||||
final private static synchronized String countedVariableName(final String rawName) {
|
||||
Integer count = nameCounter.get(rawName);
|
||||
if ( count == null ) {
|
||||
nameCounter.put(rawName, 1);
|
||||
return rawName;
|
||||
} else {
|
||||
nameCounter.put(rawName, count + 1);
|
||||
return rawName + (count + 1);
|
||||
}
|
||||
}
|
||||
|
||||
@Requires({"type != null", "rawName != null", "source != null", "tribbleType != null", "tags != null"})
|
||||
public RodBinding(Class<T> type, final String rawName, final String source, final String tribbleType, final Tags tags) {
|
||||
this.type = type;
|
||||
this.name = countedVariableName(rawName);
|
||||
this.source = source;
|
||||
this.tribbleType = tribbleType;
|
||||
this.tags = tags;
|
||||
this.bound = true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Make an unbound RodBinding<T>. Only available for creating the globally unique UNBOUND object
|
||||
* @param type class this unbound RodBinding creates
|
||||
*/
|
||||
@Requires({"type != null"})
|
||||
private RodBinding(Class<T> type) {
|
||||
this.type = type;
|
||||
this.name = UNBOUND_VARIABLE_NAME; // special value can never be found in RefMetaDataTracker
|
||||
this.source = UNBOUND_SOURCE;
|
||||
this.tribbleType = UNBOUND_TRIBBLE_TYPE;
|
||||
this.tags = new Tags();
|
||||
this.bound = false;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @return True for all RodBindings except the special UNBOUND binding, which is the default for optional arguments
|
||||
*/
|
||||
final public boolean isBound() {
|
||||
return bound;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return The name of this binding. Often the name of the field itself, but can be overridden on cmdline
|
||||
*/
|
||||
@Ensures({"result != null"})
|
||||
final public String getName() {
|
||||
return name;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the string name of the tribble type, such as vcf, bed, etc.
|
||||
*/
|
||||
@Ensures({"result != null"})
|
||||
final public Class<T> getType() {
|
||||
return type;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return where the data for this ROD is coming from. A file or special value if coming from stdin
|
||||
*/
|
||||
@Ensures({"result != null"})
|
||||
final public String getSource() {
|
||||
return source;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return The command line tags associated with this RodBinding. Will include the tags used to
|
||||
* determine the name and type of this RodBinding
|
||||
*/
|
||||
@Ensures({"result != null"})
|
||||
final public Tags getTags() {
|
||||
return tags;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return The Java class expected for this RodBinding. Must correspond to the type emited by Tribble
|
||||
*/
|
||||
@Ensures({"result != null"})
|
||||
final public String getTribbleType() {
|
||||
return tribbleType;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format("(RodBinding name=%s source=%s)", getName(), getSource());
|
||||
}
|
||||
}
|
||||
|
|
@ -25,21 +25,20 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk;
|
||||
|
||||
import org.broadinstitute.sting.commandline.ArgumentTypeDescriptor;
|
||||
import org.broadinstitute.sting.commandline.CommandLineProgram;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.commandline.*;
|
||||
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
|
||||
import org.broadinstitute.sting.gatk.filters.ReadFilter;
|
||||
import org.broadinstitute.sting.gatk.io.stubs.OutputStreamArgumentTypeDescriptor;
|
||||
import org.broadinstitute.sting.gatk.io.stubs.SAMFileReaderArgumentTypeDescriptor;
|
||||
import org.broadinstitute.sting.gatk.io.stubs.SAMFileWriterArgumentTypeDescriptor;
|
||||
import org.broadinstitute.sting.gatk.io.stubs.VCFWriterArgumentTypeDescriptor;
|
||||
import org.broadinstitute.sting.gatk.phonehome.GATKRunReport;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.utils.classloader.JVMUtils;
|
||||
import org.broadinstitute.sting.utils.text.ListFileUtils;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
|
|
@ -64,6 +63,8 @@ public abstract class CommandLineExecutable extends CommandLineProgram {
|
|||
*/
|
||||
private final Collection<Object> argumentSources = new ArrayList<Object>();
|
||||
|
||||
protected static Logger logger = Logger.getLogger(CommandLineExecutable.class);
|
||||
|
||||
/**
|
||||
* this is the function that the inheriting class can expect to have called
|
||||
* when the command line system has initialized.
|
||||
|
|
@ -81,7 +82,6 @@ public abstract class CommandLineExecutable extends CommandLineProgram {
|
|||
|
||||
// File lists can require a bit of additional expansion. Set these explicitly by the engine.
|
||||
engine.setSAMFileIDs(ListFileUtils.unpackBAMFileList(getArgumentCollection().samFiles,parser));
|
||||
engine.setReferenceMetaDataFiles(ListFileUtils.unpackRODBindings(getArgumentCollection().RODBindings,getArgumentCollection().DBSNPFile,parser));
|
||||
|
||||
engine.setWalker(walker);
|
||||
walker.setToolkit(engine);
|
||||
|
|
@ -96,6 +96,25 @@ public abstract class CommandLineExecutable extends CommandLineProgram {
|
|||
loadArgumentsIntoObject(walker);
|
||||
argumentSources.add(walker);
|
||||
|
||||
Collection<RMDTriplet> newStyle = ListFileUtils.unpackRODBindings(parser.getRodBindings(), parser);
|
||||
|
||||
// todo: remove me when the old style system is removed
|
||||
if ( getArgumentCollection().RODBindings.size() > 0 ) {
|
||||
logger.warn("################################################################################");
|
||||
logger.warn("################################################################################");
|
||||
logger.warn("Deprecated -B rod binding syntax detected. This syntax will be retired in GATK 1.2.");
|
||||
logger.warn("Please use arguments defined by each specific walker instead.");
|
||||
for ( String oldStyleRodBinding : getArgumentCollection().RODBindings ) {
|
||||
logger.warn(" -B rod binding with value " + oldStyleRodBinding + " tags: " + parser.getTags(oldStyleRodBinding).getPositionalTags());
|
||||
}
|
||||
logger.warn("################################################################################");
|
||||
logger.warn("################################################################################");
|
||||
}
|
||||
|
||||
Collection<RMDTriplet> oldStyle = ListFileUtils.unpackRODBindingsOldStyle(getArgumentCollection().RODBindings, parser);
|
||||
oldStyle.addAll(newStyle);
|
||||
engine.setReferenceMetaDataFiles(oldStyle);
|
||||
|
||||
for (ReadFilter filter: filters) {
|
||||
loadArgumentsIntoObject(filter);
|
||||
argumentSources.add(filter);
|
||||
|
|
@ -112,6 +131,7 @@ public abstract class CommandLineExecutable extends CommandLineProgram {
|
|||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Generate the GATK run report for this walker using the current GATKEngine, if -et is enabled.
|
||||
* This report will be written to either STDOUT or to the run repository, depending on the options
|
||||
|
|
@ -142,7 +162,6 @@ public abstract class CommandLineExecutable extends CommandLineProgram {
|
|||
*/
|
||||
protected Collection<ArgumentTypeDescriptor> getArgumentTypeDescriptors() {
|
||||
return Arrays.asList( new VCFWriterArgumentTypeDescriptor(engine,System.out,argumentSources),
|
||||
new SAMFileReaderArgumentTypeDescriptor(engine),
|
||||
new SAMFileWriterArgumentTypeDescriptor(engine,System.out),
|
||||
new OutputStreamArgumentTypeDescriptor(engine,System.out) );
|
||||
}
|
||||
|
|
|
|||
|
|
@ -36,6 +36,8 @@ import org.broadinstitute.sting.gatk.walkers.Walker;
|
|||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.help.ApplicationDetails;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.sting.utils.help.GATKDocUtils;
|
||||
import org.broadinstitute.sting.utils.help.GATKDoclet;
|
||||
import org.broadinstitute.sting.utils.text.TextFormattingUtils;
|
||||
|
||||
import java.util.*;
|
||||
|
|
@ -175,12 +177,8 @@ public class CommandLineGATK extends CommandLineExecutable {
|
|||
StringBuilder additionalHelp = new StringBuilder();
|
||||
Formatter formatter = new Formatter(additionalHelp);
|
||||
|
||||
formatter.format("Description:%n");
|
||||
|
||||
WalkerManager walkerManager = engine.getWalkerManager();
|
||||
String walkerHelpText = walkerManager.getWalkerDescriptionText(walkerType);
|
||||
|
||||
printDescriptorLine(formatter,WALKER_INDENT,"",WALKER_INDENT,FIELD_SEPARATOR,walkerHelpText,TextFormattingUtils.DEFAULT_LINE_WIDTH);
|
||||
formatter.format("For a full description of this walker, see its GATKdocs at:%n");
|
||||
formatter.format("%s%n", GATKDocUtils.helpLinksToGATKDocs(walkerType));
|
||||
|
||||
return additionalHelp.toString();
|
||||
}
|
||||
|
|
@ -194,8 +192,6 @@ public class CommandLineGATK extends CommandLineExecutable {
|
|||
StringBuilder additionalHelp = new StringBuilder();
|
||||
Formatter formatter = new Formatter(additionalHelp);
|
||||
|
||||
formatter.format("Available analyses:%n");
|
||||
|
||||
// Get the list of walker names from the walker manager.
|
||||
WalkerManager walkerManager = engine.getWalkerManager();
|
||||
|
||||
|
|
|
|||
|
|
@ -43,7 +43,7 @@ import org.broadinstitute.sting.gatk.filters.ReadGroupBlackListFilter;
|
|||
import org.broadinstitute.sting.gatk.io.OutputTracker;
|
||||
import org.broadinstitute.sting.gatk.io.stubs.Stub;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RMDIntervalGenerator;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
|
|
@ -370,33 +370,6 @@ public class GenomeAnalysisEngine {
|
|||
throw new ArgumentException("Walker does not allow a reference but one was provided.");
|
||||
}
|
||||
|
||||
/**
|
||||
* Verifies that all required reference-ordered data has been supplied, and any reference-ordered data that was not
|
||||
* 'allowed' is still present.
|
||||
*
|
||||
* @param rods Reference-ordered data to load.
|
||||
*/
|
||||
protected void validateSuppliedReferenceOrderedData(List<ReferenceOrderedDataSource> rods) {
|
||||
// Check to make sure that all required metadata is present.
|
||||
List<RMD> allRequired = WalkerManager.getRequiredMetaData(walker);
|
||||
for (RMD required : allRequired) {
|
||||
boolean found = false;
|
||||
for (ReferenceOrderedDataSource rod : rods) {
|
||||
if (rod.matchesNameAndRecordType(required.name(), required.type()))
|
||||
found = true;
|
||||
}
|
||||
if (!found)
|
||||
throw new ArgumentException(String.format("Walker requires reference metadata to be supplied named '%s' of type '%s', but this metadata was not provided. " +
|
||||
"Please supply the specified metadata file.", required.name(), required.type().getSimpleName()));
|
||||
}
|
||||
|
||||
// Check to see that no forbidden rods are present.
|
||||
for (ReferenceOrderedDataSource rod : rods) {
|
||||
if (!WalkerManager.isAllowed(walker, rod))
|
||||
throw new ArgumentException(String.format("Walker of type %s does not allow access to metadata: %s", walker.getClass(), rod.getName()));
|
||||
}
|
||||
}
|
||||
|
||||
protected void validateSuppliedIntervals() {
|
||||
// Only read walkers support '-L unmapped' intervals. Trap and validate any other instances of -L unmapped.
|
||||
if(!(walker instanceof ReadWalker)) {
|
||||
|
|
@ -926,9 +899,6 @@ public class GenomeAnalysisEngine {
|
|||
GenomeLocParser genomeLocParser,
|
||||
ValidationExclusion.TYPE validationExclusionType) {
|
||||
RMDTrackBuilder builder = new RMDTrackBuilder(sequenceDictionary,genomeLocParser,validationExclusionType);
|
||||
// try and make the tracks given their requests
|
||||
// create of live instances of the tracks
|
||||
List<RMDTrack> tracks = new ArrayList<RMDTrack>();
|
||||
|
||||
List<ReferenceOrderedDataSource> dataSources = new ArrayList<ReferenceOrderedDataSource>();
|
||||
for (RMDTriplet fileDescriptor : referenceMetaDataFiles)
|
||||
|
|
@ -939,7 +909,6 @@ public class GenomeAnalysisEngine {
|
|||
flashbackData()));
|
||||
|
||||
// validation: check to make sure everything the walker needs is present, and that all sequence dictionaries match.
|
||||
validateSuppliedReferenceOrderedData(dataSources);
|
||||
validateSourcesAgainstReference(readsDataSource, referenceDataSource.getReference(), dataSources, builder);
|
||||
|
||||
return dataSources;
|
||||
|
|
|
|||
|
|
@ -177,19 +177,7 @@ public class WalkerManager extends PluginManager<Walker> {
|
|||
* @return The list of allowed reference meta data.
|
||||
*/
|
||||
public static List<RMD> getAllowsMetaData(Class<? extends Walker> walkerClass) {
|
||||
Allows allowsDataSource = getWalkerAllowed(walkerClass);
|
||||
if (allowsDataSource == null)
|
||||
return Collections.<RMD>emptyList();
|
||||
return Arrays.asList(allowsDataSource.referenceMetaData());
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a list of RODs allowed by the walker.
|
||||
* @param walker Walker to query.
|
||||
* @return The list of allowed reference meta data.
|
||||
*/
|
||||
public static List<RMD> getAllowsMetaData(Walker walker) {
|
||||
return getAllowsMetaData(walker.getClass());
|
||||
return Collections.<RMD>emptyList();
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -226,24 +214,7 @@ public class WalkerManager extends PluginManager<Walker> {
|
|||
* @return True if the walker forbids this data type. False otherwise.
|
||||
*/
|
||||
public static boolean isAllowed(Class<? extends Walker> walkerClass, ReferenceOrderedDataSource rod) {
|
||||
Allows allowsDataSource = getWalkerAllowed(walkerClass);
|
||||
|
||||
// Allows is less restrictive than requires. If an allows
|
||||
// clause is not specified, any kind of data is allowed.
|
||||
if( allowsDataSource == null )
|
||||
return true;
|
||||
|
||||
// The difference between unspecified RMD and the empty set of metadata can't be detected.
|
||||
// Treat an empty 'allows' as 'allow everything'. Maybe we can have a special RMD flag to account for this
|
||||
// case in the future.
|
||||
if( allowsDataSource.referenceMetaData().length == 0 )
|
||||
return true;
|
||||
|
||||
for( RMD allowed: allowsDataSource.referenceMetaData() ) {
|
||||
if( rod.matchesNameAndRecordType(allowed.name(),allowed.type()) )
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -283,8 +254,7 @@ public class WalkerManager extends PluginManager<Walker> {
|
|||
* @return The list of required reference meta data.
|
||||
*/
|
||||
public static List<RMD> getRequiredMetaData(Class<? extends Walker> walkerClass) {
|
||||
Requires requiresDataSource = getWalkerRequirements(walkerClass);
|
||||
return Arrays.asList(requiresDataSource.referenceMetaData());
|
||||
return Collections.emptyList();
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -23,8 +23,26 @@
|
|||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.datasources.reference;
|
||||
package org.broadinstitute.sting.gatk.arguments;
|
||||
|
||||
|
||||
import org.broadinstitute.sting.commandline.Input;
|
||||
import org.broadinstitute.sting.commandline.RodBinding;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.simpleframework.xml.*;
|
||||
|
||||
/**
|
||||
* @author ebanks
|
||||
* @version 1.0
|
||||
*/
|
||||
@Root
|
||||
public class DbsnpArgumentCollection {
|
||||
|
||||
/**
|
||||
* A dbSNP VCF file.
|
||||
*/
|
||||
@Input(fullName="dbsnp", shortName = "D", doc="dbSNP file", required=false)
|
||||
public RodBinding<VariantContext> dbsnp = RodBinding.makeUnbound(VariantContext.class);
|
||||
|
||||
public interface ReferenceDataSourceProgressListener {
|
||||
public void percentProgress(int percent);
|
||||
}
|
||||
|
||||
|
|
@ -117,11 +117,6 @@ public class GATKArgumentCollection {
|
|||
@Argument(fullName = "nonDeterministicRandomSeed", shortName = "ndrs", doc = "Makes the GATK behave non deterministically, that is, the random numbers generated will be different in every run", required = false)
|
||||
public boolean nonDeterministicRandomSeed = false;
|
||||
|
||||
|
||||
@Element(required = false)
|
||||
@Input(fullName = "DBSNP", shortName = "D", doc = "DBSNP file", required = false)
|
||||
public String DBSNPFile = null;
|
||||
|
||||
/**
|
||||
* The override mechanism in the GATK, by default, populates the command-line arguments, then
|
||||
* the defaults from the walker annotations. Unfortunately, walker annotations should be trumped
|
||||
|
|
@ -380,9 +375,6 @@ public class GATKArgumentCollection {
|
|||
if (!other.excludeIntervals.equals(this.excludeIntervals)) {
|
||||
return false;
|
||||
}
|
||||
if (!other.DBSNPFile.equals(this.DBSNPFile)) {
|
||||
return false;
|
||||
}
|
||||
if (!other.unsafe.equals(this.unsafe)) {
|
||||
return false;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,50 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.arguments;
|
||||
|
||||
|
||||
import org.broadinstitute.sting.commandline.Input;
|
||||
import org.broadinstitute.sting.commandline.RodBinding;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.simpleframework.xml.Root;
|
||||
|
||||
/**
|
||||
* @author ebanks
|
||||
* @version 1.0
|
||||
*/
|
||||
@Root
|
||||
public class StandardVariantContextInputArgumentCollection {
|
||||
|
||||
/**
|
||||
* The VCF file we are using.
|
||||
*
|
||||
* Variants from this file are used by this tool as input.
|
||||
*/
|
||||
@Input(fullName="variant", shortName = "V", doc="Input VCF file", required=true)
|
||||
public RodBinding<VariantContext> variants;
|
||||
|
||||
}
|
||||
|
||||
|
|
@ -1,8 +1,10 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.providers;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
||||
import java.util.ArrayList;
|
||||
|
|
@ -49,11 +51,14 @@ public class ManagingReferenceOrderedView implements ReferenceOrderedView {
|
|||
* @param loc Locus at which to track.
|
||||
* @return A tracker containing information about this locus.
|
||||
*/
|
||||
public RefMetaDataTracker getReferenceOrderedDataAtLocus( GenomeLoc loc ) {
|
||||
RefMetaDataTracker tracks = new RefMetaDataTracker(states.size());
|
||||
public RefMetaDataTracker getReferenceOrderedDataAtLocus( GenomeLoc loc, ReferenceContext referenceContext ) {
|
||||
List<RODRecordList> bindings = states.isEmpty() ? Collections.<RODRecordList>emptyList() : new ArrayList<RODRecordList>(states.size());
|
||||
|
||||
for ( ReferenceOrderedDataState state: states )
|
||||
tracks.bind( state.dataSource.getName(), state.iterator.seekForward(loc) );
|
||||
return tracks;
|
||||
// todo -- warning, I removed the reference to the name from states
|
||||
bindings.add( state.iterator.seekForward(loc) );
|
||||
|
||||
return new RefMetaDataTracker(bindings, referenceContext);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -1,8 +1,9 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.providers;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
||||
public interface ReferenceOrderedView extends View {
|
||||
RefMetaDataTracker getReferenceOrderedDataAtLocus( GenomeLoc loc );
|
||||
RefMetaDataTracker getReferenceOrderedDataAtLocus( GenomeLoc loc, ReferenceContext refContext );
|
||||
}
|
||||
|
|
|
|||
|
|
@ -26,6 +26,7 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.providers;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
|
||||
|
|
@ -45,7 +46,8 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
|
|||
*/
|
||||
private RODMergingIterator rodQueue = null;
|
||||
|
||||
RefMetaDataTracker tracker = null;
|
||||
Collection<RODRecordList> allTracksHere;
|
||||
|
||||
GenomeLoc lastLoc = null;
|
||||
RODRecordList interval = null;
|
||||
|
||||
|
|
@ -94,12 +96,12 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
|
|||
}
|
||||
|
||||
rodQueue = new RODMergingIterator(iterators);
|
||||
|
||||
//throw new StingException("RodLocusView currently disabled");
|
||||
}
|
||||
|
||||
public RefMetaDataTracker getReferenceOrderedDataAtLocus( GenomeLoc loc ) {
|
||||
return tracker;
|
||||
public RefMetaDataTracker getReferenceOrderedDataAtLocus( GenomeLoc loc, ReferenceContext referenceContext ) {
|
||||
// special case the interval again -- add it into the ROD
|
||||
if ( interval != null ) { allTracksHere.add(interval); }
|
||||
return new RefMetaDataTracker(allTracksHere, referenceContext);
|
||||
}
|
||||
|
||||
public boolean hasNext() {
|
||||
|
|
@ -122,10 +124,7 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
|
|||
|
||||
if ( DEBUG ) System.out.printf("In RodLocusView.next(): creating tracker...%n");
|
||||
|
||||
// Update the tracker here for use
|
||||
Collection<RODRecordList> allTracksHere = getSpanningTracks(datum);
|
||||
tracker = createTracker(allTracksHere);
|
||||
|
||||
allTracksHere = getSpanningTracks(datum);
|
||||
GenomeLoc rodSite = datum.getLocation();
|
||||
GenomeLoc site = genomeLocParser.createGenomeLoc( rodSite.getContig(), rodSite.getStart(), rodSite.getStart());
|
||||
|
||||
|
|
@ -137,19 +136,6 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
|
|||
return new AlignmentContext(site, new ReadBackedPileupImpl(site), skippedBases);
|
||||
}
|
||||
|
||||
private RefMetaDataTracker createTracker( Collection<RODRecordList> allTracksHere ) {
|
||||
RefMetaDataTracker t = new RefMetaDataTracker(allTracksHere.size());
|
||||
for ( RODRecordList track : allTracksHere ) {
|
||||
if ( ! t.hasROD(track.getName()) )
|
||||
t.bind(track.getName(), track);
|
||||
}
|
||||
|
||||
// special case the interval again -- add it into the ROD
|
||||
if ( interval != null ) { t.bind(interval.getName(), interval); }
|
||||
|
||||
return t;
|
||||
}
|
||||
|
||||
private Collection<RODRecordList> getSpanningTracks(RODRecordList marker) {
|
||||
return rodQueue.allElementsLTE(marker);
|
||||
}
|
||||
|
|
@ -197,10 +183,6 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
|
|||
return getSkippedBases(getLocOneBeyondShard());
|
||||
}
|
||||
|
||||
public RefMetaDataTracker getTracker() {
|
||||
return tracker;
|
||||
}
|
||||
|
||||
/**
|
||||
* Closes the current view.
|
||||
*/
|
||||
|
|
@ -209,6 +191,6 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
|
|||
state.dataSource.close( state.iterator );
|
||||
|
||||
rodQueue = null;
|
||||
tracker = null;
|
||||
allTracksHere = null;
|
||||
}
|
||||
}
|
||||
|
|
@ -893,6 +893,7 @@ public class SAMDataSource {
|
|||
* Custom representation of interval bounds.
|
||||
* Makes it simpler to track current position.
|
||||
*/
|
||||
private int[] intervalContigIndices;
|
||||
private int[] intervalStarts;
|
||||
private int[] intervalEnds;
|
||||
|
||||
|
|
@ -917,12 +918,14 @@ public class SAMDataSource {
|
|||
if(foundMappedIntervals) {
|
||||
if(keepOnlyUnmappedReads)
|
||||
throw new ReviewedStingException("Tried to apply IntervalOverlapFilteringIterator to a mixed of mapped and unmapped intervals. Please apply this filter to only mapped or only unmapped reads");
|
||||
this.intervalContigIndices = new int[intervals.size()];
|
||||
this.intervalStarts = new int[intervals.size()];
|
||||
this.intervalEnds = new int[intervals.size()];
|
||||
int i = 0;
|
||||
for(GenomeLoc interval: intervals) {
|
||||
intervalStarts[i] = (int)interval.getStart();
|
||||
intervalEnds[i] = (int)interval.getStop();
|
||||
intervalContigIndices[i] = interval.getContigIndex();
|
||||
intervalStarts[i] = interval.getStart();
|
||||
intervalEnds[i] = interval.getStop();
|
||||
i++;
|
||||
}
|
||||
}
|
||||
|
|
@ -961,11 +964,10 @@ public class SAMDataSource {
|
|||
while(nextRead == null && (keepOnlyUnmappedReads || currentBound < intervalStarts.length)) {
|
||||
if(!keepOnlyUnmappedReads) {
|
||||
// Mapped read filter; check against GenomeLoc-derived bounds.
|
||||
if(candidateRead.getAlignmentEnd() >= intervalStarts[currentBound] ||
|
||||
(candidateRead.getReadUnmappedFlag() && candidateRead.getAlignmentStart() >= intervalStarts[currentBound])) {
|
||||
// This read ends after the current interval begins (or, if unmapped, starts within the bounds of the interval.
|
||||
if(readEndsOnOrAfterStartingBound(candidateRead)) {
|
||||
// This read ends after the current interval begins.
|
||||
// Promising, but this read must be checked against the ending bound.
|
||||
if(candidateRead.getAlignmentStart() <= intervalEnds[currentBound]) {
|
||||
if(readStartsOnOrBeforeEndingBound(candidateRead)) {
|
||||
// Yes, this read is within both bounds. This must be our next read.
|
||||
nextRead = candidateRead;
|
||||
break;
|
||||
|
|
@ -993,6 +995,37 @@ public class SAMDataSource {
|
|||
candidateRead = iterator.next();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Check whether the read lies after the start of the current bound. If the read is unmapped but placed, its
|
||||
* end will be distorted, so rely only on the alignment start.
|
||||
* @param read The read to position-check.
|
||||
* @return True if the read starts after the current bounds. False otherwise.
|
||||
*/
|
||||
private boolean readEndsOnOrAfterStartingBound(final SAMRecord read) {
|
||||
return
|
||||
// Read ends on a later contig, or...
|
||||
read.getReferenceIndex() > intervalContigIndices[currentBound] ||
|
||||
// Read ends of this contig...
|
||||
(read.getReferenceIndex() == intervalContigIndices[currentBound] &&
|
||||
// either after this location, or...
|
||||
(read.getAlignmentEnd() >= intervalStarts[currentBound] ||
|
||||
// read is unmapped but positioned and alignment start is on or after this start point.
|
||||
(read.getReadUnmappedFlag() && read.getAlignmentStart() >= intervalStarts[currentBound])));
|
||||
}
|
||||
|
||||
/**
|
||||
* Check whether the read lies before the end of the current bound.
|
||||
* @param read The read to position-check.
|
||||
* @return True if the read starts after the current bounds. False otherwise.
|
||||
*/
|
||||
private boolean readStartsOnOrBeforeEndingBound(final SAMRecord read) {
|
||||
return
|
||||
// Read starts on a prior contig, or...
|
||||
read.getReferenceIndex() < intervalContigIndices[currentBound] ||
|
||||
// Read starts on this contig and the alignment start is registered before this end point.
|
||||
(read.getReferenceIndex() == intervalContigIndices[currentBound] && read.getAlignmentStart() <= intervalEnds[currentBound]);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -41,7 +41,7 @@ import java.io.File;
|
|||
* Loads reference data from fasta file
|
||||
* Looks for fai and dict files, and tries to create them if they don't exist
|
||||
*/
|
||||
public class ReferenceDataSource implements ReferenceDataSourceProgressListener {
|
||||
public class ReferenceDataSource {
|
||||
private IndexedFastaSequenceFile index;
|
||||
|
||||
/** our log, which we want to capture anything from this class */
|
||||
|
|
@ -75,7 +75,7 @@ public class ReferenceDataSource implements ReferenceDataSourceProgressListener
|
|||
// get exclusive lock
|
||||
if (!indexLock.exclusiveLock())
|
||||
throw new UserException.CouldNotCreateReferenceIndexFileBecauseOfLock(dictFile);
|
||||
FastaSequenceIndexBuilder faiBuilder = new FastaSequenceIndexBuilder(fastaFile, this);
|
||||
FastaSequenceIndexBuilder faiBuilder = new FastaSequenceIndexBuilder(fastaFile, true);
|
||||
FastaSequenceIndex sequenceIndex = faiBuilder.createIndex();
|
||||
FastaSequenceIndexBuilder.saveAsFaiFile(sequenceIndex, indexFile);
|
||||
}
|
||||
|
|
@ -194,13 +194,4 @@ public class ReferenceDataSource implements ReferenceDataSourceProgressListener
|
|||
public IndexedFastaSequenceFile getReference() {
|
||||
return this.index;
|
||||
}
|
||||
|
||||
/**
|
||||
* Notify user of progress in creating fai file
|
||||
* @param percent Percent of fasta file read as a percent
|
||||
*/
|
||||
public void percentProgress(int percent) {
|
||||
System.out.println(String.format("PROGRESS UPDATE: file is %d percent complete", percent));
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -27,7 +27,7 @@ package org.broadinstitute.sting.gatk.datasources.rmd;
|
|||
import net.sf.samtools.SAMSequenceDictionary;
|
||||
import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.FlashBackIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
|
||||
|
|
|
|||
|
|
@ -29,7 +29,7 @@ import org.broadinstitute.sting.commandline.Tags;
|
|||
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
|
||||
import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
|
@ -110,11 +110,11 @@ public class ReferenceOrderedDataSource {
|
|||
}
|
||||
|
||||
public Class getType() {
|
||||
return builder.getAvailableTrackNamesAndTypes().get(fileDescriptor.getType().toUpperCase());
|
||||
return builder.getFeatureManager().getByTriplet(fileDescriptor).getCodecClass();
|
||||
}
|
||||
|
||||
public Class getRecordType() {
|
||||
return builder.createCodec(getType(),getName()).getFeatureType();
|
||||
return builder.getFeatureManager().getByTriplet(fileDescriptor).getFeatureClass();
|
||||
}
|
||||
|
||||
public File getFile() {
|
||||
|
|
|
|||
|
|
@ -0,0 +1,72 @@
|
|||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.filters;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
|
||||
/**
|
||||
* A read filter (transformer) that sets all reads mapping quality to a given value.
|
||||
*
|
||||
* <p>
|
||||
* If a BAM file contains erroneous or missing mapping qualities, this 'filter' will set
|
||||
* all your mapping qualities to a given value. Default being 60.
|
||||
* </p>
|
||||
*
|
||||
*
|
||||
* <h2>Input</h2>
|
||||
* <p>
|
||||
* BAM file(s)
|
||||
* </p>
|
||||
*
|
||||
*
|
||||
* <h2>Output</h2>
|
||||
* <p>
|
||||
* BAM file(s) with all reads mapping qualities reassigned
|
||||
* </p>
|
||||
*
|
||||
* <h2>Examples</h2>
|
||||
* <pre>
|
||||
* java
|
||||
* -jar GenomeAnalysisTK.jar
|
||||
* -rf ReassignMappingQuality
|
||||
* -DMQ 35
|
||||
* </pre>
|
||||
*
|
||||
* @author carneiro
|
||||
* @since 8/8/11
|
||||
*/
|
||||
|
||||
public class ReassignMappingQualityFilter extends ReadFilter {
|
||||
|
||||
@Argument(fullName = "default_mapping_quality", shortName = "DMQ", doc = "Default read mapping quality to assign to all reads", required = false)
|
||||
public int defaultMappingQuality = 60;
|
||||
|
||||
public boolean filterOut(SAMRecord rec) {
|
||||
rec.setMappingQuality(defaultMappingQuality);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -87,8 +87,8 @@ public class VCFWriterStorage implements Storage<VCFWriterStorage>, VCFWriter {
|
|||
writer.writeHeader(stub.getVCFHeader());
|
||||
}
|
||||
|
||||
public void add(VariantContext vc, byte ref) {
|
||||
writer.add(vc, ref);
|
||||
public void add(VariantContext vc) {
|
||||
writer.add(vc);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -117,7 +117,7 @@ public class VCFWriterStorage implements Storage<VCFWriterStorage>, VCFWriter {
|
|||
BasicFeatureSource<VariantContext> source = BasicFeatureSource.getFeatureSource(file.getAbsolutePath(), new VCFCodec(), false);
|
||||
|
||||
for ( VariantContext vc : source.iterator() ) {
|
||||
target.writer.add(vc, vc.getReferenceBaseForIndel());
|
||||
target.writer.add(vc);
|
||||
}
|
||||
|
||||
source.close();
|
||||
|
|
|
|||
|
|
@ -33,6 +33,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
|||
import java.io.File;
|
||||
import java.io.OutputStream;
|
||||
import java.lang.reflect.Constructor;
|
||||
import java.lang.reflect.Type;
|
||||
|
||||
/**
|
||||
* Insert an OutputStreamStub instead of a full-fledged concrete OutputStream implementations.
|
||||
|
|
@ -78,7 +79,7 @@ public class OutputStreamArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
|||
}
|
||||
|
||||
@Override
|
||||
public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Class type, ArgumentMatches matches ) {
|
||||
public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches ) {
|
||||
ArgumentDefinition definition = createDefaultArgumentDefinition(source);
|
||||
String fileName = getArgumentValue( definition, matches );
|
||||
|
||||
|
|
@ -91,7 +92,7 @@ public class OutputStreamArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
|||
|
||||
engine.addOutput(stub);
|
||||
|
||||
Object result = createInstanceOfClass(type,stub);
|
||||
Object result = createInstanceOfClass(makeRawTypeIfNecessary(type),stub);
|
||||
// WARNING: Side effects required by engine!
|
||||
parsingEngine.addTags(result,getArgumentTags(matches));
|
||||
|
||||
|
|
|
|||
|
|
@ -34,6 +34,7 @@ import org.broadinstitute.sting.utils.exceptions.UserException;
|
|||
import org.broadinstitute.sting.utils.sam.SAMFileReaderBuilder;
|
||||
|
||||
import java.io.File;
|
||||
import java.lang.reflect.Type;
|
||||
|
||||
/**
|
||||
* Describe how to parse SAMFileReaders.
|
||||
|
|
@ -59,7 +60,7 @@ public class SAMFileReaderArgumentTypeDescriptor extends ArgumentTypeDescriptor
|
|||
}
|
||||
|
||||
@Override
|
||||
public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Class type, ArgumentMatches matches ) {
|
||||
public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches ) {
|
||||
SAMFileReaderBuilder builder = new SAMFileReaderBuilder();
|
||||
|
||||
String readerFileName = getArgumentValue( createDefaultArgumentDefinition(source), matches );
|
||||
|
|
|
|||
|
|
@ -34,6 +34,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
|||
import java.io.File;
|
||||
import java.io.OutputStream;
|
||||
import java.lang.annotation.Annotation;
|
||||
import java.lang.reflect.Type;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
|
|
@ -102,7 +103,7 @@ public class SAMFileWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor
|
|||
}
|
||||
|
||||
@Override
|
||||
public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Class type, ArgumentMatches matches ) {
|
||||
public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches ) {
|
||||
// Extract all possible parameters that could be passed to a BAM file writer?
|
||||
ArgumentDefinition bamArgumentDefinition = createBAMArgumentDefinition(source);
|
||||
String writerFileName = getArgumentValue( bamArgumentDefinition, matches );
|
||||
|
|
|
|||
|
|
@ -32,6 +32,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
|||
|
||||
import java.io.File;
|
||||
import java.io.OutputStream;
|
||||
import java.lang.reflect.Type;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.HashSet;
|
||||
|
|
@ -124,7 +125,7 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
|||
* @return Transform from the matches into the associated argument.
|
||||
*/
|
||||
@Override
|
||||
public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Class type, ArgumentMatches matches ) {
|
||||
public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches ) {
|
||||
ArgumentDefinition defaultArgumentDefinition = createDefaultArgumentDefinition(source);
|
||||
// Get the filename for the genotype file, if it exists. If not, we'll need to send output to out.
|
||||
String writerFileName = getArgumentValue(defaultArgumentDefinition,matches);
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.io.stubs;
|
||||
|
||||
import net.sf.samtools.SAMSequenceRecord;
|
||||
import org.broadinstitute.sting.gatk.CommandLineExecutable;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.io.OutputTracker;
|
||||
|
|
@ -177,14 +178,23 @@ public class VCFWriterStub implements Stub<VCFWriter>, VCFWriter {
|
|||
vcfHeader = header;
|
||||
|
||||
// Check for the command-line argument header line. If not present, add it in.
|
||||
VCFHeaderLine commandLineArgHeaderLine = getCommandLineArgumentHeaderLine();
|
||||
boolean foundCommandLineHeaderLine = false;
|
||||
for(VCFHeaderLine line: vcfHeader.getMetaData()) {
|
||||
if(line.getKey().equals(commandLineArgHeaderLine.getKey()))
|
||||
foundCommandLineHeaderLine = true;
|
||||
if ( !skipWritingHeader ) {
|
||||
VCFHeaderLine commandLineArgHeaderLine = getCommandLineArgumentHeaderLine();
|
||||
boolean foundCommandLineHeaderLine = false;
|
||||
for (VCFHeaderLine line: vcfHeader.getMetaData()) {
|
||||
if ( line.getKey().equals(commandLineArgHeaderLine.getKey()) )
|
||||
foundCommandLineHeaderLine = true;
|
||||
}
|
||||
if ( !foundCommandLineHeaderLine )
|
||||
vcfHeader.addMetaDataLine(commandLineArgHeaderLine);
|
||||
|
||||
// also put in the reference contig header lines
|
||||
String assembly = getReferenceAssembly(engine.getArguments().referenceFile.getName());
|
||||
for ( SAMSequenceRecord contig : engine.getReferenceDataSource().getReference().getSequenceDictionary().getSequences() )
|
||||
vcfHeader.addMetaDataLine(getContigHeaderLine(contig, assembly));
|
||||
|
||||
vcfHeader.addMetaDataLine(new VCFHeaderLine("reference", "file://" + engine.getArguments().referenceFile.getAbsolutePath()));
|
||||
}
|
||||
if(!foundCommandLineHeaderLine && !skipWritingHeader)
|
||||
vcfHeader.addMetaDataLine(commandLineArgHeaderLine);
|
||||
|
||||
outputTracker.getStorage(this).writeHeader(vcfHeader);
|
||||
}
|
||||
|
|
@ -192,8 +202,8 @@ public class VCFWriterStub implements Stub<VCFWriter>, VCFWriter {
|
|||
/**
|
||||
* @{inheritDoc}
|
||||
*/
|
||||
public void add(VariantContext vc, byte ref) {
|
||||
outputTracker.getStorage(this).add(vc,ref);
|
||||
public void add(VariantContext vc) {
|
||||
outputTracker.getStorage(this).add(vc);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -220,4 +230,27 @@ public class VCFWriterStub implements Stub<VCFWriter>, VCFWriter {
|
|||
CommandLineExecutable executable = JVMUtils.getObjectOfType(argumentSources,CommandLineExecutable.class);
|
||||
return new VCFHeaderLine(executable.getAnalysisName(), "\"" + engine.createApproximateCommandLineArgumentString(argumentSources.toArray()) + "\"");
|
||||
}
|
||||
|
||||
private VCFHeaderLine getContigHeaderLine(SAMSequenceRecord contig, String assembly) {
|
||||
String val;
|
||||
if ( assembly != null )
|
||||
val = String.format("<ID=%s,length=%d,assembly=%s>", contig.getSequenceName(), contig.getSequenceLength(), assembly);
|
||||
else
|
||||
val = String.format("<ID=%s,length=%d>", contig.getSequenceName(), contig.getSequenceLength());
|
||||
return new VCFHeaderLine("contig", val);
|
||||
}
|
||||
|
||||
private String getReferenceAssembly(String refPath) {
|
||||
// This doesn't need to be perfect as it's not a required VCF header line, but we might as well give it a shot
|
||||
String assembly = null;
|
||||
if ( refPath.indexOf("b37") != -1 || refPath.indexOf("v37") != -1 )
|
||||
assembly = "b37";
|
||||
else if ( refPath.indexOf("b36") != -1 )
|
||||
assembly = "b36";
|
||||
else if ( refPath.indexOf("hg18") != -1 )
|
||||
assembly = "hg18";
|
||||
else if ( refPath.indexOf("hg19") != -1 )
|
||||
assembly = "hg19";
|
||||
return assembly;
|
||||
}
|
||||
}
|
||||
|
|
@ -1,238 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.refdata;
|
||||
|
||||
import org.broadinstitute.sting.gatk.iterators.PushbackIterator;
|
||||
import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.text.XReadLines;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.lang.reflect.Constructor;
|
||||
import java.util.Iterator;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/**
|
||||
* This is a low-level iterator designed to provide system-wide generic support for reading record-oriented data
|
||||
* files. The only assumption made is that every line in the file provides a complete and separate data record. The records
|
||||
* can be associated with coordinates or coordinate intervals, there can be one or more records associated with a given
|
||||
* position/interval, or intervals can overlap. The records must be comprised of delimited fields, but the format is
|
||||
* otherwise free. For any specific line-based data format, an appropriate implementation of ReferenceOrderedDatum must be
|
||||
* provided that is capable of parsing itself from a single line of data. This implementation will be used,
|
||||
* through reflection mechanism, as a callback to do all the work.
|
||||
*
|
||||
* The model is, hence, as follows:
|
||||
*
|
||||
* String dataRecord <---> RodImplementation ( ::parseLine(dataRecord.split(delimiter)) is aware of the format and fills
|
||||
* an instance of RodImplementation with data values from dataRecord line).
|
||||
*
|
||||
*
|
||||
* instantiation of RODRecordIterator(dataFile, trackName, RodImplementation.class) will immediately provide an iterator
|
||||
* that walks along the dataFile line by line, and on each call to next() returns a new RodImplementation object
|
||||
* representing a single line (record) of data. The returned object will be initialized with "track name" trackName -
|
||||
* track names (as returned by ROD.getName()) are often used in other parts of the code to distinguish between
|
||||
* multiple streams of (possibly heterogeneous) annotation data bound to an application.
|
||||
*
|
||||
* This generic iterator skips and ignores a) empty lines, b) lines starting with '#' (comments): they are never sent back
|
||||
* to the ROD implementation class for processing.
|
||||
*
|
||||
* This iterator does not actually check if the ROD records (lines) in the file are indeed ordedered by coordinate,
|
||||
* and it does not depend on such an order as it still implements a low-level line-based traversal of the data. Higher-level
|
||||
* iterators/wrappers will perform all the necessary checks.
|
||||
*
|
||||
* Note: some data formats/ROD implementations may require a header line in the file. In this case the current (ugly)
|
||||
* mechanism is as follows:
|
||||
* 1) rod implementation's ::initialize(file) method should be able to open the file, find and read the header line
|
||||
* and return the header object (to be kept by the iterator)
|
||||
* 2) rod implementation's ::parseLine(header,line) method should be capable of making use of that saved header object now served to it
|
||||
* and
|
||||
* 3) ::parseLine(header,line) should be able to recognize the original header line in the file and skip it (after ROD's initialize()
|
||||
* method is called, the iterator will re-open the file and start reading it from the very beginning; there is no
|
||||
* other way, except for "smart" ::parseLine(), to avoid reading in the header line as "data").
|
||||
*
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: asivache
|
||||
* Date: Sep 10, 2009
|
||||
* Time: 1:22:23 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class RODRecordIterator<ROD extends ReferenceOrderedDatum> implements Iterator<ROD> {
|
||||
|
||||
private PushbackIterator<String> reader;
|
||||
|
||||
// stores name of the track this iterator reads (will be also returned by getName() of ROD objects
|
||||
// generated by this iterator)
|
||||
private String name;
|
||||
|
||||
// we keep the file object, only to use file name in error reports
|
||||
private File file;
|
||||
|
||||
// rod type; this is what we will instantiate for RODs at runtime
|
||||
private Class<ROD> type;
|
||||
|
||||
private Object header = null; // Some RODs may use header
|
||||
|
||||
// field delimiter in the file. Should it be the job of the iterator to split the lines though? RODs can do that!
|
||||
private String fieldDelimiter;
|
||||
|
||||
// constructor for the ROD objects we are going to return. Constructor that takes the track name as its single arg is required.
|
||||
private Constructor<ROD> named_constructor;
|
||||
|
||||
// keep track of the lines we are reading. used for error messages only.
|
||||
private long linenum = 0;
|
||||
|
||||
private boolean allow_empty = true;
|
||||
private boolean allow_comments = true;
|
||||
public static Pattern EMPTYLINE_PATTERN = Pattern.compile("^\\s*$");
|
||||
|
||||
public RODRecordIterator(File file, String name, Class<ROD> type) {
|
||||
try {
|
||||
reader = new PushbackIterator<String>(new XReadLines(file));
|
||||
} catch (FileNotFoundException e) {
|
||||
throw new UserException.CouldNotReadInputFile(file, e);
|
||||
}
|
||||
this.file = file;
|
||||
this.name = name;
|
||||
this.type = type;
|
||||
try {
|
||||
named_constructor = type.getConstructor(String.class);
|
||||
}
|
||||
catch (java.lang.NoSuchMethodException e) {
|
||||
throw new ReviewedStingException("ROD class "+type.getName()+" does not have constructor that accepts a single String argument (track name)");
|
||||
}
|
||||
ROD rod = instantiateROD(name);
|
||||
fieldDelimiter = rod.delimiterRegex(); // get delimiter from the ROD itself
|
||||
try {
|
||||
header = rod.initialize(file);
|
||||
} catch (FileNotFoundException e) {
|
||||
throw new UserException.CouldNotReadInputFile(file, "ROD "+type.getName() + " failed to initialize properly from file "+file);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns <tt>true</tt> if the iteration has more elements. (In other
|
||||
* words, returns <tt>true</tt> if <tt>next</tt> would return an element
|
||||
* rather than throwing an exception.)
|
||||
*
|
||||
* @return <tt>true</tt> if the iterator has more elements.
|
||||
*/
|
||||
public boolean hasNext() {
|
||||
if ( allow_empty || allow_comments ) {
|
||||
while ( reader.hasNext() ) {
|
||||
String line = reader.next();
|
||||
if ( allow_empty && EMPTYLINE_PATTERN.matcher(line).matches() ) continue; // skip empty line
|
||||
if ( allow_comments && line.charAt(0) == '#' ) continue; // skip comment lines
|
||||
// the line is not empty and not a comment line, so we have next after all
|
||||
reader.pushback(line);
|
||||
return true;
|
||||
}
|
||||
return false; // oops, we end up here if there's nothing left
|
||||
} else {
|
||||
return reader.hasNext();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the next valid ROD record in the file, skipping empty and comment lines.
|
||||
*
|
||||
* @return the next element in the iteration.
|
||||
* @throws java.util.NoSuchElementException
|
||||
* iteration has no more elements.
|
||||
*/
|
||||
public ROD next() {
|
||||
ROD n = null;
|
||||
boolean parsed_ok = false;
|
||||
String line ;
|
||||
|
||||
while ( ! parsed_ok && reader.hasNext() ) {
|
||||
line = reader.next();
|
||||
linenum++;
|
||||
while ( allow_empty && EMPTYLINE_PATTERN.matcher(line).matches() ||
|
||||
allow_comments && line.charAt(0) == '#' ) {
|
||||
if ( reader.hasNext() ) {
|
||||
line = reader.next();
|
||||
linenum++;
|
||||
} else {
|
||||
line = null;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if ( line == null ) break; // if we ran out of lines while skipping empty lines/comments, then we are done
|
||||
|
||||
String parts[] = line.split(fieldDelimiter);
|
||||
|
||||
try {
|
||||
n = instantiateROD(name);
|
||||
parsed_ok = n.parseLine(header,parts) ;
|
||||
}
|
||||
catch ( Exception e ) {
|
||||
throw new UserException.MalformedFile(file, "Failed to parse ROD data ("+type.getName()+") from file "+ file + " at line #"+linenum+
|
||||
"\nOffending line: "+line+
|
||||
"\nReason ("+e.getClass().getName()+")", e);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
return n;
|
||||
}
|
||||
|
||||
/**
|
||||
* Removes from the underlying collection the last element returned by the
|
||||
* iterator (optional operation). This method can be called only once per
|
||||
* call to <tt>next</tt>. The behavior of an iterator is unspecified if
|
||||
* the underlying collection is modified while the iteration is in
|
||||
* progress in any way other than by calling this method.
|
||||
*
|
||||
* @throws UnsupportedOperationException if the <tt>remove</tt>
|
||||
* operation is not supported by this Iterator.
|
||||
* @throws IllegalStateException if the <tt>next</tt> method has not
|
||||
* yet been called, or the <tt>remove</tt> method has already
|
||||
* been called after the last call to the <tt>next</tt>
|
||||
* method.
|
||||
*/
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException("remove() operation is not supported by RODRecordIterator");
|
||||
}
|
||||
|
||||
/** Instantiates appropriate implementation of the ROD used by this iteratot. The 'name' argument is the name
|
||||
* of the ROD track.
|
||||
* @param name
|
||||
* @return
|
||||
*/
|
||||
private ROD instantiateROD(final String name) {
|
||||
try {
|
||||
return (ROD) named_constructor.newInstance(name);
|
||||
} catch (Exception e) {
|
||||
throw new DynamicClassResolutionException(named_constructor.getDeclaringClass(), e);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -1,13 +1,15 @@
|
|||
package org.broadinstitute.sting.gatk.refdata;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broadinstitute.sting.commandline.RodBinding;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -18,348 +20,406 @@ import java.util.*;
|
|||
* The standard interaction model is:
|
||||
*
|
||||
* Traversal system arrives at a site, which has a bunch of RMDs covering it
|
||||
Genotype * Traversal calls tracker.bind(name, RMD) for each RMDs in RMDs
|
||||
* Traversal passes tracker to the walker
|
||||
* walker calls lookup(name, default) to obtain the RMDs values at this site, or default if none was
|
||||
* bound at this site.
|
||||
* Traversal passes creates a tracker and passes it to the walker
|
||||
* walker calls get(rodBinding) to obtain the RMDs values at this site for the track
|
||||
* associated with rodBinding.
|
||||
*
|
||||
* Note that this is an immutable class. Once created the underlying data structures
|
||||
* cannot be modified
|
||||
*
|
||||
* User: mdepristo
|
||||
* Date: Apr 3, 2009
|
||||
* Time: 3:05:23 PM
|
||||
*/
|
||||
public class RefMetaDataTracker {
|
||||
// TODO: this should be a list, not a map, actually
|
||||
private final static RODRecordList EMPTY_ROD_RECORD_LIST = new RODRecordListImpl("EMPTY");
|
||||
|
||||
final Map<String, RODRecordList> map;
|
||||
protected static Logger logger = Logger.getLogger(RefMetaDataTracker.class);
|
||||
final ReferenceContext ref;
|
||||
final protected static Logger logger = Logger.getLogger(RefMetaDataTracker.class);
|
||||
|
||||
public RefMetaDataTracker(int nBindings) {
|
||||
if ( nBindings == 0 )
|
||||
// ------------------------------------------------------------------------------------------
|
||||
//
|
||||
//
|
||||
// Special ENGINE interaction functions
|
||||
//
|
||||
//
|
||||
// ------------------------------------------------------------------------------------------
|
||||
|
||||
public RefMetaDataTracker(final Collection<RODRecordList> allBindings, final ReferenceContext ref) {
|
||||
this.ref = ref;
|
||||
|
||||
// set up the map
|
||||
if ( allBindings.isEmpty() )
|
||||
map = Collections.emptyMap();
|
||||
else
|
||||
map = new HashMap<String, RODRecordList>(nBindings);
|
||||
else {
|
||||
Map<String, RODRecordList> tmap = new HashMap<String, RODRecordList>(allBindings.size());
|
||||
for ( RODRecordList rod : allBindings ) {
|
||||
if ( rod != null && ! rod.isEmpty() )
|
||||
tmap.put(canonicalName(rod.getName()), rod);
|
||||
}
|
||||
|
||||
// ensure that no one modifies the map itself
|
||||
map = Collections.unmodifiableMap(tmap);
|
||||
}
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------------------------
|
||||
//
|
||||
//
|
||||
// Generic accessors
|
||||
//
|
||||
//
|
||||
// ------------------------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Gets all of the Tribble features spanning this locus, returning them as a list of specific
|
||||
* type T extending Feature. This function looks across all tracks to find the Features, so
|
||||
* if you have two tracks A and B each containing 1 Feature, then getValues will return
|
||||
* a list containing both features.
|
||||
*
|
||||
* Note that this function assumes that all of the bound features are instances of or
|
||||
* subclasses of T. A ClassCastException will occur if this isn't the case. If you want
|
||||
* to get all Features without any danger of such an exception use the root Tribble
|
||||
* interface Feature.
|
||||
*
|
||||
* @param type The type of the underlying objects bound here
|
||||
* @param <T> as above
|
||||
* @return A freshly allocated list of all of the bindings, or an empty list if none are bound.
|
||||
*/
|
||||
@Requires({"type != null"})
|
||||
@Ensures("result != null")
|
||||
public <T extends Feature> List<T> getValues(final Class<T> type) {
|
||||
return addValues(map.keySet(), type, new ArrayList<T>(), null, false, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* get all the reference meta data associated with a track name.
|
||||
* @param name the name of the track we're looking for
|
||||
* @return a list of objects, representing the underlying objects that the tracks produce. I.e. for a
|
||||
* dbSNP RMD this will be a RodDbSNP, etc.
|
||||
* Provides the same functionality as @link #getValues(Class<T>) but will only include
|
||||
* Features that start as the GenomeLoc provide onlyAtThisLoc.
|
||||
*
|
||||
* Important: The list returned by this function is guaranteed not to be null, but may be empty!
|
||||
* @param type The type of the underlying objects bound here
|
||||
* @param onlyAtThisLoc
|
||||
* @param <T> as above
|
||||
* @return A freshly allocated list of all of the bindings, or an empty list if none are bound.
|
||||
*/
|
||||
public List<Object> getReferenceMetaData(final String name) {
|
||||
RODRecordList list = getTrackDataByName(name, true);
|
||||
List<Object> objects = new ArrayList<Object>();
|
||||
if (list == null) return objects;
|
||||
for (GATKFeature feature : list)
|
||||
objects.add(feature.getUnderlyingObject());
|
||||
return objects;
|
||||
@Requires({"type != null", "onlyAtThisLoc != null"})
|
||||
@Ensures("result != null")
|
||||
public <T extends Feature> List<T> getValues(final Class<T> type, final GenomeLoc onlyAtThisLoc) {
|
||||
return addValues(map.keySet(), type, new ArrayList<T>(), onlyAtThisLoc, true, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* get all the reference meta data associated with a track name.
|
||||
* @param name the name of the track we're looking for
|
||||
* @param requireExactMatch do we require an exact match for the name (true) or do we require only that the name starts with
|
||||
* the passed in parameter (false).
|
||||
* @return a list of objects, representing the underlying objects that the tracks produce. I.e. for a
|
||||
* dbSNP rod this will be a RodDbSNP, etc.
|
||||
* Uses the same logic as @link #getValues(Class) but arbitrary select one of the resulting
|
||||
* elements of the list to return. That is, if there would be two elements in the result of
|
||||
* @link #getValues(Class), one of these two is selected, and which one it will be isn't
|
||||
* specified. Consequently, this method is only really safe if (1) you absolutely know
|
||||
* that only one binding will meet the constraints of @link #getValues(Class) or (2)
|
||||
* you truly don't care which of the multiple bindings available you are going to examine.
|
||||
*
|
||||
* Important: The list returned by this function is guaranteed not to be null, but may be empty!
|
||||
* If there are no bindings here, getFirstValue() return null
|
||||
*
|
||||
* @param type The type of the underlying objects bound here
|
||||
* @param <T> as above
|
||||
* @return A random single element the RODs bound here, or null if none are bound.
|
||||
*/
|
||||
public List<Object> getReferenceMetaData(final String name, boolean requireExactMatch) {
|
||||
RODRecordList list = getTrackDataByName(name, requireExactMatch);
|
||||
List<Object> objects = new ArrayList<Object>();
|
||||
if (list == null) return objects;
|
||||
for (GATKFeature feature : list)
|
||||
objects.add(feature.getUnderlyingObject());
|
||||
return objects;
|
||||
@Requires({"type != null"})
|
||||
public <T extends Feature> T getFirstValue(final Class<T> type) {
|
||||
return safeGetFirst(getValues(type));
|
||||
}
|
||||
|
||||
/**
|
||||
* get all the GATK features associated with a specific track name
|
||||
* @param name the name of the track we're looking for
|
||||
* @param requireExactMatch do we require an exact match for the name (true) or do we require only that the name starts with
|
||||
* the passed in parameter (false).
|
||||
* @return a list of GATKFeatures for the target rmd
|
||||
* Uses the same logic as @link #getValue(Class,GenomeLoc) to determine the list
|
||||
* of eligible Features and @link #getFirstValue(Class) to select a single
|
||||
* element from the interval list.
|
||||
*
|
||||
* Important: The list returned by this function is guaranteed not to be null, but may be empty!
|
||||
* @param type The type of the underlying objects bound here
|
||||
* @param <T> as above
|
||||
* @param onlyAtThisLoc only Features starting at this site are considered
|
||||
* @return A random single element the RODs bound here starting at onlyAtThisLoc, or null if none are bound.
|
||||
*/
|
||||
public List<GATKFeature> getGATKFeatureMetaData(final String name, boolean requireExactMatch) {
|
||||
List<GATKFeature> feat = getTrackDataByName(name,requireExactMatch);
|
||||
return (feat == null) ? new ArrayList<GATKFeature>() : feat; // to satisfy the above requirement that we don't return null
|
||||
@Requires({"type != null", "onlyAtThisLoc != null"})
|
||||
public <T extends Feature> T getFirstValue(final Class<T> type, final GenomeLoc onlyAtThisLoc) {
|
||||
return safeGetFirst(getValues(type, onlyAtThisLoc));
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* get a singleton record, given the name and a type. This function will return the first record at the current position seen,
|
||||
* and emit a logger warning if there were more than one option.
|
||||
* Gets all of the Tribble features bound to RodBinding spanning this locus, returning them as
|
||||
* a list of specific type T extending Feature.
|
||||
*
|
||||
* WARNING: this method is deprecated, since we now suppport more than one RMD at a single position for all tracks. If there are
|
||||
* are multiple RMD objects at this location, there is no contract for which object this method will pick, and which object gets
|
||||
* picked may change from time to time! BE WARNED!
|
||||
*
|
||||
* @param name the name of the track
|
||||
* @param clazz the underlying type to return
|
||||
* @param <T> the type to parameterize on, matching the clazz argument
|
||||
* @return a record of type T, or null if no record is present.
|
||||
* Note that this function assumes that all of the bound features are instances of or
|
||||
* subclasses of T. A ClassCastException will occur if this isn't the case.
|
||||
*
|
||||
* @param rodBinding Only Features coming from the track associated with this rodBinding are fetched
|
||||
* @param <T> The Tribble Feature type of the rodBinding, and consequently the type of the resulting list of Features
|
||||
* @return A freshly allocated list of all of the bindings, or an empty list if none are bound.
|
||||
*/
|
||||
@Deprecated
|
||||
public <T> T lookup(final String name, Class<T> clazz) {
|
||||
RODRecordList objects = getTrackDataByName(name, true);
|
||||
@Requires({"rodBinding != null"})
|
||||
@Ensures("result != null")
|
||||
public <T extends Feature> List<T> getValues(final RodBinding<T> rodBinding) {
|
||||
return addValues(rodBinding.getName(), rodBinding.getType(), new ArrayList<T>(1), getTrackDataByName(rodBinding), null, false, false);
|
||||
}
|
||||
|
||||
// if emtpy or null return null;
|
||||
if (objects == null || objects.size() < 1) return null;
|
||||
/**
|
||||
* Gets all of the Tribble features bound to any RodBinding in rodBindings,
|
||||
* spanning this locus, returning them as a list of specific type T extending Feature.
|
||||
*
|
||||
* Note that this function assumes that all of the bound features are instances of or
|
||||
* subclasses of T. A ClassCastException will occur if this isn't the case.
|
||||
*
|
||||
* @param rodBindings Only Features coming from the tracks associated with one of rodBindings are fetched
|
||||
* @param <T> The Tribble Feature type of the rodBinding, and consequently the type of the resulting list of Features
|
||||
* @return A freshly allocated list of all of the bindings, or an empty list if none are bound.
|
||||
*/
|
||||
@Requires({"rodBindings != null"})
|
||||
@Ensures("result != null")
|
||||
public <T extends Feature> List<T> getValues(final Collection<RodBinding<T>> rodBindings) {
|
||||
List<T> results = new ArrayList<T>(1);
|
||||
for ( RodBinding<T> rodBinding : rodBindings )
|
||||
results.addAll(getValues(rodBinding));
|
||||
return results;
|
||||
}
|
||||
|
||||
if (objects.size() > 1)
|
||||
logger.info("lookup is choosing the first record from " + (objects.size() - 1) + " options");
|
||||
/**
|
||||
* The same logic as @link #getValues(RodBinding) but enforces that each Feature start at onlyAtThisLoc
|
||||
*
|
||||
* @param rodBinding Only Features coming from the track associated with this rodBinding are fetched
|
||||
* @param <T> The Tribble Feature type of the rodBinding, and consequently the type of the resulting list of Features
|
||||
* @param onlyAtThisLoc only Features starting at this site are considered
|
||||
* @return A freshly allocated list of all of the bindings, or an empty list if none are bound.
|
||||
*/
|
||||
@Requires({"rodBinding != null", "onlyAtThisLoc != null"})
|
||||
@Ensures("result != null")
|
||||
public <T extends Feature> List<T> getValues(final RodBinding<T> rodBinding, final GenomeLoc onlyAtThisLoc) {
|
||||
return addValues(rodBinding.getName(), rodBinding.getType(), new ArrayList<T>(1), getTrackDataByName(rodBinding), onlyAtThisLoc, true, false);
|
||||
}
|
||||
|
||||
Object obj = objects.get(0).getUnderlyingObject();
|
||||
if (!(clazz.isAssignableFrom(obj.getClass())))
|
||||
throw new UserException.CommandLineException("Unable to case track named " + name + " to type of " + clazz.toString()
|
||||
+ " it's of type " + obj.getClass());
|
||||
/**
|
||||
* The same logic as @link #getValues(List) but enforces that each Feature start at onlyAtThisLoc
|
||||
*
|
||||
* @param rodBindings Only Features coming from the tracks associated with one of rodBindings are fetched
|
||||
* @param <T> The Tribble Feature type of the rodBinding, and consequently the type of the resulting list of Features
|
||||
* @param onlyAtThisLoc only Features starting at this site are considered
|
||||
* @return A freshly allocated list of all of the bindings, or an empty list if none are bound.
|
||||
*/
|
||||
@Requires({"rodBindings != null", "onlyAtThisLoc != null"})
|
||||
@Ensures("result != null")
|
||||
public <T extends Feature> List<T> getValues(final Collection<RodBinding<T>> rodBindings, final GenomeLoc onlyAtThisLoc) {
|
||||
List<T> results = new ArrayList<T>(1);
|
||||
for ( RodBinding<T> rodBinding : rodBindings )
|
||||
results.addAll(getValues(rodBinding, onlyAtThisLoc));
|
||||
return results;
|
||||
}
|
||||
|
||||
return (T)obj;
|
||||
/**
|
||||
* Uses the same logic as @getValues(RodBinding) to determine the list
|
||||
* of eligible Features and select a single element from the resulting set
|
||||
* of eligible features.
|
||||
*
|
||||
* @param rodBinding Only Features coming from the track associated with this rodBinding are fetched
|
||||
* @param <T> as above
|
||||
* @return A random single element the eligible Features found, or null if none are bound.
|
||||
*/
|
||||
@Requires({"rodBinding != null"})
|
||||
public <T extends Feature> T getFirstValue(final RodBinding<T> rodBinding) {
|
||||
return safeGetFirst(addValues(rodBinding.getName(), rodBinding.getType(), null, getTrackDataByName(rodBinding), null, false, true));
|
||||
}
|
||||
|
||||
/**
|
||||
* Uses the same logic as @getValues(RodBinding, GenomeLoc) to determine the list
|
||||
* of eligible Features and select a single element from the resulting set
|
||||
* of eligible features.
|
||||
*
|
||||
* @param rodBinding Only Features coming from the track associated with this rodBinding are fetched
|
||||
* @param <T> as above
|
||||
* @param onlyAtThisLoc only Features starting at this site are considered
|
||||
* @return A random single element the eligible Features found, or null if none are bound.
|
||||
*/
|
||||
@Requires({"rodBinding != null", "onlyAtThisLoc != null"})
|
||||
public <T extends Feature> T getFirstValue(final RodBinding<T> rodBinding, final GenomeLoc onlyAtThisLoc) {
|
||||
return safeGetFirst(addValues(rodBinding.getName(), rodBinding.getType(), null, getTrackDataByName(rodBinding), onlyAtThisLoc, true, true));
|
||||
}
|
||||
|
||||
/**
|
||||
* Uses the same logic as @getValues(List) to determine the list
|
||||
* of eligible Features and select a single element from the resulting set
|
||||
* of eligible features.
|
||||
*
|
||||
* @param rodBindings Only Features coming from the tracks associated with these rodBindings are fetched
|
||||
* @param <T> as above
|
||||
* @return A random single element the eligible Features found, or null if none are bound.
|
||||
*/
|
||||
@Requires({"rodBindings != null"})
|
||||
public <T extends Feature> T getFirstValue(final Collection<RodBinding<T>> rodBindings) {
|
||||
for ( RodBinding<T> rodBinding : rodBindings ) {
|
||||
T val = getFirstValue(rodBinding);
|
||||
if ( val != null )
|
||||
return val;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Uses the same logic as @getValues(RodBinding,GenomeLoc) to determine the list
|
||||
* of eligible Features and select a single element from the resulting set
|
||||
* of eligible features.
|
||||
*
|
||||
* @param rodBindings Only Features coming from the tracks associated with these rodBindings are fetched
|
||||
* @param <T> as above
|
||||
* @param onlyAtThisLoc only Features starting at this site are considered
|
||||
* @return A random single element the eligible Features found, or null if none are bound.
|
||||
*/
|
||||
@Requires({"rodBindings != null", "onlyAtThisLoc != null"})
|
||||
public <T extends Feature> T getFirstValue(final Collection<RodBinding<T>> rodBindings, final GenomeLoc onlyAtThisLoc) {
|
||||
for ( RodBinding<T> rodBinding : rodBindings ) {
|
||||
T val = getFirstValue(rodBinding, onlyAtThisLoc);
|
||||
if ( val != null )
|
||||
return val;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Is there a binding at this site to a ROD/track with the specified name?
|
||||
*
|
||||
* @param name the name of the rod
|
||||
* @return true if it has the rod
|
||||
* @param rodBinding the rod binding we want to know about
|
||||
* @return true if any Features are bound in this tracker to rodBinding
|
||||
*/
|
||||
public boolean hasROD(final String name) {
|
||||
return map.containsKey(canonicalName(name));
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Get all of the RMDs at the current site. The collection is "flattened": for any track that has multiple records
|
||||
* at the current site, they all will be added to the list as separate elements.
|
||||
*
|
||||
* @return collection of all rods
|
||||
*/
|
||||
public Collection<GATKFeature> getAllRods() {
|
||||
List<GATKFeature> l = new ArrayList<GATKFeature>();
|
||||
for ( RODRecordList rl : map.values() ) {
|
||||
if ( rl == null ) continue; // how do we get null value stored for a track? shouldn't the track be missing from the map alltogether?
|
||||
l.addAll(rl);
|
||||
}
|
||||
return l;
|
||||
|
||||
@Requires({"rodBinding != null"})
|
||||
public boolean hasValues(final RodBinding rodBinding) {
|
||||
return map.containsKey(canonicalName(rodBinding.getName()));
|
||||
}
|
||||
|
||||
/**
|
||||
* Get all of the RMD tracks at the current site. Each track is returned as a single compound
|
||||
* object (RODRecordList) that may contain multiple RMD records associated with the current site.
|
||||
*
|
||||
* @return collection of all tracks
|
||||
* @return List of all tracks
|
||||
*/
|
||||
public Collection<RODRecordList> getBoundRodTracks() {
|
||||
LinkedList<RODRecordList> bound = new LinkedList<RODRecordList>();
|
||||
|
||||
for ( RODRecordList value : map.values() ) {
|
||||
if ( value != null && value.size() != 0 ) bound.add(value);
|
||||
}
|
||||
|
||||
return bound;
|
||||
public List<RODRecordList> getBoundRodTracks() {
|
||||
return new ArrayList<RODRecordList>(map.values());
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the number of ROD bindings (name -> value) where value is not empty in this tracker
|
||||
* The number of tracks with at least one value bound here
|
||||
* @return the number of tracks with at least one bound Feature
|
||||
*/
|
||||
public int getNBoundRodTracks() {
|
||||
return getNBoundRodTracks(null);
|
||||
public int getNTracksWithBoundFeatures() {
|
||||
return map.size();
|
||||
}
|
||||
|
||||
public int getNBoundRodTracks(final String excludeIn ) {
|
||||
final String exclude = excludeIn == null ? null : canonicalName(excludeIn);
|
||||
// ------------------------------------------------------------------------------------------
|
||||
//
|
||||
//
|
||||
// old style accessors
|
||||
//
|
||||
// TODO -- DELETE ME
|
||||
//
|
||||
//
|
||||
// ------------------------------------------------------------------------------------------
|
||||
|
||||
int n = 0;
|
||||
for ( RODRecordList value : map.values() ) {
|
||||
if ( value != null && ! value.isEmpty() ) {
|
||||
if ( exclude == null || ! value.getName().equals(exclude) )
|
||||
n++;
|
||||
}
|
||||
}
|
||||
|
||||
return n;
|
||||
@Deprecated
|
||||
public boolean hasValues(final String name) {
|
||||
return map.containsKey(canonicalName(name));
|
||||
}
|
||||
|
||||
@Deprecated
|
||||
public <T extends Feature> List<T> getValues(final Class<T> type, final String name) {
|
||||
return addValues(name, type, new ArrayList<T>(), getTrackDataByName(name), null, false, false);
|
||||
}
|
||||
@Deprecated
|
||||
public <T extends Feature> List<T> getValues(final Class<T> type, final String name, final GenomeLoc onlyAtThisLoc) {
|
||||
return addValues(name, type, new ArrayList<T>(), getTrackDataByName(name), onlyAtThisLoc, true, false);
|
||||
}
|
||||
@Deprecated
|
||||
public <T extends Feature> List<T> getValues(final Class<T> type, final Collection<String> names, final GenomeLoc onlyAtThisLoc) {
|
||||
return addValues(names, type, new ArrayList<T>(), onlyAtThisLoc, true, false);
|
||||
}
|
||||
@Deprecated
|
||||
public <T extends Feature> T getFirstValue(final Class<T> type, final String name) {
|
||||
return safeGetFirst(getValues(type, name));
|
||||
}
|
||||
@Deprecated
|
||||
public <T extends Feature> T getFirstValue(final Class<T> type, final String name, final GenomeLoc onlyAtThisLoc) {
|
||||
return safeGetFirst(getValues(type, name, onlyAtThisLoc));
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------------------------
|
||||
//
|
||||
//
|
||||
// Private utility functions
|
||||
//
|
||||
//
|
||||
// ------------------------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Binds the list of reference ordered data records (RMDs) to track name at this site. Should be used only by the traversal
|
||||
* system to provide access to RMDs in a structured way to the walkers.
|
||||
* Helper function for getFirst() operations that takes a list of <T> and
|
||||
* returns the first element, or null if no such element exists.
|
||||
*
|
||||
* @param name the name of the track
|
||||
* @param rod the collection of RMD data
|
||||
*/
|
||||
public void bind(final String name, RODRecordList rod) {
|
||||
//logger.debug(String.format("Binding %s to %s", name, rod));
|
||||
map.put(canonicalName(name), rod);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Converts all possible ROD tracks to VariantContexts objects, of all types, allowing any start and any number
|
||||
* of entries per ROD.
|
||||
* The name of each VariantContext corresponds to the ROD name.
|
||||
*
|
||||
* @param ref reference context
|
||||
* @return variant context
|
||||
*/
|
||||
public Collection<VariantContext> getAllVariantContexts(ReferenceContext ref) {
|
||||
return getAllVariantContexts(ref, null, null, false, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns all of the variant contexts that start at the current location
|
||||
* @param ref
|
||||
* @param curLocation
|
||||
* @param l
|
||||
* @param <T>
|
||||
* @return
|
||||
*/
|
||||
public Collection<VariantContext> getAllVariantContexts(ReferenceContext ref, GenomeLoc curLocation) {
|
||||
return getAllVariantContexts(ref, null, curLocation, true, false);
|
||||
@Requires({"l != null"})
|
||||
final private <T extends Feature> T safeGetFirst(final List<T> l) {
|
||||
return l.isEmpty() ? null : l.get(0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts all possible ROD tracks to VariantContexts objects. If allowedTypes != null, then only
|
||||
* VariantContexts in the allow set of types will be returned. If requireStartsHere is true, then curLocation
|
||||
* must not be null, and only records whose start position is == to curLocation.getStart() will be returned.
|
||||
* If takeFirstOnly is true, then only a single VariantContext will be converted from any individual ROD. Of course,
|
||||
* this single object must pass the allowed types and start here options if provided. Note that the result
|
||||
* may return multiple VariantContexts with the same name if that particular track contained multiple RODs spanning
|
||||
* the current location.
|
||||
*
|
||||
* The name of each VariantContext corresponds to the ROD name.
|
||||
*
|
||||
* @param ref reference context
|
||||
* @param allowedTypes allowed types
|
||||
* @param curLocation location
|
||||
* @param requireStartHere do we require the rod to start at this location?
|
||||
* @param takeFirstOnly do we take the first rod only?
|
||||
* @return variant context
|
||||
*/
|
||||
public Collection<VariantContext> getAllVariantContexts(ReferenceContext ref, EnumSet<VariantContext.Type> allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) {
|
||||
List<VariantContext> contexts = new ArrayList<VariantContext>();
|
||||
|
||||
for ( RODRecordList rodList : getBoundRodTracks() ) {
|
||||
addVariantContexts(contexts, rodList, ref, allowedTypes, curLocation, requireStartHere, takeFirstOnly);
|
||||
}
|
||||
|
||||
return contexts;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the variant contexts associated with track name name
|
||||
*
|
||||
* see getVariantContexts for more information.
|
||||
*
|
||||
* @param ref ReferenceContext to enable conversion to variant context
|
||||
* @param name name
|
||||
* @param curLocation location
|
||||
* @param allowedTypes allowed types
|
||||
* @param requireStartHere do we require the rod to start at this location?
|
||||
* @param takeFirstOnly do we take the first rod only?
|
||||
* @return variant context
|
||||
*/
|
||||
// public Collection<VariantContext> getVariantContexts(String name, EnumSet<VariantContext.Type> allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) {
|
||||
// return getVariantContexts(null, Arrays.asList(name), allowedTypes, curLocation, requireStartHere, takeFirstOnly);
|
||||
// }
|
||||
|
||||
public Collection<VariantContext> getVariantContexts(ReferenceContext ref, String name, EnumSet<VariantContext.Type> allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) {
|
||||
return getVariantContexts(ref, Arrays.asList(name), allowedTypes, curLocation, requireStartHere, takeFirstOnly);
|
||||
}
|
||||
|
||||
// public Collection<VariantContext> getVariantContexts(Collection<String> names, EnumSet<VariantContext.Type> allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) {
|
||||
// return getVariantContexts(null, names, allowedTypes, curLocation, requireStartHere, takeFirstOnly);
|
||||
// }
|
||||
|
||||
public Collection<VariantContext> getVariantContexts(ReferenceContext ref, Collection<String> names, EnumSet<VariantContext.Type> allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) {
|
||||
Collection<VariantContext> contexts = new ArrayList<VariantContext>();
|
||||
|
||||
private <T extends Feature> List<T> addValues(final Collection<String> names,
|
||||
final Class<T> type,
|
||||
List<T> values,
|
||||
final GenomeLoc curLocation,
|
||||
final boolean requireStartHere,
|
||||
final boolean takeFirstOnly ) {
|
||||
for ( String name : names ) {
|
||||
RODRecordList rodList = getTrackDataByName(name,true); // require that the name is an exact match
|
||||
|
||||
if ( rodList != null )
|
||||
addVariantContexts(contexts, rodList, ref, allowedTypes, curLocation, requireStartHere, takeFirstOnly );
|
||||
RODRecordList rodList = getTrackDataByName(name); // require that the name is an exact match
|
||||
values = addValues(name, type, values, rodList, curLocation, requireStartHere, takeFirstOnly );
|
||||
if ( takeFirstOnly && ! values.isEmpty() )
|
||||
break;
|
||||
}
|
||||
|
||||
return contexts;
|
||||
}
|
||||
|
||||
public Collection<VariantContext> getVariantContextsByPrefix(ReferenceContext ref, Collection<String> names, EnumSet<VariantContext.Type> allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) {
|
||||
Collection<VariantContext> contexts = new ArrayList<VariantContext>();
|
||||
|
||||
for ( String name : names ) {
|
||||
RODRecordList rodList = getTrackDataByName(name,false); // require that the name is an exact match
|
||||
|
||||
if ( rodList != null )
|
||||
addVariantContexts(contexts, rodList, ref, allowedTypes, curLocation, requireStartHere, takeFirstOnly );
|
||||
}
|
||||
|
||||
return contexts;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the variant context associated with name, and assumes the system only has a single bound track at this location. Throws an exception if not.
|
||||
* see getVariantContexts for more information.
|
||||
*
|
||||
* @param name name
|
||||
* @param curLocation location
|
||||
* @param allowedTypes allowed types
|
||||
* @param requireStartHere do we require the rod to start at this location?
|
||||
* @return variant context
|
||||
*/
|
||||
public VariantContext getVariantContext(ReferenceContext ref, String name, EnumSet<VariantContext.Type> allowedTypes, GenomeLoc curLocation, boolean requireStartHere ) {
|
||||
Collection<VariantContext> contexts = getVariantContexts(ref, name, allowedTypes, curLocation, requireStartHere, false );
|
||||
|
||||
if ( contexts.size() > 1 )
|
||||
throw new ReviewedStingException("Requested a single VariantContext object for track " + name + " but multiple variants were present at position " + curLocation);
|
||||
else if ( contexts.size() == 0 )
|
||||
return null;
|
||||
else
|
||||
return contexts.iterator().next();
|
||||
}
|
||||
|
||||
/**
|
||||
* Very simple accessor that gets the first (and only!) VC associated with name at the current location, or
|
||||
* null if there's no binding here.
|
||||
*
|
||||
* @param ref
|
||||
* @param name
|
||||
* @param curLocation
|
||||
* @return
|
||||
*/
|
||||
public VariantContext getVariantContext(ReferenceContext ref, String name, GenomeLoc curLocation) {
|
||||
return getVariantContext(ref, name, null, curLocation, true);
|
||||
return values;
|
||||
}
|
||||
|
||||
|
||||
private void addVariantContexts(Collection<VariantContext> contexts, RODRecordList rodList, ReferenceContext ref, EnumSet<VariantContext.Type> allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) {
|
||||
|
||||
private <T extends Feature> List<T> addValues(final String name,
|
||||
final Class<T> type,
|
||||
List<T> values,
|
||||
final RODRecordList rodList,
|
||||
final GenomeLoc curLocation,
|
||||
final boolean requireStartHere,
|
||||
final boolean takeFirstOnly ) {
|
||||
for ( GATKFeature rec : rodList ) {
|
||||
if ( VariantContextAdaptors.canBeConvertedToVariantContext(rec.getUnderlyingObject()) ) {
|
||||
// ok, we might actually be able to turn this record in a variant context
|
||||
VariantContext vc = VariantContextAdaptors.toVariantContext(rodList.getName(), rec.getUnderlyingObject(), ref);
|
||||
if ( ! requireStartHere || rec.getLocation().getStart() == curLocation.getStart() ) { // ok, we are going to keep this thing
|
||||
Object obj = rec.getUnderlyingObject();
|
||||
if (!(type.isAssignableFrom(obj.getClass())))
|
||||
throw new UserException.CommandLineException("Unable to cast track named " + name + " to type of " + type.toString()
|
||||
+ " it's of type " + obj.getClass());
|
||||
|
||||
if ( vc == null ) // sometimes the track has odd stuff in it that can't be converted
|
||||
continue;
|
||||
T objT = (T)obj;
|
||||
if ( takeFirstOnly ) {
|
||||
if ( values == null )
|
||||
values = Arrays.asList(objT);
|
||||
else
|
||||
values.add(objT);
|
||||
|
||||
// now, let's decide if we want to keep it
|
||||
boolean goodType = allowedTypes == null || allowedTypes.contains(vc.getType());
|
||||
boolean goodPos = ! requireStartHere || rec.getLocation().getStart() == curLocation.getStart();
|
||||
|
||||
if ( goodType && goodPos ) { // ok, we are going to keep this thing
|
||||
contexts.add(vc);
|
||||
|
||||
if ( takeFirstOnly )
|
||||
// we only want the first passing instance, so break the loop over records in rodList
|
||||
break;
|
||||
break;
|
||||
} else {
|
||||
if ( values == null )
|
||||
values = new ArrayList<T>();
|
||||
values.add(objT);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return values == null ? Collections.<T>emptyList() : values;
|
||||
}
|
||||
|
||||
/**
|
||||
* Finds the reference metadata track named 'name' and returns all ROD records from that track associated
|
||||
* with the current site as a RODRecordList collection object. If no data track with specified name is available,
|
||||
* with the current site as a RODRecordList List object. If no data track with specified name is available,
|
||||
* returns defaultValue wrapped as RODRecordList object. NOTE: if defaultValue is null, it will be wrapped up
|
||||
* with track name set to 'name' and location set to null; otherwise the wrapper object will have name and
|
||||
* location set to defaultValue.getName() and defaultValue.getLocation(), respectively (use caution,
|
||||
|
|
@ -367,29 +427,16 @@ public class RefMetaDataTracker {
|
|||
* for instance, on locus traversal, location is usually expected to be a single base we are currently looking at,
|
||||
* regardless of the presence of "extended" RODs overlapping with that location).
|
||||
* @param name track name
|
||||
* @param requireExactMatch do we require an exact match of the rod name?
|
||||
* @return track data for the given rod
|
||||
*/
|
||||
private RODRecordList getTrackDataByName(final String name, boolean requireExactMatch) {
|
||||
//logger.debug(String.format("Lookup %s%n", name));
|
||||
|
||||
private RODRecordList getTrackDataByName(final String name) {
|
||||
final String luName = canonicalName(name);
|
||||
RODRecordList trackData = null;
|
||||
RODRecordList l = map.get(luName);
|
||||
return l == null ? EMPTY_ROD_RECORD_LIST : l;
|
||||
}
|
||||
|
||||
if ( requireExactMatch ) {
|
||||
if ( map.containsKey(luName) )
|
||||
trackData = map.get(luName);
|
||||
} else {
|
||||
for ( Map.Entry<String, RODRecordList> datum : map.entrySet() ) {
|
||||
final String rodName = datum.getKey();
|
||||
if ( datum.getValue() != null && rodName.startsWith(luName) ) {
|
||||
if ( trackData == null ) trackData = new RODRecordListImpl(name);
|
||||
//System.out.printf("Adding bindings from %s to %s at %s%n", rodName, name, datum.getValue().getLocation());
|
||||
((RODRecordListImpl)trackData).add(datum.getValue(), true);
|
||||
}
|
||||
}
|
||||
}
|
||||
return trackData;
|
||||
private RODRecordList getTrackDataByName(final RodBinding binding) {
|
||||
return getTrackDataByName(binding.getName());
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -398,6 +445,7 @@ public class RefMetaDataTracker {
|
|||
* @return canonical name of the rod
|
||||
*/
|
||||
private final String canonicalName(final String name) {
|
||||
// todo -- remove me after switch to RodBinding syntax
|
||||
return name.toLowerCase();
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,130 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.refdata;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
||||
import java.io.*;
|
||||
import java.lang.reflect.Method;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Class for representing arbitrary reference ordered data sets
|
||||
* <p/>
|
||||
* User: mdepristo
|
||||
* Date: Feb 27, 2009
|
||||
* Time: 10:47:14 AM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class ReferenceOrderedData<ROD extends ReferenceOrderedDatum> implements Iterable<ReferenceOrderedDatum> {
|
||||
private String name;
|
||||
private File file = null;
|
||||
// private String fieldDelimiter;
|
||||
|
||||
/** Header object returned from the datum */
|
||||
// private Object header = null;
|
||||
|
||||
private Class<ROD> type = null; // runtime type information for object construction
|
||||
|
||||
/** our log, which we want to capture anything from this class */
|
||||
private static Logger logger = Logger.getLogger(ReferenceOrderedData.class);
|
||||
|
||||
/**
|
||||
* given an existing file, open it and append all the valid triplet lines to an existing list
|
||||
*
|
||||
* @param rodTripletList the list of existing triplets
|
||||
* @param filename the file to attempt to extract ROD triplets from
|
||||
*/
|
||||
protected static void extractRodsFromFile(List<String> rodTripletList, String filename) {
|
||||
BufferedReader str;
|
||||
try {
|
||||
str = new BufferedReader(new FileReader(new File(filename)));
|
||||
} catch (FileNotFoundException e) {
|
||||
throw new UserException.CouldNotReadInputFile(new File(filename), "Unable to load the ROD input file", e);
|
||||
}
|
||||
String line = "NO LINES READ IN";
|
||||
try {
|
||||
while ((line = str.readLine()) != null) {
|
||||
if (line.matches(".+,.+,.+")) rodTripletList.add(line.trim());
|
||||
else logger.warn("the following file line didn't parsing into a triplet -> " + line);
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new UserException.CouldNotReadInputFile(new File(filename), "Failed reading the input rod file; last line read was " + line, e);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
//
|
||||
// Constructors
|
||||
//
|
||||
// ----------------------------------------------------------------------
|
||||
public ReferenceOrderedData(final String name, File file, Class<ROD> type ) {
|
||||
this.name = name;
|
||||
this.file = file;
|
||||
this.type = type;
|
||||
// this.header = initializeROD(name, file, type);
|
||||
// this.fieldDelimiter = newROD(name, type).delimiterRegex();
|
||||
}
|
||||
|
||||
public String getName() { return name; }
|
||||
|
||||
public File getFile() { return file; }
|
||||
|
||||
public Class<ROD> getType() { return type; }
|
||||
|
||||
/**
|
||||
* Special equals override to see if this ROD is compatible with the given
|
||||
* name and type. 'Compatible' means that this ROD has the name that's passed
|
||||
* in and its data can fit into the container specified by type.
|
||||
*
|
||||
* @param name Name to check.
|
||||
* @param type Type to check.
|
||||
*
|
||||
* @return True if these parameters imply this rod. False otherwise.
|
||||
*/
|
||||
public boolean matches(String name, Class<? extends ReferenceOrderedDatum> type) {
|
||||
return this.name.equals(name) && type.isAssignableFrom(this.type);
|
||||
}
|
||||
|
||||
public Iterator<ReferenceOrderedDatum> iterator() {
|
||||
Iterator<ReferenceOrderedDatum> it;
|
||||
try {
|
||||
Method m = type.getDeclaredMethod("createIterator", String.class, java.io.File.class);
|
||||
it = (Iterator<ReferenceOrderedDatum>) m.invoke(null, name, file);
|
||||
} catch (java.lang.NoSuchMethodException e) {
|
||||
it = new RODRecordIterator(file,name,type);
|
||||
} catch (java.lang.NullPointerException e) {
|
||||
throw new RuntimeException(e);
|
||||
} catch (java.lang.SecurityException e) {
|
||||
throw new RuntimeException(e);
|
||||
} catch (java.lang.IllegalAccessException e) {
|
||||
throw new RuntimeException(e);
|
||||
} catch (java.lang.IllegalArgumentException e) {
|
||||
throw new RuntimeException(e);
|
||||
} catch (java.lang.reflect.InvocationTargetException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
// return new RODIterator<ROD>(it);
|
||||
return it;
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
//
|
||||
// Manipulations of all of the data
|
||||
//
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
public static void write(ArrayList<ReferenceOrderedDatum> data, File output) throws IOException {
|
||||
final FileWriter out = new FileWriter(output);
|
||||
|
||||
for (ReferenceOrderedDatum rec : data) {
|
||||
out.write(rec.repl() + "\n");
|
||||
}
|
||||
|
||||
out.close();
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,48 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.refdata;
|
||||
|
||||
import java.io.File;
|
||||
|
||||
/**
|
||||
* An interface marking that a given Tribble codec can look at the file and determine whether the
|
||||
* codec specifically parsing the contents of the file.
|
||||
*/
|
||||
public interface SelfScopingFeatureCodec {
|
||||
/**
|
||||
* This function returns true iff the File potentialInput can be parsed by this
|
||||
* codec.
|
||||
*
|
||||
* The GATK assumes that there's never a situation where two SelfScopingFeaetureCodecs
|
||||
* return true for the same file. If this occurs the GATK splits out an error.
|
||||
*
|
||||
* Note this function must never throw an error. All errors should be trapped
|
||||
* and false returned.
|
||||
*
|
||||
* @param potentialInput the file to test for parsiability with this codec
|
||||
* @return true if potentialInput can be parsed, false otherwise
|
||||
*/
|
||||
public boolean canDecode(final File potentialInput);
|
||||
}
|
||||
|
|
@ -4,7 +4,7 @@ import org.broad.tribble.Feature;
|
|||
import org.broad.tribble.dbsnp.DbSNPFeature;
|
||||
import org.broad.tribble.gelitext.GeliTextFeature;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;
|
||||
import org.broadinstitute.sting.gatk.refdata.features.DbSNPHelper;
|
||||
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
||||
import org.broadinstitute.sting.utils.codecs.hapmap.HapMapFeature;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
|
||||
|
|
@ -112,24 +112,28 @@ public class VariantContextAdaptors {
|
|||
alleles.add(refAllele);
|
||||
|
||||
// add all of the alt alleles
|
||||
boolean sawNullAllele = refAllele.isNull();
|
||||
for ( String alt : DbSNPHelper.getAlternateAlleleList(dbsnp) ) {
|
||||
if ( ! Allele.acceptableAlleleBases(alt) ) {
|
||||
//System.out.printf("Excluding dbsnp record %s%n", dbsnp);
|
||||
return null;
|
||||
}
|
||||
alleles.add(Allele.create(alt, false));
|
||||
Allele altAllele = Allele.create(alt, false);
|
||||
alleles.add(altAllele);
|
||||
if ( altAllele.isNull() )
|
||||
sawNullAllele = true;
|
||||
}
|
||||
|
||||
Map<String, Object> attributes = new HashMap<String, Object>();
|
||||
attributes.put(VariantContext.ID_KEY, dbsnp.getRsID());
|
||||
if ( DbSNPHelper.isDeletion(dbsnp) ) {
|
||||
int index = dbsnp.getStart() - ref.getWindow().getStart() - 1;
|
||||
if ( index < 0 )
|
||||
return null; // we weren't given enough reference context to create the VariantContext
|
||||
attributes.put(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY, new Byte(ref.getBases()[index]));
|
||||
}
|
||||
Collection<Genotype> genotypes = null;
|
||||
VariantContext vc = new VariantContext(name, dbsnp.getChr(),dbsnp.getStart() - (DbSNPHelper.isDeletion(dbsnp) ? 1 : 0),dbsnp.getEnd(), alleles, genotypes, VariantContext.NO_NEG_LOG_10PERROR, null, attributes);
|
||||
|
||||
int index = dbsnp.getStart() - ref.getWindow().getStart() - 1;
|
||||
if ( index < 0 )
|
||||
return null; // we weren't given enough reference context to create the VariantContext
|
||||
Byte refBaseForIndel = new Byte(ref.getBases()[index]);
|
||||
|
||||
Map<String, Genotype> genotypes = null;
|
||||
VariantContext vc = new VariantContext(name, dbsnp.getChr(), dbsnp.getStart() - (sawNullAllele ? 1 : 0), dbsnp.getEnd() - (refAllele.isNull() ? 1 : 0), alleles, genotypes, VariantContext.NO_NEG_LOG_10PERROR, null, attributes, refBaseForIndel);
|
||||
return vc;
|
||||
} else
|
||||
return null; // can't handle anything else
|
||||
|
|
@ -159,16 +163,6 @@ public class VariantContextAdaptors {
|
|||
@Override
|
||||
public Class<? extends Feature> getAdaptableFeatureType() { return GeliTextFeature.class; }
|
||||
|
||||
/**
|
||||
* convert to a Variant Context, given:
|
||||
* @param name the name of the ROD
|
||||
* @param input the Rod object, in this case a RodGeliText
|
||||
* @return a VariantContext object
|
||||
*/
|
||||
// VariantContext convert(String name, Object input) {
|
||||
// return convert(name, input, null);
|
||||
// }
|
||||
|
||||
/**
|
||||
* convert to a Variant Context, given:
|
||||
* @param name the name of the ROD
|
||||
|
|
@ -234,16 +228,6 @@ public class VariantContextAdaptors {
|
|||
@Override
|
||||
public Class<? extends Feature> getAdaptableFeatureType() { return HapMapFeature.class; }
|
||||
|
||||
/**
|
||||
* convert to a Variant Context, given:
|
||||
* @param name the name of the ROD
|
||||
* @param input the Rod object, in this case a RodGeliText
|
||||
* @return a VariantContext object
|
||||
*/
|
||||
// VariantContext convert(String name, Object input) {
|
||||
// return convert(name, input, null);
|
||||
// }
|
||||
|
||||
/**
|
||||
* convert to a Variant Context, given:
|
||||
* @param name the name of the ROD
|
||||
|
|
@ -258,6 +242,11 @@ public class VariantContextAdaptors {
|
|||
|
||||
HapMapFeature hapmap = (HapMapFeature)input;
|
||||
|
||||
int index = hapmap.getStart() - ref.getWindow().getStart();
|
||||
if ( index < 0 )
|
||||
return null; // we weren't given enough reference context to create the VariantContext
|
||||
Byte refBaseForIndel = new Byte(ref.getBases()[index]);
|
||||
|
||||
HashSet<Allele> alleles = new HashSet<Allele>();
|
||||
Allele refSNPAllele = Allele.create(ref.getBase(), true);
|
||||
int deletionLength = -1;
|
||||
|
|
@ -316,7 +305,7 @@ public class VariantContextAdaptors {
|
|||
long end = hapmap.getEnd();
|
||||
if ( deletionLength > 0 )
|
||||
end += deletionLength;
|
||||
VariantContext vc = new VariantContext(name, hapmap.getChr(), hapmap.getStart(), end, alleles, genotypes, VariantContext.NO_NEG_LOG_10PERROR, null, attrs);
|
||||
VariantContext vc = new VariantContext(name, hapmap.getChr(), hapmap.getStart(), end, alleles, genotypes, VariantContext.NO_NEG_LOG_10PERROR, null, attrs, refBaseForIndel);
|
||||
return vc;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,6 +1,31 @@
|
|||
package org.broadinstitute.sting.gatk.refdata.utils.helpers;
|
||||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.refdata.features;
|
||||
|
||||
import net.sf.samtools.util.SequenceUtil;
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broad.tribble.annotation.Strand;
|
||||
import org.broad.tribble.dbsnp.DbSNPFeature;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
|
|
@ -34,12 +59,12 @@ public class DbSNPHelper {
|
|||
return dbsnp;
|
||||
}
|
||||
|
||||
public static String rsIDOfFirstRealSNP(List<Object> featureList) {
|
||||
public static String rsIDOfFirstRealSNP(List<Feature> featureList, boolean deleteMe) {
|
||||
if (featureList == null)
|
||||
return null;
|
||||
|
||||
String rsID = null;
|
||||
for ( Object d : featureList ) {
|
||||
for ( Feature d : featureList ) {
|
||||
if ( d instanceof DbSNPFeature ) {
|
||||
if ( DbSNPHelper.isSNP((DbSNPFeature)d) ) {
|
||||
rsID = ((DbSNPFeature)d).getRsID();
|
||||
|
|
@ -56,14 +81,29 @@ public class DbSNPHelper {
|
|||
return rsID;
|
||||
}
|
||||
|
||||
public static String rsIDOfFirstRealIndel(List<Object> featureList) {
|
||||
public static String rsIDOfFirstRealSNP(List<VariantContext> VCs) {
|
||||
if ( VCs == null )
|
||||
return null;
|
||||
|
||||
String rsID = null;
|
||||
for ( VariantContext vc : VCs ) {
|
||||
if ( vc.isSNP() ) {
|
||||
rsID = vc.getID();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return rsID;
|
||||
}
|
||||
|
||||
public static String rsIDOfFirstRealIndel(List<Feature> featureList) {
|
||||
if (featureList == null)
|
||||
return null;
|
||||
|
||||
String rsID = null;
|
||||
for ( Object d : featureList ) {
|
||||
for ( Feature d : featureList ) {
|
||||
if ( d instanceof DbSNPFeature ) {
|
||||
if ( DbSNPHelper.isIndel((DbSNPFeature)d) ) {
|
||||
if ( DbSNPHelper.isIndel((DbSNPFeature) d) ) {
|
||||
rsID = ((DbSNPFeature)d).getRsID();
|
||||
break;
|
||||
}
|
||||
|
|
@ -117,7 +157,11 @@ public class DbSNPHelper {
|
|||
}
|
||||
|
||||
public static boolean isIndel(DbSNPFeature feature) {
|
||||
return DbSNPHelper.isInsertion(feature) || DbSNPHelper.isDeletion(feature) || feature.getVariantType().contains("in-del");
|
||||
return DbSNPHelper.isInsertion(feature) || DbSNPHelper.isDeletion(feature) || DbSNPHelper.isComplexIndel(feature);
|
||||
}
|
||||
|
||||
public static boolean isComplexIndel(DbSNPFeature feature) {
|
||||
return feature.getVariantType().contains("in-del");
|
||||
}
|
||||
|
||||
public static boolean isHapmap(DbSNPFeature feature) {
|
||||
|
|
@ -1,193 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.refdata.features.annotator;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broad.tribble.exception.CodecLineParsingException;
|
||||
import org.broad.tribble.readers.AsciiLineReader;
|
||||
import org.broad.tribble.readers.LineReader;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.StringTokenizer;
|
||||
|
||||
public class AnnotatorInputTableCodec implements ReferenceDependentFeatureCodec<AnnotatorInputTableFeature> {
|
||||
|
||||
private static Logger logger = Logger.getLogger(AnnotatorInputTableCodec.class);
|
||||
|
||||
public static final String DELIMITER = "\t";
|
||||
|
||||
private ArrayList<String> header;
|
||||
|
||||
/**
|
||||
* The parser to use when resolving genome-wide locations.
|
||||
*/
|
||||
private GenomeLocParser genomeLocParser;
|
||||
|
||||
/**
|
||||
* Set the parser to use when resolving genetic data.
|
||||
* @param genomeLocParser The supplied parser.
|
||||
*/
|
||||
public void setGenomeLocParser(GenomeLocParser genomeLocParser) {
|
||||
this.genomeLocParser = genomeLocParser;
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses the header.
|
||||
*
|
||||
* @param reader
|
||||
*
|
||||
* @return The # of header lines for this file.
|
||||
*/
|
||||
public Object readHeader(LineReader reader)
|
||||
{
|
||||
int[] lineCounter = new int[1];
|
||||
try {
|
||||
header = readHeader(reader, lineCounter);
|
||||
} catch(IOException e) {
|
||||
throw new IllegalArgumentException("Unable to read from file.", e);
|
||||
}
|
||||
return header;
|
||||
}
|
||||
|
||||
public Class<AnnotatorInputTableFeature> getFeatureType() {
|
||||
return AnnotatorInputTableFeature.class;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Feature decodeLoc(String line) {
|
||||
StringTokenizer st = new StringTokenizer(line, DELIMITER);
|
||||
if ( st.countTokens() < 1 )
|
||||
throw new CodecLineParsingException("Couldn't parse GenomeLoc out of the following line because there aren't enough tokens.\nLine: " + line);
|
||||
|
||||
GenomeLoc loc;
|
||||
String chr = st.nextToken();
|
||||
if ( chr.indexOf(":") != -1 ) {
|
||||
loc = genomeLocParser.parseGenomeLoc(chr);
|
||||
} else {
|
||||
if ( st.countTokens() < 3 )
|
||||
throw new CodecLineParsingException("Couldn't parse GenomeLoc out of the following line because there aren't enough tokens.\nLine: " + line);
|
||||
loc = genomeLocParser.createGenomeLoc(chr, Integer.valueOf(st.nextToken()), Integer.valueOf(st.nextToken()));
|
||||
}
|
||||
return new AnnotatorInputTableFeature(loc.getContig(), loc.getStart(), loc.getStop());
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Parses the line into an AnnotatorInputTableFeature object.
|
||||
*
|
||||
* @param line
|
||||
*/
|
||||
public AnnotatorInputTableFeature decode(String line) {
|
||||
final ArrayList<String> header = this.header; //optimization
|
||||
final ArrayList<String> values = Utils.split(line, DELIMITER, header.size());
|
||||
|
||||
if ( values.size() != header.size()) {
|
||||
throw new CodecLineParsingException(String.format("Encountered a line that has %d columns while the header has %d columns.\nHeader: " + header + "\nLine: " + values, values.size(), header.size()));
|
||||
}
|
||||
|
||||
final AnnotatorInputTableFeature feature = new AnnotatorInputTableFeature(header);
|
||||
for ( int i = 0; i < header.size(); i++ ) {
|
||||
feature.putColumnValue(header.get(i), values.get(i));
|
||||
}
|
||||
|
||||
GenomeLoc loc;
|
||||
if ( values.get(0).indexOf(":") != -1 )
|
||||
loc = genomeLocParser.parseGenomeLoc(values.get(0));
|
||||
else
|
||||
loc = genomeLocParser.createGenomeLoc(values.get(0), Integer.valueOf(values.get(1)), Integer.valueOf(values.get(2)));
|
||||
|
||||
//parse the location
|
||||
feature.setChr(loc.getContig());
|
||||
feature.setStart((int)loc.getStart());
|
||||
feature.setEnd((int)loc.getStop());
|
||||
|
||||
return feature;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the header.
|
||||
* @param source
|
||||
* @return
|
||||
* @throws IOException
|
||||
*/
|
||||
public static ArrayList<String> readHeader(final File source) throws IOException {
|
||||
FileInputStream is = new FileInputStream(source);
|
||||
try {
|
||||
return readHeader(new AsciiLineReader(is), null);
|
||||
} finally {
|
||||
is.close();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns the header, and also sets the 2nd arg to the number of lines in the header.
|
||||
* @param source
|
||||
* @param lineCounter An array of length 1 or null. If not null, array[0] will be set to the number of lines in the header.
|
||||
* @return The header fields.
|
||||
* @throws IOException
|
||||
*/
|
||||
private static ArrayList<String> readHeader(final LineReader source, int[] lineCounter) throws IOException {
|
||||
|
||||
ArrayList<String> header = null;
|
||||
int numLines = 0;
|
||||
|
||||
//find the 1st line that's non-empty and not a comment
|
||||
String line = null;
|
||||
while( (line = source.readLine()) != null ) {
|
||||
numLines++;
|
||||
if ( line.trim().isEmpty() || line.startsWith("#") ) {
|
||||
continue;
|
||||
}
|
||||
|
||||
//parse the header
|
||||
header = Utils.split(line, DELIMITER);
|
||||
break;
|
||||
}
|
||||
|
||||
// check that we found the header
|
||||
if ( header == null ) {
|
||||
throw new IllegalArgumentException("No header in " + source + ". All lines are either comments or empty.");
|
||||
}
|
||||
|
||||
if(lineCounter != null) {
|
||||
lineCounter[0] = numLines;
|
||||
}
|
||||
|
||||
logger.debug(String.format("Found header line containing %d columns:\n[%s]", header.size(), Utils.join("\t", header)));
|
||||
|
||||
return header;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -1,158 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.refdata.features.annotator;
|
||||
|
||||
import org.broad.tribble.Feature;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* This class represents a single record in an AnnotatorInputTable.
|
||||
*/
|
||||
public class AnnotatorInputTableFeature implements Feature {
|
||||
|
||||
private ArrayList<String> columnNames;
|
||||
private HashMap<String, String> columnValues; //maps colum names to column values
|
||||
|
||||
private String chr;
|
||||
private int start;
|
||||
private int end;
|
||||
private String strRep = null;
|
||||
|
||||
/**
|
||||
* Constructor.
|
||||
* @param chr The chromosome name.
|
||||
* @param start The start position
|
||||
* @param end The end position
|
||||
*/
|
||||
public AnnotatorInputTableFeature(String chr, int start, int end) {
|
||||
this.chr = chr;
|
||||
this.start = start;
|
||||
this.end = end;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Constructor.
|
||||
* @param columnNames The column names as parsed out of the file header.
|
||||
*/
|
||||
public AnnotatorInputTableFeature(ArrayList<String> columnNames) {
|
||||
this.columnNames = columnNames;
|
||||
this.columnValues = new HashMap<String, String>();
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* @return the list of column names from the file header.
|
||||
*/
|
||||
public ArrayList<String> getHeader() {
|
||||
return columnNames;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns the value of the given column.
|
||||
*
|
||||
* @param columnName The column name as it appears in the file header.
|
||||
* @return The value
|
||||
*/
|
||||
public String getColumnValue(final String columnName) {
|
||||
return columnValues.get(columnName);
|
||||
}
|
||||
|
||||
|
||||
public boolean containsColumnName(final String columnName) {
|
||||
return columnValues.containsKey(columnName);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Sets the value for the given column.
|
||||
*
|
||||
* @param columnName The column name as it appears in the file header.
|
||||
* @param value The value
|
||||
* @return The existing value associated with the columnName, if there is one.
|
||||
*/
|
||||
protected String putColumnValue(final String columnName, final String value) {
|
||||
return columnValues.put(columnName, value);
|
||||
}
|
||||
|
||||
/**
|
||||
* @return all values in this line, hashed by their column names.
|
||||
*/
|
||||
public Map<String,String> getColumnValues() {
|
||||
return Collections.unmodifiableMap(columnValues);
|
||||
}
|
||||
|
||||
|
||||
public String getChr() {
|
||||
return chr;
|
||||
}
|
||||
|
||||
public int getStart() {
|
||||
return start;
|
||||
}
|
||||
|
||||
public int getEnd() {
|
||||
return end;
|
||||
}
|
||||
|
||||
protected void setChr(String chr) {
|
||||
this.chr = chr;
|
||||
}
|
||||
|
||||
protected void setStart(int start) {
|
||||
this.start = start;
|
||||
}
|
||||
|
||||
protected void setEnd(int end) {
|
||||
this.end = end;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
if ( strRep == null ) {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
|
||||
for(String columnName : columnNames ) {
|
||||
if ( sb.length() == 0 )
|
||||
sb.append("[");
|
||||
else
|
||||
sb.append(", ");
|
||||
sb.append(columnName + "=" + columnValues.get(columnName));
|
||||
}
|
||||
sb.append("]");
|
||||
|
||||
strRep = sb.toString();
|
||||
}
|
||||
|
||||
return strRep;
|
||||
}
|
||||
}
|
||||
|
|
@ -1,7 +1,6 @@
|
|||
package org.broadinstitute.sting.gatk.refdata.features.refseq;
|
||||
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broadinstitute.sting.gatk.refdata.Transcript;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
package org.broadinstitute.sting.gatk.refdata;
|
||||
package org.broadinstitute.sting.gatk.refdata.features.refseq;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.HasGenomeLocation;
|
||||
|
|
@ -12,14 +12,13 @@ import org.broadinstitute.sting.commandline.CommandLineProgram;
|
|||
import org.broadinstitute.sting.commandline.Input;
|
||||
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.FeatureManager;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileOutputStream;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* a utility class that can create an index, written to a target location. This is useful when you're unable to write to the directory
|
||||
|
|
@ -83,14 +82,14 @@ public class RMDIndexer extends CommandLineProgram {
|
|||
RMDTrackBuilder builder = new RMDTrackBuilder(ref.getSequenceDictionary(),genomeLocParser, ValidationExclusion.TYPE.ALL);
|
||||
|
||||
// find the types available to the track builders
|
||||
Map<String,Class> typeMapping = builder.getAvailableTrackNamesAndTypes();
|
||||
FeatureManager.FeatureDescriptor descriptor = builder.getFeatureManager().getByName(inputFileType);
|
||||
|
||||
// check that the type is valid
|
||||
if (!typeMapping.containsKey(inputFileType))
|
||||
throw new IllegalArgumentException("The type specified " + inputFileType + " is not a valid type. Valid type list: " + Utils.join(",",typeMapping.keySet()));
|
||||
if (descriptor == null)
|
||||
throw new IllegalArgumentException("The type specified " + inputFileType + " is not a valid type. Valid type list: " + builder.getFeatureManager().userFriendlyListOfAvailableFeatures());
|
||||
|
||||
// create the codec
|
||||
FeatureCodec codec = builder.createByType(typeMapping.get(inputFileType));
|
||||
FeatureCodec codec = builder.getFeatureManager().createCodec(descriptor, "foo", genomeLocParser);
|
||||
|
||||
// check if it's a reference dependent feature codec
|
||||
if (codec instanceof ReferenceDependentFeatureCodec)
|
||||
|
|
|
|||
|
|
@ -0,0 +1,216 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.refdata.tracks;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broad.tribble.FeatureCodec;
|
||||
import org.broad.tribble.NameAwareCodec;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec;
|
||||
import org.broadinstitute.sting.gatk.refdata.SelfScopingFeatureCodec;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.*;
|
||||
|
||||
|
||||
/**
|
||||
* Class for managing Tribble Feature readers available to the GATK. The features
|
||||
* are dynamically determined via a PluginManager. This class provides convenient
|
||||
* getter methods for obtaining FeatureDescriptor objects that collect all of the
|
||||
* useful information about the Tribble Codec, Feature, and name in one place.
|
||||
*
|
||||
* @author depristo
|
||||
*/
|
||||
public class FeatureManager {
|
||||
public static class FeatureDescriptor {
|
||||
final String name;
|
||||
final FeatureCodec codec;
|
||||
|
||||
public FeatureDescriptor(final String name, final FeatureCodec codec) {
|
||||
this.name = name;
|
||||
this.codec = codec;
|
||||
}
|
||||
|
||||
public String getName() {
|
||||
return name;
|
||||
}
|
||||
public FeatureCodec getCodec() {
|
||||
return codec;
|
||||
}
|
||||
public Class getCodecClass() { return codec.getClass(); }
|
||||
public Class getFeatureClass() { return codec.getFeatureType(); }
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format("FeatureDescriptor name=%s codec=%s feature=%s", getName(), getCodecClass().getName(), getFeatureClass().getName());
|
||||
}
|
||||
}
|
||||
|
||||
private final PluginManager<FeatureCodec> pluginManager;
|
||||
private final Collection<FeatureDescriptor> featureDescriptors = new HashSet<FeatureDescriptor>();
|
||||
|
||||
|
||||
/**
|
||||
* Construct a FeatureManager
|
||||
*/
|
||||
public FeatureManager() {
|
||||
pluginManager = new PluginManager<FeatureCodec>(FeatureCodec.class, "Codecs", "Codec");
|
||||
|
||||
for (final String rawName: pluginManager.getPluginsByName().keySet()) {
|
||||
FeatureCodec codec = pluginManager.createByName(rawName);
|
||||
String name = rawName.toUpperCase();
|
||||
FeatureDescriptor featureDescriptor = new FeatureDescriptor(name, codec);
|
||||
featureDescriptors.add(featureDescriptor);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the FeatureDescriptor whose getCodecClass().equals(codecClass).
|
||||
*
|
||||
* @param codecClass
|
||||
* @return A FeatureDescriptor or null if none is found
|
||||
*/
|
||||
@Requires("codecClass != null")
|
||||
public FeatureDescriptor getByCodec(Class codecClass) {
|
||||
for ( FeatureDescriptor descriptor : featureDescriptors )
|
||||
if ( descriptor.getCodecClass().equals(codecClass) )
|
||||
return descriptor;
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a collection of FeatureDescriptors that emit records of type featureClass
|
||||
*
|
||||
* @param featureClass
|
||||
* @return A FeatureDescriptor or null if none is found
|
||||
*/
|
||||
@Requires("featureClass != null")
|
||||
public <T extends Feature> Collection<FeatureDescriptor> getByFeature(Class<T> featureClass) {
|
||||
Set<FeatureDescriptor> consistentDescriptors = new HashSet<FeatureDescriptor>();
|
||||
|
||||
if (featureClass == null)
|
||||
throw new IllegalArgumentException("trackRecordType value is null, please pass in an actual class object");
|
||||
|
||||
for ( FeatureDescriptor descriptor : featureDescriptors ) {
|
||||
if ( featureClass.isAssignableFrom(descriptor.getFeatureClass()))
|
||||
consistentDescriptors.add(descriptor);
|
||||
}
|
||||
return consistentDescriptors;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the FeatureDescriptor with getName().equals(name)
|
||||
*
|
||||
* @param name
|
||||
* @return A FeatureDescriptor or null if none is found
|
||||
*/
|
||||
@Requires("name != null")
|
||||
public FeatureDescriptor getByName(String name) {
|
||||
for ( FeatureDescriptor descriptor : featureDescriptors )
|
||||
if ( descriptor.getName().equalsIgnoreCase(name) )
|
||||
return descriptor;
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the FeatureDescriptor that can read the contexts of File file, is one can be determined
|
||||
*
|
||||
* @param file
|
||||
* @return A FeatureDescriptor or null if none is found
|
||||
*/
|
||||
@Requires({"file != null", "file.isFile()", "file.canRead()"})
|
||||
public FeatureDescriptor getByFiletype(File file) {
|
||||
List<FeatureDescriptor> canParse = new ArrayList<FeatureDescriptor>();
|
||||
for ( FeatureDescriptor descriptor : featureDescriptors )
|
||||
if ( descriptor.getCodec() instanceof SelfScopingFeatureCodec ) {
|
||||
if ( ((SelfScopingFeatureCodec) descriptor.getCodec()).canDecode(file) ) {
|
||||
canParse.add(descriptor);
|
||||
}
|
||||
}
|
||||
|
||||
if ( canParse.size() == 0 )
|
||||
return null;
|
||||
else if ( canParse.size() > 1 )
|
||||
throw new ReviewedStingException("BUG: multiple feature descriptors can read file " + file + ": " + canParse);
|
||||
else
|
||||
return canParse.get(0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the FeatureDescriptor associated with the type described by triplet, or null if none is found
|
||||
* @param triplet
|
||||
* @return
|
||||
*/
|
||||
@Requires("triplet != null")
|
||||
public FeatureDescriptor getByTriplet(RMDTriplet triplet) {
|
||||
return getByName(triplet.getType());
|
||||
}
|
||||
|
||||
/**
|
||||
* @return all of the FeatureDescriptors available to the GATK. Never null
|
||||
*/
|
||||
@Ensures("result != null")
|
||||
public Collection<FeatureDescriptor> getFeatureDescriptors() {
|
||||
return Collections.unmodifiableCollection(featureDescriptors);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns a list of the available tribble track names (vcf,dbsnp,etc) that we can load
|
||||
* @return
|
||||
*/
|
||||
@Ensures("result != null")
|
||||
public String userFriendlyListOfAvailableFeatures() {
|
||||
List<String> names = new ArrayList<String>();
|
||||
for ( final FeatureDescriptor descriptor : featureDescriptors )
|
||||
names.add(descriptor.getName());
|
||||
return Utils.join(",", names);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new FeatureCodec of the type described in descriptor, assigning it the
|
||||
* name (if possible) and providing it the genomeLocParser (where necessary)
|
||||
*
|
||||
* @param descriptor FeatureDescriptor of the Tribble FeatureCodec we want to create
|
||||
* @param name the name to assign this codec
|
||||
* @return the feature codec itself
|
||||
*/
|
||||
@Requires({"descriptor != null", "name != null", "genomeLocParser != null"})
|
||||
@Ensures("result != null")
|
||||
public FeatureCodec createCodec(FeatureDescriptor descriptor, String name, GenomeLocParser genomeLocParser) {
|
||||
FeatureCodec codex = pluginManager.createByType(descriptor.getCodecClass());
|
||||
if ( codex instanceof NameAwareCodec )
|
||||
((NameAwareCodec)codex).setName(name);
|
||||
if ( codex instanceof ReferenceDependentFeatureCodec )
|
||||
((ReferenceDependentFeatureCodec)codex).setGenomeLocParser(genomeLocParser);
|
||||
return codex;
|
||||
}
|
||||
}
|
||||
|
|
@ -1,45 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010. The Broad Institute
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.refdata.tracks;
|
||||
|
||||
import net.sf.samtools.util.CloseableIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* <p/>
|
||||
* Interface QueryableTrack
|
||||
* <p/>
|
||||
* a decorator interface for tracks that are queryable
|
||||
*/
|
||||
public interface QueryableTrack {
|
||||
public CloseableIterator<GATKFeature> query(final GenomeLoc interval) throws IOException;
|
||||
public CloseableIterator<GATKFeature> query(final GenomeLoc interval, final boolean contained) throws IOException;
|
||||
public CloseableIterator<GATKFeature> query(final String contig, final int start, final int stop) throws IOException;
|
||||
public CloseableIterator<GATKFeature> query(final String contig, final int start, final int stop, final boolean contained) throws IOException;
|
||||
public void close();
|
||||
}
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The Broad Institute
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
|
|
@ -12,18 +12,17 @@
|
|||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.refdata.tracks.builders;
|
||||
package org.broadinstitute.sting.gatk.refdata.tracks;
|
||||
|
||||
import net.sf.samtools.SAMSequenceDictionary;
|
||||
import net.sf.samtools.SAMSequenceRecord;
|
||||
|
|
@ -36,12 +35,11 @@ import org.broad.tribble.util.LittleEndianOutputStream;
|
|||
import org.broadinstitute.sting.commandline.Tags;
|
||||
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackCreationException;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet.RMDStorageType;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.SequenceDictionaryUtils;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
|
@ -67,7 +65,7 @@ import java.util.*;
|
|||
* that gets iterators from the FeatureReader using Tribble.
|
||||
*
|
||||
*/
|
||||
public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
|
||||
public class RMDTrackBuilder { // extends PluginManager<FeatureCodec> {
|
||||
/**
|
||||
* our log, which we use to capture anything from this class
|
||||
*/
|
||||
|
|
@ -76,8 +74,6 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
|
|||
// a constant we use for marking sequence dictionary entries in the Tribble index property list
|
||||
public static final String SequenceDictionaryPropertyPredicate = "DICT:";
|
||||
|
||||
private Map<String, Class> classes = null;
|
||||
|
||||
// private sequence dictionary we use to set our tracks with
|
||||
private SAMSequenceDictionary dict = null;
|
||||
|
||||
|
|
@ -91,6 +87,8 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
|
|||
*/
|
||||
private ValidationExclusion.TYPE validationExclusionType;
|
||||
|
||||
FeatureManager featureManager;
|
||||
|
||||
/**
|
||||
* Construct an RMDTrackerBuilder, allowing the user to define tracks to build after-the-fact. This is generally
|
||||
* used when walkers want to directly manage the ROD system for whatever reason. Before using this constructor,
|
||||
|
|
@ -102,29 +100,14 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
|
|||
public RMDTrackBuilder(SAMSequenceDictionary dict,
|
||||
GenomeLocParser genomeLocParser,
|
||||
ValidationExclusion.TYPE validationExclusionType) {
|
||||
super(FeatureCodec.class, "Codecs", "Codec");
|
||||
this.dict = dict;
|
||||
this.genomeLocParser = genomeLocParser;
|
||||
this.validationExclusionType = validationExclusionType;
|
||||
|
||||
classes = new HashMap<String, Class>();
|
||||
for (String name: this.getPluginsByName().keySet()) {
|
||||
classes.put(name.toUpperCase(), getPluginsByName().get(name));
|
||||
} }
|
||||
|
||||
/** @return a list of all available track types we currently have access to create */
|
||||
public Map<String, Class> getAvailableTrackNamesAndTypes() {
|
||||
return Collections.unmodifiableMap(classes);
|
||||
this.genomeLocParser = genomeLocParser;
|
||||
featureManager = new FeatureManager();
|
||||
}
|
||||
|
||||
/** @return a list of all available track record types we currently have access to create */
|
||||
public Map<String, Class> getAvailableTrackNamesAndRecordTypes() {
|
||||
HashMap classToRecord = new HashMap<String, Class>();
|
||||
for (String name: this.getPluginsByName().keySet()) {
|
||||
FeatureCodec codec = this.createByName(name);
|
||||
classToRecord.put(name, codec.getFeatureType());
|
||||
}
|
||||
return classToRecord;
|
||||
public FeatureManager getFeatureManager() {
|
||||
return featureManager;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -133,45 +116,38 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
|
|||
* @param fileDescriptor a description of the type of track to build.
|
||||
*
|
||||
* @return an instance of the track
|
||||
* @throws RMDTrackCreationException
|
||||
* if we don't know of the target class or we couldn't create it
|
||||
*/
|
||||
public RMDTrack createInstanceOfTrack(RMDTriplet fileDescriptor) throws RMDTrackCreationException {
|
||||
public RMDTrack createInstanceOfTrack(RMDTriplet fileDescriptor) {
|
||||
String name = fileDescriptor.getName();
|
||||
File inputFile = new File(fileDescriptor.getFile());
|
||||
|
||||
Class featureCodecClass = getAvailableTrackNamesAndTypes().get(fileDescriptor.getType().toUpperCase());
|
||||
if (featureCodecClass == null)
|
||||
FeatureManager.FeatureDescriptor descriptor = getFeatureManager().getByTriplet(fileDescriptor);
|
||||
if (descriptor == null)
|
||||
throw new UserException.BadArgumentValue("-B",fileDescriptor.getType());
|
||||
|
||||
// return a feature reader track
|
||||
Pair<FeatureSource, SAMSequenceDictionary> pair;
|
||||
if (inputFile.getAbsolutePath().endsWith(".gz"))
|
||||
pair = createTabixIndexedFeatureSource(featureCodecClass, name, inputFile);
|
||||
pair = createTabixIndexedFeatureSource(descriptor, name, inputFile);
|
||||
else
|
||||
pair = getFeatureSource(featureCodecClass, name, inputFile, fileDescriptor.getStorageType());
|
||||
pair = getFeatureSource(descriptor, name, inputFile, fileDescriptor.getStorageType());
|
||||
if (pair == null) throw new UserException.CouldNotReadInputFile(inputFile, "Unable to make the feature reader for input file");
|
||||
return new RMDTrack(featureCodecClass, name, inputFile, pair.first, pair.second, genomeLocParser, createCodec(featureCodecClass,name));
|
||||
return new RMDTrack(descriptor.getCodecClass(), name, inputFile, pair.first, pair.second, genomeLocParser, createCodec(descriptor, name));
|
||||
}
|
||||
|
||||
/**
|
||||
* Convenience method simplifying track creation. Assume unnamed track based on a file rather than a stream.
|
||||
* @param targetClass Type of Tribble class to build.
|
||||
* @param codecClass Type of Tribble codec class to build.
|
||||
* @param inputFile Input file type to use.
|
||||
* @return An RMDTrack, suitable for accessing reference metadata.
|
||||
*/
|
||||
public RMDTrack createInstanceOfTrack(Class targetClass, File inputFile) {
|
||||
// TODO: Update RMDTriplet to contain an actual class object rather than a name to avoid these gymnastics.
|
||||
String typeName = null;
|
||||
for(Map.Entry<String,Class> trackType: getAvailableTrackNamesAndTypes().entrySet()) {
|
||||
if(trackType.getValue().equals(targetClass))
|
||||
typeName = trackType.getKey();
|
||||
}
|
||||
public RMDTrack createInstanceOfTrack(Class codecClass, File inputFile) {
|
||||
final FeatureManager.FeatureDescriptor descriptor = getFeatureManager().getByCodec(codecClass);
|
||||
|
||||
if(typeName == null)
|
||||
throw new ReviewedStingException("Unable to find type name for class " + targetClass.getName());
|
||||
if (descriptor == null)
|
||||
throw new ReviewedStingException("Unable to find type name for codex class " + codecClass.getName());
|
||||
|
||||
return createInstanceOfTrack(new RMDTriplet("anonymous",typeName,inputFile.getAbsolutePath(),RMDStorageType.FILE,new Tags()));
|
||||
return createInstanceOfTrack(new RMDTriplet("anonymous",descriptor.getName(),inputFile.getAbsolutePath(),RMDStorageType.FILE,new Tags()));
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -179,16 +155,16 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
|
|||
* reader of the appropriate type will figure out what the right index type is, and determine if it
|
||||
* exists.
|
||||
*
|
||||
* @param targetClass the codec class type
|
||||
* @param descriptor the FeatureDescriptor describing the FeatureCodec we want to create
|
||||
* @param name the name of the track
|
||||
* @param inputFile the file to load
|
||||
* @return a feature reader implementation
|
||||
*/
|
||||
private Pair<FeatureSource, SAMSequenceDictionary> createTabixIndexedFeatureSource(Class targetClass, String name, File inputFile) {
|
||||
private Pair<FeatureSource, SAMSequenceDictionary> createTabixIndexedFeatureSource(FeatureManager.FeatureDescriptor descriptor, String name, File inputFile) {
|
||||
// we might not know the index type, try loading with the default reader constructor
|
||||
logger.info("Attempting to blindly load " + inputFile + " as a tabix indexed file");
|
||||
try {
|
||||
return new Pair<FeatureSource, SAMSequenceDictionary>(BasicFeatureSource.getFeatureSource(inputFile.getAbsolutePath(), createCodec(targetClass, name)),null);
|
||||
return new Pair<FeatureSource, SAMSequenceDictionary>(BasicFeatureSource.getFeatureSource(inputFile.getAbsolutePath(), createCodec(descriptor, name)),null);
|
||||
} catch (TribbleException e) {
|
||||
throw new UserException(e.getMessage(), e);
|
||||
}
|
||||
|
|
@ -196,28 +172,26 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
|
|||
|
||||
/**
|
||||
* add a name to the codec, if it takes one
|
||||
* @param targetClass the class to create a codec for
|
||||
* @param descriptor the class to create a codec for
|
||||
* @param name the name to assign this codec
|
||||
* @return the feature codec itself
|
||||
*/
|
||||
public FeatureCodec createCodec(Class targetClass, String name) {
|
||||
FeatureCodec codex = this.createByType(targetClass);
|
||||
if ( codex instanceof NameAwareCodec )
|
||||
((NameAwareCodec)codex).setName(name);
|
||||
if(codex instanceof ReferenceDependentFeatureCodec)
|
||||
((ReferenceDependentFeatureCodec)codex).setGenomeLocParser(genomeLocParser);
|
||||
return codex;
|
||||
private FeatureCodec createCodec(FeatureManager.FeatureDescriptor descriptor, String name) {
|
||||
return featureManager.createCodec(descriptor, name, genomeLocParser);
|
||||
}
|
||||
|
||||
/**
|
||||
* create a feature source object given:
|
||||
* @param targetClass the target class
|
||||
* @param descriptor the FeatureDescriptor describing the FeatureCodec we want to create
|
||||
* @param name the name of the codec
|
||||
* @param inputFile the tribble file to parse
|
||||
* @param storageType How the RMD is streamed into the input file.
|
||||
* @return the input file as a FeatureReader
|
||||
*/
|
||||
private Pair<FeatureSource, SAMSequenceDictionary> getFeatureSource(Class targetClass, String name, File inputFile, RMDStorageType storageType) {
|
||||
private Pair<FeatureSource, SAMSequenceDictionary> getFeatureSource(FeatureManager.FeatureDescriptor descriptor,
|
||||
String name,
|
||||
File inputFile,
|
||||
RMDStorageType storageType) {
|
||||
// Feature source and sequence dictionary to use as the ultimate reference
|
||||
FeatureSource featureSource = null;
|
||||
SAMSequenceDictionary sequenceDictionary = null;
|
||||
|
|
@ -227,7 +201,7 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
|
|||
|
||||
if(canBeIndexed) {
|
||||
try {
|
||||
Index index = loadIndex(inputFile, createCodec(targetClass, name));
|
||||
Index index = loadIndex(inputFile, createCodec(descriptor, name));
|
||||
try { logger.info(String.format(" Index for %s has size in bytes %d", inputFile, Sizeof.getObjectGraphSize(index))); }
|
||||
catch (ReviewedStingException e) { }
|
||||
|
||||
|
|
@ -240,7 +214,7 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
|
|||
sequenceDictionary = getSequenceDictionaryFromProperties(index);
|
||||
}
|
||||
|
||||
featureSource = new BasicFeatureSource(inputFile.getAbsolutePath(), index, createCodec(targetClass, name));
|
||||
featureSource = new BasicFeatureSource(inputFile.getAbsolutePath(), index, createCodec(descriptor, name));
|
||||
}
|
||||
catch (TribbleException e) {
|
||||
throw new UserException(e.getMessage());
|
||||
|
|
@ -250,7 +224,7 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
|
|||
}
|
||||
}
|
||||
else {
|
||||
featureSource = BasicFeatureSource.getFeatureSource(inputFile.getAbsolutePath(),createCodec(targetClass, name),false);
|
||||
featureSource = BasicFeatureSource.getFeatureSource(inputFile.getAbsolutePath(),createCodec(descriptor, name),false);
|
||||
}
|
||||
|
||||
return new Pair<FeatureSource,SAMSequenceDictionary>(featureSource,sequenceDictionary);
|
||||
|
|
@ -385,22 +359,6 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
|
|||
return idx;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a collection of track names that match the record type.
|
||||
* @param trackRecordType the record type specified in the @RMD annotation
|
||||
* @return a collection of available track record type names that match the record type
|
||||
*/
|
||||
public Collection<String> getTrackRecordTypeNames(Class trackRecordType) {
|
||||
Set<String> names = new TreeSet<String>();
|
||||
if (trackRecordType == null)
|
||||
throw new IllegalArgumentException("trackRecordType value is null, please pass in an actual class object");
|
||||
|
||||
for (Map.Entry<String, Class> availableTrackRecordType: getAvailableTrackNamesAndRecordTypes().entrySet()) {
|
||||
if (availableTrackRecordType.getValue() != null && trackRecordType.isAssignableFrom(availableTrackRecordType.getValue()))
|
||||
names.add(availableTrackRecordType.getKey());
|
||||
}
|
||||
return names;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
// static functions to work with the sequence dictionaries of indexes
|
||||
|
|
@ -57,6 +57,7 @@ public abstract class GATKFeature implements Feature, HasGenomeLocation {
|
|||
|
||||
public abstract GenomeLoc getLocation();
|
||||
|
||||
// TODO: this should be a Feature
|
||||
public abstract Object getUnderlyingObject();
|
||||
|
||||
/**
|
||||
|
|
@ -98,48 +99,9 @@ public abstract class GATKFeature implements Feature, HasGenomeLocation {
|
|||
return feature.getEnd();
|
||||
}
|
||||
|
||||
// TODO: this should be a Feature, actually
|
||||
public Object getUnderlyingObject() {
|
||||
return feature;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* wrapping a old style rod into the new GATK feature style
|
||||
*/
|
||||
public static class RODGATKFeature extends GATKFeature {
|
||||
|
||||
// our data
|
||||
private ReferenceOrderedDatum datum;
|
||||
|
||||
public RODGATKFeature(ReferenceOrderedDatum datum) {
|
||||
super(datum.getName());
|
||||
this.datum = datum;
|
||||
}
|
||||
|
||||
@Override
|
||||
public GenomeLoc getLocation() {
|
||||
return datum.getLocation();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object getUnderlyingObject() {
|
||||
return datum;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getChr() {
|
||||
return datum.getLocation().getContig();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getStart() {
|
||||
return (int)datum.getLocation().getStart();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getEnd() {
|
||||
return (int)datum.getLocation().getStop();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,65 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010. The Broad Institute
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.refdata.utils;
|
||||
|
||||
import net.sf.samtools.util.CloseableIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
|
||||
import java.util.Iterator;
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
* @author aaron
|
||||
*
|
||||
* Class GATKFeatureIterator
|
||||
*
|
||||
* Takes a RODatum iterator and makes it an iterator of GATKFeatures. Shazam!
|
||||
*/
|
||||
public class GATKFeatureIterator implements CloseableIterator<GATKFeature> {
|
||||
private final Iterator<ReferenceOrderedDatum> iter;
|
||||
public GATKFeatureIterator(Iterator<ReferenceOrderedDatum> iter) {
|
||||
this.iter = iter;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
return iter.hasNext();
|
||||
}
|
||||
|
||||
@Override
|
||||
public GATKFeature next() {
|
||||
return new GATKFeature.RODGATKFeature(iter.next());
|
||||
}
|
||||
|
||||
@Override
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException("Remove not supported");
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
// do nothing, our underlying iterator doesn't support this
|
||||
}
|
||||
}
|
||||
|
|
@ -1,21 +1,25 @@
|
|||
package org.broadinstitute.sting.gatk.report;
|
||||
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||
import org.broadinstitute.sting.utils.text.TextFormattingUtils;
|
||||
|
||||
import java.io.*;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.TreeMap;
|
||||
|
||||
/**
|
||||
* Container class for GATK report tables
|
||||
*/
|
||||
public class GATKReport {
|
||||
private TreeMap<String, GATKReportTable> tables;
|
||||
public static final String GATKREPORT_HEADER_PREFIX = "##:GATKReport.v";
|
||||
private TreeMap<String, GATKReportTable> tables = new TreeMap<String, GATKReportTable>();
|
||||
|
||||
/**
|
||||
* Create a new, empty GATKReport.
|
||||
*/
|
||||
public GATKReport() {
|
||||
tables = new TreeMap<String, GATKReportTable>();
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -23,7 +27,7 @@ public class GATKReport {
|
|||
* @param filename the path to the file to load
|
||||
*/
|
||||
public GATKReport(String filename) {
|
||||
loadReport(new File(filename));
|
||||
this(new File(filename));
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -31,7 +35,6 @@ public class GATKReport {
|
|||
* @param file the file to load
|
||||
*/
|
||||
public GATKReport(File file) {
|
||||
tables = new TreeMap<String, GATKReportTable>();
|
||||
loadReport(file);
|
||||
}
|
||||
|
||||
|
|
@ -46,11 +49,17 @@ public class GATKReport {
|
|||
GATKReportTable table = null;
|
||||
String[] header = null;
|
||||
int id = 0;
|
||||
GATKReportVersion version = null;
|
||||
List<Integer> columnStarts = null;
|
||||
|
||||
String line;
|
||||
while ( (line = reader.readLine()) != null ) {
|
||||
if (line.startsWith("##:GATKReport.v0.1 ")) {
|
||||
line = line.replaceFirst("##:GATKReport.v0.1 ", "");
|
||||
|
||||
if (line.startsWith(GATKREPORT_HEADER_PREFIX)) {
|
||||
|
||||
version = GATKReportVersion.fromHeader(line);
|
||||
|
||||
line = line.replaceFirst("##:GATKReport." + version.versionString + " ", "");
|
||||
String[] pieces = line.split(" : ");
|
||||
|
||||
String tableName = pieces[0];
|
||||
|
|
@ -58,14 +67,35 @@ public class GATKReport {
|
|||
|
||||
addTable(tableName, tableDesc);
|
||||
table = getTable(tableName);
|
||||
table.setVersion(version);
|
||||
|
||||
header = null;
|
||||
} else if ( line.isEmpty() ) {
|
||||
columnStarts = null;
|
||||
} else if ( line.trim().isEmpty() ) {
|
||||
// do nothing
|
||||
} else {
|
||||
if (table != null) {
|
||||
|
||||
String[] splitLine;
|
||||
|
||||
switch (version) {
|
||||
case V0_1:
|
||||
splitLine = TextFormattingUtils.splitWhiteSpace(line);
|
||||
break;
|
||||
|
||||
case V0_2:
|
||||
if (header == null) {
|
||||
columnStarts = TextFormattingUtils.getWordStarts(line);
|
||||
}
|
||||
splitLine = TextFormattingUtils.splitFixedWidth(line, columnStarts);
|
||||
break;
|
||||
|
||||
default:
|
||||
throw new ReviewedStingException("GATK report version parsing not implemented for: " + line);
|
||||
}
|
||||
|
||||
if (header == null) {
|
||||
header = line.split("\\s+");
|
||||
header = splitLine;
|
||||
|
||||
table.addPrimaryKey("id", false);
|
||||
|
||||
|
|
@ -75,10 +105,8 @@ public class GATKReport {
|
|||
|
||||
id = 0;
|
||||
} else {
|
||||
String[] entries = line.split("\\s+");
|
||||
|
||||
for (int columnIndex = 0; columnIndex < header.length; columnIndex++) {
|
||||
table.set(id, header[columnIndex], entries[columnIndex]);
|
||||
table.set(id, header[columnIndex], splitLine[columnIndex]);
|
||||
}
|
||||
|
||||
id++;
|
||||
|
|
@ -125,7 +153,10 @@ public class GATKReport {
|
|||
* @return the table object
|
||||
*/
|
||||
public GATKReportTable getTable(String tableName) {
|
||||
return tables.get(tableName);
|
||||
GATKReportTable table = tables.get(tableName);
|
||||
if (table == null)
|
||||
throw new ReviewedStingException("Table is not in GATKReport: " + tableName);
|
||||
return table;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -140,4 +171,8 @@ public class GATKReport {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
public Collection<GATKReportTable> getTables() {
|
||||
return tables.values();
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -37,10 +37,10 @@ public class GATKReportColumn extends TreeMap<Object, Object> {
|
|||
* tables, as the table gets written properly without having to waste storage for the unset elements (usually the zero
|
||||
* values) in the table.
|
||||
*
|
||||
* @param primaryKey the primary key position in the column that should be set
|
||||
* @param primaryKey the primary key position in the column that should be retrieved
|
||||
* @return the value at the specified position in the column, or the default value if the element is not set
|
||||
*/
|
||||
public Object getWithoutSideEffects(Object primaryKey) {
|
||||
private Object getWithoutSideEffects(Object primaryKey) {
|
||||
if (!this.containsKey(primaryKey)) {
|
||||
return defaultValue;
|
||||
}
|
||||
|
|
@ -48,6 +48,16 @@ public class GATKReportColumn extends TreeMap<Object, Object> {
|
|||
return this.get(primaryKey);
|
||||
}
|
||||
|
||||
/**
|
||||
* Return an object from the column, but if it doesn't exist, return the default value.
|
||||
*
|
||||
* @param primaryKey the primary key position in the column that should be retrieved
|
||||
* @return the string value at the specified position in the column, or the default value if the element is not set
|
||||
*/
|
||||
public String getStringValue(Object primaryKey) {
|
||||
return toString(getWithoutSideEffects(primaryKey));
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the displayable property of the column. If true, the column will be displayed in the final output.
|
||||
* If not, printing will be suppressed for the contents of the table.
|
||||
|
|
@ -67,7 +77,7 @@ public class GATKReportColumn extends TreeMap<Object, Object> {
|
|||
|
||||
for (Object obj : this.values()) {
|
||||
if (obj != null) {
|
||||
int width = obj.toString().length();
|
||||
int width = toString(obj).length();
|
||||
|
||||
if (width > maxWidth) {
|
||||
maxWidth = width;
|
||||
|
|
@ -77,4 +87,27 @@ public class GATKReportColumn extends TreeMap<Object, Object> {
|
|||
|
||||
return maxWidth;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a string version of the values.
|
||||
* @param obj The object to convert to a string
|
||||
* @return The string representation of the column
|
||||
*/
|
||||
private static String toString(Object obj) {
|
||||
String value;
|
||||
if (obj == null) {
|
||||
value = "null";
|
||||
} else if (obj instanceof Float) {
|
||||
value = String.format("%.8f", (Float) obj);
|
||||
} else if (obj instanceof Double) {
|
||||
value = String.format("%.8f", (Double) obj);
|
||||
} else {
|
||||
value = obj.toString();
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
||||
public String getColumnName() {
|
||||
return columnName;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
/*
|
||||
* Copyright (c) 2010. The Broad Institute
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
|
|
@ -11,7 +12,7 @@
|
|||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
|
|
@ -21,25 +22,34 @@
|
|||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.refdata.tracks;
|
||||
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
package org.broadinstitute.sting.gatk.report;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
*
|
||||
* @author aaron
|
||||
*
|
||||
* Class RMDTrackCreationException
|
||||
*
|
||||
* if we fail for some reason to make a track, throw this exception
|
||||
* Tracks a linked list of GATKReportColumn in order by name.
|
||||
*/
|
||||
public class RMDTrackCreationException extends ReviewedStingException {
|
||||
public RMDTrackCreationException(String msg) {
|
||||
super(msg);
|
||||
public class GATKReportColumns extends LinkedHashMap<String, GATKReportColumn> {
|
||||
private List<String> columnNames = new ArrayList<String>();
|
||||
|
||||
/**
|
||||
* Returns the column by index
|
||||
* @param i the index
|
||||
* @return The column
|
||||
*/
|
||||
public GATKReportColumn getByIndex(int i) {
|
||||
return get(columnNames.get(i));
|
||||
}
|
||||
|
||||
public RMDTrackCreationException(String message, Throwable throwable) {
|
||||
super(message, throwable);
|
||||
@Override
|
||||
public GATKReportColumn remove(Object key) {
|
||||
columnNames.remove(key);
|
||||
return super.remove(key);
|
||||
}
|
||||
|
||||
@Override
|
||||
public GATKReportColumn put(String key, GATKReportColumn value) {
|
||||
columnNames.add(key);
|
||||
return super.put(key, value);
|
||||
}
|
||||
}
|
||||
|
|
@ -1,83 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.report;
|
||||
|
||||
import org.apache.commons.io.FileUtils;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.broadinstitute.sting.utils.text.XReadLines;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
public class GATKReportParser {
|
||||
private List<GATKReportTableParser> tables = new ArrayList<GATKReportTableParser>();
|
||||
|
||||
public void parse(File file) throws IOException {
|
||||
InputStream stream = FileUtils.openInputStream(file);
|
||||
try {
|
||||
parse(stream);
|
||||
} finally {
|
||||
IOUtils.closeQuietly(stream);
|
||||
}
|
||||
}
|
||||
|
||||
public void parse(InputStream input) throws IOException {
|
||||
GATKReportTableParser table = null;
|
||||
|
||||
for (String line: new XReadLines(input)) {
|
||||
if (line.startsWith("##:GATKReport.v0.1 ")) {
|
||||
table = newTableParser(line);
|
||||
tables.add(table);
|
||||
table.parse(line);
|
||||
} else if (table != null) {
|
||||
if (line.trim().length() == 0)
|
||||
table = null;
|
||||
else
|
||||
table.parse(line);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public String getValue(String tableName, String[] key, String column) {
|
||||
for (GATKReportTableParser table: tables)
|
||||
if (table.getTableName().equals(tableName))
|
||||
return table.getValue(key, column);
|
||||
return null;
|
||||
}
|
||||
|
||||
public String getValue(String tableName, String key, String column) {
|
||||
for (GATKReportTableParser table: tables)
|
||||
if (table.getTableName().equals(tableName))
|
||||
return table.getValue(key, column);
|
||||
return null;
|
||||
}
|
||||
|
||||
private GATKReportTableParser newTableParser(String header) {
|
||||
return new GATKReportTableParser();
|
||||
}
|
||||
}
|
||||
|
|
@ -1,5 +1,6 @@
|
|||
package org.broadinstitute.sting.gatk.report;
|
||||
|
||||
import org.apache.commons.lang.ObjectUtils;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.io.PrintStream;
|
||||
|
|
@ -88,17 +89,20 @@ import java.util.regex.Pattern;
|
|||
* but at least the prototype contained herein works.
|
||||
*
|
||||
* @author Kiran Garimella
|
||||
* @author Khalid Shakir
|
||||
*/
|
||||
public class GATKReportTable {
|
||||
private static final GATKReportVersion LATEST_REPORT_VERSION = GATKReportVersion.V0_2;
|
||||
private String tableName;
|
||||
private String tableDescription;
|
||||
private GATKReportVersion version = LATEST_REPORT_VERSION;
|
||||
|
||||
private String primaryKeyName;
|
||||
private Collection<Object> primaryKeyColumn;
|
||||
private boolean primaryKeyDisplay;
|
||||
boolean sortByPrimaryKey = true;
|
||||
private boolean sortByPrimaryKey = true;
|
||||
|
||||
private LinkedHashMap<String, GATKReportColumn> columns;
|
||||
private GATKReportColumns columns;
|
||||
|
||||
/**
|
||||
* Verifies that a table or column name has only alphanumeric characters - no spaces or special characters allowed
|
||||
|
|
@ -113,6 +117,19 @@ public class GATKReportTable {
|
|||
return !m.find();
|
||||
}
|
||||
|
||||
/**
|
||||
* Verifies that a table or column name has only alphanumeric characters - no spaces or special characters allowed
|
||||
*
|
||||
* @param description the name of the table or column
|
||||
* @return true if the name is valid, false if otherwise
|
||||
*/
|
||||
private boolean isValidDescription(String description) {
|
||||
Pattern p = Pattern.compile("\\r|\\n");
|
||||
Matcher m = p.matcher(description);
|
||||
|
||||
return !m.find();
|
||||
}
|
||||
|
||||
/**
|
||||
* Construct a new GATK report table with the specified name and description
|
||||
*
|
||||
|
|
@ -128,11 +145,23 @@ public class GATKReportTable {
|
|||
throw new ReviewedStingException("Attempted to set a GATKReportTable name of '" + tableName + "'. GATKReportTable names must be purely alphanumeric - no spaces or special characters are allowed.");
|
||||
}
|
||||
|
||||
if (!isValidDescription(tableDescription)) {
|
||||
throw new ReviewedStingException("Attempted to set a GATKReportTable description of '" + tableDescription + "'. GATKReportTable descriptions must not contain newlines.");
|
||||
}
|
||||
|
||||
this.tableName = tableName;
|
||||
this.tableDescription = tableDescription;
|
||||
this.sortByPrimaryKey = sortByPrimaryKey;
|
||||
|
||||
columns = new LinkedHashMap<String, GATKReportColumn>();
|
||||
columns = new GATKReportColumns();
|
||||
}
|
||||
|
||||
public GATKReportVersion getVersion() {
|
||||
return version;
|
||||
}
|
||||
|
||||
protected void setVersion(GATKReportVersion version) {
|
||||
this.version = version;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -161,6 +190,57 @@ public class GATKReportTable {
|
|||
primaryKeyDisplay = display;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the first primary key matching the dotted column values.
|
||||
* Ex: dbsnp.eval.called.all.novel.all
|
||||
* @param dottedColumnValues Period concatenated values.
|
||||
* @return The first primary key matching the column values or throws an exception.
|
||||
*/
|
||||
public Object getPrimaryKey(String dottedColumnValues) {
|
||||
Object key = findPrimaryKey(dottedColumnValues);
|
||||
if (key == null)
|
||||
throw new ReviewedStingException("Attempted to get non-existent GATKReportTable key for values: " + dottedColumnValues);
|
||||
return key;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if there is at least on row with the dotted column values.
|
||||
* Ex: dbsnp.eval.called.all.novel.all
|
||||
* @param dottedColumnValues Period concatenated values.
|
||||
* @return true if there is at least one row matching the columns.
|
||||
*/
|
||||
public boolean containsPrimaryKey(String dottedColumnValues) {
|
||||
return findPrimaryKey(dottedColumnValues) != null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the first primary key matching the dotted column values.
|
||||
* Ex: dbsnp.eval.called.all.novel.all
|
||||
* @param dottedColumnValues Period concatenated values.
|
||||
* @return The first primary key matching the column values or null.
|
||||
*/
|
||||
private Object findPrimaryKey(String dottedColumnValues) {
|
||||
return findPrimaryKey(dottedColumnValues.split("\\."));
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the first primary key matching the column values.
|
||||
* Ex: new String[] { "dbsnp", "eval", "called", "all", "novel", "all" }
|
||||
* @param columnValues column values.
|
||||
* @return The first primary key matching the column values.
|
||||
*/
|
||||
private Object findPrimaryKey(Object[] columnValues) {
|
||||
for (Object primaryKey : primaryKeyColumn) {
|
||||
boolean matching = true;
|
||||
for (int i = 0; matching && i < columnValues.length; i++) {
|
||||
matching = ObjectUtils.equals(columnValues[i], get(primaryKey, i+1));
|
||||
}
|
||||
if (matching)
|
||||
return primaryKey;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a column to the report and specify the default value that should be supplied if a given position in the table is never explicitly set.
|
||||
*
|
||||
|
|
@ -230,6 +310,17 @@ public class GATKReportTable {
|
|||
return columns.get(columnName).get(primaryKey);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a value from the given position in the table
|
||||
*
|
||||
* @param primaryKey the primary key value
|
||||
* @param columnIndex the index of the column
|
||||
* @return the value stored at the specified position in the table
|
||||
*/
|
||||
private Object get(Object primaryKey, int columnIndex) {
|
||||
return columns.getByIndex(columnIndex).get(primaryKey);
|
||||
}
|
||||
|
||||
/**
|
||||
* Increment an element in the table. This implementation is awful - a functor would probably be better.
|
||||
*
|
||||
|
|
@ -515,7 +606,7 @@ public class GATKReportTable {
|
|||
String primaryKeyFormat = "%-" + getPrimaryKeyColumnWidth() + "s";
|
||||
|
||||
// Emit the table definition
|
||||
out.printf("##:GATKReport.v0.1 %s : %s%n", tableName, tableDescription);
|
||||
out.printf("##:GATKReport.%s %s : %s%n", LATEST_REPORT_VERSION.versionString, tableName, tableDescription);
|
||||
|
||||
// Emit the table header, taking into account the padding requirement if the primary key is a hidden column
|
||||
boolean needsPadding = false;
|
||||
|
|
@ -545,22 +636,8 @@ public class GATKReportTable {
|
|||
|
||||
for (String columnName : columns.keySet()) {
|
||||
if (columns.get(columnName).isDisplayable()) {
|
||||
Object obj = columns.get(columnName).getWithoutSideEffects(primaryKey);
|
||||
|
||||
if (needsPadding) { out.printf(" "); }
|
||||
|
||||
String value = "null";
|
||||
if (obj != null) {
|
||||
if (obj instanceof Float) {
|
||||
value = String.format("%.8f", (Float) obj);
|
||||
} else if (obj instanceof Double) {
|
||||
value = String.format("%.8f", (Double) obj);
|
||||
} else {
|
||||
value = obj.toString();
|
||||
}
|
||||
}
|
||||
|
||||
//out.printf(columnWidths.get(columnName), obj == null ? "null" : obj.toString());
|
||||
String value = columns.get(columnName).getStringValue(primaryKey);
|
||||
out.printf(columnWidths.get(columnName), value);
|
||||
|
||||
needsPadding = true;
|
||||
|
|
@ -577,4 +654,16 @@ public class GATKReportTable {
|
|||
public int getNumRows() {
|
||||
return primaryKeyColumn.size();
|
||||
}
|
||||
|
||||
public String getTableName() {
|
||||
return tableName;
|
||||
}
|
||||
|
||||
public String getTableDescription() {
|
||||
return tableDescription;
|
||||
}
|
||||
|
||||
public GATKReportColumns getColumns() {
|
||||
return columns;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,75 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.report;
|
||||
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
public class GATKReportTableParser {
|
||||
private int lineNum = 0;
|
||||
private String[] descriptions;
|
||||
private Map<String, Integer> headers = new HashMap<String, Integer>();
|
||||
private List<String[]> values = new ArrayList<String[]>();
|
||||
|
||||
public void parse(String line) {
|
||||
lineNum++;
|
||||
switch (lineNum) {
|
||||
case 1:
|
||||
descriptions = parseLine(line);
|
||||
case 2:
|
||||
String[] columnHeaders = parseLine(line);
|
||||
for (int i = 0; i < columnHeaders.length; i++)
|
||||
headers.put(columnHeaders[i], i);
|
||||
default:
|
||||
values.add(parseLine(line));
|
||||
}
|
||||
}
|
||||
|
||||
public String getTableName() {
|
||||
return descriptions[1];
|
||||
}
|
||||
|
||||
public String getValue(String[] key, String column) {
|
||||
if (!headers.containsKey(column))
|
||||
return null;
|
||||
for (String[] row: values)
|
||||
if (Arrays.equals(key, Arrays.copyOfRange(row, 1, key.length + 1)))
|
||||
return row[headers.get(column)];
|
||||
return null;
|
||||
}
|
||||
|
||||
public String getValue(String key, String column) {
|
||||
return getValue(key.split("\\."), column);
|
||||
}
|
||||
|
||||
private String generateKey(String[] row, int i) {
|
||||
return StringUtils.join(row, ".", 0, i);
|
||||
}
|
||||
|
||||
private String[] parseLine(String line) {
|
||||
return line.split(" +");
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,70 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.report;
|
||||
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
public enum GATKReportVersion {
|
||||
/**
|
||||
* Differences between other versions:
|
||||
* - Does not allow spaces in cells.
|
||||
* - Mostly fixed width but has a bug where the string width of floating point
|
||||
* values was not measured correctly leading to columns that aren't aligned
|
||||
*/
|
||||
V0_1("v0.1"),
|
||||
|
||||
/**
|
||||
* Differences between other versions:
|
||||
* - Spaces allowed in cells, for example in sample names with spaces in them ex: "C507/FG-CR 6".
|
||||
* - Fixed width fixed for floating point values
|
||||
*/
|
||||
V0_2("v0.2");
|
||||
|
||||
public final String versionString;
|
||||
|
||||
private GATKReportVersion(String versionString) {
|
||||
this.versionString = versionString;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return versionString;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the GATK Report Version from the file header.
|
||||
* @param header Header from the file starting with ##:GATKReport.v[version]
|
||||
* @return The version as an enum.
|
||||
*/
|
||||
public static GATKReportVersion fromHeader(String header) {
|
||||
if (header.startsWith("##:GATKReport.v0.1 "))
|
||||
return GATKReportVersion.V0_1;
|
||||
|
||||
if (header.startsWith("##:GATKReport.v0.2 "))
|
||||
return GATKReportVersion.V0_2;
|
||||
|
||||
throw new ReviewedStingException("Unknown GATK report version in header: " + header);
|
||||
}
|
||||
}
|
||||
|
|
@ -65,13 +65,13 @@ public class TraverseLoci<M,T> extends TraversalEngine<M,T,LocusWalker<M,T>,Locu
|
|||
referenceView.expandBoundsToAccomodateLoc(location);
|
||||
}
|
||||
|
||||
// Iterate forward to get all reference ordered data covering this location
|
||||
final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(locus.getLocation());
|
||||
|
||||
// create reference context. Note that if we have a pileup of "extended events", the context will
|
||||
// hold the (longest) stretch of deleted reference bases (if deletions are present in the pileup).
|
||||
ReferenceContext refContext = referenceView.getReferenceContext(location);
|
||||
|
||||
// Iterate forward to get all reference ordered data covering this location
|
||||
final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(locus.getLocation(), refContext);
|
||||
|
||||
final boolean keepMeP = walker.filter(tracker, refContext, locus);
|
||||
if (keepMeP) {
|
||||
M x = walker.map(tracker, refContext, locus);
|
||||
|
|
|
|||
|
|
@ -23,5 +23,4 @@ import java.lang.annotation.*;
|
|||
@Target(ElementType.TYPE)
|
||||
public @interface Allows {
|
||||
DataSource[] value();
|
||||
RMD[] referenceMetaData() default {};
|
||||
}
|
||||
|
|
|
|||
|
|
@ -25,15 +25,18 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers;
|
||||
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broad.tribble.dbsnp.DbSNPFeature;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.Input;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.commandline.RodBinding;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
import org.broadinstitute.sting.gatk.refdata.features.DbSNPHelper;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
|
||||
|
|
@ -41,6 +44,7 @@ import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
|||
|
||||
import java.io.PrintStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
|
|
@ -68,6 +72,9 @@ public class PileupWalker extends LocusWalker<Integer, Integer> implements TreeR
|
|||
@Argument(fullName="showIndelPileups",shortName="show_indels",doc="In addition to base pileups, generate pileups of extended indel events")
|
||||
public boolean SHOW_INDEL_PILEUPS = false;
|
||||
|
||||
@Input(fullName="metadata",shortName="metadata",doc="Add these ROD bindings to the output Pileup", required=false)
|
||||
public List<RodBinding<Feature>> rods = Collections.emptyList();
|
||||
|
||||
public void initialize() {
|
||||
}
|
||||
|
||||
|
|
@ -112,18 +119,11 @@ public class PileupWalker extends LocusWalker<Integer, Integer> implements TreeR
|
|||
*/
|
||||
private String getReferenceOrderedData( RefMetaDataTracker tracker ) {
|
||||
ArrayList<String> rodStrings = new ArrayList<String>();
|
||||
for ( GATKFeature datum : tracker.getAllRods() ) {
|
||||
if ( datum != null && datum.getUnderlyingObject() instanceof ReferenceOrderedDatum ) {
|
||||
rodStrings.add(((ReferenceOrderedDatum)datum.getUnderlyingObject()).toSimpleString()); // TODO: Aaron: this line still survives, try to remove it
|
||||
}
|
||||
for ( Feature datum : tracker.getValues(rods) ) {
|
||||
rodStrings.add(datum.toString());
|
||||
}
|
||||
String rodString = Utils.join(", ", rodStrings);
|
||||
|
||||
DbSNPFeature dbsnp = tracker.lookup(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME, DbSNPFeature.class);
|
||||
|
||||
if ( dbsnp != null)
|
||||
rodString += DbSNPHelper.toMediumString(dbsnp);
|
||||
|
||||
if ( !rodString.equals("") )
|
||||
rodString = "[ROD: " + rodString + "]";
|
||||
|
||||
|
|
@ -132,8 +132,6 @@ public class PileupWalker extends LocusWalker<Integer, Integer> implements TreeR
|
|||
|
||||
@Override
|
||||
public void onTraversalDone(Integer result) {
|
||||
// Double check traversal result to make count is the same.
|
||||
// TODO: Is this check necessary?
|
||||
out.println("[REDUCE RESULT] Traversal result is: " + result);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers;
|
||||
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
|
|
@ -61,11 +62,8 @@ public class PrintRODsWalker extends RodWalker<Integer, Integer> {
|
|||
if ( tracker == null )
|
||||
return 0;
|
||||
|
||||
Iterator<GATKFeature> rods = tracker.getAllRods().iterator();
|
||||
while ( rods.hasNext() ) {
|
||||
Object rod = rods.next().getUnderlyingObject();
|
||||
if (VariantContextAdaptors.canBeConvertedToVariantContext(rod) )
|
||||
out.println(rod.toString());
|
||||
for ( Feature feature : tracker.getValues(Feature.class) ) {
|
||||
out.println(feature.toString());
|
||||
}
|
||||
|
||||
return 1;
|
||||
|
|
|
|||
|
|
@ -25,6 +25,8 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broadinstitute.sting.commandline.RodBinding;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
|
|
@ -42,9 +44,9 @@ import java.util.List;
|
|||
import java.util.Map;
|
||||
|
||||
|
||||
public class AlleleBalance implements InfoFieldAnnotation {
|
||||
public class AlleleBalance extends InfoFieldAnnotation {
|
||||
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, Map<String, RodBinding<? extends Feature>> rodBindings, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
if ( stratifiedContexts.size() == 0 )
|
||||
return null;
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,7 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broadinstitute.sting.commandline.RodBinding;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
|
|
@ -15,9 +17,9 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
|||
import java.util.*;
|
||||
|
||||
|
||||
public class AlleleBalanceBySample implements GenotypeAnnotation, ExperimentalAnnotation {
|
||||
public class AlleleBalanceBySample extends GenotypeAnnotation implements ExperimentalAnnotation {
|
||||
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g) {
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, Map<String, RodBinding<? extends Feature>> rodBindings, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g) {
|
||||
Double ratio = annotateSNP(stratifiedContext, vc, g);
|
||||
if (ratio == null)
|
||||
return null;
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ import java.util.Map;
|
|||
|
||||
|
||||
|
||||
public abstract class AnnotationByDepth implements InfoFieldAnnotation {
|
||||
public abstract class AnnotationByDepth extends InfoFieldAnnotation {
|
||||
|
||||
|
||||
protected int annotationByVariantDepth(final Map<String, Genotype> genotypes, Map<String, AlignmentContext> stratifiedContexts) {
|
||||
|
|
|
|||
|
|
@ -31,6 +31,8 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broadinstitute.sting.commandline.RodBinding;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
|
|
@ -46,9 +48,9 @@ import java.util.List;
|
|||
import java.util.Map;
|
||||
|
||||
|
||||
public class BaseCounts implements InfoFieldAnnotation {
|
||||
public class BaseCounts extends InfoFieldAnnotation {
|
||||
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, Map<String, RodBinding<? extends Feature>> rodBindings, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
if ( stratifiedContexts.size() == 0 )
|
||||
return null;
|
||||
|
||||
|
|
|
|||
|
|
@ -25,6 +25,8 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broadinstitute.sting.commandline.RodBinding;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
|
|
@ -43,14 +45,14 @@ import java.util.List;
|
|||
import java.util.Map;
|
||||
|
||||
|
||||
public class ChromosomeCounts implements InfoFieldAnnotation, StandardAnnotation {
|
||||
public class ChromosomeCounts extends InfoFieldAnnotation implements StandardAnnotation {
|
||||
|
||||
private String[] keyNames = { VCFConstants.ALLELE_NUMBER_KEY, VCFConstants.ALLELE_COUNT_KEY, VCFConstants.ALLELE_FREQUENCY_KEY };
|
||||
private VCFInfoHeaderLine[] descriptions = { new VCFInfoHeaderLine(VCFConstants.ALLELE_FREQUENCY_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Allele Frequency, for each ALT allele, in the same order as listed"),
|
||||
new VCFInfoHeaderLine(VCFConstants.ALLELE_COUNT_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Allele count in genotypes, for each ALT allele, in the same order as listed"),
|
||||
new VCFInfoHeaderLine(VCFConstants.ALLELE_NUMBER_KEY, 1, VCFHeaderLineType.Integer, "Total number of alleles in called genotypes") };
|
||||
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, Map<String, RodBinding<? extends Feature>> rodBindings, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
if ( ! vc.hasGenotypes() )
|
||||
return null;
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,7 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broadinstitute.sting.commandline.RodBinding;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
|
|
@ -16,9 +18,9 @@ import java.util.List;
|
|||
import java.util.Map;
|
||||
|
||||
|
||||
public class DepthOfCoverage implements InfoFieldAnnotation, StandardAnnotation {
|
||||
public class DepthOfCoverage extends InfoFieldAnnotation implements StandardAnnotation {
|
||||
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, Map<String, RodBinding<? extends Feature>> rodBindings, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
if ( stratifiedContexts.size() == 0 )
|
||||
return null;
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,7 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broadinstitute.sting.commandline.RodBinding;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
|
|
@ -22,13 +24,13 @@ import java.util.List;
|
|||
import java.util.Map;
|
||||
|
||||
|
||||
public class DepthPerAlleleBySample implements GenotypeAnnotation, StandardAnnotation {
|
||||
public class DepthPerAlleleBySample extends GenotypeAnnotation implements StandardAnnotation {
|
||||
|
||||
private static String REF_ALLELE = "REF";
|
||||
|
||||
private static String DEL = "DEL"; // constant, for speed: no need to create a key string for deletion allele every time
|
||||
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g) {
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, Map<String, RodBinding<? extends Feature>> rodBindings, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g) {
|
||||
if ( g == null || !g.isCalled() )
|
||||
return null;
|
||||
|
||||
|
|
|
|||
|
|
@ -25,6 +25,8 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import cern.jet.math.Arithmetic;
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broadinstitute.sting.commandline.RodBinding;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
|
|
@ -42,11 +44,11 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
|||
import java.util.*;
|
||||
|
||||
|
||||
public class FisherStrand implements InfoFieldAnnotation, StandardAnnotation {
|
||||
public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotation {
|
||||
private static final String FS = "FS";
|
||||
private static final double MIN_PVALUE = 1E-320;
|
||||
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, Map<String, RodBinding<? extends Feature>> rodBindings, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
if ( ! vc.isVariant() || vc.isFiltered() )
|
||||
return null;
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,7 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broadinstitute.sting.commandline.RodBinding;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
|
|
@ -16,9 +18,9 @@ import java.util.List;
|
|||
import java.util.Map;
|
||||
|
||||
|
||||
public class GCContent implements InfoFieldAnnotation, ExperimentalAnnotation {
|
||||
public class GCContent extends InfoFieldAnnotation implements ExperimentalAnnotation {
|
||||
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, Map<String, RodBinding<? extends Feature>> rodBindings, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
double content = computeGCContent(ref);
|
||||
Map<String, Object> map = new HashMap<String, Object>();
|
||||
map.put(getKeyNames().get(0), String.format("%.2f", content));
|
||||
|
|
|
|||
|
|
@ -1,5 +1,7 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broadinstitute.sting.commandline.RodBinding;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
|
|
@ -23,11 +25,11 @@ import java.util.Map;
|
|||
*/
|
||||
|
||||
// A set of annotations calculated directly from the GLs
|
||||
public class GLstats implements InfoFieldAnnotation, StandardAnnotation {
|
||||
public class GLstats extends InfoFieldAnnotation implements StandardAnnotation {
|
||||
|
||||
private static final int MIN_SAMPLES = 10;
|
||||
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, Map<String, RodBinding<? extends Feature>> rodBindings, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
|
||||
final Map<String, Genotype> genotypes = vc.getGenotypes();
|
||||
if ( genotypes == null || genotypes.size() < MIN_SAMPLES )
|
||||
|
|
|
|||
|
|
@ -25,6 +25,8 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broadinstitute.sting.commandline.RodBinding;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
|
|
@ -48,13 +50,13 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
|||
|
||||
import java.util.*;
|
||||
|
||||
public class HaplotypeScore implements InfoFieldAnnotation, StandardAnnotation {
|
||||
public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnotation {
|
||||
private final static boolean DEBUG = false;
|
||||
private final static int MIN_CONTEXT_WING_SIZE = 10;
|
||||
private final static int MAX_CONSENSUS_HAPLOTYPES_TO_CONSIDER = 50;
|
||||
private final static char REGEXP_WILDCARD = '.';
|
||||
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, Map<String, RodBinding<? extends Feature>> rodBindings, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
if (stratifiedContexts.size() == 0 ) // size 0 means that call was made by someone else and we have no data here
|
||||
return null;
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broad.tribble.util.popgen.HardyWeinbergCalculation;
|
||||
import org.broadinstitute.sting.commandline.RodBinding;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
|
|
@ -18,13 +20,13 @@ import java.util.List;
|
|||
import java.util.Map;
|
||||
|
||||
|
||||
public class HardyWeinberg implements InfoFieldAnnotation, WorkInProgressAnnotation {
|
||||
public class HardyWeinberg extends InfoFieldAnnotation implements WorkInProgressAnnotation {
|
||||
|
||||
private static final int MIN_SAMPLES = 10;
|
||||
private static final int MIN_GENOTYPE_QUALITY = 10;
|
||||
private static final int MIN_NEG_LOG10_PERROR = MIN_GENOTYPE_QUALITY / 10;
|
||||
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, Map<String, RodBinding<? extends Feature>> rodBindings, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
|
||||
final Map<String, Genotype> genotypes = vc.getGenotypes();
|
||||
if ( genotypes == null || genotypes.size() < MIN_SAMPLES )
|
||||
|
|
|
|||
|
|
@ -1,5 +1,7 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broadinstitute.sting.commandline.RodBinding;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
|
|
@ -16,11 +18,11 @@ import java.util.List;
|
|||
import java.util.Map;
|
||||
|
||||
|
||||
public class HomopolymerRun implements InfoFieldAnnotation, StandardAnnotation {
|
||||
public class HomopolymerRun extends InfoFieldAnnotation implements StandardAnnotation {
|
||||
|
||||
private boolean ANNOTATE_INDELS = true;
|
||||
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, Map<String, RodBinding<? extends Feature>> rodBindings, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
|
||||
if ( !vc.isBiallelic() )
|
||||
return null;
|
||||
|
|
|
|||
|
|
@ -1,5 +1,7 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broadinstitute.sting.commandline.RodBinding;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
|
|
@ -19,9 +21,9 @@ import java.util.*;
|
|||
* Time: 11:47:33 AM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class IndelType implements InfoFieldAnnotation, ExperimentalAnnotation {
|
||||
public class IndelType extends InfoFieldAnnotation implements ExperimentalAnnotation {
|
||||
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, Map<String, RodBinding<? extends Feature>> rodBindings, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
|
||||
int run;
|
||||
if (vc.isMixed()) {
|
||||
|
|
|
|||
|
|
@ -1,5 +1,7 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broadinstitute.sting.commandline.RodBinding;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
|
|
@ -16,9 +18,9 @@ import java.util.List;
|
|||
import java.util.Map;
|
||||
|
||||
|
||||
public class LowMQ implements InfoFieldAnnotation {
|
||||
public class LowMQ extends InfoFieldAnnotation {
|
||||
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, Map<String, RodBinding<? extends Feature>> rodBindings, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
if ( stratifiedContexts.size() == 0 )
|
||||
return null;
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,7 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broadinstitute.sting.commandline.RodBinding;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
|
|
@ -18,9 +20,9 @@ import java.util.List;
|
|||
import java.util.Map;
|
||||
|
||||
|
||||
public class MappingQualityZero implements InfoFieldAnnotation, StandardAnnotation {
|
||||
public class MappingQualityZero extends InfoFieldAnnotation implements StandardAnnotation {
|
||||
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, Map<String, RodBinding<? extends Feature>> rodBindings, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
if ( stratifiedContexts.size() == 0 )
|
||||
return null;
|
||||
|
||||
|
|
|
|||
|
|
@ -25,6 +25,8 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broadinstitute.sting.commandline.RodBinding;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
|
|
@ -49,9 +51,9 @@ import java.util.Map;
|
|||
* Time: 6:46:25 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class MappingQualityZeroBySample implements GenotypeAnnotation {
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref,
|
||||
AlignmentContext context, VariantContext vc, Genotype g) {
|
||||
public class MappingQualityZeroBySample extends GenotypeAnnotation {
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, Map<String, RodBinding<? extends Feature>> rodBindings,
|
||||
ReferenceContext ref, AlignmentContext context, VariantContext vc, Genotype g) {
|
||||
if ( g == null || !g.isCalled() )
|
||||
return null;
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,7 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broadinstitute.sting.commandline.RodBinding;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
|
|
@ -18,9 +20,9 @@ import java.util.Map;
|
|||
|
||||
|
||||
|
||||
public class MappingQualityZeroFraction implements InfoFieldAnnotation, ExperimentalAnnotation {
|
||||
public class MappingQualityZeroFraction extends InfoFieldAnnotation implements ExperimentalAnnotation {
|
||||
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, Map<String, RodBinding<? extends Feature>> rodBindings, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
if ( stratifiedContexts.size() == 0 )
|
||||
return null;
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,7 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broadinstitute.sting.commandline.RodBinding;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
|
|
@ -21,8 +23,8 @@ import java.util.Map;
|
|||
* Date: 5/16/11
|
||||
*/
|
||||
|
||||
public class NBaseCount implements InfoFieldAnnotation {
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
public class NBaseCount extends InfoFieldAnnotation {
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, Map<String, RodBinding<? extends Feature>> rodBindings, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
if( stratifiedContexts.size() == 0 )
|
||||
return null;
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,7 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broadinstitute.sting.commandline.RodBinding;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
|
|
@ -16,9 +18,9 @@ import java.util.List;
|
|||
import java.util.Map;
|
||||
|
||||
|
||||
public class QualByDepth extends AnnotationByDepth implements InfoFieldAnnotation, StandardAnnotation {
|
||||
public class QualByDepth extends AnnotationByDepth implements StandardAnnotation {
|
||||
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, Map<String, RodBinding<? extends Feature>> rodBindings, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
if ( stratifiedContexts.size() == 0 )
|
||||
return null;
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,7 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broadinstitute.sting.commandline.RodBinding;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
|
|
@ -20,9 +22,9 @@ import java.util.List;
|
|||
import java.util.Map;
|
||||
|
||||
|
||||
public class RMSMappingQuality implements InfoFieldAnnotation, StandardAnnotation {
|
||||
public class RMSMappingQuality extends InfoFieldAnnotation implements StandardAnnotation {
|
||||
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, Map<String, RodBinding<? extends Feature>> rodBindings, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
if ( stratifiedContexts.size() == 0 )
|
||||
return null;
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,7 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broadinstitute.sting.commandline.RodBinding;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
|
|
@ -21,11 +23,11 @@ import java.util.Map;
|
|||
|
||||
|
||||
|
||||
public abstract class RankSumTest implements InfoFieldAnnotation, StandardAnnotation {
|
||||
public abstract class RankSumTest extends InfoFieldAnnotation implements StandardAnnotation {
|
||||
static final double INDEL_LIKELIHOOD_THRESH = 0.1;
|
||||
static final boolean DEBUG = false;
|
||||
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, Map<String, RodBinding<? extends Feature>> rodBindings, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
if ( stratifiedContexts.size() == 0 )
|
||||
return null;
|
||||
|
||||
|
|
|
|||
|
|
@ -25,6 +25,8 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broadinstitute.sting.commandline.RodBinding;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
|
|
@ -52,13 +54,13 @@ import java.util.Map;
|
|||
* Time: 3:59:27 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class ReadDepthAndAllelicFractionBySample implements GenotypeAnnotation {
|
||||
public class ReadDepthAndAllelicFractionBySample extends GenotypeAnnotation {
|
||||
|
||||
private static String REF_ALLELE = "REF";
|
||||
|
||||
private static String DEL = "DEL"; // constant, for speed: no need to create a key string for deletion allele every time
|
||||
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref,
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, Map<String, RodBinding<? extends Feature>> rodBindings, ReferenceContext ref,
|
||||
AlignmentContext stratifiedContext, VariantContext vc, Genotype g) {
|
||||
if ( g == null || !g.isCalled() )
|
||||
return null;
|
||||
|
|
|
|||
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue