diff --git a/build.xml b/build.xml
index fc495f7cc..4acb2086a 100644
--- a/build.xml
+++ b/build.xml
@@ -468,6 +468,10 @@
+
+
+
+
@@ -520,6 +524,8 @@
+
+
@@ -1074,7 +1080,7 @@
-
+
@@ -1087,7 +1093,7 @@
-
+
diff --git a/public/R/src/gsalib/DESCRIPTION b/public/R/src/gsalib/DESCRIPTION
new file mode 100644
index 000000000..6116e8c66
--- /dev/null
+++ b/public/R/src/gsalib/DESCRIPTION
@@ -0,0 +1,10 @@
+Package: gsalib
+Type: Package
+Title: Utility functions
+Version: 1.0
+Date: 2010-10-02
+Author: Kiran Garimella
+Maintainer: Kiran Garimella
+Description: Utility functions for GATK NGS analyses
+License: BSD
+LazyLoad: yes
diff --git a/public/R/src/gsalib/R/gsa.error.R b/public/R/src/gsalib/R/gsa.error.R
new file mode 100644
index 000000000..1c6a56046
--- /dev/null
+++ b/public/R/src/gsalib/R/gsa.error.R
@@ -0,0 +1,12 @@
+gsa.error <- function(message) {
+ message("");
+ gsa.message("Error: **********");
+ gsa.message(sprintf("Error: %s", message));
+ gsa.message("Error: **********");
+ message("");
+
+ traceback();
+
+ message("");
+ stop(message, call. = FALSE);
+}
diff --git a/public/R/src/gsalib/R/gsa.getargs.R b/public/R/src/gsalib/R/gsa.getargs.R
new file mode 100644
index 000000000..94613bf93
--- /dev/null
+++ b/public/R/src/gsalib/R/gsa.getargs.R
@@ -0,0 +1,116 @@
+.gsa.getargs.usage <- function(argspec, doc) {
+ cargs = commandArgs();
+
+ usage = "Usage:";
+
+ fileIndex = grep("--file=", cargs);
+ if (length(fileIndex) > 0) {
+ progname = gsub("--file=", "", cargs[fileIndex[1]]);
+
+ usage = sprintf("Usage: Rscript %s [arguments]", progname);
+
+ if (!is.na(doc)) {
+ message(sprintf("%s: %s\n", progname, doc));
+ }
+ }
+
+ message(usage);
+
+ for (argname in names(argspec)) {
+ key = argname;
+ defaultValue = 0;
+ doc = "";
+
+ if (is.list(argspec[[argname]])) {
+ defaultValue = argspec[[argname]]$value;
+ doc = argspec[[argname]]$doc;
+ }
+
+ message(sprintf(" -%-10s\t[default: %s]\t%s", key, defaultValue, doc));
+ }
+
+ message("");
+
+ stop(call. = FALSE);
+}
+
+gsa.getargs <- function(argspec, doc = NA) {
+ argsenv = new.env();
+
+ for (argname in names(argspec)) {
+ value = 0;
+ if (is.list(argspec[[argname]])) {
+ value = argspec[[argname]]$value;
+ } else {
+ value = argspec[[argname]];
+ }
+
+ assign(argname, value, envir=argsenv);
+ }
+
+ if (interactive()) {
+ for (argname in names(argspec)) {
+ value = get(argname, envir=argsenv);
+
+ if (is.na(value) | is.null(value)) {
+ if (exists("cmdargs")) {
+ assign(argname, cmdargs[[argname]], envir=argsenv);
+ } else {
+ assign(argname, readline(sprintf("Please enter a value for '%s': ", argname)), envir=argsenv);
+ }
+ } else {
+ assign(argname, value, envir=argsenv);
+ }
+ }
+ } else {
+ cargs = commandArgs(TRUE);
+
+ if (length(cargs) == 0) {
+ .gsa.getargs.usage(argspec, doc);
+ }
+
+ for (i in 1:length(cargs)) {
+ if (length(grep("^-", cargs[i], ignore.case=TRUE)) > 0) {
+ key = gsub("-", "", cargs[i]);
+ value = cargs[i+1];
+
+ if (key == "h" | key == "help") {
+ .gsa.getargs.usage(argspec, doc);
+ }
+
+ if (length(grep("^[\\d\\.e\\+\\-]+$", value, perl=TRUE, ignore.case=TRUE)) > 0) {
+ value = as.numeric(value);
+ }
+
+ assign(key, value, envir=argsenv);
+ }
+ }
+ }
+
+ args = as.list(argsenv);
+
+ isMissingArgs = 0;
+ missingArgs = c();
+
+ for (arg in names(argspec)) {
+ if (is.na(args[[arg]]) | is.null(args[[arg]])) {
+ gsa.warn(sprintf("Value for required argument '-%s' was not specified", arg));
+
+ isMissingArgs = 1;
+ missingArgs = c(missingArgs, arg);
+ }
+ }
+
+ if (isMissingArgs) {
+ gsa.error(
+ paste(
+ "Missing required arguments: -",
+ paste(missingArgs, collapse=" -"),
+ ". Specify -h or -help to this script for a list of available arguments.",
+ sep=""
+ )
+ );
+ }
+
+ args;
+}
diff --git a/public/R/src/gsalib/R/gsa.message.R b/public/R/src/gsalib/R/gsa.message.R
new file mode 100644
index 000000000..a2b909d3d
--- /dev/null
+++ b/public/R/src/gsalib/R/gsa.message.R
@@ -0,0 +1,3 @@
+gsa.message <- function(message) {
+ message(sprintf("[gsalib] %s", message));
+}
diff --git a/public/R/src/gsalib/R/gsa.plot.venn.R b/public/R/src/gsalib/R/gsa.plot.venn.R
new file mode 100644
index 000000000..b1353ccc1
--- /dev/null
+++ b/public/R/src/gsalib/R/gsa.plot.venn.R
@@ -0,0 +1,50 @@
+gsa.plot.venn <-
+function(a, b, c=0, a_and_b, a_and_c=0, b_and_c=0,
+ col=c("#FF6342", "#63C6DE", "#ADDE63"),
+ pos=c(0.20, 0.20, 0.80, 0.82),
+ debug=0
+ ) {
+ library(png);
+ library(graphics);
+
+ # Set up properties
+ for (i in 1:length(col)) {
+ rgbcol = col2rgb(col[i]);
+ col[i] = sprintf("%02X%02X%02X", rgbcol[1], rgbcol[2], rgbcol[3]);
+ }
+
+ chco = paste(col[1], col[2], col[3], sep=",");
+ chd = paste(a, b, c, a_and_b, a_and_c, b_and_c, sep=",");
+
+ props = c(
+ 'cht=v',
+ 'chs=525x525',
+ 'chds=0,10000000000',
+ paste('chco=', chco, sep=""),
+ paste('chd=t:', chd, sep="")
+ );
+ proplist = paste(props[1], props[2], props[3], props[4], props[5], sep='&');
+
+ # Get the venn diagram (as a temporary file)
+ filename = tempfile("venn");
+ cmd = paste("wget -O ", filename, " 'http://chart.apis.google.com/chart?", proplist, "' > /dev/null 2>&1", sep="");
+
+ if (debug == 1) {
+ print(cmd);
+ }
+ system(cmd);
+
+ # Render the temp png file into a plotting frame
+ a = readPNG(filename);
+
+ plot(0, 0, type="n", xaxt="n", yaxt="n", bty="n", xlim=c(0, 1), ylim=c(0, 1), xlab="", ylab="");
+ if (c == 0 || a >= b) {
+ rasterImage(a, pos[1], pos[2], pos[3], pos[4]);
+ } else {
+ rasterImage(a, 0.37+pos[1], 0.37+pos[2], 0.37+pos[3], 0.37+pos[4], angle=180);
+ }
+
+ # Clean up!
+ unlink(filename);
+}
+
diff --git a/public/R/src/gsalib/R/gsa.read.eval.R b/public/R/src/gsalib/R/gsa.read.eval.R
new file mode 100644
index 000000000..f1d49092b
--- /dev/null
+++ b/public/R/src/gsalib/R/gsa.read.eval.R
@@ -0,0 +1,83 @@
+.gsa.attemptToLoadFile <- function(filename) {
+ file = NA;
+
+ if (file.exists(filename) & file.info(filename)$size > 500) {
+ file = read.csv(filename, header=TRUE, comment.char="#");
+ }
+
+ file;
+}
+
+gsa.read.eval <-
+function(evalRoot) {
+ fileAlleleCountStats = paste(evalRoot, ".AlleleCountStats.csv", sep="");
+ fileCompOverlap = paste(evalRoot, ".Comp_Overlap.csv", sep="");
+ fileCountVariants = paste(evalRoot, ".Count_Variants.csv", sep="");
+ fileGenotypeConcordance = paste(evalRoot, ".Genotype_Concordance.csv", sep="");
+ fileMetricsByAc = paste(evalRoot, ".MetricsByAc.csv", sep="");
+ fileMetricsBySample = paste(evalRoot, ".MetricsBySample.csv", sep="");
+ fileQuality_Metrics_by_allele_count = paste(evalRoot, ".Quality_Metrics_by_allele_count.csv", sep="");
+ fileQualityScoreHistogram = paste(evalRoot, ".QualityScoreHistogram.csv", sep="");
+ fileSampleStatistics = paste(evalRoot, ".Sample_Statistics.csv", sep="");
+ fileSampleSummaryStatistics = paste(evalRoot, ".Sample_Summary_Statistics.csv", sep="");
+ fileSimpleMetricsBySample = paste(evalRoot, ".SimpleMetricsBySample.csv", sep="");
+ fileTi_slash_Tv_Variant_Evaluator = paste(evalRoot, ".Ti_slash_Tv_Variant_Evaluator.csv", sep="");
+ fileTiTvStats = paste(evalRoot, ".TiTvStats.csv", sep="");
+ fileVariant_Quality_Score = paste(evalRoot, ".Variant_Quality_Score.csv", sep="");
+
+ eval = list(
+ AlleleCountStats = NA,
+ CompOverlap = NA,
+ CountVariants = NA,
+ GenotypeConcordance = NA,
+ MetricsByAc = NA,
+ MetricsBySample = NA,
+ Quality_Metrics_by_allele_count = NA,
+ QualityScoreHistogram = NA,
+ SampleStatistics = NA,
+ SampleSummaryStatistics = NA,
+ SimpleMetricsBySample = NA,
+ TiTv = NA,
+ TiTvStats = NA,
+ Variant_Quality_Score = NA,
+
+ CallsetNames = c(),
+ CallsetOnlyNames = c(),
+ CallsetFilteredNames = c()
+ );
+
+ eval$AlleleCountStats = .gsa.attemptToLoadFile(fileAlleleCountStats);
+ eval$CompOverlap = .gsa.attemptToLoadFile(fileCompOverlap);
+ eval$CountVariants = .gsa.attemptToLoadFile(fileCountVariants);
+ eval$GenotypeConcordance = .gsa.attemptToLoadFile(fileGenotypeConcordance);
+ eval$MetricsByAc = .gsa.attemptToLoadFile(fileMetricsByAc);
+ eval$MetricsBySample = .gsa.attemptToLoadFile(fileMetricsBySample);
+ eval$Quality_Metrics_by_allele_count = .gsa.attemptToLoadFile(fileQuality_Metrics_by_allele_count);
+ eval$QualityScoreHistogram = .gsa.attemptToLoadFile(fileQualityScoreHistogram);
+ eval$SampleStatistics = .gsa.attemptToLoadFile(fileSampleStatistics);
+ eval$SampleSummaryStatistics = .gsa.attemptToLoadFile(fileSampleSummaryStatistics);
+ eval$SimpleMetricsBySample = .gsa.attemptToLoadFile(fileSimpleMetricsBySample);
+ eval$TiTv = .gsa.attemptToLoadFile(fileTi_slash_Tv_Variant_Evaluator);
+ eval$TiTvStats = .gsa.attemptToLoadFile(fileTiTvStats);
+ eval$Variant_Quality_Score = .gsa.attemptToLoadFile(fileVariant_Quality_Score);
+
+ uniqueJexlExpressions = unique(eval$TiTv$jexl_expression);
+ eval$CallsetOnlyNames = as.vector(uniqueJexlExpressions[grep("FilteredIn|Intersection|none", uniqueJexlExpressions, invert=TRUE, ignore.case=TRUE)]);
+ eval$CallsetNames = as.vector(gsub("-only", "", eval$CallsetOnlyNames));
+ eval$CallsetFilteredNames = as.vector(c(
+ paste(gsub("^(\\w)", "In\\U\\1", eval$CallsetNames[1], perl=TRUE), "-Filtered", gsub("^(\\w)", "In\\U\\1", eval$CallsetNames[2], perl=TRUE), sep=""),
+ paste(gsub("^(\\w)", "In\\U\\1", eval$CallsetNames[2], perl=TRUE), "-Filtered", gsub("^(\\w)", "In\\U\\1", eval$CallsetNames[1], perl=TRUE), sep=""))
+ );
+
+ if (!(eval$CallsetFilteredNames[1] %in% unique(eval$TiTv$jexl_expression))) {
+ eval$CallsetFilteredNames[1] = paste("In", eval$CallsetNames[1], "-FilteredIn", eval$CallsetNames[2], sep="");
+ }
+
+ if (!(eval$CallsetFilteredNames[2] %in% unique(eval$TiTv$jexl_expression))) {
+ eval$CallsetFilteredNames[2] = paste("In", eval$CallsetNames[2], "-FilteredIn", eval$CallsetNames[1], sep="");
+ #eval$CallsetFilteredNames[2] = paste(gsub("^(\\w)", "In", eval$CallsetNames[2], perl=TRUE), "-Filtered", gsub("^(\\w)", "In", eval$CallsetNames[1], perl=TRUE), sep="");
+ }
+
+ eval;
+}
+
diff --git a/public/R/src/gsalib/R/gsa.read.gatkreport.R b/public/R/src/gsalib/R/gsa.read.gatkreport.R
new file mode 100644
index 000000000..011b5240d
--- /dev/null
+++ b/public/R/src/gsalib/R/gsa.read.gatkreport.R
@@ -0,0 +1,103 @@
+# Load a table into the specified environment. Make sure that each new table gets a unique name (this allows one to cat a bunch of tables with the same name together and load them into R without each table overwriting the last.
+.gsa.assignGATKTableToEnvironment <- function(tableName, tableHeader, tableRows, tableEnv) {
+ d = data.frame(tableRows, row.names=NULL, stringsAsFactors=FALSE);
+ colnames(d) = tableHeader;
+
+ for (i in 1:ncol(d)) {
+ v = suppressWarnings(as.numeric(d[,i]));
+
+ if (length(na.omit(as.numeric(v))) == length(d[,i])) {
+ d[,i] = v;
+ }
+ }
+
+ usedNames = ls(envir=tableEnv, pattern=tableName);
+
+ if (length(usedNames) > 0) {
+ tableName = paste(tableName, ".", length(usedNames), sep="");
+ }
+
+ assign(tableName, d, envir=tableEnv);
+}
+
+# Read a fixed width line of text into a list.
+.gsa.splitFixedWidth <- function(line, columnStarts) {
+ splitStartStop <- function(x) {
+ x = substring(x, starts, stops);
+ x = gsub("^[[:space:]]+|[[:space:]]+$", "", x);
+ x;
+ }
+
+ starts = c(1, columnStarts);
+ stops = c(columnStarts - 1, nchar(line));
+
+ sapply(line, splitStartStop)[,1];
+}
+
+# Load all GATKReport tables from a file
+gsa.read.gatkreport <- function(filename) {
+ con = file(filename, "r", blocking = TRUE);
+ lines = readLines(con);
+ close(con);
+
+ tableEnv = new.env();
+
+ tableName = NA;
+ tableHeader = c();
+ tableRows = c();
+ version = NA;
+
+ for (line in lines) {
+ if (length(grep("^##:GATKReport.v", line, ignore.case=TRUE)) > 0) {
+ headerFields = unlist(strsplit(line, "[[:space:]]+"));
+
+ if (!is.na(tableName)) {
+ .gsa.assignGATKTableToEnvironment(tableName, tableHeader, tableRows, tableEnv);
+ }
+
+ tableName = headerFields[2];
+ tableHeader = c();
+ tableRows = c();
+
+ # For differences in versions see
+ # $STING_HOME/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportVersion.java
+ if (length(grep("^##:GATKReport.v0.1[[:space:]]+", line, ignore.case=TRUE)) > 0) {
+ version = "v0.1";
+
+ } else if (length(grep("^##:GATKReport.v0.2[[:space:]]+", line, ignore.case=TRUE)) > 0) {
+ version = "v0.2";
+ columnStarts = c();
+
+ }
+
+ } else if (length(grep("^[[:space:]]*$", line)) > 0 | length(grep("^[[:space:]]*#", line)) > 0) {
+ # do nothing
+ } else if (!is.na(tableName)) {
+
+ if (version == "v0.1") {
+ row = unlist(strsplit(line, "[[:space:]]+"));
+
+ } else if (version == "v0.2") {
+ if (length(tableHeader) == 0) {
+ headerChars = unlist(strsplit(line, ""));
+ # Find the first position of non space characters, excluding the first character
+ columnStarts = intersect(grep("[[:space:]]", headerChars, invert=TRUE), grep("[[:space:]]", headerChars) + 1);
+ }
+
+ row = .gsa.splitFixedWidth(line, columnStarts);
+ }
+
+ if (length(tableHeader) == 0) {
+ tableHeader = row;
+ } else {
+ tableRows = rbind(tableRows, row);
+ }
+ }
+ }
+
+ if (!is.na(tableName)) {
+ .gsa.assignGATKTableToEnvironment(tableName, tableHeader, tableRows, tableEnv);
+ }
+
+ gatkreport = as.list(tableEnv);
+}
diff --git a/public/R/src/gsalib/R/gsa.read.squidmetrics.R b/public/R/src/gsalib/R/gsa.read.squidmetrics.R
new file mode 100644
index 000000000..39fa1ad32
--- /dev/null
+++ b/public/R/src/gsalib/R/gsa.read.squidmetrics.R
@@ -0,0 +1,28 @@
+gsa.read.squidmetrics = function(project, bylane = FALSE) {
+ suppressMessages(library(ROracle));
+
+ drv = dbDriver("Oracle");
+ con = dbConnect(drv, "REPORTING/REPORTING@ora01:1521/SEQPROD");
+
+ if (bylane) {
+ statement = paste("SELECT * FROM ILLUMINA_PICARD_METRICS WHERE \"Project\" = '", project, "'", sep="");
+ print(statement);
+
+ rs = dbSendQuery(con, statement = statement);
+ d = fetch(rs, n=-1);
+ dbHasCompleted(rs);
+ dbClearResult(rs);
+ } else {
+ statement = paste("SELECT * FROM ILLUMINA_SAMPLE_STATUS_AGG WHERE \"Project\" = '", project, "'", sep="");
+ print(statement);
+
+ rs = dbSendQuery(con, statement = statement);
+ d = fetch(rs, n=-1);
+ dbHasCompleted(rs);
+ dbClearResult(rs);
+ }
+
+ oraCloseDriver(drv);
+
+ subset(d, Project == project);
+}
diff --git a/public/R/src/gsalib/R/gsa.read.vcf.R b/public/R/src/gsalib/R/gsa.read.vcf.R
new file mode 100644
index 000000000..5beb6455d
--- /dev/null
+++ b/public/R/src/gsalib/R/gsa.read.vcf.R
@@ -0,0 +1,23 @@
+gsa.read.vcf <- function(vcffile, skip=0, nrows=-1, expandGenotypeFields = FALSE) {
+ headers = readLines(vcffile, n=100);
+ headerline = headers[grep("#CHROM", headers)];
+ header = unlist(strsplit(gsub("#", "", headerline), "\t"))
+
+ d = read.table(vcffile, header=FALSE, skip=skip, nrows=nrows, stringsAsFactors=FALSE);
+ colnames(d) = header;
+
+ if (expandGenotypeFields) {
+ columns = ncol(d);
+
+ offset = columns + 1;
+ for (sampleIndex in 10:columns) {
+ gt = unlist(lapply(strsplit(d[,sampleIndex], ":"), function(x) x[1]));
+ d[,offset] = gt;
+ colnames(d)[offset] = sprintf("%s.GT", colnames(d)[sampleIndex]);
+
+ offset = offset + 1;
+ }
+ }
+
+ return(d);
+}
diff --git a/public/R/src/gsalib/R/gsa.warn.R b/public/R/src/gsalib/R/gsa.warn.R
new file mode 100644
index 000000000..7ee08ce65
--- /dev/null
+++ b/public/R/src/gsalib/R/gsa.warn.R
@@ -0,0 +1,3 @@
+gsa.warn <- function(message) {
+ gsa.message(sprintf("Warning: %s", message));
+}
diff --git a/public/R/src/gsalib/Read-and-delete-me b/public/R/src/gsalib/Read-and-delete-me
new file mode 100644
index 000000000..d04323a6e
--- /dev/null
+++ b/public/R/src/gsalib/Read-and-delete-me
@@ -0,0 +1,9 @@
+* Edit the help file skeletons in 'man', possibly combining help files
+ for multiple functions.
+* Put any C/C++/Fortran code in 'src'.
+* If you have compiled code, add a .First.lib() function in 'R' to load
+ the shared library.
+* Run R CMD build to build the package tarball.
+* Run R CMD check to check the package tarball.
+
+Read "Writing R Extensions" for more information.
diff --git a/public/R/src/gsalib/data/tearsheetdrop.jpg b/public/R/src/gsalib/data/tearsheetdrop.jpg
new file mode 100755
index 000000000..c9d480fa0
Binary files /dev/null and b/public/R/src/gsalib/data/tearsheetdrop.jpg differ
diff --git a/public/R/src/gsalib/man/gsa.error.Rd b/public/R/src/gsalib/man/gsa.error.Rd
new file mode 100644
index 000000000..df7c0cbde
--- /dev/null
+++ b/public/R/src/gsalib/man/gsa.error.Rd
@@ -0,0 +1,49 @@
+\name{gsa.error}
+\alias{gsa.error}
+\title{
+GSA error
+}
+\description{
+Write an error message to standard out with the prefix '[gsalib] Error:', print a traceback, and exit.
+}
+\usage{
+gsa.error(message)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+ \item{message}{
+The error message to write.
+}
+}
+\details{
+%% ~~ If necessary, more details than the description above ~~
+}
+\value{
+%% ~Describe the value returned
+%% If it is a LIST, use
+%% \item{comp1 }{Description of 'comp1'}
+%% \item{comp2 }{Description of 'comp2'}
+%% ...
+}
+\references{
+%% ~put references to the literature/web site here ~
+}
+\author{
+Kiran Garimella
+}
+\note{
+%% ~~further notes~~
+}
+
+%% ~Make other sections like Warning with \section{Warning }{....} ~
+
+\seealso{
+%% ~~objects to See Also as \code{\link{help}}, ~~~
+}
+\examples{
+gsa.error("This is a message");
+}
+% Add one or more standard keywords, see file 'KEYWORDS' in the
+% R documentation directory.
+\keyword{ ~kwd1 }
+\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
diff --git a/public/R/src/gsalib/man/gsa.getargs.Rd b/public/R/src/gsalib/man/gsa.getargs.Rd
new file mode 100644
index 000000000..27aa1b05a
--- /dev/null
+++ b/public/R/src/gsalib/man/gsa.getargs.Rd
@@ -0,0 +1,57 @@
+\name{gsa.getargs}
+\alias{gsa.getargs}
+\title{
+Get script arguments
+}
+\description{
+Get script arguments given a list object specifying arguments and documentation. Can be used in command-line or interactive mode. This is helpful when developing scripts in interactive mode that will eventually become command-line programs. If no arguments are specified or help is requested in command-line mode, the script will print out a usage statement with available arguments and exit.
+}
+\usage{
+gsa.getargs(argspec, doc = NA)
+}
+\arguments{
+ \item{argspec}{
+A list object. Each key is an argument name. The value is another list object with a 'value' and 'doc' keys. For example:
+\preformatted{argspec = list(
+ arg1 = list(value=10, doc="Info for optional arg1"),
+ arg2 = list(value=NA, doc="Info for required arg2")
+);
+}
+
+If the value provided is NA, the argument is considered required and must be specified when the script is invoked. For command-line mode, this means the argument must be specified on the command-line. In interactive mode, there are two ways of specifying these arguments. First, if a properly formatted list argument called 'cmdargs' is present in the current environment (i.e. the object returned by gsa.getargs() from a previous invocation), the value is taken from this object. Otherwise, the argument is prompted for.
+}
+
+ \item{doc}{
+An optional string succinctly documenting the purpose of the script.
+}
+}
+\details{
+Interactive scripts typically make use of hardcoded filepaths and parameter settings. This makes testing easy, but generalization to non-interactive mode more difficult. This utility provides a mechanism for writing scripts that work properly in both interactive and command-line modes.
+
+To use this method, specify a list with key-value pairs representing the arguments as specified above. In command-line mode, if no arguments are specified or the user specifies '-h' or '-help' anywhere on the command string, a help message indicating available arguments, their default values, and some documentation about the argument are provided.
+}
+\value{
+Returns a list with keys matching the argspec and values representing the specified arguments.
+
+\item{arg1 }{Value for argument 1}
+\item{arg2 }{Value for argument 2}
+...etc.
+}
+\references{
+%% ~put references to the literature/web site here ~
+}
+\author{
+Kiran Garimella
+}
+\examples{
+argspec = list(
+ file = list(value="/my/test.vcf", doc="VCF file"),
+ verbose = list(value=0, doc="If 1, set verbose mode"),
+ test2 = list(value=2.3e9, doc="Another argument that does stuff")
+);
+
+cmdargs = gsa.getargs(argspec, doc="My test program");
+
+print(cmdargs$file); # will print '[1] "/my/test.vcf"'
+}
+\keyword{ ~kwd1 }
diff --git a/public/R/src/gsalib/man/gsa.message.Rd b/public/R/src/gsalib/man/gsa.message.Rd
new file mode 100644
index 000000000..9752de8a9
--- /dev/null
+++ b/public/R/src/gsalib/man/gsa.message.Rd
@@ -0,0 +1,44 @@
+\name{gsa.message}
+\alias{gsa.message}
+\title{
+GSA message
+}
+\description{
+Write a message to standard out with the prefix '[gsalib]'.
+}
+\usage{
+gsa.message(message)
+}
+\arguments{
+ \item{message}{
+The message to write.
+}
+}
+\details{
+%% ~~ If necessary, more details than the description above ~~
+}
+\value{
+%% ~Describe the value returned
+%% If it is a LIST, use
+%% \item{comp1 }{Description of 'comp1'}
+%% \item{comp2 }{Description of 'comp2'}
+%% ...
+}
+\references{
+%% ~put references to the literature/web site here ~
+}
+\author{
+Kiran Garimella
+}
+\note{
+%% ~~further notes~~
+}
+
+\seealso{
+%% ~~objects to See Also as \code{\link{help}}, ~~~
+}
+\examples{
+## Write message to stdout
+gsa.message("This is a message");
+}
+\keyword{ ~kwd1 }
diff --git a/public/R/src/gsalib/man/gsa.plot.venn.Rd b/public/R/src/gsalib/man/gsa.plot.venn.Rd
new file mode 100644
index 000000000..bf4feb5bc
--- /dev/null
+++ b/public/R/src/gsalib/man/gsa.plot.venn.Rd
@@ -0,0 +1,75 @@
+\name{gsa.plot.venn}
+\alias{gsa.plot.venn}
+\title{
+Plot a proportional venn diagram
+}
+\description{
+Plot a proportional venn diagram (two or three-way venns allowed)
+}
+\usage{
+gsa.plot.venn(a, b, c = 0, a_and_b, a_and_c = 0, b_and_c = 0, col = c("#FF6342", "#63C6DE", "#ADDE63"), pos = c(0.2, 0.2, 0.8, 0.82), debug = 0)
+}
+\arguments{
+ \item{a}{
+size of 'a' circle
+}
+ \item{b}{
+size of 'b' circle
+}
+ \item{c}{
+size of 'c' circle
+}
+ \item{a_and_b}{
+size of a and b overlap
+}
+ \item{a_and_c}{
+size of a and c overlap
+}
+ \item{b_and_c}{
+size of b and c overlap
+}
+ \item{col}{
+vector of colors for each venn piece
+}
+ \item{pos}{
+vector of positional elements
+}
+ \item{debug}{
+if 1, set debug mode and print useful information
+}
+}
+\details{
+Plots a two-way or three-way proportional Venn diagram. Internally, this method uses the Google Chart API to generate the diagram, then renders it into the plot window where it can be annotated in interesting ways.
+}
+\value{
+%% ~Describe the value returned
+%% If it is a LIST, use
+%% \item{comp1 }{Description of 'comp1'}
+%% \item{comp2 }{Description of 'comp2'}
+%% ...
+}
+\references{
+}
+\author{
+Kiran Garimella
+}
+\note{
+%% ~~further notes~~
+}
+
+%% ~Make other sections like Warning with \section{Warning }{....} ~
+
+\seealso{
+%% ~~objects to See Also as \code{\link{help}}, ~~~
+}
+\examples{
+## Plot a two-way Venn diagram
+gsa.plot.venn(1000, 750, 0, 400);
+
+## Plot a three-way Venn diagram
+gsa.plot.venn(1000, 750, 900, 400, 650, 500);
+}
+% Add one or more standard keywords, see file 'KEYWORDS' in the
+% R documentation directory.
+\keyword{ ~kwd1 }
+\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
diff --git a/public/R/src/gsalib/man/gsa.read.eval.Rd b/public/R/src/gsalib/man/gsa.read.eval.Rd
new file mode 100644
index 000000000..0e2baba73
--- /dev/null
+++ b/public/R/src/gsalib/man/gsa.read.eval.Rd
@@ -0,0 +1,111 @@
+\name{gsa.read.eval}
+\alias{gsa.read.eval}
+\title{
+Read a VariantEval file
+}
+\description{
+Read a VariantEval file that's output in R format.
+}
+\usage{
+gsa.read.eval(evalRoot)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+ \item{evalRoot}{
+%% ~~Describe \code{evalRoot} here~~
+}
+}
+\details{
+%% ~~ If necessary, more details than the description above ~~
+}
+\value{
+%% ~Describe the value returned
+%% If it is a LIST, use
+%% \item{comp1 }{Description of 'comp1'}
+%% \item{comp2 }{Description of 'comp2'}
+%% ...
+}
+\references{
+%% ~put references to the literature/web site here ~
+}
+\author{
+%% ~~who you are~~
+}
+\note{
+%% ~~further notes~~
+}
+
+%% ~Make other sections like Warning with \section{Warning }{....} ~
+
+\seealso{
+%% ~~objects to See Also as \code{\link{help}}, ~~~
+}
+\examples{
+##---- Should be DIRECTLY executable !! ----
+##-- ==> Define data, use random,
+##-- or do help(data=index) for the standard data sets.
+
+## The function is currently defined as
+function(evalRoot) {
+ fileAlleleCountStats = paste(evalRoot, ".AlleleCountStats.csv", sep="");
+ fileCompOverlap = paste(evalRoot, ".Comp_Overlap.csv", sep="");
+ fileCountVariants = paste(evalRoot, ".Count_Variants.csv", sep="");
+ fileGenotypeConcordance = paste(evalRoot, ".Genotype_Concordance.csv", sep="");
+ fileMetricsByAc = paste(evalRoot, ".MetricsByAc.csv", sep="");
+ fileMetricsBySample = paste(evalRoot, ".MetricsBySample.csv", sep="");
+ fileQuality_Metrics_by_allele_count = paste(evalRoot, ".Quality_Metrics_by_allele_count.csv", sep="");
+ fileQualityScoreHistogram = paste(evalRoot, ".QualityScoreHistogram.csv", sep="");
+ fileSampleStatistics = paste(evalRoot, ".Sample_Statistics.csv", sep="");
+ fileSampleSummaryStatistics = paste(evalRoot, ".Sample_Summary_Statistics.csv", sep="");
+ fileSimpleMetricsBySample = paste(evalRoot, ".SimpleMetricsBySample.csv", sep="");
+ fileTi_slash_Tv_Variant_Evaluator = paste(evalRoot, ".Ti_slash_Tv_Variant_Evaluator.csv", sep="");
+ fileTiTvStats = paste(evalRoot, ".TiTvStats.csv", sep="");
+ fileVariant_Quality_Score = paste(evalRoot, ".Variant_Quality_Score.csv", sep="");
+
+ eval = list(
+ AlleleCountStats = NA,
+ CompOverlap = NA,
+ CountVariants = NA,
+ GenotypeConcordance = NA,
+ MetricsByAc = NA,
+ MetricsBySample = NA,
+ Quality_Metrics_by_allele_count = NA,
+ QualityScoreHistogram = NA,
+ SampleStatistics = NA,
+ SampleSummaryStatistics = NA,
+ SimpleMetricsBySample = NA,
+ TiTv = NA,
+ TiTvStats = NA,
+ Variant_Quality_Score = NA,
+
+ CallsetNames = c(),
+ CallsetOnlyNames = c(),
+ CallsetFilteredNames = c()
+ );
+
+ eval$AlleleCountStats = .attemptToLoadFile(fileAlleleCountStats);
+ eval$CompOverlap = .attemptToLoadFile(fileCompOverlap);
+ eval$CountVariants = .attemptToLoadFile(fileCountVariants);
+ eval$GenotypeConcordance = .attemptToLoadFile(fileGenotypeConcordance);
+ eval$MetricsByAc = .attemptToLoadFile(fileMetricsByAc);
+ eval$MetricsBySample = .attemptToLoadFile(fileMetricsBySample);
+ eval$Quality_Metrics_by_allele_count = .attemptToLoadFile(fileQuality_Metrics_by_allele_count);
+ eval$QualityScoreHistogram = .attemptToLoadFile(fileQualityScoreHistogram);
+ eval$SampleStatistics = .attemptToLoadFile(fileSampleStatistics);
+ eval$SampleSummaryStatistics = .attemptToLoadFile(fileSampleSummaryStatistics);
+ eval$SimpleMetricsBySample = .attemptToLoadFile(fileSimpleMetricsBySample);
+ eval$TiTv = .attemptToLoadFile(fileTi_slash_Tv_Variant_Evaluator);
+ eval$TiTvStats = .attemptToLoadFile(fileTiTvStats);
+ eval$Variant_Quality_Score = .attemptToLoadFile(fileVariant_Quality_Score);
+
+ uniqueJexlExpressions = unique(eval$TiTv$jexl_expression);
+ eval$CallsetOnlyNames = as.vector(uniqueJexlExpressions[grep("FilteredIn|Intersection|none", uniqueJexlExpressions, invert=TRUE, ignore.case=TRUE)]);
+ eval$CallsetNames = as.vector(gsub("-only", "", eval$CallsetOnlyNames));
+ eval$CallsetFilteredNames = as.vector(c());
+ eval;
+ }
+}
+% Add one or more standard keywords, see file 'KEYWORDS' in the
+% R documentation directory.
+\keyword{ ~kwd1 }
+\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
diff --git a/public/R/src/gsalib/man/gsa.read.gatkreport.Rd b/public/R/src/gsalib/man/gsa.read.gatkreport.Rd
new file mode 100644
index 000000000..67c2c7b28
--- /dev/null
+++ b/public/R/src/gsalib/man/gsa.read.gatkreport.Rd
@@ -0,0 +1,55 @@
+\name{gsa.read.gatkreport}
+\alias{gsa.read.gatkreport}
+\title{
+gsa.read.gatkreport
+}
+\description{
+Reads a GATKReport file - a multi-table document - and loads each table as a separate data.frame object in a list.
+}
+\usage{
+gsa.read.gatkreport(filename)
+}
+\arguments{
+ \item{filename}{
+The path to the GATKReport file.
+}
+}
+\details{
+The GATKReport format replaces the multi-file output format used by many GATK tools and provides a single, consolidated file format. This format accomodates multiple tables and is still R-loadable - through this function.
+
+The file format looks like this:
+\preformatted{##:GATKReport.v0.1 TableName : The description of the table
+col1 col2 col3
+0 0.007451835696110506 25.474613284804366
+1 0.002362777171937477 29.844949954504095
+2 9.087604507451836E-4 32.87590975254731
+3 5.452562704471102E-4 34.498999090081895
+4 9.087604507451836E-4 35.14831665150137
+}
+
+}
+\value{
+Returns a list object, where each key is the TableName and the value is the data.frame object with the contents of the table. If multiple tables with the same name exist, each one after the first will be given names of "TableName.v1", "TableName.v2", ..., "TableName.vN".
+%% ~Describe the value returned
+%% If it is a LIST, use
+%% \item{comp1 }{Description of 'comp1'}
+%% \item{comp2 }{Description of 'comp2'}
+%% ...
+}
+\references{
+%% ~put references to the literature/web site here ~
+}
+\author{
+Kiran Garimella
+}
+\note{
+%% ~~further notes~~
+}
+
+\seealso{
+%% ~~objects to See Also as \code{\link{help}}, ~~~
+}
+\examples{
+report = gsa.read.gatkreport("/path/to/my/output.gatkreport");
+}
+\keyword{ ~kwd1 }
diff --git a/public/R/src/gsalib/man/gsa.read.squidmetrics.Rd b/public/R/src/gsalib/man/gsa.read.squidmetrics.Rd
new file mode 100644
index 000000000..0a8b37843
--- /dev/null
+++ b/public/R/src/gsalib/man/gsa.read.squidmetrics.Rd
@@ -0,0 +1,48 @@
+\name{gsa.read.squidmetrics}
+\alias{gsa.read.squidmetrics}
+\title{
+gsa.read.squidmetrics
+}
+\description{
+Reads metrics for a specified SQUID project into a dataframe.
+}
+\usage{
+gsa.read.squidmetrics("C315")
+}
+\arguments{
+ \item{project}{
+The project for which metrics should be obtained.
+}
+ \item{bylane}{
+If TRUE, obtains per-lane metrics rather than the default per-sample metrics.
+}
+}
+\details{
+%% ~~ If necessary, more details than the description above ~~
+}
+\value{
+%% ~Describe the value returned
+%% If it is a LIST, use
+%% \item{comp1 }{Description of 'comp1'}
+%% \item{comp2 }{Description of 'comp2'}
+%% ...
+Returns a data frame with samples (or lanes) as the row and the metric as the column.
+}
+\references{
+%% ~put references to the literature/web site here ~
+}
+\author{
+Kiran Garimella
+}
+\note{
+This method will only work within the Broad Institute internal network.
+}
+
+\seealso{
+%% ~~objects to See Also as \code{\link{help}}, ~~~
+}
+\examples{
+## Obtain metrics for project C315.
+d = gsa.read.squidmetrics("C315");
+}
+\keyword{ ~kwd1 }
diff --git a/public/R/src/gsalib/man/gsa.read.vcf.Rd b/public/R/src/gsalib/man/gsa.read.vcf.Rd
new file mode 100644
index 000000000..cffd35e8f
--- /dev/null
+++ b/public/R/src/gsalib/man/gsa.read.vcf.Rd
@@ -0,0 +1,53 @@
+\name{gsa.read.vcf}
+\alias{gsa.read.vcf}
+\title{
+gsa.read.vcf
+}
+\description{
+Reads a VCF file into a table. Optionally expands genotype columns into separate columns containing the genotype, separate from the other fields specified in the FORMAT field.
+}
+\usage{
+gsa.read.vcf(vcffile, skip=0, nrows=-1, expandGenotypeFields = FALSE)
+}
+\arguments{
+ \item{vcffile}{
+The path to the vcf file.
+}
+ \item{skip}{
+The number of lines of the data file to skip before beginning to read data.
+}
+ \item{nrows}{
+The maximum number of rows to read in. Negative and other invalid values are ignored.
+}
+ \item{expandGenotypeFields}{
+If TRUE, adds an additional column per sample containing just the genotype.
+}
+}
+\details{
+The VCF format is the standard variant call file format used in the GATK. This function reads that data in as a table for easy analysis.
+}
+\value{
+Returns a data.frame object, where each column corresponds to the columns in the VCF file.
+%% ~Describe the value returned
+%% If it is a LIST, use
+%% \item{comp1 }{Description of 'comp1'}
+%% \item{comp2 }{Description of 'comp2'}
+%% ...
+}
+\references{
+%% ~put references to the literature/web site here ~
+}
+\author{
+Kiran Garimella
+}
+\note{
+%% ~~further notes~~
+}
+
+\seealso{
+%% ~~objects to See Also as \code{\link{help}}, ~~~
+}
+\examples{
+vcf = gsa.read.vcf("/path/to/my/output.vcf");
+}
+\keyword{ ~kwd1 }
diff --git a/public/R/src/gsalib/man/gsa.warn.Rd b/public/R/src/gsalib/man/gsa.warn.Rd
new file mode 100644
index 000000000..0b9770b5c
--- /dev/null
+++ b/public/R/src/gsalib/man/gsa.warn.Rd
@@ -0,0 +1,46 @@
+\name{gsa.warn}
+\alias{gsa.warn}
+\title{
+GSA warn
+}
+\description{
+Write a warning message to standard out with the prefix '[gsalib] Warning:'.
+}
+\usage{
+gsa.warn(message)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+ \item{message}{
+The warning message to write.
+}
+}
+\details{
+%% ~~ If necessary, more details than the description above ~~
+}
+\value{
+%% ~Describe the value returned
+%% If it is a LIST, use
+%% \item{comp1 }{Description of 'comp1'}
+%% \item{comp2 }{Description of 'comp2'}
+%% ...
+}
+\references{
+%% ~put references to the literature/web site here ~
+}
+\author{
+Kiran Garimella
+}
+\note{
+%% ~~further notes~~
+}
+
+\seealso{
+%% ~~objects to See Also as \code{\link{help}}, ~~~
+}
+\examples{
+## Write message to stdout
+gsa.warn("This is a warning message");
+}
+\keyword{ ~kwd1 }
+\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
diff --git a/public/R/src/gsalib/man/gsalib-package.Rd b/public/R/src/gsalib/man/gsalib-package.Rd
new file mode 100644
index 000000000..2b8d6db9f
--- /dev/null
+++ b/public/R/src/gsalib/man/gsalib-package.Rd
@@ -0,0 +1,68 @@
+\name{gsalib-package}
+\alias{gsalib-package}
+\alias{gsalib}
+\docType{package}
+\title{
+GATK utility analysis functions
+}
+\description{
+Utility functions for analyzing GATK-processed NGS data
+}
+\details{
+This package contains functions for working with GATK-processed NGS data. These functions include a command-line parser that also allows a script to be used in interactive mode (good for developing scripts that will eventually be automated), a proportional Venn diagram generator, convenience methods for parsing VariantEval output, and more.
+}
+\author{
+Genome Sequencing and Analysis Group
+
+Medical and Population Genetics Program
+
+Maintainer: Kiran Garimella
+}
+\references{
+GSA wiki page: http://www.broadinstitute.org/gsa/wiki
+
+GATK help forum: http://www.getsatisfaction.com/gsa
+}
+\examples{
+## get script arguments in interactive and non-interactive mode
+cmdargs = gsa.getargs( list(
+ requiredArg1 = list(
+ value = NA,
+ doc = "Documentation for requiredArg1"
+ ),
+
+ optionalArg1 = list(
+ value = 3e9,
+ doc = "Documentation for optionalArg1"
+ )
+) );
+
+## plot a proportional Venn diagram
+gsa.plot.venn(500, 250, 0, 100);
+
+## read a GATKReport file
+report = gsa.gatk.report("/path/to/my/output.gatkreport");
+
+## emit a message
+gsa.message("This is a message");
+
+## emit a warning message
+gsa.message("This is a warning message");
+
+## emit an error message
+gsa.message("This is an error message");
+
+## read the SQUID metrics for a given sequencing project (internal to the Broad only)
+s = gsa.read.squidmetrics("C427");
+
+## read command-line arguments
+cmdargs = gsa.getargs(
+ list(
+ file = list(value="/my/test.vcf", doc="VCF file"),
+ verbose = list(value=0, doc="If 1, set verbose mode"),
+ test2 = list(value=2.3e9, doc="Another argument that does stuff")
+ ),
+ doc="My test program"
+);
+}
+\keyword{ package }
diff --git a/public/java/src/net/sf/picard/reference/FastaSequenceIndexBuilder.java b/public/java/src/net/sf/picard/reference/FastaSequenceIndexBuilder.java
index 8825c3767..6c8fe1834 100644
--- a/public/java/src/net/sf/picard/reference/FastaSequenceIndexBuilder.java
+++ b/public/java/src/net/sf/picard/reference/FastaSequenceIndexBuilder.java
@@ -25,7 +25,6 @@
package net.sf.picard.reference;
-import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSourceProgressListener;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import static net.sf.picard.reference.FastaSequenceIndexBuilder.Status.*;
@@ -39,8 +38,8 @@ import org.broadinstitute.sting.utils.exceptions.UserException;
* Produces fai file with same output as samtools faidx
*/
public class FastaSequenceIndexBuilder {
- public File fastaFile;
- ReferenceDataSourceProgressListener progress; // interface that provides a method for updating user on progress of reading file
+ final public File fastaFile;
+ final boolean printProgress;
// keep track of location in file
long bytesRead, endOfLastLine, lastTimestamp, fileLength; // initialized to -1 to keep 0-indexed position in file;
@@ -55,10 +54,10 @@ public class FastaSequenceIndexBuilder {
public enum Status { NONE, CONTIG, FIRST_SEQ_LINE, SEQ_LINE, COMMENT }
Status status = Status.NONE; // keeps state of what is currently being read. better to use int instead of enum?
- public FastaSequenceIndexBuilder(File fastaFile, ReferenceDataSourceProgressListener progress) {
- this.progress = progress;
+ public FastaSequenceIndexBuilder(File fastaFile, boolean printProgress) {
this.fastaFile = fastaFile;
fileLength = fastaFile.length();
+ this.printProgress = printProgress;
}
/**
@@ -252,8 +251,8 @@ public class FastaSequenceIndexBuilder {
if (System.currentTimeMillis() - lastTimestamp > 10000) {
int percentProgress = (int) (100*bytesRead/fileLength);
- if (progress != null)
- progress.percentProgress(percentProgress);
+ if (printProgress)
+ System.out.println(String.format("PROGRESS UPDATE: file is %d percent complete", percentProgress));
lastTimestamp = System.currentTimeMillis();
}
}
diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentDefinitions.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentDefinitions.java
index 9f92df6e0..8e3f753a8 100755
--- a/public/java/src/org/broadinstitute/sting/commandline/ArgumentDefinitions.java
+++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentDefinitions.java
@@ -174,7 +174,8 @@ public class ArgumentDefinitions implements Iterable {
static DefinitionMatcher VerifiableDefinitionMatcher = new DefinitionMatcher() {
public boolean matches( ArgumentDefinition definition, Object key ) {
- return definition.validation != null;
+ // We can perform some sort of validation for anything that isn't a flag.
+ return !definition.isFlag;
}
};
}
diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatch.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatch.java
index 60ed8c899..351583c07 100755
--- a/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatch.java
+++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatch.java
@@ -44,7 +44,7 @@ public class ArgumentMatch implements Iterable {
public final String label;
/**
- * Maps indicies of command line arguments to values paired with that argument.
+ * Maps indices of command line arguments to values paired with that argument.
*/
public final SortedMap> indices = new TreeMap>();
diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java
index 9c33e084d..0fb8bbd3a 100644
--- a/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java
+++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java
@@ -26,6 +26,8 @@
package org.broadinstitute.sting.commandline;
import org.apache.log4j.Logger;
+import org.broad.tribble.Feature;
+import org.broadinstitute.sting.gatk.refdata.tracks.FeatureManager;
import org.broadinstitute.sting.gatk.walkers.Multiplex;
import org.broadinstitute.sting.gatk.walkers.Multiplexer;
import org.broadinstitute.sting.utils.classloader.JVMUtils;
@@ -33,6 +35,7 @@ import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
+import java.io.File;
import java.lang.annotation.Annotation;
import java.lang.reflect.*;
import java.util.*;
@@ -109,7 +112,7 @@ public abstract class ArgumentTypeDescriptor {
* @return The parsed object.
*/
public Object parse(ParsingEngine parsingEngine, ArgumentSource source, ArgumentMatches matches) {
- return parse(parsingEngine, source, source.field.getType(), matches);
+ return parse(parsingEngine, source, source.field.getGenericType(), matches);
}
/**
@@ -131,18 +134,18 @@ public abstract class ArgumentTypeDescriptor {
protected ArgumentDefinition createDefaultArgumentDefinition( ArgumentSource source ) {
Annotation argumentAnnotation = getArgumentAnnotation(source);
return new ArgumentDefinition( ArgumentIOType.getIOType(argumentAnnotation),
- source.field.getType(),
- ArgumentDefinition.getFullName(argumentAnnotation, source.field.getName()),
- ArgumentDefinition.getShortName(argumentAnnotation),
- ArgumentDefinition.getDoc(argumentAnnotation),
- source.isRequired() && !createsTypeDefault(source) && !source.isFlag() && !source.isDeprecated(),
- source.isFlag(),
- source.isMultiValued(),
- source.isHidden(),
- getCollectionComponentType(source.field),
- ArgumentDefinition.getExclusiveOf(argumentAnnotation),
- ArgumentDefinition.getValidationRegex(argumentAnnotation),
- getValidOptions(source) );
+ source.field.getType(),
+ ArgumentDefinition.getFullName(argumentAnnotation, source.field.getName()),
+ ArgumentDefinition.getShortName(argumentAnnotation),
+ ArgumentDefinition.getDoc(argumentAnnotation),
+ source.isRequired() && !createsTypeDefault(source) && !source.isFlag() && !source.isDeprecated(),
+ source.isFlag(),
+ source.isMultiValued(),
+ source.isHidden(),
+ makeRawTypeIfNecessary(getCollectionComponentType(source.field)),
+ ArgumentDefinition.getExclusiveOf(argumentAnnotation),
+ ArgumentDefinition.getValidationRegex(argumentAnnotation),
+ getValidOptions(source) );
}
/**
@@ -151,7 +154,7 @@ public abstract class ArgumentTypeDescriptor {
* @return The parameterized component type, or String.class if the parameterized type could not be found.
* @throws IllegalArgumentException If more than one parameterized type is found on the field.
*/
- protected Class getCollectionComponentType( Field field ) {
+ protected Type getCollectionComponentType( Field field ) {
return null;
}
@@ -162,7 +165,7 @@ public abstract class ArgumentTypeDescriptor {
* @param matches The argument matches for the argument source, or the individual argument match for a scalar if this is being called to help parse a collection.
* @return The individual parsed object matching the argument match with Class type.
*/
- public abstract Object parse( ParsingEngine parsingEngine, ArgumentSource source, Class type, ArgumentMatches matches );
+ public abstract Object parse( ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches );
/**
* If the argument source only accepts a small set of options, populate the returned list with
@@ -273,6 +276,113 @@ public abstract class ArgumentTypeDescriptor {
public static boolean isArgumentHidden(Field field) {
return field.isAnnotationPresent(Hidden.class);
}
+
+ public Class makeRawTypeIfNecessary(Type t) {
+ if ( t == null )
+ return null;
+ else if ( t instanceof ParameterizedType )
+ return (Class)((ParameterizedType) t).getRawType();
+ else if ( t instanceof Class ) {
+ return (Class)t;
+ } else {
+ throw new IllegalArgumentException("Unable to determine Class-derived component type of field: " + t);
+ }
+ }
+}
+
+/**
+ * Parser for RodBinding objects
+ */
+class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
+ /**
+ * We only want RodBinding class objects
+ * @param type The type to check.
+ * @return true if the provided class is a RodBinding.class
+ */
+ @Override
+ public boolean supports( Class type ) {
+ return isRodBinding(type);
+ }
+
+ public static boolean isRodBinding( Class type ) {
+ return RodBinding.class.isAssignableFrom(type);
+ }
+
+ @Override
+ public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches) {
+ ArgumentDefinition defaultDefinition = createDefaultArgumentDefinition(source);
+ String value = getArgumentValue( defaultDefinition, matches );
+ try {
+ String name = defaultDefinition.fullName;
+ String tribbleType = null;
+ Tags tags = getArgumentTags(matches);
+ // must have one or two tag values here
+ if ( tags.getPositionalTags().size() > 2 ) {
+ throw new UserException.CommandLineException(
+ String.format("Unexpected number of positional tags for argument %s : %s. " +
+ "Rod bindings only suport -X:type and -X:name,type argument styles",
+ value, source.field.getName()));
+ } if ( tags.getPositionalTags().size() == 2 ) {
+ // -X:name,type style
+ name = tags.getPositionalTags().get(0);
+ tribbleType = tags.getPositionalTags().get(1);
+ } else {
+ // case with 0 or 1 positional tags
+ FeatureManager manager = new FeatureManager();
+
+ // -X:type style is a type when we cannot determine the type dynamically
+ String tag1 = tags.getPositionalTags().size() == 1 ? tags.getPositionalTags().get(0) : null;
+ if ( tag1 != null ) {
+ if ( manager.getByName(tag1) != null ) // this a type
+ tribbleType = tag1;
+ else
+ name = tag1;
+ }
+
+ if ( tribbleType == null ) {
+ // try to determine the file type dynamically
+ File file = new File(value);
+ if ( file.canRead() && file.isFile() ) {
+ FeatureManager.FeatureDescriptor featureDescriptor = manager.getByFiletype(file);
+ if ( featureDescriptor != null ) {
+ tribbleType = featureDescriptor.getName();
+ logger.warn("Dynamically determined type of " + file + " to be " + tribbleType);
+ }
+ }
+ }
+ }
+
+ if ( tribbleType == null ) // error handling
+ throw new UserException.CommandLineException(
+ String.format("Could not parse argument %s with value %s",
+ defaultDefinition.fullName, value));
+
+ Constructor ctor = (makeRawTypeIfNecessary(type)).getConstructor(Class.class, String.class, String.class, String.class, Tags.class);
+ Class parameterType = getParameterizedTypeClass(type);
+ RodBinding result = (RodBinding)ctor.newInstance(parameterType, name, value, tribbleType, tags);
+ parsingEngine.addTags(result,tags);
+ parsingEngine.addRodBinding(result);
+ return result;
+ } catch (InvocationTargetException e) {
+ throw new UserException.CommandLineException(
+ String.format("Failed to parse value %s for argument %s.",
+ value, source.field.getName()));
+ } catch (Exception e) {
+ throw new UserException.CommandLineException(
+ String.format("Failed to parse value %s for argument %s.",
+ value, source.field.getName()));
+ }
+ }
+
+ private Class getParameterizedTypeClass(Type t) {
+ if ( t instanceof ParameterizedType ) {
+ ParameterizedType parameterizedType = (ParameterizedType)t;
+ if ( parameterizedType.getActualTypeArguments().length != 1 )
+ throw new ReviewedStingException("BUG: more than 1 generic type found on class" + t);
+ return (Class)parameterizedType.getActualTypeArguments()[0];
+ } else
+ throw new ReviewedStingException("BUG: could not find generic type on class " + t);
+ }
}
/**
@@ -282,9 +392,10 @@ public abstract class ArgumentTypeDescriptor {
class SimpleArgumentTypeDescriptor extends ArgumentTypeDescriptor {
@Override
public boolean supports( Class type ) {
- if( type.isPrimitive() ) return true;
- if( type.isEnum() ) return true;
- if( primitiveToWrapperMap.containsValue(type) ) return true;
+ if ( RodBindingArgumentTypeDescriptor.isRodBinding(type) ) return false;
+ if ( type.isPrimitive() ) return true;
+ if ( type.isEnum() ) return true;
+ if ( primitiveToWrapperMap.containsValue(type) ) return true;
try {
type.getConstructor(String.class);
@@ -298,7 +409,8 @@ class SimpleArgumentTypeDescriptor extends ArgumentTypeDescriptor {
}
@Override
- public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Class type, ArgumentMatches matches) {
+ public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Type fulltype, ArgumentMatches matches) {
+ Class type = makeRawTypeIfNecessary(fulltype);
if (source.isFlag())
return true;
@@ -339,7 +451,7 @@ class SimpleArgumentTypeDescriptor extends ArgumentTypeDescriptor {
throw e;
} catch (InvocationTargetException e) {
throw new UserException.CommandLineException(String.format("Failed to parse value %s for argument %s. This is most commonly caused by providing an incorrect data type (e.g. a double when an int is required)",
- value, source.field.getName()));
+ value, source.field.getName()));
} catch (Exception e) {
throw new DynamicClassResolutionException(String.class, e);
}
@@ -351,7 +463,7 @@ class SimpleArgumentTypeDescriptor extends ArgumentTypeDescriptor {
return result;
}
-
+
/**
* A mapping of the primitive types to their associated wrapper classes. Is there really no way to infer
@@ -382,10 +494,10 @@ class CompoundArgumentTypeDescriptor extends ArgumentTypeDescriptor {
@Override
@SuppressWarnings("unchecked")
- public Object parse(ParsingEngine parsingEngine,ArgumentSource source, Class type, ArgumentMatches matches) {
- Class componentType;
+ public Object parse(ParsingEngine parsingEngine,ArgumentSource source, Type fulltype, ArgumentMatches matches) {
+ Class type = makeRawTypeIfNecessary(fulltype);
+ Type componentType;
Object result;
- Tags tags;
if( Collection.class.isAssignableFrom(type) ) {
@@ -399,7 +511,7 @@ class CompoundArgumentTypeDescriptor extends ArgumentTypeDescriptor {
}
componentType = getCollectionComponentType( source.field );
- ArgumentTypeDescriptor componentArgumentParser = parsingEngine.selectBestTypeDescriptor(componentType);
+ ArgumentTypeDescriptor componentArgumentParser = parsingEngine.selectBestTypeDescriptor(makeRawTypeIfNecessary(componentType));
Collection collection;
try {
@@ -428,7 +540,7 @@ class CompoundArgumentTypeDescriptor extends ArgumentTypeDescriptor {
}
else if( type.isArray() ) {
componentType = type.getComponentType();
- ArgumentTypeDescriptor componentArgumentParser = parsingEngine.selectBestTypeDescriptor(componentType);
+ ArgumentTypeDescriptor componentArgumentParser = parsingEngine.selectBestTypeDescriptor(makeRawTypeIfNecessary(componentType));
// Assemble a collection of individual values used in this computation.
Collection values = new ArrayList();
@@ -436,7 +548,7 @@ class CompoundArgumentTypeDescriptor extends ArgumentTypeDescriptor {
for( ArgumentMatch value: match )
values.add(value);
- result = Array.newInstance(componentType,values.size());
+ result = Array.newInstance(makeRawTypeIfNecessary(componentType),values.size());
int i = 0;
for( ArgumentMatch value: values ) {
@@ -459,16 +571,16 @@ class CompoundArgumentTypeDescriptor extends ArgumentTypeDescriptor {
* @throws IllegalArgumentException If more than one parameterized type is found on the field.
*/
@Override
- protected Class getCollectionComponentType( Field field ) {
- // If this is a parameterized collection, find the contained type. If blow up if more than one type exists.
- if( field.getGenericType() instanceof ParameterizedType) {
- ParameterizedType parameterizedType = (ParameterizedType)field.getGenericType();
- if( parameterizedType.getActualTypeArguments().length > 1 )
- throw new IllegalArgumentException("Unable to determine collection type of field: " + field.toString());
- return (Class)parameterizedType.getActualTypeArguments()[0];
- }
- else
- return String.class;
+ protected Type getCollectionComponentType( Field field ) {
+ // If this is a parameterized collection, find the contained type. If blow up if more than one type exists.
+ if( field.getGenericType() instanceof ParameterizedType) {
+ ParameterizedType parameterizedType = (ParameterizedType)field.getGenericType();
+ if( parameterizedType.getActualTypeArguments().length > 1 )
+ throw new IllegalArgumentException("Unable to determine collection type of field: " + field.toString());
+ return parameterizedType.getActualTypeArguments()[0];
+ }
+ else
+ return String.class;
}
}
@@ -515,7 +627,7 @@ class MultiplexArgumentTypeDescriptor extends ArgumentTypeDescriptor {
throw new ReviewedStingException("No multiplexed ids available");
Map