diff --git a/build.xml b/build.xml
index fc495f7cc..4acb2086a 100644
--- a/build.xml
+++ b/build.xml
@@ -468,6 +468,10 @@
+
+
+
+
@@ -520,6 +524,8 @@
+
+
@@ -1074,7 +1080,7 @@
-
+
@@ -1087,7 +1093,7 @@
-
+
diff --git a/public/R/src/gsalib/DESCRIPTION b/public/R/src/gsalib/DESCRIPTION
new file mode 100644
index 000000000..6116e8c66
--- /dev/null
+++ b/public/R/src/gsalib/DESCRIPTION
@@ -0,0 +1,10 @@
+Package: gsalib
+Type: Package
+Title: Utility functions
+Version: 1.0
+Date: 2010-10-02
+Author: Kiran Garimella
+Maintainer: Kiran Garimella
+Description: Utility functions for GATK NGS analyses
+License: BSD
+LazyLoad: yes
diff --git a/public/R/src/gsalib/R/gsa.error.R b/public/R/src/gsalib/R/gsa.error.R
new file mode 100644
index 000000000..1c6a56046
--- /dev/null
+++ b/public/R/src/gsalib/R/gsa.error.R
@@ -0,0 +1,19 @@
+# Report a fatal error: print a banner with the error text through
+# gsa.message(), print a traceback, then raise the error with stop().
+#
+# message: the error text to report.
+#
+# NOTE(review): traceback() with no arguments reports the last uncaught error,
+# not the current call stack -- confirm this is the output wanted here.
+gsa.error <- function(message) {
+    message("");
+    gsa.message("Error: **********");
+    gsa.message(sprintf("Error: %s", message));
+    gsa.message("Error: **********");
+    message("");
+
+    traceback();
+
+    message("");
+    stop(message, call. = FALSE);
+}
diff --git a/public/R/src/gsalib/R/gsa.getargs.R b/public/R/src/gsalib/R/gsa.getargs.R
new file mode 100644
index 000000000..94613bf93
--- /dev/null
+++ b/public/R/src/gsalib/R/gsa.getargs.R
@@ -0,0 +1,150 @@
+# Return TRUE when an argument value counts as "not supplied": NULL, a
+# zero-length vector, or a value whose first element is NA.
+.gsa.getargs.isMissing <- function(value) {
+    length(value) == 0 || isTRUE(is.na(value[[1]]));
+}
+
+# Convert a command-line token to a number when it consists only of digits,
+# '.', 'e', '+' and '-' AND parses cleanly; otherwise return NA.
+.gsa.getargs.asNumber <- function(token) {
+    if (is.na(token) || !grepl("^[\\d\\.e\\+\\-]+$", token, perl=TRUE, ignore.case=TRUE)) {
+        return(NA);
+    }
+
+    suppressWarnings(as.numeric(token));
+}
+
+# Print a usage message built from argspec (plus the optional script
+# description in doc), then abort the script with stop().
+.gsa.getargs.usage <- function(argspec, doc) {
+    cargs = commandArgs();
+
+    usage = "Usage:";
+
+    fileIndex = grep("--file=", cargs);
+    if (length(fileIndex) > 0) {
+        progname = gsub("--file=", "", cargs[fileIndex[1]]);
+
+        usage = sprintf("Usage: Rscript %s [arguments]", progname);
+
+        if (!is.na(doc)) {
+            message(sprintf("%s: %s\n", progname, doc));
+        }
+    }
+
+    message(usage);
+
+    for (argname in names(argspec)) {
+        spec = argspec[[argname]];
+
+        # An entry is either list(value=..., doc=...) or a bare default value,
+        # mirroring how gsa.getargs() interprets it.
+        if (is.list(spec)) {
+            defaultValue = spec$value;
+            argdoc = spec$doc;
+        } else {
+            defaultValue = spec;
+            argdoc = NULL;
+        }
+
+        # paste() turns a NULL default or doc into "" so the line is still printed.
+        message(sprintf(" -%-10s\t[default: %s]\t%s", argname, paste(defaultValue, collapse=","), paste(argdoc, collapse=" ")));
+    }
+
+    message("");
+
+    stop(call. = FALSE);
+}
+
+# Resolve the script arguments described by argspec.
+#
+# argspec: named list; each entry is either list(value=<default>, doc=<text>)
+#          or a bare default value. A default of NA marks a required argument.
+# doc:     optional one-line description of the script, shown in the usage text.
+#
+# In interactive mode, required arguments are taken from an existing 'cmdargs'
+# object if one is visible, otherwise prompted for. In command-line mode they
+# are parsed from '-name value' pairs; with no arguments, '-h' or '-help', the
+# usage text is printed and the script stops. Returns a named list of values;
+# stops through gsa.error() if a required argument is still missing.
+gsa.getargs <- function(argspec, doc = NA) {
+    argsenv = new.env();
+
+    # Seed every argument with its default value.
+    for (argname in names(argspec)) {
+        spec = argspec[[argname]];
+        assign(argname, if (is.list(spec)) spec$value else spec, envir=argsenv);
+    }
+
+    if (interactive()) {
+        for (argname in names(argspec)) {
+            if (.gsa.getargs.isMissing(get(argname, envir=argsenv))) {
+                if (exists("cmdargs")) {
+                    assign(argname, cmdargs[[argname]], envir=argsenv);
+                } else {
+                    assign(argname, readline(sprintf("Please enter a value for '%s': ", argname)), envir=argsenv);
+                }
+            }
+        }
+    } else {
+        cargs = commandArgs(TRUE);
+
+        if (length(cargs) == 0) {
+            .gsa.getargs.usage(argspec, doc);
+        }
+
+        for (i in seq_along(cargs)) {
+            # A token starting with '-' names an argument -- unless it is a
+            # negative number, which can only be the value of the previous one.
+            if (!grepl("^-", cargs[i]) || !is.na(.gsa.getargs.asNumber(cargs[i]))) {
+                next;
+            }
+
+            # Strip only the leading dashes so names such as 'my-arg' survive.
+            key = sub("^-+", "", cargs[i]);
+            if (key == "") {
+                next;
+            }
+
+            if (key %in% c("h", "help")) {
+                .gsa.getargs.usage(argspec, doc);
+            }
+
+            # cargs[i+1] is NA when the flag is the last token on the command line.
+            value = cargs[i+1];
+
+            # Keep the raw string when it merely looks numeric (e.g. "e" or "1.2.3").
+            number = .gsa.getargs.asNumber(value);
+            if (!is.na(number)) {
+                value = number;
+            }
+
+            assign(key, value, envir=argsenv);
+        }
+    }
+
+    args = as.list(argsenv);
+
+    missingArgs = c();
+
+    for (arg in names(argspec)) {
+        if (.gsa.getargs.isMissing(args[[arg]])) {
+            gsa.warn(sprintf("Value for required argument '-%s' was not specified", arg));
+
+            missingArgs = c(missingArgs, arg);
+        }
+    }
+
+    if (length(missingArgs) > 0) {
+        gsa.error(
+            paste(
+                "Missing required arguments: -",
+                paste(missingArgs, collapse=" -"),
+                ". Specify -h or -help to this script for a list of available arguments.",
+                sep=""
+            )
+        );
+    }
+
+    args;
+}
diff --git a/public/R/src/gsalib/R/gsa.message.R b/public/R/src/gsalib/R/gsa.message.R
new file mode 100644
index 000000000..a2b909d3d
--- /dev/null
+++ b/public/R/src/gsalib/R/gsa.message.R
@@ -0,0 +1,4 @@
+# Write a message prefixed with '[gsalib]' using message().
+gsa.message <- function(message) {
+    message(sprintf("[gsalib] %s", message));
+}
diff --git a/public/R/src/gsalib/R/gsa.plot.venn.R b/public/R/src/gsalib/R/gsa.plot.venn.R
new file mode 100644
index 000000000..b1353ccc1
--- /dev/null
+++ b/public/R/src/gsalib/R/gsa.plot.venn.R
@@ -0,0 +1,58 @@
+# Plot a proportional two- or three-way Venn diagram. The image is requested
+# from the Google Chart API, read back as a PNG and drawn into an empty plot.
+#
+# a, b, c:                    sizes of the three circles (c = 0 for a two-way diagram).
+# a_and_b, a_and_c, b_and_c:  sizes of the pairwise overlaps.
+# col:                        one colour per circle.
+# pos:                        xleft, ybottom, xright, ytop passed to rasterImage().
+# debug:                      if 1, print the request URL.
+#
+# NOTE(review): confirm that the chart.apis.google.com endpoint is still served.
+gsa.plot.venn <-
+function(a, b, c=0, a_and_b, a_and_c=0, b_and_c=0,
+         col=c("#FF6342", "#63C6DE", "#ADDE63"),
+         pos=c(0.20, 0.20, 0.80, 0.82),
+         debug=0
+        ) {
+    library(png);
+    library(graphics);
+
+    # Convert each colour to the bare RRGGBB hex form used in the request.
+    for (i in seq_along(col)) {
+        rgbcol = col2rgb(col[i]);
+        col[i] = sprintf("%02X%02X%02X", rgbcol[1], rgbcol[2], rgbcol[3]);
+    }
+
+    chco = paste(col[1], col[2], col[3], sep=",");
+    chd = paste(a, b, c, a_and_b, a_and_c, b_and_c, sep=",");
+
+    proplist = paste(
+        'cht=v',
+        'chs=525x525',
+        'chds=0,10000000000',
+        paste('chco=', chco, sep=""),
+        paste('chd=t:', chd, sep=""),
+        sep='&'
+    );
+    url = paste("http://chart.apis.google.com/chart?", proplist, sep="");
+
+    if (debug == 1) {
+        print(url);
+    }
+
+    # Fetch the diagram into a temporary file that is removed on exit, even
+    # when the download or the PNG decoding fails.
+    filename = tempfile("venn");
+    on.exit(unlink(filename), add=TRUE);
+    download.file(url, filename, mode="wb", quiet=TRUE);
+
+    # Keep the image in its own variable: 'a' is still needed below.
+    img = readPNG(filename);
+
+    plot(0, 0, type="n", xaxt="n", yaxt="n", bty="n", xlim=c(0, 1), ylim=c(0, 1), xlab="", ylab="");
+    if (c == 0 || a >= b) {
+        rasterImage(img, pos[1], pos[2], pos[3], pos[4]);
+    } else {
+        rasterImage(img, 0.37+pos[1], 0.37+pos[2], 0.37+pos[3], 0.37+pos[4], angle=180);
+    }
+}
diff --git a/public/R/src/gsalib/R/gsa.read.eval.R b/public/R/src/gsalib/R/gsa.read.eval.R
new file mode 100644
index 000000000..f1d49092b
--- /dev/null
+++ b/public/R/src/gsalib/R/gsa.read.eval.R
@@ -0,0 +1,79 @@
+# Read a CSV file into a data frame. Returns NA when the file does not exist
+# or is 500 bytes or smaller.
+.gsa.attemptToLoadFile <- function(filename) {
+    file = NA;
+
+    if (file.exists(filename) && file.info(filename)$size > 500) {
+        file = read.csv(filename, header=TRUE, comment.char="#");
+    }
+
+    file;
+}
+
+# Load the per-module CSV files written next to evalRoot
+# ('<evalRoot>.<module>.csv') into a named list. Modules whose file cannot be
+# loaded are NA. CallsetNames, CallsetOnlyNames and CallsetFilteredNames are
+# derived from the jexl_expression column of the TiTv table and stay empty
+# when that table is not available.
+gsa.read.eval <-
+function(evalRoot) {
+    # List element name -> module part of the file name.
+    modules = c(
+        AlleleCountStats                = "AlleleCountStats",
+        CompOverlap                     = "Comp_Overlap",
+        CountVariants                   = "Count_Variants",
+        GenotypeConcordance             = "Genotype_Concordance",
+        MetricsByAc                     = "MetricsByAc",
+        MetricsBySample                 = "MetricsBySample",
+        Quality_Metrics_by_allele_count = "Quality_Metrics_by_allele_count",
+        QualityScoreHistogram           = "QualityScoreHistogram",
+        SampleStatistics                = "Sample_Statistics",
+        SampleSummaryStatistics         = "Sample_Summary_Statistics",
+        SimpleMetricsBySample           = "SimpleMetricsBySample",
+        TiTv                            = "Ti_slash_Tv_Variant_Evaluator",
+        TiTvStats                       = "TiTvStats",
+        Variant_Quality_Score           = "Variant_Quality_Score"
+    );
+
+    eval = lapply(modules, function(module) {
+        .gsa.attemptToLoadFile(paste(evalRoot, ".", module, ".csv", sep=""));
+    });
+
+    eval$CallsetNames = character(0);
+    eval$CallsetOnlyNames = character(0);
+    eval$CallsetFilteredNames = character(0);
+
+    # Without the TiTv table there is nothing to derive the names from
+    # (and '$' on the NA placeholder would raise an error).
+    if (!is.data.frame(eval$TiTv)) {
+        return(eval);
+    }
+
+    uniqueJexlExpressions = unique(eval$TiTv$jexl_expression);
+    eval$CallsetOnlyNames = as.vector(uniqueJexlExpressions[grep("FilteredIn|Intersection|none", uniqueJexlExpressions, invert=TRUE, ignore.case=TRUE)]);
+    eval$CallsetNames = as.vector(gsub("-only", "", eval$CallsetOnlyNames));
+
+    # "name" -> "InName" (prefix with 'In' and upper-case the first character).
+    inName = function(name) {
+        gsub("^(\\w)", "In\\U\\1", name, perl=TRUE);
+    }
+    first = eval$CallsetNames[1];
+    second = eval$CallsetNames[2];
+
+    eval$CallsetFilteredNames = c(
+        paste(inName(first), "-Filtered", inName(second), sep=""),
+        paste(inName(second), "-Filtered", inName(first), sep="")
+    );
+
+    # Fall back to the un-capitalised spelling when the capitalised one is not
+    # among the expressions present in the table.
+    if (!(eval$CallsetFilteredNames[1] %in% uniqueJexlExpressions)) {
+        eval$CallsetFilteredNames[1] = paste("In", first, "-FilteredIn", second, sep="");
+    }
+
+    if (!(eval$CallsetFilteredNames[2] %in% uniqueJexlExpressions)) {
+        eval$CallsetFilteredNames[2] = paste("In", second, "-FilteredIn", first, sep="");
+    }
+
+    eval;
+}
diff --git a/public/R/src/gsalib/R/gsa.read.gatkreport.R b/public/R/src/gsalib/R/gsa.read.gatkreport.R
new file mode 100644
index 000000000..011b5240d
--- /dev/null
+++ b/public/R/src/gsalib/R/gsa.read.gatkreport.R
@@ -0,0 +1,112 @@
+# Load a table into the specified environment. Make sure that each new table gets a unique name (this allows one to cat a bunch of tables with the same name together and load them into R without each table overwriting the last).
+# tableRows is a character matrix with one row per table row, or NULL for a table without rows.
+.gsa.assignGATKTableToEnvironment <- function(tableName, tableHeader, tableRows, tableEnv) {
+    if (is.null(tableRows)) {
+        # A table without data rows still gets its columns.
+        tableRows = matrix(character(0), nrow=0, ncol=length(tableHeader));
+    }
+
+    d = data.frame(tableRows, row.names=NULL, stringsAsFactors=FALSE);
+    colnames(d) = tableHeader;
+
+    # Convert a column to numeric only when every value parses as a number.
+    for (i in seq_len(ncol(d))) {
+        v = suppressWarnings(as.numeric(d[,i]));
+
+        if (length(v) > 0 && !any(is.na(v))) {
+            d[,i] = v;
+        }
+    }
+
+    # Count the tables already stored under this exact name or under
+    # '<name>.<number>'. A fixed-string comparison is used so that tables whose
+    # names merely contain tableName are not counted.
+    existing = ls(envir=tableEnv);
+    samePrefix = substring(existing, 1, nchar(tableName)) == tableName;
+    suffix = substring(existing, nchar(tableName) + 1);
+    usedCount = sum(samePrefix & grepl("^(\\.[0-9]+)?$", suffix));
+
+    if (usedCount > 0) {
+        tableName = paste(tableName, ".", usedCount, sep="");
+    }
+
+    assign(tableName, d, envir=tableEnv);
+}
+
+# Read a fixed width line of text into a character vector of trimmed fields.
+# columnStarts holds the start positions of every column after the first.
+.gsa.splitFixedWidth <- function(line, columnStarts) {
+    starts = c(1, columnStarts);
+    stops = c(columnStarts - 1, nchar(line));
+
+    # substring() is vectorised over starts/stops, so this also works when
+    # there is only one column.
+    gsub("^[[:space:]]+|[[:space:]]+$", "", substring(line, starts, stops));
+}
+
+# Load all GATKReport tables from a file. Returns (invisibly) a list with one
+# data frame per table; stops on a table header with an unsupported version.
+gsa.read.gatkreport <- function(filename) {
+    lines = readLines(filename);
+
+    tableEnv = new.env();
+
+    tableName = NA;
+    tableHeader = c();
+    tableRows = list();
+    version = NA;
+    columnStarts = c();
+
+    # Store the table collected so far, if any. Rows are gathered in a list
+    # and bound once here instead of growing a matrix row by row.
+    storeTable = function() {
+        if (!is.na(tableName)) {
+            .gsa.assignGATKTableToEnvironment(tableName, tableHeader, do.call(rbind, tableRows), tableEnv);
+        }
+    }
+
+    for (line in lines) {
+        if (grepl("^##:GATKReport.v", line, ignore.case=TRUE)) {
+            headerFields = unlist(strsplit(line, "[[:space:]]+"));
+
+            storeTable();
+
+            tableName = headerFields[2];
+            tableHeader = c();
+            tableRows = list();
+            columnStarts = c();
+
+            # For differences in versions see
+            # $STING_HOME/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportVersion.java
+            version = tolower(sub("^##:GATKReport.", "", headerFields[1], ignore.case=TRUE));
+
+            if (!(version %in% c("v0.1", "v0.2"))) {
+                stop(sprintf("Unsupported GATKReport version '%s' (table '%s')", version, tableName), call. = FALSE);
+            }
+        } else if (grepl("^[[:space:]]*(#|$)", line)) {
+            # Skip blank lines and comments.
+        } else if (!is.na(tableName)) {
+            if (version == "v0.1") {
+                row = unlist(strsplit(line, "[[:space:]]+"));
+            } else {
+                if (length(tableHeader) == 0) {
+                    headerChars = unlist(strsplit(line, ""));
+                    # Find the first position of non space characters, excluding the first character
+                    columnStarts = intersect(grep("[[:space:]]", headerChars, invert=TRUE), grep("[[:space:]]", headerChars) + 1);
+                }
+
+                row = .gsa.splitFixedWidth(line, columnStarts);
+            }
+
+            if (length(tableHeader) == 0) {
+                tableHeader = row;
+            } else {
+                tableRows[[length(tableRows) + 1]] = row;
+            }
+        }
+    }
+
+    storeTable();
+
+    invisible(as.list(tableEnv));
+}
diff --git a/public/R/src/gsalib/R/gsa.read.squidmetrics.R b/public/R/src/gsalib/R/gsa.read.squidmetrics.R
new file mode 100644
index 000000000..39fa1ad32
--- /dev/null
+++ b/public/R/src/gsalib/R/gsa.read.squidmetrics.R
@@ -0,0 +1,32 @@
+# Read the metrics of one SQUID project from the reporting database.
+#
+# project: project name, matched against the "Project" column.
+# bylane:  if TRUE, query ILLUMINA_PICARD_METRICS; otherwise query
+#          ILLUMINA_SAMPLE_STATUS_AGG.
+#
+# NOTE(review): the connect string, including credentials, is hard-coded here;
+# consider moving it to configuration.
+gsa.read.squidmetrics = function(project, bylane = FALSE) {
+    suppressMessages(library(ROracle));
+
+    drv = dbDriver("Oracle");
+    # Release the driver even when connecting or querying fails.
+    on.exit(oraCloseDriver(drv), add=TRUE);
+
+    con = dbConnect(drv, "REPORTING/REPORTING@ora01:1521/SEQPROD");
+
+    tableName = if (bylane) "ILLUMINA_PICARD_METRICS" else "ILLUMINA_SAMPLE_STATUS_AGG";
+
+    # Double embedded single quotes so the project name cannot end the SQL
+    # string literal early.
+    quotedProject = gsub("'", "''", project, fixed=TRUE);
+    statement = paste("SELECT * FROM ", tableName, " WHERE \"Project\" = '", quotedProject, "'", sep="");
+    print(statement);
+
+    rs = dbSendQuery(con, statement = statement);
+    d = fetch(rs, n=-1);
+    dbHasCompleted(rs);
+    dbClearResult(rs);
+
+    subset(d, Project == project);
+}
diff --git a/public/R/src/gsalib/R/gsa.read.vcf.R b/public/R/src/gsalib/R/gsa.read.vcf.R
new file mode 100644
index 000000000..5beb6455d
--- /dev/null
+++ b/public/R/src/gsalib/R/gsa.read.vcf.R
@@ -0,0 +1,61 @@
+# Return the column names from the '#CHROM' line of a VCF file, or
+# character(0) when no such line precedes the first data line. The leading
+# '#' lines are scanned in chunks, so headers of any length are handled.
+.gsa.read.vcf.header <- function(vcffile) {
+    con = file(vcffile, "r");
+    on.exit(close(con), add=TRUE);
+
+    repeat {
+        chunk = readLines(con, n=1000);
+        if (length(chunk) == 0) {
+            break;
+        }
+
+        hit = grep("^#CHROM", chunk);
+        if (length(hit) > 0) {
+            # Drop only the leading '#'; split on tabs.
+            return(unlist(strsplit(sub("^#", "", chunk[hit[1]]), "\t")));
+        }
+
+        # Stop scanning once data lines (not starting with '#') begin.
+        if (!all(grepl("^#", chunk))) {
+            break;
+        }
+    }
+
+    character(0);
+}
+
+# Read a VCF file into a data frame whose columns are named after the '#CHROM' header line.
+#
+# vcffile:              path to the VCF file.
+# skip, nrows:          passed on to read.table().
+# expandGenotypeFields: if TRUE, append one '<sample>.GT' column per sample
+#                       column (columns 10 onward) holding the text before the
+#                       first ':' of each entry.
+gsa.read.vcf <- function(vcffile, skip=0, nrows=-1, expandGenotypeFields = FALSE) {
+    header = .gsa.read.vcf.header(vcffile);
+    if (length(header) == 0) {
+        stop(sprintf("No '#CHROM' header line found in '%s'", vcffile), call. = FALSE);
+    }
+
+    # VCF is tab-delimited and unquoted; header lines are skipped as comments.
+    d = read.table(vcffile, header=FALSE, sep="\t", quote="", skip=skip, nrows=nrows, stringsAsFactors=FALSE);
+    colnames(d) = header;
+
+    # Sites-only files have fewer than 10 columns and nothing to expand.
+    if (expandGenotypeFields && ncol(d) >= 10) {
+        columns = ncol(d);
+
+        offset = columns + 1;
+        for (sampleIndex in 10:columns) {
+            gt = vapply(strsplit(as.character(d[,sampleIndex]), ":", fixed=TRUE), function(x) x[1], character(1));
+            d[,offset] = gt;
+            colnames(d)[offset] = sprintf("%s.GT", colnames(d)[sampleIndex]);
+
+            offset = offset + 1;
+        }
+    }
+
+    return(d);
+}
diff --git a/public/R/src/gsalib/R/gsa.warn.R b/public/R/src/gsalib/R/gsa.warn.R
new file mode 100644
index 000000000..7ee08ce65
--- /dev/null
+++ b/public/R/src/gsalib/R/gsa.warn.R
@@ -0,0 +1,4 @@
+# Write a warning prefixed with '[gsalib] Warning:' via gsa.message().
+gsa.warn <- function(message) {
+    gsa.message(sprintf("Warning: %s", message));
+}
diff --git a/public/R/src/gsalib/Read-and-delete-me b/public/R/src/gsalib/Read-and-delete-me
new file mode 100644
index 000000000..d04323a6e
--- /dev/null
+++ b/public/R/src/gsalib/Read-and-delete-me
@@ -0,0 +1,9 @@
+* Edit the help file skeletons in 'man', possibly combining help files
+  for multiple functions.
+* Put any C/C++/Fortran code in 'src'.
+* If you have compiled code, add a .First.lib() function in 'R' to load
+  the shared library.
+* Run R CMD build to build the package tarball.
+* Run R CMD check to check the package tarball.
+
+Read "Writing R Extensions" for more information.
diff --git a/public/R/src/gsalib/data/tearsheetdrop.jpg b/public/R/src/gsalib/data/tearsheetdrop.jpg
new file mode 100755
index 000000000..c9d480fa0
Binary files /dev/null and b/public/R/src/gsalib/data/tearsheetdrop.jpg differ
diff --git a/public/R/src/gsalib/man/gsa.error.Rd b/public/R/src/gsalib/man/gsa.error.Rd
new file mode 100644
index 000000000..df7c0cbde
--- /dev/null
+++ b/public/R/src/gsalib/man/gsa.error.Rd
@@ -0,0 +1,49 @@
+\name{gsa.error}
+\alias{gsa.error}
+\title{
+GSA error
+}
+\description{
+Write an error message to standard error with the prefix '[gsalib] Error:', print a traceback, and exit.
+}
+\usage{
+gsa.error(message)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+  \item{message}{
+The error message to write.
+}
+}
+\details{
+%% ~~ If necessary, more details than the description above ~~
+}
+\value{
+%% ~Describe the value returned
+%%  If it is a LIST, use
+%%  \item{comp1 }{Description of 'comp1'}
+%%  \item{comp2 }{Description of 'comp2'}
+%% ...
+} +\references{ +%% ~put references to the literature/web site here ~ +} +\author{ +Kiran Garimella +} +\note{ +%% ~~further notes~~ +} + +%% ~Make other sections like Warning with \section{Warning }{....} ~ + +\seealso{ +%% ~~objects to See Also as \code{\link{help}}, ~~~ +} +\examples{ +gsa.error("This is a message"); +} +% Add one or more standard keywords, see file 'KEYWORDS' in the +% R documentation directory. +\keyword{ ~kwd1 } +\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line diff --git a/public/R/src/gsalib/man/gsa.getargs.Rd b/public/R/src/gsalib/man/gsa.getargs.Rd new file mode 100644 index 000000000..27aa1b05a --- /dev/null +++ b/public/R/src/gsalib/man/gsa.getargs.Rd @@ -0,0 +1,57 @@ +\name{gsa.getargs} +\alias{gsa.getargs} +\title{ +Get script arguments +} +\description{ +Get script arguments given a list object specifying arguments and documentation. Can be used in command-line or interactive mode. This is helpful when developing scripts in interactive mode that will eventually become command-line programs. If no arguments are specified or help is requested in command-line mode, the script will print out a usage statement with available arguments and exit. +} +\usage{ +gsa.getargs(argspec, doc = NA) +} +\arguments{ + \item{argspec}{ +A list object. Each key is an argument name. The value is another list object with a 'value' and 'doc' keys. For example: +\preformatted{argspec = list( + arg1 = list(value=10, doc="Info for optional arg1"), + arg2 = list(value=NA, doc="Info for required arg2") +); +} + +If the value provided is NA, the argument is considered required and must be specified when the script is invoked. For command-line mode, this means the argument must be specified on the command-line. In interactive mode, there are two ways of specifying these arguments. First, if a properly formatted list argument called 'cmdargs' is present in the current environment (i.e. 
the object returned by gsa.getargs() from a previous invocation), the value is taken from this object. Otherwise, the argument is prompted for. +} + + \item{doc}{ +An optional string succinctly documenting the purpose of the script. +} +} +\details{ +Interactive scripts typically make use of hardcoded filepaths and parameter settings. This makes testing easy, but generalization to non-interactive mode more difficult. This utility provides a mechanism for writing scripts that work properly in both interactive and command-line modes. + +To use this method, specify a list with key-value pairs representing the arguments as specified above. In command-line mode, if no arguments are specified or the user specifies '-h' or '-help' anywhere on the command string, a help message listing the available arguments, their default values, and some documentation about each argument is printed. +} +\value{ +Returns a list with keys matching the argspec and values representing the specified arguments. + +\item{arg1 }{Value for argument 1} +\item{arg2 }{Value for argument 2} +...etc. +} +\references{ +%% ~put references to the literature/web site here ~ +} +\author{ +Kiran Garimella +} +\examples{ +argspec = list( + file = list(value="/my/test.vcf", doc="VCF file"), + verbose = list(value=0, doc="If 1, set verbose mode"), + test2 = list(value=2.3e9, doc="Another argument that does stuff") +); + +cmdargs = gsa.getargs(argspec, doc="My test program"); + +print(cmdargs$file); # will print '[1] "/my/test.vcf"' +} +\keyword{ ~kwd1 } diff --git a/public/R/src/gsalib/man/gsa.message.Rd b/public/R/src/gsalib/man/gsa.message.Rd new file mode 100644 index 000000000..9752de8a9 --- /dev/null +++ b/public/R/src/gsalib/man/gsa.message.Rd @@ -0,0 +1,44 @@ +\name{gsa.message} +\alias{gsa.message} +\title{ +GSA message +} +\description{ +Write a message to standard error with the prefix '[gsalib]'. +} +\usage{ +gsa.message(message) +} +\arguments{ + \item{message}{ +The message to write.
+} +} +\details{ +%% ~~ If necessary, more details than the description above ~~ +} +\value{ +%% ~Describe the value returned +%% If it is a LIST, use +%% \item{comp1 }{Description of 'comp1'} +%% \item{comp2 }{Description of 'comp2'} +%% ... +} +\references{ +%% ~put references to the literature/web site here ~ +} +\author{ +Kiran Garimella +} +\note{ +%% ~~further notes~~ +} + +\seealso{ +%% ~~objects to See Also as \code{\link{help}}, ~~~ +} +\examples{ +## Write message to stderr +gsa.message("This is a message"); +} +\keyword{ ~kwd1 } diff --git a/public/R/src/gsalib/man/gsa.plot.venn.Rd b/public/R/src/gsalib/man/gsa.plot.venn.Rd new file mode 100644 index 000000000..bf4feb5bc --- /dev/null +++ b/public/R/src/gsalib/man/gsa.plot.venn.Rd @@ -0,0 +1,75 @@ +\name{gsa.plot.venn} +\alias{gsa.plot.venn} +\title{ +Plot a proportional venn diagram +} +\description{ +Plot a proportional venn diagram (two or three-way venns allowed) +} +\usage{ +gsa.plot.venn(a, b, c = 0, a_and_b, a_and_c = 0, b_and_c = 0, col = c("#FF6342", "#63C6DE", "#ADDE63"), pos = c(0.2, 0.2, 0.8, 0.82), debug = 0) +} +\arguments{ + \item{a}{ +size of 'a' circle +} + \item{b}{ +size of 'b' circle +} + \item{c}{ +size of 'c' circle +} + \item{a_and_b}{ +size of a and b overlap +} + \item{a_and_c}{ +size of a and c overlap +} + \item{b_and_c}{ +size of b and c overlap +} + \item{col}{ +vector of colors for each venn piece +} + \item{pos}{ +vector of positional elements +} + \item{debug}{ +if 1, set debug mode and print useful information +} +} +\details{ +Plots a two-way or three-way proportional Venn diagram. Internally, this method uses the Google Chart API to generate the diagram, then renders it into the plot window where it can be annotated in interesting ways. +} +\value{ +%% ~Describe the value returned +%% If it is a LIST, use +%% \item{comp1 }{Description of 'comp1'} +%% \item{comp2 }{Description of 'comp2'} +%% ...
+} +\references{ +} +\author{ +Kiran Garimella +} +\note{ +%% ~~further notes~~ +} + +%% ~Make other sections like Warning with \section{Warning }{....} ~ + +\seealso{ +%% ~~objects to See Also as \code{\link{help}}, ~~~ +} +\examples{ +## Plot a two-way Venn diagram +gsa.plot.venn(1000, 750, 0, 400); + +## Plot a three-way Venn diagram +gsa.plot.venn(1000, 750, 900, 400, 650, 500); +} +% Add one or more standard keywords, see file 'KEYWORDS' in the +% R documentation directory. +\keyword{ ~kwd1 } +\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line diff --git a/public/R/src/gsalib/man/gsa.read.eval.Rd b/public/R/src/gsalib/man/gsa.read.eval.Rd new file mode 100644 index 000000000..0e2baba73 --- /dev/null +++ b/public/R/src/gsalib/man/gsa.read.eval.Rd @@ -0,0 +1,111 @@ +\name{gsa.read.eval} +\alias{gsa.read.eval} +\title{ +Read a VariantEval file +} +\description{ +Read a VariantEval file that's output in R format. +} +\usage{ +gsa.read.eval(evalRoot) +} +%- maybe also 'usage' for other objects documented here. +\arguments{ + \item{evalRoot}{ +%% ~~Describe \code{evalRoot} here~~ +} +} +\details{ +%% ~~ If necessary, more details than the description above ~~ +} +\value{ +%% ~Describe the value returned +%% If it is a LIST, use +%% \item{comp1 }{Description of 'comp1'} +%% \item{comp2 }{Description of 'comp2'} +%% ... +} +\references{ +%% ~put references to the literature/web site here ~ +} +\author{ +%% ~~who you are~~ +} +\note{ +%% ~~further notes~~ +} + +%% ~Make other sections like Warning with \section{Warning }{....} ~ + +\seealso{ +%% ~~objects to See Also as \code{\link{help}}, ~~~ +} +\examples{ +##---- Should be DIRECTLY executable !! ---- +##-- ==> Define data, use random, +##-- or do help(data=index) for the standard data sets. 
+ +## The function is currently defined as +function(evalRoot) { + fileAlleleCountStats = paste(evalRoot, ".AlleleCountStats.csv", sep=""); + fileCompOverlap = paste(evalRoot, ".Comp_Overlap.csv", sep=""); + fileCountVariants = paste(evalRoot, ".Count_Variants.csv", sep=""); + fileGenotypeConcordance = paste(evalRoot, ".Genotype_Concordance.csv", sep=""); + fileMetricsByAc = paste(evalRoot, ".MetricsByAc.csv", sep=""); + fileMetricsBySample = paste(evalRoot, ".MetricsBySample.csv", sep=""); + fileQuality_Metrics_by_allele_count = paste(evalRoot, ".Quality_Metrics_by_allele_count.csv", sep=""); + fileQualityScoreHistogram = paste(evalRoot, ".QualityScoreHistogram.csv", sep=""); + fileSampleStatistics = paste(evalRoot, ".Sample_Statistics.csv", sep=""); + fileSampleSummaryStatistics = paste(evalRoot, ".Sample_Summary_Statistics.csv", sep=""); + fileSimpleMetricsBySample = paste(evalRoot, ".SimpleMetricsBySample.csv", sep=""); + fileTi_slash_Tv_Variant_Evaluator = paste(evalRoot, ".Ti_slash_Tv_Variant_Evaluator.csv", sep=""); + fileTiTvStats = paste(evalRoot, ".TiTvStats.csv", sep=""); + fileVariant_Quality_Score = paste(evalRoot, ".Variant_Quality_Score.csv", sep=""); + + eval = list( + AlleleCountStats = NA, + CompOverlap = NA, + CountVariants = NA, + GenotypeConcordance = NA, + MetricsByAc = NA, + MetricsBySample = NA, + Quality_Metrics_by_allele_count = NA, + QualityScoreHistogram = NA, + SampleStatistics = NA, + SampleSummaryStatistics = NA, + SimpleMetricsBySample = NA, + TiTv = NA, + TiTvStats = NA, + Variant_Quality_Score = NA, + + CallsetNames = c(), + CallsetOnlyNames = c(), + CallsetFilteredNames = c() + ); + + eval$AlleleCountStats = .attemptToLoadFile(fileAlleleCountStats); + eval$CompOverlap = .attemptToLoadFile(fileCompOverlap); + eval$CountVariants = .attemptToLoadFile(fileCountVariants); + eval$GenotypeConcordance = .attemptToLoadFile(fileGenotypeConcordance); + eval$MetricsByAc = .attemptToLoadFile(fileMetricsByAc); + eval$MetricsBySample = 
.attemptToLoadFile(fileMetricsBySample); + eval$Quality_Metrics_by_allele_count = .attemptToLoadFile(fileQuality_Metrics_by_allele_count); + eval$QualityScoreHistogram = .attemptToLoadFile(fileQualityScoreHistogram); + eval$SampleStatistics = .attemptToLoadFile(fileSampleStatistics); + eval$SampleSummaryStatistics = .attemptToLoadFile(fileSampleSummaryStatistics); + eval$SimpleMetricsBySample = .attemptToLoadFile(fileSimpleMetricsBySample); + eval$TiTv = .attemptToLoadFile(fileTi_slash_Tv_Variant_Evaluator); + eval$TiTvStats = .attemptToLoadFile(fileTiTvStats); + eval$Variant_Quality_Score = .attemptToLoadFile(fileVariant_Quality_Score); + + uniqueJexlExpressions = unique(eval$TiTv$jexl_expression); + eval$CallsetOnlyNames = as.vector(uniqueJexlExpressions[grep("FilteredIn|Intersection|none", uniqueJexlExpressions, invert=TRUE, ignore.case=TRUE)]); + eval$CallsetNames = as.vector(gsub("-only", "", eval$CallsetOnlyNames)); + eval$CallsetFilteredNames = as.vector(c()); + eval; + } +} +% Add one or more standard keywords, see file 'KEYWORDS' in the +% R documentation directory. +\keyword{ ~kwd1 } +\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line diff --git a/public/R/src/gsalib/man/gsa.read.gatkreport.Rd b/public/R/src/gsalib/man/gsa.read.gatkreport.Rd new file mode 100644 index 000000000..67c2c7b28 --- /dev/null +++ b/public/R/src/gsalib/man/gsa.read.gatkreport.Rd @@ -0,0 +1,55 @@ +\name{gsa.read.gatkreport} +\alias{gsa.read.gatkreport} +\title{ +gsa.read.gatkreport +} +\description{ +Reads a GATKReport file - a multi-table document - and loads each table as a separate data.frame object in a list. +} +\usage{ +gsa.read.gatkreport(filename) +} +\arguments{ + \item{filename}{ +The path to the GATKReport file. +} +} +\details{ +The GATKReport format replaces the multi-file output format used by many GATK tools and provides a single, consolidated file format. This format accommodates multiple tables and is still R-loadable - through this function.
+ +The file format looks like this: +\preformatted{##:GATKReport.v0.1 TableName : The description of the table +col1 col2 col3 +0 0.007451835696110506 25.474613284804366 +1 0.002362777171937477 29.844949954504095 +2 9.087604507451836E-4 32.87590975254731 +3 5.452562704471102E-4 34.498999090081895 +4 9.087604507451836E-4 35.14831665150137 +} + +} +\value{ +Returns a list object, where each key is the TableName and the value is the data.frame object with the contents of the table. If multiple tables with the same name exist, each one after the first will be given names of "TableName.1", "TableName.2", ..., "TableName.N". +%% ~Describe the value returned +%% If it is a LIST, use +%% \item{comp1 }{Description of 'comp1'} +%% \item{comp2 }{Description of 'comp2'} +%% ... +} +\references{ +%% ~put references to the literature/web site here ~ +} +\author{ +Kiran Garimella +} +\note{ +%% ~~further notes~~ +} + +\seealso{ +%% ~~objects to See Also as \code{\link{help}}, ~~~ +} +\examples{ +report = gsa.read.gatkreport("/path/to/my/output.gatkreport"); +} +\keyword{ ~kwd1 } diff --git a/public/R/src/gsalib/man/gsa.read.squidmetrics.Rd b/public/R/src/gsalib/man/gsa.read.squidmetrics.Rd new file mode 100644 index 000000000..0a8b37843 --- /dev/null +++ b/public/R/src/gsalib/man/gsa.read.squidmetrics.Rd @@ -0,0 +1,48 @@ +\name{gsa.read.squidmetrics} +\alias{gsa.read.squidmetrics} +\title{ +gsa.read.squidmetrics +} +\description{ +Reads metrics for a specified SQUID project into a dataframe. +} +\usage{ +gsa.read.squidmetrics(project, bylane = FALSE) +} +\arguments{ + \item{project}{ +The project for which metrics should be obtained. +} + \item{bylane}{ +If TRUE, obtains per-lane metrics rather than the default per-sample metrics. +} +} +\details{ +%% ~~ If necessary, more details than the description above ~~ +} +\value{ +%% ~Describe the value returned +%% If it is a LIST, use +%% \item{comp1 }{Description of 'comp1'} +%% \item{comp2 }{Description of 'comp2'} +%% ...
+Returns a data frame with samples (or lanes) as the row and the metric as the column. +} +\references{ +%% ~put references to the literature/web site here ~ +} +\author{ +Kiran Garimella +} +\note{ +This method will only work within the Broad Institute internal network. +} + +\seealso{ +%% ~~objects to See Also as \code{\link{help}}, ~~~ +} +\examples{ +## Obtain metrics for project C315. +d = gsa.read.squidmetrics("C315"); +} +\keyword{ ~kwd1 } diff --git a/public/R/src/gsalib/man/gsa.read.vcf.Rd b/public/R/src/gsalib/man/gsa.read.vcf.Rd new file mode 100644 index 000000000..cffd35e8f --- /dev/null +++ b/public/R/src/gsalib/man/gsa.read.vcf.Rd @@ -0,0 +1,53 @@ +\name{gsa.read.vcf} +\alias{gsa.read.vcf} +\title{ +gsa.read.vcf +} +\description{ +Reads a VCF file into a table. Optionally expands genotype columns into separate columns containing the genotype, separate from the other fields specified in the FORMAT field. +} +\usage{ +gsa.read.vcf(vcffile, skip=0, nrows=-1, expandGenotypeFields = FALSE) +} +\arguments{ + \item{vcffile}{ +The path to the vcf file. +} + \item{skip}{ +The number of lines of the data file to skip before beginning to read data. +} + \item{nrows}{ +The maximum number of rows to read in. Negative and other invalid values are ignored. +} + \item{expandGenotypeFields}{ +If TRUE, adds an additional column per sample containing just the genotype. +} +} +\details{ +The VCF format is the standard variant call file format used in the GATK. This function reads that data in as a table for easy analysis. +} +\value{ +Returns a data.frame object, where each column corresponds to the columns in the VCF file. +%% ~Describe the value returned +%% If it is a LIST, use +%% \item{comp1 }{Description of 'comp1'} +%% \item{comp2 }{Description of 'comp2'} +%% ... 
+} +\references{ +%% ~put references to the literature/web site here ~ +} +\author{ +Kiran Garimella +} +\note{ +%% ~~further notes~~ +} + +\seealso{ +%% ~~objects to See Also as \code{\link{help}}, ~~~ +} +\examples{ +vcf = gsa.read.vcf("/path/to/my/output.vcf"); +} +\keyword{ ~kwd1 } diff --git a/public/R/src/gsalib/man/gsa.warn.Rd b/public/R/src/gsalib/man/gsa.warn.Rd new file mode 100644 index 000000000..0b9770b5c --- /dev/null +++ b/public/R/src/gsalib/man/gsa.warn.Rd @@ -0,0 +1,46 @@ +\name{gsa.warn} +\alias{gsa.warn} +\title{ +GSA warn +} +\description{ +Write a warning message to standard out with the prefix '[gsalib] Warning:'. +} +\usage{ +gsa.warn(message) +} +%- maybe also 'usage' for other objects documented here. +\arguments{ + \item{message}{ +The warning message to write. +} +} +\details{ +%% ~~ If necessary, more details than the description above ~~ +} +\value{ +%% ~Describe the value returned +%% If it is a LIST, use +%% \item{comp1 }{Description of 'comp1'} +%% \item{comp2 }{Description of 'comp2'} +%% ... +} +\references{ +%% ~put references to the literature/web site here ~ +} +\author{ +Kiran Garimella +} +\note{ +%% ~~further notes~~ +} + +\seealso{ +%% ~~objects to See Also as \code{\link{help}}, ~~~ +} +\examples{ +## Write message to stdout +gsa.warn("This is a warning message"); +} +\keyword{ ~kwd1 } +\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line diff --git a/public/R/src/gsalib/man/gsalib-package.Rd b/public/R/src/gsalib/man/gsalib-package.Rd new file mode 100644 index 000000000..2b8d6db9f --- /dev/null +++ b/public/R/src/gsalib/man/gsalib-package.Rd @@ -0,0 +1,68 @@ +\name{gsalib-package} +\alias{gsalib-package} +\alias{gsalib} +\docType{package} +\title{ +GATK utility analysis functions +} +\description{ +Utility functions for analyzing GATK-processed NGS data +} +\details{ +This package contains functions for working with GATK-processed NGS data. 
These functions include a command-line parser that also allows a script to be used in interactive mode (good for developing scripts that will eventually be automated), a proportional Venn diagram generator, convenience methods for parsing VariantEval output, and more. +} +\author{ +Genome Sequencing and Analysis Group + +Medical and Population Genetics Program + +Maintainer: Kiran Garimella +} +\references{ +GSA wiki page: http://www.broadinstitute.org/gsa/wiki + +GATK help forum: http://www.getsatisfaction.com/gsa +} +\examples{ +## get script arguments in interactive and non-interactive mode +cmdargs = gsa.getargs( list( + requiredArg1 = list( + value = NA, + doc = "Documentation for requiredArg1" + ), + + optionalArg1 = list( + value = 3e9, + doc = "Documentation for optionalArg1" + ) +) ); + +## plot a proportional Venn diagram +gsa.plot.venn(500, 250, 0, 100); + +## read a GATKReport file +report = gsa.read.gatkreport("/path/to/my/output.gatkreport"); + +## emit a message +gsa.message("This is a message"); + +## emit a warning message +gsa.warn("This is a warning message"); + +## emit an error message +gsa.error("This is an error message"); + +## read the SQUID metrics for a given sequencing project (internal to the Broad only) +s = gsa.read.squidmetrics("C427"); + +## read command-line arguments +cmdargs = gsa.getargs( + list( + file = list(value="/my/test.vcf", doc="VCF file"), + verbose = list(value=0, doc="If 1, set verbose mode"), + test2 = list(value=2.3e9, doc="Another argument that does stuff") + ), + doc="My test program" +); +} +\keyword{ package } diff --git a/public/java/src/net/sf/picard/reference/FastaSequenceIndexBuilder.java b/public/java/src/net/sf/picard/reference/FastaSequenceIndexBuilder.java index 8825c3767..6c8fe1834 100644 --- a/public/java/src/net/sf/picard/reference/FastaSequenceIndexBuilder.java +++ b/public/java/src/net/sf/picard/reference/FastaSequenceIndexBuilder.java @@ -25,7 +25,6 @@ package net.sf.picard.reference; -import
org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSourceProgressListener; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import static net.sf.picard.reference.FastaSequenceIndexBuilder.Status.*; @@ -39,8 +38,8 @@ import org.broadinstitute.sting.utils.exceptions.UserException; * Produces fai file with same output as samtools faidx */ public class FastaSequenceIndexBuilder { - public File fastaFile; - ReferenceDataSourceProgressListener progress; // interface that provides a method for updating user on progress of reading file + final public File fastaFile; + final boolean printProgress; // keep track of location in file long bytesRead, endOfLastLine, lastTimestamp, fileLength; // initialized to -1 to keep 0-indexed position in file; @@ -55,10 +54,10 @@ public class FastaSequenceIndexBuilder { public enum Status { NONE, CONTIG, FIRST_SEQ_LINE, SEQ_LINE, COMMENT } Status status = Status.NONE; // keeps state of what is currently being read. better to use int instead of enum? 
- public FastaSequenceIndexBuilder(File fastaFile, ReferenceDataSourceProgressListener progress) { - this.progress = progress; + public FastaSequenceIndexBuilder(File fastaFile, boolean printProgress) { this.fastaFile = fastaFile; fileLength = fastaFile.length(); + this.printProgress = printProgress; } /** @@ -252,8 +251,8 @@ public class FastaSequenceIndexBuilder { if (System.currentTimeMillis() - lastTimestamp > 10000) { int percentProgress = (int) (100*bytesRead/fileLength); - if (progress != null) - progress.percentProgress(percentProgress); + if (printProgress) + System.out.println(String.format("PROGRESS UPDATE: file is %d percent complete", percentProgress)); lastTimestamp = System.currentTimeMillis(); } } diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentDefinitions.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentDefinitions.java index 9f92df6e0..8e3f753a8 100755 --- a/public/java/src/org/broadinstitute/sting/commandline/ArgumentDefinitions.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentDefinitions.java @@ -174,7 +174,8 @@ public class ArgumentDefinitions implements Iterable { static DefinitionMatcher VerifiableDefinitionMatcher = new DefinitionMatcher() { public boolean matches( ArgumentDefinition definition, Object key ) { - return definition.validation != null; + // We can perform some sort of validation for anything that isn't a flag. 
+ return !definition.isFlag; } }; } diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatch.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatch.java index 60ed8c899..351583c07 100755 --- a/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatch.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatch.java @@ -44,7 +44,7 @@ public class ArgumentMatch implements Iterable { public final String label; /** - * Maps indicies of command line arguments to values paired with that argument. + * Maps indices of command line arguments to values paired with that argument. */ public final SortedMap> indices = new TreeMap>(); diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java index 9c33e084d..0fb8bbd3a 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java @@ -26,6 +26,8 @@ package org.broadinstitute.sting.commandline; import org.apache.log4j.Logger; +import org.broad.tribble.Feature; +import org.broadinstitute.sting.gatk.refdata.tracks.FeatureManager; import org.broadinstitute.sting.gatk.walkers.Multiplex; import org.broadinstitute.sting.gatk.walkers.Multiplexer; import org.broadinstitute.sting.utils.classloader.JVMUtils; @@ -33,6 +35,7 @@ import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; +import java.io.File; import java.lang.annotation.Annotation; import java.lang.reflect.*; import java.util.*; @@ -109,7 +112,7 @@ public abstract class ArgumentTypeDescriptor { * @return The parsed object. 
*/ public Object parse(ParsingEngine parsingEngine, ArgumentSource source, ArgumentMatches matches) { - return parse(parsingEngine, source, source.field.getType(), matches); + return parse(parsingEngine, source, source.field.getGenericType(), matches); } /** @@ -131,18 +134,18 @@ public abstract class ArgumentTypeDescriptor { protected ArgumentDefinition createDefaultArgumentDefinition( ArgumentSource source ) { Annotation argumentAnnotation = getArgumentAnnotation(source); return new ArgumentDefinition( ArgumentIOType.getIOType(argumentAnnotation), - source.field.getType(), - ArgumentDefinition.getFullName(argumentAnnotation, source.field.getName()), - ArgumentDefinition.getShortName(argumentAnnotation), - ArgumentDefinition.getDoc(argumentAnnotation), - source.isRequired() && !createsTypeDefault(source) && !source.isFlag() && !source.isDeprecated(), - source.isFlag(), - source.isMultiValued(), - source.isHidden(), - getCollectionComponentType(source.field), - ArgumentDefinition.getExclusiveOf(argumentAnnotation), - ArgumentDefinition.getValidationRegex(argumentAnnotation), - getValidOptions(source) ); + source.field.getType(), + ArgumentDefinition.getFullName(argumentAnnotation, source.field.getName()), + ArgumentDefinition.getShortName(argumentAnnotation), + ArgumentDefinition.getDoc(argumentAnnotation), + source.isRequired() && !createsTypeDefault(source) && !source.isFlag() && !source.isDeprecated(), + source.isFlag(), + source.isMultiValued(), + source.isHidden(), + makeRawTypeIfNecessary(getCollectionComponentType(source.field)), + ArgumentDefinition.getExclusiveOf(argumentAnnotation), + ArgumentDefinition.getValidationRegex(argumentAnnotation), + getValidOptions(source) ); } /** @@ -151,7 +154,7 @@ public abstract class ArgumentTypeDescriptor { * @return The parameterized component type, or String.class if the parameterized type could not be found. * @throws IllegalArgumentException If more than one parameterized type is found on the field. 
*/ - protected Class getCollectionComponentType( Field field ) { + protected Type getCollectionComponentType( Field field ) { return null; } @@ -162,7 +165,7 @@ public abstract class ArgumentTypeDescriptor { * @param matches The argument matches for the argument source, or the individual argument match for a scalar if this is being called to help parse a collection. * @return The individual parsed object matching the argument match with Class type. */ - public abstract Object parse( ParsingEngine parsingEngine, ArgumentSource source, Class type, ArgumentMatches matches ); + public abstract Object parse( ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches ); /** * If the argument source only accepts a small set of options, populate the returned list with @@ -273,6 +276,113 @@ public abstract class ArgumentTypeDescriptor { public static boolean isArgumentHidden(Field field) { return field.isAnnotationPresent(Hidden.class); } + + public Class makeRawTypeIfNecessary(Type t) { + if ( t == null ) + return null; + else if ( t instanceof ParameterizedType ) + return (Class)((ParameterizedType) t).getRawType(); + else if ( t instanceof Class ) { + return (Class)t; + } else { + throw new IllegalArgumentException("Unable to determine Class-derived component type of field: " + t); + } + } +} + +/** + * Parser for RodBinding objects + */ +class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor { + /** + * We only want RodBinding class objects + * @param type The type to check. 
+ * @return true if the provided class is a RodBinding.class + */ + @Override + public boolean supports( Class type ) { + return isRodBinding(type); + } + + public static boolean isRodBinding( Class type ) { + return RodBinding.class.isAssignableFrom(type); + } + + @Override + public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches) { + ArgumentDefinition defaultDefinition = createDefaultArgumentDefinition(source); + String value = getArgumentValue( defaultDefinition, matches ); + try { + String name = defaultDefinition.fullName; + String tribbleType = null; + Tags tags = getArgumentTags(matches); + // must have one or two tag values here + if ( tags.getPositionalTags().size() > 2 ) { + throw new UserException.CommandLineException( + String.format("Unexpected number of positional tags for argument %s : %s. " + + "Rod bindings only suport -X:type and -X:name,type argument styles", + value, source.field.getName())); + } if ( tags.getPositionalTags().size() == 2 ) { + // -X:name,type style + name = tags.getPositionalTags().get(0); + tribbleType = tags.getPositionalTags().get(1); + } else { + // case with 0 or 1 positional tags + FeatureManager manager = new FeatureManager(); + + // -X:type style is a type when we cannot determine the type dynamically + String tag1 = tags.getPositionalTags().size() == 1 ? 
tags.getPositionalTags().get(0) : null; + if ( tag1 != null ) { + if ( manager.getByName(tag1) != null ) // this a type + tribbleType = tag1; + else + name = tag1; + } + + if ( tribbleType == null ) { + // try to determine the file type dynamically + File file = new File(value); + if ( file.canRead() && file.isFile() ) { + FeatureManager.FeatureDescriptor featureDescriptor = manager.getByFiletype(file); + if ( featureDescriptor != null ) { + tribbleType = featureDescriptor.getName(); + logger.warn("Dynamically determined type of " + file + " to be " + tribbleType); + } + } + } + } + + if ( tribbleType == null ) // error handling + throw new UserException.CommandLineException( + String.format("Could not parse argument %s with value %s", + defaultDefinition.fullName, value)); + + Constructor ctor = (makeRawTypeIfNecessary(type)).getConstructor(Class.class, String.class, String.class, String.class, Tags.class); + Class parameterType = getParameterizedTypeClass(type); + RodBinding result = (RodBinding)ctor.newInstance(parameterType, name, value, tribbleType, tags); + parsingEngine.addTags(result,tags); + parsingEngine.addRodBinding(result); + return result; + } catch (InvocationTargetException e) { + throw new UserException.CommandLineException( + String.format("Failed to parse value %s for argument %s.", + value, source.field.getName())); + } catch (Exception e) { + throw new UserException.CommandLineException( + String.format("Failed to parse value %s for argument %s.", + value, source.field.getName())); + } + } + + private Class getParameterizedTypeClass(Type t) { + if ( t instanceof ParameterizedType ) { + ParameterizedType parameterizedType = (ParameterizedType)t; + if ( parameterizedType.getActualTypeArguments().length != 1 ) + throw new ReviewedStingException("BUG: more than 1 generic type found on class" + t); + return (Class)parameterizedType.getActualTypeArguments()[0]; + } else + throw new ReviewedStingException("BUG: could not find generic type on class " + 
t); + } } /** @@ -282,9 +392,10 @@ public abstract class ArgumentTypeDescriptor { class SimpleArgumentTypeDescriptor extends ArgumentTypeDescriptor { @Override public boolean supports( Class type ) { - if( type.isPrimitive() ) return true; - if( type.isEnum() ) return true; - if( primitiveToWrapperMap.containsValue(type) ) return true; + if ( RodBindingArgumentTypeDescriptor.isRodBinding(type) ) return false; + if ( type.isPrimitive() ) return true; + if ( type.isEnum() ) return true; + if ( primitiveToWrapperMap.containsValue(type) ) return true; try { type.getConstructor(String.class); @@ -298,7 +409,8 @@ class SimpleArgumentTypeDescriptor extends ArgumentTypeDescriptor { } @Override - public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Class type, ArgumentMatches matches) { + public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Type fulltype, ArgumentMatches matches) { + Class type = makeRawTypeIfNecessary(fulltype); if (source.isFlag()) return true; @@ -339,7 +451,7 @@ class SimpleArgumentTypeDescriptor extends ArgumentTypeDescriptor { throw e; } catch (InvocationTargetException e) { throw new UserException.CommandLineException(String.format("Failed to parse value %s for argument %s. This is most commonly caused by providing an incorrect data type (e.g. a double when an int is required)", - value, source.field.getName())); + value, source.field.getName())); } catch (Exception e) { throw new DynamicClassResolutionException(String.class, e); } @@ -351,7 +463,7 @@ class SimpleArgumentTypeDescriptor extends ArgumentTypeDescriptor { return result; } - + /** * A mapping of the primitive types to their associated wrapper classes. 
Is there really no way to infer @@ -382,10 +494,10 @@ class CompoundArgumentTypeDescriptor extends ArgumentTypeDescriptor { @Override @SuppressWarnings("unchecked") - public Object parse(ParsingEngine parsingEngine,ArgumentSource source, Class type, ArgumentMatches matches) { - Class componentType; + public Object parse(ParsingEngine parsingEngine,ArgumentSource source, Type fulltype, ArgumentMatches matches) { + Class type = makeRawTypeIfNecessary(fulltype); + Type componentType; Object result; - Tags tags; if( Collection.class.isAssignableFrom(type) ) { @@ -399,7 +511,7 @@ class CompoundArgumentTypeDescriptor extends ArgumentTypeDescriptor { } componentType = getCollectionComponentType( source.field ); - ArgumentTypeDescriptor componentArgumentParser = parsingEngine.selectBestTypeDescriptor(componentType); + ArgumentTypeDescriptor componentArgumentParser = parsingEngine.selectBestTypeDescriptor(makeRawTypeIfNecessary(componentType)); Collection collection; try { @@ -428,7 +540,7 @@ class CompoundArgumentTypeDescriptor extends ArgumentTypeDescriptor { } else if( type.isArray() ) { componentType = type.getComponentType(); - ArgumentTypeDescriptor componentArgumentParser = parsingEngine.selectBestTypeDescriptor(componentType); + ArgumentTypeDescriptor componentArgumentParser = parsingEngine.selectBestTypeDescriptor(makeRawTypeIfNecessary(componentType)); // Assemble a collection of individual values used in this computation. 
Collection values = new ArrayList(); @@ -436,7 +548,7 @@ class CompoundArgumentTypeDescriptor extends ArgumentTypeDescriptor { for( ArgumentMatch value: match ) values.add(value); - result = Array.newInstance(componentType,values.size()); + result = Array.newInstance(makeRawTypeIfNecessary(componentType),values.size()); int i = 0; for( ArgumentMatch value: values ) { @@ -459,16 +571,16 @@ class CompoundArgumentTypeDescriptor extends ArgumentTypeDescriptor { * @throws IllegalArgumentException If more than one parameterized type is found on the field. */ @Override - protected Class getCollectionComponentType( Field field ) { - // If this is a parameterized collection, find the contained type. If blow up if more than one type exists. - if( field.getGenericType() instanceof ParameterizedType) { - ParameterizedType parameterizedType = (ParameterizedType)field.getGenericType(); - if( parameterizedType.getActualTypeArguments().length > 1 ) - throw new IllegalArgumentException("Unable to determine collection type of field: " + field.toString()); - return (Class)parameterizedType.getActualTypeArguments()[0]; - } - else - return String.class; + protected Type getCollectionComponentType( Field field ) { + // If this is a parameterized collection, find the contained type. Blow up if more than one type exists.
+ if( field.getGenericType() instanceof ParameterizedType) { + ParameterizedType parameterizedType = (ParameterizedType)field.getGenericType(); + if( parameterizedType.getActualTypeArguments().length > 1 ) + throw new IllegalArgumentException("Unable to determine collection type of field: " + field.toString()); + return parameterizedType.getActualTypeArguments()[0]; + } + else + return String.class; } } @@ -515,7 +627,7 @@ class MultiplexArgumentTypeDescriptor extends ArgumentTypeDescriptor { throw new ReviewedStingException("No multiplexed ids available"); Map multiplexedMapping = new HashMap(); - Class componentType = getCollectionComponentType(source.field); + Class componentType = makeRawTypeIfNecessary(getCollectionComponentType(source.field)); ArgumentTypeDescriptor componentTypeDescriptor = parsingEngine.selectBestTypeDescriptor(componentType); for(Object id: multiplexedIds) { @@ -529,13 +641,13 @@ class MultiplexArgumentTypeDescriptor extends ArgumentTypeDescriptor { @Override - public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Class type, ArgumentMatches matches) { + public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches) { if(multiplexedIds == null) throw new ReviewedStingException("Cannot directly parse a MultiplexArgumentTypeDescriptor; must create a derivative type descriptor first."); Map multiplexedMapping = new HashMap(); - Class componentType = getCollectionComponentType(source.field); + Class componentType = makeRawTypeIfNecessary(getCollectionComponentType(source.field)); for(Object id: multiplexedIds) { @@ -606,7 +718,7 @@ class MultiplexArgumentTypeDescriptor extends ArgumentTypeDescriptor { * @throws IllegalArgumentException If more than one parameterized type is found on the field. 
*/ @Override - protected Class getCollectionComponentType( Field field ) { + protected Type getCollectionComponentType( Field field ) { // Multiplex arguments must resolve to maps from which the clp should extract the second type. if( field.getGenericType() instanceof ParameterizedType) { ParameterizedType parameterizedType = (ParameterizedType)field.getGenericType(); diff --git a/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java b/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java index 0dc18e6f9..9b543142b 100755 --- a/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java @@ -25,6 +25,7 @@ package org.broadinstitute.sting.commandline; +import com.google.java.contract.Requires; import org.apache.log4j.Logger; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.classloader.JVMUtils; @@ -41,6 +42,11 @@ import java.util.*; * A parser for Sting command-line arguments. */ public class ParsingEngine { + /** + * The loaded argument sources along with their back definitions. + */ + private Map argumentSourcesByDefinition = new HashMap(); + /** * A list of defined arguments against which command lines are matched. * Package protected for testing access. @@ -59,11 +65,17 @@ public class ParsingEngine { */ private List parsingMethods = new ArrayList(); + /** + * All of the RodBinding objects we've seen while parsing + */ + private List rodBindings = new ArrayList(); + /** * Class reference to the different types of descriptors that the create method can create. * The type of set used must be ordered (but not necessarily sorted). 
*/ private static final Set STANDARD_ARGUMENT_TYPE_DESCRIPTORS = new LinkedHashSet( Arrays.asList(new SimpleArgumentTypeDescriptor(), + new RodBindingArgumentTypeDescriptor(), new CompoundArgumentTypeDescriptor(), new MultiplexArgumentTypeDescriptor()) ); @@ -80,6 +92,7 @@ public class ParsingEngine { protected static Logger logger = Logger.getLogger(ParsingEngine.class); public ParsingEngine( CommandLineProgram clp ) { + RodBinding.resetNameCounter(); parsingMethods.add( ParsingMethod.FullNameParsingMethod ); parsingMethods.add( ParsingMethod.ShortNameParsingMethod ); @@ -107,8 +120,13 @@ public class ParsingEngine { */ public void addArgumentSource( String sourceName, Class sourceClass ) { List argumentsFromSource = new ArrayList(); - for( ArgumentSource argumentSource: extractArgumentSources(sourceClass) ) - argumentsFromSource.addAll( argumentSource.createArgumentDefinitions() ); + for( ArgumentSource argumentSource: extractArgumentSources(sourceClass) ) { + List argumentDefinitions = argumentSource.createArgumentDefinitions(); + for(ArgumentDefinition argumentDefinition: argumentDefinitions) { + argumentSourcesByDefinition.put(argumentDefinition,argumentSource); + argumentsFromSource.add( argumentDefinition ); + } + } argumentDefinitions.add( new ArgumentDefinitionGroup(sourceName, argumentsFromSource) ); } @@ -199,16 +217,25 @@ public class ParsingEngine { throw new InvalidArgumentException( invalidArguments ); } - // Find invalid argument values (arguments that fail the regexp test. + // Find invalid argument values -- invalid arguments are either completely missing or fail the specified 'validation' regular expression. 
if( !skipValidationOf.contains(ValidationType.InvalidArgumentValue) ) { Collection verifiableArguments = argumentDefinitions.findArgumentDefinitions( null, ArgumentDefinitions.VerifiableDefinitionMatcher ); Collection> invalidValues = new ArrayList>(); for( ArgumentDefinition verifiableArgument: verifiableArguments ) { ArgumentMatches verifiableMatches = argumentMatches.findMatches( verifiableArgument ); + // Check to see whether an argument value was specified. Argument values must be provided + // when the argument name is specified and the argument is not a flag type. + for(ArgumentMatch verifiableMatch: verifiableMatches) { + ArgumentSource argumentSource = argumentSourcesByDefinition.get(verifiableArgument); + if(verifiableMatch.values().size() == 0 && !verifiableArgument.isFlag && argumentSource.createsTypeDefault()) + invalidValues.add(new Pair(verifiableArgument,null)); + } + + // Ensure that the field contents meet the validation criteria specified by the regular expression. for( ArgumentMatch verifiableMatch: verifiableMatches ) { for( String value: verifiableMatch.values() ) { - if( !value.matches(verifiableArgument.validation) ) + if( verifiableArgument.validation != null && !value.matches(verifiableArgument.validation) ) invalidValues.add( new Pair(verifiableArgument, value) ); } } @@ -304,7 +331,17 @@ public class ParsingEngine { if(!tags.containsKey(key)) return new Tags(); return tags.get(key); - } + } + + /** + * Add a RodBinding type argument to this parser. Called during parsing to allow + * us to track all of the RodBindings discovered in the command line. + * @param rodBinding the rodbinding to add. Must not be added twice + */ + @Requires("rodBinding != null") + public void addRodBinding(final RodBinding rodBinding) { + rodBindings.add(rodBinding); + } /** * Notify the user that a deprecated command-line argument has been used. 
@@ -344,6 +381,10 @@ public class ParsingEngine { } } + public Collection getRodBindings() { + return Collections.unmodifiableCollection(rodBindings); + } + /** * Gets a collection of the container instances of the given type stored within the given target. * @param source Argument source. @@ -390,7 +431,6 @@ public class ParsingEngine { return ArgumentTypeDescriptor.selectBest(argumentTypeDescriptors,type); } - private List extractArgumentSources(Class sourceClass, Field[] parentFields) { // now simply call into the truly general routine extract argument bindings but with a null // object so bindings aren't computed @@ -515,10 +555,14 @@ class InvalidArgumentValueException extends ArgumentException { private static String formatArguments( Collection> invalidArgumentValues ) { StringBuilder sb = new StringBuilder(); for( Pair invalidValue: invalidArgumentValues ) { - sb.append( String.format("%nArgument '--%s' has value of incorrect format: %s (should match %s)", - invalidValue.first.fullName, - invalidValue.second, - invalidValue.first.validation) ); + if(invalidValue.getSecond() == null) + sb.append( String.format("%nArgument '--%s' requires a value but none was provided", + invalidValue.first.fullName) ); + else + sb.append( String.format("%nArgument '--%s' has value of incorrect format: %s (should match %s)", + invalidValue.first.fullName, + invalidValue.second, + invalidValue.first.validation) ); } return sb.toString(); } diff --git a/public/java/src/org/broadinstitute/sting/commandline/RodBinding.java b/public/java/src/org/broadinstitute/sting/commandline/RodBinding.java new file mode 100644 index 000000000..41b5bf6f3 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/commandline/RodBinding.java @@ -0,0 +1,187 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software 
without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.commandline; + +import com.google.java.contract.Ensures; +import com.google.java.contract.Requires; +import org.broad.tribble.Feature; + +import java.util.*; + +/** + * A RodBinding representing a walker argument that gets bound to a ROD track. + * + * The RodBinding is a formal GATK argument that bridges between a walker and + * the RefMetaDataTracker to obtain data about this rod track at runtime. The RodBinding + * is explicitly typed with type of the Tribble.Feature expected to be produced by this + * argument. The GATK Engine takes care of initializing the binding and connecting it + * to the RMD system. + * + * It is recommended that optional RodBindings be initialized to the value returned + * by the static method makeUnbound(). + * + * Note that this class is immutable. 
+ */ +public final class RodBinding { + protected final static String UNBOUND_VARIABLE_NAME = ""; + protected final static String UNBOUND_SOURCE = "UNBOUND"; + protected final static String UNBOUND_TRIBBLE_TYPE = ""; + + /** + * Create an unbound RodBinding of the given type. This is the correct programming + * style for an optional RodBinding + * + * At Input() + * RodBinding x = RodBinding.makeUnbound(T.class) + * + * The unbound binding is guaranteed to never match any binding. It uniquely + * returns false to isBound(). + * + * @param type the Class type produced by this unbound object + * @param any class extending Tribble Feature + * @return the UNBOUND RodBinding producing objects of type T + */ + @Requires("type != null") + public final static RodBinding makeUnbound(Class type) { + return new RodBinding(type); + } + + /** The name of this binding. Often the name of the field itself, but can be overridden on cmdline */ + final private String name; + /** where the data for this ROD is coming from. A file or special value if coming from stdin */ + final private String source; + /** the string name of the tribble type, such as vcf, bed, etc. */ + final private String tribbleType; + /** The command line tags associated with this RodBinding */ + final private Tags tags; + /** The Java class expected for this RodBinding. Must correspond to the type emitted by Tribble */ + final private Class type; + /** True for all RodBindings except the special UNBOUND binding, which is the default for optional arguments */ + final private boolean bound; + + /** + * The name counter. This is how we create unique names for collections of RodBindings + * on the command line. If you provide the GATK with -X file1 and -X file2 for a + * RodBinding argument declared as a List of RodBindings, then the bindings automatically receive + * the names X and X2, respectively.
+ */ + final private static Map nameCounter = new HashMap(); + + /** Clears the name counter. Used by unit tests and by the ParsingEngine constructor */ + final public static void resetNameCounter() { + nameCounter.clear(); + } + + @Requires("rawName != null") + @Ensures("result != null") + final private static synchronized String countedVariableName(final String rawName) { + Integer count = nameCounter.get(rawName); + if ( count == null ) { + nameCounter.put(rawName, 1); + return rawName; + } else { + nameCounter.put(rawName, count + 1); + return rawName + (count + 1); + } + } + + @Requires({"type != null", "rawName != null", "source != null", "tribbleType != null", "tags != null"}) + public RodBinding(Class type, final String rawName, final String source, final String tribbleType, final Tags tags) { + this.type = type; + this.name = countedVariableName(rawName); + this.source = source; + this.tribbleType = tribbleType; + this.tags = tags; + this.bound = true; + } + + /** + * Make an unbound RodBinding. Private: only used by makeUnbound() to create UNBOUND bindings + * @param type class this unbound RodBinding creates + */ + @Requires({"type != null"}) + private RodBinding(Class type) { + this.type = type; + this.name = UNBOUND_VARIABLE_NAME; // special value can never be found in RefMetaDataTracker + this.source = UNBOUND_SOURCE; + this.tribbleType = UNBOUND_TRIBBLE_TYPE; + this.tags = new Tags(); + this.bound = false; + } + + + /** + * @return True for all RodBindings except the special UNBOUND binding, which is the default for optional arguments + */ + final public boolean isBound() { + return bound; + } + + /** + * @return The name of this binding. Often the name of the field itself, but can be overridden on cmdline + */ + @Ensures({"result != null"}) + final public String getName() { + return name; + } + + /** + * @return The Java class expected for this RodBinding. Must correspond to the type emitted by Tribble
+ */ + @Ensures({"result != null"}) + final public Class getType() { + return type; + } + + /** + * @return where the data for this ROD is coming from. A file or special value if coming from stdin + */ + @Ensures({"result != null"}) + final public String getSource() { + return source; + } + + /** + * @return The command line tags associated with this RodBinding. Will include the tags used to + * determine the name and type of this RodBinding + */ + @Ensures({"result != null"}) + final public Tags getTags() { + return tags; + } + + /** + * @return the string name of the tribble type, such as vcf, bed, etc. + */ + @Ensures({"result != null"}) + final public String getTribbleType() { + return tribbleType; + } + + @Override + public String toString() { + return String.format("(RodBinding name=%s source=%s)", getName(), getSource()); + } +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java b/public/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java index a080ab439..32132c7ca 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java +++ b/public/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java @@ -25,21 +25,20 @@ package org.broadinstitute.sting.gatk; -import org.broadinstitute.sting.commandline.ArgumentTypeDescriptor; -import org.broadinstitute.sting.commandline.CommandLineProgram; +import org.apache.log4j.Logger; +import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection; import org.broadinstitute.sting.gatk.filters.ReadFilter; import org.broadinstitute.sting.gatk.io.stubs.OutputStreamArgumentTypeDescriptor; -import org.broadinstitute.sting.gatk.io.stubs.SAMFileReaderArgumentTypeDescriptor; import org.broadinstitute.sting.gatk.io.stubs.SAMFileWriterArgumentTypeDescriptor; import org.broadinstitute.sting.gatk.io.stubs.VCFWriterArgumentTypeDescriptor; import
org.broadinstitute.sting.gatk.phonehome.GATKRunReport; +import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet; import org.broadinstitute.sting.gatk.walkers.Walker; +import org.broadinstitute.sting.utils.classloader.JVMUtils; import org.broadinstitute.sting.utils.text.ListFileUtils; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; +import java.util.*; /** * @author aaron @@ -64,6 +63,8 @@ public abstract class CommandLineExecutable extends CommandLineProgram { */ private final Collection argumentSources = new ArrayList(); + protected static Logger logger = Logger.getLogger(CommandLineExecutable.class); + /** * this is the function that the inheriting class can expect to have called * when the command line system has initialized. @@ -81,7 +82,6 @@ public abstract class CommandLineExecutable extends CommandLineProgram { // File lists can require a bit of additional expansion. Set these explicitly by the engine. engine.setSAMFileIDs(ListFileUtils.unpackBAMFileList(getArgumentCollection().samFiles,parser)); - engine.setReferenceMetaDataFiles(ListFileUtils.unpackRODBindings(getArgumentCollection().RODBindings,getArgumentCollection().DBSNPFile,parser)); engine.setWalker(walker); walker.setToolkit(engine); @@ -96,6 +96,25 @@ public abstract class CommandLineExecutable extends CommandLineProgram { loadArgumentsIntoObject(walker); argumentSources.add(walker); + Collection newStyle = ListFileUtils.unpackRODBindings(parser.getRodBindings(), parser); + + // todo: remove me when the old style system is removed + if ( getArgumentCollection().RODBindings.size() > 0 ) { + logger.warn("################################################################################"); + logger.warn("################################################################################"); + logger.warn("Deprecated -B rod binding syntax detected. 
This syntax will be retired in GATK 1.2."); + logger.warn("Please use arguments defined by each specific walker instead."); + for ( String oldStyleRodBinding : getArgumentCollection().RODBindings ) { + logger.warn(" -B rod binding with value " + oldStyleRodBinding + " tags: " + parser.getTags(oldStyleRodBinding).getPositionalTags()); + } + logger.warn("################################################################################"); + logger.warn("################################################################################"); + } + + Collection oldStyle = ListFileUtils.unpackRODBindingsOldStyle(getArgumentCollection().RODBindings, parser); + oldStyle.addAll(newStyle); + engine.setReferenceMetaDataFiles(oldStyle); + for (ReadFilter filter: filters) { loadArgumentsIntoObject(filter); argumentSources.add(filter); @@ -112,6 +131,7 @@ public abstract class CommandLineExecutable extends CommandLineProgram { return 0; } + /** * Generate the GATK run report for this walker using the current GATKEngine, if -et is enabled. 
* This report will be written to either STDOUT or to the run repository, depending on the options @@ -142,7 +162,6 @@ public abstract class CommandLineExecutable extends CommandLineProgram { */ protected Collection getArgumentTypeDescriptors() { return Arrays.asList( new VCFWriterArgumentTypeDescriptor(engine,System.out,argumentSources), - new SAMFileReaderArgumentTypeDescriptor(engine), new SAMFileWriterArgumentTypeDescriptor(engine,System.out), new OutputStreamArgumentTypeDescriptor(engine,System.out) ); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java b/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java index 2af29ea70..7e96b609e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java +++ b/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java @@ -36,6 +36,8 @@ import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.help.ApplicationDetails; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; +import org.broadinstitute.sting.utils.help.GATKDocUtils; +import org.broadinstitute.sting.utils.help.GATKDoclet; import org.broadinstitute.sting.utils.text.TextFormattingUtils; import java.util.*; @@ -175,12 +177,8 @@ public class CommandLineGATK extends CommandLineExecutable { StringBuilder additionalHelp = new StringBuilder(); Formatter formatter = new Formatter(additionalHelp); - formatter.format("Description:%n"); - - WalkerManager walkerManager = engine.getWalkerManager(); - String walkerHelpText = walkerManager.getWalkerDescriptionText(walkerType); - - printDescriptorLine(formatter,WALKER_INDENT,"",WALKER_INDENT,FIELD_SEPARATOR,walkerHelpText,TextFormattingUtils.DEFAULT_LINE_WIDTH); + formatter.format("For a full description of this walker, see its GATKdocs at:%n"); + formatter.format("%s%n", GATKDocUtils.helpLinksToGATKDocs(walkerType)); return additionalHelp.toString(); 
} @@ -194,8 +192,6 @@ public class CommandLineGATK extends CommandLineExecutable { StringBuilder additionalHelp = new StringBuilder(); Formatter formatter = new Formatter(additionalHelp); - formatter.format("Available analyses:%n"); - // Get the list of walker names from the walker manager. WalkerManager walkerManager = engine.getWalkerManager(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index 918bc1251..b0c4e203b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -43,7 +43,7 @@ import org.broadinstitute.sting.gatk.filters.ReadGroupBlackListFilter; import org.broadinstitute.sting.gatk.io.OutputTracker; import org.broadinstitute.sting.gatk.io.stubs.Stub; import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack; -import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder; +import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder; import org.broadinstitute.sting.gatk.refdata.utils.RMDIntervalGenerator; import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet; import org.broadinstitute.sting.gatk.walkers.*; @@ -370,33 +370,6 @@ public class GenomeAnalysisEngine { throw new ArgumentException("Walker does not allow a reference but one was provided."); } - /** - * Verifies that all required reference-ordered data has been supplied, and any reference-ordered data that was not - * 'allowed' is still present. - * - * @param rods Reference-ordered data to load. - */ - protected void validateSuppliedReferenceOrderedData(List rods) { - // Check to make sure that all required metadata is present. 
- List allRequired = WalkerManager.getRequiredMetaData(walker); - for (RMD required : allRequired) { - boolean found = false; - for (ReferenceOrderedDataSource rod : rods) { - if (rod.matchesNameAndRecordType(required.name(), required.type())) - found = true; - } - if (!found) - throw new ArgumentException(String.format("Walker requires reference metadata to be supplied named '%s' of type '%s', but this metadata was not provided. " + - "Please supply the specified metadata file.", required.name(), required.type().getSimpleName())); - } - - // Check to see that no forbidden rods are present. - for (ReferenceOrderedDataSource rod : rods) { - if (!WalkerManager.isAllowed(walker, rod)) - throw new ArgumentException(String.format("Walker of type %s does not allow access to metadata: %s", walker.getClass(), rod.getName())); - } - } - protected void validateSuppliedIntervals() { // Only read walkers support '-L unmapped' intervals. Trap and validate any other instances of -L unmapped. if(!(walker instanceof ReadWalker)) { @@ -926,9 +899,6 @@ public class GenomeAnalysisEngine { GenomeLocParser genomeLocParser, ValidationExclusion.TYPE validationExclusionType) { RMDTrackBuilder builder = new RMDTrackBuilder(sequenceDictionary,genomeLocParser,validationExclusionType); - // try and make the tracks given their requests - // create of live instances of the tracks - List tracks = new ArrayList(); List dataSources = new ArrayList(); for (RMDTriplet fileDescriptor : referenceMetaDataFiles) @@ -939,7 +909,6 @@ public class GenomeAnalysisEngine { flashbackData())); // validation: check to make sure everything the walker needs is present, and that all sequence dictionaries match. 
- validateSuppliedReferenceOrderedData(dataSources); validateSourcesAgainstReference(readsDataSource, referenceDataSource.getReference(), dataSources, builder); return dataSources; diff --git a/public/java/src/org/broadinstitute/sting/gatk/WalkerManager.java b/public/java/src/org/broadinstitute/sting/gatk/WalkerManager.java index 6aeb42faa..f053c299c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/WalkerManager.java +++ b/public/java/src/org/broadinstitute/sting/gatk/WalkerManager.java @@ -177,19 +177,7 @@ public class WalkerManager extends PluginManager { * @return The list of allowed reference meta data. */ public static List getAllowsMetaData(Class walkerClass) { - Allows allowsDataSource = getWalkerAllowed(walkerClass); - if (allowsDataSource == null) - return Collections.emptyList(); - return Arrays.asList(allowsDataSource.referenceMetaData()); - } - - /** - * Get a list of RODs allowed by the walker. - * @param walker Walker to query. - * @return The list of allowed reference meta data. - */ - public static List getAllowsMetaData(Walker walker) { - return getAllowsMetaData(walker.getClass()); + return Collections.emptyList(); } /** @@ -226,24 +214,7 @@ public class WalkerManager extends PluginManager { * @return True if the walker forbids this data type. False otherwise. */ public static boolean isAllowed(Class walkerClass, ReferenceOrderedDataSource rod) { - Allows allowsDataSource = getWalkerAllowed(walkerClass); - - // Allows is less restrictive than requires. If an allows - // clause is not specified, any kind of data is allowed. - if( allowsDataSource == null ) - return true; - - // The difference between unspecified RMD and the empty set of metadata can't be detected. - // Treat an empty 'allows' as 'allow everything'. Maybe we can have a special RMD flag to account for this - // case in the future. 
- if( allowsDataSource.referenceMetaData().length == 0 ) - return true; - - for( RMD allowed: allowsDataSource.referenceMetaData() ) { - if( rod.matchesNameAndRecordType(allowed.name(),allowed.type()) ) - return true; - } - return false; + return true; } /** @@ -283,8 +254,7 @@ public class WalkerManager extends PluginManager { * @return The list of required reference meta data. */ public static List getRequiredMetaData(Class walkerClass) { - Requires requiresDataSource = getWalkerRequirements(walkerClass); - return Arrays.asList(requiresDataSource.referenceMetaData()); + return Collections.emptyList(); } /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reference/ReferenceDataSourceProgressListener.java b/public/java/src/org/broadinstitute/sting/gatk/arguments/DbsnpArgumentCollection.java similarity index 66% rename from public/java/src/org/broadinstitute/sting/gatk/datasources/reference/ReferenceDataSourceProgressListener.java rename to public/java/src/org/broadinstitute/sting/gatk/arguments/DbsnpArgumentCollection.java index 8dace8fe4..b77b175bc 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reference/ReferenceDataSourceProgressListener.java +++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/DbsnpArgumentCollection.java @@ -23,8 +23,26 @@ * THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -package org.broadinstitute.sting.gatk.datasources.reference; +package org.broadinstitute.sting.gatk.arguments; + + +import org.broadinstitute.sting.commandline.Input; +import org.broadinstitute.sting.commandline.RodBinding; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.simpleframework.xml.*; + +/** + * @author ebanks + * @version 1.0 + */ +@Root +public class DbsnpArgumentCollection { + + /** + * A dbSNP VCF file. 
+ */ + @Input(fullName="dbsnp", shortName = "D", doc="dbSNP file", required=false) + public RodBinding dbsnp = RodBinding.makeUnbound(VariantContext.class); -public interface ReferenceDataSourceProgressListener { - public void percentProgress(int percent); } + diff --git a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java index ee2e85025..62135f21b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java @@ -117,11 +117,6 @@ public class GATKArgumentCollection { @Argument(fullName = "nonDeterministicRandomSeed", shortName = "ndrs", doc = "Makes the GATK behave non deterministically, that is, the random numbers generated will be different in every run", required = false) public boolean nonDeterministicRandomSeed = false; - - @Element(required = false) - @Input(fullName = "DBSNP", shortName = "D", doc = "DBSNP file", required = false) - public String DBSNPFile = null; - /** * The override mechanism in the GATK, by default, populates the command-line arguments, then * the defaults from the walker annotations. 
Unfortunately, walker annotations should be trumped @@ -380,9 +375,6 @@ public class GATKArgumentCollection { if (!other.excludeIntervals.equals(this.excludeIntervals)) { return false; } - if (!other.DBSNPFile.equals(this.DBSNPFile)) { - return false; - } if (!other.unsafe.equals(this.unsafe)) { return false; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/arguments/StandardVariantContextInputArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/arguments/StandardVariantContextInputArgumentCollection.java new file mode 100644 index 000000000..847120414 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/StandardVariantContextInputArgumentCollection.java @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2010 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +package org.broadinstitute.sting.gatk.arguments; + + +import org.broadinstitute.sting.commandline.Input; +import org.broadinstitute.sting.commandline.RodBinding; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.simpleframework.xml.Root; + +/** + * @author ebanks + * @version 1.0 + */ +@Root +public class StandardVariantContextInputArgumentCollection { + + /** + * The VCF file we are using. + * + * Variants from this file are used by this tool as input. + */ + @Input(fullName="variant", shortName = "V", doc="Input VCF file", required=true) + public RodBinding variants; + +} + diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ManagingReferenceOrderedView.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ManagingReferenceOrderedView.java index 223659a46..d065635c8 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ManagingReferenceOrderedView.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ManagingReferenceOrderedView.java @@ -1,8 +1,10 @@ package org.broadinstitute.sting.gatk.datasources.providers; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator; +import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; import org.broadinstitute.sting.utils.GenomeLoc; import java.util.ArrayList; @@ -49,11 +51,14 @@ public class ManagingReferenceOrderedView implements ReferenceOrderedView { * @param loc Locus at which to track. * @return A tracker containing information about this locus. 
*/ - public RefMetaDataTracker getReferenceOrderedDataAtLocus( GenomeLoc loc ) { - RefMetaDataTracker tracks = new RefMetaDataTracker(states.size()); + public RefMetaDataTracker getReferenceOrderedDataAtLocus( GenomeLoc loc, ReferenceContext referenceContext ) { + List bindings = states.isEmpty() ? Collections.emptyList() : new ArrayList(states.size()); + for ( ReferenceOrderedDataState state: states ) - tracks.bind( state.dataSource.getName(), state.iterator.seekForward(loc) ); - return tracks; + // todo -- warning, I removed the reference to the name from states + bindings.add( state.iterator.seekForward(loc) ); + + return new RefMetaDataTracker(bindings, referenceContext); } /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedView.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedView.java index 2d46a85ac..939cbfe35 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedView.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedView.java @@ -1,8 +1,9 @@ package org.broadinstitute.sting.gatk.datasources.providers; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.utils.GenomeLoc; public interface ReferenceOrderedView extends View { - RefMetaDataTracker getReferenceOrderedDataAtLocus( GenomeLoc loc ); + RefMetaDataTracker getReferenceOrderedDataAtLocus( GenomeLoc loc, ReferenceContext refContext ); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/RodLocusView.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/RodLocusView.java index 39c632539..c38b09334 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/RodLocusView.java +++ 
b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/RodLocusView.java @@ -26,6 +26,7 @@ package org.broadinstitute.sting.gatk.datasources.providers; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator; @@ -45,7 +46,8 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView { */ private RODMergingIterator rodQueue = null; - RefMetaDataTracker tracker = null; + Collection allTracksHere; + GenomeLoc lastLoc = null; RODRecordList interval = null; @@ -94,12 +96,12 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView { } rodQueue = new RODMergingIterator(iterators); - - //throw new StingException("RodLocusView currently disabled"); } - public RefMetaDataTracker getReferenceOrderedDataAtLocus( GenomeLoc loc ) { - return tracker; + public RefMetaDataTracker getReferenceOrderedDataAtLocus( GenomeLoc loc, ReferenceContext referenceContext ) { + // special case the interval again -- add it into the ROD + if ( interval != null ) { allTracksHere.add(interval); } + return new RefMetaDataTracker(allTracksHere, referenceContext); } public boolean hasNext() { @@ -122,10 +124,7 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView { if ( DEBUG ) System.out.printf("In RodLocusView.next(): creating tracker...%n"); - // Update the tracker here for use - Collection allTracksHere = getSpanningTracks(datum); - tracker = createTracker(allTracksHere); - + allTracksHere = getSpanningTracks(datum); GenomeLoc rodSite = datum.getLocation(); GenomeLoc site = genomeLocParser.createGenomeLoc( rodSite.getContig(), rodSite.getStart(), rodSite.getStart()); @@ -137,19 +136,6 @@ public class RodLocusView extends 
LocusView implements ReferenceOrderedView { return new AlignmentContext(site, new ReadBackedPileupImpl(site), skippedBases); } - private RefMetaDataTracker createTracker( Collection allTracksHere ) { - RefMetaDataTracker t = new RefMetaDataTracker(allTracksHere.size()); - for ( RODRecordList track : allTracksHere ) { - if ( ! t.hasROD(track.getName()) ) - t.bind(track.getName(), track); - } - - // special case the interval again -- add it into the ROD - if ( interval != null ) { t.bind(interval.getName(), interval); } - - return t; - } - private Collection getSpanningTracks(RODRecordList marker) { return rodQueue.allElementsLTE(marker); } @@ -197,10 +183,6 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView { return getSkippedBases(getLocOneBeyondShard()); } - public RefMetaDataTracker getTracker() { - return tracker; - } - /** * Closes the current view. */ @@ -209,6 +191,6 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView { state.dataSource.close( state.iterator ); rodQueue = null; - tracker = null; + allTracksHere = null; } } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java index 6064806f3..572970349 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java @@ -893,6 +893,7 @@ public class SAMDataSource { * Custom representation of interval bounds. * Makes it simpler to track current position. */ + private int[] intervalContigIndices; private int[] intervalStarts; private int[] intervalEnds; @@ -917,12 +918,14 @@ public class SAMDataSource { if(foundMappedIntervals) { if(keepOnlyUnmappedReads) throw new ReviewedStingException("Tried to apply IntervalOverlapFilteringIterator to a mixed of mapped and unmapped intervals. 
Please apply this filter to only mapped or only unmapped reads"); + this.intervalContigIndices = new int[intervals.size()]; this.intervalStarts = new int[intervals.size()]; this.intervalEnds = new int[intervals.size()]; int i = 0; for(GenomeLoc interval: intervals) { - intervalStarts[i] = (int)interval.getStart(); - intervalEnds[i] = (int)interval.getStop(); + intervalContigIndices[i] = interval.getContigIndex(); + intervalStarts[i] = interval.getStart(); + intervalEnds[i] = interval.getStop(); i++; } } @@ -961,11 +964,10 @@ public class SAMDataSource { while(nextRead == null && (keepOnlyUnmappedReads || currentBound < intervalStarts.length)) { if(!keepOnlyUnmappedReads) { // Mapped read filter; check against GenomeLoc-derived bounds. - if(candidateRead.getAlignmentEnd() >= intervalStarts[currentBound] || - (candidateRead.getReadUnmappedFlag() && candidateRead.getAlignmentStart() >= intervalStarts[currentBound])) { - // This read ends after the current interval begins (or, if unmapped, starts within the bounds of the interval. + if(readEndsOnOrAfterStartingBound(candidateRead)) { + // This read ends after the current interval begins. // Promising, but this read must be checked against the ending bound. - if(candidateRead.getAlignmentStart() <= intervalEnds[currentBound]) { + if(readStartsOnOrBeforeEndingBound(candidateRead)) { // Yes, this read is within both bounds. This must be our next read. nextRead = candidateRead; break; @@ -993,6 +995,37 @@ public class SAMDataSource { candidateRead = iterator.next(); } } + + /** + * Check whether the read lies after the start of the current bound. If the read is unmapped but placed, its + * end will be distorted, so rely only on the alignment start. + * @param read The read to position-check. + * @return True if the read ends on or after the start of the current bound. False otherwise. + */ + private boolean readEndsOnOrAfterStartingBound(final SAMRecord read) { + return + // Read ends on a later contig, or...
+ read.getReferenceIndex() > intervalContigIndices[currentBound] || + // Read ends on this contig... + (read.getReferenceIndex() == intervalContigIndices[currentBound] && + // either after this location, or... + (read.getAlignmentEnd() >= intervalStarts[currentBound] || + // read is unmapped but positioned and alignment start is on or after this start point. + (read.getReadUnmappedFlag() && read.getAlignmentStart() >= intervalStarts[currentBound]))); + } + + /** + * Check whether the read lies before the end of the current bound. + * @param read The read to position-check. + * @return True if the read starts on or before the end of the current bound. False otherwise. + */ + private boolean readStartsOnOrBeforeEndingBound(final SAMRecord read) { + return + // Read starts on a prior contig, or... + read.getReferenceIndex() < intervalContigIndices[currentBound] || + // Read starts on this contig and the alignment start is registered on or before this end point. + (read.getReferenceIndex() == intervalContigIndices[currentBound] && read.getAlignmentStart() <= intervalEnds[currentBound]); + } } /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reference/ReferenceDataSource.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reference/ReferenceDataSource.java index ef69a8e5f..c8c79bb14 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reference/ReferenceDataSource.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reference/ReferenceDataSource.java @@ -41,7 +41,7 @@ import java.io.File; * Loads reference data from fasta file * Looks for fai and dict files, and tries to create them if they don't exist */ -public class ReferenceDataSource implements ReferenceDataSourceProgressListener { +public class ReferenceDataSource { private IndexedFastaSequenceFile index; /** our log, which we want to capture anything from this class */ @@ -75,7 +75,7 @@ public class ReferenceDataSource implements
ReferenceDataSourceProgressListener // get exclusive lock if (!indexLock.exclusiveLock()) throw new UserException.CouldNotCreateReferenceIndexFileBecauseOfLock(dictFile); - FastaSequenceIndexBuilder faiBuilder = new FastaSequenceIndexBuilder(fastaFile, this); + FastaSequenceIndexBuilder faiBuilder = new FastaSequenceIndexBuilder(fastaFile, true); FastaSequenceIndex sequenceIndex = faiBuilder.createIndex(); FastaSequenceIndexBuilder.saveAsFaiFile(sequenceIndex, indexFile); } @@ -194,13 +194,4 @@ public class ReferenceDataSource implements ReferenceDataSourceProgressListener public IndexedFastaSequenceFile getReference() { return this.index; } - - /** - * Notify user of progress in creating fai file - * @param percent Percent of fasta file read as a percent - */ - public void percentProgress(int percent) { - System.out.println(String.format("PROGRESS UPDATE: file is %d percent complete", percent)); - } - } diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/rmd/ReferenceOrderedDataPool.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/rmd/ReferenceOrderedDataPool.java index abd5929eb..9d5a54f58 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/rmd/ReferenceOrderedDataPool.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/rmd/ReferenceOrderedDataPool.java @@ -27,7 +27,7 @@ package org.broadinstitute.sting.gatk.datasources.rmd; import net.sf.samtools.SAMSequenceDictionary; import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator; import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack; -import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder; +import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder; import org.broadinstitute.sting.gatk.refdata.utils.FlashBackIterator; import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator; import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet; diff --git 
a/public/java/src/org/broadinstitute/sting/gatk/datasources/rmd/ReferenceOrderedDataSource.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/rmd/ReferenceOrderedDataSource.java index 60b68bda5..18679dd77 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/rmd/ReferenceOrderedDataSource.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/rmd/ReferenceOrderedDataSource.java @@ -29,7 +29,7 @@ import org.broadinstitute.sting.commandline.Tags; import org.broadinstitute.sting.gatk.datasources.reads.Shard; import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator; import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack; -import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder; +import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder; import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator; import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet; import org.broadinstitute.sting.utils.GenomeLoc; @@ -110,11 +110,11 @@ public class ReferenceOrderedDataSource { } public Class getType() { - return builder.getAvailableTrackNamesAndTypes().get(fileDescriptor.getType().toUpperCase()); + return builder.getFeatureManager().getByTriplet(fileDescriptor).getCodecClass(); } public Class getRecordType() { - return builder.createCodec(getType(),getName()).getFeatureType(); + return builder.getFeatureManager().getByTriplet(fileDescriptor).getFeatureClass(); } public File getFile() { diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/ReassignMappingQualityFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/ReassignMappingQualityFilter.java new file mode 100644 index 000000000..50a1384fa --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/ReassignMappingQualityFilter.java @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2009 The Broad Institute + * Permission is hereby granted, free of charge, to any person + * obtaining 
a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.filters; + +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.commandline.Argument; + +/** + * A read filter (transformer) that sets all reads mapping quality to a given value. + * + *

<p> + * If a BAM file contains erroneous or missing mapping qualities, this 'filter' will set + * all your mapping qualities to a given value. Default being 60. + * </p>
+ * + * + * <h2>Input</h2>
+ * <p>
+ * BAM file(s) + * </p>
+ * + * + * <h2>Output</h2>
+ * <p>
+ * BAM file(s) with all reads mapping qualities reassigned + * </p>
+ * + * <h2>Examples</h2>
+ * <pre>
+ *    java
+ *      -jar GenomeAnalysisTK.jar
+ *      -rf ReassignMappingQuality
+ *      -DMQ 35
+ *  </pre>
+ * + * @author carneiro + * @since 8/8/11 + */ + +public class ReassignMappingQualityFilter extends ReadFilter { + + @Argument(fullName = "default_mapping_quality", shortName = "DMQ", doc = "Default read mapping quality to assign to all reads", required = false) + public int defaultMappingQuality = 60; + + public boolean filterOut(SAMRecord rec) { + rec.setMappingQuality(defaultMappingQuality); + return false; + } +} + diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/storage/VCFWriterStorage.java b/public/java/src/org/broadinstitute/sting/gatk/io/storage/VCFWriterStorage.java index 1da03e9c2..ebb4cbe66 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/storage/VCFWriterStorage.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/storage/VCFWriterStorage.java @@ -87,8 +87,8 @@ public class VCFWriterStorage implements Storage, VCFWriter { writer.writeHeader(stub.getVCFHeader()); } - public void add(VariantContext vc, byte ref) { - writer.add(vc, ref); + public void add(VariantContext vc) { + writer.add(vc); } /** @@ -117,7 +117,7 @@ public class VCFWriterStorage implements Storage, VCFWriter { BasicFeatureSource source = BasicFeatureSource.getFeatureSource(file.getAbsolutePath(), new VCFCodec(), false); for ( VariantContext vc : source.iterator() ) { - target.writer.add(vc, vc.getReferenceBaseForIndel()); + target.writer.add(vc); } source.close(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/OutputStreamArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/OutputStreamArgumentTypeDescriptor.java index 8bc97c886..8fef10cd6 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/OutputStreamArgumentTypeDescriptor.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/OutputStreamArgumentTypeDescriptor.java @@ -33,6 +33,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.io.File; import java.io.OutputStream; import 
java.lang.reflect.Constructor; +import java.lang.reflect.Type; /** * Insert an OutputStreamStub instead of a full-fledged concrete OutputStream implementations. @@ -78,7 +79,7 @@ public class OutputStreamArgumentTypeDescriptor extends ArgumentTypeDescriptor { } @Override - public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Class type, ArgumentMatches matches ) { + public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches ) { ArgumentDefinition definition = createDefaultArgumentDefinition(source); String fileName = getArgumentValue( definition, matches ); @@ -91,7 +92,7 @@ public class OutputStreamArgumentTypeDescriptor extends ArgumentTypeDescriptor { engine.addOutput(stub); - Object result = createInstanceOfClass(type,stub); + Object result = createInstanceOfClass(makeRawTypeIfNecessary(type),stub); // WARNING: Side effects required by engine! parsingEngine.addTags(result,getArgumentTags(matches)); diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileReaderArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileReaderArgumentTypeDescriptor.java index f124c2302..8b3efd7ef 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileReaderArgumentTypeDescriptor.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileReaderArgumentTypeDescriptor.java @@ -34,6 +34,7 @@ import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.sam.SAMFileReaderBuilder; import java.io.File; +import java.lang.reflect.Type; /** * Describe how to parse SAMFileReaders. 
@@ -59,7 +60,7 @@ public class SAMFileReaderArgumentTypeDescriptor extends ArgumentTypeDescriptor } @Override - public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Class type, ArgumentMatches matches ) { + public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches ) { SAMFileReaderBuilder builder = new SAMFileReaderBuilder(); String readerFileName = getArgumentValue( createDefaultArgumentDefinition(source), matches ); diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterArgumentTypeDescriptor.java index 38640eda0..3fdb38b3d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterArgumentTypeDescriptor.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterArgumentTypeDescriptor.java @@ -34,6 +34,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.io.File; import java.io.OutputStream; import java.lang.annotation.Annotation; +import java.lang.reflect.Type; import java.util.Arrays; import java.util.List; @@ -102,7 +103,7 @@ public class SAMFileWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor } @Override - public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Class type, ArgumentMatches matches ) { + public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches ) { // Extract all possible parameters that could be passed to a BAM file writer? 
ArgumentDefinition bamArgumentDefinition = createBAMArgumentDefinition(source); String writerFileName = getArgumentValue( bamArgumentDefinition, matches ); diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterArgumentTypeDescriptor.java index 615841f02..e9eed5339 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterArgumentTypeDescriptor.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterArgumentTypeDescriptor.java @@ -32,6 +32,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.io.File; import java.io.OutputStream; +import java.lang.reflect.Type; import java.util.Arrays; import java.util.Collection; import java.util.HashSet; @@ -124,7 +125,7 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor { * @return Transform from the matches into the associated argument. */ @Override - public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Class type, ArgumentMatches matches ) { + public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches ) { ArgumentDefinition defaultArgumentDefinition = createDefaultArgumentDefinition(source); // Get the filename for the genotype file, if it exists. If not, we'll need to send output to out. 
String writerFileName = getArgumentValue(defaultArgumentDefinition,matches); diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterStub.java b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterStub.java index bb84f9457..936243f9d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterStub.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterStub.java @@ -25,6 +25,7 @@ package org.broadinstitute.sting.gatk.io.stubs; +import net.sf.samtools.SAMSequenceRecord; import org.broadinstitute.sting.gatk.CommandLineExecutable; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.io.OutputTracker; @@ -177,14 +178,23 @@ public class VCFWriterStub implements Stub, VCFWriter { vcfHeader = header; // Check for the command-line argument header line. If not present, add it in. - VCFHeaderLine commandLineArgHeaderLine = getCommandLineArgumentHeaderLine(); - boolean foundCommandLineHeaderLine = false; - for(VCFHeaderLine line: vcfHeader.getMetaData()) { - if(line.getKey().equals(commandLineArgHeaderLine.getKey())) - foundCommandLineHeaderLine = true; + if ( !skipWritingHeader ) { + VCFHeaderLine commandLineArgHeaderLine = getCommandLineArgumentHeaderLine(); + boolean foundCommandLineHeaderLine = false; + for (VCFHeaderLine line: vcfHeader.getMetaData()) { + if ( line.getKey().equals(commandLineArgHeaderLine.getKey()) ) + foundCommandLineHeaderLine = true; + } + if ( !foundCommandLineHeaderLine ) + vcfHeader.addMetaDataLine(commandLineArgHeaderLine); + + // also put in the reference contig header lines + String assembly = getReferenceAssembly(engine.getArguments().referenceFile.getName()); + for ( SAMSequenceRecord contig : engine.getReferenceDataSource().getReference().getSequenceDictionary().getSequences() ) + vcfHeader.addMetaDataLine(getContigHeaderLine(contig, assembly)); + + vcfHeader.addMetaDataLine(new VCFHeaderLine("reference", "file://" + 
engine.getArguments().referenceFile.getAbsolutePath())); } - if(!foundCommandLineHeaderLine && !skipWritingHeader) - vcfHeader.addMetaDataLine(commandLineArgHeaderLine); outputTracker.getStorage(this).writeHeader(vcfHeader); } @@ -192,8 +202,8 @@ public class VCFWriterStub implements Stub, VCFWriter { /** * @{inheritDoc} */ - public void add(VariantContext vc, byte ref) { - outputTracker.getStorage(this).add(vc,ref); + public void add(VariantContext vc) { + outputTracker.getStorage(this).add(vc); } /** @@ -220,4 +230,27 @@ public class VCFWriterStub implements Stub, VCFWriter { CommandLineExecutable executable = JVMUtils.getObjectOfType(argumentSources,CommandLineExecutable.class); return new VCFHeaderLine(executable.getAnalysisName(), "\"" + engine.createApproximateCommandLineArgumentString(argumentSources.toArray()) + "\""); } + + private VCFHeaderLine getContigHeaderLine(SAMSequenceRecord contig, String assembly) { + String val; + if ( assembly != null ) + val = String.format("", contig.getSequenceName(), contig.getSequenceLength(), assembly); + else + val = String.format("", contig.getSequenceName(), contig.getSequenceLength()); + return new VCFHeaderLine("contig", val); + } + + private String getReferenceAssembly(String refPath) { + // This doesn't need to be perfect as it's not a required VCF header line, but we might as well give it a shot + String assembly = null; + if ( refPath.indexOf("b37") != -1 || refPath.indexOf("v37") != -1 ) + assembly = "b37"; + else if ( refPath.indexOf("b36") != -1 ) + assembly = "b36"; + else if ( refPath.indexOf("hg18") != -1 ) + assembly = "hg18"; + else if ( refPath.indexOf("hg19") != -1 ) + assembly = "hg19"; + return assembly; + } } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/RODRecordIterator.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/RODRecordIterator.java deleted file mode 100644 index ce924fd87..000000000 --- 
a/public/java/src/org/broadinstitute/sting/gatk/refdata/RODRecordIterator.java +++ /dev/null @@ -1,238 +0,0 @@ -/* - * Copyright (c) 2010 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR - * THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.refdata; - -import org.broadinstitute.sting.gatk.iterators.PushbackIterator; -import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.text.XReadLines; - -import java.io.File; -import java.io.FileNotFoundException; -import java.lang.reflect.Constructor; -import java.util.Iterator; -import java.util.regex.Pattern; - -/** - * This is a low-level iterator designed to provide system-wide generic support for reading record-oriented data - * files. 
The only assumption made is that every line in the file provides a complete and separate data record. The records - * can be associated with coordinates or coordinate intervals, there can be one or more records associated with a given - * position/interval, or intervals can overlap. The records must be comprised of delimited fields, but the format is - * otherwise free. For any specific line-based data format, an appropriate implementation of ReferenceOrderedDatum must be - * provided that is capable of parsing itself from a single line of data. This implementation will be used, - * through reflection mechanism, as a callback to do all the work. - * - * The model is, hence, as follows: - * - * String dataRecord <---> RodImplementation ( ::parseLine(dataRecord.split(delimiter)) is aware of the format and fills - * an instance of RodImplementation with data values from dataRecord line). - * - * - * instantiation of RODRecordIterator(dataFile, trackName, RodImplementation.class) will immediately provide an iterator - * that walks along the dataFile line by line, and on each call to next() returns a new RodImplementation object - * representing a single line (record) of data. The returned object will be initialized with "track name" trackName - - * track names (as returned by ROD.getName()) are often used in other parts of the code to distinguish between - * multiple streams of (possibly heterogeneous) annotation data bound to an application. - * - * This generic iterator skips and ignores a) empty lines, b) lines starting with '#' (comments): they are never sent back - * to the ROD implementation class for processing. - * - * This iterator does not actually check if the ROD records (lines) in the file are indeed ordedered by coordinate, - * and it does not depend on such an order as it still implements a low-level line-based traversal of the data. Higher-level - * iterators/wrappers will perform all the necessary checks. 
- * - * Note: some data formats/ROD implementations may require a header line in the file. In this case the current (ugly) - * mechanism is as follows: - * 1) rod implementation's ::initialize(file) method should be able to open the file, find and read the header line - * and return the header object (to be kept by the iterator) - * 2) rod implementation's ::parseLine(header,line) method should be capable of making use of that saved header object now served to it - * and - * 3) ::parseLine(header,line) should be able to recognize the original header line in the file and skip it (after ROD's initialize() - * method is called, the iterator will re-open the file and start reading it from the very beginning; there is no - * other way, except for "smart" ::parseLine(), to avoid reading in the header line as "data"). - * - * Created by IntelliJ IDEA. - * User: asivache - * Date: Sep 10, 2009 - * Time: 1:22:23 PM - * To change this template use File | Settings | File Templates. - */ -public class RODRecordIterator implements Iterator { - - private PushbackIterator reader; - - // stores name of the track this iterator reads (will be also returned by getName() of ROD objects - // generated by this iterator) - private String name; - - // we keep the file object, only to use file name in error reports - private File file; - - // rod type; this is what we will instantiate for RODs at runtime - private Class type; - - private Object header = null; // Some RODs may use header - - // field delimiter in the file. Should it be the job of the iterator to split the lines though? RODs can do that! - private String fieldDelimiter; - - // constructor for the ROD objects we are going to return. Constructor that takes the track name as its single arg is required. - private Constructor named_constructor; - - // keep track of the lines we are reading. used for error messages only. 
- private long linenum = 0; - - private boolean allow_empty = true; - private boolean allow_comments = true; - public static Pattern EMPTYLINE_PATTERN = Pattern.compile("^\\s*$"); - - public RODRecordIterator(File file, String name, Class type) { - try { - reader = new PushbackIterator(new XReadLines(file)); - } catch (FileNotFoundException e) { - throw new UserException.CouldNotReadInputFile(file, e); - } - this.file = file; - this.name = name; - this.type = type; - try { - named_constructor = type.getConstructor(String.class); - } - catch (java.lang.NoSuchMethodException e) { - throw new ReviewedStingException("ROD class "+type.getName()+" does not have constructor that accepts a single String argument (track name)"); - } - ROD rod = instantiateROD(name); - fieldDelimiter = rod.delimiterRegex(); // get delimiter from the ROD itself - try { - header = rod.initialize(file); - } catch (FileNotFoundException e) { - throw new UserException.CouldNotReadInputFile(file, "ROD "+type.getName() + " failed to initialize properly from file "+file); - } - - } - - - /** - * Returns true if the iteration has more elements. (In other - * words, returns true if next would return an element - * rather than throwing an exception.) - * - * @return true if the iterator has more elements. - */ - public boolean hasNext() { - if ( allow_empty || allow_comments ) { - while ( reader.hasNext() ) { - String line = reader.next(); - if ( allow_empty && EMPTYLINE_PATTERN.matcher(line).matches() ) continue; // skip empty line - if ( allow_comments && line.charAt(0) == '#' ) continue; // skip comment lines - // the line is not empty and not a comment line, so we have next after all - reader.pushback(line); - return true; - } - return false; // oops, we end up here if there's nothing left - } else { - return reader.hasNext(); - } - } - - /** - * Returns the next valid ROD record in the file, skipping empty and comment lines. - * - * @return the next element in the iteration. 
- * @throws java.util.NoSuchElementException - * iteration has no more elements. - */ - public ROD next() { - ROD n = null; - boolean parsed_ok = false; - String line ; - - while ( ! parsed_ok && reader.hasNext() ) { - line = reader.next(); - linenum++; - while ( allow_empty && EMPTYLINE_PATTERN.matcher(line).matches() || - allow_comments && line.charAt(0) == '#' ) { - if ( reader.hasNext() ) { - line = reader.next(); - linenum++; - } else { - line = null; - break; - } - } - - if ( line == null ) break; // if we ran out of lines while skipping empty lines/comments, then we are done - - String parts[] = line.split(fieldDelimiter); - - try { - n = instantiateROD(name); - parsed_ok = n.parseLine(header,parts) ; - } - catch ( Exception e ) { - throw new UserException.MalformedFile(file, "Failed to parse ROD data ("+type.getName()+") from file "+ file + " at line #"+linenum+ - "\nOffending line: "+line+ - "\nReason ("+e.getClass().getName()+")", e); - } - } - - - return n; - } - - /** - * Removes from the underlying collection the last element returned by the - * iterator (optional operation). This method can be called only once per - * call to next. The behavior of an iterator is unspecified if - * the underlying collection is modified while the iteration is in - * progress in any way other than by calling this method. - * - * @throws UnsupportedOperationException if the remove - * operation is not supported by this Iterator. - * @throws IllegalStateException if the next method has not - * yet been called, or the remove method has already - * been called after the last call to the next - * method. - */ - public void remove() { - throw new UnsupportedOperationException("remove() operation is not supported by RODRecordIterator"); - } - - /** Instantiates appropriate implementation of the ROD used by this iteratot. The 'name' argument is the name - * of the ROD track. 
- * @param name - * @return - */ - private ROD instantiateROD(final String name) { - try { - return (ROD) named_constructor.newInstance(name); - } catch (Exception e) { - throw new DynamicClassResolutionException(named_constructor.getDeclaringClass(), e); - } - } - -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java index d03b122e2..b9aaf47de 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java @@ -1,13 +1,15 @@ package org.broadinstitute.sting.gatk.refdata; +import com.google.java.contract.Ensures; +import com.google.java.contract.Requires; import org.apache.log4j.Logger; +import org.broad.tribble.Feature; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.*; @@ -18,348 +20,406 @@ import java.util.*; * The standard interaction model is: * * Traversal system arrives at a site, which has a bunch of RMDs covering it -Genotype * Traversal calls tracker.bind(name, RMD) for each RMDs in RMDs - * Traversal passes tracker to the walker - * walker calls lookup(name, default) to obtain the RMDs values at this site, or default if none was - * bound at this site. + * Traversal passes creates a tracker and passes it to the walker + * walker calls get(rodBinding) to obtain the RMDs values at this site for the track + * associated with rodBinding. 
+ * + * Note that this is an immutable class. Once created the underlying data structures + * cannot be modified * * User: mdepristo * Date: Apr 3, 2009 * Time: 3:05:23 PM */ public class RefMetaDataTracker { + // TODO: this should be a list, not a map, actually + private final static RODRecordList EMPTY_ROD_RECORD_LIST = new RODRecordListImpl("EMPTY"); + final Map map; - protected static Logger logger = Logger.getLogger(RefMetaDataTracker.class); + final ReferenceContext ref; + final protected static Logger logger = Logger.getLogger(RefMetaDataTracker.class); - public RefMetaDataTracker(int nBindings) { - if ( nBindings == 0 ) + // ------------------------------------------------------------------------------------------ + // + // + // Special ENGINE interaction functions + // + // + // ------------------------------------------------------------------------------------------ + + public RefMetaDataTracker(final Collection allBindings, final ReferenceContext ref) { + this.ref = ref; + + // set up the map + if ( allBindings.isEmpty() ) map = Collections.emptyMap(); - else - map = new HashMap(nBindings); + else { + Map tmap = new HashMap(allBindings.size()); + for ( RODRecordList rod : allBindings ) { + if ( rod != null && ! rod.isEmpty() ) + tmap.put(canonicalName(rod.getName()), rod); + } + + // ensure that no one modifies the map itself + map = Collections.unmodifiableMap(tmap); + } + } + + // ------------------------------------------------------------------------------------------ + // + // + // Generic accessors + // + // + // ------------------------------------------------------------------------------------------ + + /** + * Gets all of the Tribble features spanning this locus, returning them as a list of specific + * type T extending Feature. This function looks across all tracks to find the Features, so + * if you have two tracks A and B each containing 1 Feature, then getValues will return + * a list containing both features. 
+ * + * Note that this function assumes that all of the bound features are instances of or + * subclasses of T. A ClassCastException will occur if this isn't the case. If you want + * to get all Features without any danger of such an exception use the root Tribble + * interface Feature. + * + * @param type The type of the underlying objects bound here + * @param as above + * @return A freshly allocated list of all of the bindings, or an empty list if none are bound. + */ + @Requires({"type != null"}) + @Ensures("result != null") + public List getValues(final Class type) { + return addValues(map.keySet(), type, new ArrayList(), null, false, false); } /** - * get all the reference meta data associated with a track name. - * @param name the name of the track we're looking for - * @return a list of objects, representing the underlying objects that the tracks produce. I.e. for a - * dbSNP RMD this will be a RodDbSNP, etc. + * Provides the same functionality as @link #getValues(Class) but will only include + * Features that start as the GenomeLoc provide onlyAtThisLoc. * - * Important: The list returned by this function is guaranteed not to be null, but may be empty! + * @param type The type of the underlying objects bound here + * @param onlyAtThisLoc + * @param as above + * @return A freshly allocated list of all of the bindings, or an empty list if none are bound. */ - public List getReferenceMetaData(final String name) { - RODRecordList list = getTrackDataByName(name, true); - List objects = new ArrayList(); - if (list == null) return objects; - for (GATKFeature feature : list) - objects.add(feature.getUnderlyingObject()); - return objects; + @Requires({"type != null", "onlyAtThisLoc != null"}) + @Ensures("result != null") + public List getValues(final Class type, final GenomeLoc onlyAtThisLoc) { + return addValues(map.keySet(), type, new ArrayList(), onlyAtThisLoc, true, false); } /** - * get all the reference meta data associated with a track name. 
- * @param name the name of the track we're looking for - * @param requireExactMatch do we require an exact match for the name (true) or do we require only that the name starts with - * the passed in parameter (false). - * @return a list of objects, representing the underlying objects that the tracks produce. I.e. for a - * dbSNP rod this will be a RodDbSNP, etc. + * Uses the same logic as @link #getValues(Class) but arbitrary select one of the resulting + * elements of the list to return. That is, if there would be two elements in the result of + * @link #getValues(Class), one of these two is selected, and which one it will be isn't + * specified. Consequently, this method is only really safe if (1) you absolutely know + * that only one binding will meet the constraints of @link #getValues(Class) or (2) + * you truly don't care which of the multiple bindings available you are going to examine. * - * Important: The list returned by this function is guaranteed not to be null, but may be empty! + * If there are no bindings here, getFirstValue() return null + * + * @param type The type of the underlying objects bound here + * @param as above + * @return A random single element the RODs bound here, or null if none are bound. */ - public List getReferenceMetaData(final String name, boolean requireExactMatch) { - RODRecordList list = getTrackDataByName(name, requireExactMatch); - List objects = new ArrayList(); - if (list == null) return objects; - for (GATKFeature feature : list) - objects.add(feature.getUnderlyingObject()); - return objects; + @Requires({"type != null"}) + public T getFirstValue(final Class type) { + return safeGetFirst(getValues(type)); } /** - * get all the GATK features associated with a specific track name - * @param name the name of the track we're looking for - * @param requireExactMatch do we require an exact match for the name (true) or do we require only that the name starts with - * the passed in parameter (false). 
- * @return a list of GATKFeatures for the target rmd + * Uses the same logic as @link #getValue(Class,GenomeLoc) to determine the list + * of eligible Features and @link #getFirstValue(Class) to select a single + * element from the interval list. * - * Important: The list returned by this function is guaranteed not to be null, but may be empty! + * @param type The type of the underlying objects bound here + * @param as above + * @param onlyAtThisLoc only Features starting at this site are considered + * @return A random single element the RODs bound here starting at onlyAtThisLoc, or null if none are bound. */ - public List getGATKFeatureMetaData(final String name, boolean requireExactMatch) { - List feat = getTrackDataByName(name,requireExactMatch); - return (feat == null) ? new ArrayList() : feat; // to satisfy the above requirement that we don't return null + @Requires({"type != null", "onlyAtThisLoc != null"}) + public T getFirstValue(final Class type, final GenomeLoc onlyAtThisLoc) { + return safeGetFirst(getValues(type, onlyAtThisLoc)); + } /** - * get a singleton record, given the name and a type. This function will return the first record at the current position seen, - * and emit a logger warning if there were more than one option. + * Gets all of the Tribble features bound to RodBinding spanning this locus, returning them as + * a list of specific type T extending Feature. * - * WARNING: this method is deprecated, since we now suppport more than one RMD at a single position for all tracks. If there are - * are multiple RMD objects at this location, there is no contract for which object this method will pick, and which object gets - * picked may change from time to time! BE WARNED! - * - * @param name the name of the track - * @param clazz the underlying type to return - * @param the type to parameterize on, matching the clazz argument - * @return a record of type T, or null if no record is present. 
+ * Note that this function assumes that all of the bound features are instances of or + * subclasses of T. A ClassCastException will occur if this isn't the case. + * + * @param rodBinding Only Features coming from the track associated with this rodBinding are fetched + * @param The Tribble Feature type of the rodBinding, and consequently the type of the resulting list of Features + * @return A freshly allocated list of all of the bindings, or an empty list if none are bound. */ - @Deprecated - public T lookup(final String name, Class clazz) { - RODRecordList objects = getTrackDataByName(name, true); + @Requires({"rodBinding != null"}) + @Ensures("result != null") + public List getValues(final RodBinding rodBinding) { + return addValues(rodBinding.getName(), rodBinding.getType(), new ArrayList(1), getTrackDataByName(rodBinding), null, false, false); + } - // if emtpy or null return null; - if (objects == null || objects.size() < 1) return null; + /** + * Gets all of the Tribble features bound to any RodBinding in rodBindings, + * spanning this locus, returning them as a list of specific type T extending Feature. + * + * Note that this function assumes that all of the bound features are instances of or + * subclasses of T. A ClassCastException will occur if this isn't the case. + * + * @param rodBindings Only Features coming from the tracks associated with one of rodBindings are fetched + * @param The Tribble Feature type of the rodBinding, and consequently the type of the resulting list of Features + * @return A freshly allocated list of all of the bindings, or an empty list if none are bound. 
+ */ + @Requires({"rodBindings != null"}) + @Ensures("result != null") + public List getValues(final Collection> rodBindings) { + List results = new ArrayList(1); + for ( RodBinding rodBinding : rodBindings ) + results.addAll(getValues(rodBinding)); + return results; + } - if (objects.size() > 1) - logger.info("lookup is choosing the first record from " + (objects.size() - 1) + " options"); + /** + * The same logic as @link #getValues(RodBinding) but enforces that each Feature start at onlyAtThisLoc + * + * @param rodBinding Only Features coming from the track associated with this rodBinding are fetched + * @param The Tribble Feature type of the rodBinding, and consequently the type of the resulting list of Features + * @param onlyAtThisLoc only Features starting at this site are considered + * @return A freshly allocated list of all of the bindings, or an empty list if none are bound. + */ + @Requires({"rodBinding != null", "onlyAtThisLoc != null"}) + @Ensures("result != null") + public List getValues(final RodBinding rodBinding, final GenomeLoc onlyAtThisLoc) { + return addValues(rodBinding.getName(), rodBinding.getType(), new ArrayList(1), getTrackDataByName(rodBinding), onlyAtThisLoc, true, false); + } - Object obj = objects.get(0).getUnderlyingObject(); - if (!(clazz.isAssignableFrom(obj.getClass()))) - throw new UserException.CommandLineException("Unable to case track named " + name + " to type of " + clazz.toString() - + " it's of type " + obj.getClass()); + /** + * The same logic as @link #getValues(List) but enforces that each Feature start at onlyAtThisLoc + * + * @param rodBindings Only Features coming from the tracks associated with one of rodBindings are fetched + * @param The Tribble Feature type of the rodBinding, and consequently the type of the resulting list of Features + * @param onlyAtThisLoc only Features starting at this site are considered + * @return A freshly allocated list of all of the bindings, or an empty list if none are bound. 
+ */ + @Requires({"rodBindings != null", "onlyAtThisLoc != null"}) + @Ensures("result != null") + public List getValues(final Collection> rodBindings, final GenomeLoc onlyAtThisLoc) { + List results = new ArrayList(1); + for ( RodBinding rodBinding : rodBindings ) + results.addAll(getValues(rodBinding, onlyAtThisLoc)); + return results; + } - return (T)obj; + /** + * Uses the same logic as @getValues(RodBinding) to determine the list + * of eligible Features and select a single element from the resulting set + * of eligible features. + * + * @param rodBinding Only Features coming from the track associated with this rodBinding are fetched + * @param as above + * @return A random single element the eligible Features found, or null if none are bound. + */ + @Requires({"rodBinding != null"}) + public T getFirstValue(final RodBinding rodBinding) { + return safeGetFirst(addValues(rodBinding.getName(), rodBinding.getType(), null, getTrackDataByName(rodBinding), null, false, true)); + } + + /** + * Uses the same logic as @getValues(RodBinding, GenomeLoc) to determine the list + * of eligible Features and select a single element from the resulting set + * of eligible features. + * + * @param rodBinding Only Features coming from the track associated with this rodBinding are fetched + * @param as above + * @param onlyAtThisLoc only Features starting at this site are considered + * @return A random single element the eligible Features found, or null if none are bound. + */ + @Requires({"rodBinding != null", "onlyAtThisLoc != null"}) + public T getFirstValue(final RodBinding rodBinding, final GenomeLoc onlyAtThisLoc) { + return safeGetFirst(addValues(rodBinding.getName(), rodBinding.getType(), null, getTrackDataByName(rodBinding), onlyAtThisLoc, true, true)); + } + + /** + * Uses the same logic as @getValues(List) to determine the list + * of eligible Features and select a single element from the resulting set + * of eligible features. 
+ * + * @param rodBindings Only Features coming from the tracks associated with these rodBindings are fetched + * @param as above + * @return A random single element the eligible Features found, or null if none are bound. + */ + @Requires({"rodBindings != null"}) + public T getFirstValue(final Collection> rodBindings) { + for ( RodBinding rodBinding : rodBindings ) { + T val = getFirstValue(rodBinding); + if ( val != null ) + return val; + } + return null; + } + + /** + * Uses the same logic as @getValues(RodBinding,GenomeLoc) to determine the list + * of eligible Features and select a single element from the resulting set + * of eligible features. + * + * @param rodBindings Only Features coming from the tracks associated with these rodBindings are fetched + * @param as above + * @param onlyAtThisLoc only Features starting at this site are considered + * @return A random single element the eligible Features found, or null if none are bound. + */ + @Requires({"rodBindings != null", "onlyAtThisLoc != null"}) + public T getFirstValue(final Collection> rodBindings, final GenomeLoc onlyAtThisLoc) { + for ( RodBinding rodBinding : rodBindings ) { + T val = getFirstValue(rodBinding, onlyAtThisLoc); + if ( val != null ) + return val; + } + return null; } /** * Is there a binding at this site to a ROD/track with the specified name? * - * @param name the name of the rod - * @return true if it has the rod + * @param rodBinding the rod binding we want to know about + * @return true if any Features are bound in this tracker to rodBinding */ - public boolean hasROD(final String name) { - return map.containsKey(canonicalName(name)); - } - - - /** - * Get all of the RMDs at the current site. The collection is "flattened": for any track that has multiple records - * at the current site, they all will be added to the list as separate elements. 
- * - * @return collection of all rods - */ - public Collection getAllRods() { - List l = new ArrayList(); - for ( RODRecordList rl : map.values() ) { - if ( rl == null ) continue; // how do we get null value stored for a track? shouldn't the track be missing from the map alltogether? - l.addAll(rl); - } - return l; - + @Requires({"rodBinding != null"}) + public boolean hasValues(final RodBinding rodBinding) { + return map.containsKey(canonicalName(rodBinding.getName())); } /** * Get all of the RMD tracks at the current site. Each track is returned as a single compound * object (RODRecordList) that may contain multiple RMD records associated with the current site. * - * @return collection of all tracks + * @return List of all tracks */ - public Collection getBoundRodTracks() { - LinkedList bound = new LinkedList(); - - for ( RODRecordList value : map.values() ) { - if ( value != null && value.size() != 0 ) bound.add(value); - } - - return bound; + public List getBoundRodTracks() { + return new ArrayList(map.values()); } /** - * @return the number of ROD bindings (name -> value) where value is not empty in this tracker + * The number of tracks with at least one value bound here + * @return the number of tracks with at least one bound Feature */ - public int getNBoundRodTracks() { - return getNBoundRodTracks(null); + public int getNTracksWithBoundFeatures() { + return map.size(); } - public int getNBoundRodTracks(final String excludeIn ) { - final String exclude = excludeIn == null ? null : canonicalName(excludeIn); + // ------------------------------------------------------------------------------------------ + // + // + // old style accessors + // + // TODO -- DELETE ME + // + // + // ------------------------------------------------------------------------------------------ - int n = 0; - for ( RODRecordList value : map.values() ) { - if ( value != null && ! value.isEmpty() ) { - if ( exclude == null || ! 
value.getName().equals(exclude) ) - n++; - } - } - - return n; + @Deprecated + public boolean hasValues(final String name) { + return map.containsKey(canonicalName(name)); } + @Deprecated + public List getValues(final Class type, final String name) { + return addValues(name, type, new ArrayList(), getTrackDataByName(name), null, false, false); + } + @Deprecated + public List getValues(final Class type, final String name, final GenomeLoc onlyAtThisLoc) { + return addValues(name, type, new ArrayList(), getTrackDataByName(name), onlyAtThisLoc, true, false); + } + @Deprecated + public List getValues(final Class type, final Collection names, final GenomeLoc onlyAtThisLoc) { + return addValues(names, type, new ArrayList(), onlyAtThisLoc, true, false); + } + @Deprecated + public T getFirstValue(final Class type, final String name) { + return safeGetFirst(getValues(type, name)); + } + @Deprecated + public T getFirstValue(final Class type, final String name, final GenomeLoc onlyAtThisLoc) { + return safeGetFirst(getValues(type, name, onlyAtThisLoc)); + } + + // ------------------------------------------------------------------------------------------ + // + // + // Private utility functions + // + // + // ------------------------------------------------------------------------------------------ /** - * Binds the list of reference ordered data records (RMDs) to track name at this site. Should be used only by the traversal - * system to provide access to RMDs in a structured way to the walkers. + * Helper function for getFirst() operations that takes a list of and + * returns the first element, or null if no such element exists. 
* - * @param name the name of the track - * @param rod the collection of RMD data - */ - public void bind(final String name, RODRecordList rod) { - //logger.debug(String.format("Binding %s to %s", name, rod)); - map.put(canonicalName(name), rod); - } - - - /** - * Converts all possible ROD tracks to VariantContexts objects, of all types, allowing any start and any number - * of entries per ROD. - * The name of each VariantContext corresponds to the ROD name. - * - * @param ref reference context - * @return variant context - */ - public Collection getAllVariantContexts(ReferenceContext ref) { - return getAllVariantContexts(ref, null, null, false, false); - } - - /** - * Returns all of the variant contexts that start at the current location - * @param ref - * @param curLocation + * @param l + * @param * @return */ - public Collection getAllVariantContexts(ReferenceContext ref, GenomeLoc curLocation) { - return getAllVariantContexts(ref, null, curLocation, true, false); + @Requires({"l != null"}) + final private T safeGetFirst(final List l) { + return l.isEmpty() ? null : l.get(0); } - /** - * Converts all possible ROD tracks to VariantContexts objects. If allowedTypes != null, then only - * VariantContexts in the allow set of types will be returned. If requireStartsHere is true, then curLocation - * must not be null, and only records whose start position is == to curLocation.getStart() will be returned. - * If takeFirstOnly is true, then only a single VariantContext will be converted from any individual ROD. Of course, - * this single object must pass the allowed types and start here options if provided. Note that the result - * may return multiple VariantContexts with the same name if that particular track contained multiple RODs spanning - * the current location. - * - * The name of each VariantContext corresponds to the ROD name. 
- * - * @param ref reference context - * @param allowedTypes allowed types - * @param curLocation location - * @param requireStartHere do we require the rod to start at this location? - * @param takeFirstOnly do we take the first rod only? - * @return variant context - */ - public Collection getAllVariantContexts(ReferenceContext ref, EnumSet allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) { - List contexts = new ArrayList(); - - for ( RODRecordList rodList : getBoundRodTracks() ) { - addVariantContexts(contexts, rodList, ref, allowedTypes, curLocation, requireStartHere, takeFirstOnly); - } - - return contexts; - } - - /** - * Gets the variant contexts associated with track name name - * - * see getVariantContexts for more information. - * - * @param ref ReferenceContext to enable conversion to variant context - * @param name name - * @param curLocation location - * @param allowedTypes allowed types - * @param requireStartHere do we require the rod to start at this location? - * @param takeFirstOnly do we take the first rod only? 
- * @return variant context - */ -// public Collection getVariantContexts(String name, EnumSet allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) { -// return getVariantContexts(null, Arrays.asList(name), allowedTypes, curLocation, requireStartHere, takeFirstOnly); -// } - - public Collection getVariantContexts(ReferenceContext ref, String name, EnumSet allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) { - return getVariantContexts(ref, Arrays.asList(name), allowedTypes, curLocation, requireStartHere, takeFirstOnly); - } - -// public Collection getVariantContexts(Collection names, EnumSet allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) { -// return getVariantContexts(null, names, allowedTypes, curLocation, requireStartHere, takeFirstOnly); -// } - - public Collection getVariantContexts(ReferenceContext ref, Collection names, EnumSet allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) { - Collection contexts = new ArrayList(); - + private List addValues(final Collection names, + final Class type, + List values, + final GenomeLoc curLocation, + final boolean requireStartHere, + final boolean takeFirstOnly ) { for ( String name : names ) { - RODRecordList rodList = getTrackDataByName(name,true); // require that the name is an exact match - - if ( rodList != null ) - addVariantContexts(contexts, rodList, ref, allowedTypes, curLocation, requireStartHere, takeFirstOnly ); + RODRecordList rodList = getTrackDataByName(name); // require that the name is an exact match + values = addValues(name, type, values, rodList, curLocation, requireStartHere, takeFirstOnly ); + if ( takeFirstOnly && ! 
values.isEmpty() ) + break; } - return contexts; - } - - public Collection getVariantContextsByPrefix(ReferenceContext ref, Collection names, EnumSet allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) { - Collection contexts = new ArrayList(); - - for ( String name : names ) { - RODRecordList rodList = getTrackDataByName(name,false); // require that the name is an exact match - - if ( rodList != null ) - addVariantContexts(contexts, rodList, ref, allowedTypes, curLocation, requireStartHere, takeFirstOnly ); - } - - return contexts; - } - - /** - * Gets the variant context associated with name, and assumes the system only has a single bound track at this location. Throws an exception if not. - * see getVariantContexts for more information. - * - * @param name name - * @param curLocation location - * @param allowedTypes allowed types - * @param requireStartHere do we require the rod to start at this location? - * @return variant context - */ - public VariantContext getVariantContext(ReferenceContext ref, String name, EnumSet allowedTypes, GenomeLoc curLocation, boolean requireStartHere ) { - Collection contexts = getVariantContexts(ref, name, allowedTypes, curLocation, requireStartHere, false ); - - if ( contexts.size() > 1 ) - throw new ReviewedStingException("Requested a single VariantContext object for track " + name + " but multiple variants were present at position " + curLocation); - else if ( contexts.size() == 0 ) - return null; - else - return contexts.iterator().next(); - } - - /** - * Very simple accessor that gets the first (and only!) VC associated with name at the current location, or - * null if there's no binding here. 
- * - * @param ref - * @param name - * @param curLocation - * @return - */ - public VariantContext getVariantContext(ReferenceContext ref, String name, GenomeLoc curLocation) { - return getVariantContext(ref, name, null, curLocation, true); + return values; } - private void addVariantContexts(Collection contexts, RODRecordList rodList, ReferenceContext ref, EnumSet allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) { + + private List addValues(final String name, + final Class type, + List values, + final RODRecordList rodList, + final GenomeLoc curLocation, + final boolean requireStartHere, + final boolean takeFirstOnly ) { for ( GATKFeature rec : rodList ) { - if ( VariantContextAdaptors.canBeConvertedToVariantContext(rec.getUnderlyingObject()) ) { - // ok, we might actually be able to turn this record in a variant context - VariantContext vc = VariantContextAdaptors.toVariantContext(rodList.getName(), rec.getUnderlyingObject(), ref); + if ( ! requireStartHere || rec.getLocation().getStart() == curLocation.getStart() ) { // ok, we are going to keep this thing + Object obj = rec.getUnderlyingObject(); + if (!(type.isAssignableFrom(obj.getClass()))) + throw new UserException.CommandLineException("Unable to cast track named " + name + " to type of " + type.toString() + + " it's of type " + obj.getClass()); - if ( vc == null ) // sometimes the track has odd stuff in it that can't be converted - continue; + T objT = (T)obj; + if ( takeFirstOnly ) { + if ( values == null ) + values = Arrays.asList(objT); + else + values.add(objT); - // now, let's decide if we want to keep it - boolean goodType = allowedTypes == null || allowedTypes.contains(vc.getType()); - boolean goodPos = ! 
requireStartHere || rec.getLocation().getStart() == curLocation.getStart(); - - if ( goodType && goodPos ) { // ok, we are going to keep this thing - contexts.add(vc); - - if ( takeFirstOnly ) - // we only want the first passing instance, so break the loop over records in rodList - break; + break; + } else { + if ( values == null ) + values = new ArrayList(); + values.add(objT); } } } + + return values == null ? Collections.emptyList() : values; } /** * Finds the reference metadata track named 'name' and returns all ROD records from that track associated - * with the current site as a RODRecordList collection object. If no data track with specified name is available, + * with the current site as a RODRecordList List object. If no data track with specified name is available, * returns defaultValue wrapped as RODRecordList object. NOTE: if defaultValue is null, it will be wrapped up * with track name set to 'name' and location set to null; otherwise the wrapper object will have name and * location set to defaultValue.getName() and defaultValue.getLocation(), respectively (use caution, @@ -367,29 +427,16 @@ public class RefMetaDataTracker { * for instance, on locus traversal, location is usually expected to be a single base we are currently looking at, * regardless of the presence of "extended" RODs overlapping with that location). * @param name track name - * @param requireExactMatch do we require an exact match of the rod name? * @return track data for the given rod */ - private RODRecordList getTrackDataByName(final String name, boolean requireExactMatch) { - //logger.debug(String.format("Lookup %s%n", name)); - + private RODRecordList getTrackDataByName(final String name) { final String luName = canonicalName(name); - RODRecordList trackData = null; + RODRecordList l = map.get(luName); + return l == null ? 
EMPTY_ROD_RECORD_LIST : l; + } - if ( requireExactMatch ) { - if ( map.containsKey(luName) ) - trackData = map.get(luName); - } else { - for ( Map.Entry datum : map.entrySet() ) { - final String rodName = datum.getKey(); - if ( datum.getValue() != null && rodName.startsWith(luName) ) { - if ( trackData == null ) trackData = new RODRecordListImpl(name); - //System.out.printf("Adding bindings from %s to %s at %s%n", rodName, name, datum.getValue().getLocation()); - ((RODRecordListImpl)trackData).add(datum.getValue(), true); - } - } - } - return trackData; + private RODRecordList getTrackDataByName(final RodBinding binding) { + return getTrackDataByName(binding.getName()); } /** @@ -398,6 +445,7 @@ public class RefMetaDataTracker { * @return canonical name of the rod */ private final String canonicalName(final String name) { + // todo -- remove me after switch to RodBinding syntax return name.toLowerCase(); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/ReferenceOrderedData.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/ReferenceOrderedData.java deleted file mode 100644 index 5cdb6e9f7..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/ReferenceOrderedData.java +++ /dev/null @@ -1,130 +0,0 @@ -package org.broadinstitute.sting.gatk.refdata; - -import org.apache.log4j.Logger; -import org.broadinstitute.sting.utils.exceptions.UserException; - -import java.io.*; -import java.lang.reflect.Method; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; - -/** - * Class for representing arbitrary reference ordered data sets - *

- * User: mdepristo - * Date: Feb 27, 2009 - * Time: 10:47:14 AM - * To change this template use File | Settings | File Templates. - */ -public class ReferenceOrderedData implements Iterable { - private String name; - private File file = null; -// private String fieldDelimiter; - - /** Header object returned from the datum */ -// private Object header = null; - - private Class type = null; // runtime type information for object construction - - /** our log, which we want to capture anything from this class */ - private static Logger logger = Logger.getLogger(ReferenceOrderedData.class); - - /** - * given an existing file, open it and append all the valid triplet lines to an existing list - * - * @param rodTripletList the list of existing triplets - * @param filename the file to attempt to extract ROD triplets from - */ - protected static void extractRodsFromFile(List rodTripletList, String filename) { - BufferedReader str; - try { - str = new BufferedReader(new FileReader(new File(filename))); - } catch (FileNotFoundException e) { - throw new UserException.CouldNotReadInputFile(new File(filename), "Unable to load the ROD input file", e); - } - String line = "NO LINES READ IN"; - try { - while ((line = str.readLine()) != null) { - if (line.matches(".+,.+,.+")) rodTripletList.add(line.trim()); - else logger.warn("the following file line didn't parsing into a triplet -> " + line); - } - } catch (IOException e) { - throw new UserException.CouldNotReadInputFile(new File(filename), "Failed reading the input rod file; last line read was " + line, e); - } - } - - - // ---------------------------------------------------------------------- - // - // Constructors - // - // ---------------------------------------------------------------------- - public ReferenceOrderedData(final String name, File file, Class type ) { - this.name = name; - this.file = file; - this.type = type; -// this.header = initializeROD(name, file, type); -// this.fieldDelimiter = newROD(name, 
type).delimiterRegex(); - } - - public String getName() { return name; } - - public File getFile() { return file; } - - public Class getType() { return type; } - - /** - * Special equals override to see if this ROD is compatible with the given - * name and type. 'Compatible' means that this ROD has the name that's passed - * in and its data can fit into the container specified by type. - * - * @param name Name to check. - * @param type Type to check. - * - * @return True if these parameters imply this rod. False otherwise. - */ - public boolean matches(String name, Class type) { - return this.name.equals(name) && type.isAssignableFrom(this.type); - } - - public Iterator iterator() { - Iterator it; - try { - Method m = type.getDeclaredMethod("createIterator", String.class, java.io.File.class); - it = (Iterator) m.invoke(null, name, file); - } catch (java.lang.NoSuchMethodException e) { - it = new RODRecordIterator(file,name,type); - } catch (java.lang.NullPointerException e) { - throw new RuntimeException(e); - } catch (java.lang.SecurityException e) { - throw new RuntimeException(e); - } catch (java.lang.IllegalAccessException e) { - throw new RuntimeException(e); - } catch (java.lang.IllegalArgumentException e) { - throw new RuntimeException(e); - } catch (java.lang.reflect.InvocationTargetException e) { - throw new RuntimeException(e); - } - // return new RODIterator(it); - return it; - } - - // ---------------------------------------------------------------------- - // - // Manipulations of all of the data - // - // ---------------------------------------------------------------------- - - public static void write(ArrayList data, File output) throws IOException { - final FileWriter out = new FileWriter(output); - - for (ReferenceOrderedDatum rec : data) { - out.write(rec.repl() + "\n"); - } - - out.close(); - } - - -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/SelfScopingFeatureCodec.java 
b/public/java/src/org/broadinstitute/sting/gatk/refdata/SelfScopingFeatureCodec.java new file mode 100644 index 000000000..de781b839 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/SelfScopingFeatureCodec.java @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.refdata; + +import java.io.File; + +/** + * An interface marking that a given Tribble codec can look at the file and determine whether the + * codec specifically parsing the contents of the file. + */ +public interface SelfScopingFeatureCodec { + /** + * This function returns true iff the File potentialInput can be parsed by this + * codec. + * + * The GATK assumes that there's never a situation where two SelfScopingFeaetureCodecs + * return true for the same file. If this occurs the GATK splits out an error. 
+ * + * Note this function must never throw an error. All errors should be trapped + * and false returned. + * + * @param potentialInput the file to test for parsiability with this codec + * @return true if potentialInput can be parsed, false otherwise + */ + public boolean canDecode(final File potentialInput); +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java index 1d622e2c7..216edaf87 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java @@ -4,7 +4,7 @@ import org.broad.tribble.Feature; import org.broad.tribble.dbsnp.DbSNPFeature; import org.broad.tribble.gelitext.GeliTextFeature; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; +import org.broadinstitute.sting.gatk.refdata.features.DbSNPHelper; import org.broadinstitute.sting.utils.classloader.PluginManager; import org.broadinstitute.sting.utils.codecs.hapmap.HapMapFeature; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; @@ -112,24 +112,28 @@ public class VariantContextAdaptors { alleles.add(refAllele); // add all of the alt alleles + boolean sawNullAllele = refAllele.isNull(); for ( String alt : DbSNPHelper.getAlternateAlleleList(dbsnp) ) { if ( ! 
Allele.acceptableAlleleBases(alt) ) { //System.out.printf("Excluding dbsnp record %s%n", dbsnp); return null; } - alleles.add(Allele.create(alt, false)); + Allele altAllele = Allele.create(alt, false); + alleles.add(altAllele); + if ( altAllele.isNull() ) + sawNullAllele = true; } Map attributes = new HashMap(); attributes.put(VariantContext.ID_KEY, dbsnp.getRsID()); - if ( DbSNPHelper.isDeletion(dbsnp) ) { - int index = dbsnp.getStart() - ref.getWindow().getStart() - 1; - if ( index < 0 ) - return null; // we weren't given enough reference context to create the VariantContext - attributes.put(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY, new Byte(ref.getBases()[index])); - } - Collection genotypes = null; - VariantContext vc = new VariantContext(name, dbsnp.getChr(),dbsnp.getStart() - (DbSNPHelper.isDeletion(dbsnp) ? 1 : 0),dbsnp.getEnd(), alleles, genotypes, VariantContext.NO_NEG_LOG_10PERROR, null, attributes); + + int index = dbsnp.getStart() - ref.getWindow().getStart() - 1; + if ( index < 0 ) + return null; // we weren't given enough reference context to create the VariantContext + Byte refBaseForIndel = new Byte(ref.getBases()[index]); + + Map genotypes = null; + VariantContext vc = new VariantContext(name, dbsnp.getChr(), dbsnp.getStart() - (sawNullAllele ? 1 : 0), dbsnp.getEnd() - (refAllele.isNull() ? 
1 : 0), alleles, genotypes, VariantContext.NO_NEG_LOG_10PERROR, null, attributes, refBaseForIndel); return vc; } else return null; // can't handle anything else @@ -159,16 +163,6 @@ public class VariantContextAdaptors { @Override public Class getAdaptableFeatureType() { return GeliTextFeature.class; } - /** - * convert to a Variant Context, given: - * @param name the name of the ROD - * @param input the Rod object, in this case a RodGeliText - * @return a VariantContext object - */ -// VariantContext convert(String name, Object input) { -// return convert(name, input, null); -// } - /** * convert to a Variant Context, given: * @param name the name of the ROD @@ -234,16 +228,6 @@ public class VariantContextAdaptors { @Override public Class getAdaptableFeatureType() { return HapMapFeature.class; } - /** - * convert to a Variant Context, given: - * @param name the name of the ROD - * @param input the Rod object, in this case a RodGeliText - * @return a VariantContext object - */ -// VariantContext convert(String name, Object input) { -// return convert(name, input, null); -// } - /** * convert to a Variant Context, given: * @param name the name of the ROD @@ -258,6 +242,11 @@ public class VariantContextAdaptors { HapMapFeature hapmap = (HapMapFeature)input; + int index = hapmap.getStart() - ref.getWindow().getStart(); + if ( index < 0 ) + return null; // we weren't given enough reference context to create the VariantContext + Byte refBaseForIndel = new Byte(ref.getBases()[index]); + HashSet alleles = new HashSet(); Allele refSNPAllele = Allele.create(ref.getBase(), true); int deletionLength = -1; @@ -316,7 +305,7 @@ public class VariantContextAdaptors { long end = hapmap.getEnd(); if ( deletionLength > 0 ) end += deletionLength; - VariantContext vc = new VariantContext(name, hapmap.getChr(), hapmap.getStart(), end, alleles, genotypes, VariantContext.NO_NEG_LOG_10PERROR, null, attrs); + VariantContext vc = new VariantContext(name, hapmap.getChr(), hapmap.getStart(), 
end, alleles, genotypes, VariantContext.NO_NEG_LOG_10PERROR, null, attrs, refBaseForIndel); return vc; } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/helpers/DbSNPHelper.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/DbSNPHelper.java similarity index 74% rename from public/java/src/org/broadinstitute/sting/gatk/refdata/utils/helpers/DbSNPHelper.java rename to public/java/src/org/broadinstitute/sting/gatk/refdata/features/DbSNPHelper.java index 3201769e0..e6e7a7588 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/helpers/DbSNPHelper.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/DbSNPHelper.java @@ -1,6 +1,31 @@ -package org.broadinstitute.sting.gatk.refdata.utils.helpers; +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +package org.broadinstitute.sting.gatk.refdata.features; import net.sf.samtools.util.SequenceUtil; +import org.broad.tribble.Feature; import org.broad.tribble.annotation.Strand; import org.broad.tribble.dbsnp.DbSNPFeature; import org.broadinstitute.sting.utils.Utils; @@ -34,12 +59,12 @@ public class DbSNPHelper { return dbsnp; } - public static String rsIDOfFirstRealSNP(List featureList) { + public static String rsIDOfFirstRealSNP(List featureList, boolean deleteMe) { if (featureList == null) return null; String rsID = null; - for ( Object d : featureList ) { + for ( Feature d : featureList ) { if ( d instanceof DbSNPFeature ) { if ( DbSNPHelper.isSNP((DbSNPFeature)d) ) { rsID = ((DbSNPFeature)d).getRsID(); @@ -56,14 +81,29 @@ public class DbSNPHelper { return rsID; } - public static String rsIDOfFirstRealIndel(List featureList) { + public static String rsIDOfFirstRealSNP(List VCs) { + if ( VCs == null ) + return null; + + String rsID = null; + for ( VariantContext vc : VCs ) { + if ( vc.isSNP() ) { + rsID = vc.getID(); + break; + } + } + + return rsID; + } + + public static String rsIDOfFirstRealIndel(List featureList) { if (featureList == null) return null; String rsID = null; - for ( Object d : featureList ) { + for ( Feature d : featureList ) { if ( d instanceof DbSNPFeature ) { - if ( DbSNPHelper.isIndel((DbSNPFeature)d) ) { + if ( DbSNPHelper.isIndel((DbSNPFeature) d) ) { rsID = ((DbSNPFeature)d).getRsID(); break; } @@ -117,7 +157,11 @@ public class DbSNPHelper { } public static boolean isIndel(DbSNPFeature feature) { - return DbSNPHelper.isInsertion(feature) || DbSNPHelper.isDeletion(feature) || feature.getVariantType().contains("in-del"); + return DbSNPHelper.isInsertion(feature) || DbSNPHelper.isDeletion(feature) || DbSNPHelper.isComplexIndel(feature); + } + + public static boolean isComplexIndel(DbSNPFeature feature) { + return feature.getVariantType().contains("in-del"); } public static boolean isHapmap(DbSNPFeature feature) { diff --git 
a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/annotator/AnnotatorInputTableCodec.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/annotator/AnnotatorInputTableCodec.java deleted file mode 100755 index 6bba754be..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/annotator/AnnotatorInputTableCodec.java +++ /dev/null @@ -1,193 +0,0 @@ -/* - * Copyright (c) 2010 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR - * THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - -package org.broadinstitute.sting.gatk.refdata.features.annotator; - -import org.apache.log4j.Logger; -import org.broad.tribble.Feature; -import org.broad.tribble.exception.CodecLineParsingException; -import org.broad.tribble.readers.AsciiLineReader; -import org.broad.tribble.readers.LineReader; -import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.Utils; - -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.util.ArrayList; -import java.util.StringTokenizer; - -public class AnnotatorInputTableCodec implements ReferenceDependentFeatureCodec { - - private static Logger logger = Logger.getLogger(AnnotatorInputTableCodec.class); - - public static final String DELIMITER = "\t"; - - private ArrayList header; - - /** - * The parser to use when resolving genome-wide locations. - */ - private GenomeLocParser genomeLocParser; - - /** - * Set the parser to use when resolving genetic data. - * @param genomeLocParser The supplied parser. - */ - public void setGenomeLocParser(GenomeLocParser genomeLocParser) { - this.genomeLocParser = genomeLocParser; - } - - /** - * Parses the header. - * - * @param reader - * - * @return The # of header lines for this file. 
- */ - public Object readHeader(LineReader reader) - { - int[] lineCounter = new int[1]; - try { - header = readHeader(reader, lineCounter); - } catch(IOException e) { - throw new IllegalArgumentException("Unable to read from file.", e); - } - return header; - } - - public Class getFeatureType() { - return AnnotatorInputTableFeature.class; - } - - @Override - public Feature decodeLoc(String line) { - StringTokenizer st = new StringTokenizer(line, DELIMITER); - if ( st.countTokens() < 1 ) - throw new CodecLineParsingException("Couldn't parse GenomeLoc out of the following line because there aren't enough tokens.\nLine: " + line); - - GenomeLoc loc; - String chr = st.nextToken(); - if ( chr.indexOf(":") != -1 ) { - loc = genomeLocParser.parseGenomeLoc(chr); - } else { - if ( st.countTokens() < 3 ) - throw new CodecLineParsingException("Couldn't parse GenomeLoc out of the following line because there aren't enough tokens.\nLine: " + line); - loc = genomeLocParser.createGenomeLoc(chr, Integer.valueOf(st.nextToken()), Integer.valueOf(st.nextToken())); - } - return new AnnotatorInputTableFeature(loc.getContig(), loc.getStart(), loc.getStop()); - } - - - /** - * Parses the line into an AnnotatorInputTableFeature object. 
- * - * @param line - */ - public AnnotatorInputTableFeature decode(String line) { - final ArrayList header = this.header; //optimization - final ArrayList values = Utils.split(line, DELIMITER, header.size()); - - if ( values.size() != header.size()) { - throw new CodecLineParsingException(String.format("Encountered a line that has %d columns while the header has %d columns.\nHeader: " + header + "\nLine: " + values, values.size(), header.size())); - } - - final AnnotatorInputTableFeature feature = new AnnotatorInputTableFeature(header); - for ( int i = 0; i < header.size(); i++ ) { - feature.putColumnValue(header.get(i), values.get(i)); - } - - GenomeLoc loc; - if ( values.get(0).indexOf(":") != -1 ) - loc = genomeLocParser.parseGenomeLoc(values.get(0)); - else - loc = genomeLocParser.createGenomeLoc(values.get(0), Integer.valueOf(values.get(1)), Integer.valueOf(values.get(2))); - - //parse the location - feature.setChr(loc.getContig()); - feature.setStart((int)loc.getStart()); - feature.setEnd((int)loc.getStop()); - - return feature; - } - - /** - * Returns the header. - * @param source - * @return - * @throws IOException - */ - public static ArrayList readHeader(final File source) throws IOException { - FileInputStream is = new FileInputStream(source); - try { - return readHeader(new AsciiLineReader(is), null); - } finally { - is.close(); - } - } - - - /** - * Returns the header, and also sets the 2nd arg to the number of lines in the header. - * @param source - * @param lineCounter An array of length 1 or null. If not null, array[0] will be set to the number of lines in the header. - * @return The header fields. 
- * @throws IOException - */ - private static ArrayList readHeader(final LineReader source, int[] lineCounter) throws IOException { - - ArrayList header = null; - int numLines = 0; - - //find the 1st line that's non-empty and not a comment - String line = null; - while( (line = source.readLine()) != null ) { - numLines++; - if ( line.trim().isEmpty() || line.startsWith("#") ) { - continue; - } - - //parse the header - header = Utils.split(line, DELIMITER); - break; - } - - // check that we found the header - if ( header == null ) { - throw new IllegalArgumentException("No header in " + source + ". All lines are either comments or empty."); - } - - if(lineCounter != null) { - lineCounter[0] = numLines; - } - - logger.debug(String.format("Found header line containing %d columns:\n[%s]", header.size(), Utils.join("\t", header))); - - return header; - } - -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/annotator/AnnotatorInputTableFeature.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/annotator/AnnotatorInputTableFeature.java deleted file mode 100755 index d12badd28..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/annotator/AnnotatorInputTableFeature.java +++ /dev/null @@ -1,158 +0,0 @@ -/* - * Copyright (c) 2010 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR - * THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.refdata.features.annotator; - -import org.broad.tribble.Feature; - -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.Map; - -/** - * This class represents a single record in an AnnotatorInputTable. - */ -public class AnnotatorInputTableFeature implements Feature { - - private ArrayList columnNames; - private HashMap columnValues; //maps colum names to column values - - private String chr; - private int start; - private int end; - private String strRep = null; - - /** - * Constructor. - * @param chr The chromosome name. - * @param start The start position - * @param end The end position - */ - public AnnotatorInputTableFeature(String chr, int start, int end) { - this.chr = chr; - this.start = start; - this.end = end; - } - - - /** - * Constructor. - * @param columnNames The column names as parsed out of the file header. - */ - public AnnotatorInputTableFeature(ArrayList columnNames) { - this.columnNames = columnNames; - this.columnValues = new HashMap(); - } - - - - /** - * @return the list of column names from the file header. - */ - public ArrayList getHeader() { - return columnNames; - } - - - /** - * Returns the value of the given column. - * - * @param columnName The column name as it appears in the file header. 
- * @return The value - */ - public String getColumnValue(final String columnName) { - return columnValues.get(columnName); - } - - - public boolean containsColumnName(final String columnName) { - return columnValues.containsKey(columnName); - } - - - /** - * Sets the value for the given column. - * - * @param columnName The column name as it appears in the file header. - * @param value The value - * @return The existing value associated with the columnName, if there is one. - */ - protected String putColumnValue(final String columnName, final String value) { - return columnValues.put(columnName, value); - } - - /** - * @return all values in this line, hashed by their column names. - */ - public Map getColumnValues() { - return Collections.unmodifiableMap(columnValues); - } - - - public String getChr() { - return chr; - } - - public int getStart() { - return start; - } - - public int getEnd() { - return end; - } - - protected void setChr(String chr) { - this.chr = chr; - } - - protected void setStart(int start) { - this.start = start; - } - - protected void setEnd(int end) { - this.end = end; - } - - @Override - public String toString() { - if ( strRep == null ) { - StringBuilder sb = new StringBuilder(); - - for(String columnName : columnNames ) { - if ( sb.length() == 0 ) - sb.append("["); - else - sb.append(", "); - sb.append(columnName + "=" + columnValues.get(columnName)); - } - sb.append("]"); - - strRep = sb.toString(); - } - - return strRep; - } -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/refseq/RefSeqFeature.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/refseq/RefSeqFeature.java index d12114f9a..a38d45428 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/refseq/RefSeqFeature.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/refseq/RefSeqFeature.java @@ -1,7 +1,6 @@ package org.broadinstitute.sting.gatk.refdata.features.refseq; import 
org.broad.tribble.Feature; -import org.broadinstitute.sting.gatk.refdata.Transcript; import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; import org.broadinstitute.sting.utils.GenomeLoc; diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/Transcript.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/refseq/Transcript.java similarity index 95% rename from public/java/src/org/broadinstitute/sting/gatk/refdata/Transcript.java rename to public/java/src/org/broadinstitute/sting/gatk/refdata/features/refseq/Transcript.java index b8a0868dd..d8bf12810 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/Transcript.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/refseq/Transcript.java @@ -1,4 +1,4 @@ -package org.broadinstitute.sting.gatk.refdata; +package org.broadinstitute.sting.gatk.refdata.features.refseq; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.HasGenomeLocation; diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/indexer/RMDIndexer.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/indexer/RMDIndexer.java index 085d6b5b3..029800aea 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/indexer/RMDIndexer.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/indexer/RMDIndexer.java @@ -12,14 +12,13 @@ import org.broadinstitute.sting.commandline.CommandLineProgram; import org.broadinstitute.sting.commandline.Input; import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec; -import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder; +import org.broadinstitute.sting.gatk.refdata.tracks.FeatureManager; +import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder; import org.broadinstitute.sting.utils.GenomeLocParser; 
-import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; import java.io.File; import java.io.FileOutputStream; -import java.util.Map; /** * a utility class that can create an index, written to a target location. This is useful when you're unable to write to the directory @@ -83,14 +82,14 @@ public class RMDIndexer extends CommandLineProgram { RMDTrackBuilder builder = new RMDTrackBuilder(ref.getSequenceDictionary(),genomeLocParser, ValidationExclusion.TYPE.ALL); // find the types available to the track builders - Map typeMapping = builder.getAvailableTrackNamesAndTypes(); + FeatureManager.FeatureDescriptor descriptor = builder.getFeatureManager().getByName(inputFileType); // check that the type is valid - if (!typeMapping.containsKey(inputFileType)) - throw new IllegalArgumentException("The type specified " + inputFileType + " is not a valid type. Valid type list: " + Utils.join(",",typeMapping.keySet())); + if (descriptor == null) + throw new IllegalArgumentException("The type specified " + inputFileType + " is not a valid type. 
Valid type list: " + builder.getFeatureManager().userFriendlyListOfAvailableFeatures()); // create the codec - FeatureCodec codec = builder.createByType(typeMapping.get(inputFileType)); + FeatureCodec codec = builder.getFeatureManager().createCodec(descriptor, "foo", genomeLocParser); // check if it's a reference dependent feature codec if (codec instanceof ReferenceDependentFeatureCodec) diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManager.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManager.java new file mode 100644 index 000000000..26a400071 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManager.java @@ -0,0 +1,216 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +package org.broadinstitute.sting.gatk.refdata.tracks; + +import com.google.java.contract.Ensures; +import com.google.java.contract.Requires; +import org.broad.tribble.Feature; +import org.broad.tribble.FeatureCodec; +import org.broad.tribble.NameAwareCodec; +import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec; +import org.broadinstitute.sting.gatk.refdata.SelfScopingFeatureCodec; +import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.classloader.PluginManager; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + +import java.io.File; +import java.util.*; + + +/** + * Class for managing Tribble Feature readers available to the GATK. The features + * are dynamically determined via a PluginManager. This class provides convenient + * getter methods for obtaining FeatureDescriptor objects that collect all of the + * useful information about the Tribble Codec, Feature, and name in one place. 
+ * + * @author depristo + */ +public class FeatureManager { + public static class FeatureDescriptor { + final String name; + final FeatureCodec codec; + + public FeatureDescriptor(final String name, final FeatureCodec codec) { + this.name = name; + this.codec = codec; + } + + public String getName() { + return name; + } + public FeatureCodec getCodec() { + return codec; + } + public Class getCodecClass() { return codec.getClass(); } + public Class getFeatureClass() { return codec.getFeatureType(); } + + @Override + public String toString() { + return String.format("FeatureDescriptor name=%s codec=%s feature=%s", getName(), getCodecClass().getName(), getFeatureClass().getName()); + } + } + + private final PluginManager pluginManager; + private final Collection featureDescriptors = new HashSet(); + + + /** + * Construct a FeatureManager + */ + public FeatureManager() { + pluginManager = new PluginManager(FeatureCodec.class, "Codecs", "Codec"); + + for (final String rawName: pluginManager.getPluginsByName().keySet()) { + FeatureCodec codec = pluginManager.createByName(rawName); + String name = rawName.toUpperCase(); + FeatureDescriptor featureDescriptor = new FeatureDescriptor(name, codec); + featureDescriptors.add(featureDescriptor); + } + } + + /** + * Return the FeatureDescriptor whose getCodecClass().equals(codecClass). 
+ * + * @param codecClass + * @return A FeatureDescriptor or null if none is found + */ + @Requires("codecClass != null") + public FeatureDescriptor getByCodec(Class codecClass) { + for ( FeatureDescriptor descriptor : featureDescriptors ) + if ( descriptor.getCodecClass().equals(codecClass) ) + return descriptor; + return null; + } + + /** + * Returns a collection of FeatureDescriptors that emit records of type featureClass + * + * @param featureClass + * @return A collection of matching FeatureDescriptors; empty (never null) if none is found + */ + @Requires("featureClass != null") + public Collection getByFeature(Class featureClass) { + Set consistentDescriptors = new HashSet(); + + if (featureClass == null) + throw new IllegalArgumentException("trackRecordType value is null, please pass in an actual class object"); + + for ( FeatureDescriptor descriptor : featureDescriptors ) { + if ( featureClass.isAssignableFrom(descriptor.getFeatureClass())) + consistentDescriptors.add(descriptor); + } + return consistentDescriptors; + } + + /** + * Return the FeatureDescriptor with getName().equalsIgnoreCase(name) + * + * @param name + * @return A FeatureDescriptor or null if none is found + */ + @Requires("name != null") + public FeatureDescriptor getByName(String name) { + for ( FeatureDescriptor descriptor : featureDescriptors ) + if ( descriptor.getName().equalsIgnoreCase(name) ) + return descriptor; + return null; + } + + /** + * Returns the FeatureDescriptor that can read the contents of File file, if one can be determined + * + * @param file + * @return A FeatureDescriptor or null if none is found + */ + @Requires({"file != null", "file.isFile()", "file.canRead()"}) + public FeatureDescriptor getByFiletype(File file) { + List canParse = new ArrayList(); + for ( FeatureDescriptor descriptor : featureDescriptors ) + if ( descriptor.getCodec() instanceof SelfScopingFeatureCodec ) { + if ( ((SelfScopingFeatureCodec) descriptor.getCodec()).canDecode(file) ) { + canParse.add(descriptor); + } + } + + if ( 
canParse.size() == 0 ) + return null; + else if ( canParse.size() > 1 ) + throw new ReviewedStingException("BUG: multiple feature descriptors can read file " + file + ": " + canParse); + else + return canParse.get(0); + } + + /** + * Returns the FeatureDescriptor associated with the type described by triplet, or null if none is found + * @param triplet + * @return + */ + @Requires("triplet != null") + public FeatureDescriptor getByTriplet(RMDTriplet triplet) { + return getByName(triplet.getType()); + } + + /** + * @return all of the FeatureDescriptors available to the GATK. Never null + */ + @Ensures("result != null") + public Collection getFeatureDescriptors() { + return Collections.unmodifiableCollection(featureDescriptors); + } + + + /** + * Returns a list of the available tribble track names (vcf,dbsnp,etc) that we can load + * @return + */ + @Ensures("result != null") + public String userFriendlyListOfAvailableFeatures() { + List names = new ArrayList(); + for ( final FeatureDescriptor descriptor : featureDescriptors ) + names.add(descriptor.getName()); + return Utils.join(",", names); + } + + /** + * Create a new FeatureCodec of the type described in descriptor, assigning it the + * name (if possible) and providing it the genomeLocParser (where necessary) + * + * @param descriptor FeatureDescriptor of the Tribble FeatureCodec we want to create + * @param name the name to assign this codec + * @return the feature codec itself + */ + @Requires({"descriptor != null", "name != null", "genomeLocParser != null"}) + @Ensures("result != null") + public FeatureCodec createCodec(FeatureDescriptor descriptor, String name, GenomeLocParser genomeLocParser) { + FeatureCodec codex = pluginManager.createByType(descriptor.getCodecClass()); + if ( codex instanceof NameAwareCodec ) + ((NameAwareCodec)codex).setName(name); + if ( codex instanceof ReferenceDependentFeatureCodec ) + ((ReferenceDependentFeatureCodec)codex).setGenomeLocParser(genomeLocParser); + return codex; + } +} 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/QueryableTrack.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/QueryableTrack.java deleted file mode 100644 index 731df997d..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/QueryableTrack.java +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright (c) 2010. The Broad Institute - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.refdata.tracks; - -import net.sf.samtools.util.CloseableIterator; -import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; -import org.broadinstitute.sting.utils.GenomeLoc; - -import java.io.IOException; - -/** - * @author aaron - *

- * Interface QueryableTrack - *

- * a decorator interface for tracks that are queryable - */ -public interface QueryableTrack { - public CloseableIterator query(final GenomeLoc interval) throws IOException; - public CloseableIterator query(final GenomeLoc interval, final boolean contained) throws IOException; - public CloseableIterator query(final String contig, final int start, final int stop) throws IOException; - public CloseableIterator query(final String contig, final int start, final int stop, final boolean contained) throws IOException; - public void close(); -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/RMDTrackBuilder.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilder.java similarity index 80% rename from public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/RMDTrackBuilder.java rename to public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilder.java index 19c91be1b..d352894e8 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/RMDTrackBuilder.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilder.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 The Broad Institute + * Copyright (c) 2011, The Broad Institute * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation @@ -12,18 +12,17 @@ * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. - * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR - * THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. */ -package org.broadinstitute.sting.gatk.refdata.tracks.builders; +package org.broadinstitute.sting.gatk.refdata.tracks; import net.sf.samtools.SAMSequenceDictionary; import net.sf.samtools.SAMSequenceRecord; @@ -36,12 +35,11 @@ import org.broad.tribble.util.LittleEndianOutputStream; import org.broadinstitute.sting.commandline.Tags; import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec; -import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack; -import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackCreationException; import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet; import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet.RMDStorageType; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.SequenceDictionaryUtils; +import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.classloader.PluginManager; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; @@ -67,7 +65,7 @@ import java.util.*; * that gets iterators from the FeatureReader using Tribble. 
* */ -public class RMDTrackBuilder extends PluginManager { +public class RMDTrackBuilder { // extends PluginManager { /** * our log, which we use to capture anything from this class */ @@ -76,8 +74,6 @@ public class RMDTrackBuilder extends PluginManager { // a constant we use for marking sequence dictionary entries in the Tribble index property list public static final String SequenceDictionaryPropertyPredicate = "DICT:"; - private Map classes = null; - // private sequence dictionary we use to set our tracks with private SAMSequenceDictionary dict = null; @@ -91,6 +87,8 @@ public class RMDTrackBuilder extends PluginManager { */ private ValidationExclusion.TYPE validationExclusionType; + FeatureManager featureManager; + /** * Construct an RMDTrackerBuilder, allowing the user to define tracks to build after-the-fact. This is generally * used when walkers want to directly manage the ROD system for whatever reason. Before using this constructor, @@ -102,29 +100,14 @@ public class RMDTrackBuilder extends PluginManager { public RMDTrackBuilder(SAMSequenceDictionary dict, GenomeLocParser genomeLocParser, ValidationExclusion.TYPE validationExclusionType) { - super(FeatureCodec.class, "Codecs", "Codec"); this.dict = dict; - this.genomeLocParser = genomeLocParser; this.validationExclusionType = validationExclusionType; - - classes = new HashMap(); - for (String name: this.getPluginsByName().keySet()) { - classes.put(name.toUpperCase(), getPluginsByName().get(name)); - } } - - /** @return a list of all available track types we currently have access to create */ - public Map getAvailableTrackNamesAndTypes() { - return Collections.unmodifiableMap(classes); + this.genomeLocParser = genomeLocParser; + featureManager = new FeatureManager(); } - /** @return a list of all available track record types we currently have access to create */ - public Map getAvailableTrackNamesAndRecordTypes() { - HashMap classToRecord = new HashMap(); - for (String name: 
this.getPluginsByName().keySet()) { - FeatureCodec codec = this.createByName(name); - classToRecord.put(name, codec.getFeatureType()); - } - return classToRecord; + public FeatureManager getFeatureManager() { + return featureManager; } /** @@ -133,45 +116,38 @@ public class RMDTrackBuilder extends PluginManager { * @param fileDescriptor a description of the type of track to build. * * @return an instance of the track - * @throws RMDTrackCreationException - * if we don't know of the target class or we couldn't create it */ - public RMDTrack createInstanceOfTrack(RMDTriplet fileDescriptor) throws RMDTrackCreationException { + public RMDTrack createInstanceOfTrack(RMDTriplet fileDescriptor) { String name = fileDescriptor.getName(); File inputFile = new File(fileDescriptor.getFile()); - Class featureCodecClass = getAvailableTrackNamesAndTypes().get(fileDescriptor.getType().toUpperCase()); - if (featureCodecClass == null) + FeatureManager.FeatureDescriptor descriptor = getFeatureManager().getByTriplet(fileDescriptor); + if (descriptor == null) throw new UserException.BadArgumentValue("-B",fileDescriptor.getType()); // return a feature reader track Pair pair; if (inputFile.getAbsolutePath().endsWith(".gz")) - pair = createTabixIndexedFeatureSource(featureCodecClass, name, inputFile); + pair = createTabixIndexedFeatureSource(descriptor, name, inputFile); else - pair = getFeatureSource(featureCodecClass, name, inputFile, fileDescriptor.getStorageType()); + pair = getFeatureSource(descriptor, name, inputFile, fileDescriptor.getStorageType()); if (pair == null) throw new UserException.CouldNotReadInputFile(inputFile, "Unable to make the feature reader for input file"); - return new RMDTrack(featureCodecClass, name, inputFile, pair.first, pair.second, genomeLocParser, createCodec(featureCodecClass,name)); + return new RMDTrack(descriptor.getCodecClass(), name, inputFile, pair.first, pair.second, genomeLocParser, createCodec(descriptor, name)); } /** * Convenience method 
simplifying track creation. Assume unnamed track based on a file rather than a stream. - * @param targetClass Type of Tribble class to build. + * @param codecClass Type of Tribble codec class to build. * @param inputFile Input file type to use. * @return An RMDTrack, suitable for accessing reference metadata. */ - public RMDTrack createInstanceOfTrack(Class targetClass, File inputFile) { - // TODO: Update RMDTriplet to contain an actual class object rather than a name to avoid these gymnastics. - String typeName = null; - for(Map.Entry trackType: getAvailableTrackNamesAndTypes().entrySet()) { - if(trackType.getValue().equals(targetClass)) - typeName = trackType.getKey(); - } + public RMDTrack createInstanceOfTrack(Class codecClass, File inputFile) { + final FeatureManager.FeatureDescriptor descriptor = getFeatureManager().getByCodec(codecClass); - if(typeName == null) - throw new ReviewedStingException("Unable to find type name for class " + targetClass.getName()); + if (descriptor == null) + throw new ReviewedStingException("Unable to find type name for codex class " + codecClass.getName()); - return createInstanceOfTrack(new RMDTriplet("anonymous",typeName,inputFile.getAbsolutePath(),RMDStorageType.FILE,new Tags())); + return createInstanceOfTrack(new RMDTriplet("anonymous",descriptor.getName(),inputFile.getAbsolutePath(),RMDStorageType.FILE,new Tags())); } /** @@ -179,16 +155,16 @@ public class RMDTrackBuilder extends PluginManager { * reader of the appropriate type will figure out what the right index type is, and determine if it * exists. 
* - * @param targetClass the codec class type + * @param descriptor the FeatureDescriptor describing the FeatureCodec we want to create * @param name the name of the track * @param inputFile the file to load * @return a feature reader implementation */ - private Pair createTabixIndexedFeatureSource(Class targetClass, String name, File inputFile) { + private Pair createTabixIndexedFeatureSource(FeatureManager.FeatureDescriptor descriptor, String name, File inputFile) { // we might not know the index type, try loading with the default reader constructor logger.info("Attempting to blindly load " + inputFile + " as a tabix indexed file"); try { - return new Pair(BasicFeatureSource.getFeatureSource(inputFile.getAbsolutePath(), createCodec(targetClass, name)),null); + return new Pair(BasicFeatureSource.getFeatureSource(inputFile.getAbsolutePath(), createCodec(descriptor, name)),null); } catch (TribbleException e) { throw new UserException(e.getMessage(), e); } @@ -196,28 +172,26 @@ public class RMDTrackBuilder extends PluginManager { /** * add a name to the codec, if it takes one - * @param targetClass the class to create a codec for + * @param descriptor the class to create a codec for * @param name the name to assign this codec * @return the feature codec itself */ - public FeatureCodec createCodec(Class targetClass, String name) { - FeatureCodec codex = this.createByType(targetClass); - if ( codex instanceof NameAwareCodec ) - ((NameAwareCodec)codex).setName(name); - if(codex instanceof ReferenceDependentFeatureCodec) - ((ReferenceDependentFeatureCodec)codex).setGenomeLocParser(genomeLocParser); - return codex; + private FeatureCodec createCodec(FeatureManager.FeatureDescriptor descriptor, String name) { + return featureManager.createCodec(descriptor, name, genomeLocParser); } /** * create a feature source object given: - * @param targetClass the target class + * @param descriptor the FeatureDescriptor describing the FeatureCodec we want to create * @param name the 
name of the codec * @param inputFile the tribble file to parse * @param storageType How the RMD is streamed into the input file. * @return the input file as a FeatureReader */ - private Pair getFeatureSource(Class targetClass, String name, File inputFile, RMDStorageType storageType) { + private Pair getFeatureSource(FeatureManager.FeatureDescriptor descriptor, + String name, + File inputFile, + RMDStorageType storageType) { // Feature source and sequence dictionary to use as the ultimate reference FeatureSource featureSource = null; SAMSequenceDictionary sequenceDictionary = null; @@ -227,7 +201,7 @@ public class RMDTrackBuilder extends PluginManager { if(canBeIndexed) { try { - Index index = loadIndex(inputFile, createCodec(targetClass, name)); + Index index = loadIndex(inputFile, createCodec(descriptor, name)); try { logger.info(String.format(" Index for %s has size in bytes %d", inputFile, Sizeof.getObjectGraphSize(index))); } catch (ReviewedStingException e) { } @@ -240,7 +214,7 @@ public class RMDTrackBuilder extends PluginManager { sequenceDictionary = getSequenceDictionaryFromProperties(index); } - featureSource = new BasicFeatureSource(inputFile.getAbsolutePath(), index, createCodec(targetClass, name)); + featureSource = new BasicFeatureSource(inputFile.getAbsolutePath(), index, createCodec(descriptor, name)); } catch (TribbleException e) { throw new UserException(e.getMessage()); @@ -250,7 +224,7 @@ public class RMDTrackBuilder extends PluginManager { } } else { - featureSource = BasicFeatureSource.getFeatureSource(inputFile.getAbsolutePath(),createCodec(targetClass, name),false); + featureSource = BasicFeatureSource.getFeatureSource(inputFile.getAbsolutePath(),createCodec(descriptor, name),false); } return new Pair(featureSource,sequenceDictionary); @@ -385,22 +359,6 @@ public class RMDTrackBuilder extends PluginManager { return idx; } - /** - * Returns a collection of track names that match the record type. 
- * @param trackRecordType the record type specified in the @RMD annotation - * @return a collection of available track record type names that match the record type - */ - public Collection getTrackRecordTypeNames(Class trackRecordType) { - Set names = new TreeSet(); - if (trackRecordType == null) - throw new IllegalArgumentException("trackRecordType value is null, please pass in an actual class object"); - - for (Map.Entry availableTrackRecordType: getAvailableTrackNamesAndRecordTypes().entrySet()) { - if (availableTrackRecordType.getValue() != null && trackRecordType.isAssignableFrom(availableTrackRecordType.getValue())) - names.add(availableTrackRecordType.getKey()); - } - return names; - } // --------------------------------------------------------------------------------------------------------- // static functions to work with the sequence dictionaries of indexes diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/GATKFeature.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/GATKFeature.java index 59e8471a3..6f8c9680f 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/GATKFeature.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/GATKFeature.java @@ -57,6 +57,7 @@ public abstract class GATKFeature implements Feature, HasGenomeLocation { public abstract GenomeLoc getLocation(); + // TODO: this should be a Feature public abstract Object getUnderlyingObject(); /** @@ -98,48 +99,9 @@ public abstract class GATKFeature implements Feature, HasGenomeLocation { return feature.getEnd(); } + // TODO: this should be a Feature, actually public Object getUnderlyingObject() { return feature; } } - - /** - * wrapping a old style rod into the new GATK feature style - */ - public static class RODGATKFeature extends GATKFeature { - - // our data - private ReferenceOrderedDatum datum; - - public RODGATKFeature(ReferenceOrderedDatum datum) { - super(datum.getName()); - this.datum = datum; - } - - 
@Override - public GenomeLoc getLocation() { - return datum.getLocation(); - } - - @Override - public Object getUnderlyingObject() { - return datum; - } - - @Override - public String getChr() { - return datum.getLocation().getContig(); - } - - @Override - public int getStart() { - return (int)datum.getLocation().getStart(); - } - - @Override - public int getEnd() { - return (int)datum.getLocation().getStop(); - } - } - } diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/GATKFeatureIterator.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/GATKFeatureIterator.java deleted file mode 100644 index 17c9fa718..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/GATKFeatureIterator.java +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Copyright (c) 2010. The Broad Institute - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- */ - -package org.broadinstitute.sting.gatk.refdata.utils; - -import net.sf.samtools.util.CloseableIterator; -import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; - -import java.util.Iterator; - - -/** - * - * @author aaron - * - * Class GATKFeatureIterator - * - * Takes a RODatum iterator and makes it an iterator of GATKFeatures. Shazam! - */ -public class GATKFeatureIterator implements CloseableIterator { - private final Iterator iter; - public GATKFeatureIterator(Iterator iter) { - this.iter = iter; - } - - @Override - public boolean hasNext() { - return iter.hasNext(); - } - - @Override - public GATKFeature next() { - return new GATKFeature.RODGATKFeature(iter.next()); - } - - @Override - public void remove() { - throw new UnsupportedOperationException("Remove not supported"); - } - - @Override - public void close() { - // do nothing, our underlying iterator doesn't support this - } -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java index 59d496828..608b5d1d0 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java @@ -1,21 +1,25 @@ package org.broadinstitute.sting.gatk.report; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.StingException; +import org.broadinstitute.sting.utils.text.TextFormattingUtils; import java.io.*; +import java.util.Collection; +import java.util.List; import java.util.TreeMap; /** * Container class for GATK report tables */ public class GATKReport { - private TreeMap tables; + public static final String GATKREPORT_HEADER_PREFIX = "##:GATKReport.v"; + private TreeMap tables = new TreeMap(); /** * Create a new, empty GATKReport. 
*/ public GATKReport() { - tables = new TreeMap(); } /** @@ -23,7 +27,7 @@ public class GATKReport { * @param filename the path to the file to load */ public GATKReport(String filename) { - loadReport(new File(filename)); + this(new File(filename)); } /** @@ -31,7 +35,6 @@ public class GATKReport { * @param file the file to load */ public GATKReport(File file) { - tables = new TreeMap(); loadReport(file); } @@ -46,11 +49,17 @@ public class GATKReport { GATKReportTable table = null; String[] header = null; int id = 0; + GATKReportVersion version = null; + List columnStarts = null; String line; while ( (line = reader.readLine()) != null ) { - if (line.startsWith("##:GATKReport.v0.1 ")) { - line = line.replaceFirst("##:GATKReport.v0.1 ", ""); + + if (line.startsWith(GATKREPORT_HEADER_PREFIX)) { + + version = GATKReportVersion.fromHeader(line); + + line = line.replaceFirst("##:GATKReport." + version.versionString + " ", ""); String[] pieces = line.split(" : "); String tableName = pieces[0]; @@ -58,14 +67,35 @@ public class GATKReport { addTable(tableName, tableDesc); table = getTable(tableName); + table.setVersion(version); header = null; - } else if ( line.isEmpty() ) { + columnStarts = null; + } else if ( line.trim().isEmpty() ) { // do nothing } else { if (table != null) { + + String[] splitLine; + + switch (version) { + case V0_1: + splitLine = TextFormattingUtils.splitWhiteSpace(line); + break; + + case V0_2: + if (header == null) { + columnStarts = TextFormattingUtils.getWordStarts(line); + } + splitLine = TextFormattingUtils.splitFixedWidth(line, columnStarts); + break; + + default: + throw new ReviewedStingException("GATK report version parsing not implemented for: " + line); + } + if (header == null) { - header = line.split("\\s+"); + header = splitLine; table.addPrimaryKey("id", false); @@ -75,10 +105,8 @@ public class GATKReport { id = 0; } else { - String[] entries = line.split("\\s+"); - for (int columnIndex = 0; columnIndex < header.length; columnIndex++) 
{ - table.set(id, header[columnIndex], entries[columnIndex]); + table.set(id, header[columnIndex], splitLine[columnIndex]); } id++; @@ -125,7 +153,10 @@ public class GATKReport { * @return the table object */ public GATKReportTable getTable(String tableName) { - return tables.get(tableName); + GATKReportTable table = tables.get(tableName); + if (table == null) + throw new ReviewedStingException("Table is not in GATKReport: " + tableName); + return table; } /** @@ -140,4 +171,8 @@ public class GATKReport { } } } + + public Collection getTables() { + return tables.values(); + } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumn.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumn.java index 440597754..347e870c8 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumn.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumn.java @@ -37,10 +37,10 @@ public class GATKReportColumn extends TreeMap { * tables, as the table gets written properly without having to waste storage for the unset elements (usually the zero * values) in the table. * - * @param primaryKey the primary key position in the column that should be set + * @param primaryKey the primary key position in the column that should be retrieved * @return the value at the specified position in the column, or the default value if the element is not set */ - public Object getWithoutSideEffects(Object primaryKey) { + private Object getWithoutSideEffects(Object primaryKey) { if (!this.containsKey(primaryKey)) { return defaultValue; } @@ -48,6 +48,16 @@ public class GATKReportColumn extends TreeMap { return this.get(primaryKey); } + /** + * Return an object from the column, but if it doesn't exist, return the default value. 
+ * + * @param primaryKey the primary key position in the column that should be retrieved + * @return the string value at the specified position in the column, or the default value if the element is not set + */ + public String getStringValue(Object primaryKey) { + return toString(getWithoutSideEffects(primaryKey)); + } + /** * Return the displayable property of the column. If true, the column will be displayed in the final output. * If not, printing will be suppressed for the contents of the table. @@ -67,7 +77,7 @@ public class GATKReportColumn extends TreeMap { for (Object obj : this.values()) { if (obj != null) { - int width = obj.toString().length(); + int width = toString(obj).length(); if (width > maxWidth) { maxWidth = width; @@ -77,4 +87,27 @@ public class GATKReportColumn extends TreeMap { return maxWidth; } + + /** + * Returns a string version of the values. + * @param obj The object to convert to a string + * @return The string representation of the column + */ + private static String toString(Object obj) { + String value; + if (obj == null) { + value = "null"; + } else if (obj instanceof Float) { + value = String.format("%.8f", (Float) obj); + } else if (obj instanceof Double) { + value = String.format("%.8f", (Double) obj); + } else { + value = obj.toString(); + } + return value; + } + + public String getColumnName() { + return columnName; + } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackCreationException.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumns.java similarity index 54% rename from public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackCreationException.java rename to public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumns.java index 29aefacc6..a33631c85 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackCreationException.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumns.java @@ -1,5 
+1,6 @@ /* - * Copyright (c) 2010. The Broad Institute + * Copyright (c) 2011, The Broad Institute + * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without @@ -11,7 +12,7 @@ * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT @@ -21,25 +22,34 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -package org.broadinstitute.sting.gatk.refdata.tracks; - -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +package org.broadinstitute.sting.gatk.report; +import java.util.*; /** - * - * @author aaron - * - * Class RMDTrackCreationException - * - * if we fail for some reason to make a track, throw this exception + * Tracks a linked list of GATKReportColumn in order by name. 
*/ -public class RMDTrackCreationException extends ReviewedStingException { - public RMDTrackCreationException(String msg) { - super(msg); +public class GATKReportColumns extends LinkedHashMap { + private List columnNames = new ArrayList(); + + /** + * Returns the column by index + * @param i the index + * @return The column + */ + public GATKReportColumn getByIndex(int i) { + return get(columnNames.get(i)); } - public RMDTrackCreationException(String message, Throwable throwable) { - super(message, throwable); + @Override + public GATKReportColumn remove(Object key) { + columnNames.remove(key); + return super.remove(key); + } + + @Override + public GATKReportColumn put(String key, GATKReportColumn value) { + columnNames.add(key); + return super.put(key, value); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportParser.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportParser.java deleted file mode 100644 index 6915d5cb2..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportParser.java +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright (c) 2011, The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.report; - -import org.apache.commons.io.FileUtils; -import org.apache.commons.io.IOUtils; -import org.broadinstitute.sting.utils.text.XReadLines; - -import java.io.File; -import java.io.IOException; -import java.io.InputStream; -import java.util.ArrayList; -import java.util.List; - -public class GATKReportParser { - private List tables = new ArrayList(); - - public void parse(File file) throws IOException { - InputStream stream = FileUtils.openInputStream(file); - try { - parse(stream); - } finally { - IOUtils.closeQuietly(stream); - } - } - - public void parse(InputStream input) throws IOException { - GATKReportTableParser table = null; - - for (String line: new XReadLines(input)) { - if (line.startsWith("##:GATKReport.v0.1 ")) { - table = newTableParser(line); - tables.add(table); - table.parse(line); - } else if (table != null) { - if (line.trim().length() == 0) - table = null; - else - table.parse(line); - } - } - } - - public String getValue(String tableName, String[] key, String column) { - for (GATKReportTableParser table: tables) - if (table.getTableName().equals(tableName)) - return table.getValue(key, column); - return null; - } - - public String getValue(String tableName, String key, String column) { - for (GATKReportTableParser table: tables) - if (table.getTableName().equals(tableName)) - return table.getValue(key, column); - return null; - } - - private GATKReportTableParser newTableParser(String header) { - return new GATKReportTableParser(); - } -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java index f7ea25696..152e1a57b 
100755 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java @@ -1,5 +1,6 @@ package org.broadinstitute.sting.gatk.report; +import org.apache.commons.lang.ObjectUtils; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.io.PrintStream; @@ -88,17 +89,20 @@ import java.util.regex.Pattern; * but at least the prototype contained herein works. * * @author Kiran Garimella + * @author Khalid Shakir */ public class GATKReportTable { + private static final GATKReportVersion LATEST_REPORT_VERSION = GATKReportVersion.V0_2; private String tableName; private String tableDescription; + private GATKReportVersion version = LATEST_REPORT_VERSION; private String primaryKeyName; private Collection primaryKeyColumn; private boolean primaryKeyDisplay; - boolean sortByPrimaryKey = true; + private boolean sortByPrimaryKey = true; - private LinkedHashMap columns; + private GATKReportColumns columns; /** * Verifies that a table or column name has only alphanumeric characters - no spaces or special characters allowed @@ -113,6 +117,19 @@ public class GATKReportTable { return !m.find(); } + /** + * Verifies that a table or column name has only alphanumeric characters - no spaces or special characters allowed + * + * @param description the name of the table or column + * @return true if the name is valid, false if otherwise + */ + private boolean isValidDescription(String description) { + Pattern p = Pattern.compile("\\r|\\n"); + Matcher m = p.matcher(description); + + return !m.find(); + } + /** * Construct a new GATK report table with the specified name and description * @@ -128,11 +145,23 @@ public class GATKReportTable { throw new ReviewedStingException("Attempted to set a GATKReportTable name of '" + tableName + "'. 
GATKReportTable names must be purely alphanumeric - no spaces or special characters are allowed."); } + if (!isValidDescription(tableDescription)) { + throw new ReviewedStingException("Attempted to set a GATKReportTable description of '" + tableDescription + "'. GATKReportTable descriptions must not contain newlines."); + } + this.tableName = tableName; this.tableDescription = tableDescription; this.sortByPrimaryKey = sortByPrimaryKey; - columns = new LinkedHashMap(); + columns = new GATKReportColumns(); + } + + public GATKReportVersion getVersion() { + return version; + } + + protected void setVersion(GATKReportVersion version) { + this.version = version; } /** @@ -161,6 +190,57 @@ public class GATKReportTable { primaryKeyDisplay = display; } + /** + * Returns the first primary key matching the dotted column values. + * Ex: dbsnp.eval.called.all.novel.all + * @param dottedColumnValues Period concatenated values. + * @return The first primary key matching the column values or throws an exception. + */ + public Object getPrimaryKey(String dottedColumnValues) { + Object key = findPrimaryKey(dottedColumnValues); + if (key == null) + throw new ReviewedStingException("Attempted to get non-existent GATKReportTable key for values: " + dottedColumnValues); + return key; + } + + /** + * Returns true if there is at least on row with the dotted column values. + * Ex: dbsnp.eval.called.all.novel.all + * @param dottedColumnValues Period concatenated values. + * @return true if there is at least one row matching the columns. + */ + public boolean containsPrimaryKey(String dottedColumnValues) { + return findPrimaryKey(dottedColumnValues) != null; + } + + /** + * Returns the first primary key matching the dotted column values. + * Ex: dbsnp.eval.called.all.novel.all + * @param dottedColumnValues Period concatenated values. + * @return The first primary key matching the column values or null. 
+ */ + private Object findPrimaryKey(String dottedColumnValues) { + return findPrimaryKey(dottedColumnValues.split("\\.")); + } + + /** + * Returns the first primary key matching the column values. + * Ex: new String[] { "dbsnp", "eval", "called", "all", "novel", "all" } + * @param columnValues column values. + * @return The first primary key matching the column values. + */ + private Object findPrimaryKey(Object[] columnValues) { + for (Object primaryKey : primaryKeyColumn) { + boolean matching = true; + for (int i = 0; matching && i < columnValues.length; i++) { + matching = ObjectUtils.equals(columnValues[i], get(primaryKey, i+1)); + } + if (matching) + return primaryKey; + } + return null; + } + /** * Add a column to the report and specify the default value that should be supplied if a given position in the table is never explicitly set. * @@ -230,6 +310,17 @@ public class GATKReportTable { return columns.get(columnName).get(primaryKey); } + /** + * Get a value from the given position in the table + * + * @param primaryKey the primary key value + * @param columnIndex the index of the column + * @return the value stored at the specified position in the table + */ + private Object get(Object primaryKey, int columnIndex) { + return columns.getByIndex(columnIndex).get(primaryKey); + } + /** * Increment an element in the table. This implementation is awful - a functor would probably be better. 
* @@ -515,7 +606,7 @@ public class GATKReportTable { String primaryKeyFormat = "%-" + getPrimaryKeyColumnWidth() + "s"; // Emit the table definition - out.printf("##:GATKReport.v0.1 %s : %s%n", tableName, tableDescription); + out.printf("##:GATKReport.%s %s : %s%n", LATEST_REPORT_VERSION.versionString, tableName, tableDescription); // Emit the table header, taking into account the padding requirement if the primary key is a hidden column boolean needsPadding = false; @@ -545,22 +636,8 @@ public class GATKReportTable { for (String columnName : columns.keySet()) { if (columns.get(columnName).isDisplayable()) { - Object obj = columns.get(columnName).getWithoutSideEffects(primaryKey); - if (needsPadding) { out.printf(" "); } - - String value = "null"; - if (obj != null) { - if (obj instanceof Float) { - value = String.format("%.8f", (Float) obj); - } else if (obj instanceof Double) { - value = String.format("%.8f", (Double) obj); - } else { - value = obj.toString(); - } - } - - //out.printf(columnWidths.get(columnName), obj == null ? 
"null" : obj.toString()); + String value = columns.get(columnName).getStringValue(primaryKey); out.printf(columnWidths.get(columnName), value); needsPadding = true; @@ -577,4 +654,16 @@ public class GATKReportTable { public int getNumRows() { return primaryKeyColumn.size(); } + + public String getTableName() { + return tableName; + } + + public String getTableDescription() { + return tableDescription; + } + + public GATKReportColumns getColumns() { + return columns; + } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTableParser.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTableParser.java deleted file mode 100644 index 6fd9f9627..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTableParser.java +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2011, The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- */ - -package org.broadinstitute.sting.gatk.report; - -import org.apache.commons.lang.StringUtils; - -import java.util.*; - -public class GATKReportTableParser { - private int lineNum = 0; - private String[] descriptions; - private Map headers = new HashMap(); - private List values = new ArrayList(); - - public void parse(String line) { - lineNum++; - switch (lineNum) { - case 1: - descriptions = parseLine(line); - case 2: - String[] columnHeaders = parseLine(line); - for (int i = 0; i < columnHeaders.length; i++) - headers.put(columnHeaders[i], i); - default: - values.add(parseLine(line)); - } - } - - public String getTableName() { - return descriptions[1]; - } - - public String getValue(String[] key, String column) { - if (!headers.containsKey(column)) - return null; - for (String[] row: values) - if (Arrays.equals(key, Arrays.copyOfRange(row, 1, key.length + 1))) - return row[headers.get(column)]; - return null; - } - - public String getValue(String key, String column) { - return getValue(key.split("\\."), column); - } - - private String generateKey(String[] row, int i) { - return StringUtils.join(row, ".", 0, i); - } - - private String[] parseLine(String line) { - return line.split(" +"); - } -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportVersion.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportVersion.java new file mode 100644 index 000000000..5f1159a43 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportVersion.java @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * 
Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.report; + +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + +public enum GATKReportVersion { + /** + * Differences between other versions: + * - Does not allow spaces in cells. + * - Mostly fixed width but has a bug where the string width of floating point + * values was not measured correctly leading to columns that aren't aligned + */ + V0_1("v0.1"), + + /** + * Differences between other versions: + * - Spaces allowed in cells, for example in sample names with spaces in them ex: "C507/FG-CR 6". + * - Fixed width fixed for floating point values + */ + V0_2("v0.2"); + + public final String versionString; + + private GATKReportVersion(String versionString) { + this.versionString = versionString; + } + + @Override + public String toString() { + return versionString; + } + + /** + * Returns the GATK Report Version from the file header. + * @param header Header from the file starting with ##:GATKReport.v[version] + * @return The version as an enum. 
+ */ + public static GATKReportVersion fromHeader(String header) { + if (header.startsWith("##:GATKReport.v0.1 ")) + return GATKReportVersion.V0_1; + + if (header.startsWith("##:GATKReport.v0.2 ")) + return GATKReportVersion.V0_2; + + throw new ReviewedStingException("Unknown GATK report version in header: " + header); + } +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLoci.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLoci.java index 232989fb0..08eb8f1d4 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLoci.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLoci.java @@ -65,13 +65,13 @@ public class TraverseLoci extends TraversalEngine,Locu referenceView.expandBoundsToAccomodateLoc(location); } - // Iterate forward to get all reference ordered data covering this location - final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(locus.getLocation()); - // create reference context. Note that if we have a pileup of "extended events", the context will // hold the (longest) stretch of deleted reference bases (if deletions are present in the pileup). 
ReferenceContext refContext = referenceView.getReferenceContext(location); + // Iterate forward to get all reference ordered data covering this location + final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(locus.getLocation(), refContext); + final boolean keepMeP = walker.filter(tracker, refContext, locus); if (keepMeP) { M x = walker.map(tracker, refContext, locus); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/Allows.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/Allows.java index 2541921e9..80cb30598 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/Allows.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/Allows.java @@ -23,5 +23,4 @@ import java.lang.annotation.*; @Target(ElementType.TYPE) public @interface Allows { DataSource[] value(); - RMD[] referenceMetaData() default {}; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/PileupWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/PileupWalker.java index 508d1f6ee..6243a6cc0 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/PileupWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/PileupWalker.java @@ -25,15 +25,18 @@ package org.broadinstitute.sting.gatk.walkers; +import org.broad.tribble.Feature; import org.broad.tribble.dbsnp.DbSNPFeature; import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Input; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; +import org.broadinstitute.sting.gatk.refdata.features.DbSNPHelper; import 
org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; -import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup; @@ -41,6 +44,7 @@ import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import java.io.PrintStream; import java.util.ArrayList; +import java.util.Collections; import java.util.List; /** @@ -68,6 +72,9 @@ public class PileupWalker extends LocusWalker implements TreeR @Argument(fullName="showIndelPileups",shortName="show_indels",doc="In addition to base pileups, generate pileups of extended indel events") public boolean SHOW_INDEL_PILEUPS = false; + @Input(fullName="metadata",shortName="metadata",doc="Add these ROD bindings to the output Pileup", required=false) + public List> rods = Collections.emptyList(); + public void initialize() { } @@ -112,18 +119,11 @@ public class PileupWalker extends LocusWalker implements TreeR */ private String getReferenceOrderedData( RefMetaDataTracker tracker ) { ArrayList rodStrings = new ArrayList(); - for ( GATKFeature datum : tracker.getAllRods() ) { - if ( datum != null && datum.getUnderlyingObject() instanceof ReferenceOrderedDatum ) { - rodStrings.add(((ReferenceOrderedDatum)datum.getUnderlyingObject()).toSimpleString()); // TODO: Aaron: this line still survives, try to remove it - } + for ( Feature datum : tracker.getValues(rods) ) { + rodStrings.add(datum.toString()); } String rodString = Utils.join(", ", rodStrings); - DbSNPFeature dbsnp = tracker.lookup(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME, DbSNPFeature.class); - - if ( dbsnp != null) - rodString += DbSNPHelper.toMediumString(dbsnp); - if ( !rodString.equals("") ) rodString = "[ROD: " + rodString + "]"; @@ -132,8 +132,6 @@ public class PileupWalker extends LocusWalker implements TreeR @Override public void onTraversalDone(Integer result) { - // Double check 
traversal result to make count is the same. - // TODO: Is this check necessary? out.println("[REDUCE RESULT] Traversal result is: " + result); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintRODsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintRODsWalker.java index 158992a22..84549b13a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintRODsWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintRODsWalker.java @@ -25,6 +25,7 @@ package org.broadinstitute.sting.gatk.walkers; +import org.broad.tribble.Feature; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; @@ -61,11 +62,8 @@ public class PrintRODsWalker extends RodWalker { if ( tracker == null ) return 0; - Iterator rods = tracker.getAllRods().iterator(); - while ( rods.hasNext() ) { - Object rod = rods.next().getUnderlyingObject(); - if (VariantContextAdaptors.canBeConvertedToVariantContext(rod) ) - out.println(rod.toString()); + for ( Feature feature : tracker.getValues(Feature.class) ) { + out.println(feature.toString()); } return 1; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java index 3144098a8..219ccbc0c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java @@ -25,6 +25,8 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.Feature; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -42,9 +44,9 @@ 
import java.util.List; import java.util.Map; -public class AlleleBalance implements InfoFieldAnnotation { +public class AlleleBalance extends InfoFieldAnnotation { - public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + public Map annotate(RefMetaDataTracker tracker, Map> rodBindings, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if ( stratifiedContexts.size() == 0 ) return null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalanceBySample.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalanceBySample.java index a99f87a70..df9890d64 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalanceBySample.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalanceBySample.java @@ -1,5 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.Feature; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -15,9 +17,9 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.*; -public class AlleleBalanceBySample implements GenotypeAnnotation, ExperimentalAnnotation { +public class AlleleBalanceBySample extends GenotypeAnnotation implements ExperimentalAnnotation { - public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g) { + public Map annotate(RefMetaDataTracker tracker, Map> rodBindings, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g) { Double ratio = annotateSNP(stratifiedContext, vc, g); if (ratio == null) return null; diff --git 
a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AnnotationByDepth.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AnnotationByDepth.java index 6c14e7445..dc41dbc81 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AnnotationByDepth.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AnnotationByDepth.java @@ -8,7 +8,7 @@ import java.util.Map; -public abstract class AnnotationByDepth implements InfoFieldAnnotation { +public abstract class AnnotationByDepth extends InfoFieldAnnotation { protected int annotationByVariantDepth(final Map genotypes, Map stratifiedContexts) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseCounts.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseCounts.java index 66416ce11..76daaa06b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseCounts.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseCounts.java @@ -31,6 +31,8 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.Feature; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -46,9 +48,9 @@ import java.util.List; import java.util.Map; -public class BaseCounts implements InfoFieldAnnotation { +public class BaseCounts extends InfoFieldAnnotation { - public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + public Map annotate(RefMetaDataTracker tracker, Map> rodBindings, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if ( stratifiedContexts.size() == 0 ) return null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java 
b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java index 74f7f9d80..b2a3e6a26 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java @@ -25,6 +25,8 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.Feature; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -43,14 +45,14 @@ import java.util.List; import java.util.Map; -public class ChromosomeCounts implements InfoFieldAnnotation, StandardAnnotation { +public class ChromosomeCounts extends InfoFieldAnnotation implements StandardAnnotation { private String[] keyNames = { VCFConstants.ALLELE_NUMBER_KEY, VCFConstants.ALLELE_COUNT_KEY, VCFConstants.ALLELE_FREQUENCY_KEY }; private VCFInfoHeaderLine[] descriptions = { new VCFInfoHeaderLine(VCFConstants.ALLELE_FREQUENCY_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Allele Frequency, for each ALT allele, in the same order as listed"), new VCFInfoHeaderLine(VCFConstants.ALLELE_COUNT_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Allele count in genotypes, for each ALT allele, in the same order as listed"), new VCFInfoHeaderLine(VCFConstants.ALLELE_NUMBER_KEY, 1, VCFHeaderLineType.Integer, "Total number of alleles in called genotypes") }; - public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + public Map annotate(RefMetaDataTracker tracker, Map> rodBindings, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if ( ! 
vc.hasGenotypes() ) return null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java index c384e0d09..f2abbc5b8 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java @@ -1,5 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.Feature; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -16,9 +18,9 @@ import java.util.List; import java.util.Map; -public class DepthOfCoverage implements InfoFieldAnnotation, StandardAnnotation { +public class DepthOfCoverage extends InfoFieldAnnotation implements StandardAnnotation { - public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + public Map annotate(RefMetaDataTracker tracker, Map> rodBindings, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if ( stratifiedContexts.size() == 0 ) return null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java index e3e8bc258..958075a92 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java @@ -1,5 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.Feature; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import 
org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -22,13 +24,13 @@ import java.util.List; import java.util.Map; -public class DepthPerAlleleBySample implements GenotypeAnnotation, StandardAnnotation { +public class DepthPerAlleleBySample extends GenotypeAnnotation implements StandardAnnotation { private static String REF_ALLELE = "REF"; private static String DEL = "DEL"; // constant, for speed: no need to create a key string for deletion allele every time - public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g) { + public Map annotate(RefMetaDataTracker tracker, Map> rodBindings, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g) { if ( g == null || !g.isCalled() ) return null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java index 97ed221e7..0ad643a4e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java @@ -25,6 +25,8 @@ package org.broadinstitute.sting.gatk.walkers.annotator; import cern.jet.math.Arithmetic; +import org.broad.tribble.Feature; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -42,11 +44,11 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.*; -public class FisherStrand implements InfoFieldAnnotation, StandardAnnotation { +public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotation { private static final String FS = "FS"; private static final 
double MIN_PVALUE = 1E-320; - public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + public Map annotate(RefMetaDataTracker tracker, Map> rodBindings, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if ( ! vc.isVariant() || vc.isFiltered() ) return null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GCContent.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GCContent.java index 48677bbe5..f8e422e23 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GCContent.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GCContent.java @@ -1,5 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.Feature; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -16,9 +18,9 @@ import java.util.List; import java.util.Map; -public class GCContent implements InfoFieldAnnotation, ExperimentalAnnotation { +public class GCContent extends InfoFieldAnnotation implements ExperimentalAnnotation { - public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + public Map annotate(RefMetaDataTracker tracker, Map> rodBindings, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { double content = computeGCContent(ref); Map map = new HashMap(); map.put(getKeyNames().get(0), String.format("%.2f", content)); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GLstats.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GLstats.java index cca0ad4bc..8fde3a20f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GLstats.java +++ 
b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GLstats.java @@ -1,5 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.Feature; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -23,11 +25,11 @@ import java.util.Map; */ // A set of annotations calculated directly from the GLs -public class GLstats implements InfoFieldAnnotation, StandardAnnotation { +public class GLstats extends InfoFieldAnnotation implements StandardAnnotation { private static final int MIN_SAMPLES = 10; - public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + public Map annotate(RefMetaDataTracker tracker, Map> rodBindings, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { final Map genotypes = vc.getGenotypes(); if ( genotypes == null || genotypes.size() < MIN_SAMPLES ) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java index b175579f1..24571aee1 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java @@ -25,6 +25,8 @@ package org.broadinstitute.sting.gatk.walkers.annotator; import net.sf.samtools.SAMRecord; +import org.broad.tribble.Feature; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; @@ -48,13 +50,13 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.*; 
-public class HaplotypeScore implements InfoFieldAnnotation, StandardAnnotation { +public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnotation { private final static boolean DEBUG = false; private final static int MIN_CONTEXT_WING_SIZE = 10; private final static int MAX_CONSENSUS_HAPLOTYPES_TO_CONSIDER = 50; private final static char REGEXP_WILDCARD = '.'; - public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + public Map annotate(RefMetaDataTracker tracker, Map> rodBindings, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if (stratifiedContexts.size() == 0 ) // size 0 means that call was made by someone else and we have no data here return null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HardyWeinberg.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HardyWeinberg.java index d86728d5e..292a6c5e5 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HardyWeinberg.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HardyWeinberg.java @@ -1,6 +1,8 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.Feature; import org.broad.tribble.util.popgen.HardyWeinbergCalculation; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -18,13 +20,13 @@ import java.util.List; import java.util.Map; -public class HardyWeinberg implements InfoFieldAnnotation, WorkInProgressAnnotation { +public class HardyWeinberg extends InfoFieldAnnotation implements WorkInProgressAnnotation { private static final int MIN_SAMPLES = 10; private static final int MIN_GENOTYPE_QUALITY = 10; private static final int MIN_NEG_LOG10_PERROR = MIN_GENOTYPE_QUALITY / 10; - 
public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + public Map annotate(RefMetaDataTracker tracker, Map> rodBindings, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { final Map genotypes = vc.getGenotypes(); if ( genotypes == null || genotypes.size() < MIN_SAMPLES ) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java index 02efd854c..97ac3ab6d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java @@ -1,5 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.Feature; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -16,11 +18,11 @@ import java.util.List; import java.util.Map; -public class HomopolymerRun implements InfoFieldAnnotation, StandardAnnotation { +public class HomopolymerRun extends InfoFieldAnnotation implements StandardAnnotation { private boolean ANNOTATE_INDELS = true; - public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + public Map annotate(RefMetaDataTracker tracker, Map> rodBindings, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if ( !vc.isBiallelic() ) return null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/IndelType.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/IndelType.java index 2fd62ddf3..7308f7f25 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/IndelType.java +++ 
b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/IndelType.java @@ -1,5 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.Feature; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -19,9 +21,9 @@ import java.util.*; * Time: 11:47:33 AM * To change this template use File | Settings | File Templates. */ -public class IndelType implements InfoFieldAnnotation, ExperimentalAnnotation { +public class IndelType extends InfoFieldAnnotation implements ExperimentalAnnotation { - public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + public Map annotate(RefMetaDataTracker tracker, Map> rodBindings, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { int run; if (vc.isMixed()) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/LowMQ.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/LowMQ.java index 1d999c531..492578839 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/LowMQ.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/LowMQ.java @@ -1,5 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.Feature; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -16,9 +18,9 @@ import java.util.List; import java.util.Map; -public class LowMQ implements InfoFieldAnnotation { +public class LowMQ extends InfoFieldAnnotation { - public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, 
VariantContext vc) { + public Map annotate(RefMetaDataTracker tracker, Map> rodBindings, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if ( stratifiedContexts.size() == 0 ) return null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZero.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZero.java index f240d02bc..2c5314822 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZero.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZero.java @@ -1,5 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.Feature; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -18,9 +20,9 @@ import java.util.List; import java.util.Map; -public class MappingQualityZero implements InfoFieldAnnotation, StandardAnnotation { +public class MappingQualityZero extends InfoFieldAnnotation implements StandardAnnotation { - public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + public Map annotate(RefMetaDataTracker tracker, Map> rodBindings, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if ( stratifiedContexts.size() == 0 ) return null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroBySample.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroBySample.java index 0ca53adf2..f4d80f554 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroBySample.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroBySample.java @@ -25,6 +25,8 @@ package 
org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.Feature; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -49,9 +51,9 @@ import java.util.Map; * Time: 6:46:25 PM * To change this template use File | Settings | File Templates. */ -public class MappingQualityZeroBySample implements GenotypeAnnotation { - public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, - AlignmentContext context, VariantContext vc, Genotype g) { +public class MappingQualityZeroBySample extends GenotypeAnnotation { + public Map annotate(RefMetaDataTracker tracker, Map> rodBindings, + ReferenceContext ref, AlignmentContext context, VariantContext vc, Genotype g) { if ( g == null || !g.isCalled() ) return null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroFraction.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroFraction.java index 08a25a7e3..2b8296778 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroFraction.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroFraction.java @@ -1,5 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.Feature; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -18,9 +20,9 @@ import java.util.Map; -public class MappingQualityZeroFraction implements InfoFieldAnnotation, ExperimentalAnnotation { +public class MappingQualityZeroFraction extends InfoFieldAnnotation implements ExperimentalAnnotation { - public Map 
annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + public Map annotate(RefMetaDataTracker tracker, Map> rodBindings, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if ( stratifiedContexts.size() == 0 ) return null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/NBaseCount.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/NBaseCount.java index 1c70a1b33..3ce01bc2a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/NBaseCount.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/NBaseCount.java @@ -1,5 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.Feature; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -21,8 +23,8 @@ import java.util.Map; * Date: 5/16/11 */ -public class NBaseCount implements InfoFieldAnnotation { - public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { +public class NBaseCount extends InfoFieldAnnotation { + public Map annotate(RefMetaDataTracker tracker, Map> rodBindings, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if( stratifiedContexts.size() == 0 ) return null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java index 2175d39e6..7b97a9c38 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java @@ -1,5 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.Feature; 
+import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -16,9 +18,9 @@ import java.util.List; import java.util.Map; -public class QualByDepth extends AnnotationByDepth implements InfoFieldAnnotation, StandardAnnotation { +public class QualByDepth extends AnnotationByDepth implements StandardAnnotation { - public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + public Map annotate(RefMetaDataTracker tracker, Map> rodBindings, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if ( stratifiedContexts.size() == 0 ) return null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java index d52f07b58..38345a1c4 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java @@ -1,5 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.Feature; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -20,9 +22,9 @@ import java.util.List; import java.util.Map; -public class RMSMappingQuality implements InfoFieldAnnotation, StandardAnnotation { +public class RMSMappingQuality extends InfoFieldAnnotation implements StandardAnnotation { - public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + public Map annotate(RefMetaDataTracker tracker, Map> rodBindings, 
ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if ( stratifiedContexts.size() == 0 ) return null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java index 5466828f6..cad10c77d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java @@ -1,5 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.Feature; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -21,11 +23,11 @@ import java.util.Map; -public abstract class RankSumTest implements InfoFieldAnnotation, StandardAnnotation { +public abstract class RankSumTest extends InfoFieldAnnotation implements StandardAnnotation { static final double INDEL_LIKELIHOOD_THRESH = 0.1; static final boolean DEBUG = false; - public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + public Map annotate(RefMetaDataTracker tracker, Map> rodBindings, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if ( stratifiedContexts.size() == 0 ) return null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadDepthAndAllelicFractionBySample.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadDepthAndAllelicFractionBySample.java index c56e2622d..68cc86478 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadDepthAndAllelicFractionBySample.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadDepthAndAllelicFractionBySample.java @@ -25,6 +25,8 @@ package 
org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.Feature; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -52,13 +54,13 @@ import java.util.Map; * Time: 3:59:27 PM * To change this template use File | Settings | File Templates. */ -public class ReadDepthAndAllelicFractionBySample implements GenotypeAnnotation { +public class ReadDepthAndAllelicFractionBySample extends GenotypeAnnotation { private static String REF_ALLELE = "REF"; private static String DEL = "DEL"; // constant, for speed: no need to create a key string for deletion allele every time - public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, + public Map annotate(RefMetaDataTracker tracker, Map> rodBindings, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g) { if ( g == null || !g.isCalled() ) return null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SBByDepth.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SBByDepth.java index a5ebd8db2..662b5cdce 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SBByDepth.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SBByDepth.java @@ -1,5 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.Feature; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -18,7 +20,7 @@ import java.util.Map; public class SBByDepth extends AnnotationByDepth { - public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, 
VariantContext vc) { + public Map annotate(RefMetaDataTracker tracker, Map> rodBindings, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if ( stratifiedContexts.size() == 0 ) return null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SampleList.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SampleList.java index ff9092a71..0977a041f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SampleList.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SampleList.java @@ -25,6 +25,8 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.Feature; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -41,9 +43,9 @@ import java.util.List; import java.util.Map; -public class SampleList implements InfoFieldAnnotation { +public class SampleList extends InfoFieldAnnotation { - public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + public Map annotate(RefMetaDataTracker tracker, Map> rodBindings, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if ( vc.isMonomorphic() || !vc.hasGenotypes() ) return null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java new file mode 100644 index 000000000..26a9b2edd --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java @@ -0,0 +1,171 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the 
Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.walkers.annotator; + +import org.broad.tribble.Feature; +import org.broadinstitute.sting.commandline.RodBinding; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants; +import org.broadinstitute.sting.utils.codecs.snpEff.SnpEffFeature; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; + +import java.util.*; + +/** + * A set of genomic annotations based on the output of the SnpEff variant effect predictor tool + * 
(http://snpeff.sourceforge.net/). + * + * For each variant, chooses one of the effects of highest biological impact from the SnpEff + * output file (which must be provided on the command line via --snpEffFile:SnpEff ), + * and adds annotations on that effect. + * + * The possible biological effects and their associated impacts are defined in the class: + * org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants + * + * @author David Roazen + */ +public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotation { + + // SnpEff annotation key names: + public static final String GENE_ID_KEY = "GENE_ID"; + public static final String GENE_NAME_KEY = "GENE_NAME"; + public static final String TRANSCRIPT_ID_KEY = "TRANSCRIPT_ID"; + public static final String EXON_ID_KEY = "EXON_ID"; + public static final String EXON_RANK_KEY = "EXON_RANK"; + public static final String WITHIN_NON_CODING_GENE_KEY = "WITHIN_NON_CODING_GENE"; + public static final String EFFECT_KEY = "EFFECT"; + public static final String EFFECT_IMPACT_KEY = "EFFECT_IMPACT"; + public static final String EFFECT_EXTRA_INFORMATION_KEY = "EFFECT_EXTRA_INFORMATION"; + public static final String OLD_NEW_AA_KEY = "OLD_NEW_AA"; + public static final String OLD_NEW_CODON_KEY = "OLD_NEW_CODON"; + public static final String CODON_NUM_KEY = "CODON_NUM"; + public static final String CDS_SIZE_KEY = "CDS_SIZE"; + + public static final String SNPEFF_ROD_NAME = "snpEffFile"; + + public Map annotate ( RefMetaDataTracker tracker, Map> rodBindings, ReferenceContext ref, Map stratifiedContexts, VariantContext vc ) { + RodBinding snpEffRodBinding = (RodBinding)rodBindings.get(SNPEFF_ROD_NAME); + List features = tracker.getValues(snpEffRodBinding); + + // Add only annotations for one of the most biologically-significant effects as defined in + // the SnpEffConstants class: + SnpEffFeature mostSignificantEffect = getMostSignificantEffect(features); + + if ( mostSignificantEffect == null ) { + return null; + } + + 
return generateAnnotations(mostSignificantEffect); + } + + private SnpEffFeature getMostSignificantEffect ( List snpEffFeatures ) { + SnpEffFeature mostSignificantEffect = null; + + for ( SnpEffFeature snpEffFeature : snpEffFeatures ) { + if ( mostSignificantEffect == null || + snpEffFeature.isHigherImpactThan(mostSignificantEffect) ) { + + mostSignificantEffect = snpEffFeature; + } + } + + return mostSignificantEffect; + } + + private Map generateAnnotations ( SnpEffFeature mostSignificantEffect ) { + Map annotations = new LinkedHashMap(Utils.optimumHashSize(getKeyNames().size())); + + if ( mostSignificantEffect.hasGeneID() ) + annotations.put(GENE_ID_KEY, mostSignificantEffect.getGeneID()); + if ( mostSignificantEffect.hasGeneName() ) + annotations.put(GENE_NAME_KEY, mostSignificantEffect.getGeneName()); + if ( mostSignificantEffect.hasTranscriptID() ) + annotations.put(TRANSCRIPT_ID_KEY, mostSignificantEffect.getTranscriptID()); + if ( mostSignificantEffect.hasExonID() ) + annotations.put(EXON_ID_KEY, mostSignificantEffect.getExonID()); + if ( mostSignificantEffect.hasExonRank() ) + annotations.put(EXON_RANK_KEY, Integer.toString(mostSignificantEffect.getExonRank())); + if ( mostSignificantEffect.isNonCodingGene() ) + annotations.put(WITHIN_NON_CODING_GENE_KEY, null); + + annotations.put(EFFECT_KEY, mostSignificantEffect.getEffect().toString()); + annotations.put(EFFECT_IMPACT_KEY, mostSignificantEffect.getEffectImpact().toString()); + if ( mostSignificantEffect.hasEffectExtraInformation() ) + annotations.put(EFFECT_EXTRA_INFORMATION_KEY, mostSignificantEffect.getEffectExtraInformation()); + + if ( mostSignificantEffect.hasOldAndNewAA() ) + annotations.put(OLD_NEW_AA_KEY, mostSignificantEffect.getOldAndNewAA()); + if ( mostSignificantEffect.hasOldAndNewCodon() ) + annotations.put(OLD_NEW_CODON_KEY, mostSignificantEffect.getOldAndNewCodon()); + if ( mostSignificantEffect.hasCodonNum() ) + annotations.put(CODON_NUM_KEY, 
Integer.toString(mostSignificantEffect.getCodonNum())); + if ( mostSignificantEffect.hasCdsSize() ) + annotations.put(CDS_SIZE_KEY, Integer.toString(mostSignificantEffect.getCdsSize())); + + return annotations; + } + + public List getKeyNames() { + return Arrays.asList( GENE_ID_KEY, + GENE_NAME_KEY, + TRANSCRIPT_ID_KEY, + EXON_ID_KEY, + EXON_RANK_KEY, + WITHIN_NON_CODING_GENE_KEY, + EFFECT_KEY, + EFFECT_IMPACT_KEY, + EFFECT_EXTRA_INFORMATION_KEY, + OLD_NEW_AA_KEY, + OLD_NEW_CODON_KEY, + CODON_NUM_KEY, + CDS_SIZE_KEY + ); + } + + public List getDescriptions() { + return Arrays.asList( + new VCFInfoHeaderLine(GENE_ID_KEY, 1, VCFHeaderLineType.String, "Gene ID"), + new VCFInfoHeaderLine(GENE_NAME_KEY, 1, VCFHeaderLineType.String, "Gene name"), + new VCFInfoHeaderLine(TRANSCRIPT_ID_KEY, 1, VCFHeaderLineType.String, "Transcript ID"), + new VCFInfoHeaderLine(EXON_ID_KEY, 1, VCFHeaderLineType.String, "Exon ID"), + new VCFInfoHeaderLine(EXON_RANK_KEY, 1, VCFHeaderLineType.Integer, "Exon rank"), + new VCFInfoHeaderLine(WITHIN_NON_CODING_GENE_KEY, 0, VCFHeaderLineType.Flag, "If present, gene is non-coding"), + new VCFInfoHeaderLine(EFFECT_KEY, 1, VCFHeaderLineType.String, "One of the most high-impact effects across all transcripts at this site"), + new VCFInfoHeaderLine(EFFECT_IMPACT_KEY, 1, VCFHeaderLineType.String, "Impact of the effect " + Arrays.toString(SnpEffConstants.EffectImpact.values())), + new VCFInfoHeaderLine(EFFECT_EXTRA_INFORMATION_KEY, 1, VCFHeaderLineType.String, "Additional information about the effect"), + new VCFInfoHeaderLine(OLD_NEW_AA_KEY, 1, VCFHeaderLineType.String, "Old/New amino acid"), + new VCFInfoHeaderLine(OLD_NEW_CODON_KEY, 1, VCFHeaderLineType.String, "Old/New codon"), + new VCFInfoHeaderLine(CODON_NUM_KEY, 1, VCFHeaderLineType.Integer, "Codon number"), + new VCFInfoHeaderLine(CDS_SIZE_KEY, 1, VCFHeaderLineType.Integer, "CDS size") + ); + } +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SpanningDeletions.java 
b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SpanningDeletions.java index a4668eeb6..c11634c94 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SpanningDeletions.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SpanningDeletions.java @@ -1,5 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.Feature; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -16,9 +18,9 @@ import java.util.List; import java.util.Map; -public class SpanningDeletions implements InfoFieldAnnotation, StandardAnnotation { +public class SpanningDeletions extends InfoFieldAnnotation implements StandardAnnotation { - public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + public Map annotate(RefMetaDataTracker tracker, Map> rodBindings, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if ( stratifiedContexts.size() == 0 ) return null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TechnologyComposition.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TechnologyComposition.java index b46d82d8b..12e7259a9 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TechnologyComposition.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TechnologyComposition.java @@ -1,5 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.Feature; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ 
-24,12 +26,12 @@ import java.util.Map; * Time: 3:14 PM * To change this template use File | Settings | File Templates. */ -public class TechnologyComposition implements ExperimentalAnnotation,InfoFieldAnnotation { +public class TechnologyComposition extends InfoFieldAnnotation implements ExperimentalAnnotation { private String nSLX = "NumSLX"; private String n454 ="Num454"; private String nSolid = "NumSOLiD"; private String nOther = "NumOther"; - public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + public Map annotate(RefMetaDataTracker tracker, Map> rodBindings, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if ( stratifiedContexts.size() == 0 ) return null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java index acbeee3b2..ce9b9a5f0 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java @@ -25,9 +25,10 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Hidden; -import org.broadinstitute.sting.commandline.Output; +import org.broad.tribble.Feature; +import org.broadinstitute.sting.commandline.*; +import org.broadinstitute.sting.gatk.arguments.DbsnpArgumentCollection; +import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; @@ -39,6 +40,7 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnot import org.broadinstitute.sting.utils.BaseUtils; import 
org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.classloader.PluginManager; +import org.broadinstitute.sting.utils.codecs.snpEff.SnpEffFeature; import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; @@ -49,12 +51,42 @@ import java.util.*; /** * Annotates variant calls with context information. Users can specify which of the available annotations to use. */ -@Requires(value={},referenceMetaData=@RMD(name="variant",type=VariantContext.class)) +@Requires(value={}) @Allows(value={DataSource.READS, DataSource.REFERENCE}) @Reference(window=@Window(start=-50,stop=50)) @By(DataSource.REFERENCE) public class VariantAnnotator extends RodWalker { + @ArgumentCollection + protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection(); + + /** + * A SnpEff output file from which to add annotations. + * + * The INFO field will be annotated with information on the most biologically-significant effect + * listed in the SnpEff output file for each variant. + */ + @Input(fullName="snpEffFile", shortName = "snpEffFile", doc="SnpEff file", required=false) + public RodBinding snpEffFile = RodBinding.makeUnbound(SnpEffFeature.class); + + /** + * A dbSNP VCF file from which to annotate. + * + * rsIDs from this file are used to populate the ID column of the output. Also, the DB INFO flag will be set when appropriate. + */ + @ArgumentCollection + protected DbsnpArgumentCollection dbsnp = new DbsnpArgumentCollection(); + + /** + * A comparisons VCF file from which to annotate. + * + * If a record in the 'variant' track overlaps with a record from the provided comp track, the INFO field will be annotated + * as such in the output with the track name (e.g. -comp:FOO will have 'FOO' in the INFO field). 
Records that are filtered in the comp track will be ignored. + * Note that 'dbSNP' has been special-cased (see the --dbsnp argument). + */ + @Input(fullName="comp", shortName = "comp", doc="comparison VCF file", required=false) + public RodBinding comps = RodBinding.makeUnbound(VariantContext.class); + @Output(doc="File to which variants should be written",required=true) protected VCFWriter vcfWriter = null; @@ -90,6 +122,8 @@ public class VariantAnnotator extends RodWalker { private Collection indelBufferContext; + private Map> rodBindings = new HashMap>(); + private void listAnnotationsAndExit() { List> infoAnnotationClasses = new PluginManager(InfoFieldAnnotation.class).getPlugins(); @@ -118,8 +152,7 @@ public class VariantAnnotator extends RodWalker { listAnnotationsAndExit(); // get the list of all sample names from the variant VCF input rod, if applicable - Set rodName = new HashSet(); - rodName.add("variant"); + List rodName = Arrays.asList(variantCollection.variants.getName()); Set samples = SampleUtils.getUniqueSamplesFromRods(getToolkit(), rodName); // add the non-VCF sample from the command-line, if applicable @@ -133,17 +166,19 @@ public class VariantAnnotator extends RodWalker { logger.warn("There are no samples input at all; use the --sampleName argument to specify one if desired."); } + initializeRodBindingMap(); + if ( USE_ALL_ANNOTATIONS ) - engine = new VariantAnnotatorEngine(getToolkit()); + engine = new VariantAnnotatorEngine(getToolkit(), rodBindings); else - engine = new VariantAnnotatorEngine(getToolkit(), annotationGroupsToUse, annotationsToUse); + engine = new VariantAnnotatorEngine(getToolkit(), annotationGroupsToUse, annotationsToUse, rodBindings); engine.initializeExpressions(expressionsToUse); // setup the header fields // note that if any of the definitions conflict with our new ones, then we want to overwrite the old ones Set hInfo = new HashSet(); hInfo.addAll(engine.getVCFAnnotationDescriptions()); - for ( VCFHeaderLine line : 
VCFUtils.getHeaderFields(getToolkit(), Arrays.asList("variant")) ) { + for ( VCFHeaderLine line : VCFUtils.getHeaderFields(getToolkit(), Arrays.asList(variantCollection.variants.getName())) ) { if ( isUniqueHeaderLine(line, hInfo) ) hInfo.add(line); } @@ -156,6 +191,13 @@ public class VariantAnnotator extends RodWalker { } } + private void initializeRodBindingMap() { + rodBindings.put(variantCollection.variants.getName(), variantCollection.variants); + rodBindings.put(snpEffFile.getName(), snpEffFile); + rodBindings.put(dbsnp.dbsnp.getName(), dbsnp.dbsnp); + rodBindings.put(comps.getName(), comps); + } + public static boolean isUniqueHeaderLine(VCFHeaderLine line, Set currentSet) { if ( !(line instanceof VCFCompoundHeaderLine) ) return true; @@ -202,7 +244,7 @@ public class VariantAnnotator extends RodWalker { if ( tracker == null ) return 0; - Collection VCs = tracker.getVariantContexts(ref, "variant", null, context.getLocation(), true, false); + Collection VCs = tracker.getValues(variantCollection.variants, context.getLocation()); if ( VCs.size() == 0 ) return 0; @@ -219,18 +261,18 @@ public class VariantAnnotator extends RodWalker { if ( stratifiedContexts != null ) { annotatedVCs = new ArrayList(VCs.size()); for ( VariantContext vc : VCs ) - annotatedVCs.addAll(engine.annotateContext(tracker, ref, stratifiedContexts, vc)); + annotatedVCs.add(engine.annotateContext(tracker, ref, stratifiedContexts, vc)); } } if ( ! indelsOnly ) { for ( VariantContext annotatedVC : annotatedVCs ) - vcfWriter.add(annotatedVC, ref.getBase()); + vcfWriter.add(annotatedVC); } else { // check to see if the buffered context is different (in location) this context if ( indelBufferContext != null && ! 
VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(),indelBufferContext.iterator().next()).equals(VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(),annotatedVCs.iterator().next())) ) { for ( VariantContext annotatedVC : indelBufferContext ) - vcfWriter.add(annotatedVC, ref.getBase()); + vcfWriter.add(annotatedVC); indelBufferContext = annotatedVCs; } else { indelBufferContext = annotatedVCs; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java index fdf498a3d..cae9ab00c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java @@ -25,14 +25,14 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.Feature; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; -import org.broadinstitute.sting.gatk.walkers.annotator.genomicannotator.GenomicAnnotation; -import org.broadinstitute.sting.gatk.walkers.annotator.genomicannotator.JoinTable; +import org.broadinstitute.sting.gatk.refdata.features.DbSNPHelper; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotationInterfaceManager; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; @@ -45,7 +45,6 @@ import 
org.broadinstitute.sting.utils.variantcontext.Genotype; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.*; -import java.util.Map.Entry; public class VariantAnnotatorEngine { @@ -57,20 +56,7 @@ public class VariantAnnotatorEngine { private List requestedExpressions = new ArrayList(); private HashMap dbAnnotations = new HashMap(); - - // command-line option from GenomicAnnotator. - private Map> requestedColumnsMap; - - // command-line option from GenomicAnnotator. - private boolean oneToMany; - - // command-line option from GenomicAnnotator. - private List joinTables; - - // used by GenomicAnnotator. Maps binding name to number of output VCF records - // annotated with records from the input table with this binding name. Only used for - // printing out stats at the end. - private Map inputTableHitCounter = new HashMap(); + private Map> rodBindings; private static class VAExpression { public String fullName, bindingName, fieldName; @@ -87,16 +73,18 @@ public class VariantAnnotatorEngine { } // use this constructor if you want all possible annotations - public VariantAnnotatorEngine(GenomeAnalysisEngine engine) { + public VariantAnnotatorEngine(GenomeAnalysisEngine engine, Map> rodBindings) { requestedInfoAnnotations = AnnotationInterfaceManager.createAllInfoFieldAnnotations(); requestedGenotypeAnnotations = AnnotationInterfaceManager.createAllGenotypeAnnotations(); initializeDBs(engine); + this.rodBindings = rodBindings; } // use this constructor if you want to select specific annotations (and/or interfaces) - public VariantAnnotatorEngine(GenomeAnalysisEngine engine, List annotationGroupsToUse, List annotationsToUse) { + public VariantAnnotatorEngine(GenomeAnalysisEngine engine, List annotationGroupsToUse, List annotationsToUse, Map> rodBindings) { initializeAnnotations(annotationGroupsToUse, annotationsToUse); initializeDBs(engine); + this.rodBindings = rodBindings; } // select specific expressions to use @@ -140,7 +128,7 @@ 
public class VariantAnnotatorEngine { return descriptions; } - public Collection annotateContext(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + public VariantContext annotateContext(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { Map infoAnnotations = new LinkedHashMap(vc.getAttributes()); @@ -150,42 +138,18 @@ public class VariantAnnotatorEngine { // annotate expressions where available annotateExpressions(tracker, ref, infoAnnotations); - // process the info field - List> infoAnnotationOutputsList = new LinkedList>(); //each element in infoAnnotationOutputs corresponds to a single line in the output VCF file - infoAnnotationOutputsList.add(new LinkedHashMap(vc.getAttributes())); //keep the existing info-field annotations. After this infoAnnotationOutputsList.size() == 1, which means the output VCF file has 1 additional line. - infoAnnotationOutputsList.get(0).putAll(infoAnnotations); // put the DB membership info in - // go through all the requested info annotationTypes - for ( InfoFieldAnnotation annotationType : requestedInfoAnnotations ) - { - Map annotationsFromCurrentType = annotationType.annotate(tracker, ref, stratifiedContexts, vc); - if ( annotationsFromCurrentType == null ) { - continue; - } - - if(annotationType instanceof GenomicAnnotation) - { - infoAnnotationOutputsList = processGenomicAnnotation( infoAnnotationOutputsList, annotationsFromCurrentType ); - } - else - { - // add the annotations to each output line. 
- for(Map infoAnnotationOutput : infoAnnotationOutputsList) { - infoAnnotationOutput.putAll(annotationsFromCurrentType); - } - } + for ( InfoFieldAnnotation annotationType : requestedInfoAnnotations ) { + Map annotationsFromCurrentType = annotationType.annotate(tracker, rodBindings, ref, stratifiedContexts, vc); + if ( annotationsFromCurrentType != null ) + infoAnnotations.putAll(annotationsFromCurrentType); } - // annotate genotypes - Map genotypes = annotateGenotypes(tracker, ref, stratifiedContexts, vc); + // generate a new annotated VC + final VariantContext annotatedVC = VariantContext.modifyAttributes(vc, infoAnnotations); - // create a separate VariantContext (aka. output line) for each element in infoAnnotationOutputsList - Collection returnValue = new LinkedList(); - for(Map infoAnnotationOutput : infoAnnotationOutputsList) { - returnValue.add( new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? 
vc.getFilters() : null, infoAnnotationOutput) ); - } - - return returnValue; + // annotate genotypes, creating another new VC in the process + return VariantContext.modifyGenotypes(annotatedVC, annotateGenotypes(tracker, ref, stratifiedContexts, vc)); } private void annotateDBs(RefMetaDataTracker tracker, ReferenceContext ref, VariantContext vc, Map infoAnnotations) { @@ -194,16 +158,16 @@ public class VariantAnnotatorEngine { String rsID = null; if (vc.isSNP()) - rsID = DbSNPHelper.rsIDOfFirstRealSNP(tracker.getReferenceMetaData(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME)); + rsID = DbSNPHelper.rsIDOfFirstRealSNP(tracker.getValues(Feature.class, DbSNPHelper.STANDARD_DBSNP_TRACK_NAME), true); else if (vc.isIndel()) - rsID = DbSNPHelper.rsIDOfFirstRealIndel(tracker.getReferenceMetaData(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME)); + rsID = DbSNPHelper.rsIDOfFirstRealIndel(tracker.getValues(Feature.class, DbSNPHelper.STANDARD_DBSNP_TRACK_NAME)); infoAnnotations.put(VCFConstants.DBSNP_KEY, rsID != null ); // annotate dbsnp id if available and not already there if ( rsID != null && (!vc.hasID() || vc.getID().equals(VCFConstants.EMPTY_ID_FIELD)) ) infoAnnotations.put(VariantContext.ID_KEY, rsID); } else { boolean overlapsComp = false; - for ( VariantContext comp : tracker.getVariantContexts(ref, dbSet.getKey(), null, ref.getLocus(), false, false) ) { + for ( VariantContext comp : tracker.getValues(VariantContext.class, dbSet.getKey()) ) { if ( !comp.isFiltered() ) { overlapsComp = true; break; @@ -216,7 +180,7 @@ public class VariantAnnotatorEngine { private void annotateExpressions(RefMetaDataTracker tracker, ReferenceContext ref, Map infoAnnotations) { for ( VAExpression expression : requestedExpressions ) { - Collection VCs = tracker.getVariantContexts(ref, expression.bindingName, null, ref.getLocus(), false, true); + Collection VCs = tracker.getValues(VariantContext.class, expression.bindingName); if ( VCs.size() == 0 ) continue; @@ -241,7 +205,7 @@ public class 
VariantAnnotatorEngine { Map genotypeAnnotations = new HashMap(genotype.getAttributes()); for ( GenotypeAnnotation annotation : requestedGenotypeAnnotations ) { - Map result = annotation.annotate(tracker, ref, context, vc, genotype); + Map result = annotation.annotate(tracker, rodBindings, ref, context, vc, genotype); if ( result != null ) genotypeAnnotations.putAll(result); } @@ -251,6 +215,9 @@ public class VariantAnnotatorEngine { return genotypes; } + +/* + // Finish processing data from GenomicAnnotation. private List> processGenomicAnnotation( List> infoAnnotationOutputsList, Map annotationsForCurrentLocusFromAllAnnotatorInputTables) { @@ -403,12 +370,14 @@ public class VariantAnnotatorEngine { incrementStatsCounter(bindingName, infoAnnotationOutputsList.size()); } - /** + */ +/** * Records statistics that will be printed when GenomicAnnotator finishes. * * @param bindingName The table from which annotations were gotten * @param numNewRecords The number of new output VCF records created with annotations from this table - */ + *//* + private void incrementStatsCounter( final String bindingName, int numNewRecords) { //record some stats - there were infoAnnotationOutputsList.size() output VCF records annotated with data from the 'bindingName' input table. Integer counter = inputTableHitCounter.get(bindingName); @@ -453,13 +422,15 @@ public class VariantAnnotatorEngine { } - /** + */ +/** * Records statistics for the explodeInfoAnnotationOutputsList(..) calculation. 
* @param bindingName The table from which annotations were gotten * @param numNewVCFRecordsAnnotatedWithBindingNameData The number of new output VCF records created with annotations from this table * @param infoAnnotationOutputsList output list * @param matchingRecordsSize matching records size - */ + *//* + private void recordStats( final String bindingName, int numNewVCFRecordsAnnotatedWithBindingNameData, final List> infoAnnotationOutputsList, int matchingRecordsSize ) { //update stats for the 'bindingName' table @@ -509,13 +480,14 @@ public class VariantAnnotatorEngine { } - /** + */ +/** * Determines whether to exclude the given column from the annotations. * @param key The fully qualified columnName * @return Whether the -S arg specifies that this column should be included in the annotations. * - * TODO this function can be optimized through memoization - */ + *//* + private boolean isKeyFilteredOutBySelectArg(String key) { for(final String bindingName : requestedColumnsMap.keySet()) { @@ -536,10 +508,8 @@ public class VariantAnnotatorEngine { return false; //the -S arg doesn't have anything with the same binding name as this key, so the user implicitly requested this key } - - - - /** + */ +/** * Determines how the engine will handle the case where multiple records in a ROD file * overlap a particular single locus. If oneToMany is set to true, the output will be * one-to-many, so that each locus in the input VCF file could result in multiple @@ -551,18 +521,21 @@ public class VariantAnnotatorEngine { * See class-level comments for more details. * * @param oneToMany true if we should break out from one to many - */ + *//* + public void setOneToMany(boolean oneToMany) { this.oneToMany = oneToMany; } - /** + */ +/** * Sets the columns that will be used for the info annotation field. * Column names should be of the form bindingName.columnName (eg. dbsnp.avHet). 
* * @param columns An array of strings where each string is a comma-separated list * of columnNames (eg ["dbsnp.avHet,dbsnp.valid", "file2.col1,file3.col1"] ). - */ + *//* + public void setRequestedColumns(String[] columns) { if(columns == null) { throw new IllegalArgumentException("columns arg is null. Please check the -s command-line arg."); @@ -574,17 +547,20 @@ public class VariantAnnotatorEngine { } - /** + */ +/** * Passes in a pointer to the JoinTables. * * @param joinTables The list of JoinTables. There should be one JoinTable object for each -J arg. - */ + *//* + public void setJoinTables(List joinTables) { this.joinTables = joinTables; } - /** + */ +/** * Parses the columns arg and returns a Map of columns hashed by their binding name. * For example: * The command line: @@ -604,7 +580,8 @@ public class VariantAnnotatorEngine { * @param columnsArg The -s command line arg value. * * @return Map representing a parsed version of this arg - see above. - */ + *//* + private static Map> parseColumnsArg(String[] columnsArg) { Map> result = new HashMap>(); @@ -635,5 +612,6 @@ public class VariantAnnotatorEngine { return Collections.unmodifiableMap(inputTableHitCounter); } +*/ } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotation.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotation.java deleted file mode 100644 index 05c1b3c52..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotation.java +++ /dev/null @@ -1,299 +0,0 @@ -/* - * Copyright (c) 2010 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * 
copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR - * THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.walkers.annotator.genomicannotator; - -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.features.annotator.AnnotatorInputTableFeature; -import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; -import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; -import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; - -import java.util.*; -import java.util.Map.Entry; - -/** - * This plugin for {@link VariantAnnotatorEngine} serves as the core - * of the {@link GenomicAnnotator}. 
It finds all records in the -B input files - * that match the given variant's position and, optionally, the variant's reference and alternate alleles. - * - * For details, see: http://www.broadinstitute.org/gsa/wiki/index.php/GenomicAnnotator - */ -public class GenomicAnnotation implements InfoFieldAnnotation { - - public static final String CHR_COLUMN = "chr"; - public static final String START_COLUMN = "start"; - public static final String END_COLUMN = "end"; - public static final String HAPLOTYPE_REFERENCE_COLUMN = "haplotypeReference"; - public static final String HAPLOTYPE_ALTERNATE_COLUMN = "haplotypeAlternate"; - - public static final String NUM_MATCHES_SPECIAL_INFO_FIELD = "numMatchingRecords"; - - /** Characters that aren't allowed within VCF info field key-value pairs */ - public static final char[] ILLEGAL_INFO_FIELD_VALUES = { ' ', '=', ';' }; - /** Replacement for each character in ILLEGAL_INFO_FIELD_VALUES */ - public static final char[] ILLEGAL_INFO_FIELD_VALUE_SUBSTITUTES = { '_', '-', '!' }; - - - private void modifyAnnotationsForIndels(VariantContext vc, String featureName, Map annotationsForRecord) { - String inCodingRegionKey = featureName + ".inCodingRegion"; - String referenceCodonKey = featureName + ".referenceCodon"; - String variantCodonKey = featureName + ".variantCodon"; - String codingCoordStrKey = featureName + ".codingCoordStr"; - String proteinCoordStrKey = featureName + ".proteinCoordStr"; - String haplotypeReferenceKey = featureName + "." + HAPLOTYPE_REFERENCE_COLUMN; - String haplotypeAlternateKey = featureName + "." + HAPLOTYPE_ALTERNATE_COLUMN; - String functionalClassKey = featureName + ".functionalClass"; - String startKey = featureName + "." + START_COLUMN; - String endKey = featureName + "." 
+ END_COLUMN; - String referenceAAKey = featureName + ".referenceAA"; - String variantAAKey = featureName + ".variantAA"; - String changesAAKey = featureName + ".changesAA"; - - annotationsForRecord.put(variantCodonKey, "unknown"); - annotationsForRecord.put(codingCoordStrKey, "unknown"); - annotationsForRecord.put(proteinCoordStrKey, "unknown"); - annotationsForRecord.put(referenceAAKey, "unknown"); - annotationsForRecord.put(variantAAKey, "unknown"); - - String refAllele = vc.getReference().getDisplayString(); - if (refAllele.length() == 0) { refAllele = "-"; } - - String altAllele = vc.getAlternateAllele(0).toString(); - if (altAllele.length() == 0) { altAllele = "-"; } - - annotationsForRecord.put(haplotypeReferenceKey, refAllele); - annotationsForRecord.put(haplotypeAlternateKey, altAllele); - annotationsForRecord.put(startKey, String.format("%d", vc.getStart())); - annotationsForRecord.put(endKey, String.format("%d", vc.getEnd())); - - boolean isCodingRegion = annotationsForRecord.containsKey(inCodingRegionKey) && annotationsForRecord.get(inCodingRegionKey).equalsIgnoreCase("true") ? true : false; - boolean isFrameshift = (vc.getIndelLengths().get(0) % 3 == 0) ? false : true; - - String functionalClass; - if (isCodingRegion) { - functionalClass = isFrameshift ? "frameshift" : "inframe"; - annotationsForRecord.put(changesAAKey, "true"); - } else { - functionalClass = "noncoding"; - } - - annotationsForRecord.put(functionalClassKey, functionalClass); - } - - /** - * For each -B input file, for each record which overlaps the current locus, generates a - * set of annotations of the form: - * - * bindingName.columnName1=columnValue, bindingName.columnName2=columnValue2, etc. - * - * For example: dbSNP.avHet=0.7, dbSNP.ref_allele=A, etc. 
- * - * @return The following is an explanation of this method's return value: - * - * The annotations from a matching in a particular file are stored in a Map - * where the key is bindingName.columnName and the value is the columnValue. - * Since a single input file can have multiple records that overlap the current - * locus (eg. dbSNP can have multiple entries for the same genomic position), a different - * Map is created for each matching record in a particular file. - * The set of matching records for each file is then represented as a List> - * - * The return value of this method is a Map of the form: - * rodName1 -> List> - * rodName2 -> List> - * rodName3 -> List> - * ... - * Where the rodNames are the -B binding names for each file that were specified on the command line (eg. -B bindingName,AnnotatorInputTable,/path/to/file). - * - * NOTE: The lists (List>) are guaranteed to have size > 0 - * because a rodName -> List> entry will only - * be created in Map if the List has at least one element. - */ - public Map annotate(final RefMetaDataTracker tracker, - final ReferenceContext ref, - final Map stratifiedContexts, - final VariantContext vc) { - - //iterate over each record that overlaps the current locus, and, if it passes certain filters, - //add its values to the list of annotations for this locus. - final Map annotations = new HashMap(); - for(final GATKFeature gatkFeature : tracker.getAllRods()) - { - final String name = gatkFeature.getName(); - if( name.equals("variant") || name.equals("interval") ) { - continue; - } - - if( ! (gatkFeature.getUnderlyingObject() instanceof AnnotatorInputTableFeature) ) { - continue; //GenericAnnotation only works with TabularRODs because it needs to be able to select individual columns. 
- } - - final Map annotationsForRecord = convertRecordToAnnotations( gatkFeature.getName(), ((AnnotatorInputTableFeature) gatkFeature.getUnderlyingObject()).getColumnValues()); - - //If this record contains the HAPLOTYPE_REFERENCE_COLUMN and/or HAPLOTYPE_ALTERNATE_COLUMN, check whether the - //alleles specified match the the variant's reference allele and alternate allele. - //If they don't match, this record will be skipped, and its values will not be used for annotations. - // - //If one of these columns doesn't exist in the current rod, or if its value is * (star), then this is treated as an automatic match. - //Otherwise, the HAPLOTYPE_REFERENCE_COLUMN is only considered to be matching the variant's reference if the string values of the two - //are exactly equal (case-insensitive). - - //The HAPLOTYPE_REFERENCE_COLUMN matches the variant's reference allele based on a case-insensitive string comparison. - //The HAPLOTYPE_ALTERNATE_COLUMN can optionally list more than allele separated by one of these chars: ,\/:| - // only check this value for SNPs - String hapAltValue = vc.isSNP() ? annotationsForRecord.get( generateInfoFieldKey(name, HAPLOTYPE_ALTERNATE_COLUMN) ) : null; - if ( hapAltValue != null && !hapAltValue.equals("*") ) { - Set alternateAlleles = vc.getAlternateAlleles(); - //if(alternateAlleles.isEmpty()) { - //handle a site that has been called monomorphic reference - //alternateAlleles.add(vc.getReference()); - //continue; //TODO If this site is monomorphic in the VC, and the current record specifies a particular alternate allele, skip this record. Right? - //} else - if(alternateAlleles.size() > 1) { - throw new UserException.MalformedFile("File associated with " + vc.getSource() + " contains record [" + vc + "] contains " + alternateAlleles.size() + " alternate alleles. 
GenomicAnnotion currently only supports annotating 1 alternate allele."); - } - - Allele vcAlt; - if(alternateAlleles.isEmpty()) { - vcAlt = vc.getReference(); - } else { - vcAlt = alternateAlleles.iterator().next(); - } - - boolean matchFound = false; - for(String hapAlt : hapAltValue.split("[,\\\\/:|]")) { - - if(!hapAlt.isEmpty() && vcAlt.basesMatch(hapAlt)) { - matchFound = true; - break; - } - } - if(!matchFound) { - continue; //skip record - none of its alternate alleles match the variant's alternate allele - } - } - - // only check this value for SNPs - String hapRefValue = vc.isSNP() ? annotationsForRecord.get( generateInfoFieldKey(name, HAPLOTYPE_REFERENCE_COLUMN) ) : null; - if(hapRefValue != null) - { - hapRefValue = hapRefValue.trim(); - if(!hapRefValue.equals("*")) - { - //match against hapolotypeReference. - Allele vcRef = vc.getReference(); - if(!vcRef.basesMatch(hapRefValue)) { - continue; //skip record - } - } - } - - if (vc.isIndel()) { - modifyAnnotationsForIndels(vc, name, annotationsForRecord); - } - - //filters passed, so add this record. - List> listOfMatchingRecords = (List>) annotations.get( name ); - if(listOfMatchingRecords == null) { - listOfMatchingRecords = new LinkedList>(); - listOfMatchingRecords.add( annotationsForRecord ); - annotations.put(name, listOfMatchingRecords); - } else { - listOfMatchingRecords.add( annotationsForRecord ); - } - } - - return annotations; - } - - - - - /** - * Converts the given record to a set of key-value pairs of the form: - * bindingName.columnName1=column1Value, bindingName.columnName2=column2Value - * (eg. dbSNP.avHet=0.7, dbSNP.ref_allele=A) - * - * @param record AnnotatorInputTableFeature corresponding to one record in one -B input file. - * @param bindingName The binding name of the given AnnotatorInputTableFeature. - * @return The map of columnName -> columnValue pairs. 
- */ - public static Map convertRecordToAnnotations( String bindingName, Map record) { - final Map result = new HashMap(); - - for(final Entry entry : record.entrySet()) { - final String value = entry.getValue(); - if(!value.trim().isEmpty()) { - result.put( generateInfoFieldKey(bindingName, entry.getKey()), scrubInfoFieldValue(entry.getValue())); - } - } - - return result; - } - - /** - * Combines the 2 values into a full key. - * @param rodBindingName -B name - * @param columnName column name - * @return info field key - */ - public static String generateInfoFieldKey(String rodBindingName, String columnName ) { - return rodBindingName + '.' + columnName; - } - - - - /** - * Replaces any characters that are not allowed in the info field of a VCF file. - * - * @param value info field value - * @return the value with any illegal characters replaced by legal ones. - */ - private static String scrubInfoFieldValue(String value) { - for(int i = 0; i < GenomicAnnotation.ILLEGAL_INFO_FIELD_VALUES.length; i++) { - value = value.replace(GenomicAnnotation.ILLEGAL_INFO_FIELD_VALUES[i], GenomicAnnotation.ILLEGAL_INFO_FIELD_VALUE_SUBSTITUTES[i]); - } - - return value; - } - - - - public List getDescriptions() { - return Arrays.asList(new VCFInfoHeaderLine("GenericAnnotation", 1, VCFHeaderLineType.Integer, "For each variant in the 'variants' ROD, finds all entries in the other -B files that overlap the variant's position.")); - } - - public List getKeyNames() { - return Arrays.asList("GenericAnnotation"); - } - -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotator.java deleted file mode 100644 index b42310780..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotator.java +++ /dev/null @@ -1,287 +0,0 @@ -/* - * Copyright (c) 2010 The Broad Institute - * - * 
Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR - * THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - - -package org.broadinstitute.sting.gatk.walkers.annotator.genomicannotator; - -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Output; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.features.annotator.AnnotatorInputTableCodec; -import org.broadinstitute.sting.gatk.walkers.*; -import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine; -import org.broadinstitute.sting.utils.SampleUtils; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; -import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; -import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; -import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; - -import java.io.File; -import java.io.IOException; -import java.util.*; -import java.util.Map.Entry; - -/** - * Annotates variant calls with information from user-specified tabular files. 
- * - * For details, see: http://www.broadinstitute.org/gsa/wiki/index.php/GenomicAnnotator - */ -@Requires(value={DataSource.REFERENCE},referenceMetaData=@RMD(name="variant",type=VariantContext.class)) -@By(DataSource.REFERENCE) -public class GenomicAnnotator extends RodWalker implements TreeReducible { - - @Output(doc="File to which variants should be written",required=true) - protected VCFWriter vcfWriter = null; - - @Argument(fullName="vcfOutput", shortName="vcf", doc="Please use --out instead", required=false) - @Deprecated - protected String oldOutArg; - - @Argument(fullName="sampleName", shortName="sample", doc="The sample (NA-ID) corresponding to the variant input (for non-VCF input only)", required=false) - protected String sampleName = null; - - @Argument(fullName="select", shortName="s", doc="Optionally specifies which subset of columns from which -B inputs should be used for annotations. For example, -B:mydbsnp,AnnotatorInputTable /path/to/mydbsnp.txt -B:mytable,AnnotatorInputTable /path/mytable.txt -s mydbsnp.avHet,mydbsnp.name,mytable.column3 will cause annotations to only be generated from the 3 columns specified using -s.", required=false) - protected String[] SELECT_COLUMNS = {}; - - @Argument(fullName="join", shortName="J", doc="Optionally specifies a file and column within that file that should be LEFT-JOIN'ed to a column in a previously-specified file. The file provided to -J must be tab-delimited, with the first non-comment/non-empty line containing column names. (example: -B:name,AnnotatorInputTable /path/to/file1 -J name2,/path/to/file2,name.columnName=name2.columnName2 - this will join the table in file2 to the table in file1) ", required=false) - protected String[] JOIN_ARGS = {}; - - @Argument(fullName="oneToMany", shortName="m", doc="If more than one record from the same file matches a particular locus (for example, multiple dbSNP records with the same position), create multiple entries in the ouptut VCF file - one for each match. 
If a particular tabular file has J matches, and another tabular file has K matches for a given locus, then J*K output VCF records will be generated - one for each pair of K, J. If this flag is not provided, the multiple records are still generated, but they are stored in the INFO field of a single output VCF record, with their annotation keys differentiated by appending '_i' with i varying from 1 to K*J. ", required=false) - protected Boolean ONE_TO_MANY = false; - - @Argument(fullName="maxJoinTableSize", shortName="maxJoin", doc="The maximum allowed size (i.e. number of rows) for a table provided with the -J argument", required=false) - protected Integer MAX_JOIN_TABLE_SIZE = 500000; - - @Argument(fullName="ignoreFilteredSites", shortName="noFilt", doc="If specified, don't annotate sites marked as filtered out") - protected Boolean IGNORE_FILTERED_SITES = false; - - private VariantAnnotatorEngine engine; - - /** - * Prepare the output file and the list of available features. - */ - public void initialize() { - - //read all ROD file headers and construct a set of all column names to be used for validation of command-line args - final Set allFullyQualifiedColumnNames = new LinkedHashSet(); - final Set allBindingNames = new LinkedHashSet(); - for(ReferenceOrderedDataSource ds : getToolkit().getRodDataSources()) { - if(! ds.getType().equals(AnnotatorInputTableCodec.class)) { - continue; //skip all non-AnnotatorInputTable files. - } - final String bindingName = ds.getName(); - File file = ds.getFile(); - allBindingNames.add(bindingName); - try { - final ArrayList header = AnnotatorInputTableCodec.readHeader(file); - for(String columnName : header) { - allFullyQualifiedColumnNames.add(bindingName + "." + columnName); - } - } catch(IOException e) { - throw new UserException.CouldNotReadInputFile(file, "Failed when attempting to read file header. ", e); - } - } - - //parse the JOIN_COLUMNS args, read in the specified files, and validate column names in the = relation. 
This end result of this loop is to populate the List of joinTables with one entry per -J arg. - final List joinTables = new LinkedList(); - for(String joinArg : JOIN_ARGS) { - - //parse the tokens - final String[] arg = joinArg.split(","); - if(arg.length != 3) { - throw new UserException.BadArgumentValue("-J", "The following -J arg: \"" + joinArg + "\" must contain 3 comma-separated values. (ex: -J name,/path/to/file,name.columnName=name2.columnName2)"); - } - final String bindingName = arg[0]; - final String filename = arg[1]; - final String columnsToJoin = arg[2]; - - if(allBindingNames.contains(bindingName)) { - throw new UserException.BadArgumentValue("-J", "The name \"" + bindingName + "\" in the -J arg: \"" + joinArg + "\" has already been used in another binding."); - } - - String[] splitOnEquals = columnsToJoin.split("=+"); - if(splitOnEquals.length != 2) { - throw new UserException.BadArgumentValue("-J", "The -J arg: \"" + joinArg + "\" must specify the columns to join on. (ex: -J name,/path/to/file,name.columnName=name2.columnName2)"); - } - - String[] splitOnDot1 = splitOnEquals[0].split("\\."); - String[] splitOnDot2 = splitOnEquals[1].split("\\."); - if(splitOnDot1.length != 2 || splitOnDot2.length != 2) { - throw new UserException.BadArgumentValue("-J", "The -J arg: \"" + joinArg + "\" must fully specify the columns to join on. 
(ex: -J name,/path/to/file,name.columnName=name2.columnName2)"); - } - - final String bindingName1 = splitOnDot1[0]; - final String columnName1 = splitOnDot1[1]; - final String bindingName2 = splitOnDot2[0]; - final String columnName2 = splitOnDot2[1]; - - //figure out which of the 2 binding names within the = relation matches the -J bindingName - final String localBindingName = bindingName; //alias - final String localColumnName; - final String externalBindingName; - final String externalColumnName; - if(bindingName1.equals(bindingName)) { - localColumnName = columnName1; - externalBindingName = bindingName2; - externalColumnName = columnName2; - } else if(bindingName2.equals(bindingName)) { - localColumnName = columnName2; - externalBindingName = bindingName1; - externalColumnName = columnName1; - } else { - throw new UserException.BadArgumentValue("-J", "The name \"" + bindingName + "\" in the -J arg: \"" + joinArg + "\" must be specified in one the columns to join on. (ex: -J name,/path/to/file,name.columnName=name2.columnName2)"); - } - - //validate externalColumnName - final String fullyQualifiedExternalColumnName = externalBindingName + '.' + externalColumnName; - if( !allFullyQualifiedColumnNames.contains(fullyQualifiedExternalColumnName) ) { - throw new UserException.BadArgumentValue("-J", "The -J arg: \"" + joinArg + "\" specifies an unknown column name: \"" + fullyQualifiedExternalColumnName + "\""); - } - - //read in the file contents into a JoinTable object - final JoinTable joinTable = new JoinTable(MAX_JOIN_TABLE_SIZE); - joinTable.parseFromFile(filename, localBindingName, localColumnName, externalBindingName, externalColumnName); - joinTables.add(joinTable); - - //validate localColumnName, and add all column names in this file to the list of allFullyQualifiedColumnNames so that they can be referenced from subsequent -J args. 
- final List columnNames = joinTable.getColumnNames(); - final List fullyQualifiedColumnNames = new LinkedList(); - boolean found = false; - for ( String columnName : columnNames ) { - if ( columnName.equals(localColumnName) ) - found = true; - fullyQualifiedColumnNames.add(localBindingName + '.' + columnName); - } - if ( !found ) - throw new UserException.BadArgumentValue("-J", "The -J arg: \"" + joinArg + "\" specifies an unknown column name: \"" + localColumnName + "\". It's not one of the column names in the header " + columnNames + " of the file: " + filename); - - allFullyQualifiedColumnNames.addAll(fullyQualifiedColumnNames); - } - - //parse the SELECT_COLUMNS arg and validate the column names - List parsedSelectColumns = new LinkedList(); - for ( String token : SELECT_COLUMNS ) - parsedSelectColumns.addAll(Arrays.asList(token.split(","))); - SELECT_COLUMNS = parsedSelectColumns.toArray(SELECT_COLUMNS); - - for ( String columnName : SELECT_COLUMNS ) { - if ( !allFullyQualifiedColumnNames.contains(columnName) ) - throw new UserException.BadArgumentValue("-s", "The column name '" + columnName + "' provided to -s doesn't match any of the column names in any of the -B files. 
Here is the list of available column names: " + allFullyQualifiedColumnNames); - } - - //instantiate the VariantAnnotatorEngine - ArrayList annotationsToUse = new ArrayList(); - annotationsToUse.add("GenomicAnnotation"); - engine = new VariantAnnotatorEngine(getToolkit(), new ArrayList(), annotationsToUse); - engine.setOneToMany(ONE_TO_MANY); - engine.setRequestedColumns(SELECT_COLUMNS); - engine.setJoinTables(joinTables); - - // set up the header fields - Set hInfo = new HashSet(); - hInfo.addAll(VCFUtils.getHeaderFields(getToolkit(), Arrays.asList("variant"))); - hInfo.addAll(engine.getVCFAnnotationDescriptions()); - - Set rodName = new HashSet(); - rodName.add("variant"); - Set samples = SampleUtils.getUniqueSamplesFromRods(getToolkit(), rodName); - VCFHeader vcfHeader = new VCFHeader(hInfo, samples); - vcfWriter.writeHeader(vcfHeader); - } - - /** - * Initialize the number of loci processed to zero. - * - * @return 0 - */ - public Integer reduceInit() { return 0; } - - /** - * We want reads that span deletions - * - * @return true - */ - public boolean includeReadsWithDeletionAtLoci() { return true; } - - /** - * For each site of interest, annotate based on the requested annotation types - * - * @param tracker the meta-data tracker - * @param ref the reference base - * @param context the context for the given locus - * @return 1 if the locus was successfully processed, 0 if otherwise - */ - public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - if ( tracker == null ) - return 0; - - Set results = new LinkedHashSet(); - for (VariantContext vc : tracker.getVariantContexts(ref, "variant", null, context.getLocation(), true, false)) { - if ( (vc.isFiltered() && IGNORE_FILTERED_SITES) || - (vc.isVariant() && !vc.isBiallelic()) ) { - results.add(vc); - } else { - Map stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(context); - if ( stratifiedContexts != null ) - 
results.addAll(engine.annotateContext(tracker, ref, stratifiedContexts, vc)); - else - results.add(vc); - } - } - - for ( VariantContext vc : results ) - vcfWriter.add(vc ,ref.getBase()); - - return 1; - } - - public Integer reduce(Integer value, Integer sum) { - return sum + value; - } - - public Integer treeReduce(Integer lhs, Integer rhs) { - return lhs + rhs; - } - - public void onTraversalDone(Integer sum) { - - //out.printf("Generated %d annotated VCF records.\n", totalOutputVCFRecords); - Map inputTableHitCounter = engine.getInputTableHitCounter(); - for ( Entry e : inputTableHitCounter.entrySet() ) { - final String bindingName = e.getKey(); - final int counter = e.getValue(); - //final float percent = 100 * counter /(float) totalOutputVCFRecords; - //out.printf(" %-6.1f%% (%d) annotated with %s.\n", percent, counter, bindingName ); - System.out.printf(" %d annotated with %s.\n", counter, bindingName ); - } - } -} - diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/JoinTable.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/JoinTable.java deleted file mode 100755 index 714f374cf..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/JoinTable.java +++ /dev/null @@ -1,226 +0,0 @@ -/* - * Copyright (c) 2010 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR - * THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.walkers.annotator.genomicannotator; - -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.exceptions.UserException; - -import java.io.BufferedReader; -import java.io.File; -import java.io.FileReader; -import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; - -/** - * This is a container that holds all data corresponding to a single join table as specified by one -J arg (ex: -J bindingName1,/path/to/file,bindingName1.columnName=bindingName2.columnName2). - * Some terminology: - * 'bindingName' is an arbitrary label for a given table that is specified on the command line with either the -B or -J arg. - * In the example above, bindingName1 is the 'local' binding name because it is attached to the join table file provided with this -J arg. bindingName2 is the 'external' binding name because - * it corresponds to some other table specified previously with another -B or -J arg. - * - * The JoinTable object stores a map entry for each record in the join table. The entry's key is the value of the join column in a given record (eg. bindingName1.columnName in the above example), - * and the entry value is an ArrayList representing the entire join table record. 
- * The JoinTable object also stores some other join table parameters such as the column names that were parsed out of the file header, and the bindingNames and columnNames from the -J arg. - * - * The join operation is performed by looking up the value of the join column in the external table (the one that this table is being joined to), and then using this value to do a lookup - * on the map - if there's a hit, it will provide the record from the join table that is to be joined with the record in the external table. - * - * More information can be found here: http://www.broadinstitute.org/gsa/wiki/index.php/GenomicAnnotator - */ -public class JoinTable -{ - //the list of join table column names parsed out of the file header. - private List columnNames; //not fully-qualified - - private String localBindingName; - private String externalBindingName; - private String externalColumnName; - - //stores a map entry for each record in the join table. The entry's key is the value of the join column in a given record (eg. bindingName.columnName in the above example), - //and the entry value is an ArrayList representing the entire join table record. - private HashMap> joinColumnValueToRecords = new HashMap>(); - - private int maxSize; - private boolean parsedFromFile = false; - - public JoinTable(int maxSize) { - this.maxSize = maxSize; - } - - /** - * Parses the table from the given file using the JoinTableParser. - * - * @param filename The file containing the table. - * @param localBindingName The binding name within the given file to join on. - * @param localColumnName The column name within the given file to join on. - * @param externalBindingName The binding name of another file (previously specified with either -B or -J). - * @param externalColumnName The column name in this other file to join on. 
- */ - public void parseFromFile(String filename, String localBindingName, String localColumnName, String externalBindingName, String externalColumnName) { - if(parsedFromFile) { - throw new ReviewedStingException("parseFromFile(" + filename +", ..) called more than once"); - } - parsedFromFile = true; - - setLocalBindingName(localBindingName); - setExternalBindingName(externalBindingName); - setExternalColumnName(externalColumnName); - - BufferedReader br = null; - try - { - br = new BufferedReader(new FileReader(filename)); - final JoinTableParser parser = new JoinTableParser(); - - //read in the header - columnNames = parser.readHeader(br); - - //get the index of the localJoinColumnName - int localColumnNameIdx = -1; - for(int i = 0; i < columnNames.size(); i++) { - final String columnName = columnNames.get(i); - if(columnName.equals(localColumnName)) { - localColumnNameIdx = i; - break; - } - } - - if(localColumnNameIdx == -1) { - throw new UserException.BadArgumentValue("-J", "The -J arg specifies an unknown column name: \"" + localColumnName + "\". 
It's not one of the column names in the header " + columnNames + " of the file: " + filename); - } - - //read in all records and create a map entry for each - String line; - while((line = br.readLine()) != null) { - final ArrayList columnValues = parser.parseLine(line); - if ( columnValues.size() < columnNames.size() ) - throw new UserException.BadInput("the file: " + filename + " is malformed as there are not a sufficient number of columns for this line: " + line); - final String joinColumnValue = columnValues.get(localColumnNameIdx); - put(joinColumnValue, columnValues, filename); - } - } - catch(IOException e) - { - throw new UserException.CouldNotReadInputFile(new File(filename), "Unable to parse file", e); - } - finally - { - try { - if(br != null) { - br.close(); - } - } catch(IOException e) { - throw new ReviewedStingException("Unable to close file: " + filename, e); - } - } - } - - /** - * If the -J arg was: -J bindingName1,/path/to/file,bindingName1.columnName=bindingName2.columnName2, - * this returns bindingName1. - * @return local binding name - */ - public String getLocalBindingName() { - return localBindingName; - } - - public void setLocalBindingName(String localBindingName) { - this.localBindingName = localBindingName; - } - - /** - * @return the list of join table column names parsed out of the file header. - */ - public List getColumnNames() { - return columnNames; //not fully-qualified - } - - protected void setColumnNames(List columnNames) { - this.columnNames = columnNames; - } - - /** - * If the -J arg was: -J bindingName1,/path/to/file,bindingName1.columnName=bindingName2.columnName2, - * this returns columnName2. 
- * @return external column name - */ - public String getExternalColumnName() { - return externalColumnName; - } - - protected void setExternalColumnName( - String externalColumnName) { - this.externalColumnName = externalColumnName; - } - - /** - * If the -J arg was: -J bindingName1,/path/to/file,bindingName1.columnName=bindingName2.columnName2, - * this returns bindingName2. - * @return external binding name - */ - public String getExternalBindingName() { - return externalBindingName; - } - - protected void setExternalBindingName( - String externalBindingName) { - this.externalBindingName = externalBindingName; - } - - /** - * Whether any join table records have the given value in the join column. - * @param joinColumnValue value - * @return true if the given name value exists in the file - */ - public boolean containsJoinColumnValue(String joinColumnValue) { - return joinColumnValueToRecords.containsKey(joinColumnValue); - } - - /** - * Returns all records in the table where the join column has the given value. - * @param joinColumnValue column value - * @return row - */ - public ArrayList get(String joinColumnValue) { - return joinColumnValueToRecords.get(joinColumnValue); - } - - /** - * Adds the given record to the map. 
- * @param joinColumnValue value - * @param record row - * @param filename the source file name - */ - protected void put(String joinColumnValue, ArrayList record, String filename) { - if ( joinColumnValueToRecords.containsKey(joinColumnValue) ) - throw new UserException.BadInput("the file " + filename + " contains non-unique entries for the requested column, which isn't allowed."); - joinColumnValueToRecords.put(joinColumnValue, record); - if ( joinColumnValueToRecords.size() > maxSize ) - throw new UserException.BadInput("the file " + filename + " contains more than the maximum number (" + maxSize + ") of allowed rows (see the --maxJoinTableSize argument)."); - } -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/JoinTableParser.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/JoinTableParser.java deleted file mode 100755 index 3b6c87f90..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/JoinTableParser.java +++ /dev/null @@ -1,131 +0,0 @@ -/* - * Copyright (c) 2010 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR - * THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.walkers.annotator.genomicannotator; - -import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.exceptions.UserException; - -import java.io.BufferedReader; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; - -/** - * Used to parse files passed to the GenomicAnnotator via the -J arg. - * The files must be tab-delimited, and the first non-empty/non-commented line - * must be a header containing column names. - * - * More information can be found here: http://www.broadinstitute.org/gsa/wiki/index.php/GenomicAnnotator - */ -public class JoinTableParser -{ - public static final String DELIMITER = "\t"; - - private List header; //column names parsed out of the header line - - - /** - * Constructor. - */ - public JoinTableParser() {} - - /** - * Returns the header and returns it. - * @param br source - * @return column names - * @throws IOException on read - */ - public List readHeader(BufferedReader br) throws IOException - { - if(header != null) { - throw new ReviewedStingException("readHeader(..) called more than once. Header is currently set to: " + header); - } - - header = Collections.unmodifiableList(parseHeader(br)); - - return header; - } - - - /** - * @return A list containing the column names. - */ - public List getHeader() { - return header; - } - - - /** - * Parses the line into an ArrayList containing the values for each column. 
- * - * @param line to parse - * @return tokens - */ - public ArrayList parseLine(String line) { - - final ArrayList values = Utils.split(line, DELIMITER, header.size()); - - if ( values.size() != header.size() ) { - throw new UserException.MalformedFile(String.format("Encountered a row with %d columns which is different from the number or columns in the header: %d\nHeader: " + header + "\nLine: " + values, values.size(), header.size())); - } - - return values; - } - - - /** - * Returns the header. - * @param br The file to read. - * @return ArrayList containing column names from the header. - * @throws IOException on reading - */ - public static ArrayList parseHeader(final BufferedReader br) throws IOException - { - ArrayList header = null; - - //find the 1st line that's non-empty and not a comment - String line; - while( (line = br.readLine()) != null ) { - line = line.trim(); - if ( line.isEmpty() || line.startsWith("#") ) { - continue; - } - - //parse the header - header = Utils.split(line, DELIMITER); - break; - } - - // check that header was found - if ( header == null ) { - throw new IllegalArgumentException("No header in " + br + ". 
All lines are either comments or empty."); - } - - return header; - } -} \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/TranscriptToGenomicInfo.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/TranscriptToGenomicInfo.java deleted file mode 100755 index 0bbfa51b4..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/TranscriptToGenomicInfo.java +++ /dev/null @@ -1,1032 +0,0 @@ -/* - * Copyright (c) 2010, The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- */ - -package org.broadinstitute.sting.gatk.walkers.annotator.genomicannotator; - -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Output; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.features.annotator.AnnotatorInputTableCodec; -import org.broadinstitute.sting.gatk.refdata.features.annotator.AnnotatorInputTableFeature; -import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; -import org.broadinstitute.sting.gatk.walkers.*; -import org.broadinstitute.sting.utils.BaseUtils; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.exceptions.UserException; - -import java.io.IOException; -import java.io.PrintStream; -import java.util.*; - -/** - * Takes a table of transcripts (eg. UCSC refGene, knownGene, and CCDS tables) and generates the big table which contains - * annotations for each possible variant at each transcript position (eg. 4 variants at each genomic position). - * - * Required args: - * -B - specifies the input file (ex. -B transcripts,AnnotatorInputTable,/path/to/transcript_table_file.txt) - * -n - Specifies which column(s) from the transcript table contain the gene name(s). (ex. -n name,name2 (for the UCSC refGene table)) - * WARNING: The gene names for each record, when taken together, should provide a unique id for that record relative to all other records in the file. - * - * - * The map & reduce types are both TreeMap. - * Each TreeMap entry represents one line in the output file. 
The TreeMap key is a combination of a given output line's position (so that this key can be used to sort all output lines - * by reference order), as well as allele and gene names (so that its unique across all output lines). The String value is the output line itself. - */ -@Reference(window=@Window(start=-4,stop=4)) -@By(DataSource.REFERENCE) -@Requires(value={DataSource.REFERENCE}, referenceMetaData={ @RMD(name=TranscriptToGenomicInfo.ROD_NAME,type=AnnotatorInputTableFeature.class) } ) -public class TranscriptToGenomicInfo extends RodWalker { - public static final String ROD_NAME = "transcripts"; - - //@Argument(fullName="pass-through", shortName="t", doc="Optionally specifies which columns from the transcript table should be copied verbatim (aka. passed-through) to the records in the output table. For example, -B transcripts,AnnotatorInputTable,/data/refGene.txt -t id will cause the refGene id column to be copied to the output table.", required=false) - //protected String[] PASS_THROUGH_COLUMNS = {}; - - @Output - private PrintStream out; - - @Argument(fullName="unique-gene-name-columns", shortName="n", doc="Specifies which column(s) from the transcript table contains the gene name(s). For example, -B transcripts,AnnotatorInputTable,/data/refGene.txt -n name,name2 specifies that the name and name2 columns are gene names. WARNING: the gene names for each record, when taken together, should provide a unique id for that record relative to all other records in the file. If this is not the case, an error will be thrown. 
", required=true) - private String[] GENE_NAME_COLUMNS = {}; - - private final char[] ALLELES = {'A','C','G','T'}; - - /** Output columns */ - private static final String[] GENOMIC_ANNOTATION_COLUMNS = { - GenomicAnnotation.CHR_COLUMN, - GenomicAnnotation.START_COLUMN, - GenomicAnnotation.END_COLUMN, - GenomicAnnotation.HAPLOTYPE_REFERENCE_COLUMN, - GenomicAnnotation.HAPLOTYPE_ALTERNATE_COLUMN}; - - private static final String OUTPUT_TRANSCRIPT_STRAND = "transcriptStrand"; //rg. +/- - private static final String OUTPUT_IN_CODING_REGION = "inCodingRegion"; //eg. true - private static final String OUTPUT_FRAME = "frame"; //eg. 0,1,2 - private static final String OUTPUT_POSITION_TYPE = "positionType"; //eg. utr5, cds, utr3, intron, intergenic - private static final String OUTPUT_MRNA_COORD = "mrnaCoord"; //1-based offset within the transcript - private static final String OUTPUT_SPLICE_DISTANCE = "spliceDist"; //eg. integer, bp to nearest exon/intron boundary - private static final String OUTPUT_CODON_NUMBER = "codonCoord"; //eg. 20 - private static final String OUTPUT_REFERENCE_CODON = "referenceCodon"; - private static final String OUTPUT_REFERENCE_AA = "referenceAA"; - private static final String OUTPUT_VARIANT_CODON = "variantCodon"; - private static final String OUTPUT_VARIANT_AA = "variantAA"; - private static final String OUTPUT_CHANGES_AMINO_ACID = "changesAA"; //eg. true - private static final String OUTPUT_FUNCTIONAL_CLASS = "functionalClass"; //eg. 
missense - private static final String OUTPUT_CODING_COORD_STR = "codingCoordStr"; - private static final String OUTPUT_PROTEIN_COORD_STR = "proteinCoordStr"; - private static final String OUTPUT_SPLICE_INFO = "spliceInfo"; //(eg "splice-donor -4", or "splice-acceptor 3") for the 10bp surrounding each exon/intron boundary - private static final String OUTPUT_UORF_CHANGE = "uorfChange"; // (eg +1 or -1, indicating the addition or interruption of an ATG trinucleotide in the annotated utr5) - private static final String[] TRANSCRIPT_COLUMNS = { - OUTPUT_TRANSCRIPT_STRAND, - OUTPUT_POSITION_TYPE, - OUTPUT_FRAME, - OUTPUT_MRNA_COORD, - OUTPUT_CODON_NUMBER, - OUTPUT_SPLICE_DISTANCE, - OUTPUT_REFERENCE_CODON, - OUTPUT_REFERENCE_AA, - OUTPUT_VARIANT_CODON, - OUTPUT_VARIANT_AA, - OUTPUT_CHANGES_AMINO_ACID, - OUTPUT_FUNCTIONAL_CLASS, - OUTPUT_CODING_COORD_STR, - OUTPUT_PROTEIN_COORD_STR, - OUTPUT_IN_CODING_REGION, - OUTPUT_SPLICE_INFO, - OUTPUT_UORF_CHANGE }; - - //This list specifies the order of output columns in the big table. - private final List outputColumnNames = new LinkedList(); - - private int transcriptsProcessedCounter = 0; - - private long transcriptsThatDontStartWithMethionineOrEndWithStopCodonCounter = 0; - private long transcriptsThatDontStartWithMethionineCounter = 0; - private long transcriptsThatDontEndWithStopCodonCounter = 0; - private long skippedTranscriptCounter = 0; - - private long skippedPositionsCounter = 0; - private long totalPositionsCounter = 0; - - /** Possible values for the "POSITION_TYPE" output column. */ - private enum PositionType { - intergenic, intron, utr5, CDS, utr3, non_coding_exon, non_coding_intron - } - - /** - * Store rods until we hit their ends so that we don't have to recompute - * basic information every time we see them in map(). - */ - private Map storedTranscriptInfo = new HashMap(); - - /** - * Prepare the output file and the list of available features. 
- */ - public void initialize() { - - //parse the GENE_NAME_COLUMNS arg and validate the column names - final List parsedGeneNameColumns = new LinkedList(); - for(String token : GENE_NAME_COLUMNS) { - parsedGeneNameColumns.addAll(Arrays.asList(token.split(","))); - } - GENE_NAME_COLUMNS = parsedGeneNameColumns.toArray(GENE_NAME_COLUMNS); - - ReferenceOrderedDataSource transcriptsDataSource = null; - for(ReferenceOrderedDataSource ds : getToolkit().getRodDataSources()) { - if(ds.getName().equals(ROD_NAME)) { - transcriptsDataSource = ds; - break; - } - } - - // sanity check - if ( transcriptsDataSource == null ) - throw new IllegalStateException("No rod bound to " + ROD_NAME + " found in rod sources"); - - final ArrayList header; - try { - header = AnnotatorInputTableCodec.readHeader(transcriptsDataSource.getFile()); - } catch(Exception e) { - throw new UserException.MalformedFile(transcriptsDataSource.getFile(), "Failed when attempting to read header from file", e); - } - - for ( String columnName : GENE_NAME_COLUMNS ) { - if ( !header.contains(columnName) ) - throw new UserException.CommandLineException("The column name '" + columnName + "' provided to -n doesn't match any of the column names in: " + transcriptsDataSource.getFile()); - } - - //init outputColumnNames list - outputColumnNames.addAll(Arrays.asList(GENOMIC_ANNOTATION_COLUMNS)); - outputColumnNames.addAll(Arrays.asList(GENE_NAME_COLUMNS)); - outputColumnNames.addAll(Arrays.asList(TRANSCRIPT_COLUMNS)); - - //init OUTPUT_HEADER_LINE - StringBuilder outputHeaderLine = new StringBuilder(); - for( final String column : outputColumnNames ) { - if(outputHeaderLine.length() != 0) { - outputHeaderLine.append( AnnotatorInputTableCodec.DELIMITER ); - } - outputHeaderLine.append(column); - } - - out.println(outputHeaderLine.toString()); - } - - public Integer reduceInit() { return 0; } - - /** - * For each site of interest, generate the appropriate fields. 
- * - * @param tracker the meta-data tracker - * @param ref the reference base - * @param context the context for the given locus - * @return 1 if the locus was successfully processed, 0 if otherwise - */ - public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - if ( tracker == null ) - return 0; - - final Collection rods = tracker.getBoundRodTracks(); - //if there's nothing overlapping this locus, skip it. - if ( rods.size() == 0 ) - return 0; - - final List transcriptRODs = tracker.getReferenceMetaData(ROD_NAME); - - //there may be multiple transcriptRODs that overlap this locus - for ( Object transcriptRodObject : transcriptRODs ) { - //parse this ROD if it hasn't been already. - final AnnotatorInputTableFeature transcriptRod = (AnnotatorInputTableFeature) transcriptRodObject; - String featureKey = transcriptRod.toString(); - - TranscriptTableRecord parsedTranscriptRod = storedTranscriptInfo.get(featureKey); - if ( parsedTranscriptRod == null ) { - parsedTranscriptRod = new TranscriptTableRecord(transcriptRod, GENE_NAME_COLUMNS); - storedTranscriptInfo.put(featureKey, parsedTranscriptRod); - } - - //populate parsedTranscriptRod.txSequence - if(parsedTranscriptRod.positiveStrand) { - parsedTranscriptRod.txSequence.append((char)ref.getBase()); - } else { - final char complementBase = (char)BaseUtils.simpleComplement(ref.getBase()); - parsedTranscriptRod.txSequence.insert(0, complementBase); - } - - //populate parsedTranscriptRod.utr5Sequence and parsedTranscriptRod.cdsSequence - final int position = (int) ref.getLocus().getStart(); - if(parsedTranscriptRod.isProteinCodingTranscript() && parsedTranscriptRod.isWithinExon(position) ) - { - //we're within an exon of a proteinCodingTranscript - - if(parsedTranscriptRod.positiveStrand) - { - if(position < parsedTranscriptRod.cdsStart) - { - parsedTranscriptRod.utr5Sequence.append((char)ref.getBase()); //within utr5 - } - else if(position >= parsedTranscriptRod.cdsStart && 
position <= parsedTranscriptRod.cdsEnd) - { - parsedTranscriptRod.cdsSequence.append((char)ref.getBase()); //within CDS - } - } - else - { - final char complementBase = (char)BaseUtils.simpleComplement(ref.getBase()); - if(position > parsedTranscriptRod.cdsEnd) - { - //As we move left to right (aka. 3' to 5'), we do insert(0,..) to reverse the sequence so that it become 5' to 3' in parsedTranscriptRod.utr5Sequence. - parsedTranscriptRod.utr5Sequence.insert(0,complementBase); //within utr5. - } - else if(position >= parsedTranscriptRod.cdsStart && position <= parsedTranscriptRod.cdsEnd) - { - parsedTranscriptRod.cdsSequence.insert(0,complementBase); //within CDS - } - } - } - - if ( position == parsedTranscriptRod.txEnd ) { - //we've reached the end of the transcript - compute all data and write it out. - try { - generateOutputRecordsForROD(parsedTranscriptRod); - } - catch(IOException e) { - throw new RuntimeException(Thread.currentThread().getName() + " - Unexpected error occurred at position: [" + parsedTranscriptRod.txChrom + ":" + position + "] in transcript: " + parsedTranscriptRod, e); - } - - // remove it from the cache - storedTranscriptInfo.remove(featureKey); - - transcriptsProcessedCounter++; - if ( transcriptsProcessedCounter % 100 == 0 ) - logger.info(new Date() + ": " + transcriptsProcessedCounter + " transcripts processed"); - } - } - - return 1; - } - - private static boolean isChrM(final TranscriptTableRecord record) { - return record.txChrom.equals("chrM") || record.txChrom.equals("MT")|| record.txChrom.equals("CRS"); - } - - private void generateOutputRecordsForROD(TranscriptTableRecord parsedTranscriptRod) throws IOException { - //Transcripts that don't produce proteins are indicated in transcript by cdsStart == cdsEnd - //These will be handled by generating only one record, with haplotypeAlternate == "*". 
- final boolean isProteinCodingTranscript = parsedTranscriptRod.isProteinCodingTranscript(); - final boolean isMitochondrialTranscript = isChrM(parsedTranscriptRod); - - final boolean positiveStrand = parsedTranscriptRod.positiveStrand; //alias - - - if(isProteinCodingTranscript && parsedTranscriptRod.cdsSequence.length() % 3 != 0) { - if (!isMitochondrialTranscript) { - logger.error("ERROR: Transcript " + parsedTranscriptRod +" at position ["+ parsedTranscriptRod.txChrom + ":" +parsedTranscriptRod.txStart + "-" + parsedTranscriptRod.txEnd + "] has " + parsedTranscriptRod.cdsSequence.length() + " nucleotides in its CDS region, which is not divisible by 3. Skipping..."); - //discard transcripts where CDS length is not a multiple of 3 - skippedTranscriptCounter++; - return; - } else { - - //In mitochondrial genes, the polyA tail may complete the stop codon, allowing transcript . To check for this special case: - //1. check that the CDS covers the entire transcript - //2. add 1 or 2 A's to the 3' end of the transcript (as needed to make it divisible by 3) - //3. check whether the last 3 letters now form a stop codon using the mitochondrial AA table - //4. If not, skip this gene, else incorporate the A's and process it like any other gene. 
- - if( parsedTranscriptRod.txSequence.length() == parsedTranscriptRod.cdsSequence.length()) { - do { //append A's until sequence length is divisible by 3 - parsedTranscriptRod.txSequence.append('*'); - parsedTranscriptRod.cdsSequence.append('a'); - if(positiveStrand) { - parsedTranscriptRod.txEnd++; - parsedTranscriptRod.cdsEnd++; - parsedTranscriptRod.exonEnds[0]++; - } else { - parsedTranscriptRod.txStart--; - parsedTranscriptRod.cdsStart--; - parsedTranscriptRod.exonStarts[0]--; - } - } while( parsedTranscriptRod.cdsSequence.length() % 3 != 0); - - } else { - logger.error("ERROR: Mitochnodrial transcript " + parsedTranscriptRod +" at position ["+ parsedTranscriptRod.txChrom + ":" +parsedTranscriptRod.txStart + "-" + parsedTranscriptRod.txEnd + "] has " + parsedTranscriptRod.cdsSequence.length() + " nucleotides in its CDS region, which is not divisible by 3. The CDS does not cover the entire transcript, so its not possible to use A's from the polyA tail. Skipping..."); - skippedTranscriptCounter++; - return; - } - } - } - - - //warn if the first codon isn't Methionine and/or the last codon isn't a stop codon. - if(isProteinCodingTranscript) { - final int cdsSequenceLength = parsedTranscriptRod.cdsSequence.length(); - - final String firstCodon = parsedTranscriptRod.cdsSequence.substring(0, 3); - final AminoAcid firstAA = isMitochondrialTranscript ? AminoAcidTable.getMitochondrialAA( firstCodon, true ) : AminoAcidTable.getEukaryoticAA( firstCodon ) ; - - final String lastCodon = parsedTranscriptRod.cdsSequence.substring(cdsSequenceLength - 3, cdsSequenceLength); - final AminoAcid lastAA = isMitochondrialTranscript ? 
AminoAcidTable.getMitochondrialAA( lastCodon, false ) : AminoAcidTable.getEukaryoticAA( lastCodon ) ; - - if( firstAA != AminoAcidTable.METHIONINE && !lastAA.isStop()) { - transcriptsThatDontStartWithMethionineOrEndWithStopCodonCounter++; - logger.warn("WARNING: The CDS of transcript " + parsedTranscriptRod.geneNames[0] +" at position ["+ parsedTranscriptRod.txChrom + ":" +parsedTranscriptRod.txStart + "-" + parsedTranscriptRod.txEnd + "] does not start with Methionine or end in a stop codon. The first codon is: " + firstCodon + " (" + firstAA + "). The last codon is: " + lastCodon + " (" + lastAA + "). NOTE: This is just a warning - the transcript will be included in the output."); - } else if( firstAA != AminoAcidTable.METHIONINE) { - transcriptsThatDontStartWithMethionineCounter++; - logger.warn("WARNING: The CDS of transcript " + parsedTranscriptRod.geneNames[0] +" at position ["+ parsedTranscriptRod.txChrom + ":" +parsedTranscriptRod.txStart + "-" + parsedTranscriptRod.txEnd + "] does not start with Methionine. The first codon is: " + firstCodon + " (" + firstAA + "). NOTE: This is just a warning - the transcript will be included in the output."); - } else if(!lastAA.isStop()) { - transcriptsThatDontEndWithStopCodonCounter++; - logger.warn("WARNING: The CDS of transcript " + parsedTranscriptRod.geneNames[0] +" at position ["+ parsedTranscriptRod.txChrom + ":" +parsedTranscriptRod.txStart + "-" + parsedTranscriptRod.txEnd + "] does not end in a stop codon. The last codon is: " + lastCodon + " (" + lastAA + "). NOTE: This is just a warning - the transcript will be included in the output."); - } - } - - final int txStart_5prime = positiveStrand ? parsedTranscriptRod.txStart : parsedTranscriptRod.txEnd; //1-based, inclusive - final int txEnd_3prime = positiveStrand ? parsedTranscriptRod.txEnd : parsedTranscriptRod.txStart; //1-based, inclusive - final int increment_5to3 = positiveStrand ? 
1 : -1; //whether to increment or decrement - final int strandSign = increment_5to3; //alias - - final int cdsStart_5prime = positiveStrand ? parsedTranscriptRod.cdsStart : parsedTranscriptRod.cdsEnd; //1-based, inclusive - final int cdsEnd_3prime = positiveStrand ? parsedTranscriptRod.cdsEnd : parsedTranscriptRod.cdsStart ; //1-based, inclusive - - int frame = 0; //the frame of the current position - int txOffset_from5 = 1; //goes from txStart 5' to txEnd 3' for both + and - strand - int utr5Count_from5 = 0; - int mrnaCoord_from5 = 1; //goes from txStart 5' to txEnd 3' for both + and - strand, but only counts bases within exons. - char[] utr5NucBuffer_5to3 = null; //used to find uORFs - size = 5 because to hold the 3 codons that overlap any given position: [-2,-1,0], [-1,0,1], and [0,1,2] - - int codonCount_from5 = 1; //goes from cdsStart 5' to cdsEnd 3' for both + and - strand - counts the number of codons - 1-based - int codingCoord_from5 = isProteinCodingTranscript ? parsedTranscriptRod.computeInitialCodingCoord() : -1; //goes from cdsStart 5' to cdsEnd 3' for both + and - strand - boolean codingCoordResetForCDS = false; - boolean codingCoordResetForUtr3 = false; - final char[] currentCodon_5to3 = isProteinCodingTranscript ? 
new char[3] : null; //holds the current RNA codon - 5' to 3' - - PositionType positionType = null; - boolean isWithinIntronAndFarFromSpliceJunction = false; - int intronStart_5prime = -1; - int intronEnd_5prime; - - final Map outputLineFields = new HashMap(); - - for(int txCoord_5to3 = txStart_5prime; txCoord_5to3 != txEnd_3prime + increment_5to3; txCoord_5to3 += increment_5to3) - { - ++totalPositionsCounter; - - //compute certain attributes of the current position - final boolean isWithinExon = parsedTranscriptRod.isWithinExon(txCoord_5to3); //TODO if necessary, this can be sped up by keeping track of current exon/intron - - final int distanceToNearestSpliceSite = parsedTranscriptRod.computeDistanceToNearestSpliceSite(txCoord_5to3); - final boolean isWithin10bpOfSpliceJunction = Math.abs(distanceToNearestSpliceSite) <= 10; - - - //increment coding coord is necessary - if(isWithinExon) { - codingCoord_from5++; - } - - //figure out the current positionType - final PositionType prevPositionType = positionType; //save the position before it is updated - if(isProteinCodingTranscript) - { - if(isWithinExon) - { - if( strandSign*(txCoord_5to3 - cdsStart_5prime) < 0 ) { //utr5 (multiplying by strandSign is like doing absolute value.) - positionType = PositionType.utr5; - } else if( strandSign*(txCoord_5to3 - cdsEnd_3prime) > 0 ) { //utr3 (multiplying by strandSign is like doing absolute value.) - positionType = PositionType.utr3; - } else { - positionType = PositionType.CDS; - } - } else { - positionType = PositionType.intron; - } - } else { - if(isWithinExon) { - positionType = PositionType.non_coding_exon; - } else { - positionType = PositionType.non_coding_intron; - } - } - - //handle transitions - if(positionType == PositionType.CDS && prevPositionType != PositionType.CDS && !codingCoordResetForCDS) { - //transitioning from utr5 to CDS, reset the coding coord from -1 to 1. 
- codingCoord_from5 = 1; - codingCoordResetForCDS = true; - } else if(positionType == PositionType.utr3 && prevPositionType != PositionType.utr3 && !codingCoordResetForUtr3) { - //transitioning from CDS to utr3, reset the coding coord to 1. - codingCoord_from5 = 1; - codingCoordResetForUtr3 = true; - } - - - try - { - //handle introns - boolean wasWithinIntronAndFarFromSpliceJunction = isWithinIntronAndFarFromSpliceJunction; - isWithinIntronAndFarFromSpliceJunction = !isWithinExon && !isWithin10bpOfSpliceJunction; - - if(!wasWithinIntronAndFarFromSpliceJunction && isWithinIntronAndFarFromSpliceJunction) { - //save intron start - intronStart_5prime = txCoord_5to3; - - } else if(wasWithinIntronAndFarFromSpliceJunction && !isWithinIntronAndFarFromSpliceJunction) { - //output intron record - intronEnd_5prime = txCoord_5to3 - increment_5to3; - - final int intronStart = (intronStart_5prime < intronEnd_5prime ? intronStart_5prime : intronEnd_5prime) ; - final int intronEnd = (intronEnd_5prime > intronStart_5prime ? intronEnd_5prime : intronStart_5prime); - outputLineFields.clear(); - outputLineFields.put(GenomicAnnotation.CHR_COLUMN, parsedTranscriptRod.txChrom); - outputLineFields.put(GenomicAnnotation.START_COLUMN, String.valueOf(intronStart)); - outputLineFields.put(GenomicAnnotation.END_COLUMN, String.valueOf(intronEnd)); - outputLineFields.put(GenomicAnnotation.HAPLOTYPE_REFERENCE_COLUMN, Character.toString( '*' ) ); - outputLineFields.put(GenomicAnnotation.HAPLOTYPE_REFERENCE_COLUMN, Character.toString( '*' ) ); - for(int i = 0; i < GENE_NAME_COLUMNS.length; i++) { - outputLineFields.put(GENE_NAME_COLUMNS[i], parsedTranscriptRod.geneNames[i] ); - } - - outputLineFields.put(OUTPUT_POSITION_TYPE, positionType.toString() ); - outputLineFields.put(OUTPUT_TRANSCRIPT_STRAND, positiveStrand ? 
"+" : "-" ); - - if ( isProteinCodingTranscript ) - outputLineFields.put(OUTPUT_IN_CODING_REGION, Boolean.toString(positionType == PositionType.CDS) ); - - addThisLineToResult(outputLineFields); - } - - //when in utr5, compute the utr5NucBuffer_5to3 which is later used to compute the OUTPUT_UORF_CHANGE field - if(positionType == PositionType.utr5) - { - if(utr5Count_from5 < parsedTranscriptRod.utr5Sequence.length()) - { - if(utr5NucBuffer_5to3 == null) { - //initialize - utr5NucBuffer_5to3 = new char[5]; - utr5NucBuffer_5to3[3] = parsedTranscriptRod.utr5Sequence.charAt( utr5Count_from5 ); - - if(utr5Count_from5 + 1 < parsedTranscriptRod.utr5Sequence.length() ) { - utr5NucBuffer_5to3[4] = parsedTranscriptRod.utr5Sequence.charAt( utr5Count_from5 + 1 ); - } - } - - //as we move 5' to 3', shift nucleotides down to the 5' end, making room for the new 3' nucleotide: - utr5NucBuffer_5to3[0] = utr5NucBuffer_5to3[1]; - utr5NucBuffer_5to3[1] = utr5NucBuffer_5to3[2]; - utr5NucBuffer_5to3[2] = utr5NucBuffer_5to3[3]; - utr5NucBuffer_5to3[3] = utr5NucBuffer_5to3[4]; - - char nextRefBase = 0; - if( utr5Count_from5 + 2 < parsedTranscriptRod.utr5Sequence.length() ) - { - nextRefBase = parsedTranscriptRod.utr5Sequence.charAt( utr5Count_from5 + 2 ); - } - utr5NucBuffer_5to3[4] = nextRefBase; - - //check for bad bases - if( (utr5NucBuffer_5to3[0] != 0 && !BaseUtils.isRegularBase(utr5NucBuffer_5to3[0])) || - (utr5NucBuffer_5to3[1] != 0 && !BaseUtils.isRegularBase(utr5NucBuffer_5to3[1])) || - (utr5NucBuffer_5to3[2] != 0 && !BaseUtils.isRegularBase(utr5NucBuffer_5to3[2])) || - (utr5NucBuffer_5to3[3] != 0 && !BaseUtils.isRegularBase(utr5NucBuffer_5to3[3])) || - (utr5NucBuffer_5to3[4] != 0 && !BaseUtils.isRegularBase(utr5NucBuffer_5to3[4]))) - { - logger.debug("Skipping current position [" + parsedTranscriptRod.txChrom + ":" +txCoord_5to3 + "] in transcript " + parsedTranscriptRod.geneNames.toString() +". 
utr5NucBuffer_5to3 contains irregular base:" + utr5NucBuffer_5to3[0] + utr5NucBuffer_5to3[1] + utr5NucBuffer_5to3[2] + utr5NucBuffer_5to3[3] + utr5NucBuffer_5to3[4]);// +". Transcript is: " + parsedTranscriptRod); - ++skippedPositionsCounter; - continue; - } - - } else { // if(utr5Count_from5 >= parsedTranscriptRod.utr5Sequence.length()) - //defensive programming - throw new RuntimeException("Exception: Skipping current position [" + parsedTranscriptRod.txChrom + ":" +txCoord_5to3 + "] in transcript " + parsedTranscriptRod.geneNames.toString() +". utr5Count_from5 is now " + utr5Count_from5 + ", while parsedTranscriptRod.utr5Sequence.length() == " + parsedTranscriptRod.utr5Sequence.length() + ". This means parsedTranscriptRod.utr5Sequence isn't as long as it should be. This is a bug in handling this record: " + parsedTranscriptRod); - - } - } - - - //when in CDS, compute current codon - if(positionType == PositionType.CDS) - { - if(frame == 0) - { - currentCodon_5to3[0] = parsedTranscriptRod.cdsSequence.charAt( codingCoord_from5 - 1 ); //subtract 1 to go to zero-based coords - currentCodon_5to3[1] = parsedTranscriptRod.cdsSequence.charAt( codingCoord_from5 ); - currentCodon_5to3[2] = parsedTranscriptRod.cdsSequence.charAt( codingCoord_from5 + 1); - } - - //check for bad bases - if(!BaseUtils.isRegularBase(currentCodon_5to3[0]) || !BaseUtils.isRegularBase(currentCodon_5to3[1]) || !BaseUtils.isRegularBase(currentCodon_5to3[2])) { - logger.debug("Skipping current position [" + parsedTranscriptRod.txChrom + ":" +txCoord_5to3 + "] in transcript " + parsedTranscriptRod.geneNames.toString() +". CDS codon contains irregular base:" + currentCodon_5to3[0] + currentCodon_5to3[1] + currentCodon_5to3[2]);// +". 
Transcript is: " + parsedTranscriptRod); - ++skippedPositionsCounter; - continue; - } - - } - - char haplotypeReference = parsedTranscriptRod.txSequence.charAt( txOffset_from5 - 1 ); - if(!positiveStrand) { - haplotypeReference = BaseUtils.simpleComplement(haplotypeReference); //txSequence contents depend on whether its +/- strand - } - char haplotypeReferenceStrandSpecific= positiveStrand ? haplotypeReference : BaseUtils.simpleComplement(haplotypeReference); - - - - if(!BaseUtils.isRegularBase(haplotypeReference) && haplotypeReference != '*') { //* is special case for mitochondrial genes where polyA tail completes the last codon - //check for bad bases - logger.debug("Skipping current position [" + parsedTranscriptRod.txChrom + ":" +txCoord_5to3 + "] in transcript " + parsedTranscriptRod.geneNames.toString() + ". The reference contains an irregular base:" + haplotypeReference); // +". Transcript is: " + parsedTranscriptRod); - ++skippedPositionsCounter; - continue; - } - - - char haplotypeAlternateStrandSpecific; - for(char haplotypeAlternate : ALLELES ) - { - haplotypeAlternateStrandSpecific= positiveStrand ? haplotypeAlternate : BaseUtils.simpleComplement(haplotypeAlternate); - outputLineFields.clear(); - - if(!isProteinCodingTranscript || isWithinIntronAndFarFromSpliceJunction) { - haplotypeReference = '*'; - haplotypeAlternate = '*'; - } - - //compute simple OUTPUT fields. 
- outputLineFields.put(GenomicAnnotation.CHR_COLUMN, parsedTranscriptRod.txChrom); - outputLineFields.put(GenomicAnnotation.START_COLUMN, String.valueOf(txCoord_5to3)); - outputLineFields.put(GenomicAnnotation.END_COLUMN, String.valueOf(txCoord_5to3)); - outputLineFields.put(GenomicAnnotation.HAPLOTYPE_REFERENCE_COLUMN, Character.toString( haplotypeReference ) ); - outputLineFields.put(GenomicAnnotation.HAPLOTYPE_ALTERNATE_COLUMN, Character.toString( haplotypeAlternate ) ); - for(int i = 0; i < GENE_NAME_COLUMNS.length; i++) { - outputLineFields.put(GENE_NAME_COLUMNS[i], parsedTranscriptRod.geneNames[i] ); - } - - outputLineFields.put(OUTPUT_POSITION_TYPE, positionType.toString() ); - outputLineFields.put(OUTPUT_TRANSCRIPT_STRAND, positiveStrand ? "+" : "-" ); - if(isWithinExon) { - outputLineFields.put(OUTPUT_MRNA_COORD, Integer.toString(mrnaCoord_from5) ); - } - outputLineFields.put(OUTPUT_SPLICE_DISTANCE, Integer.toString(distanceToNearestSpliceSite) ); - - //compute OUTPUT_SPLICE_INFO - final String spliceInfoString; - if(isWithin10bpOfSpliceJunction) { - if(distanceToNearestSpliceSite < 0) { - //is on the 5' side of the splice junction - if(isWithinExon) { - spliceInfoString = "splice-donor_" + distanceToNearestSpliceSite; - } else { - spliceInfoString = "splice-acceptor_" + distanceToNearestSpliceSite; - } - } else { - if(isWithinExon) { - spliceInfoString = "splice-acceptor_" + distanceToNearestSpliceSite; - } else { - spliceInfoString = "splice-donor_" + distanceToNearestSpliceSite; - } - } - outputLineFields.put(OUTPUT_SPLICE_INFO, spliceInfoString); - } - - //compute OUTPUT_IN_CODING_REGION - if(isProteinCodingTranscript) - { - outputLineFields.put(OUTPUT_IN_CODING_REGION, Boolean.toString(positionType == PositionType.CDS) ); - } - - - //compute OUTPUT_UORF_CHANGE - if(positionType == PositionType.utr5) - { - String refCodon1 = (Character.toString(utr5NucBuffer_5to3[0]) + Character.toString(utr5NucBuffer_5to3[1]) + utr5NucBuffer_5to3[2]).toUpperCase(); - 
String refCodon2 = (Character.toString(utr5NucBuffer_5to3[1]) + Character.toString(utr5NucBuffer_5to3[2]) + utr5NucBuffer_5to3[3]).toUpperCase(); - String refCodon3 = (Character.toString(utr5NucBuffer_5to3[2]) + Character.toString(utr5NucBuffer_5to3[3]) + utr5NucBuffer_5to3[4]).toUpperCase(); - - String varCodon1 = (Character.toString(utr5NucBuffer_5to3[0]) + Character.toString(utr5NucBuffer_5to3[1]) + haplotypeAlternateStrandSpecific).toUpperCase(); - String varCodon2 = (Character.toString(utr5NucBuffer_5to3[1]) + Character.toString(haplotypeAlternateStrandSpecific) + utr5NucBuffer_5to3[3]).toUpperCase(); - String varCodon3 = (Character.toString(haplotypeAlternateStrandSpecific) + Character.toString(utr5NucBuffer_5to3[3]) + utr5NucBuffer_5to3[4]).toUpperCase(); - - //check for +1 (eg. addition of new ATG uORF) and -1 (eg. disruption of existing ATG uORF) - String uORFChangeStr = null; - if( (refCodon1.equals("ATG") && !varCodon1.equals("ATG")) || - (refCodon2.equals("ATG") && !varCodon2.equals("ATG")) || - (refCodon3.equals("ATG") && !varCodon3.equals("ATG"))) - { - uORFChangeStr = "-1"; - } - else if((varCodon1.equals("ATG") && !refCodon1.equals("ATG")) || - (varCodon2.equals("ATG") && !refCodon2.equals("ATG")) || - (varCodon3.equals("ATG") && !refCodon3.equals("ATG"))) - { - uORFChangeStr = "+1"; - } - - outputLineFields.put(OUTPUT_UORF_CHANGE, uORFChangeStr ); - } - //compute CDS-specific fields - else if (positionType == PositionType.CDS) { - final String referenceCodon = Character.toString(currentCodon_5to3[0]) + Character.toString(currentCodon_5to3[1]) + currentCodon_5to3[2]; - final char temp = currentCodon_5to3[frame]; - currentCodon_5to3[frame] = haplotypeAlternateStrandSpecific; - final String variantCodon = Character.toString(currentCodon_5to3[0]) + Character.toString(currentCodon_5to3[1]) + currentCodon_5to3[2]; - currentCodon_5to3[frame] = temp; - - final AminoAcid refAA = isMitochondrialTranscript ? 
AminoAcidTable.getMitochondrialAA(referenceCodon, codonCount_from5 == 1) : AminoAcidTable.getEukaryoticAA( referenceCodon ) ; - final AminoAcid variantAA = isMitochondrialTranscript ? AminoAcidTable.getMitochondrialAA(variantCodon, codonCount_from5 == 1) : AminoAcidTable.getEukaryoticAA( variantCodon ) ; - - if (refAA.isUnknown() || variantAA.isUnknown()) { - logger.warn("Illegal amino acid detected: refCodon=" + referenceCodon + " altCodon=" + variantCodon); - } - outputLineFields.put(OUTPUT_TRANSCRIPT_STRAND, positiveStrand ? "+" : "-" ); - outputLineFields.put(OUTPUT_FRAME, Integer.toString(frame)); - outputLineFields.put(OUTPUT_CODON_NUMBER, Integer.toString(codonCount_from5)); - outputLineFields.put(OUTPUT_REFERENCE_CODON, referenceCodon); - outputLineFields.put(OUTPUT_REFERENCE_AA, refAA.getCode()); - - outputLineFields.put(OUTPUT_VARIANT_CODON, variantCodon); - outputLineFields.put(OUTPUT_VARIANT_AA, variantAA.getCode()); - - outputLineFields.put(OUTPUT_PROTEIN_COORD_STR, "p." + refAA.getLetter() + Integer.toString(codonCount_from5) + variantAA.getLetter()); //for example: "p.K7$ - - boolean changesAA = !refAA.equals(variantAA); - outputLineFields.put(OUTPUT_CHANGES_AMINO_ACID, Boolean.toString(changesAA)); - final String functionalClass; - if (changesAA) { - if (variantAA.isStop()) { - functionalClass = "nonsense"; - } else if (refAA.isStop()) { - functionalClass = "readthrough"; - } else { - functionalClass = "missense"; - } - } else { - functionalClass = "silent"; - } - outputLineFields.put(OUTPUT_FUNCTIONAL_CLASS, functionalClass); - } - - //compute OUTPUT_CODING_COORD_STR - if(isProteinCodingTranscript) - { - //compute coding coord - final StringBuilder codingCoordStr = new StringBuilder(); - codingCoordStr.append( "c." 
); - if(positionType == PositionType.utr3) { - codingCoordStr.append( '*' ); - } - - if(isWithinExon) { - codingCoordStr.append( Integer.toString(codingCoord_from5) ); - - codingCoordStr.append ( haplotypeReferenceStrandSpecific + ">" + haplotypeAlternateStrandSpecific); - } else { - //intronic coordinates - if(distanceToNearestSpliceSite < 0) { - codingCoordStr.append( Integer.toString(codingCoord_from5 + 1) ); - } else { - codingCoordStr.append( Integer.toString(codingCoord_from5 ) ); - codingCoordStr.append( "+" ); - } - - codingCoordStr.append( Integer.toString( distanceToNearestSpliceSite ) ); - } - - outputLineFields.put(OUTPUT_CODING_COORD_STR, codingCoordStr.toString()); - } - - - //generate the output line and add it to 'result' map. - if ( !isWithinIntronAndFarFromSpliceJunction ) - addThisLineToResult(outputLineFields); - - if( haplotypeAlternate == '*' ) { - //need only one record for this position with "*" for haplotypeAlternate, instead of the 4 individual alleles - break; - } - - } //ALLELE for-loop - } - finally - { - //increment coords - txOffset_from5++; - if(isWithinExon) { - mrnaCoord_from5++; - } - - if(positionType == PositionType.utr5) { - utr5Count_from5++; - } else if(positionType == PositionType.CDS) { - frame = (frame + 1) % 3; - if(frame == 0) { - codonCount_from5++; - } - } - } - } // l for-loop - - } //method close - - - /** - * Utility method. Creates a line containing the outputLineFields, and adds it to result, hashed by the sortKey. - * - * @param outputLineFields Column-name to value pairs. 
- */ - private void addThisLineToResult(final Map outputLineFields) { - final StringBuilder outputLine = new StringBuilder(); - for( final String column : outputColumnNames ) { - if(outputLine.length() != 0) { - outputLine.append( AnnotatorInputTableCodec.DELIMITER ); - } - final String value = outputLineFields.get(column); - if(value != null) { - outputLine.append(value); - } - } - - out.println(outputLine.toString()); - } - - public Integer reduce(Integer value, Integer sum) { return sum + value; } - - public void onTraversalDone(Integer result) { - logger.info("Skipped " + skippedPositionsCounter + " in-transcript genomic positions out of "+ totalPositionsCounter + " total (" + ( totalPositionsCounter == 0 ? 0 : (100*skippedPositionsCounter)/totalPositionsCounter) + "%)"); - logger.info("Skipped " + skippedTranscriptCounter + " transcripts out of "+ transcriptsProcessedCounter + " total (" + ( transcriptsProcessedCounter == 0 ? 0 : (100*skippedTranscriptCounter)/transcriptsProcessedCounter) + "%)"); - logger.info("Protein-coding transcripts (eg. with a CDS region) that don't start with Methionine or end in a stop codon: " + transcriptsThatDontStartWithMethionineOrEndWithStopCodonCounter + " transcripts out of "+ transcriptsProcessedCounter + " total (" + ( transcriptsProcessedCounter == 0 ? 0 : (100*transcriptsThatDontStartWithMethionineOrEndWithStopCodonCounter)/transcriptsProcessedCounter) + "%)"); - logger.info("Protein-coding transcripts (eg. with a CDS region) that don't start with Methionine: " + transcriptsThatDontStartWithMethionineCounter + " transcripts out of "+ transcriptsProcessedCounter + " total (" + ( transcriptsProcessedCounter == 0 ? 0 : (100*transcriptsThatDontStartWithMethionineCounter)/transcriptsProcessedCounter) + "%)"); - logger.info("Protein-coding transcripts (eg. 
with a CDS region) that don't end in a stop codon: " + transcriptsThatDontEndWithStopCodonCounter + " transcripts out of "+ transcriptsProcessedCounter + " total (" + ( transcriptsProcessedCounter == 0 ? 0 : (100*transcriptsThatDontEndWithStopCodonCounter)/transcriptsProcessedCounter) + "%)"); - } - - - /** - * Container for all data fields from a single row of the transcript table. - */ - protected static class TranscriptTableRecord - { - public static final String STRAND_COLUMN = "strand"; //eg. + - public static final String CDS_START_COLUMN = "cdsStart"; - public static final String CDS_END_COLUMN = "cdsEnd"; - public static final String EXON_COUNT_COLUMN = "exonCount"; - public static final String EXON_STARTS_COLUMN = "exonStarts"; - public static final String EXON_ENDS_COLUMN = "exonEnds"; - //public static final String EXON_FRAMES_COLUMN = "exonFrames"; - - - /** - * This StringBuffer accumulates the entire transcript sequence. - * This buffer is used instead of using the GATK window mechanism - * because arbitrary-length look-aheads and look-behinds are needed to deal - * with codons that span splice-junctions in + & - strand transcripts. - * The window mechanism requires hard-coding the window size, which would - * translate into a limit on maximum supported intron size. To avoid this, the - * sequence is accumulated as the transcript is scanned left-to-right. - * Then, all calculations are performed at the end. - */ - public StringBuilder txSequence; //the sequence of the entire transcript in order from 5' to 3' - public StringBuilder utr5Sequence; //the protein coding sequence (with introns removed) in order from 5' to 3' - public StringBuilder cdsSequence; //the protein coding sequence (with introns removed) in order from 5' to 3' - - public boolean positiveStrand; //whether the transcript is on the + or the - strand. - public String[] geneNames; //eg. 
NM_021649 - - public String txChrom; //The chromosome name - public int txStart; - public int txEnd; - - public int cdsStart; - public int cdsEnd; - - public int[] exonStarts; - public int[] exonEnds; - //public int[] exonFrames; - not used for anything, frame is computed another way - - /** - * Constructor. - * - * @param transcriptRod A rod representing a single record in the transcript table. - * @param geneNameColumns name columns. - */ - public TranscriptTableRecord(final AnnotatorInputTableFeature transcriptRod, String[] geneNameColumns) { - - //String binStr = transcriptRod.get("bin"); - //String idStr = transcriptRod.get("id"); //int(10) unsigned range Unique identifier ( usually 0 for some reason - even for translated ) - String strandStr = transcriptRod.getColumnValue(STRAND_COLUMN); - if(strandStr == null) { - throw new IllegalArgumentException("Transcript table record doesn't contain a 'strand' column. Make sure the transcripts input file has a header and the usual columns: \"" + strandStr + "\""); - } else if(strandStr.equals("+")) { - positiveStrand = true; - } else if(strandStr.equals("-")) { - positiveStrand = false; - } else { - throw new IllegalArgumentException("Transcript table record contains unexpected value for 'strand' column: \"" + strandStr + "\""); - } - - geneNames = new String[geneNameColumns.length]; - for(int i = 0; i < geneNameColumns.length; i++) { - geneNames[i] = transcriptRod.getColumnValue(geneNameColumns[i]); - } - - //String txStartStr = transcriptRod.get(TXSTART_COLUMN); //These fields were used to generate column 1 of the ROD file (eg. 
they got turned into chr:txStart-txStop) - //String txEndStr = transcriptRod.get(TXEND_COLUMN); - txChrom = transcriptRod.getChr(); - txStart = transcriptRod.getStart(); - txEnd = transcriptRod.getEnd(); - - String cdsStartStr = transcriptRod.getColumnValue(CDS_START_COLUMN); - String cdsEndStr = transcriptRod.getColumnValue(CDS_END_COLUMN); - - cdsStart = Integer.parseInt(cdsStartStr); - cdsEnd = Integer.parseInt(cdsEndStr); - - txSequence = new StringBuilder( (txEnd - txStart + 1) ); //the sequence of the entire transcript in order from 5' to 3' - if(isProteinCodingTranscript()) { - utr5Sequence = new StringBuilder( positiveStrand ? (cdsStart - txStart + 1) : (txEnd - cdsEnd + 1) ); //TODO reduce init size by size of introns - cdsSequence = new StringBuilder( (cdsEnd - cdsStart + 1) ); //TODO reduce init size by size of introns - } - - String exonCountStr = transcriptRod.getColumnValue(EXON_COUNT_COLUMN); - String exonStartsStr = transcriptRod.getColumnValue(EXON_STARTS_COLUMN); - String exonEndsStr = transcriptRod.getColumnValue(EXON_ENDS_COLUMN); - //String exonFramesStr = transcriptRod.get(EXON_FRAMES_COLUMN); - - String[] exonStartStrs = exonStartsStr.split(","); - String[] exonEndStrs = exonEndsStr.split(","); - //String[] exonFrameStrs = exonFramesStr.split(","); - - int exonCount = Integer.parseInt(exonCountStr); - if(exonCount != exonStartStrs.length || exonCount != exonEndStrs.length /* || exonCount != exonFrameStrs.length */) - { - throw new RuntimeException("exonCount != exonStarts.length || exonCount != exonEnds.length || exonCount != exonFrames.length. Exon starts: " + exonStartsStr + ", Exon ends: " + exonEndsStr + /*", Exon frames: " + exonFramesStr + */", Exon count: " + exonCountStr +". 
transcriptRod = " + transcriptRod); - } - - exonStarts = new int[exonCount]; - exonEnds = new int[exonCount]; - //exonFrames = new int[exonCount]; - for(int i = 0; i < exonCount; i++) { - exonStarts[i] = Integer.parseInt(exonStartStrs[i]); - exonEnds[i] = Integer.parseInt(exonEndStrs[i]); - //exonFrames[i] = Integer.parseInt(exonFrameStrs[i]); - } - } - - - /** - * Takes a genomic position on the same contig as the transcript, and - * returns true if this position falls within an exon. - */ - public boolean isWithinExon(final int genomPosition) { - for(int i = 0; i < exonStarts.length; i++) { - final int curStart = exonStarts[i]; - if(genomPosition < curStart) { - return false; - } - final int curStop = exonEnds[i]; - if(genomPosition <= curStop) { - return true; - } - } - - return false; - } - - /** - * Computes the distance to the nearest splice-site. - * The returned value is negative its on the 5' side (eg. upstream) of the juntion, and - * positive if its on the 3' side. - */ - public int computeDistanceToNearestSpliceSite(final int genomPosition) { - int prevDistance = Integer.MAX_VALUE; - for(int i = 0; i < exonStarts.length; i++) { - final int curStart = exonStarts[i]; - int curDistance = curStart - genomPosition; - if(genomPosition < curStart) { - //position is within the current intron - if(prevDistance < curDistance) { - return positiveStrand ? prevDistance : -prevDistance; - } else { - return positiveStrand ? -curDistance : curDistance; - } - } else { - prevDistance = genomPosition - curStart + 1; - } - - final int curStop = exonEnds[i]; - curDistance = curStop - genomPosition + 1; - if(genomPosition <= curStop) { - //position is within an exon - if(prevDistance < curDistance) { - return positiveStrand ? prevDistance : -prevDistance; - } else { - return positiveStrand ? 
-curDistance : curDistance; - } - } else { - prevDistance = genomPosition - curStop; - } - } - - throw new IllegalArgumentException("Genomic position: [" + genomPosition +"] not found within transcript: " + this +". " + - "This method should not have been called for this position. NOTE: this method assumes that all transcripts start " + - "with an exon and end with an exon (rather than an intron). Is this wrong?"); - //return prevDistance; //out of exons. return genomPosition-curStop - } - - - /** - * Returns true if this is a coding transcript (eg. is translated - * into proteins). Returns false for non-coding RNA. - */ - public boolean isProteinCodingTranscript() { - return cdsStart < cdsEnd; - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder(); - sb.append("chrpos=" + txChrom + ':' + txStart + '-' + txEnd + ", strand=" + (positiveStrand ? '+':'-') + ", gene-names=" + Arrays.toString(geneNames) + ", cds="+ cdsStart + '-' + cdsEnd + ", exonStarts=" + Arrays.toString(exonStarts) + ", exonEnds=" + Arrays.toString(exonEnds)); - return sb.toString(); - } - - - - /** - * Computes the coding coord of the 1st nucleotide in the transcript. - * If the 1st nucleotide is in the 5'utr, the returned value will be negative. - * Otherwise (if the 1st nucleotide is CDS), the returned value is 1. - */ - public int computeInitialCodingCoord() { - if(!isProteinCodingTranscript()) { - throw new ReviewedStingException("This method should only be called for protein-coding transcripts"); - } - - if(positiveStrand) - { - if( cdsStart == exonStarts[0] ) { - //the 1st nucleotide of the transcript is CDS. - return 1; - } - - int result = 0; - for(int i = 0; i < exonStarts.length; i++) - { - final int exonStart = exonStarts[i]; - final int exonEnd = exonEnds[i]; - if(cdsStart <= exonEnd) { //eg. 
exonEnd is now on the 3' side of cdsStart - //this means cdsStart is within the current exon - result += (cdsStart - exonStart) + 1; - break; - } else { - //cdsStart is downstream of the current exon - result += (exonEnd - exonStart) + 1; - } - } - return -result; //negate because 5' UTR coding coord is negative - } - else //(negative strand) - { - final int cdsStart_5prime = cdsEnd; - if(cdsStart_5prime == exonEnds[exonEnds.length - 1]) { - //the 1st nucleotide of the transcript is CDS. - return 1; - } - - int result = 0; - for(int i = exonEnds.length - 1; i >= 0; i--) - { - final int exonStart = exonEnds[i]; //when its the negative strand, the 5' coord of the 1st exon is exonEnds[i] - final int exonEnd = exonStarts[i]; - if( exonEnd <= cdsStart_5prime ) { //eg. exonEnd is now on the 3' side of cdsStart - //this means cdsStart is within the current exon - result += -(cdsStart_5prime - exonStart) + 1; - break; - } else { - //cdsStart is downstream of the current exon - result += -(exonEnd - exonStart) + 1; - } - } - return -result; //negate because 5' UTR coding coord is negative - } - } - } - - -} - diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/GenotypeAnnotation.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/GenotypeAnnotation.java index 57bc44ab8..29d90b5bf 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/GenotypeAnnotation.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/GenotypeAnnotation.java @@ -1,5 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator.interfaces; +import org.broad.tribble.Feature; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -10,15 +12,13 @@ import 
org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.List; import java.util.Map; -public interface GenotypeAnnotation { +public abstract class GenotypeAnnotation extends VariantAnnotatorAnnotation { // return annotations for the given contexts/genotype split by sample - public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g); - - // return the FORMAT keys - public List getKeyNames(); + public abstract Map annotate(RefMetaDataTracker tracker, Map> rodBindings, + ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g); // return the descriptions used for the VCF FORMAT meta field - public List getDescriptions(); - + public abstract List getDescriptions(); + } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/InfoFieldAnnotation.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/InfoFieldAnnotation.java index 4e850d01b..63dea93d2 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/InfoFieldAnnotation.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/InfoFieldAnnotation.java @@ -1,5 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator.interfaces; +import org.broad.tribble.Feature; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -9,15 +11,11 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.List; import java.util.Map; -public interface InfoFieldAnnotation { - +public abstract class InfoFieldAnnotation extends VariantAnnotatorAnnotation { // return annotations for the given contexts split by sample - public Map 
annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc); - - // return the INFO keys - public List getKeyNames(); + public abstract Map annotate(RefMetaDataTracker tracker, Map> rodBindings, + ReferenceContext ref, Map stratifiedContexts, VariantContext vc); // return the descriptions used for the VCF INFO meta field - public List getDescriptions(); - + public abstract List getDescriptions(); } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/VariantAnnotatorAnnotation.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/VariantAnnotatorAnnotation.java new file mode 100644 index 000000000..f33d61df9 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/VariantAnnotatorAnnotation.java @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.walkers.annotator.interfaces; + +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; + +import java.util.List; +import java.util.Map; + +@DocumentedGATKFeature(enable = true, groupName = "VariantAnnotator annotations", summary = "VariantAnnotator annotations") +public abstract class VariantAnnotatorAnnotation { + // return the INFO keys + public abstract List getKeyNames(); +} \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java index 21c8ec430..ee2e4853b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java @@ -25,15 +25,13 @@ package org.broadinstitute.sting.gatk.walkers.beagle; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.commandline.*; +import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import 
org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.features.beagle.BeagleFeature; -import org.broadinstitute.sting.gatk.walkers.RMD; -import org.broadinstitute.sting.gatk.walkers.Requires; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.SampleUtils; @@ -51,15 +49,22 @@ import static java.lang.Math.log10; /** * Takes files produced by Beagle imputation engine and creates a vcf with modified annotations. */ -@Requires(value={},referenceMetaData=@RMD(name=BeagleOutputToVCFWalker.INPUT_ROD_NAME, type=VariantContext.class)) - public class BeagleOutputToVCFWalker extends RodWalker { - public static final String INPUT_ROD_NAME = "variant"; - public static final String COMP_ROD_NAME = "comp"; - public static final String R2_ROD_NAME = "beagleR2"; - public static final String PROBS_ROD_NAME = "beagleProbs"; - public static final String PHASED_ROD_NAME = "beaglePhased"; + @ArgumentCollection + protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection(); + + @Input(fullName="comp", shortName = "comp", doc="Comparison VCF file", required=false) + public RodBinding comp = RodBinding.makeUnbound(VariantContext.class); + + @Input(fullName="beagleR2", shortName = "beagleR2", doc="VCF file", required=true) + public RodBinding beagleR2; + + @Input(fullName="beagleProbs", shortName = "beagleProbs", doc="VCF file", required=true) + public RodBinding beagleProbs; + + @Input(fullName="beaglePhased", shortName = "beaglePhased", doc="VCF file", required=true) + public RodBinding beaglePhased; @Output(doc="File to which variants should be written",required=true) protected VCFWriter vcfWriter = null; @@ -97,17 +102,13 @@ public class BeagleOutputToVCFWalker extends RodWalker { // Open output file 
specified by output VCF ROD final List dataSources = this.getToolkit().getRodDataSources(); - for( final ReferenceOrderedDataSource source : dataSources ) { - if (source.getName().equals(COMP_ROD_NAME)) { - hInfo.add(new VCFInfoHeaderLine("ACH", 1, VCFHeaderLineType.Integer, "Allele Count from Comparison ROD at this site")); - hInfo.add(new VCFInfoHeaderLine("ANH", 1, VCFHeaderLineType.Integer, "Allele Frequency from Comparison ROD at this site")); - hInfo.add(new VCFInfoHeaderLine("AFH", 1, VCFHeaderLineType.Float, "Allele Number from Comparison ROD at this site")); - break; - } - + if ( comp.isBound() ) { + hInfo.add(new VCFInfoHeaderLine("ACH", 1, VCFHeaderLineType.Integer, "Allele Count from Comparison ROD at this site")); + hInfo.add(new VCFInfoHeaderLine("ANH", 1, VCFHeaderLineType.Integer, "Allele Frequency from Comparison ROD at this site")); + hInfo.add(new VCFInfoHeaderLine("AFH", 1, VCFHeaderLineType.Float, "Allele Number from Comparison ROD at this site")); } - Set samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(INPUT_ROD_NAME)); + Set samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(variantCollection.variants.getName())); final VCFHeader vcfHeader = new VCFHeader(hInfo, samples); vcfWriter.writeHeader(vcfHeader); @@ -119,41 +120,35 @@ public class BeagleOutputToVCFWalker extends RodWalker { return 0; GenomeLoc loc = context.getLocation(); - VariantContext vc_input = tracker.getVariantContext(ref,INPUT_ROD_NAME, null, loc, true); + VariantContext vc_input = tracker.getFirstValue(variantCollection.variants, loc); - VariantContext vc_comp = tracker.getVariantContext(ref,COMP_ROD_NAME, null, loc, true); + VariantContext vc_comp = tracker.getFirstValue(comp, loc); if ( vc_input == null ) return 0; if (vc_input.isFiltered()) { - vcfWriter.add(vc_input, ref.getBase()); + vcfWriter.add(vc_input); return 1; } - List r2rods = tracker.getReferenceMetaData(R2_ROD_NAME); + BeagleFeature beagleR2Feature = 
tracker.getFirstValue(beagleR2); // ignore places where we don't have a variant - if ( r2rods.size() == 0 ) + if ( beagleR2Feature == null ) return 0; - BeagleFeature beagleR2Feature = (BeagleFeature)r2rods.get(0); - List gProbsrods = tracker.getReferenceMetaData(PROBS_ROD_NAME); + BeagleFeature beagleProbsFeature = tracker.getFirstValue(beagleProbs); // ignore places where we don't have a variant - if ( gProbsrods.size() == 0 ) + if ( beagleProbsFeature == null ) return 0; - BeagleFeature beagleProbsFeature = (BeagleFeature)gProbsrods.get(0); - - List gPhasedrods = tracker.getReferenceMetaData(PHASED_ROD_NAME); - + BeagleFeature beaglePhasedFeature = tracker.getFirstValue(beaglePhased); // ignore places where we don't have a variant - if ( gPhasedrods.size() == 0 ) + if ( beaglePhasedFeature == null ) return 0; - BeagleFeature beaglePhasedFeature = (BeagleFeature)gPhasedrods.get(0); - // get reference base for current position byte refByte = ref.getBase(); @@ -333,7 +328,7 @@ public class BeagleOutputToVCFWalker extends RodWalker { } - vcfWriter.add(VariantContext.modifyAttributes(filteredVC,attributes), ref.getBase()); + vcfWriter.add(VariantContext.modifyAttributes(filteredVC,attributes)); return 1; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java index 3eed12992..a46ec8b48 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java @@ -25,16 +25,12 @@ package org.broadinstitute.sting.gatk.walkers.beagle; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Hidden; -import org.broadinstitute.sting.commandline.Input; -import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.commandline.*; import 
org.broadinstitute.sting.gatk.GenomeAnalysisEngine; +import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.RMD; -import org.broadinstitute.sting.gatk.walkers.Requires; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.gatk.walkers.variantrecalibration.VQSRCalibrationCurve; import org.broadinstitute.sting.utils.GenomeLoc; @@ -54,10 +50,12 @@ import java.util.*; /** * Produces an input file to Beagle imputation engine, listing genotype likelihoods for each sample in input variant file */ -@Requires(value={},referenceMetaData=@RMD(name=ProduceBeagleInputWalker.ROD_NAME, type=VariantContext.class)) public class ProduceBeagleInputWalker extends RodWalker { - public static final String ROD_NAME = "variant"; - public static final String VALIDATION_ROD_NAME = "validation"; + + @ArgumentCollection protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection(); + + @Input(fullName="validation", shortName = "validation", doc="Input VCF file", required=false) + public RodBinding validation = RodBinding.makeUnbound(VariantContext.class); @Output(doc="File to which BEAGLE input should be written",required=true) protected PrintStream beagleWriter = null; @@ -99,7 +97,7 @@ public class ProduceBeagleInputWalker extends RodWalker { public void initialize() { - samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(ROD_NAME)); + samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(variantCollection.variants.getName())); beagleWriter.print("marker alleleA alleleB"); for ( String sample : samples ) @@ -121,8 +119,8 @@ public class ProduceBeagleInputWalker extends 
RodWalker { public Integer map( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) { if( tracker != null ) { GenomeLoc loc = context.getLocation(); - VariantContext variant_eval = tracker.getVariantContext(ref, ROD_NAME, null, loc, true); - VariantContext validation_eval = tracker.getVariantContext(ref,VALIDATION_ROD_NAME,null,loc, true); + VariantContext variant_eval = tracker.getFirstValue(variantCollection.variants, loc); + VariantContext validation_eval = tracker.getFirstValue(validation, loc); if ( goodSite(variant_eval,validation_eval) ) { if ( useValidation(validation_eval, ref) ) { @@ -171,20 +169,20 @@ public class ProduceBeagleInputWalker extends RodWalker { logger.debug(String.format("boot: %d, test: %d, total: %d", bootstrapSetSize, testSetSize, bootstrapSetSize+testSetSize+1)); if ( (bootstrapSetSize+1.0)/(1.0+bootstrapSetSize+testSetSize) <= bootstrap ) { if ( bootstrapVCFOutput != null ) { - bootstrapVCFOutput.add(VariantContext.modifyFilters(validation, BOOTSTRAP_FILTER), ref.getBase() ); + bootstrapVCFOutput.add(VariantContext.modifyFilters(validation, BOOTSTRAP_FILTER)); } bootstrapSetSize++; return true; } else { if ( bootstrapVCFOutput != null ) { - bootstrapVCFOutput.add(validation,ref.getBase()); + bootstrapVCFOutput.add(validation); } testSetSize++; return false; } } else { if ( validation != null && bootstrapVCFOutput != null ) { - bootstrapVCFOutput.add(validation,ref.getBase()); + bootstrapVCFOutput.add(validation); } return false; } @@ -303,9 +301,7 @@ public class ProduceBeagleInputWalker extends RodWalker { } private void initializeVcfWriter() { - - final ArrayList inputNames = new ArrayList(); - inputNames.add( VALIDATION_ROD_NAME ); + final List inputNames = Arrays.asList(validation.getName()); // setup the header fields Set hInfo = new HashSet(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/VariantsToBeagleUnphasedWalker.java 
b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/VariantsToBeagleUnphasedWalker.java index f6cd1d636..22c39d794 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/VariantsToBeagleUnphasedWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/VariantsToBeagleUnphasedWalker.java @@ -26,12 +26,12 @@ package org.broadinstitute.sting.gatk.walkers.beagle; import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Input; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.RMD; -import org.broadinstitute.sting.gatk.walkers.Requires; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.SampleUtils; @@ -54,9 +54,9 @@ import java.util.Set; * in input variant file. 
Will additional hold back a fraction of the sites for evaluation, marking the * genotypes at that sites as missing, and writing the truth of these sites to a second VCF file */ -@Requires(value={},referenceMetaData=@RMD(name= VariantsToBeagleUnphasedWalker.ROD_NAME, type=VariantContext.class)) public class VariantsToBeagleUnphasedWalker extends RodWalker { - public static final String ROD_NAME = "variant"; + @Input(fullName="variants", shortName = "V", doc="Input VCF file", required=true) + public RodBinding variants; @Output(doc="File to which BEAGLE unphased genotypes should be written",required=true) protected PrintStream beagleWriter = null; @@ -75,7 +75,7 @@ public class VariantsToBeagleUnphasedWalker extends RodWalker private int testSetSize = 0; public void initialize() { - samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(ROD_NAME)); + samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(variants.getName())); beagleWriter.print("I marker alleleA alleleB"); for ( String sample : samples ) @@ -102,7 +102,7 @@ public class VariantsToBeagleUnphasedWalker extends RodWalker public Integer map( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) { if( tracker != null ) { GenomeLoc loc = context.getLocation(); - VariantContext vc = tracker.getVariantContext(ref, ROD_NAME, null, loc, true); + VariantContext vc = tracker.getFirstValue(variants, loc); if ( ProduceBeagleInputWalker.canBeOutputToBeagle(vc) ) { // do we want to hold back this site? 
@@ -110,7 +110,7 @@ public class VariantsToBeagleUnphasedWalker extends RodWalker // if we are holding it back and we are writing a bootstrap VCF, write it out if ( makeMissing && bootstrapVCFOutput != null ) { - bootstrapVCFOutput.add(vc, ref.getBase()); + bootstrapVCFOutput.add(vc); } // regardless, all sites are written to the unphased genotypes file, marked as missing if appropriate diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CompareCallableLociWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CompareCallableLociWalker.java index 6b91b0198..cd5fdc505 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CompareCallableLociWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CompareCallableLociWalker.java @@ -22,9 +22,11 @@ package org.broadinstitute.sting.gatk.walkers.coverage; -import org.broad.tribble.bed.FullBEDFeature; +import org.broad.tribble.bed.BEDFeature; import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Input; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -43,11 +45,11 @@ public class CompareCallableLociWalker extends RodWalker compTrack1; - @Argument(shortName="comp2", doc="First comparison track name", required=false) - protected String COMP2 = "comp2"; + @Input(fullName="comp2", shortName = "comp2", doc="Second comparison track name", required=true) + public RodBinding compTrack2; @Argument(shortName="printState", doc="If provided, prints sites satisfying this state pair", required=false) protected String printState = null; @@ -77,8 +79,8 @@ public class CompareCallableLociWalker extends RodWalker map(RefMetaDataTracker tracker, 
ReferenceContext ref, AlignmentContext context) { if ( tracker != null ) { - CallableLociWalker.CallableBaseState comp1 = getCallableBaseState(tracker, COMP1); - CallableLociWalker.CallableBaseState comp2 = getCallableBaseState(tracker, COMP2); + CallableLociWalker.CallableBaseState comp1 = getCallableBaseState(tracker, compTrack1); + CallableLociWalker.CallableBaseState comp2 = getCallableBaseState(tracker, compTrack2); if ( printState != null && comp1.getState() == printState1 && comp2.getState() == printState2 ) { out.printf("%s %s %s %s%n", comp1.getLocation(), comp1.getState(), comp2.getLocation(), comp2.getState()); @@ -90,14 +92,14 @@ public class CompareCallableLociWalker extends RodWalker rodBinding) { //System.out.printf("tracker %s%n", tracker); - List bindings = tracker.getReferenceMetaData(track); - if ( bindings.size() != 1 || ! (bindings.get(0) instanceof FullBEDFeature)) { - throw new UserException.MalformedFile(String.format("%s track isn't a properly formated CallableBases object!", track)); + List bindings = tracker.getValues(rodBinding); + if ( bindings.size() != 1 ) { + throw new UserException.MalformedFile(String.format("%s track isn't a properly formated CallableBases object!", rodBinding.getName())); } - FullBEDFeature bed = (FullBEDFeature)bindings.get(0); + BEDFeature bed = bindings.get(0); GenomeLoc loc = getToolkit().getGenomeLocParser().createGenomeLoc(bed.getChr(), bed.getStart(), bed.getEnd()); CallableLociWalker.CalledState state = CallableLociWalker.CalledState.valueOf(bed.getName()); return new CallableLociWalker.CallableBaseState(getToolkit().getGenomeLocParser(),loc, state); @@ -127,7 +129,7 @@ public class CompareCallableLociWalker extends RodWalker diffs = diffEngine.diff(master, test); diffEngine.reportSummarizedDifferences(diffs, params); return true; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsWalker.java 
b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsWalker.java index b679f967a..5889d19e5 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsWalker.java @@ -25,44 +25,53 @@ package org.broadinstitute.sting.gatk.walkers.diffengine; import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Hidden; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.Requires; import org.broadinstitute.sting.gatk.walkers.RodWalker; import java.io.File; import java.io.PrintStream; -import java.util.Arrays; import java.util.List; /** * A generic engine for comparing tree-structured objects + * *

- * Compares two record-oriented files, itemizing specific difference between equivalent - * records in the two files. Reports both itemized and summarized differences. - *

- * What are the summarized differences and the DiffObjectsWalker + * Compares two record-oriented files, itemizing specific difference between equivalent + * records in the two files. Reports both itemized and summarized differences. + *

+ * + *

What are the summarized differences and the DiffObjectsWalker?

+ * *

* The GATK contains a summarizing difference engine that compares hierarchical data structures to emit: - *

    - *
  • A list of specific differences between the two data structures. This is similar to saying the value in field A in record 1 in file F differences from the value in field A in record 1 in file G. - *
  • A summarized list of differences ordered by frequency of the difference. This output is similar to saying field A in 50 records in files F and G differed. - *
+ *
    + *
  • A list of specific differences between the two data structures. This is similar to saying the value in field A in record 1 in file F differs from the value in field A in record 1 in file G. + *
  • A summarized list of differences ordered by frequency of the difference. This output is similar to saying field A in 50 records in files F and G differed. + *
+ *

* *

- * The GATK contains a private walker DiffObjects that allows you access to the DiffEngine capabilities on the command line. Simply provide the walker with the master and test files and it will emit summarized differences for you. + * The GATK contains a private walker DiffObjects that allows you access to the DiffEngine capabilities on the command line. Simply provide the walker with the master and test files and it will emit summarized differences for you. + *

+ * + *

Why?

* *

- * Why? - *

- * The reason for this system is that it allows you to compare two structured files -- such as BAMs and VCFs -- for common differences among them. This is primarily useful in regression testing or optimization, where you want to ensure that the differences are those that you expect and not any others. + * The reason for this system is that it allows you to compare two structured files -- such as BAMs and VCFs -- for common differences among them. This is primarily useful in regression testing or optimization, where you want to ensure that the differences are those that you expect and not any others. + *

* - *

Understanding the output - *

The DiffEngine system compares to two hierarchical data structures for specific differences in the values of named - * nodes. Suppose I have two trees: + *

Input

+ *

+ * The DiffObjectsWalker works with BAM or VCF files. + *

+ * + *

Output

+ *

+ * The DiffEngine system compares two hierarchical data structures for specific differences in the values of named + * nodes. Suppose I have two trees: *

  *     Tree1=(A=1 B=(C=2 D=3))
  *     Tree2=(A=1 B=(C=3 D=3 E=4))
@@ -70,33 +79,37 @@ import java.util.List;
  * 
*

* where every node in the tree is named, or is a raw value (here all leaf values are integers). The DiffEngine - * traverses these data structures by name, identifies equivalent nodes by fully qualified names - * (Tree1.A is distinct from Tree2.A, and determines where their values are equal (Tree1.A=1, Tree2.A=1, so they are). - * These itemized differences are listed as: + * traverses these data structures by name, identifies equivalent nodes by fully qualified names + * (Tree1.A is distinct from Tree2.A), and determines whether their values are equal (Tree1.A=1, Tree2.A=1, so they are). + * These itemized differences are listed as: *

  *     Tree1.B.C=2 != Tree2.B.C=3
  *     Tree1.B.C=2 != Tree3.B.C=4
  *     Tree2.B.C=3 != Tree3.B.C=4
  *     Tree1.B.E=MISSING != Tree2.B.E=4
  * 
+ * *

- * This conceptually very similar to the output of the unix command line tool diff. What's nice about DiffEngine though - * is that it computes similarity among the itemized differences and displays the count of differences names - * in the system. In the above example, the field C is not equal three times, while the missing E in Tree1 occurs - * only once. So the summary is: + * This is conceptually very similar to the output of the unix command line tool diff. What's nice about DiffEngine though + * is that it computes similarity among the itemized differences and displays the count of difference names + * in the system. In the above example, the field C is not equal three times, while the missing E in Tree1 occurs + * only once. So the summary is: *

  *     *.B.C : 3
  *     *.B.E : 1
  * 
- *

where the * operator indicates that any named field matches. This output is sorted by counts, and provides an - * immediate picture of the commonly occurring differences among the files. + * *

- * Below is a detailed example of two VCF fields that differ because of a bug in the AC, AF, and AN counting routines, - * detected by the integrationtest integration (more below). You can see that in the although there are many specific - * instances of these differences between the two files, the summarized differences provide an immediate picture that - * the AC, AF, and AN fields are the major causes of the differences. + * where the * operator indicates that any named field matches. This output is sorted by counts, and provides an + * immediate picture of the commonly occurring differences among the files. *

+ * Below is a detailed example of two VCF fields that differ because of a bug in the AC, AF, and AN counting routines, + * detected by the integrationtest integration (more below). You can see that although there are many specific + * instances of these differences between the two files, the summarized differences provide an immediate picture that + * the AC, AF, and AN fields are the major causes of the differences. + *

+ * *

    [testng] path                                                             count
    [testng] *.*.*.AC                                                         6
@@ -117,7 +130,6 @@ import java.util.List;
  * @author Mark DePristo
  * @since 7/4/11
  */
-@Requires(value={})
 public class DiffObjectsWalker extends RodWalker {
     /**
      * Writes out a file of the DiffEngine format:
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/GATKReportDiffableReader.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/GATKReportDiffableReader.java
new file mode 100644
index 000000000..ef47ee33c
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/GATKReportDiffableReader.java
@@ -0,0 +1,135 @@
+/*
+ * Copyright (c) 2011, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.gatk.walkers.diffengine;
+
+import org.broadinstitute.sting.gatk.report.GATKReport;
+import org.broadinstitute.sting.gatk.report.GATKReportColumn;
+import org.broadinstitute.sting.gatk.report.GATKReportTable;
+
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.Map;
+
+
+/**
+ * Class implementing diffnode reader for GATKReports.
+ *
+ * Each table of the report is added as a child node of the file root; each column is added
+ * as a child of its table node and holds one named entry per cell value.
+ */
+public class GATKReportDiffableReader implements DiffableReader {
+    /** @return the fixed reader name, "GATKReport" */
+    @Override
+    public String getName() { return "GATKReport"; }
+
+    /**
+     * Reads the report in file into a diff tree rooted at the file name.
+     *
+     * @param file the GATKReport file to read
+     * @param maxElementsToRead not used by this reader; the report is loaded in one go
+     * @return the binding of the root node, or null if anything goes wrong while reading
+     */
+    @Override
+    public DiffElement readFromFile(File file, int maxElementsToRead) {
+        DiffNode root = DiffNode.rooted(file.getName());
+        try {
+            // one line reads the whole thing into memory
+            GATKReport report = new GATKReport(file);
+
+            for (GATKReportTable table : report.getTables() ) {
+                root.add(tableToNode(table, root));
+            }
+
+            return root.getBinding();
+        } catch ( Exception e ) {
+            // best effort: any failure to load or convert the report is reported as null
+            return null;
+        }
+    }
+
+    /**
+     * Converts one report table into a diff node parented at root.
+     *
+     * @param table the table to convert
+     * @param root the node passed as parent when creating the table node
+     * @return a node named after the table, carrying its description, row count, version and columns
+     */
+    private DiffNode tableToNode(GATKReportTable table, DiffNode root) {
+        DiffNode tableRoot = DiffNode.empty(table.getTableName(), root);
+
+        tableRoot.add("Description", table.getTableDescription());
+        tableRoot.add("NumberOfRows", table.getNumRows());
+        tableRoot.add("Version", table.getVersion());
+
+        for ( GATKReportColumn column : table.getColumns().values() ) {
+            DiffNode columnRoot = DiffNode.empty(column.getColumnName(), tableRoot);
+
+            columnRoot.add("Width", column.getColumnWidth());
+            columnRoot.add("Displayable", column.isDisplayable());
+
+            // cells are named <columnName>1, <columnName>2, ... in iteration order
+            int n = 1;
+            for ( Object elt : column.values() ) {
+                String name = column.getColumnName() + n++;
+                columnRoot.add(name, elt.toString());
+            }
+
+            tableRoot.add(columnRoot);
+        }
+
+        return tableRoot;
+    }
+
+    /**
+     * Checks whether file starts with the GATKReport header prefix.
+     *
+     * @param file the candidate file
+     * @return true if the leading characters match the header prefix; false otherwise or on I/O error
+     */
+    @Override
+    public boolean canRead(File file) {
+        final String HEADER = GATKReport.GATKREPORT_HEADER_PREFIX;
+        FileReader reader = null;
+        try {
+            reader = new FileReader(file);
+            char[] buff = new char[HEADER.length()];
+            reader.read(buff, 0, HEADER.length());
+            String firstLine = new String(buff);
+            return firstLine.startsWith(HEADER);
+        } catch ( IOException e ) {
+            return false;
+        } finally {
+            // always release the file handle, even when the read fails
+            if ( reader != null ) {
+                try {
+                    reader.close();
+                } catch ( IOException e ) {
+                    // nothing useful to do if close fails
+                }
+            }
+        }
+    }
+}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java
index 77a992ce0..a447d17af 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java
@@ -129,14 +129,6 @@ public class VCFDiffableReader implements DiffableReader {
 
     @Override
     public boolean canRead(File file) {
-        try {
-            final String VCF4_HEADER = "##fileformat=VCFv4";
-            char[] buff = new char[VCF4_HEADER.length()];
-            new FileReader(file).read(buff, 0, VCF4_HEADER.length());
-            String firstLine = new String(buff);
-            return firstLine.startsWith(VCF4_HEADER);
-        } catch ( IOException e ) {
-            return false;
-        }
+        return AbstractVCFCodec.canDecodeFile(file, VCFCodec.VCF4_MAGIC_HEADER);
     }
 }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceWalker.java
index efc101618..93012ee10 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceWalker.java
@@ -25,6 +25,8 @@
 
 package org.broadinstitute.sting.gatk.walkers.fasta;
 
+import org.broadinstitute.sting.commandline.Input;
+import org.broadinstitute.sting.commandline.RodBinding;
 import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
 import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
 import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@@ -33,7 +35,7 @@ import org.broadinstitute.sting.utils.GenomeLoc;
 import org.broadinstitute.sting.utils.collections.Pair;
 import org.broadinstitute.sting.utils.variantcontext.VariantContext;
 
-import java.util.Collection;
+import java.util.List;
 
 
 /**
@@ -46,6 +48,12 @@ import java.util.Collection;
 @Requires(value={DataSource.REFERENCE})
 public class FastaAlternateReferenceWalker extends FastaReferenceWalker {
 
+    @Input(fullName = "variant", shortName = "V", doc="variants to model", required=false)
+    public List> variants;
+
+    @Input(fullName="snpmask", shortName = "snpmask", doc="SNP mask VCF file", required=false)
+    public RodBinding snpmask = RodBinding.makeUnbound(VariantContext.class);
+
     private int deletionBasesRemaining = 0;
 
     public Pair map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
@@ -57,11 +65,9 @@ public class FastaAlternateReferenceWalker extends FastaReferenceWalker {
 
         String refBase = String.valueOf((char)ref.getBase());
 
-        Collection vcs = tracker.getAllVariantContexts(ref);
-
         // Check to see if we have a called snp
-        for ( VariantContext vc : vcs ) {
-            if ( !vc.getSource().startsWith("snpmask") ) {
+        for ( VariantContext vc : tracker.getValues(VariantContext.class) ) {
+            if ( ! vc.getSource().equals(snpmask.getName())) {
                 if ( vc.isDeletion()) {
                     deletionBasesRemaining = vc.getReference().length();
                     // delete the next n bases, not this one
@@ -75,8 +81,8 @@ public class FastaAlternateReferenceWalker extends FastaReferenceWalker {
         }
 
         // if we don't have a called site, and we have a mask at this site, mask it
-        for ( VariantContext vc : vcs ) {
-            if ( vc.getSource().startsWith("snpmask") && vc.isSNP()) {
+        for ( VariantContext vc : tracker.getValues(snpmask) ) {
+            if ( vc.isSNP()) {
                 return new Pair(context.getLocation(), "N");
             }
         }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java
index 6c023573a..8ee1e3a89 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java
@@ -25,11 +25,11 @@
 
 package org.broadinstitute.sting.gatk.walkers.filters;
 
-import org.broadinstitute.sting.commandline.Argument;
-import org.broadinstitute.sting.commandline.Output;
+import org.broad.tribble.Feature;
+import org.broadinstitute.sting.commandline.*;
+import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
 import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
 import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
-import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
 import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
 import org.broadinstitute.sting.gatk.walkers.*;
 import org.broadinstitute.sting.utils.GenomeLoc;
@@ -46,10 +46,15 @@ import java.util.*;
 /**
  * Filters variant calls using a number of user-selectable, parameterizable criteria.
  */
-@Requires(value={},referenceMetaData=@RMD(name="variant", type=VariantContext.class))
 @Reference(window=@Window(start=-50,stop=50))
 public class VariantFiltrationWalker extends RodWalker {
 
+    @ArgumentCollection
+    protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
+
+    @Input(fullName="mask", doc="Input ROD mask", required=false)
+    public RodBinding mask = RodBinding.makeUnbound(Feature.class);
+
     @Output(doc="File to which variants should be written", required=true)
     protected VCFWriter writer = null;
 
@@ -70,7 +75,7 @@ public class VariantFiltrationWalker extends RodWalker {
 
     @Argument(fullName="maskExtension", shortName="maskExtend", doc="How many bases beyond records from a provided 'mask' rod should variants be filtered; [default:0]", required=false)
     protected Integer MASK_EXTEND = 0;
-    @Argument(fullName="maskName", shortName="mask", doc="The text to put in the FILTER field if a 'mask' rod is provided and overlaps with a variant call; [default:'Mask']", required=false)
+    @Argument(fullName="maskName", shortName="maskName", doc="The text to put in the FILTER field if a 'mask' rod is provided and overlaps with a variant call; [default:'Mask']", required=false)
     protected String MASK_NAME = "Mask";
 
     @Argument(fullName="missingValuesInExpressionsShouldEvaluateAsFailing", doc="When evaluating the JEXL expressions, should missing values be considered failing the expression (by default they are considered passing)?", required=false)
@@ -80,7 +85,6 @@ public class VariantFiltrationWalker extends RodWalker {
     List filterExps;
     List genotypeFilterExps;
 
-    public static final String INPUT_VARIANT_ROD_BINDING_NAME = "variant";
     public static final String CLUSTERED_SNP_FILTER_NAME = "SnpCluster";
     private ClusteredSnps clusteredSNPs = null;
     private GenomeLoc previousMaskPosition = null;
@@ -92,8 +96,7 @@ public class VariantFiltrationWalker extends RodWalker {
 
     private void initializeVcfWriter() {
 
-        final ArrayList inputNames = new ArrayList();
-        inputNames.add( INPUT_VARIANT_ROD_BINDING_NAME );
+        final List inputNames = Arrays.asList(variantCollection.variants.getName());
 
         // setup the header fields
         Set hInfo = new HashSet();
@@ -110,12 +113,8 @@ public class VariantFiltrationWalker extends RodWalker {
         if ( genotypeFilterExps.size() > 0 )
             hInfo.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_FILTER_KEY, 1, VCFHeaderLineType.String, "Genotype-level filter"));
 
-        List dataSources = getToolkit().getRodDataSources();
-        for ( ReferenceOrderedDataSource source : dataSources ) {
-            if ( source.getName().equals("mask") ) {
-                hInfo.add(new VCFFilterHeaderLine(MASK_NAME, "Overlaps a user-input mask"));
-                break;
-            }
+        if ( mask.isBound() ) {
+            hInfo.add(new VCFFilterHeaderLine(MASK_NAME, "Overlaps a user-input mask"));
         }
 
         writer.writeHeader(new VCFHeader(hInfo, SampleUtils.getUniqueSamplesFromRods(getToolkit(), inputNames)));
@@ -149,10 +148,10 @@ public class VariantFiltrationWalker extends RodWalker {
         if ( tracker == null )
             return 0;
 
-        Collection VCs = tracker.getVariantContexts(ref, INPUT_VARIANT_ROD_BINDING_NAME, null, context.getLocation(), true, false);
+        Collection VCs = tracker.getValues(variantCollection.variants, context.getLocation());
 
         // is there a SNP mask present?
-        boolean hasMask = tracker.getReferenceMetaData("mask").size() > 0;
+        boolean hasMask = tracker.hasValues(mask);
         if ( hasMask )
             previousMaskPosition = ref.getLocus();  // multi-base masks will get triggered over all bases of the mask
 
@@ -272,7 +271,7 @@ public class VariantFiltrationWalker extends RodWalker {
         else
             filteredVC = new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), genotypes, vc.getNegLog10PError(), filters, vc.getAttributes());
 
-        writer.add( filteredVC, context.getReferenceContext().getBase() );
+        writer.add(filteredVC);
     }
 
     public Integer reduce(Integer value, Integer sum) {
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidSNPGenotypeLikelihoods.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidSNPGenotypeLikelihoods.java
index 2014801e4..5f6865d04 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidSNPGenotypeLikelihoods.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidSNPGenotypeLikelihoods.java
@@ -276,13 +276,11 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable {
         if ( elt.isReducedRead() ) {
             // reduced read representation
             byte qual = elt.getReducedQual();
-            for ( int i = 0; i < elt.getReducedCount(); i++ ) {
-                add(obsBase, qual, (byte)0, (byte)0);
-            }
-            return elt.getQual();
+            add(obsBase, qual, (byte)0, (byte)0, elt.getReducedCount()); // fast calculation of n identical likelihoods
+            return elt.getReducedCount(); // we added nObs bases here
         } else {
             byte qual = qualToUse(elt, ignoreBadBases, capBaseQualsAtMappingQual, minBaseQual);
-            return qual > 0 ? add(obsBase, qual, (byte)0, (byte)0) : 0;
+            return qual > 0 ? add(obsBase, qual, (byte)0, (byte)0, 1) : 0;
         }
     }
 
@@ -309,9 +307,11 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable {
      * @param qual1
      * @param obsBase2
      * @param qual2 can be 0, indicating no second base was observed for this fragment
+     * @param nObs The number of times this quad of values was seen.  Generally 1, but reduced reads
+     *  can have nObs > 1 for synthetic reads
      * @return
      */
-    private int add(byte obsBase1, byte qual1, byte obsBase2, byte qual2) {
+    private int add(byte obsBase1, byte qual1, byte obsBase2, byte qual2, int nObs) {
         // TODO-- Right now we assume that there are at most 2 reads per fragment.  This assumption is fine
         // TODO--   given the current state of next-gen sequencing, but may need to be fixed in the future.
         // TODO--   However, when that happens, we'll need to be a lot smarter about the caching we do here.
@@ -332,19 +332,17 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable {
 
         for ( DiploidGenotype g : DiploidGenotype.values() ) {
             double likelihood = likelihoods[g.ordinal()];
-            
-            //if ( VERBOSE ) {
-            //    System.out.printf("  L(%c | G=%s, Q=%d, S=%s) = %f / %f%n",
-            //            observedBase, g, qualityScore, pow(10,likelihood) * 100, likelihood);
-            //}
-
-            log10Likelihoods[g.ordinal()] += likelihood;
-            log10Posteriors[g.ordinal()] += likelihood;
+            log10Likelihoods[g.ordinal()] += likelihood * nObs;
+            log10Posteriors[g.ordinal()] += likelihood * nObs;
         }
 
         return 1;
     }
 
+    private int add(byte obsBase1, byte qual1, byte obsBase2, byte qual2) {
+        return add(obsBase1, qual1, obsBase2, qual2, 1);
+    }
+
     // -------------------------------------------------------------------------------------
     //
     // Dealing with the cache routines
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java
index 60ea601d5..897e1a668 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java
@@ -293,6 +293,9 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
         return aList;
 
     }
+
+    private final static EnumSet allowableTypes = EnumSet.of(VariantContext.Type.INDEL, VariantContext.Type.MIXED);
+
     public Allele getLikelihoods(RefMetaDataTracker tracker,
                                  ReferenceContext ref,
                                  Map contexts,
@@ -318,11 +321,10 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
             haplotypeMap.clear();
 
             if (getAlleleListFromVCF) {
-                 EnumSet allowableTypes = EnumSet.of(VariantContext.Type.INDEL);
-                 allowableTypes.add(VariantContext.Type.MIXED);
-                 for( final VariantContext vc_input : tracker.getVariantContexts(ref, "alleles",
-                         allowableTypes, ref.getLocus(), false, false) ) {
-                      if( vc_input != null && ref.getLocus().getStart() == vc_input.getStart()) {
+                 for( final VariantContext vc_input : tracker.getValues(VariantContext.class, "alleles") ) {
+                      if( vc_input != null &&
+                              allowableTypes.contains(vc_input.getType()) &&
+                              ref.getLocus().getStart() == vc_input.getStart()) {
                          vc = vc_input;
                          break;
                      }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java
index 3e3cd128b..9205e33a0 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java
@@ -63,12 +63,12 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
         VariantContext vc = null;
 
         // search for usable record
-        for( final VariantContext vc_input : tracker.getVariantContexts(ref, "alleles", null, ref.getLocus(), true, false) ) {
+        for( final VariantContext vc_input : tracker.getValues(VariantContext.class, "alleles", ref.getLocus()) ) {
             if ( vc_input != null && ! vc_input.isFiltered() && (! requireSNP || vc_input.isSNP() )) {
                 if ( vc == null ) {
                     vc = vc_input;
                 } else {
-                    logger.warn("Multiple valid VCF records detected at site " + ref.getLocus() + ", only considering alleles from first record only");
+                    logger.warn("Multiple valid VCF records detected at site " + ref.getLocus() + ", only considering alleles from first record");
                 }
             }
         }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCalcLikelihoods.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCalcLikelihoods.java
index 22c3081a3..503d87cbe 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCalcLikelihoods.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCalcLikelihoods.java
@@ -49,7 +49,6 @@ import java.util.TreeSet;
  * the name 'allele' so we know which alternate allele to use at each site.
  */
 @BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = BAQ.ApplicationTime.ON_INPUT)
-@Requires(value={},referenceMetaData=@RMD(name="alleles", type= VariantContext.class))
 @Reference(window=@Window(start=-200,stop=200))
 @By(DataSource.READS)
 @Downsample(by=DownsampleType.BY_SAMPLE, toCoverage=250)
@@ -93,7 +92,7 @@ public class UGCalcLikelihoods extends LocusWalker
 
     public VariantCallContext map(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext) {
         VariantContext call = UG_engine.calculateLikelihoods(tracker, refContext, rawContext);
-        return call == null ? null : new VariantCallContext(call, refContext.getBase(), true);
+        return call == null ? null : new VariantCallContext(call, true);
     }
 
     public Integer reduceInit() { return 0; }
@@ -107,7 +106,7 @@ public class UGCalcLikelihoods extends LocusWalker
             return sum;
 
         try {
-            writer.add(value, value.refBase);
+            writer.add(value);
         } catch (IllegalArgumentException e) {
             throw new IllegalArgumentException(e.getMessage() + "; this is often caused by using the --assume_single_sample_reads argument with the wrong sample name");
         }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCallVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCallVariants.java
index 68d8f9b54..500b11360 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCallVariants.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCallVariants.java
@@ -25,7 +25,9 @@
 package org.broadinstitute.sting.gatk.walkers.genotyper;
 
 import org.broadinstitute.sting.commandline.ArgumentCollection;
+import org.broadinstitute.sting.commandline.Input;
 import org.broadinstitute.sting.commandline.Output;
+import org.broadinstitute.sting.commandline.RodBinding;
 import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
 import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
 import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
@@ -51,6 +53,9 @@ public class UGCallVariants extends RodWalker {
     @ArgumentCollection
     private UnifiedArgumentCollection UAC = new UnifiedArgumentCollection();
 
+    @Input(fullName="variant", shortName = "V", doc="Input VCF file", required=true)
+    public List> variants;
+
     // control the output
     @Output(doc="File to which variants should be written",required=true)
     protected VCFWriter writer = null;
@@ -62,15 +67,9 @@ public class UGCallVariants extends RodWalker {
     private Set trackNames = new HashSet();
 
     public void initialize() {
-        UAC.NO_SLOD = true;
-
-        for ( ReferenceOrderedDataSource d : getToolkit().getRodDataSources() ) {
-            if ( d.getName().startsWith("variant") )
-                trackNames.add(d.getName());
-        }
-        if ( trackNames.size() == 0 )
-            throw new UserException("At least one track bound to a name beginning with 'variant' must be provided.");
 
+        for ( RodBinding rb : variants )
+            trackNames.add(rb.getName());
         Set samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), trackNames);
 
         UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, null, null, samples);
@@ -94,11 +93,7 @@ public class UGCallVariants extends RodWalker {
         if ( tracker == null )
             return null;
 
-        List VCs = new ArrayList();
-        for ( String name : trackNames ) {
-            Collection vc = tracker.getVariantContexts(ref, name, null, context.getLocation(), true, true);
-            VCs.addAll(vc);
-        }
+        List VCs = tracker.getValues(variants, context.getLocation());
 
         VariantContext mergedVC = mergeVCsWithGLs(VCs);
         if ( mergedVC == null )
@@ -116,7 +111,7 @@ public class UGCallVariants extends RodWalker {
         try {
             Map attrs = new HashMap(value.getAttributes());
             VariantContextUtils.calculateChromosomeCounts(value, attrs, true);
-            writer.add(VariantContext.modifyAttributes(value, attrs), value.refBase);
+            writer.add(VariantContext.modifyAttributes(value, attrs));
         } catch (IllegalArgumentException e) {
             throw new IllegalArgumentException(e.getMessage() + "; this is often caused by using the --assume_single_sample_reads argument with the wrong sample name");
         }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java
index 2b25df4aa..52bf3f715 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java
@@ -58,8 +58,8 @@ public class UnifiedArgumentCollection {
     @Argument(fullName = "standard_min_confidence_threshold_for_emitting", shortName = "stand_emit_conf", doc = "The minimum phred-scaled confidence threshold at which variants not at 'trigger' track sites should be emitted (and filtered if less than the calling threshold)", required = false)
     public double STANDARD_CONFIDENCE_FOR_EMITTING = 30.0;
 
-    @Argument(fullName = "noSLOD", shortName = "nsl", doc = "If provided, we will not calculate the SLOD", required = false)
-    public boolean NO_SLOD = false;
+    @Argument(fullName = "computeSLOD", shortName = "sl", doc = "If provided, we will calculate the SLOD", required = false)
+    public boolean COMPUTE_SLOD = false;
 
 
     // control the error modes
@@ -154,7 +154,7 @@ public class UnifiedArgumentCollection {
         uac.PCR_error = PCR_error;
         uac.GenotypingMode = GenotypingMode;
         uac.OutputMode = OutputMode;
-        uac.NO_SLOD = NO_SLOD;
+        uac.COMPUTE_SLOD = COMPUTE_SLOD;
         uac.ASSUME_SINGLE_SAMPLE = ASSUME_SINGLE_SAMPLE;
         uac.STANDARD_CONFIDENCE_FOR_CALLING = STANDARD_CONFIDENCE_FOR_CALLING;
         uac.STANDARD_CONFIDENCE_FOR_EMITTING = STANDARD_CONFIDENCE_FOR_EMITTING;
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java
index 2a0338bca..07ba27639 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java
@@ -25,17 +25,20 @@
 
 package org.broadinstitute.sting.gatk.walkers.genotyper;
 
+import org.broad.tribble.Feature;
 import org.broadinstitute.sting.commandline.Argument;
 import org.broadinstitute.sting.commandline.ArgumentCollection;
 import org.broadinstitute.sting.commandline.Output;
+import org.broadinstitute.sting.commandline.RodBinding;
 import org.broadinstitute.sting.gatk.DownsampleType;
+import org.broadinstitute.sting.gatk.arguments.DbsnpArgumentCollection;
 import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
 import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
 import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
 import org.broadinstitute.sting.gatk.filters.BadMateFilter;
 import org.broadinstitute.sting.gatk.filters.MappingQualityUnavailableReadFilter;
 import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
-import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;
+import org.broadinstitute.sting.gatk.refdata.features.DbSNPHelper;
 import org.broadinstitute.sting.gatk.walkers.*;
 import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
 import org.broadinstitute.sting.utils.SampleUtils;
@@ -59,6 +62,13 @@ public class UnifiedGenotyper extends LocusWalker>());
         UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, verboseWriter, annotationEngine, samples);
 
         // initialize the header
@@ -144,7 +155,7 @@ public class UnifiedGenotyper extends LocusWalker GLs) {
@@ -300,7 +300,8 @@ public class UnifiedGenotyperEngine {
                 genotypes,
                 VariantContext.NO_NEG_LOG_10PERROR,
                 null,
-                null);
+                null,
+                refContext.getBase());
     }
 
     // private method called by both UnifiedGenotyper and UGCallVariants entry points into the engine
@@ -372,8 +373,8 @@ public class UnifiedGenotyperEngine {
             attributes.put(VCFConstants.DOWNSAMPLED_KEY, true);
 
 
-        if ( !UAC.NO_SLOD && bestAFguess != 0 ) {
-            final boolean DEBUG_SLOD = false;
+        if ( UAC.COMPUTE_SLOD && bestAFguess != 0 ) {
+            //final boolean DEBUG_SLOD = false;
 
             // the overall lod
             VariantContext vcOverall = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, vc.getAlternateAllele(0), false, model);
@@ -381,7 +382,7 @@ public class UnifiedGenotyperEngine {
             afcm.get().getLog10PNonRef(tracker, refContext, vcOverall.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get());
             //double overallLog10PofNull = log10AlleleFrequencyPosteriors.get()[0];
             double overallLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get(), 1);
-            if ( DEBUG_SLOD ) System.out.println("overallLog10PofF=" + overallLog10PofF);
+            //if ( DEBUG_SLOD ) System.out.println("overallLog10PofF=" + overallLog10PofF);
 
             // the forward lod
             VariantContext vcForward = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.FORWARD, vc.getAlternateAllele(0), false, model);
@@ -390,7 +391,7 @@ public class UnifiedGenotyperEngine {
             //double[] normalizedLog10Posteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get(), true);
             double forwardLog10PofNull = log10AlleleFrequencyPosteriors.get()[0];
             double forwardLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get(), 1);
-            if ( DEBUG_SLOD ) System.out.println("forwardLog10PofNull=" + forwardLog10PofNull + ", forwardLog10PofF=" + forwardLog10PofF);
+            //if ( DEBUG_SLOD ) System.out.println("forwardLog10PofNull=" + forwardLog10PofNull + ", forwardLog10PofF=" + forwardLog10PofF);
 
             // the reverse lod
             VariantContext vcReverse = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.REVERSE, vc.getAlternateAllele(0), false, model);
@@ -399,11 +400,11 @@ public class UnifiedGenotyperEngine {
             //normalizedLog10Posteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get(), true);
             double reverseLog10PofNull = log10AlleleFrequencyPosteriors.get()[0];
             double reverseLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get(), 1);
-            if ( DEBUG_SLOD ) System.out.println("reverseLog10PofNull=" + reverseLog10PofNull + ", reverseLog10PofF=" + reverseLog10PofF);
+            //if ( DEBUG_SLOD ) System.out.println("reverseLog10PofNull=" + reverseLog10PofNull + ", reverseLog10PofF=" + reverseLog10PofF);
 
             double forwardLod = forwardLog10PofF + reverseLog10PofNull - overallLog10PofF;
             double reverseLod = reverseLog10PofF + forwardLog10PofNull - overallLog10PofF;
-            if ( DEBUG_SLOD ) System.out.println("forward lod=" + forwardLod + ", reverse lod=" + reverseLod);
+            //if ( DEBUG_SLOD ) System.out.println("forward lod=" + forwardLod + ", reverse lod=" + reverseLod);
 
             // strand score is max bias between forward and reverse strands
             double strandScore = Math.max(forwardLod, reverseLod);
@@ -425,10 +426,10 @@ public class UnifiedGenotyperEngine {
             myAlleles.add(vc.getReference());
         }
         VariantContext vcCall = new VariantContext("UG_call", loc.getContig(), loc.getStart(), endLoc,
-                myAlleles, genotypes, phredScaledConfidence/10.0, passesCallThreshold(phredScaledConfidence) ? null : filter, attributes);
+                myAlleles, genotypes, phredScaledConfidence/10.0, passesCallThreshold(phredScaledConfidence) ? null : filter, attributes, refContext.getBase());
 
         if ( annotationEngine != null ) {
-            // first off, we want to use the *unfiltered* and *unBAQed* context for the annotations
+            // Note: we want to use the *unfiltered* and *unBAQed* context for the annotations
             ReadBackedPileup pileup = null;
             if (rawContext.hasExtendedEventPileup())
                 pileup = rawContext.getExtendedEventPileup();
@@ -436,13 +437,10 @@ public class UnifiedGenotyperEngine {
                 pileup = rawContext.getBasePileup();
             stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup, UAC.ASSUME_SINGLE_SAMPLE);
 
-            Collection variantContexts = annotationEngine.annotateContext(tracker, refContext, stratifiedContexts, vcCall);
-            vcCall = variantContexts.iterator().next(); // we know the collection will always have exactly 1 element.
+            vcCall = annotationEngine.annotateContext(tracker, refContext, stratifiedContexts, vcCall);
         }
 
-        VariantCallContext call = new VariantCallContext(vcCall, confidentlyCalled(phredScaledConfidence, PofF));
-        call.setRefBase(refContext.getBase());
-        return call;
+        return new VariantCallContext(vcCall, confidentlyCalled(phredScaledConfidence, PofF));
     }
 
     private int calculateEndPos(Set alleles, Allele refAllele, GenomeLoc loc) {
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/VariantCallContext.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/VariantCallContext.java
index 5896e784e..423c80112 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/VariantCallContext.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/VariantCallContext.java
@@ -36,7 +36,6 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
  * Useful helper class to communicate the results of calculateGenotype to framework
  */
 public class VariantCallContext extends VariantContext {
-    public byte refBase;
 
     // Was the site called confidently, either reference or variant?
     public boolean confidentlyCalled = false;
@@ -55,16 +54,6 @@ public class VariantCallContext extends VariantContext {
         this.shouldEmit = shouldEmit;
     }
 
-    VariantCallContext(VariantContext vc, byte ref, boolean confidentlyCalledP) {
-        super(vc);
-        this.refBase = ref;
-        this.confidentlyCalled = confidentlyCalledP;
-    }
-
-    public void setRefBase(byte ref) {
-        this.refBase = ref;
-    }
-
     /* these methods are only implemented for GENOTYPE_GIVEN_ALLELES MODE */
     //todo -- expand these methods to all modes
 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java
index 61f21c488..fa3991694 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java
@@ -30,16 +30,12 @@ import net.sf.samtools.*;
 import net.sf.samtools.util.RuntimeIOException;
 import net.sf.samtools.util.SequenceUtil;
 import net.sf.samtools.util.StringUtil;
-import org.broadinstitute.sting.commandline.Argument;
-import org.broadinstitute.sting.commandline.Hidden;
-import org.broadinstitute.sting.commandline.Input;
-import org.broadinstitute.sting.commandline.Output;
+import org.broadinstitute.sting.commandline.*;
 import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
 import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
 import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
 import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
 import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
-import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors;
 import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
 import org.broadinstitute.sting.gatk.walkers.BAQMode;
 import org.broadinstitute.sting.gatk.walkers.ReadWalker;
@@ -86,6 +82,9 @@ public class IndelRealigner extends ReadWalker {
         USE_SW
     }
 
+    @Input(fullName="known", shortName = "known", doc="Input VCF file with known indels", required=false)
+    public List> known = Collections.emptyList();
+
     @Input(fullName="targetIntervals", shortName="targetIntervals", doc="intervals file output from RealignerTargetCreator", required=true)
     protected String intervalsFile = null;
 
@@ -159,21 +158,6 @@ public class IndelRealigner extends ReadWalker {
     protected boolean CHECKEARLY = false;
 
 
-    // DEPRECATED
-
-    @Deprecated
-    @Argument(fullName="sortInCoordinateOrderEvenThoughItIsHighlyUnsafe", doc="This argument is no longer used.", required=false)
-    protected boolean DEPRECATED_SORT_IN_COORDINATE_ORDER = false;
-
-    @Deprecated
-    @Argument(fullName="realignReadsWithBadMates", doc="This argument is no longer used.", required=false)
-    protected boolean DEPRECATED_REALIGN_MATES = false;
-
-    @Deprecated
-    @Argument(fullName="useOnlyKnownIndels", shortName="knownsOnly", doc="This argument is no longer used. See --consensusDeterminationModel instead.", required=false)
-    protected boolean DEPRECATED_KNOWNS_ONLY = false;
-
-
     // DEBUGGING OPTIONS FOLLOW
 
     @Hidden
@@ -558,8 +542,8 @@ public class IndelRealigner extends ReadWalker {
                 if ( indelRodsSeen.contains(rod) )
                     continue;
                 indelRodsSeen.add(rod);
-                if ( VariantContextAdaptors.canBeConvertedToVariantContext(rod))
-                    knownIndelsToTry.add(VariantContextAdaptors.toVariantContext("", rod, ref));
+                if ( rod instanceof VariantContext )
+                    knownIndelsToTry.add((VariantContext)rod);
             }
         }
     }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java
index 488e37f26..fbb62f17e 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java
@@ -26,7 +26,9 @@
 package org.broadinstitute.sting.gatk.walkers.indels;
 
 import org.broadinstitute.sting.commandline.Argument;
+import org.broadinstitute.sting.commandline.Input;
 import org.broadinstitute.sting.commandline.Output;
+import org.broadinstitute.sting.commandline.RodBinding;
 import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
 import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
 import org.broadinstitute.sting.gatk.filters.BadCigarFilter;
@@ -46,6 +48,8 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
 
 import java.io.PrintStream;
 import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
 
 /**
  * Emits intervals for the Local Indel Realigner to target for cleaning.  Ignores 454 reads, MQ0 reads, and reads with consecutive indel operators in the CIGAR string.
@@ -56,15 +60,19 @@ import java.util.ArrayList;
 @By(DataSource.REFERENCE)
 @BAQMode(ApplicationTime = BAQ.ApplicationTime.FORBIDDEN)
 public class RealignerTargetCreator extends RodWalker {
+
     @Output
     protected PrintStream out;
 
+    @Input(fullName="known", shortName = "known", doc="Input VCF file with known indels", required=false)
+    public List> known = Collections.emptyList();
+
     // mismatch/entropy/SNP arguments
     @Argument(fullName="windowSize", shortName="window", doc="window size for calculating entropy or SNP clusters", required=false)
     protected int windowSize = 10;
 
     @Argument(fullName="mismatchFraction", shortName="mismatch", doc="fraction of base qualities needing to mismatch for a position to have high entropy; to disable set to <= 0 or > 1", required=false)
-    protected double mismatchThreshold = 0.15;
+    protected double mismatchThreshold = 0.0;
 
     @Argument(fullName="minReadsAtLocus", shortName="minReads", doc="minimum reads at a locus to enable using the entropy calculation", required=false)
     protected int minReadsAtLocus = 4;
@@ -110,7 +118,7 @@ public class RealignerTargetCreator extends RodWalker {
-//    @Output
-//    PrintStream out;
-    @Output(doc="File to which variants should be written",required=true)
-    protected VCFWriter vcf_writer = null;
-
-    @Argument(fullName="outputFile", shortName="O", doc="output file name (BED format). DEPRECATED> Use --bed", required=true)
-    @Deprecated
-    java.io.File output_file;
-
-    @Argument(fullName = "metrics_file", shortName = "metrics", doc = "File to print callability metrics output", required = false)
-    public PrintStream metricsWriter = null;
-
-//    @Argument(fullName="vcf_format", shortName="vcf", doc="generate output file in VCF format", required=false)
-//    boolean FORMAT_VCF = false;
-
-    @Hidden
-    @Argument(fullName = "genotype_intervals", shortName = "genotype",
-            doc = "Calls will be made at each position within the specified interval(s), whether there is an indel or it's the ref", required = false)
-    public String genotypeIntervalsFile = null;
-
-    @Hidden
-    @Argument(fullName="genotypeIntervalsAreNotSorted", shortName="giNotSorted", required=false,
-            doc="This tool assumes that the genotyping interval list (--genotype_intervals) is sorted; "+
-                "if the list turns out to be unsorted, it will throw an exception.  "+
-                "Use this argument when your interval list is not sorted to instruct the IndelGenotyper "+
-                "to sort and keep it in memory (increases memory usage!).")
-    protected boolean GENOTYPE_NOT_SORTED = false;
-
-    @Hidden
-	@Argument(fullName="unpaired", shortName="unpaired",
-			doc="Perform unpaired calls (no somatic status detection)", required=false)
-    boolean call_unpaired = false;
-	boolean call_somatic ;
-
-	@Argument(fullName="verboseOutput", shortName="verbose",
-			doc="Verbose output file in text format", required=false)
-	java.io.File verboseOutput = null;
-
-    @Argument(fullName="bedOutput", shortName="bed",
-            doc="Lightweight bed output file (only positions and events, no stats/annotations)", required=false)
-    java.io.File bedOutput = null;
-
-	@Argument(fullName="minCoverage", shortName="minCoverage",
-			doc="indel calls will be made only at sites with coverage of minCoverage or more reads; with --somatic this value is applied to tumor sample", required=false)
-	int minCoverage = 6;
-
-	@Argument(fullName="minNormalCoverage", shortName="minNormalCoverage",
-			doc="used only with --somatic;  normal sample must have at least minNormalCoverage or more reads at the site to call germline/somatic indel, otherwise the indel (in tumor) is ignored", required=false)
-	int minNormalCoverage = 4;
-
-	@Argument(fullName="minFraction", shortName="minFraction",
-			doc="Minimum fraction of reads with CONSENSUS indel at a site, out of all reads covering the site, required for making a call"+
-			" (fraction of non-consensus indels at the site is not considered here, see minConsensusFraction)", required=false)
-	double minFraction = 0.3;
-
-	@Argument(fullName="minConsensusFraction", shortName="minConsensusFraction",
-			doc="Indel call is made only if fraction of CONSENSUS indel observations at a site wrt all indel observations at the site exceeds this threshold", required=false)
-	double minConsensusFraction = 0.7;
-
-	@Argument(fullName="minIndelCount", shortName="minCnt",
-			doc="Minimum count of reads supporting consensus indel required for making the call. "+
-			" This filter supercedes minFraction, i.e. indels with acceptable minFraction at low coverage "+
-			"(minIndelCount not met) will not pass.", required=false)
-	int minIndelCount = 0;
-
-	@Argument(fullName="refseq", shortName="refseq",
-			doc="Name of RefSeq transcript annotation file. If specified, indels will be annotated with GENOMIC/UTR/INTRON/CODING and with the gene name", required=false)
-	String RefseqFileName = null;
-
-    @Argument(fullName="blacklistedLanes", shortName="BL",
-            doc="Name of lanes (platform units) that should be ignored. Reads coming from these lanes will never be seen "+
-                    "by this application, so they will not contribute indels to consider and will not be counted.", required=false)
-    PlatformUnitFilterHelper dummy;
-     @Argument(fullName="indel_debug", shortName="idebug", doc="Detailed printout for debugging, do not turn this on",required=false) Boolean DEBUG = false;
-    @Argument(fullName="window_size", shortName="ws", doc="Size (bp) of the sliding window used for accumulating the coverage. "+
-            "May need to be increased to accomodate longer reads or longer deletions.",required=false) int WINDOW_SIZE = 200;
-    @Argument(fullName="maxNumberOfReads",shortName="mnr",doc="Maximum number of reads to cache in the window; if number of reads exceeds this number,"+
-                " the window will be skipped and no calls will be made from it",required=false) int MAX_READ_NUMBER = 10000;
-
-	private WindowContext tumor_context;
-	private WindowContext normal_context; 
-	private int currentContigIndex = -1;
-    private int contigLength = -1; // we see to much messy data with reads hanging out of contig ends...
-	private int currentPosition = -1; // position of the last read we've seen on the current contig
-	private String refName = null;
-	private java.io.Writer output = null;
-	private GenomeLoc location = null;
-    private long normalCallsMade = 0L, tumorCallsMade = 0L;
-
-    boolean outOfContigUserWarned = false;
-
-    private LocationAwareSeekableRODIterator refseqIterator=null;
-
-//	private Set normalReadGroups; // we are going to remember which read groups are normals and which are tumors in order to be able
-//	private Set tumorReadGroups ; // to properly assign the reads coming from a merged stream
-    private Set normalSamples; // we are going to remember which samples are normal and which are tumor:
-    private Set tumorSamples ; // these are used only to generate genotypes for vcf output
-
-	private int NQS_WIDTH = 5; // 5 bases on each side of the indel for NQS-style statistics
-
-    private Writer bedWriter = null;
-    private Writer verboseWriter = null;
-
-
-	private static String annGenomic = "GENOMIC";
-	private static String annIntron = "INTRON";
-	private static String annUTR = "UTR";
-	private static String annCoding = "CODING";
-	private static String annUnknown = "UNKNOWN";
-
-    enum CallType {
-        NOCOVERAGE,
-        BADCOVERAGE,
-        NOEVIDENCE,
-        GERMLINE,
-        SOMATIC
-    };
-
-	private SAMRecord lastRead;
-    private byte[] refBases;
-    private ReferenceDataSource refData;
-    private Iterator genotypeIntervalIterator = null;
-
-    // the current interval in the list of intervals, for which we want to do full genotyping
-    private GenomeLoc currentGenotypeInterval = null;
-    private long lastGenotypedPosition = -1; // last position on the currentGenotypeInterval, for which a call was already printed;
-                                     // can be 1 base before lastGenotyped start
-
-
-    // "/humgen/gsa-scr1/GATK_Data/refGene.sorted.txt"
-
-    private Set getVCFHeaderInfo() {
-        Set headerInfo = new HashSet();
-
-        // first, the basic info
-        headerInfo.add(new VCFHeaderLine("source", "IndelGenotyperV2"));
-        headerInfo.add(new VCFHeaderLine("reference", getToolkit().getArguments().referenceFile.getName()));
-
-        // FORMAT and INFO fields
-//        headerInfo.addAll(VCFUtils.getSupportedHeaderStrings());
-
-        headerInfo.addAll(VCFIndelAttributes.getAttributeHeaderLines());
-        if ( call_somatic ) {
-            headerInfo.add(new VCFInfoHeaderLine(VCFConstants.SOMATIC_KEY, 0, VCFHeaderLineType.Flag, "Somatic event"));
-        }  else {
-        }
-
-        // all of the arguments from the argument collection
-        Set args = new HashSet();
-        args.add(this);
-        args.addAll(getToolkit().getFilters());
-        Map commandLineArgs = getToolkit().getApproximateCommandLineArguments(args);
-        for ( Map.Entry commandLineArg : commandLineArgs.entrySet() )
-            headerInfo.add(new VCFHeaderLine(String.format("IGv2_%s", commandLineArg.getKey()), commandLineArg.getValue()));
-        // also, the list of input bams
-        for ( String fileName : getToolkit().getArguments().samFiles )
-            headerInfo.add(new VCFHeaderLine("IGv2_bam_file_used", fileName));
-
-        return headerInfo;
-    }
-
-
-	@Override
-	public void initialize() {
-
-        call_somatic =  (call_unpaired ? false : true);
-		normal_context = new WindowContext(0,WINDOW_SIZE);
-        normalSamples = new HashSet();
-
-        if ( bedOutput != null && output_file != null ) {
-            throw new UserException.DeprecatedArgument("-O", "-O option is deprecated and -bed option replaces it; you can not use both at the same time");
-        }
-
-		if ( RefseqFileName != null ) {
-            logger.info("Using RefSeq annotations from "+RefseqFileName);
-
-			RMDTrackBuilder builder = new RMDTrackBuilder(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(),
-                                                          getToolkit().getGenomeLocParser(),
-                                                          getToolkit().getArguments().unsafe);
-            RMDTrack refseq = builder.createInstanceOfTrack(RefSeqCodec.class,new File(RefseqFileName));
-
-            refseqIterator = new SeekableRODIterator(refseq.getHeader(),
-                                                     refseq.getSequenceDictionary(),
-                                                     getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(),
-                                                     getToolkit().getGenomeLocParser(),
-                                                     refseq.getIterator());
-		}
-
-		if ( refseqIterator == null ) logger.info("No gene annotations available");
-
-		int nSams = getToolkit().getArguments().samFiles.size();
-
-        if ( call_somatic ) {
-            if ( nSams < 2 ) throw new UserException.BadInput("In default (paired sample) mode at least two bam files (normal and tumor) must be specified");
-            tumor_context = new WindowContext(0,WINDOW_SIZE);
-            tumorSamples = new HashSet();
-        }
-
-        int nNorm = 0;
-        int nTum = 0;
-        for ( SAMReaderID rid : getToolkit().getReadsDataSource().getReaderIDs() ) {
-             Tags tags = rid.getTags() ;
-             if ( tags.getPositionalTags().isEmpty() && call_somatic )
-                 throw new UserException.BadInput("In default (paired sample) mode all input bam files must be tagged as either 'normal' or 'tumor'. Untagged file: "+
-                         getToolkit().getSourceFileForReaderID(rid));
-             boolean normal = false;
-             boolean tumor = false;
-             for ( String s : tags.getPositionalTags() ) { // we allow additional unrelated tags (and we do not use them), but we REQUIRE one of Tumor/Normal to be present if --somatic is on
-                 if ( "NORMAL".equals(s.toUpperCase()) ) {
-                     normal = true;
-                     nNorm++;
-                 }
-                 if ( "TUMOR".equals(s.toUpperCase()) ) {
-                     tumor = true;
-                     nTum++ ;
-                 }
-             }
-             if ( call_somatic && normal && tumor ) throw new UserException.BadInput("Input bam file "+
-                     getToolkit().getSourceFileForReaderID(rid)+" is tagged both as normal and as tumor. Which one is it??");
-             if ( call_somatic && !normal && ! tumor )
-                 throw new UserException.BadInput("In somatic mode all input bams must be tagged as either normal or tumor. Encountered untagged file: "+
-                    getToolkit().getSourceFileForReaderID(rid));
-             if ( ! call_somatic && (normal || tumor) )
-                 System.out.println("WARNING: input bam file "+getToolkit().getSourceFileForReaderID(rid)
-                         +" is tagged as Normal and/or Tumor, but somatic mode is not on. Tags will ne IGNORED");
-            if ( call_somatic && tumor ) {
-                for ( SAMReadGroupRecord rg : getToolkit().getSAMFileHeader(rid).getReadGroups() ) {
-                    tumorSamples.add(rg.getSample());
-                }
-            } else {
-                for ( SAMReadGroupRecord rg : getToolkit().getSAMFileHeader(rid).getReadGroups() ) {
-                    normalSamples.add(rg.getSample());
-                }
-            }
-            if ( genotypeIntervalsFile != null ) {
-
-                if ( ! GENOTYPE_NOT_SORTED && IntervalUtils.isIntervalFile(genotypeIntervalsFile)) {
-                    // prepare to read intervals one-by-one, as needed (assuming they are sorted).
-                    genotypeIntervalIterator = new IntervalFileMergingIterator(getToolkit().getGenomeLocParser(),
-                        new java.io.File(genotypeIntervalsFile), IntervalMergingRule.OVERLAPPING_ONLY );
-                } else {
-                    // read in the whole list of intervals for cleaning
-                    GenomeLocSortedSet locs = IntervalUtils.sortAndMergeIntervals(getToolkit().getGenomeLocParser(),
-                        IntervalUtils.parseIntervalArguments(getToolkit().getGenomeLocParser(),Arrays.asList(genotypeIntervalsFile),true), IntervalMergingRule.OVERLAPPING_ONLY);
-                    genotypeIntervalIterator = locs.iterator();
-                }
-
-                // wrap intervals requested for genotyping inside overlapping iterator, so that we actually
-                // genotype only on the intersections of the requested intervals with the -L intervals
-                genotypeIntervalIterator = new OverlappingIntervalIterator(genotypeIntervalIterator, getToolkit().getIntervals().iterator() );
-
-                currentGenotypeInterval = genotypeIntervalIterator.hasNext() ? genotypeIntervalIterator.next() : null;
-
-                if ( DEBUG) System.out.println("DEBUG>> first genotyping interval="+currentGenotypeInterval);
-
-                if ( currentGenotypeInterval != null ) lastGenotypedPosition = currentGenotypeInterval.getStart()-1;
-            }
-
-        }
-
-		location = getToolkit().getGenomeLocParser().createGenomeLoc(getToolkit().getSAMFileHeader().getSequence(0).getSequenceName(),1);
-
-        normalSamples = getToolkit().getSamplesByReaders().get(0);
-
-        try {
-            // we already checked that bedOutput and output_file are not set simultaneously
-            if ( bedOutput != null ) bedWriter = new FileWriter(bedOutput);
-            if ( output_file != null ) bedWriter = new FileWriter(output_file);
-        } catch (java.io.IOException e) {
-            throw new UserException.CouldNotReadInputFile(bedOutput, "Failed to open BED file for writing.", e);
-        }
-        try {
-            if ( verboseOutput != null ) verboseWriter = new FileWriter(verboseOutput);
-        } catch (java.io.IOException e) {
-            throw new UserException.CouldNotReadInputFile(verboseOutput, "Failed to open BED file for writing.", e);
-        }
-
-        vcf_writer.writeHeader(new VCFHeader(getVCFHeaderInfo(), SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader()))) ;
-        refData = new ReferenceDataSource(getToolkit().getArguments().referenceFile);
-	}
-
-
-	@Override
-	public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
-
-    //        if ( read.getReadName().equals("428EFAAXX090610:2:36:1384:639#0") ) System.out.println("GOT READ");
-
-            if ( DEBUG ) {
-                //            System.out.println("DEBUG>> read at "+ read.getAlignmentStart()+"-"+read.getAlignmentEnd()+
-                //                    "("+read.getCigarString()+")");
-                if ( read.getDuplicateReadFlag() ) System.out.println("DEBUG>> Duplicated read (IGNORED)");
-            }
-
-            if ( AlignmentUtils.isReadUnmapped(read) ||
-			 read.getDuplicateReadFlag() ||
-			 read.getNotPrimaryAlignmentFlag() ||
-			 read.getMappingQuality() == 0 ) {
-			return 0; // we do not need those reads!
-            }
-
-            if ( read.getReferenceIndex() != currentContigIndex ) {
-                // we just jumped onto a new contig
-                if ( DEBUG ) System.out.println("DEBUG>>> Moved to contig "+read.getReferenceName());
-                if ( read.getReferenceIndex() < currentContigIndex ) // paranoidal
-                    throw new UserException.MissortedBAM(SAMFileHeader.SortOrder.coordinate, read, "Read "+read.getReadName()+": contig is out of order; input BAM file is unsorted");
-
-                // print remaining indels from the previous contig (if any);
-                if ( call_somatic ) emit_somatic(1000000000, true);
-                else emit(1000000000,true);
-
-                currentContigIndex = read.getReferenceIndex();
-                currentPosition = read.getAlignmentStart();
-                refName = new String(read.getReferenceName());
-
-                location = getToolkit().getGenomeLocParser().createGenomeLoc(refName,location.getStart(),location.getStop());
-                contigLength = getToolkit().getGenomeLocParser().getContigInfo(refName).getSequenceLength();
-                outOfContigUserWarned = false;
-
-                lastGenotypedPosition = -1;
-
-                normal_context.clear(); // reset coverage window; this will also set reference position to 0
-                if ( call_somatic) tumor_context.clear();
-
-                refBases = new String(refData.getReference().getSequence(read.getReferenceName()).getBases()).toUpperCase().getBytes();
-            }
-
-            // we have reset the window to the new contig if it was required and emitted everything we collected
-            // on a previous contig. At this point we are guaranteed that we are set up properly for working
-            // with the contig of the current read.
-
-            // NOTE: all the sanity checks and error messages below use normal_context only. We make sure that normal_context and
-            // tumor_context are synchronized exactly (windows are always shifted together by emit_somatic), so it's safe
-
-            if ( read.getAlignmentStart() < currentPosition ) // oops, read out of order?
-                throw new UserException.MissortedBAM(SAMFileHeader.SortOrder.coordinate, read, "Read "+read.getReadName() +" out of order on the contig\n"+
-                        "Read starts at "+refName+":"+read.getAlignmentStart()+"; last read seen started at "+refName+":"+currentPosition
-                        +"\nLast read was: "+lastRead.getReadName()+" RG="+lastRead.getAttribute("RG")+" at "+lastRead.getAlignmentStart()+"-"
-                        +lastRead.getAlignmentEnd()+" cigar="+lastRead.getCigarString());
-
-            currentPosition = read.getAlignmentStart();
-            lastRead = read;
-
-            if ( read.getAlignmentEnd() > contigLength  ) {
-                if ( ! outOfContigUserWarned ) {
-                    System.out.println("WARNING: Reads aligned past contig length on "+ location.getContig()+"; all such reads will be skipped");
-                    outOfContigUserWarned = true;
-                }
-                return 0;
-            }
-
-            long alignmentEnd = read.getAlignmentEnd();
-            Cigar c = read.getCigar();
-            int lastNonClippedElement = 0; // reverse offset to the last unclipped element
-            CigarOperator op = null;
-            // moving backwards from the end of the cigar, skip trailing S or H cigar elements:
-            do {
-                lastNonClippedElement++;
-                op = c.getCigarElement( c.numCigarElements()-lastNonClippedElement ).getOperator();
-            } while ( op == CigarOperator.H || op == CigarOperator.S );
-
-            // now op is the last non-S/H operator in the cigar.
-
-            // a little trick here: we want to make sure that current read completely fits into the current
-            // window so that we can accumulate indel observations over the whole length of the read.
-            // The ::getAlignmentEnd() method returns the last position on the reference where bases from the
-            // read actually match (M cigar elements). After our cleaning procedure, we can have reads that end
-            // with I element, which is not gonna be counted into alignment length on the reference. On the other hand,
-            // in this program we assign insertions, internally, to the first base *after* the insertion position.
-            // Hence, we have to make sure that that extra base is already in the window or we will get IndexOutOfBounds.
-
-            if ( op == CigarOperator.I) alignmentEnd++;
-
-            if ( alignmentEnd > normal_context.getStop()) {
-
-                // we don't emit anything until we reach a read that does not fit into the current window.
-                // At that point we try shifting the window to the start of that read (or reasonably close) and emit everything prior to
-                // that position. This is legitimate, since the reads are sorted and  we are not gonna see any more coverage at positions
-                // below the current read's start.
-                // Clearly, we assume here that window is large enough to accomodate any single read, so simply shifting
-                // the window to around the read's start will ensure that the read fits...
-
-                if ( DEBUG) System.out.println("DEBUG>> Window at "+normal_context.getStart()+"-"+normal_context.getStop()+", read at "+
-                                read.getAlignmentStart()+": trying to emit and shift" );
-                if ( call_somatic ) emit_somatic( read.getAlignmentStart(), false );
-                else emit( read.getAlignmentStart(), false );
-
-                // let's double check now that the read fits after the shift
-                if ( read.getAlignmentEnd() > normal_context.getStop()) {
-                    // ooops, looks like the read does not fit into the window even after the latter was shifted!!
-                    throw new UserException.BadArgumentValue("window_size", "Read "+read.getReadName()+": out of coverage window bounds. Probably window is too small, so increase the value of the window_size argument.\n"+
-                                             "Read length="+read.getReadLength()+"; cigar="+read.getCigarString()+"; start="+
-                                             read.getAlignmentStart()+"; end="+read.getAlignmentEnd()+
-                                             "; window start (after trying to accomodate the read)="+normal_context.getStart()+"; window end="+normal_context.getStop());
-                }
-            }
-
-            if ( call_somatic ) {
-
-                Tags tags =  getToolkit().getReaderIDForRead(read).getTags();
-                boolean assigned = false;
-                for ( String s : tags.getPositionalTags() ) {
-                    if ( "NORMAL".equals(s.toUpperCase()) ) {
-                        normal_context.add(read,ref.getBases());
-                        assigned = true;
-                        break;
-                    }
-                    if ( "TUMOR".equals(s.toUpperCase()) ) {
-                        tumor_context.add(read,ref.getBases());
-                        assigned = true;
-                        break;
-                    }
-                }
-                if ( ! assigned )
-                    throw new StingException("Read "+read.getReadName()+" from "+getToolkit().getSourceFileForReaderID(getToolkit().getReaderIDForRead(read))+
-                    "has no Normal/Tumor tag associated with it");
-
-//                String rg = (String)read.getAttribute("RG");
-//                if ( rg == null )
-//                    throw new UserException.MalformedBam(read, "Read "+read.getReadName()+" has no read group in merged stream. RG is required for somatic calls.");
-
-//                if ( normalReadGroups.contains(rg) ) {
-//                    normal_context.add(read,ref.getBases());
-//                } else if ( tumorReadGroups.contains(rg) ) {
-//                    tumor_context.add(read,ref.getBases());
-//                } else {
-//                    throw new UserException.MalformedBam(read, "Unrecognized read group in merged stream: "+rg);
-//                }
-
-                if ( tumor_context.getReads().size() > MAX_READ_NUMBER ) {
-                    System.out.println("WARNING: a count of "+MAX_READ_NUMBER+" reads reached in a window "+
-                            refName+':'+tumor_context.getStart()+'-'+tumor_context.getStop()+" in tumor sample. The whole window will be dropped.");
-                    tumor_context.shift(WINDOW_SIZE);
-                    normal_context.shift(WINDOW_SIZE);
-                }
-                if ( normal_context.getReads().size() > MAX_READ_NUMBER ) {
-                    System.out.println("WARNING: a count of "+MAX_READ_NUMBER+" reads reached in a window "+
-                            refName+':'+normal_context.getStart()+'-'+normal_context.getStop()+" in normal sample. The whole window will be dropped");
-                    tumor_context.shift(WINDOW_SIZE);
-                    normal_context.shift(WINDOW_SIZE);
-                }
-
-
-            } else {
-                normal_context.add(read, ref.getBases());
-                if ( normal_context.getReads().size() > MAX_READ_NUMBER ) {
-                    System.out.println("WARNING: a count of "+MAX_READ_NUMBER+" reads reached in a window "+
-                            refName+':'+normal_context.getStart()+'-'+normal_context.getStop()+". The whole window will be dropped");
-                    normal_context.shift(WINDOW_SIZE);
-                }
-            }
-
-            return 1;
-	}
-
-    /**
-     * Auxiliary shortcut: tells whether position p, taken on the contig of the current 'location',
-     * lies past the interval l.
-     *
-     * @param p position on the contig of the current 'location'
-     * @param l interval to compare against
-     * @return true if the current contig index is greater than l's contig index, or if both contig
-     *         indices are equal and p is greater than l.getStop()
-     */
-    private boolean pastInterval(long p, GenomeLoc l) {
-        // already on a later contig: definitely past the interval
-        if ( location.getContigIndex() > l.getContigIndex() ) return true;
-        // on an earlier contig: can not be past it
-        if ( location.getContigIndex() != l.getContigIndex() ) return false;
-        // same contig: compare against the interval's stop
-        return p > l.getStop();
-    }
-
-    /** Emit calls of the specified type across genotyping intervals, from position lastGenotypedPosition+1 to
-     * pos-1, inclusive.
-     * @param contigIndex
-     * @param pos
-     * @param call
-     */
-    /*
-    private void emitNoCallsUpTo(int contigIndex, long pos, CallType call) {
-
-        if ( contigIndex < currentGenotypeInterval.getContigIndex() ||
-             contigIndex == currentGenotypeInterval.getContigIndex() && pos <= currentGenotypeInterval.getStart() ) return;
-
-        if ( contigIndex == currentGenotypeInterval.getContigIndex() && pos >= currentGenotypeInterval.getStart() ) {
-            for ( long p = lastGenotypedPosition+1; p < pos; p++ ) {
-
-            }
-        }
-        while( currentGenotypeInterval != null ) {
-
-            while ( )
-        if ( genotypeIntervalIterator.hasNext() ) {
-            currentGenotypeInterval = genotypeIntervalIterator.next() ;
-            if ( pastInterval(p,currentGenotypeInterval) ) {
-                // if we are about to jump over the whole next interval, we need to emit NO_COVERAGE calls there!
-                emitNoCoverageCalls(currentGenotypeInterval);
-            }
-        } else {
-            currentGenotypeInterval = null;
-        }
-        }
-    }
-*/
-    
-   /** Output indel calls up to the specified position and shift the window: after this method is executed, the
-    * first element of the window maps onto 'position', if possible, or at worst a few bases to the left of 'position' if we may need more
-    * reads to get full NQS-style statistics for an indel in the close proximity of 'position'.
-    *
-    * This is the single-sample counterpart of emit_somatic(): only normal_context is examined and shifted here.
-    * If adjustPosition() fails (returns -1), the window is shifted without emitting anything further.
-    *
-    * @param position position the window start is requested to move to; it is first passed through adjustPosition()
-    * @param force if true, an indel is emitted even when its right NQS boundary reaches the adjusted shift position
-    *              (i.e. we do not stop and wait for more coverage to accumulate around it)
-    */
-   private void emit(long position, boolean force) {
-
-            long adjustedPosition = adjustPosition(position);
-
-            if ( adjustedPosition == -1 ) {
-                // failed to find appropriate shift position, the data are probably too messy anyway so we drop them altogether
-                normal_context.shift((int)(position-normal_context.getStart()));
-                return;
-            }
-            // where the window start will finally be moved to; may be pulled further left inside the loop below
-            long move_to = adjustedPosition;
-
-            // walk over all window positions located before the (adjusted) shift target
-            for ( int pos = normal_context.getStart() ; pos < Math.min(adjustedPosition,normal_context.getStop()+1) ; pos++ ) {
-
-                boolean genotype = false;
-                // first let's see if we need to genotype current position:
-
-                final long p = pos - 1; // our internally used positions (pos) are +1 compared to external format spec (e.g. vcf)
-
-                // a record was already written for this position (or a later one) on a previous pass: do not revisit
-                if ( pos <= lastGenotypedPosition ) continue;
-
-                // advance through the genotyping intervals until we find the one that contains p or lies ahead of it
-                while ( currentGenotypeInterval != null ) {
-
-                    // if we did not even reach next interval yet, no genotyping at current position:
-                    if ( location.getContigIndex() < currentGenotypeInterval.getContigIndex() ||
-                         location.getContigIndex() == currentGenotypeInterval.getContigIndex() &&
-                                 p < currentGenotypeInterval.getStart() ) break;
-                    if ( pastInterval(p, currentGenotypeInterval) ) {
-                        // we are past current genotyping interval, so we are done with it; let's load next interval:
-                        currentGenotypeInterval = genotypeIntervalIterator.hasNext() ? genotypeIntervalIterator.next() : null;
-                        continue; // re-enter the loop to check against the interval we just loaded
-                    }
-
-                    // we reach this point only if p is inside current genotyping interval; set the flag and bail out:
-                    genotype = true;
-                    break;
-                }
-
-//                if ( DEBUG ) System.out.println("DEBUG>> pos="+pos +"; genotyping interval="+currentGenotypeInterval+"; genotype="+genotype);
-
-                // nothing observed here and genotyping was not requested: nothing to do at this position
-                if ( normal_context.indelsAt(pos).size() == 0 && ! genotype ) continue;
-
-                IndelPrecall normalCall = new IndelPrecall(normal_context,pos,NQS_WIDTH);
-
-                // coverage filter is bypassed when genotyping of this position was explicitly requested
-                if ( normalCall.getCoverage() < minCoverage && ! genotype ) {
-                    if ( DEBUG ) {
-                        System.out.println("DEBUG>> Indel at "+pos+"; coverare in normal="+normalCall.getCoverage()+" (SKIPPED)");
-                    }
-                    continue; // low coverage
-                }
-
-                if ( DEBUG ) System.out.println("DEBUG>> "+(normalCall.getAllVariantCount() == 0?"No Indel":"Indel")+" at "+pos);
-
-                // boundaries of the NQS window around the event: NQS_WIDTH bases to the left of pos (clipped to the window start)
-                // and NQS_WIDTH bases to the right of the event's span on the reference
-                long left = Math.max( pos-NQS_WIDTH, normal_context.getStart() );
-                long right = pos+( normalCall.getVariant() == null ? 0 : normalCall.getVariant().lengthOnRef())+NQS_WIDTH-1;
-
-                if ( right >= adjustedPosition && ! force) {
-                    // we are not asked to force-shift, and there's still additional coverage to the right of current indel
-                    // that we need to collect, so it's too early to emit it;
-                    // instead we shift only up to the left boundary of the indel's NQS window, so that we could keep collecting that coverage
-                    move_to = adjustPosition(left);
-                    if ( move_to == -1 ) {
-                        // failed to find appropriate shift position, the data are probably too messy anyway so we drop them altogether
-                        normal_context.shift((int)(adjustedPosition-normal_context.getStart()));
-                        return;
-                    }
-                    if ( DEBUG ) System.out.println("DEBUG>> waiting for coverage; actual shift performed to "+ move_to);
-                    break;
-                }
-
-                // if indel is too close to the end of the window but we need to emit anyway (force-shift), adjust right:
-                // (note: 'right' is not read again after this point in this method)
-                if ( right > normal_context.getStop() ) right = normal_context.getStop();
-
-    //            location = getToolkit().getGenomeLocParser().setStart(location,pos);
-    //            location = getToolkit().getGenomeLocParser().setStop(location,pos); // retrieve annotation data
-
-                // move the current location to pos on the same contig
-                location = getToolkit().getGenomeLocParser().createGenomeLoc(location.getContig(), pos);
-
-                boolean haveCall = normalCall.isCall(); // cache the value
-
-                // a record is written both for actual calls and for positions where genotyping was requested
-                if ( haveCall || genotype) {
-                    if ( haveCall ) normalCallsMade++;
-                    printVCFLine(vcf_writer,normalCall);
-                    if ( bedWriter != null ) normalCall.printBedLine(bedWriter);
-                    if ( verboseWriter != null ) printVerboseLine(verboseWriter, normalCall);
-                    lastGenotypedPosition = pos;
-                }
-
-                normal_context.indelsAt(pos).clear();
-                    // we dealt with this indel; don't want to see it again
-                    // (we might otherwise in the case when 1) there is another indel that follows
-                    // within MISMATCH_WIDTH bases and 2) we'd need to wait for more coverage for that next indel)
-
-//			for ( IndelVariant var : variants ) {
-//				System.out.print("\t"+var.getType()+"\t"+var.getBases()+"\t"+var.getCount());
-//			}
-            }
-
-            if ( DEBUG ) System.out.println("DEBUG>> Actual shift to " + move_to + " ("+adjustedPosition+")");
-            // finally move the window start to the position we settled on
-            normal_context.shift((int)(move_to - normal_context.getStart() ) );
-    }
-
-    /**
-     * Shortcut: reports whether indel observations exist within the interval given by start and stop.
-     * When tumor_context is null (single-sample calls) only normal_context is queried; otherwise
-     * (two-sample/somatic calls) an indel in either of the two window contexts is enough.
-     *
-     * @param start interval start, passed unchanged to hasIndelsInInterval()
-     * @param stop interval stop, passed unchanged to hasIndelsInInterval()
-     * @return true if at least one of the active window contexts reports indels in the interval
-     */
-    private boolean indelsPresentInInterval(long start, long stop) {
-        // the tumor window is consulted first, but only when it exists
-        boolean found = ( tumor_context != null ) && tumor_context.hasIndelsInInterval(start,stop);
-        if ( ! found ) {
-            found = normal_context.hasIndelsInInterval(start,stop);
-        }
-        return found;
-    }
-        /** Takes the position, to which window shift is requested, and tries to adjust it in such a way that no NQS window is broken.
-         * Namely, this method checks, iteratively, if there is an indel within NQS_WIDTH bases ahead of initially requested or adjusted 
-         * shift position. If there is such an indel,
-         * then shifting to that position would lose some or all NQS-window bases to the left of the indel (since it's not going to be emitted
-         * just yet). Instead, this method tries to readjust the shift position leftwards so that full NQS window to the left of the next indel
-         * is preserved. This method tries thie strategy 4 times (so that it would never walk away too far to the left), and if it fails to find
-         * an appropriate adjusted shift position (which could happen if there are many indels following each other at short intervals), it will give up, 
-         * go back to the original requested shift position and try finding the first shift poisition that has no indel associated with it.
-         */
-
-    private long adjustPosition(long request) {
-        long initial_request = request;
-        int attempts = 0;
-        boolean failure = false;
-        while ( indelsPresentInInterval(request,request+NQS_WIDTH)  ) {
-            request -= NQS_WIDTH;
-            if ( DEBUG ) System.out.println("DEBUG>> indel observations present within "+NQS_WIDTH+" bases ahead. Resetting shift to "+request);
-            attempts++;
-            if ( attempts == 4 ) {
-                if ( DEBUG ) System.out.println("DEBUG>> attempts to preserve full NQS window failed; now trying to find any suitable position.") ;
-                failure = true;
-                break;
-            }
-        }
-
-        if ( failure ) {
-            // we tried 4 times but did not find a good shift position that would preserve full nqs window
-            // around all indels. let's fall back and find any shift position as long and there's no indel at the very
-            // first position after the shift (this is bad for other reasons); if it breaks a nqs window, so be it
-            request = initial_request;
-            attempts = 0;
-            while ( indelsPresentInInterval(request,request+1) ) {
-                request--;
-                if ( DEBUG ) System.out.println("DEBUG>> indel observations present within "+NQS_WIDTH+" bases ahead. Resetting shift to "+request);
-                attempts++;
-                if ( attempts == 50 ) {
-                    System.out.println("WARNING: Indel at every position in the interval "+refName+":"+request+"-"+initial_request+
-                            ". Can not find a break to shift context window to; no calls will be attempted in the current window.");
-                    return -1;
-                }
-            }
-        }
-        if ( DEBUG ) System.out.println("DEBUG>> Found acceptable target position "+request);
-        return request;
-    }
-
-    /** Output somatic indel calls up to the specified position and shift the coverage array(s): after this method is executed
-     * first elements of the coverage arrays map onto 'position', or a few bases prior to the specified position
-     * if there is an indel in close proximity to 'position' so that we may get more coverage around it later.
-     *
-     * This is the two-sample counterpart of emit(): candidate positions are driven by indel observations in tumor_context,
-     * and both tumor_context and normal_context are always shifted together.
-     * If adjustPosition() fails (returns -1), both windows are shifted without emitting anything further.
-     *
-     * @param position position the window start is requested to move to; it is first passed through adjustPosition()
-     * @param force if true, an indel is emitted even when its right NQS boundary reaches the adjusted shift position
-     *              (i.e. we do not stop and wait for more coverage to accumulate around it)
-     */
-    private void emit_somatic(long position, boolean force) {
-
-        long adjustedPosition = adjustPosition(position);
-        if ( adjustedPosition == -1 ) {
-            // failed to find appropriate shift position, the data are probably too messy anyway so we drop them altogether
-            normal_context.shift((int)(position-normal_context.getStart()));
-            tumor_context.shift((int)(position-tumor_context.getStart()));
-            return;
-        }
-        // where the window starts will finally be moved to; may be pulled further left inside the loop below
-        long move_to = adjustedPosition;
-
-        if ( DEBUG ) System.out.println("DEBUG>> Emitting in somatic mode up to "+position+" force shift="+force+" current window="+tumor_context.getStart()+"-"+tumor_context.getStop());
-
-        // walk over all tumor window positions located before the (adjusted) shift target
-        for ( int pos = tumor_context.getStart() ; pos < Math.min(adjustedPosition,tumor_context.getStop()+1) ; pos++ ) {
-
-            boolean genotype = false;
-             // first let's see if we need to genotype current position:
-
-             final long p = pos - 1; // our internally used positions (pos) are +1 compared to external format spec (e.g. vcf)
-
-             // a record was already written for this position (or a later one) on a previous pass: do not revisit
-             if ( pos <= lastGenotypedPosition ) continue;
-
-             // advance through the genotyping intervals until we find the one that contains p or lies ahead of it
-             while ( currentGenotypeInterval != null ) {
-
-                 // if we did not even reach next interval yet, no genotyping at current position:
-                 if ( location.getContigIndex() < currentGenotypeInterval.getContigIndex() ||
-                      location.getContigIndex() == currentGenotypeInterval.getContigIndex() &&
-                              p < currentGenotypeInterval.getStart() ) break;
-                 if ( pastInterval(p, currentGenotypeInterval) ) {
-                     // we are past current genotyping interval, so we are done with it; let's load next interval:
-                     currentGenotypeInterval = genotypeIntervalIterator.hasNext() ? genotypeIntervalIterator.next() : null;
-                     continue; // re-enter the loop to check against the interval we just loaded
-                 }
-
-                 // we reach this point only if p is inside current genotyping interval; set the flag and bail out:
-                 genotype = true;
-                 break;
-             }
-//            if ( DEBUG) System.out.println("DEBUG>> pos="+pos +"; genotyping interval="+currentGenotypeInterval+"; genotype="+genotype);
-
-            if ( tumor_context.indelsAt(pos).size() == 0 && ! genotype ) continue; // no indels in tumor
-
-            if ( DEBUG && genotype ) System.out.println("DEBUG>> Genotyping requested at "+pos);
-
-            IndelPrecall tumorCall = new IndelPrecall(tumor_context,pos,NQS_WIDTH);
-            IndelPrecall normalCall = new IndelPrecall(normal_context,pos,NQS_WIDTH);
-
-            // both coverage filters are bypassed when genotyping of this position was explicitly requested
-            if ( tumorCall.getCoverage() < minCoverage && ! genotype ) {
-                if ( DEBUG ) {
-                    System.out.println("DEBUG>> Indel in tumor at "+pos+"; coverare in tumor="+tumorCall.getCoverage()+" (SKIPPED)");
-                }
-                continue; // low coverage
-            }
-            if ( normalCall.getCoverage() < minNormalCoverage && ! genotype ) {
-                if ( DEBUG ) {
-                    System.out.println("DEBUG>> Indel in tumor at "+pos+"; coverare in normal="+normalCall.getCoverage()+" (SKIPPED)");
-                }
-                continue; // low coverage
-            }
-
-            if ( DEBUG ) {
-                System.out.print("DEBUG>> "+(tumorCall.getAllVariantCount() == 0?"No Indel":"Indel")+" in tumor, ");
-                System.out.print("DEBUG>> "+(normalCall.getAllVariantCount() == 0?"No Indel":"Indel")+" in normal at "+pos);
-            }
-
-            // boundaries of the NQS window around the tumor event: NQS_WIDTH bases to the left of pos (clipped to the window start)
-            // and NQS_WIDTH bases to the right of the event's span on the reference
-            long left = Math.max( pos-NQS_WIDTH, tumor_context.getStart() );
-            long right = pos+ ( tumorCall.getVariant() == null ? 0 : tumorCall.getVariant().lengthOnRef() )+NQS_WIDTH-1;
-
-            if ( right >= adjustedPosition && ! force) {
-                // we are not asked to force-shift, and there's still additional coverage to the right of current indel
-                // that we need to collect, so it's too early to emit it;
-                // instead we shift only up to the left boundary of the indel's NQS window, so that we could keep collecting that coverage
-                move_to = adjustPosition(left);
-                if ( move_to == -1 ) {
-                    // failed to find appropriate shift position, the data are probably too messy anyway so we drop them altogether
-                    normal_context.shift((int)(adjustedPosition-normal_context.getStart()));
-                    tumor_context.shift((int)(adjustedPosition-tumor_context.getStart()));
-                    return;
-                }
-                if ( DEBUG ) System.out.println("DEBUG>> waiting for coverage; actual shift performed to "+ move_to);
-                break;
-            }
-
-            // note: 'right' is not read again after this point in this method
-            if ( right > tumor_context.getStop() ) right = tumor_context.getStop(); // if indel is too close to the end of the window but we need to emit anyway (force-shift), adjust right
-
-//            location = getToolkit().getGenomeLocParser().setStart(location,pos);
-//            location = getToolkit().getGenomeLocParser().setStop(location,pos); // retrieve annotation data
-
-            // move the current location to pos on the same contig
-            location = getToolkit().getGenomeLocParser().createGenomeLoc(location.getContig(),pos); // retrieve annotation data
-
-            boolean haveCall = tumorCall.isCall(); // cache the value
-
-            // a record is written both for actual tumor calls and for positions where genotyping was requested
-            if ( haveCall || genotype ) {
-                if ( haveCall ) tumorCallsMade++;
-
-                printVCFLine(vcf_writer,normalCall,tumorCall);
-
-                if ( bedWriter != null ) tumorCall.printBedLine(bedWriter);
-
-                if ( verboseWriter != null ) printVerboseLine(verboseWriter, normalCall, tumorCall );
-                lastGenotypedPosition = pos;
-            }
-            tumor_context.indelsAt(pos).clear();
-            normal_context.indelsAt(pos).clear();
-                // we dealt with this indel; don't want to see it again
-                // (we might otherwise in the case when 1) there is another indel that follows
-                // within MISMATCH_WIDTH bases and 2) we'd need to wait for more coverage for that next indel)
-
-//			for ( IndelVariant var : variants ) {
-//				System.out.print("\t"+var.getType()+"\t"+var.getBases()+"\t"+var.getCount());
-//			}
-        }
-
-        if ( DEBUG ) System.out.println("DEBUG>> Actual shift to " + move_to + " ("+adjustedPosition+")");
-        // finally move both windows to the position we settled on
-        tumor_context.shift((int)(move_to - tumor_context.getStart() ) );
-        normal_context.shift((int)(move_to - normal_context.getStart() ) );
-    }
-
-    private String makeFullRecord(IndelPrecall normalCall, IndelPrecall tumorCall) {
-        StringBuilder fullRecord = new StringBuilder();
-        if ( tumorCall.getVariant() != null || normalCall.getVariant() == null) {
-            fullRecord.append(tumorCall.makeEventString());
-        } else {
-            fullRecord.append(normalCall.makeEventString());            
-        }
-        fullRecord.append('\t');
-        fullRecord.append(normalCall.makeStatsString("N_"));
-        fullRecord.append('\t');
-        fullRecord.append(tumorCall.makeStatsString("T_"));
-        fullRecord.append('\t');
-        return fullRecord.toString();
-    }
-
-    /**
-     * Builds the leading, tab-terminated part of a verbose output line for a single-sample call:
-     * event string followed by the stats string (with an empty prefix), each followed by a tab.
-     *
-     * @param normalCall pre-call computed for the sample
-     * @return the assembled record prefix, ending with a tab
-     */
-    private String makeFullRecord(IndelPrecall normalCall) {
-        return new StringBuilder()
-                .append(normalCall.makeEventString())
-                .append('\t')
-                .append(normalCall.makeStatsString(""))
-                .append('\t')
-                .toString();
-    }
-
-    private String getAnnotationString(RODRecordList ann) {
-        if ( ann == null ) return annGenomic;
-        else {
-            StringBuilder b = new StringBuilder();
-
-            if ( RefSeqFeature.isExon(ann) ) {
-                if ( RefSeqFeature.isCodingExon(ann) ) b.append(annCoding); // both exon and coding = coding exon sequence
-                else b.append(annUTR); // exon but not coding = UTR
-            } else {
-                if ( RefSeqFeature.isCoding(ann) ) b.append(annIntron); // not in exon, but within the coding region = intron
-                else b.append(annUnknown); // we have no idea what this is. this may actually happen when we have a fully non-coding exon...
-            }
-            b.append('\t');
-            b.append(((Transcript)ann.get(0).getUnderlyingObject()).getGeneName()); // there is at least one transcript in the list, guaranteed
-//			while ( it.hasNext() ) { //
-//				t.getGeneName()
-//			}
-            return b.toString();
-        }
-
-    }
-
-    /**
-     * Writes one line of verbose output for a single-sample call: the full record, the annotation string
-     * (empty when no refseq iterator is available) and, if a variant was observed but not called,
-     * a trailing FILTERED_NOCALL column.
-     *
-     * @param verboseWriter destination of the verbose output
-     * @param normalCall pre-call to report
-     * @throws UserException.CouldNotCreateOutputFile if writing to verboseWriter fails
-     */
-    public void printVerboseLine(Writer verboseWriter, IndelPrecall normalCall) {
-        String annotationString = "";
-        if ( refseqIterator != null ) {
-            RODRecordList annotationList = refseqIterator.seekForward(location);
-            annotationString = getAnnotationString(annotationList);
-        }
-
-        StringBuilder line = new StringBuilder();
-        line.append(makeFullRecord(normalCall)).append(annotationString);
-
-        final boolean observedButNotCalled = ! normalCall.isCall() && normalCall.getVariant() != null;
-        if ( observedButNotCalled ) line.append("\tFILTERED_NOCALL");
-
-        try {
-            verboseWriter.write(line.toString());
-            verboseWriter.write('\n');
-        } catch (IOException ioe) {
-            throw new UserException.CouldNotCreateOutputFile(verboseOutput, "Write failed", ioe);
-        }
-    }
-
-
-    public void printVerboseLine(Writer verboseWriter, IndelPrecall normalCall, IndelPrecall tumorCall) {
-        RODRecordList annotationList = (refseqIterator == null ? null : refseqIterator.seekForward(location));
-        String annotationString = (refseqIterator == null ? "" : getAnnotationString(annotationList));
-
-        StringBuilder fullRecord = new StringBuilder();
-        fullRecord.append(makeFullRecord(normalCall,tumorCall));
-
-        if ( normalCall.getVariant() == null && tumorCall.getVariant() == null ) {
-            // did not observe anything
-            if ( normalCall.getCoverage() >= minNormalCoverage && tumorCall.getCoverage() >= minCoverage ) fullRecord.append("REFERENCE");
-            else {
-                if ( tumorCall.getCoverage() >= minCoverage ) fullRecord.append("REFERENCE"); // no coverage in normal but nothing in tumor
-                else {
-                    // no coverage in tumor; if we have no coverage in normal, it can be anything; if we do have coverage in normal,
-                    // this still could be a somatic event. so either way it is 'unknown'
-                    fullRecord.append("UNKNOWN");
-                }
-            }
-
-        }
-
-        if ( normalCall.getVariant() == null && tumorCall.getVariant() != null ) {
-            // looks like somatic call
-            if ( normalCall.getCoverage() >= minNormalCoverage ) fullRecord.append("SOMATIC"); // we confirm there is nothing in normal
-            else {
-                // low coverage in normal
-                fullRecord.append("EVENT_T"); // no coverage in normal, no idea whether it is germline or somatic
-            }
-        }
-
-        if ( normalCall.getVariant() != null && tumorCall.getVariant() == null ) {
-            // it's likely germline (with missing observation in tumor - maybe loh?
-            if ( tumorCall.getCoverage() >= minCoverage ) fullRecord.append("GERMLINE_LOH"); // we confirm there is nothing in tumor
-            else {
-                // low coverage in tumor, maybe we missed the event
-                fullRecord.append("GERMLINE"); // no coverage in tumor but we already saw it in normal...
-            }
-        }
-
-        if ( normalCall.getVariant() != null && tumorCall.getVariant() != null ) {
-            // events in both T/N, got to be germline!
-            fullRecord.append("GERMLINE"); 
-        }
-
-
-        fullRecord.append('\t');
-        fullRecord.append(annotationString);
-
-        if ( ! tumorCall.isCall() && tumorCall.getVariant() != null ) fullRecord.append("\tFILTERED_NOCALL");
-
-        try {
-            verboseWriter.write(fullRecord.toString());
-            verboseWriter.write('\n');
-        } catch (IOException e) {
-            throw new UserException.CouldNotCreateOutputFile(verboseOutput, "Write failed", e);
-        }
-    }
-
-    /**
-     * Writes a VCF record for a single-sample pre-call. Every sample in normalSamples gets the same genotype:
-     * the observed alleles when the pre-call is an actual call, or ref/ref otherwise. When a variant was
-     * observed but not called, the record carries the "NoCall" filter.
-     *
-     * Nothing is written when the record would start at position 0 (see the comment in the body).
-     *
-     * @param vcf writer the record is added to
-     * @param call pre-call to report
-     */
-    public void printVCFLine(VCFWriter vcf, IndelPrecall call) {
-
-        long start = call.getPosition()-1;
-        // If the beginning of the chromosome is deleted (possible, however unlikely), it's unclear how to proceed.
-        // The suggestion is instead of putting the base before the indel, to put the base after the indel.
-        // For now, just don't print out that site.
-        if ( start == 0 )
-            return;
-
-        long stop = start;
-
-        List alleles = new ArrayList(2); // actual observed (distinct!) alleles at the site
-
-        if ( call.getVariant() == null ) {
-            // nothing but the reference was observed: the only allele at the site is the ref base
-            alleles.add( Allele.create(refBases[(int)start-1],true) );
-        } else {
-            // we always create alt allele when we observe anything but the ref, even if it is not a call!
-            // (Genotype will tell us whether it is an actual call or not!)
-            int event_length = call.getVariant().lengthOnRef();
-            if ( event_length < 0 ) event_length = 0;
-            fillAlleleList(alleles,call);
-            stop += event_length;
-        }
-
-        // Two identical copies of the ref allele, needed to generate the hom-ref genotype. This list is built
-        // unconditionally: it is required not only when no variant was observed, but also when a variant was
-        // observed and did not make a call (previously the list was left null in the latter case).
-        // We can not use 'alleles' itself here, since that list is supposed to contain only *distinct* alleles
-        // observed at the site. The first element of 'alleles' is the ref allele in both branches above.
-        List homref_alleles = new ArrayList(2);
-        homref_alleles.add( alleles.get(0));
-        homref_alleles.add( alleles.get(0));
-
-        Map genotypes = new HashMap();
-
-        for ( String sample : normalSamples ) {
-
-            Map attrs = call.makeStatsAttributes(null);
-
-            if ( call.isCall() ) // we made a call - put actual het genotype here:
-                genotypes.put(sample,new Genotype(sample,alleles,Genotype.NO_NEG_LOG_10PERROR,null,attrs,false));
-            else // no call: genotype is ref/ref (but alleles still contain the alt if we observed anything at all)
-                genotypes.put(sample,new Genotype(sample, homref_alleles,Genotype.NO_NEG_LOG_10PERROR,null,attrs,false));
-
-        }
-        Set filters = null;
-        if ( call.getVariant() != null && ! call.isCall() ) {
-            // something was observed, but it did not pass as a call
-            filters = new HashSet();
-            filters.add("NoCall");
-        }
-        VariantContext vc = new VariantContext("IGv2_Indel_call", refName, start, stop, alleles, genotypes,
-            -1.0 /* log error */,  filters, null);
-        vcf.add(vc,refBases[(int)start-1]);
-    }
-
-    /** Fills l with appropriate alleles depending on whether call is insertion or deletion
-     * (l MUST have a variant or this method will crash). It is guaranteed that the *first* allele added
-     * to the list is ref, and the next one is alt.
-     * @param l
-     * @param call
-     */
-    private void fillAlleleList(List l, IndelPrecall call) {
-        int event_length = call.getVariant().lengthOnRef();
-        if ( event_length == 0 ) { // insertion
-
-            l.add( Allele.create(Allele.NULL_ALLELE_STRING,true) );
-            l.add( Allele.create(call.getVariant().getBases(), false ));
-
-        } else { //deletion:
-            l.add( Allele.create(call.getVariant().getBases(), true ));
-            l.add( Allele.create(Allele.NULL_ALLELE_STRING,false) );
-        }
-    }
-
-    /**
-     * Writes a VCF record for a normal/tumor pair of pre-calls. The alleles are taken from the tumor variant
-     * if there is one, otherwise from the normal variant, otherwise the site is reported with the ref base only.
-     * Samples of a group (normalSamples / tumorSamples) whose pre-call has no variant get a ref/ref genotype.
-     * The record is flagged with VCFConstants.SOMATIC_KEY when the normal has sufficient coverage and no variant
-     * while the tumor has one. Filters "NoCall", "NCov" and "TCov" are attached as applicable.
-     *
-     * Nothing is written when the record would start at position 0 (see the comment in the body).
-     *
-     * @param vcf writer the record is added to
-     * @param nCall pre-call computed for the normal sample
-     * @param tCall pre-call computed for the tumor sample
-     */
-    public void printVCFLine(VCFWriter vcf, IndelPrecall nCall, IndelPrecall tCall) {
-
-        final long start = tCall.getPosition()-1;
-
-        // If the beginning of the chromosome is deleted (possible, however unlikely), it's unclear how to proceed.
-        // The suggestion is instead of putting the base before the indel, to put the base after the indel.
-        // For now, just don't print out that site.
-        if ( start == 0 ) return;
-
-        Map attrsNormal = nCall.makeStatsAttributes(null);
-        Map attrsTumor = tCall.makeStatsAttributes(null);
-
-        final boolean homRefN = ( nCall.getVariant() == null );
-        final boolean homRefT = ( tCall.getVariant() == null );
-
-        // site-level attributes: only the somatic flag is ever set here
-        Map attrs = new HashMap();
-        if ( nCall.getCoverage() >= minNormalCoverage && homRefN && ! homRefT ) {
-            attrs.put(VCFConstants.SOMATIC_KEY,true);
-        }
-
-        long stop = start;
-        List alleles = new ArrayList(2); // all alleles at the site
-        if ( homRefT && homRefN ) {
-            // no indel at all; create base-representation ref allele for genotype construction
-            alleles.add( Allele.create(refBases[(int)start-1],true) );
-        } else {
-            // we got indel(s): the tumor event takes precedence over the normal one
-            final IndelPrecall eventSource = homRefT ? nCall : tCall;
-            final int event_length = eventSource.getVariant().lengthOnRef();
-            fillAlleleList(alleles, eventSource);
-            if ( event_length > 0 ) stop += event_length;
-        }
-
-        // two copies of the ref allele: needed for somatic calls (since normal is ref-ref), and also for no-calls
-        List homRefAlleles = new ArrayList(2);
-        homRefAlleles.add( alleles.get(0));
-        homRefAlleles.add( alleles.get(0));
-
-        Map genotypes = new HashMap();
-        for ( String sample : normalSamples ) {
-            List sampleAlleles = homRefN ? homRefAlleles : alleles;
-            genotypes.put(sample,new Genotype(sample, sampleAlleles,Genotype.NO_NEG_LOG_10PERROR,null,attrsNormal,false));
-        }
-        for ( String sample : tumorSamples ) {
-            List sampleAlleles = homRefT ? homRefAlleles : alleles;
-            genotypes.put(sample,new Genotype(sample, sampleAlleles,Genotype.NO_NEG_LOG_10PERROR,null,attrsTumor,false) );
-        }
-
-        // collect all applicable filters; the record gets a null filter set when none applies
-        Set collectedFilters = new HashSet();
-        if ( ! homRefT && ! tCall.isCall() ) collectedFilters.add("NoCall");
-        if ( nCall.getCoverage() < minNormalCoverage ) collectedFilters.add("NCov");
-        if ( tCall.getCoverage() < minCoverage ) collectedFilters.add("TCov");
-        Set filters = collectedFilters.isEmpty() ? null : collectedFilters;
-
-        VariantContext vc = new VariantContext("IGv2_Indel_call", refName, start, stop, alleles, genotypes,
-            -1.0 /* log error */, filters, attrs);
-        vcf.add(vc,refBases[(int)start-1]);
-    }
-
-    @Override
-    public void onTraversalDone(Integer result) {
-        if ( DEBUG ) {
-            System.out.println("DEBUG>> Emitting last window at "+normal_context.getStart()+"-"+normal_context.getStop());
-        }
-        if ( call_somatic ) emit_somatic(1000000000, true);
-        else emit(1000000000,true); // emit everything we might have left
-
-        if ( metricsWriter != null ) {
-            metricsWriter.println(String.format("Normal calls made     %d", normalCallsMade));
-            metricsWriter.println(String.format("Tumor calls made      %d", tumorCallsMade));
-            metricsWriter.close();
-        }
-
-        try {
-            if ( bedWriter != null ) bedWriter.close();
-            if ( verboseWriter != null ) verboseWriter.close();
-        } catch (IOException e) {
-            System.out.println("Failed to close output BED file gracefully, data may be lost");
-            e.printStackTrace();
-        }
-        super.onTraversalDone(result);
-    }
-
-    @Override
-    public Integer reduce(Integer value, Integer sum) {
-        if ( value == -1 ) {
-            onTraversalDone(sum);
-            System.exit(1);
-        }
-        sum += value;
-        return sum;
-    }
-
-    @Override
-    public Integer reduceInit() {
-        return new Integer(0);
-    }
-
-
-        static class IndelVariant {
-            public static enum Type { I, D};
-            private String bases;
-            private Type type;
-            private ArrayList fromStartOffsets = null;
-            private ArrayList fromEndOffsets = null;
-
-            private Set reads = new HashSet(); // keep track of reads that have this indel
-            private Set samples = new HashSet();   // which samples had the indel described by this object
-
-            public IndelVariant(ExpandedSAMRecord read , Type type, String bases) {
-                this.type = type;
-                this.bases = bases.toUpperCase();
-                addObservation(read);
-                fromStartOffsets = new ArrayList();
-                fromEndOffsets = new ArrayList();
-            }
-
-            /** Adds another observation for the current indel. It is assumed that the read being registered
-             * does contain the observation, no checks are performed. Read's sample is added to the list of samples
-             * this indel was observed in as well.
-             * @param read
-             */
-            public void addObservation(ExpandedSAMRecord read) {
-                if ( reads.contains(read) ) {
-                    //TODO fix CleanedReadInjector and reinstate exception here: duplicate records may signal a problem with the bam
-                    // seeing the same read again can mean only one thing: the input bam file is corrupted and contains
-                    // duplicate records. We KNOW that this may happen for the time being due to bug in CleanedReadInjector
-                    // so this is a short-term patch: don't cry, but just ignore the duplicate record
-
-                    //throw new StingException("Attempting to add indel observation that was already registered");
-                    return;
-                }
-                reads.add(read);
-                String sample = null;
-                if ( read.getSAMRecord().getReadGroup() != null ) sample = read.getSAMRecord().getReadGroup().getSample();
-                if ( sample != null ) samples.add(sample);
-            }
-
-
-            /** Returns length of the event on the reference (number of deleted bases
-             * for deletions, -1 for insertions.
-             * @return
-             */
-            public int lengthOnRef() {
-                if ( type == Type.D ) return bases.length();
-                else return 0;
-            }
-
-
-            public void addSample(String sample) {
-                if ( sample != null )
-                samples.add(sample);
-            }
-
-            public void addReadPositions(int fromStart, int fromEnd) {
-                fromStartOffsets.add(fromStart);
-                fromEndOffsets.add(fromEnd);
-            }
-
-            public List getOffsetsFromStart() { return fromStartOffsets ; }
-            public List getOffsetsFromEnd() { return fromEndOffsets; }
-
-            public String getSamples() {
-                StringBuffer sb = new StringBuffer();
-                Iterator i = samples.iterator();
-                while ( i.hasNext() ) {
-                    sb.append(i.next());
-                    if ( i.hasNext() )
-                        sb.append(",");
-                }
-                return sb.toString();
-            }
-
-            public Set getReadSet() { return reads; }
-
-            public int getCount() { return reads.size(); }
-
-            public String getBases() { return bases; }
-
-            public Type getType() { return type; }
-
-            @Override
-            public boolean equals(Object o) {
-                if ( ! ( o instanceof IndelVariant ) ) return false;
-                IndelVariant that = (IndelVariant)o;
-                return ( this.type == that.type && this.bases.equals(that.bases) );
-            }
-
-            public boolean equals(Type type, String bases) {
-                return ( this.type == type && this.bases.equals(bases.toUpperCase()) );
-            }
-        }
-
-    /**
-     * Utility class that encapsulates the logic related to collecting all the stats and counts required to
-     * make (or discard) a call, as well as the calling heuristics that uses those data.
-      */
-    class IndelPrecall {
-//        private boolean DEBUG = false;
-        private int NQS_MISMATCH_CUTOFF = 1000000;
-        private double AV_MISMATCHES_PER_READ = 1.5;
-
-        private int nqs = 0;
-        private IndelVariant consensus_indel = null; // indel we are going to call
-        private long pos = -1 ; // position on the ref
-        private int total_coverage = 0; // total number of reads overlapping with the event
-        private int consensus_indel_count = 0; // number of reads, in which consensus indel was observed
-        private int all_indel_count = 0 ; // number of reads, in which any indel was observed at current position
-
-        private int total_mismatches_in_nqs_window = 0; // total number of mismatches in the nqs window around the indel
-        private int total_bases_in_nqs_window = 0; // total number of bases in the nqs window (some reads may not fully span the window so it's not coverage*nqs_size)
-        private int total_base_qual_in_nqs_window = 0; // sum of qualitites of all the bases in the nqs window
-        private int total_mismatching_base_qual_in_nqs_window = 0; // sum of qualitites of all mismatching bases in the nqs window
-
-        private int indel_read_mismatches_in_nqs_window = 0;   // mismatches inside the nqs window in indel-containing reads only
-        private int indel_read_bases_in_nqs_window = 0;  // number of bases in the nqs window from indel-containing reads only
-        private int indel_read_base_qual_in_nqs_window = 0; // sum of qualitites of bases in nqs window from indel-containing reads only
-        private int indel_read_mismatching_base_qual_in_nqs_window = 0; // sum of qualitites of mismatching bases in the nqs window from indel-containing reads only
-
-
-        private int consensus_indel_read_mismatches_in_nqs_window = 0; // mismatches within the nqs window from consensus indel reads only
-        private int consensus_indel_read_bases_in_nqs_window = 0;  // number of bases in the nqs window from consensus indel-containing reads only
-        private int consensus_indel_read_base_qual_in_nqs_window = 0; // sum of qualitites of bases in nqs window from consensus indel-containing reads only
-        private int consensus_indel_read_mismatching_base_qual_in_nqs_window = 0; // sum of qualitites of mismatching bases in the nqs window from consensus indel-containing reads only
-
-
-        private double consensus_indel_read_total_mm = 0.0; // sum of all mismatches in reads that contain consensus indel
-        private double all_indel_read_total_mm = 0.0; // sum of all mismatches in reads that contain any indel at given position
-        private double all_read_total_mm = 0.0; // sum of all mismatches in all reads
-
-        private double consensus_indel_read_total_mapq = 0.0; // sum of mapping qualitites of all reads with consensus indel
-        private double all_indel_read_total_mapq = 0.0 ; // sum of mapping qualitites of all reads with (any) indel at current position
-        private double all_read_total_mapq = 0.0; // sum of all mapping qualities of all reads
-
-        private PrimitivePair.Int consensus_indel_read_orientation_cnt = new PrimitivePair.Int();
-        private PrimitivePair.Int all_indel_read_orientation_cnt = new PrimitivePair.Int();
-        private PrimitivePair.Int all_read_orientation_cnt = new PrimitivePair.Int();
-
-        private int from_start_median = 0;
-        private int from_start_mad = 0;
-        private int from_end_median = 0;
-        private int from_end_mad = 0;
-
-        /** Makes an empty call (no-call) with all stats set to 0
-         *
-         * @param position
-         */
-        public IndelPrecall(long position) {
-            this.pos = position;
-        }
-
-        public IndelPrecall(WindowContext context, long position, int nqs_width) {
-            this.pos = position;
-            this.nqs = nqs_width;
-            total_coverage = context.coverageAt(pos,true);
-            List variants = context.indelsAt(pos);
-            findConsensus(variants);
-
-            // pos is the first base after the event: first deleted base or first base after insertion.
-            // hence, [pos-nqs, pos+nqs-1] (inclusive) is the window with nqs bases on each side of a no-event or an insertion
-            // and [pos-nqs, pos+Ndeleted+nqs-1] is the window with nqs bases on each side of a deletion.
-            // we initialize the nqs window for no-event/insertion case
-            long left = Math.max( pos-nqs, context.getStart() );
-            long right = Math.min(pos+nqs-1, context.getStop());
-//if ( pos == 3534096 ) System.out.println("pos="+pos +" total reads: "+context.getReads().size());
-            Iterator read_iter = context.getReads().iterator();
-
-
-            while ( read_iter.hasNext() ) {
-                ExpandedSAMRecord rec = read_iter.next();
-                SAMRecord read = rec.getSAMRecord();
-                byte[] flags = rec.getExpandedMMFlags();
-                byte[] quals = rec.getExpandedQuals();
-                int mm = rec.getMMCount();
-
-
-                if( read.getAlignmentStart() > pos || read.getAlignmentEnd() < pos ) continue;
-
-                long local_right = right; // end of nqs window for this particular read. May need to be advanced further right
-                                          // if read has a deletion. The gap in the middle of nqs window will be skipped
-                                          // automatically since flags/quals are set to -1 there
-
-                boolean read_has_a_variant = false;
-                boolean read_has_consensus = ( consensus_indel!= null && consensus_indel.getReadSet().contains(rec) );
-                for ( IndelVariant v : variants ) {
-                    if ( v.getReadSet().contains(rec) ) {
-                        read_has_a_variant = true;
-                        local_right += v.lengthOnRef();
-                        break;
-                    }
-                }
-
-                if ( read_has_consensus ) {
-                    consensus_indel_read_total_mm += mm;
-                    consensus_indel_read_total_mapq += read.getMappingQuality();
-                    if ( read.getReadNegativeStrandFlag() ) consensus_indel_read_orientation_cnt.second++;
-                    else consensus_indel_read_orientation_cnt.first++;
-                }
-                if ( read_has_a_variant ) {
-                    all_indel_read_total_mm += mm;
-                    all_indel_read_total_mapq += read.getMappingQuality();
-                    if ( read.getReadNegativeStrandFlag() ) all_indel_read_orientation_cnt.second++;
-                    else all_indel_read_orientation_cnt.first++;
-                }
-
-                all_read_total_mm+= mm;
-                all_read_total_mapq += read.getMappingQuality();
-                if ( read.getReadNegativeStrandFlag() ) all_read_orientation_cnt.second++;
-                else all_read_orientation_cnt.first++;
-
-                for ( int pos_in_flags = Math.max((int)(left - read.getAlignmentStart()),0);
-                      pos_in_flags <= Math.min((int)local_right-read.getAlignmentStart(),flags.length - 1);
-                       pos_in_flags++) {
-
-                        if ( flags[pos_in_flags] == -1 ) continue; // gap (deletion), skip it; we count only bases aligned to the ref
-                        total_bases_in_nqs_window++;
-                        if ( read_has_consensus ) consensus_indel_read_bases_in_nqs_window++;
-                        if ( read_has_a_variant ) indel_read_bases_in_nqs_window++;
-
-                        if ( quals[pos_in_flags] != -1 ) {
-
-                            total_base_qual_in_nqs_window += quals[pos_in_flags];
-                            if ( read_has_a_variant ) indel_read_base_qual_in_nqs_window += quals[pos_in_flags];
-                            if ( read_has_consensus ) consensus_indel_read_base_qual_in_nqs_window += quals[pos_in_flags];
-                        }
-
-                        if ( flags[pos_in_flags] == 1 ) { // it's a mismatch
-                            total_mismatches_in_nqs_window++;
-                            total_mismatching_base_qual_in_nqs_window += quals[pos_in_flags];
-
-                            if ( read_has_consensus ) {
-                                consensus_indel_read_mismatches_in_nqs_window++;
-                                consensus_indel_read_mismatching_base_qual_in_nqs_window += quals[pos_in_flags];
-                            }
-                            
-                            if ( read_has_a_variant ) {
-                                indel_read_mismatches_in_nqs_window++;
-                                indel_read_mismatching_base_qual_in_nqs_window += quals[pos_in_flags];
-                            }
-                        }
-                }
-//         if ( pos == 3534096 ) {
-//             System.out.println(read.getReadName());
-//             System.out.println(" cons nqs bases="+consensus_indel_read_bases_in_nqs_window);
-//             System.out.println(" qual sum="+consensus_indel_read_base_qual_in_nqs_window);
-//         }
-
-            }
-
-            // compute median/mad for offsets from the read starts/ends
-            if ( consensus_indel != null ) {
-                from_start_median = median(consensus_indel.getOffsetsFromStart()) ;
-                from_start_mad = mad(consensus_indel.getOffsetsFromStart(),from_start_median);
-                from_end_median = median(consensus_indel.getOffsetsFromEnd()) ;
-                from_end_mad = mad(consensus_indel.getOffsetsFromEnd(),from_end_median);   
-            }
-        }
-
-        /** As a side effect will sort l!
-         *
-         * @param l
-         * @return
-         */
-        private int median(List l) {
-            Collections.sort(l);
-            int k = l.size()/2;
-            return ( l.size() % 2 == 0 ?
-                  (l.get(k-1).intValue()+l.get(k).intValue())/2 :
-                   l.get(k).intValue());
-        }
-
-        private int median(int[] l) {
-            Arrays.sort(l);
-            int k = l.length/2;
-            return ( l.length % 2 == 0 ?
-                  (l[k-1]+l[k])/2 :
-                   l[k]);
-        }
-
-        private int mad(List l, int med) {
-            int [] diff = new int[l.size()];
-            for ( int i = 0; i < l.size(); i++ ) {
-                   diff[i] = Math.abs(l.get(i).intValue() - med);
-            }
-            return median(diff);
-        }
-
-        public long getPosition() { return pos; }
-
-        public boolean hasObservation() { return consensus_indel != null; }
-
-        public int getCoverage() { return total_coverage; }
-
-        public double getTotalMismatches() { return all_read_total_mm; }
-        public double getConsensusMismatches() { return consensus_indel_read_total_mm; }
-        public double getAllVariantMismatches() { return all_indel_read_total_mm; }
-
-        /** Returns average number of mismatches per consensus indel-containing read */
-        public double getAvConsensusMismatches() {
-            return ( consensus_indel_count != 0 ? consensus_indel_read_total_mm/consensus_indel_count : 0.0 );
-        }
-
-        /** Returns average number of mismatches per read across all reads matching the ref (not containing any indel variants) */
-        public double getAvRefMismatches() {
-            int coverage_ref = total_coverage-all_indel_count;
-            return ( coverage_ref != 0 ? (all_read_total_mm - all_indel_read_total_mm )/coverage_ref : 0.0 );
-        }
-
-        public PrimitivePair.Int getConsensusStrandCounts() {
-            return consensus_indel_read_orientation_cnt;
-        }
-
-        public PrimitivePair.Int getRefStrandCounts() {
-            return new PrimitivePair.Int(all_read_orientation_cnt.first-all_indel_read_orientation_cnt.first,
-                                         all_read_orientation_cnt.second - all_indel_read_orientation_cnt.second);
-        }
-
-        /** Returns a sum of mapping qualities of all reads spanning the event. */
-        public double getTotalMapq() { return all_read_total_mapq; }
-
-        /** Returns a sum of mapping qualities of all reads, in which the consensus variant is observed. */
-        public double getConsensusMapq() { return consensus_indel_read_total_mapq; }
-
-        /** Returns a sum of mapping qualities of all reads, in which any variant is observed at the current event site. */
-        public double getAllVariantMapq() { return all_indel_read_total_mapq; }
-
-        /** Returns average mapping quality per consensus indel-containing read. */
-        public double getAvConsensusMapq() {
-            return ( consensus_indel_count != 0 ? consensus_indel_read_total_mapq/consensus_indel_count : 0.0 );
-        }
-
-        /** Returns average number of mismatches per read across all reads matching the ref (not containing any indel variants). */
-        public double getAvRefMapq() {
-            int coverage_ref = total_coverage-all_indel_count;
-            return ( coverage_ref != 0 ? (all_read_total_mapq - all_indel_read_total_mapq )/coverage_ref : 0.0 );
-        }
-
-        /** Returns fraction of bases in NQS window around the indel that are mismatches, across all reads,
-         * in which consensus indel is observed. NOTE: NQS window for indel containing reads is defined around
-         * the indel itself (e.g. for a 10-base deletion spanning [X,X+9], the 5-NQS window is {[X-5,X-1],[X+10,X+15]}
-         * */
-        public double getNQSConsensusMMRate() {
-            if ( consensus_indel_read_bases_in_nqs_window == 0 ) return 0;
-            return ((double)consensus_indel_read_mismatches_in_nqs_window)/consensus_indel_read_bases_in_nqs_window;
-        }
-
-        /** Returns fraction of bases in NQS window around the indel start position that are mismatches, across all reads
-         * that align to the ref (i.e. contain no indel observation at the current position). NOTE: NQS window for ref
-         * reads is defined around the event start position, NOT around the actual consensus indel.
-         * */
-        public double getNQSRefMMRate() {
-            int num_ref_bases = total_bases_in_nqs_window - indel_read_bases_in_nqs_window;
-            if ( num_ref_bases == 0 ) return 0;
-            return ((double)(total_mismatches_in_nqs_window - indel_read_mismatches_in_nqs_window))/num_ref_bases;
-        }
-
-        /** Returns average base quality in NQS window around the indel, across all reads,
-         * in which consensus indel is observed. NOTE: NQS window for indel containing reads is defined around
-         * the indel itself (e.g. for a 10-base deletion spanning [X,X+9], the 5-NQS window is {[X-5,X-1],[X+10,X+15]}
-         * */
-        public double getNQSConsensusAvQual() {
-            if ( consensus_indel_read_bases_in_nqs_window == 0 ) return 0;
-            return ((double)consensus_indel_read_base_qual_in_nqs_window)/consensus_indel_read_bases_in_nqs_window;
-        }
-
-        /** Returns fraction of bases in NQS window around the indel start position that are mismatches, across all reads
-         * that align to the ref (i.e. contain no indel observation at the current position). NOTE: NQS window for ref
-         * reads is defined around the event start position, NOT around the actual consensus indel.
-         * */
-        public double getNQSRefAvQual() {
-            int num_ref_bases = total_bases_in_nqs_window - indel_read_bases_in_nqs_window;
-            if ( num_ref_bases == 0 ) return 0;
-            return ((double)(total_base_qual_in_nqs_window - indel_read_base_qual_in_nqs_window))/num_ref_bases;
-        }
-
-        public int getTotalNQSMismatches() { return total_mismatches_in_nqs_window; }
-
-        public int getAllVariantCount() { return all_indel_count; }
-        public int getConsensusVariantCount() { return consensus_indel_count; }
-
-//        public boolean failsNQSMismatch() {
-//            //TODO wrong fraction: mismatches are counted only in indel-containing reads, but total_coverage is used!
-//            return ( indel_read_mismatches_in_nqs_window > NQS_MISMATCH_CUTOFF ) ||
-//                    ( indel_read_mismatches_in_nqs_window > total_coverage * AV_MISMATCHES_PER_READ );
-//        }
-
-        public IndelVariant getVariant() { return consensus_indel; }
-
-        public boolean isCall() {
-            boolean ret =  ( consensus_indel_count >= minIndelCount &&
-                    (double)consensus_indel_count > minFraction * total_coverage &&
-                    (double) consensus_indel_count > minConsensusFraction*all_indel_count && total_coverage >= minCoverage);
-            if ( DEBUG && ! ret ) System.out.println("DEBUG>>  NOT a call: count="+consensus_indel_count+
-                        " total_count="+all_indel_count+" cov="+total_coverage+
-                " minConsensusF="+((double)consensus_indel_count)/all_indel_count+
-                    " minF="+((double)consensus_indel_count)/total_coverage);
-            return ret;
-
-        }
-
-        /** Utility method: finds the indel variant with the largest count (ie consensus) among all the observed
-         * variants, and sets the counts of consensus observations and all observations of any indels (including non-consensus)
-         * @param variants
-         * @return
-         */
-        private void findConsensus(List variants) {
-            for ( IndelVariant var : variants ) {
-                if ( DEBUG ) System.out.println("DEBUG>> Variant "+var.getBases()+" (cnt="+var.getCount()+")");
-                int cnt = var.getCount();
-                all_indel_count +=cnt;
-                if ( cnt > consensus_indel_count ) {
-                    consensus_indel = var;
-                    consensus_indel_count = cnt;
-                }
-            }
-            if ( DEBUG && consensus_indel != null ) System.out.println("DEBUG>> Returning: "+consensus_indel.getBases()+
-                    " (cnt="+consensus_indel.getCount()+") with total count of "+all_indel_count);
-        }
-
-
-
-        public void printBedLine(Writer bed) {
-            int event_length;
-            if ( consensus_indel == null ) event_length = 0;
-            else {
-                event_length = consensus_indel.lengthOnRef();
-                if ( event_length < 0 ) event_length = 0;
-            }
-
-            StringBuffer message = new StringBuffer();
-            message.append(refName+"\t"+(pos-1)+"\t");
-            message.append((pos-1+event_length)+"\t");
-            if ( consensus_indel != null ) {
-                message.append((event_length>0? "-":"+")+consensus_indel.getBases());
-            } else {
-                message.append('.');
-            }
-            message.append(":"+all_indel_count+"/"+total_coverage);
-            try {
-                bed.write(message.toString()+"\n");
-            } catch (IOException e) {
-               throw new UserException.CouldNotCreateOutputFile(bedOutput, "Error encountered while writing into output BED file", e);
-            }
-        }
-
-        public String makeEventString() {
-            int event_length;
-            if ( consensus_indel == null ) event_length = 0;
-            else {
-                event_length = consensus_indel.lengthOnRef();
-                if ( event_length < 0 ) event_length = 0;
-            }
-            StringBuffer message = new StringBuffer();
-            message.append(refName);
-            message.append('\t');
-            message.append(pos-1);
-            message.append('\t');
-            message.append(pos-1+event_length);
-            message.append('\t');
-            if ( consensus_indel != null ) {
-                message.append((event_length>0?'-':'+'));
-                message.append(consensus_indel.getBases());
-            } else {
-                message.append('.');
-            }
-            return message.toString();
-        }
-
-        public String makeStatsString(String prefix) {
-             StringBuilder message = new StringBuilder();
-             message.append(prefix+"OBS_COUNTS[C/A/T]:"+getConsensusVariantCount()+"/"+getAllVariantCount()+"/"+getCoverage());
-             message.append('\t');
-             message.append(prefix+"AV_MM[C/R]:"+String.format("%.2f/%.2f",getAvConsensusMismatches(),
-                                 getAvRefMismatches()));
-             message.append('\t');
-             message.append(prefix+"AV_MAPQ[C/R]:"+String.format("%.2f/%.2f",getAvConsensusMapq(),
-                                getAvRefMapq()));
-             message.append('\t');
-             message.append(prefix+"NQS_MM_RATE[C/R]:"+String.format("%.2f/%.2f",getNQSConsensusMMRate(),getNQSRefMMRate()));
-             message.append('\t');
-             message.append(prefix+"NQS_AV_QUAL[C/R]:"+String.format("%.2f/%.2f",getNQSConsensusAvQual(),getNQSRefAvQual()));
-
-             PrimitivePair.Int strand_cons = getConsensusStrandCounts();
-             PrimitivePair.Int strand_ref = getRefStrandCounts();
-             message.append('\t');
-             message.append(prefix+"STRAND_COUNTS[C/C/R/R]:"+strand_cons.first+"/"+strand_cons.second+"/"+strand_ref.first+"/"+strand_ref.second);
-
-             message.append('\t');
-             message.append(prefix+"OFFSET_RSTART:"+from_start_median+"/"+from_start_mad);
-             message.append('\t');
-             message.append(prefix+"OFFSET_REND:"+from_end_median+"/"+from_end_mad);
-
-             return message.toString();
-
-         }
-
-        /**
-         * Places alignment statistics into attribute map and returns the map. If attr parameter is null,
-         * a new map is allocated, filled and returned. If attr is not null, new attributes are added to that
-         * preexisting map, and the same instance of the (updated) map is returned.
-         *
-         * @param attr
-         * @return
-         */
-        public Map makeStatsAttributes(Map attr) {
-             if ( attr == null ) attr = new HashMap();
-
-             VCFIndelAttributes.recordDepth(getConsensusVariantCount(),getAllVariantCount(),getCoverage(),attr);
-
-             VCFIndelAttributes.recordAvMM(getAvConsensusMismatches(),getAvRefMismatches(),attr);
-
-             VCFIndelAttributes.recordAvMapQ(getAvConsensusMapq(),getAvRefMapq(),attr);
-
-             VCFIndelAttributes.recordNQSMMRate(getNQSConsensusMMRate(),getNQSRefMMRate(),attr);
-
-             VCFIndelAttributes.recordNQSAvQ(getNQSConsensusAvQual(),getNQSRefAvQual(),attr);
-
-             VCFIndelAttributes.recordOffsetFromStart(from_start_median,from_start_mad,attr);
-
-             VCFIndelAttributes.recordOffsetFromEnd(from_end_median,from_end_mad,attr);
-
-             PrimitivePair.Int strand_cons = getConsensusStrandCounts();
-             PrimitivePair.Int strand_ref = getRefStrandCounts();
-
-             VCFIndelAttributes.recordStrandCounts(strand_cons.first,strand_cons.second,strand_ref.first,strand_ref.second,attr);
-             return attr;
-         }
-    }
-
-    interface IndelListener {
-        public void addObservation(int pos, IndelVariant.Type t, String bases, int fromStart, int fromEnd, ExpandedSAMRecord r);
-    }
-
-    /**
-     * Sliding window over the reference. Keeps the reads that were added to it and, for every
-     * position of the window, the list of indel variants observed in those reads (the observations
-     * are delivered by ExpandedSAMRecord through the IndelListener callback).
-     */
-    class WindowContext implements IndelListener {
-        private Set<ExpandedSAMRecord> reads;
-        private int start = 0; // where the window starts on the ref, 1-based
-        private CircularArray<List<IndelVariant>> indels;
-
-        // placeholder returned by indelsAt() for positions that hold no indel list
-        private List<IndelVariant> emptyIndelList = new ArrayList<IndelVariant>();
-
-        /** Creates an empty window.
-         *
-         * @param start 1-based reference position of the first base of the window
-         * @param length window length, in bases
-         */
-        public WindowContext(int start, int length) {
-            this.start = start;
-            indels = new CircularArray<List<IndelVariant>>(length);
-            reads = new HashSet<ExpandedSAMRecord>();
-        }
-
-        /** Returns 1-based reference start position of the interval this object keeps context for.
-         *
-         * @return window start
-         */
-        public int getStart() { return start; }
-
-        /** Returns 1-based reference stop position (inclusive) of the interval this object keeps context for.
-         *
-         * @return window stop
-         */
-        public int getStop() { return start + indels.length() - 1; }
-
-        /** Resets reference start position to 0 and clears the context.
-         *
-         */
-        public void clear() {
-            start = 0;
-            reads.clear();
-            indels.clear();
-        }
-
-        /**
-         * Returns true if any indel observations are present in the specified interval
-         * [begin,end] (1-based, inclusive). Interval can be partially or fully outside of the
-         * current context window: positions outside of the window will be ignored.
-         * @param begin first position of the interval
-         * @param end last position of the interval (inclusive)
-         * @return true if at least one position in the overlap holds a non-empty indel list
-         */
-        public boolean hasIndelsInInterval(long begin, long end) {
-            final long first = Math.max(start, begin);
-            final long last = Math.min(getStop(), end);
-            // both bounds are inclusive, so the last position must be examined as well
-            for ( long k = first; k <= last; k++ ) {
-                // test for emptiness rather than identity with the placeholder:
-                // shift() may leave a real but emptied list behind at the first position
-                if ( !indelsAt(k).isEmpty() ) return true;
-            }
-            return false;
-        }
-
-        /** Returns the (live) set of reads currently kept in this window. */
-        public Set<ExpandedSAMRecord> getReads() { return reads; }
-
-        /** Returns the number of reads spanning over the specified reference position
-         * (regardless of whether they have a base or indel at that specific location).
-         * The second argument controls whether to count with indels in mind (this is relevant for insertions only,
-         * deletions do not require any special treatment since they occupy non-zero length on the ref and since
-         * alignment can not start or end with a deletion). For insertions, note that, internally, we assign insertions
-         * to the reference position right after the actual event, and we count all events assigned to a given position.
-         * This count (reads with indels) should be contrasted to reads without indels, or more rigorously, reads
-         * that support the ref rather than the indel. Few special cases may occur here:
-         * 1) an alignment that ends (as per getAlignmentEnd()) right before the current position but has I as its
-         * last element: we have to count that read into the "coverage" at the current position for the purposes of indel
-         * assessment, as the indel in that read will be counted at the current position, so the total coverage
-         * should be consistent with that.
-         */
-        /* NOT IMPLEMENTED: 2) alignments that start exactly at the current position do not count for the purpose of insertion
-         * assessment since they do not contribute any evidence to either Ref or Alt=insertion hypothesis, unless
-         * the alignment starts with I (so that we do have evidence for an indel assigned to the current position and
-         * read should be counted). For deletions, reads starting at the current position should always be counted (as they
-         * show no deletion=ref).
-         * @param refPos position on the reference; must be within the bounds of the window
-         */
-        public int coverageAt(final long refPos, boolean countForIndels) {
-            int cov = 0;
-            for ( ExpandedSAMRecord read : reads ) {
-                final SAMRecord rec = read.getSAMRecord();
-                if ( rec.getAlignmentStart() <= refPos && rec.getAlignmentEnd() >= refPos ) {
-                    cov++; // read spans the position
-                    continue;
-                }
-                // special case 1: read ends right before refPos with a trailing insertion
-                if ( countForIndels && rec.getAlignmentEnd() == refPos - 1 ) {
-                    Cigar c = rec.getCigar();
-                    if ( c.getCigarElement(c.numCigarElements()-1).getOperator() == CigarOperator.I ) cov++;
-                }
-            }
-            return cov;
-        }
-
-        /** Shifts current window to the right along the reference contig by the specified number of bases.
-         * The context will be updated accordingly (indels and reads that go out of scope will be dropped).
-         * @param offset number of bases to shift by
-         */
-        public void shift(int offset) {
-            start += offset;
-
-            indels.shiftData(offset);
-            if ( indels.get(0) != null && indels.get(0).size() != 0 ) {
-                IndelVariant indel =  indels.get(0).get(0);
-
-                // indels landing on the first position of the window are reported and discarded, not treated as an error
-                System.out.println("WARNING: Indel(s) at first position in the window ("+refName+":"+start+"): currently not supported: "+
-                (indel.getType()==IndelVariant.Type.I?"+":"-")+indel.getBases()+"; read: "+indel.getReadSet().iterator().next().getSAMRecord().getReadName()+"; site ignored");
-                indels.get(0).clear();
-            }
-
-            Iterator<ExpandedSAMRecord> read_iter = reads.iterator();
-
-            while ( read_iter.hasNext() ) {
-                ExpandedSAMRecord r = read_iter.next();
-                if ( r.getSAMRecord().getAlignmentEnd() < start ) { // discard reads and associated data that went out of scope
-                    read_iter.remove();
-                }
-            }
-        }
-
-        /** Adds a read to the window. Reads starting before the window start are silently ignored.
-         * Note that constructing the ExpandedSAMRecord reports the read's indels to this window
-         * (via addObservation) before the duplicate check below is performed.
-         * @param read the alignment to add
-         * @param ref reference bases handed over to ExpandedSAMRecord for mismatch and deletion extraction
-         */
-        public void add(SAMRecord read, byte [] ref) {
-
-            if ( read.getAlignmentStart() < start ) return; // silently ignore reads starting before the window start
-
-            ExpandedSAMRecord er = new ExpandedSAMRecord(read,ref,read.getAlignmentStart()-start,this);
-            //TODO duplicate records may actually indicate a problem with input bam file; throw an exception when the bug in CleanedReadInjector is fixed
-            if ( reads.contains(er)) return; // ignore duplicate records
-            reads.add(er);
-        }
-
-        /** Registers one indel observation at the given offset into the window: the observation is merged into
-         * an existing IndelVariant with the same type and bases if there is one, otherwise a new variant is created.
-         * @throws IndexOutOfBoundsException (rethrown after printing diagnostics) if pos is outside the window
-         */
-        public void addObservation(int pos, IndelVariant.Type type, String bases, int fromStart, int fromEnd, ExpandedSAMRecord rec) {
-            List<IndelVariant> indelsAtSite;
-            try {
-                indelsAtSite = indels.get(pos);
-            } catch (IndexOutOfBoundsException e) {
-                SAMRecord r = rec.getSAMRecord();
-                System.out.println("Failed to add indel observation, probably out of coverage window bounds (trailing indel?):\nRead "+
-                        r.getReadName()+": "+
-                    "read length="+r.getReadLength()+"; cigar="+r.getCigarString()+"; start="+
-                    r.getAlignmentStart()+"; end="+r.getAlignmentEnd()+"; window start="+getStart()+
-                    "; window end="+getStop());
-                throw e;
-            }
-
-            if ( indelsAtSite == null ) {
-                indelsAtSite = new ArrayList<IndelVariant>();
-                indels.set(pos, indelsAtSite);
-            }
-
-            IndelVariant indel = null;
-            for ( IndelVariant v : indelsAtSite ) {
-                if ( ! v.equals(type, bases) ) continue;
-
-                indel = v;
-                indel.addObservation(rec);
-                break;
-            }
-
-            if ( indel == null ) {  // not found:
-                indel = new IndelVariant(rec, type, bases);
-                indelsAtSite.add(indel);
-            }
-            indel.addReadPositions(fromStart,fromEnd);
-        }
-
-        /** Returns the indels recorded at the given reference position, or an empty list if there are none.
-         * @param refPos 1-based reference position
-         */
-        public List<IndelVariant> indelsAt( final long refPos ) {
-            List<IndelVariant> l = indels.get((int)( refPos - start ));
-            if ( l == null ) return emptyIndelList;
-            else return l;
-        }
-
-
-    }
-
-
-    /**
-     * A SAMRecord together with per-reference-position mismatch flags and base qualities
-     * ("expanded" onto the reference span of the alignment). While the cigar is parsed in the
-     * constructor, every insertion and deletion found is reported to the supplied IndelListener.
-     */
-    class ExpandedSAMRecord {
-        private SAMRecord read;
-        private byte[] mismatch_flags; // per ref position: 1 = mismatch, -1 = deleted base, 0 otherwise
-        private byte[] expanded_quals; // per ref position: base quality, or -1 for a deleted base
-        private int mms;               // number of mismatching bases in M elements
-
-        /**
-         * @param r      the alignment
-         * @param ref    reference bases; index 0 is treated as aligned with the first aligned base of the read
-         * @param offset value added to the event position (relative to the read's alignment start) when
-         *               reporting indels to the listener
-         * @param l      listener that receives every indel found in the cigar
-         * @throws IllegalArgumentException if the cigar contains an operator other than H, S, D, N, P, I, M
-         *               in the length scan, or other than H, I, S, D, M in the main scan
-         */
-        public ExpandedSAMRecord(SAMRecord r, byte [] ref, long offset, IndelListener l) {
-
-            read = r;
-            final long rStart = read.getAlignmentStart();
-            final long rStop = read.getAlignmentEnd();
-            final String readString = read.getReadString();
-            final byte[] readBases = readString.toUpperCase().getBytes();
-            final byte[] baseQuals = read.getBaseQualities(); // fetched once instead of once per base
-
-            ref = new String(ref).toUpperCase().getBytes();
-
-            mismatch_flags = new byte[(int)(rStop-rStart+1)];
-            expanded_quals = new byte[(int)(rStop-rStart+1)];
-
-            // now let's extract indels:
-
-            Cigar c = read.getCigar();
-            final int nCigarElems = c.numCigarElements();
-
-
-            int readLength = 0; // length of the aligned part of the read NOT counting clipped bases
-            for ( CigarElement cel : c.getCigarElements() ) {
-
-                switch(cel.getOperator()) {
-                case H:
-                case S:
-                case D:
-                case N:
-                case P:
-                    break; // do not count gaps or clipped bases
-                case I:
-                case M:
-                    readLength += cel.getLength();
-                    break; // advance along the gapless block in the alignment
-                default :
-                    throw new IllegalArgumentException("Unexpected operator in cigar string: "+cel.getOperator());
-                }
-            }
-
-            int fromStart = 0;
-            int posOnRead = 0;
-            int posOnRef = 0; // the chunk of reference ref[] that we have access to is aligned with the read:
-                                  // its start on the actual full reference contig is r.getAlignmentStart()
-            for ( int i = 0 ; i < nCigarElems ; i++ ) {
-
-                final CigarElement ce = c.getCigarElement(i);
-                IndelVariant.Type type = null;
-                String indel_bases = null;
-                int eventPosition = posOnRef; // remembered before a deletion advances posOnRef
-
-                switch(ce.getOperator()) {
-                case H: break; // hard clipped reads do not have clipped indel_bases in their sequence, so we just ignore the H element...
-                case I:
-                    type = IndelVariant.Type.I;
-                    indel_bases = readString.substring(posOnRead,posOnRead+ce.getLength());
-                    // will increment position on the read below, there's no 'break' statement yet...
-                case S:
-                    // here we also skip soft-clipped indel_bases on the read; according to SAM format specification,
-                    // alignment start position on the reference points to where the actually aligned
-                    // (not clipped) indel_bases go, so we do not need to increment reference position here
-                    posOnRead += ce.getLength();
-                    break;
-                case D:
-                    type = IndelVariant.Type.D;
-                    indel_bases = new String( ref, posOnRef, ce.getLength() );
-                    for( int k = 0 ; k < ce.getLength(); k++, posOnRef++ ) mismatch_flags[posOnRef] = expanded_quals[posOnRef] = -1;
-
-                    break;
-                case M:
-                    for ( int k = 0; k < ce.getLength(); k++, posOnRef++, posOnRead++ ) {
-                        if ( readBases[posOnRead] != ref[posOnRef] )  { // mismatch!
-                            mms++;
-                            mismatch_flags[posOnRef] = 1;
-                        }
-                        expanded_quals[posOnRef] = baseQuals[posOnRead];
-                    }
-                    fromStart += ce.getLength();
-                    break; // advance along the gapless block in the alignment
-                default :
-                    throw new IllegalArgumentException("Unexpected operator in cigar string: "+ce.getOperator());
-                }
-
-                if ( type == null ) continue; // element was not an indel, go grab next element...
-
-                // we got an indel if we are here...
-                if ( i == 0 ) logger.debug("Indel at the start of the read "+read.getReadName());
-                if ( i == nCigarElems - 1) logger.debug("Indel at the end of the read "+read.getReadName());
-
-                // note that here we will be assigning indels to the first deleted base or to the first
-                // base after insertion, not to the last base before the event!
-                int fromEnd = readLength - fromStart;
-                if ( type == IndelVariant.Type.I ) fromEnd -= ce.getLength();
-
-                l.addObservation((int)(offset+eventPosition), type, indel_bases, fromStart, fromEnd, this);
-
-                if ( type == IndelVariant.Type.I ) fromStart += ce.getLength();
-
-            }
-        }
-
-        public SAMRecord getSAMRecord() { return read; }
-
-        public byte [] getExpandedMMFlags() { return mismatch_flags; }
-
-        public byte [] getExpandedQuals() { return expanded_quals; }
-
-        public int getMMCount() { return mms; }
-
-        /**
-         * Two expanded records are equal when the underlying SAMRecords are equal. A plain SAMRecord
-         * argument is compared against the wrapped read as well (note: that comparison is not
-         * symmetric, since SAMRecord.equals knows nothing about this wrapper).
-         */
-        @Override
-        public boolean equals(Object o) {
-            if ( this == o ) return true;
-            if ( read == null ) return false;
-            if ( o instanceof SAMRecord ) return read.equals(o);
-            if ( o instanceof ExpandedSAMRecord ) return read.equals(((ExpandedSAMRecord)o).read);
-            return false;
-        }
-
-        /**
-         * Hash code consistent with equals(): derived from the wrapped read only. Without it,
-         * hash-based collections (these records are kept in a HashSet) cannot find an equal record.
-         */
-        @Override
-        public int hashCode() {
-            return read == null ? 0 : read.hashCode();
-        }
-
-
-    }
-
-}
-
-
-/**
- * Keys of the indel-specific VCF attributes, the FORMAT header lines that declare them, and
- * static helpers that store attribute values into a caller-supplied map (each helper returns
- * the same map it was given).
- */
-class VCFIndelAttributes {
-    public static String ALLELIC_DEPTH_KEY = "AD";
-    public static String DEPTH_TOTAL_KEY = VCFConstants.DEPTH_KEY;
-    public static String MAPQ_KEY = "MQS";
-    public static String MM_KEY = "MM";
-    public static String NQS_MMRATE_KEY = "NQSMM";
-    public static String NQS_AVQ_KEY = "NQSBQ";
-    public static String STRAND_COUNT_KEY = "SC";
-    public static String RSTART_OFFSET_KEY = "RStart";
-    public static String REND_OFFSET_KEY = "REnd";
-
-    /** Returns a new set holding one FORMAT header line per attribute key declared above. */
-    public static Set getAttributeHeaderLines() {
-        Set headerLines = new HashSet();
-
-        // depth
-        headerLines.add(new VCFFormatHeaderLine(ALLELIC_DEPTH_KEY, 2, VCFHeaderLineType.Integer, "# of reads supporting consensus indel/reference at the site"));
-        headerLines.add(new VCFFormatHeaderLine(DEPTH_TOTAL_KEY, 1, VCFHeaderLineType.Integer, "Total coverage at the site"));
-
-        // read-level averages, consensus vs reference
-        headerLines.add(new VCFFormatHeaderLine(MAPQ_KEY, 2, VCFHeaderLineType.Float, "Average mapping qualities of consensus indel-supporting reads/reference-supporting reads"));
-        headerLines.add(new VCFFormatHeaderLine(MM_KEY, 2, VCFHeaderLineType.Float, "Average # of mismatches per consensus indel-supporting read/per reference-supporting read"));
-        headerLines.add(new VCFFormatHeaderLine(NQS_MMRATE_KEY, 2, VCFHeaderLineType.Float, "Within NQS window: fraction of mismatching bases in consensus indel-supporting reads/in reference-supporting reads"));
-        headerLines.add(new VCFFormatHeaderLine(NQS_AVQ_KEY, 2, VCFHeaderLineType.Float, "Within NQS window: average quality of bases from consensus indel-supporting reads/from reference-supporting reads"));
-
-        // strand counts and position of the indel within the reads
-        headerLines.add(new VCFFormatHeaderLine(STRAND_COUNT_KEY, 4, VCFHeaderLineType.Integer, "Strandness: counts of forward-/reverse-aligned indel-supporting reads / forward-/reverse-aligned reference supporting reads"));
-        headerLines.add(new VCFFormatHeaderLine(RSTART_OFFSET_KEY, 2, VCFHeaderLineType.Integer, "Median/mad of indel offsets from the starts of the reads"));
-        headerLines.add(new VCFFormatHeaderLine(REND_OFFSET_KEY, 2, VCFHeaderLineType.Integer, "Median/mad of indel offsets from the ends of the reads"));
-
-        return headerLines;
-    }
-
-    /** Stores the given values under key as an Integer[] and hands the map back. */
-    private static Map putIntegers(Map attrs, String key, Integer... values) {
-        attrs.put(key, values);
-        return attrs;
-    }
-
-    /** Stores (cons, ref), narrowed to float, under key as a Float[] and hands the map back. */
-    private static Map putFloatPair(Map attrs, String key, double cons, double ref) {
-        attrs.put(key, new Float[] { (float) cons, (float) ref });
-        return attrs;
-    }
-
-    /** Records forward/reverse counts for consensus reads followed by forward/reverse counts for reference reads. */
-    public static Map recordStrandCounts(int cnt_cons_fwd, int cnt_cons_rev, int cnt_ref_fwd, int cnt_ref_rev, Map attrs) {
-        return putIntegers(attrs, STRAND_COUNT_KEY, cnt_cons_fwd, cnt_cons_rev, cnt_ref_fwd, cnt_ref_rev);
-    }
-
-    /** Records the pair (cnt_cons, cnt_indel) as allelic depth and cnt_total as total depth. */
-    public static Map recordDepth(int cnt_cons, int cnt_indel, int cnt_total, Map attrs) {
-        putIntegers(attrs, ALLELIC_DEPTH_KEY, cnt_cons, cnt_indel);
-        attrs.put(DEPTH_TOTAL_KEY, cnt_total);
-        return attrs;
-    }
-
-    /** Records average mapping qualities (consensus, reference). */
-    public static Map recordAvMapQ(double cons, double ref, Map attrs) {
-        return putFloatPair(attrs, MAPQ_KEY, cons, ref);
-    }
-
-    /** Records average mismatch counts (consensus, reference). */
-    public static Map recordAvMM(double cons, double ref, Map attrs) {
-        return putFloatPair(attrs, MM_KEY, cons, ref);
-    }
-
-    /** Records NQS-window mismatch rates (consensus, reference). */
-    public static Map recordNQSMMRate(double cons, double ref, Map attrs) {
-        return putFloatPair(attrs, NQS_MMRATE_KEY, cons, ref);
-    }
-
-    /** Records NQS-window average base qualities (consensus, reference). */
-    public static Map recordNQSAvQ(double cons, double ref, Map attrs) {
-        return putFloatPair(attrs, NQS_AVQ_KEY, cons, ref);
-    }
-
-    /** Records median and mad of the indel offsets from the read starts. */
-    public static Map recordOffsetFromStart(int median, int mad, Map attrs) {
-        return putIntegers(attrs, RSTART_OFFSET_KEY, median, mad);
-    }
-
-    /** Records median and mad of the indel offsets from the read ends. */
-    public static Map recordOffsetFromEnd(int median, int mad, Map attrs) {
-        return putIntegers(attrs, REND_OFFSET_KEY, median, mad);
-    }
-}
+/*
+ * Copyright (c) 2010 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
+ * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.gatk.walkers.indels;
+
+import net.sf.samtools.*;
+import org.broadinstitute.sting.commandline.Argument;
+import org.broadinstitute.sting.commandline.Hidden;
+import org.broadinstitute.sting.commandline.Output;
+import org.broadinstitute.sting.commandline.Tags;
+import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
+import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
+import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource;
+import org.broadinstitute.sting.gatk.filters.MappingQualityZeroReadFilter;
+import org.broadinstitute.sting.gatk.filters.Platform454Filter;
+import org.broadinstitute.sting.gatk.filters.PlatformUnitFilter;
+import org.broadinstitute.sting.gatk.filters.PlatformUnitFilterHelper;
+import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
+import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator;
+import org.broadinstitute.sting.gatk.refdata.features.refseq.Transcript;
+import org.broadinstitute.sting.gatk.refdata.features.refseq.RefSeqCodec;
+import org.broadinstitute.sting.gatk.refdata.features.refseq.RefSeqFeature;
+import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
+import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder;
+import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
+import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
+import org.broadinstitute.sting.gatk.walkers.ReadFilters;
+import org.broadinstitute.sting.gatk.walkers.ReadWalker;
+import org.broadinstitute.sting.utils.GenomeLoc;
+import org.broadinstitute.sting.utils.GenomeLocSortedSet;
+import org.broadinstitute.sting.utils.SampleUtils;
+import org.broadinstitute.sting.utils.codecs.vcf.*;
+import org.broadinstitute.sting.utils.collections.CircularArray;
+import org.broadinstitute.sting.utils.collections.PrimitivePair;
+import org.broadinstitute.sting.utils.exceptions.StingException;
+import org.broadinstitute.sting.utils.exceptions.UserException;
+import org.broadinstitute.sting.utils.interval.IntervalFileMergingIterator;
+import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
+import org.broadinstitute.sting.utils.interval.IntervalUtils;
+import org.broadinstitute.sting.utils.interval.OverlappingIntervalIterator;
+import org.broadinstitute.sting.utils.sam.AlignmentUtils;
+import org.broadinstitute.sting.utils.variantcontext.Allele;
+import org.broadinstitute.sting.utils.variantcontext.Genotype;
+import org.broadinstitute.sting.utils.variantcontext.VariantContext;
+
+import java.io.*;
+import java.util.*;
+
+/**
+ * This is a simple, counts-and-cutoffs based tool for calling indels from aligned (preferably MSA cleaned) sequencing
+ * data. Two output formats supported are: BED format (minimal output, required), and extended output that includes read
+ * and mismatch statistics around the calls (turned on with --verbose). The calls can be performed from a single/pooled sample,
+ * or from a matched pair of samples (with --somatic option). In the latter case, two input bam files must be specified,
+ * the order is important: indels are called from the second sample ("Tumor") and additionally annotated as germline
+ * if even a weak evidence for the same indel, not necessarily a confident call, exists in the first sample ("Normal"), or as somatic
+ * if first bam has coverage at the site but no indication for an indel. In the --somatic mode, BED output contains
+ * only somatic calls, while --verbose output contains all calls annotated with GERMLINE/SOMATIC keywords.
+ */
+@ReadFilters({Platform454Filter.class, MappingQualityZeroReadFilter.class, PlatformUnitFilter.class})
+public class SomaticIndelDetectorWalker extends ReadWalker {
+//    @Output
+//    PrintStream out;
+    @Output(doc="File to which variants should be written",required=true)
+    protected VCFWriter vcf_writer = null;
+
+    @Argument(fullName="outputFile", shortName="O", doc="output file name (BED format). DEPRECATED> Use --bed", required=true)
+    @Deprecated
+    java.io.File output_file;
+
+    @Argument(fullName = "metrics_file", shortName = "metrics", doc = "File to print callability metrics output", required = false)
+    public PrintStream metricsWriter = null;
+
+//    @Argument(fullName="vcf_format", shortName="vcf", doc="generate output file in VCF format", required=false)
+//    boolean FORMAT_VCF = false;
+
+    @Hidden
+    @Argument(fullName = "genotype_intervals", shortName = "genotype",
+            doc = "Calls will be made at each position within the specified interval(s), whether there is an indel or it's the ref", required = false)
+    public String genotypeIntervalsFile = null;
+
+    @Hidden
+    @Argument(fullName="genotypeIntervalsAreNotSorted", shortName="giNotSorted", required=false,
+            doc="This tool assumes that the genotyping interval list (--genotype_intervals) is sorted; "+
+                "if the list turns out to be unsorted, it will throw an exception.  "+
+                "Use this argument when your interval list is not sorted to instruct the IndelGenotyper "+
+                "to sort and keep it in memory (increases memory usage!).")
+    protected boolean GENOTYPE_NOT_SORTED = false;
+
+    @Hidden
+	@Argument(fullName="unpaired", shortName="unpaired",
+			doc="Perform unpaired calls (no somatic status detection)", required=false)
+    boolean call_unpaired = false;
+	boolean call_somatic ;
+
+	@Argument(fullName="verboseOutput", shortName="verbose",
+			doc="Verbose output file in text format", required=false)
+	java.io.File verboseOutput = null;
+
+    @Argument(fullName="bedOutput", shortName="bed",
+            doc="Lightweight bed output file (only positions and events, no stats/annotations)", required=false)
+    java.io.File bedOutput = null;
+
+	@Argument(fullName="minCoverage", shortName="minCoverage",
+			doc="indel calls will be made only at sites with coverage of minCoverage or more reads; with --somatic this value is applied to tumor sample", required=false)
+	int minCoverage = 6;
+
+	@Argument(fullName="minNormalCoverage", shortName="minNormalCoverage",
+			doc="used only with --somatic;  normal sample must have at least minNormalCoverage or more reads at the site to call germline/somatic indel, otherwise the indel (in tumor) is ignored", required=false)
+	int minNormalCoverage = 4;
+
+	@Argument(fullName="minFraction", shortName="minFraction",
+			doc="Minimum fraction of reads with CONSENSUS indel at a site, out of all reads covering the site, required for making a call"+
+			" (fraction of non-consensus indels at the site is not considered here, see minConsensusFraction)", required=false)
+	double minFraction = 0.3;
+
+	@Argument(fullName="minConsensusFraction", shortName="minConsensusFraction",
+			doc="Indel call is made only if fraction of CONSENSUS indel observations at a site wrt all indel observations at the site exceeds this threshold", required=false)
+	double minConsensusFraction = 0.7;
+
+	@Argument(fullName="minIndelCount", shortName="minCnt",
+			doc="Minimum count of reads supporting consensus indel required for making the call. "+
+			" This filter supercedes minFraction, i.e. indels with acceptable minFraction at low coverage "+
+			"(minIndelCount not met) will not pass.", required=false)
+	int minIndelCount = 0;
+
+	@Argument(fullName="refseq", shortName="refseq",
+			doc="Name of RefSeq transcript annotation file. If specified, indels will be annotated with GENOMIC/UTR/INTRON/CODING and with the gene name", required=false)
+	String RefseqFileName = null;
+
+    @Argument(fullName="blacklistedLanes", shortName="BL",
+            doc="Name of lanes (platform units) that should be ignored. Reads coming from these lanes will never be seen "+
+                    "by this application, so they will not contribute indels to consider and will not be counted.", required=false)
+    PlatformUnitFilterHelper dummy;
+     @Argument(fullName="indel_debug", shortName="idebug", doc="Detailed printout for debugging, do not turn this on",required=false) Boolean DEBUG = false;
+    @Argument(fullName="window_size", shortName="ws", doc="Size (bp) of the sliding window used for accumulating the coverage. "+
+            "May need to be increased to accomodate longer reads or longer deletions.",required=false) int WINDOW_SIZE = 200;
+    @Argument(fullName="maxNumberOfReads",shortName="mnr",doc="Maximum number of reads to cache in the window; if number of reads exceeds this number,"+
+                " the window will be skipped and no calls will be made from it",required=false) int MAX_READ_NUMBER = 10000;
+
+	private WindowContext tumor_context;
+	private WindowContext normal_context; 
+	private int currentContigIndex = -1;
+    private int contigLength = -1; // we see too much messy data with reads hanging off the contig ends...
+	private int currentPosition = -1; // position of the last read we've seen on the current contig
+	private String refName = null;
+	private java.io.Writer output = null;
+	private GenomeLoc location = null;
+    private long normalCallsMade = 0L, tumorCallsMade = 0L;
+
+    boolean outOfContigUserWarned = false;
+
+    private LocationAwareSeekableRODIterator refseqIterator=null;
+
+//	private Set normalReadGroups; // we are going to remember which read groups are normals and which are tumors in order to be able
+//	private Set tumorReadGroups ; // to properly assign the reads coming from a merged stream
+    private Set normalSamples; // we are going to remember which samples are normal and which are tumor:
+    private Set tumorSamples ; // these are used only to generate genotypes for vcf output
+
+	private int NQS_WIDTH = 5; // 5 bases on each side of the indel for NQS-style statistics
+
+    private Writer bedWriter = null;
+    private Writer verboseWriter = null;
+
+
+	private static String annGenomic = "GENOMIC";
+	private static String annIntron = "INTRON";
+	private static String annUTR = "UTR";
+	private static String annCoding = "CODING";
+	private static String annUnknown = "UNKNOWN";
+
+    /** Categories of call outcome; the code assigning them lies outside this part of the file. */
+    enum CallType { NOCOVERAGE, BADCOVERAGE, NOEVIDENCE, GERMLINE, SOMATIC }
+
+	private SAMRecord lastRead;
+    private byte[] refBases;
+    private ReferenceDataSource refData;
+    private Iterator genotypeIntervalIterator = null;
+
+    // the current interval in the list of intervals, for which we want to do full genotyping
+    private GenomeLoc currentGenotypeInterval = null;
+    private long lastGenotypedPosition = -1; // last position on the currentGenotypeInterval, for which a call was already printed;
+                                     // can be 1 base before lastGenotyped start
+
+
+    // "/humgen/gsa-scr1/GATK_Data/refGene.sorted.txt"
+
+    /**
+     * Assembles the meta-information lines for the output VCF header: source and reference,
+     * the indel FORMAT attribute declarations, the SOMATIC INFO flag (paired mode only), the
+     * approximate command-line arguments of this walker and of the engine's read filters, and
+     * the names of the input bam files.
+     *
+     * @return a new set of header lines
+     */
+    private Set<VCFHeaderLine> getVCFHeaderInfo() {
+        Set<VCFHeaderLine> headerInfo = new HashSet<VCFHeaderLine>();
+
+        // first, the basic info
+        headerInfo.add(new VCFHeaderLine("source", "IndelGenotyperV2"));
+        headerInfo.add(new VCFHeaderLine("reference", getToolkit().getArguments().referenceFile.getName()));
+
+        // FORMAT and INFO fields
+        headerInfo.addAll(VCFIndelAttributes.getAttributeHeaderLines());
+        if ( call_somatic ) {
+            headerInfo.add(new VCFInfoHeaderLine(VCFConstants.SOMATIC_KEY, 0, VCFHeaderLineType.Flag, "Somatic event"));
+        }
+
+        // all of the arguments from the argument collection
+        Set<Object> args = new HashSet<Object>();
+        args.add(this);
+        args.addAll(getToolkit().getFilters());
+        Map<String,String> commandLineArgs = getToolkit().getApproximateCommandLineArguments(args);
+        for ( Map.Entry<String,String> commandLineArg : commandLineArgs.entrySet() )
+            headerInfo.add(new VCFHeaderLine(String.format("IGv2_%s", commandLineArg.getKey()), commandLineArg.getValue()));
+
+        // also, the list of input bams
+        for ( String fileName : getToolkit().getArguments().samFiles )
+            headerInfo.add(new VCFHeaderLine("IGv2_bam_file_used", fileName));
+
+        return headerInfo;
+    }
+
+
+	/**
+	 * One-time walker setup, executed before any read is processed.
+	 *
+	 * Decides between paired (somatic) and unpaired mode, allocates the coverage window(s),
+	 * optionally loads RefSeq annotations, validates the normal/tumor tags attached to every input bam,
+	 * prepares the iterator over the intervals requested for genotyping, opens the optional BED and verbose
+	 * output writers and, finally, writes the VCF header.
+	 *
+	 * @throws UserException.DeprecatedArgument if both -bed and the deprecated -O outputs are requested
+	 * @throws UserException.BadInput in paired mode, if fewer than two bam files are given, if a bam file is
+	 *         untagged or tagged as both normal and tumor, or if no file at all is tagged as normal (or as tumor)
+	 * @throws UserException.CouldNotReadInputFile if the BED or the verbose output file can not be opened for writing
+	 */
+	@Override
+	public void initialize() {
+
+        call_somatic = ! call_unpaired;
+        normal_context = new WindowContext(0,WINDOW_SIZE);
+        normalSamples = new HashSet();
+
+        if ( bedOutput != null && output_file != null ) {
+            throw new UserException.DeprecatedArgument("-O", "-O option is deprecated and -bed option replaces it; you can not use both at the same time");
+        }
+
+        if ( RefseqFileName != null ) {
+            logger.info("Using RefSeq annotations from "+RefseqFileName);
+
+            RMDTrackBuilder builder = new RMDTrackBuilder(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(),
+                                                          getToolkit().getGenomeLocParser(),
+                                                          getToolkit().getArguments().unsafe);
+            RMDTrack refseq = builder.createInstanceOfTrack(RefSeqCodec.class,new File(RefseqFileName));
+
+            refseqIterator = new SeekableRODIterator(refseq.getHeader(),
+                                                     refseq.getSequenceDictionary(),
+                                                     getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(),
+                                                     getToolkit().getGenomeLocParser(),
+                                                     refseq.getIterator());
+        }
+
+        if ( refseqIterator == null ) logger.info("No gene annotations available");
+
+        int nSams = getToolkit().getArguments().samFiles.size();
+
+        if ( call_somatic ) {
+            if ( nSams < 2 ) throw new UserException.BadInput("In default (paired sample) mode at least two bam files (normal and tumor) must be specified");
+            tumor_context = new WindowContext(0,WINDOW_SIZE);
+            tumorSamples = new HashSet();
+        }
+
+        // validate the tags of every input bam and collect sample names per normal/tumor group
+        int nNorm = 0;
+        int nTum = 0;
+        for ( SAMReaderID rid : getToolkit().getReadsDataSource().getReaderIDs() ) {
+            Tags tags = rid.getTags() ;
+            if ( tags.getPositionalTags().isEmpty() && call_somatic )
+                throw new UserException.BadInput("In default (paired sample) mode all input bam files must be tagged as either 'normal' or 'tumor'. Untagged file: "+
+                        getToolkit().getSourceFileForReaderID(rid));
+            boolean normal = false;
+            boolean tumor = false;
+            for ( String s : tags.getPositionalTags() ) { // we allow additional unrelated tags (and we do not use them), but we REQUIRE one of Tumor/Normal to be present if --somatic is on
+                if ( "NORMAL".equals(s.toUpperCase()) ) {
+                    normal = true;
+                    nNorm++;
+                }
+                if ( "TUMOR".equals(s.toUpperCase()) ) {
+                    tumor = true;
+                    nTum++ ;
+                }
+            }
+            if ( call_somatic && normal && tumor ) throw new UserException.BadInput("Input bam file "+
+                    getToolkit().getSourceFileForReaderID(rid)+" is tagged both as normal and as tumor. Which one is it??");
+            if ( call_somatic && !normal && ! tumor )
+                throw new UserException.BadInput("In somatic mode all input bams must be tagged as either normal or tumor. Encountered untagged file: "+
+                    getToolkit().getSourceFileForReaderID(rid));
+            if ( ! call_somatic && (normal || tumor) )
+                System.out.println("WARNING: input bam file "+getToolkit().getSourceFileForReaderID(rid)
+                        +" is tagged as Normal and/or Tumor, but somatic mode is not on. Tags will ne IGNORED");
+            if ( call_somatic && tumor ) {
+                for ( SAMReadGroupRecord rg : getToolkit().getSAMFileHeader(rid).getReadGroups() ) {
+                    tumorSamples.add(rg.getSample());
+                }
+            } else {
+                for ( SAMReadGroupRecord rg : getToolkit().getSAMFileHeader(rid).getReadGroups() ) {
+                    normalSamples.add(rg.getSample());
+                }
+            }
+        }
+
+        // every file carries exactly one of the two tags at this point, but nothing above guarantees that
+        // both groups are represented; a paired run with only normals or only tumors can not make somatic calls
+        if ( call_somatic && ( nNorm == 0 || nTum == 0 ) )
+            throw new UserException.BadInput("In default (paired sample) mode at least one input bam file must be tagged as 'normal' and at least one as 'tumor'; found "+
+                    nNorm+" normal and "+nTum+" tumor tag(s)");
+
+        // the genotyping intervals do not depend on any particular input bam, so they are set up once,
+        // after the per-file loop (they used to be needlessly rebuilt for every bam file)
+        if ( genotypeIntervalsFile != null ) {
+
+            if ( ! GENOTYPE_NOT_SORTED && IntervalUtils.isIntervalFile(genotypeIntervalsFile)) {
+                // prepare to read intervals one-by-one, as needed (assuming they are sorted).
+                genotypeIntervalIterator = new IntervalFileMergingIterator(getToolkit().getGenomeLocParser(),
+                    new java.io.File(genotypeIntervalsFile), IntervalMergingRule.OVERLAPPING_ONLY );
+            } else {
+                // read in the whole list of intervals for cleaning
+                GenomeLocSortedSet locs = IntervalUtils.sortAndMergeIntervals(getToolkit().getGenomeLocParser(),
+                    IntervalUtils.parseIntervalArguments(getToolkit().getGenomeLocParser(),Arrays.asList(genotypeIntervalsFile),true), IntervalMergingRule.OVERLAPPING_ONLY);
+                genotypeIntervalIterator = locs.iterator();
+            }
+
+            // wrap intervals requested for genotyping inside overlapping iterator, so that we actually
+            // genotype only on the intersections of the requested intervals with the -L intervals
+            genotypeIntervalIterator = new OverlappingIntervalIterator(genotypeIntervalIterator, getToolkit().getIntervals().iterator() );
+
+            currentGenotypeInterval = genotypeIntervalIterator.hasNext() ? genotypeIntervalIterator.next() : null;
+
+            if ( DEBUG) System.out.println("DEBUG>> first genotyping interval="+currentGenotypeInterval);
+
+            if ( currentGenotypeInterval != null ) lastGenotypedPosition = currentGenotypeInterval.getStart()-1;
+        }
+
+        location = getToolkit().getGenomeLocParser().createGenomeLoc(getToolkit().getSAMFileHeader().getSequence(0).getSequenceName(),1);
+
+        // NOTE(review): this replaces the normal sample set collected in the loop above with the samples of
+        // the first reader -- confirm that this is intended
+        normalSamples = getToolkit().getSamplesByReaders().get(0);
+
+        // we already checked that bedOutput and output_file are not set simultaneously; each writer is opened in
+        // its own try block so that a failure is reported against the file that actually could not be opened
+        if ( bedOutput != null ) {
+            try {
+                bedWriter = new FileWriter(bedOutput);
+            } catch (java.io.IOException e) {
+                throw new UserException.CouldNotReadInputFile(bedOutput, "Failed to open BED file for writing.", e);
+            }
+        }
+        if ( output_file != null ) {
+            try {
+                bedWriter = new FileWriter(output_file);
+            } catch (java.io.IOException e) {
+                throw new UserException.CouldNotReadInputFile(output_file, "Failed to open BED file for writing.", e);
+            }
+        }
+        if ( verboseOutput != null ) {
+            try {
+                verboseWriter = new FileWriter(verboseOutput);
+            } catch (java.io.IOException e) {
+                throw new UserException.CouldNotReadInputFile(verboseOutput, "Failed to open verbose output file for writing.", e);
+            }
+        }
+
+        vcf_writer.writeHeader(new VCFHeader(getVCFHeaderInfo(), SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader()))) ;
+        refData = new ReferenceDataSource(getToolkit().getArguments().referenceFile);
+	}
+
+
+	/**
+	 * Processes a single read: on a contig change flushes the calls accumulated so far and resets the
+	 * coverage window(s); when the read does not fit into the current window, emits calls and shifts the
+	 * window; finally adds the read to the normal or to the tumor window.
+	 *
+	 * @param ref reference context of the read; its bases are passed on to the coverage window
+	 * @param read the read to process
+	 * @param metaDataTracker not used by this method
+	 * @return 1 if the read was added to a coverage window, 0 if it was skipped (unmapped, duplicate,
+	 *         non-primary, mapping quality 0, or aligned past the end of the contig)
+	 * @throws UserException.MissortedBAM if the read comes out of coordinate order
+	 * @throws UserException.BadArgumentValue if the read does not fit into the window even after the shift
+	 * @throws StingException in paired mode, if the source file of the read carries no normal/tumor tag
+	 */
+	@Override
+	public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
+
+            if ( DEBUG ) {
+                if ( read.getDuplicateReadFlag() ) System.out.println("DEBUG>> Duplicated read (IGNORED)");
+            }
+
+            if ( AlignmentUtils.isReadUnmapped(read) ||
+                 read.getDuplicateReadFlag() ||
+                 read.getNotPrimaryAlignmentFlag() ||
+                 read.getMappingQuality() == 0 ) {
+                return 0; // we do not need those reads!
+            }
+
+            if ( read.getReferenceIndex() != currentContigIndex ) {
+                // we just jumped onto a new contig
+                if ( DEBUG ) System.out.println("DEBUG>>> Moved to contig "+read.getReferenceName());
+                if ( read.getReferenceIndex() < currentContigIndex ) // paranoidal
+                    throw new UserException.MissortedBAM(SAMFileHeader.SortOrder.coordinate, read, "Read "+read.getReadName()+": contig is out of order; input BAM file is unsorted");
+
+                // print remaining indels from the previous contig (if any);
+                if ( call_somatic ) emit_somatic(1000000000, true);
+                else emit(1000000000,true);
+
+                currentContigIndex = read.getReferenceIndex();
+                currentPosition = read.getAlignmentStart();
+                refName = new String(read.getReferenceName());
+
+                location = getToolkit().getGenomeLocParser().createGenomeLoc(refName,location.getStart(),location.getStop());
+                contigLength = getToolkit().getGenomeLocParser().getContigInfo(refName).getSequenceLength();
+                outOfContigUserWarned = false;
+
+                lastGenotypedPosition = -1;
+
+                normal_context.clear(); // reset coverage window; this will also set reference position to 0
+                if ( call_somatic) tumor_context.clear();
+
+                // cache the upper-cased reference bases of the whole new contig
+                refBases = new String(refData.getReference().getSequence(read.getReferenceName()).getBases()).toUpperCase().getBytes();
+            }
+
+            // we have reset the window to the new contig if it was required and emitted everything we collected
+            // on a previous contig. At this point we are guaranteed that we are set up properly for working
+            // with the contig of the current read.
+
+            // NOTE: all the sanity checks and error messages below use normal_context only. We make sure that normal_context and
+            // tumor_context are synchronized exactly (windows are always shifted together by emit_somatic), so it's safe
+
+            if ( read.getAlignmentStart() < currentPosition ) // oops, read out of order?
+                throw new UserException.MissortedBAM(SAMFileHeader.SortOrder.coordinate, read, "Read "+read.getReadName() +" out of order on the contig\n"+
+                        "Read starts at "+refName+":"+read.getAlignmentStart()+"; last read seen started at "+refName+":"+currentPosition
+                        +"\nLast read was: "+lastRead.getReadName()+" RG="+lastRead.getAttribute("RG")+" at "+lastRead.getAlignmentStart()+"-"
+                        +lastRead.getAlignmentEnd()+" cigar="+lastRead.getCigarString());
+
+            currentPosition = read.getAlignmentStart();
+            lastRead = read;
+
+            if ( read.getAlignmentEnd() > contigLength  ) {
+                if ( ! outOfContigUserWarned ) {
+                    System.out.println("WARNING: Reads aligned past contig length on "+ location.getContig()+"; all such reads will be skipped");
+                    outOfContigUserWarned = true;
+                }
+                return 0;
+            }
+
+            long alignmentEnd = read.getAlignmentEnd();
+            Cigar c = read.getCigar();
+            int lastNonClippedElement = 0; // reverse offset to the last unclipped element
+            CigarOperator op = null;
+            // moving backwards from the end of the cigar, skip trailing S or H cigar elements:
+            do {
+                lastNonClippedElement++;
+                op = c.getCigarElement( c.numCigarElements()-lastNonClippedElement ).getOperator();
+            } while ( op == CigarOperator.H || op == CigarOperator.S );
+
+            // now op is the last non-S/H operator in the cigar.
+
+            // a little trick here: we want to make sure that current read completely fits into the current
+            // window so that we can accumulate indel observations over the whole length of the read.
+            // The ::getAlignmentEnd() method returns the last position on the reference where bases from the
+            // read actually match (M cigar elements). After our cleaning procedure, we can have reads that end
+            // with I element, which is not gonna be counted into alignment length on the reference. On the other hand,
+            // in this program we assign insertions, internally, to the first base *after* the insertion position.
+            // Hence, we have to make sure that that extra base is already in the window or we will get IndexOutOfBounds.
+
+            if ( op == CigarOperator.I) alignmentEnd++;
+
+            if ( alignmentEnd > normal_context.getStop()) {
+
+                // we don't emit anything until we reach a read that does not fit into the current window.
+                // At that point we try shifting the window to the start of that read (or reasonably close) and emit everything prior to
+                // that position. This is legitimate, since the reads are sorted and  we are not gonna see any more coverage at positions
+                // below the current read's start.
+                // Clearly, we assume here that window is large enough to accomodate any single read, so simply shifting
+                // the window to around the read's start will ensure that the read fits...
+
+                if ( DEBUG) System.out.println("DEBUG>> Window at "+normal_context.getStart()+"-"+normal_context.getStop()+", read at "+
+                                read.getAlignmentStart()+": trying to emit and shift" );
+                if ( call_somatic ) emit_somatic( read.getAlignmentStart(), false );
+                else emit( read.getAlignmentStart(), false );
+
+                // let's double check now that the read fits after the shift; the check has to use the adjusted
+                // alignmentEnd (not read.getAlignmentEnd()), otherwise a read ending with an insertion right at
+                // the window boundary would pass the check and then run out of the window bounds
+                if ( alignmentEnd > normal_context.getStop()) {
+                    // ooops, looks like the read does not fit into the window even after the latter was shifted!!
+                    throw new UserException.BadArgumentValue("window_size", "Read "+read.getReadName()+": out of coverage window bounds. Probably window is too small, so increase the value of the window_size argument.\n"+
+                                             "Read length="+read.getReadLength()+"; cigar="+read.getCigarString()+"; start="+
+                                             read.getAlignmentStart()+"; end="+read.getAlignmentEnd()+
+                                             "; window start (after trying to accomodate the read)="+normal_context.getStart()+"; window end="+normal_context.getStop());
+                }
+            }
+
+            if ( call_somatic ) {
+
+                // the normal/tumor assignment of a read is defined by the tag of the bam file it came from
+                Tags tags =  getToolkit().getReaderIDForRead(read).getTags();
+                boolean assigned = false;
+                for ( String s : tags.getPositionalTags() ) {
+                    if ( "NORMAL".equals(s.toUpperCase()) ) {
+                        normal_context.add(read,ref.getBases());
+                        assigned = true;
+                        break;
+                    }
+                    if ( "TUMOR".equals(s.toUpperCase()) ) {
+                        tumor_context.add(read,ref.getBases());
+                        assigned = true;
+                        break;
+                    }
+                }
+                if ( ! assigned )
+                    throw new StingException("Read "+read.getReadName()+" from "+getToolkit().getSourceFileForReaderID(getToolkit().getReaderIDForRead(read))+
+                    " has no Normal/Tumor tag associated with it");
+
+                // both windows are always shifted together, so that they stay synchronized
+                if ( tumor_context.getReads().size() > MAX_READ_NUMBER ) {
+                    System.out.println("WARNING: a count of "+MAX_READ_NUMBER+" reads reached in a window "+
+                            refName+':'+tumor_context.getStart()+'-'+tumor_context.getStop()+" in tumor sample. The whole window will be dropped.");
+                    tumor_context.shift(WINDOW_SIZE);
+                    normal_context.shift(WINDOW_SIZE);
+                }
+                if ( normal_context.getReads().size() > MAX_READ_NUMBER ) {
+                    System.out.println("WARNING: a count of "+MAX_READ_NUMBER+" reads reached in a window "+
+                            refName+':'+normal_context.getStart()+'-'+normal_context.getStop()+" in normal sample. The whole window will be dropped");
+                    tumor_context.shift(WINDOW_SIZE);
+                    normal_context.shift(WINDOW_SIZE);
+                }
+
+            } else {
+                normal_context.add(read, ref.getBases());
+                if ( normal_context.getReads().size() > MAX_READ_NUMBER ) {
+                    System.out.println("WARNING: a count of "+MAX_READ_NUMBER+" reads reached in a window "+
+                            refName+':'+normal_context.getStart()+'-'+normal_context.getStop()+". The whole window will be dropped");
+                    normal_context.shift(WINDOW_SIZE);
+                }
+            }
+
+            return 1;
+	}
+
+    /** Tells whether position p, taken on the contig of the current 'location', lies beyond interval l:
+     * either the current contig comes after the contig of l, or the contigs are the same and p is
+     * greater than the stop position of l.
+     */
+    private boolean pastInterval(long p, GenomeLoc l) {
+        if ( location.getContigIndex() != l.getContigIndex() ) {
+            // different contigs: only the contig order matters
+            return location.getContigIndex() > l.getContigIndex();
+        }
+        // same contig: compare the position against the end of the interval
+        return p > l.getStop();
+    }
+
+    /** Emit calls of the specified type across genotyping intervals, from position lastGenotypedPosition+1 to
+     * pos-1, inclusive.
+     * @param contigIndex
+     * @param pos
+     * @param call
+     */
+    /*
+    private void emitNoCallsUpTo(int contigIndex, long pos, CallType call) {
+
+        if ( contigIndex < currentGenotypeInterval.getContigIndex() ||
+             contigIndex == currentGenotypeInterval.getContigIndex() && pos <= currentGenotypeInterval.getStart() ) return;
+
+        if ( contigIndex == currentGenotypeInterval.getContigIndex() && pos >= currentGenotypeInterval.getStart() ) {
+            for ( long p = lastGenotypedPosition+1; p < pos; p++ ) {
+
+            }
+        }
+        while( currentGenotypeInterval != null ) {
+
+            while ( )
+        if ( genotypeIntervalIterator.hasNext() ) {
+            currentGenotypeInterval = genotypeIntervalIterator.next() ;
+            if ( pastInterval(p,currentGenotypeInterval) ) {
+                // if we are about to jump over the whole next interval, we need to emit NO_COVERAGE calls there!
+                emitNoCoverageCalls(currentGenotypeInterval);
+            }
+        } else {
+            currentGenotypeInterval = null;
+        }
+        }
+    }
+*/
+    
+   /** Single-sample emission: walks the window from its start up to (but not including) the adjusted shift position,
+    * prints a call for every position that carries an indel call or falls into a requested genotyping interval,
+    * and finally shifts the window. The window normally ends up starting at the adjusted 'position'; when an
+    * indel sits so close to the shift position that more coverage around it may still arrive (and the shift is
+    * not forced), the window is shifted only up to the left side of that indel's NQS window instead.
+    *
+    * @param position position, to which the window shift is requested
+    * @param force if true, indels are emitted without waiting for additional coverage around them
+    */
+   private void emit(long position, boolean force) {
+
+       final long shiftTarget = adjustPosition(position);
+
+       if ( shiftTarget == -1 ) {
+           // no acceptable shift position exists: drop the data and jump directly to the requested position
+           normal_context.shift((int)(position-normal_context.getStart()));
+           return;
+       }
+
+       long finalShiftTarget = shiftTarget;
+
+       for ( int pos = normal_context.getStart() ; pos < Math.min(shiftTarget,normal_context.getStop()+1) ; pos++ ) {
+
+           // this position was already reported
+           if ( pos <= lastGenotypedPosition ) continue;
+
+           // internal positions (pos) are +1 compared to the external format spec (e.g. vcf)
+           final long p = pos - 1;
+
+           // find out whether p falls into an interval requested for genotyping; intervals we have
+           // already walked past are discarded along the way
+           boolean genotype = false;
+           while ( currentGenotypeInterval != null ) {
+               final boolean intervalNotReachedYet =
+                       location.getContigIndex() < currentGenotypeInterval.getContigIndex() ||
+                       location.getContigIndex() == currentGenotypeInterval.getContigIndex() &&
+                               p < currentGenotypeInterval.getStart();
+               if ( intervalNotReachedYet ) break;
+
+               if ( ! pastInterval(p, currentGenotypeInterval) ) {
+                   // p is inside the current genotyping interval
+                   genotype = true;
+                   break;
+               }
+
+               // done with the current interval, load the next one and check against it
+               currentGenotypeInterval = genotypeIntervalIterator.hasNext() ? genotypeIntervalIterator.next() : null;
+           }
+
+           // nothing to do here unless there is an indel or genotyping was requested
+           if ( normal_context.indelsAt(pos).size() == 0 && ! genotype ) continue;
+
+           final IndelPrecall normalCall = new IndelPrecall(normal_context,pos,NQS_WIDTH);
+
+           if ( normalCall.getCoverage() < minCoverage && ! genotype ) {
+               // low coverage
+               if ( DEBUG ) {
+                   System.out.println("DEBUG>> Indel at "+pos+"; coverare in normal="+normalCall.getCoverage()+" (SKIPPED)");
+               }
+               continue;
+           }
+
+           if ( DEBUG ) System.out.println("DEBUG>> "+(normalCall.getAllVariantCount() == 0?"No Indel":"Indel")+" at "+pos);
+
+           // rightmost position of the NQS window around the current indel
+           final long right = pos+( normalCall.getVariant() == null ? 0 : normalCall.getVariant().lengthOnRef())+NQS_WIDTH-1;
+
+           if ( ! force && right >= shiftTarget ) {
+               // too early to emit this indel: coverage to the right of it is still being collected.
+               // Shift only up to the left edge of its NQS window and wait for more reads.
+               final long left = Math.max( pos-NQS_WIDTH, normal_context.getStart() );
+               finalShiftTarget = adjustPosition(left);
+               if ( finalShiftTarget == -1 ) {
+                   // no acceptable shift position exists: drop the data altogether
+                   normal_context.shift((int)(shiftTarget-normal_context.getStart()));
+                   return;
+               }
+               if ( DEBUG ) System.out.println("DEBUG>> waiting for coverage; actual shift performed to "+ finalShiftTarget);
+               break;
+           }
+
+           location = getToolkit().getGenomeLocParser().createGenomeLoc(location.getContig(), pos);
+
+           final boolean haveCall = normalCall.isCall(); // evaluate once
+           if ( haveCall ) normalCallsMade++;
+
+           if ( haveCall || genotype ) {
+               printVCFLine(vcf_writer,normalCall);
+               if ( bedWriter != null ) normalCall.printBedLine(bedWriter);
+               if ( verboseWriter != null ) printVerboseLine(verboseWriter, normalCall);
+               lastGenotypedPosition = pos;
+           }
+
+           // this indel is dealt with; clear it so that it is not seen again if the window is only
+           // partially shifted because of another indel that follows closely
+           normal_context.indelsAt(pos).clear();
+       }
+
+       if ( DEBUG ) System.out.println("DEBUG>> Actual shift to " + finalShiftTarget + " ("+shiftTarget+")");
+       normal_context.shift((int)(finalShiftTarget - normal_context.getStart() ) );
+   }
+
+    /** A shortcut. Returns true if we got indels within the specified interval in single and only window context
+     * (for single-sample calls) or in either of the two window contexts (for two-sample/somatic calls)
+     *
+     */
+    private boolean indelsPresentInInterval(long start, long stop) {
+        if ( tumor_context == null ) return  normal_context.hasIndelsInInterval(start,stop);
+        return tumor_context.hasIndelsInInterval(start,stop) ||
+              normal_context.hasIndelsInInterval(start,stop);
+    }
+        /** Takes the position, to which window shift is requested, and tries to adjust it in such a way that no NQS window is broken.
+         * Namely, this method checks, iteratively, if there is an indel within NQS_WIDTH bases ahead of the initially requested or adjusted
+         * shift position. If there is such an indel,
+         * then shifting to that position would lose some or all NQS-window bases to the left of the indel (since it's not going to be emitted
+         * just yet). Instead, this method tries to readjust the shift position leftwards so that the full NQS window to the left of the next indel
+         * is preserved. This method tries this strategy 4 times (so that it would never walk away too far to the left), and if it fails to find
+         * an appropriate adjusted shift position (which could happen if there are many indels following each other at short intervals), it will give up,
+         * go back to the original requested shift position and try finding the first shift position that has no indel associated with it.
+         * If that search also fails after 50 single-base steps to the left, a warning is printed and -1 is returned.
+         */
+
+    private long adjustPosition(long request) {
+        long initial_request = request;
+        int attempts = 0;
+        boolean failure = false;
+        while ( indelsPresentInInterval(request,request+NQS_WIDTH)  ) {
+            request -= NQS_WIDTH;
+            if ( DEBUG ) System.out.println("DEBUG>> indel observations present within "+NQS_WIDTH+" bases ahead. Resetting shift to "+request);
+            attempts++;
+            if ( attempts == 4 ) {
+                if ( DEBUG ) System.out.println("DEBUG>> attempts to preserve full NQS window failed; now trying to find any suitable position.") ;
+                failure = true;
+                break;
+            }
+        }
+
+        if ( failure ) {
+            // we tried 4 times but did not find a good shift position that would preserve full nqs window
+            // around all indels. let's fall back and find any shift position as long and there's no indel at the very
+            // first position after the shift (this is bad for other reasons); if it breaks a nqs window, so be it
+            request = initial_request;
+            attempts = 0;
+            while ( indelsPresentInInterval(request,request+1) ) {
+                request--;
+                if ( DEBUG ) System.out.println("DEBUG>> indel observations present within "+NQS_WIDTH+" bases ahead. Resetting shift to "+request);
+                attempts++;
+                if ( attempts == 50 ) {
+                    System.out.println("WARNING: Indel at every position in the interval "+refName+":"+request+"-"+initial_request+
+                            ". Can not find a break to shift context window to; no calls will be attempted in the current window.");
+                    return -1;
+                }
+            }
+        }
+        if ( DEBUG ) System.out.println("DEBUG>> Found acceptable target position "+request);
+        return request;
+    }
+
+    /** Output somatic indel calls up to the specified position and shift the coverage array(s): after this method is executed
+     * first elements of the coverage arrays map onto 'position', or a few bases prior to the specified position
+     * if there is an indel in close proximity to 'position' so that we may get more coverage around it later.
+     *
+     * @param position
+     */
+    private void emit_somatic(long position, boolean force) {
+
+        long adjustedPosition = adjustPosition(position);
+        if ( adjustedPosition == -1 ) {
+            // failed to find appropriate shift position, the data are probably to messy anyway so we drop them altogether
+            normal_context.shift((int)(position-normal_context.getStart()));
+            tumor_context.shift((int)(position-tumor_context.getStart()));
+            return;
+        }
+        long move_to = adjustedPosition;
+
+        if ( DEBUG ) System.out.println("DEBUG>> Emitting in somatic mode up to "+position+" force shift="+force+" current window="+tumor_context.getStart()+"-"+tumor_context.getStop());
+
+        for ( int pos = tumor_context.getStart() ; pos < Math.min(adjustedPosition,tumor_context.getStop()+1) ; pos++ ) {
+
+            boolean genotype = false;
+             // first let's see if we need to genotype current position:
+
+             final long p = pos - 1; // our internally used positions (pos) are +1 compared to external format spec (e.g. vcf)
+
+             if ( pos <= lastGenotypedPosition ) continue;
+
+             while ( currentGenotypeInterval != null ) {
+
+                 // if we did not even reach next interval yet, no genotyping at current position:
+                 if ( location.getContigIndex() < currentGenotypeInterval.getContigIndex() ||
+                      location.getContigIndex() == currentGenotypeInterval.getContigIndex() &&
+                              p < currentGenotypeInterval.getStart() ) break;
+                 if ( pastInterval(p, currentGenotypeInterval) ) {
+                     // we are past current genotyping interval, so we are done with it; let's load next interval:
+                     currentGenotypeInterval = genotypeIntervalIterator.hasNext() ? genotypeIntervalIterator.next() : null;
+                     continue; // re-enter the loop to check against the interval we just loaded
+                 }
+
+                 // we reach tjis point only if p is inside current genotyping interval; set the flag and bail out:
+                 genotype = true;
+                 break;
+             }
+//            if ( DEBUG) System.out.println("DEBUG>> pos="+pos +"; genotyping interval="+currentGenotypeInterval+"; genotype="+genotype);
+
+            if ( tumor_context.indelsAt(pos).size() == 0 && ! genotype ) continue; // no indels in tumor
+
+            if ( DEBUG && genotype ) System.out.println("DEBUG>> Genotyping requested at "+pos);
+
+            IndelPrecall tumorCall = new IndelPrecall(tumor_context,pos,NQS_WIDTH);
+            IndelPrecall normalCall = new IndelPrecall(normal_context,pos,NQS_WIDTH);
+
+            if ( tumorCall.getCoverage() < minCoverage && ! genotype ) {
+                if ( DEBUG ) {
+                    System.out.println("DEBUG>> Indel in tumor at "+pos+"; coverare in tumor="+tumorCall.getCoverage()+" (SKIPPED)");
+                }
+                continue; // low coverage
+            }
+            if ( normalCall.getCoverage() < minNormalCoverage && ! genotype ) {
+                if ( DEBUG ) {
+                    System.out.println("DEBUG>> Indel in tumor at "+pos+"; coverare in normal="+normalCall.getCoverage()+" (SKIPPED)");
+                }
+                continue; // low coverage
+            }
+
+            if ( DEBUG ) {
+                System.out.print("DEBUG>> "+(tumorCall.getAllVariantCount() == 0?"No Indel":"Indel")+" in tumor, ");
+                System.out.print("DEBUG>> "+(normalCall.getAllVariantCount() == 0?"No Indel":"Indel")+" in normal at "+pos);
+            }
+
+            long left = Math.max( pos-NQS_WIDTH, tumor_context.getStart() );
+            long right = pos+ ( tumorCall.getVariant() == null ? 0 : tumorCall.getVariant().lengthOnRef() )+NQS_WIDTH-1;
+
+            if ( right >= adjustedPosition && ! force) {
+                // we are not asked to force-shift, and there is more coverage around the current indel that we still need to collect
+
+                // we are not asked to force-shift, and there's still additional coverage to the right of current indel, so its too early to emit it;
+                // instead we shift only up to current indel pos - MISMATCH_WIDTH, so that we could keep collecting that coverage
+                move_to = adjustPosition(left);
+                if ( move_to == -1 ) {
+                    // failed to find appropriate shift position, the data are probably to messy anyway so we drop them altogether
+                    normal_context.shift((int)(adjustedPosition-normal_context.getStart()));
+                    tumor_context.shift((int)(adjustedPosition-tumor_context.getStart()));
+                    return;
+                }
+                if ( DEBUG ) System.out.println("DEBUG>> waiting for coverage; actual shift performed to "+ move_to);
+                break;
+            }
+
+            if ( right > tumor_context.getStop() ) right = tumor_context.getStop(); // if indel is too close to the end of the window but we need to emit anyway (force-shift), adjust right
+
+//            location = getToolkit().getGenomeLocParser().setStart(location,pos);
+//            location = getToolkit().getGenomeLocParser().setStop(location,pos); // retrieve annotation data
+
+            location = getToolkit().getGenomeLocParser().createGenomeLoc(location.getContig(),pos); // retrieve annotation data
+
+            boolean haveCall = tumorCall.isCall(); // cache the value
+
+            if ( haveCall || genotype ) {
+                if ( haveCall ) tumorCallsMade++;
+
+                printVCFLine(vcf_writer,normalCall,tumorCall);
+
+                if ( bedWriter != null ) tumorCall.printBedLine(bedWriter);
+
+                if ( verboseWriter != null ) printVerboseLine(verboseWriter, normalCall, tumorCall );
+                lastGenotypedPosition = pos;
+            }
+            tumor_context.indelsAt(pos).clear();
+            normal_context.indelsAt(pos).clear();
+                // we dealt with this indel; don't want to see it again
+                // (we might otherwise in the case when 1) there is another indel that follows
+                // within MISMATCH_WIDTH bases and 2) we'd need to wait for more coverage for that next indel)
+
+//			for ( IndelVariant var : variants ) {
+//				System.out.print("\t"+var.getType()+"\t"+var.getBases()+"\t"+var.getCount());
+//			}
+        }
+
+        if ( DEBUG ) System.out.println("DEBUG>> Actual shift to " + move_to + " ("+adjustedPosition+")");
+        tumor_context.shift((int)(move_to - tumor_context.getStart() ) );
+        normal_context.shift((int)(move_to - normal_context.getStart() ) );
+    }
+
+    private String makeFullRecord(IndelPrecall normalCall, IndelPrecall tumorCall) {
+        StringBuilder fullRecord = new StringBuilder();
+        if ( tumorCall.getVariant() != null || normalCall.getVariant() == null) {
+            fullRecord.append(tumorCall.makeEventString());
+        } else {
+            fullRecord.append(normalCall.makeEventString());            
+        }
+        fullRecord.append('\t');
+        fullRecord.append(normalCall.makeStatsString("N_"));
+        fullRecord.append('\t');
+        fullRecord.append(tumorCall.makeStatsString("T_"));
+        fullRecord.append('\t');
+        return fullRecord.toString();
+    }
+
+    private String makeFullRecord(IndelPrecall normalCall) {
+        StringBuilder fullRecord = new StringBuilder();
+        fullRecord.append(normalCall.makeEventString());
+        fullRecord.append('\t');
+        fullRecord.append(normalCall.makeStatsString(""));
+        fullRecord.append('\t');
+        return fullRecord.toString();
+    }
+
+    private String getAnnotationString(RODRecordList ann) {
+        if ( ann == null ) return annGenomic;
+        else {
+            StringBuilder b = new StringBuilder();
+
+            if ( RefSeqFeature.isExon(ann) ) {
+                if ( RefSeqFeature.isCodingExon(ann) ) b.append(annCoding); // both exon and coding = coding exon sequence
+                else b.append(annUTR); // exon but not coding = UTR
+            } else {
+                if ( RefSeqFeature.isCoding(ann) ) b.append(annIntron); // not in exon, but within the coding region = intron
+                else b.append(annUnknown); // we have no idea what this is. this may actually happen when we have a fully non-coding exon...
+            }
+            b.append('\t');
+            b.append(((Transcript)ann.get(0).getUnderlyingObject()).getGeneName()); // there is at least one transcript in the list, guaranteed
+//			while ( it.hasNext() ) { //
+//				t.getGeneName()
+//			}
+            return b.toString();
+        }
+
+    }
+
+    /**
+     * Writes one line of verbose output for a single-sample site: the full record, the annotation
+     * (empty when no refseq iterator is configured) and, if a variant was observed but not called,
+     * a trailing FILTERED_NOCALL column.
+     *
+     * @param verboseWriter destination of the verbose line
+     * @param normalCall pre-call to report
+     * @throws UserException.CouldNotCreateOutputFile if writing fails
+     */
+    public void printVerboseLine(Writer verboseWriter, IndelPrecall normalCall) {
+        String annotationString = "";
+        if ( refseqIterator != null ) {
+            annotationString = getAnnotationString(refseqIterator.seekForward(location));
+        }
+
+        final StringBuilder line = new StringBuilder();
+        line.append(makeFullRecord(normalCall)).append(annotationString);
+
+        final boolean observedButNotCalled = ! normalCall.isCall() && normalCall.getVariant() != null;
+        if ( observedButNotCalled ) {
+            line.append("\tFILTERED_NOCALL");
+        }
+
+        try {
+            verboseWriter.write(line.toString());
+            verboseWriter.write('\n');
+        } catch (IOException e) {
+            throw new UserException.CouldNotCreateOutputFile(verboseOutput, "Write failed", e);
+        }
+    }
+
+
+    public void printVerboseLine(Writer verboseWriter, IndelPrecall normalCall, IndelPrecall tumorCall) {
+        RODRecordList annotationList = (refseqIterator == null ? null : refseqIterator.seekForward(location));
+        String annotationString = (refseqIterator == null ? "" : getAnnotationString(annotationList));
+
+        StringBuilder fullRecord = new StringBuilder();
+        fullRecord.append(makeFullRecord(normalCall,tumorCall));
+
+        if ( normalCall.getVariant() == null && tumorCall.getVariant() == null ) {
+            // did not observe anything
+            if ( normalCall.getCoverage() >= minNormalCoverage && tumorCall.getCoverage() >= minCoverage ) fullRecord.append("REFERENCE");
+            else {
+                if ( tumorCall.getCoverage() >= minCoverage ) fullRecord.append("REFERENCE"); // no coverage in normal but nothing in tumor
+                else {
+                    // no coverage in tumor; if we have no coverage in normal, it can be anything; if we do have coverage in normal,
+                    // this still could be a somatic event. so either way it is 'unknown'
+                    fullRecord.append("UNKNOWN");
+                }
+            }
+
+        }
+
+        if ( normalCall.getVariant() == null && tumorCall.getVariant() != null ) {
+            // looks like somatic call
+            if ( normalCall.getCoverage() >= minNormalCoverage ) fullRecord.append("SOMATIC"); // we confirm there is nothing in normal
+            else {
+                // low coverage in normal
+                fullRecord.append("EVENT_T"); // no coverage in normal, no idea whether it is germline or somatic
+            }
+        }
+
+        if ( normalCall.getVariant() != null && tumorCall.getVariant() == null ) {
+            // it's likely germline (with missing observation in tumor - maybe loh?
+            if ( tumorCall.getCoverage() >= minCoverage ) fullRecord.append("GERMLINE_LOH"); // we confirm there is nothing in tumor
+            else {
+                // low coverage in tumor, maybe we missed the event
+                fullRecord.append("GERMLINE"); // no coverage in tumor but we already saw it in normal...
+            }
+        }
+
+        if ( normalCall.getVariant() != null && tumorCall.getVariant() != null ) {
+            // events in both T/N, got to be germline!
+            fullRecord.append("GERMLINE"); 
+        }
+
+
+        fullRecord.append('\t');
+        fullRecord.append(annotationString);
+
+        if ( ! tumorCall.isCall() && tumorCall.getVariant() != null ) fullRecord.append("\tFILTERED_NOCALL");
+
+        try {
+            verboseWriter.write(fullRecord.toString());
+            verboseWriter.write('\n');
+        } catch (IOException e) {
+            throw new UserException.CouldNotCreateOutputFile(verboseOutput, "Write failed", e);
+        }
+    }
+
+    /**
+     * Emits a VCF record for a single-sample site. The site alleles are the ref base alone when no
+     * variant was observed, or the ref/alt pair of the observed indel otherwise. Every normal sample
+     * gets the observed alleles as its genotype when a call was made and a ref/ref genotype otherwise;
+     * a variant that was observed but not called is additionally flagged with the "NoCall" filter.
+     * Nothing is written when the event sits at the very beginning of the contig.
+     *
+     * @param vcf writer receiving the record
+     * @param call pre-call to report
+     */
+    public void printVCFLine(VCFWriter vcf, IndelPrecall call) {
+
+        long start = call.getPosition()-1;
+        // If the beginning of the chromosome is deleted (possible, however unlikely), it's unclear how to proceed.
+        // The suggestion is instead of putting the base before the indel, to put the base after the indel.
+        // For now, just don't print out that site.
+        if ( start == 0 )
+            return;
+
+        long stop = start;
+
+        List alleles = new ArrayList(2); // actual observed (distinct!) alleles at the site
+
+        if ( call.getVariant() == null ) {
+            // nothing observed: the only allele at the site is the reference base
+            alleles.add( Allele.create(refBases[(int)start-1],true) );
+        } else {
+            // we always create alt allele when we observe anything but the ref, even if it is not a call!
+            // (Genotype will tell us whether it is an actual call or not!)
+            int event_length = call.getVariant().lengthOnRef();
+            if ( event_length < 0 ) event_length = 0;
+            fillAlleleList(alleles,call);
+            stop += event_length;
+        }
+
+        // Two identical copies of the ref allele, needed to generate a hom-ref genotype.
+        // We can not use 'alleles' itself, since that list is supposed to contain only *distinct*
+        // alleles observed at the site. This list is built unconditionally: a variant that was
+        // observed but not called also needs the ref/ref genotype below (previously the list
+        // was left null in that case). The ref allele is always the first element of 'alleles'.
+        List homref_alleles = new ArrayList(2);
+        homref_alleles.add( alleles.get(0));
+        homref_alleles.add( alleles.get(0));
+
+        Map genotypes = new HashMap();
+
+        for ( String sample : normalSamples ) {
+
+            Map attrs = call.makeStatsAttributes(null);
+
+            if ( call.isCall() ) // we made a call - put actual het genotype here:
+                genotypes.put(sample,new Genotype(sample,alleles,Genotype.NO_NEG_LOG_10PERROR,null,attrs,false));
+            else // no call: genotype is ref/ref (but alleles still contain the alt if we observed anything at all)
+                genotypes.put(sample,new Genotype(sample, homref_alleles,Genotype.NO_NEG_LOG_10PERROR,null,attrs,false));
+
+        }
+        Set filters = null;
+        if ( call.getVariant() != null && ! call.isCall() ) {
+            filters = new HashSet();
+            filters.add("NoCall");
+        }
+        VariantContext vc = new VariantContext("IGv2_Indel_call", refName, start, stop, alleles, genotypes,
+            -1.0 /* log error */,  filters, null, refBases[(int)start-1]);
+        vcf.add(vc);
+    }
+
+    /** Fills l with appropriate alleles depending on whether call is insertion or deletion
+     * (l MUST have a variant or this method will crash). It is guaranteed that the *first* allele added
+     * to the list is ref, and the next one is alt.
+     * @param l
+     * @param call
+     */
+    private void fillAlleleList(List l, IndelPrecall call) {
+        int event_length = call.getVariant().lengthOnRef();
+        if ( event_length == 0 ) { // insertion
+
+            l.add( Allele.create(Allele.NULL_ALLELE_STRING,true) );
+            l.add( Allele.create(call.getVariant().getBases(), false ));
+
+        } else { //deletion:
+            l.add( Allele.create(call.getVariant().getBases(), true ));
+            l.add( Allele.create(Allele.NULL_ALLELE_STRING,false) );
+        }
+    }
+
+    /**
+     * Emits a VCF record for a paired (normal/tumor) site. The site alleles come from the tumor
+     * variant if there is one, otherwise from the normal variant, otherwise they consist of the ref
+     * base alone. Each sample group gets a ref/ref genotype when no variant was observed in it and
+     * the site alleles otherwise. The site is marked somatic when the normal has sufficient coverage
+     * and no variant while the tumor has a variant. Filters: "NoCall" (tumor variant observed but not
+     * called), "NCov" / "TCov" (normal / tumor coverage below the respective minimum).
+     * Nothing is written when the event sits at the very beginning of the contig.
+     *
+     * @param vcf writer receiving the record
+     * @param nCall pre-call computed on the normal sample
+     * @param tCall pre-call computed on the tumor sample
+     */
+    public void printVCFLine(VCFWriter vcf, IndelPrecall nCall, IndelPrecall tCall) {
+
+        final long start = tCall.getPosition()-1;
+
+        // If the beginning of the chromosome is deleted (possible, however unlikely), it's unclear how to proceed.
+        // The suggestion is instead of putting the base before the indel, to put the base after the indel.
+        // For now, just don't print out that site.
+        if ( start == 0 ) {
+            return;
+        }
+
+        final Map attrsNormal = nCall.makeStatsAttributes(null);
+        final Map attrsTumor = tCall.makeStatsAttributes(null);
+
+        final boolean homRefN = ( nCall.getVariant() == null );
+        final boolean homRefT = ( tCall.getVariant() == null );
+
+        // site-level attributes: flag the record as somatic when normal is confidently clean
+        final Map attrs = new HashMap();
+        if ( nCall.getCoverage() >= minNormalCoverage && homRefN && ! homRefT ) {
+            attrs.put(VCFConstants.SOMATIC_KEY,true);
+        }
+
+        final List alleles = new ArrayList(2); // all alleles at the site
+        long stop = start;
+        if ( homRefT && homRefN ) {
+            // no indel at all; create base-representation ref allele for genotype construction
+            alleles.add( Allele.create(refBases[(int)start-1],true) );
+        } else {
+            // we got indel(s); the tumor event takes precedence over the normal one
+            final IndelPrecall eventCall = homRefT ? nCall : tCall;
+            final int event_length = eventCall.getVariant().lengthOnRef();
+            fillAlleleList(alleles, eventCall);
+            if ( event_length > 0 ) stop += event_length;
+        }
+
+        // two copies of the ref allele (always first in 'alleles'): needed for somatic calls
+        // (normal is ref-ref) and for samples without an observation
+        final List homRefAlleles = new ArrayList(2);
+        homRefAlleles.add( alleles.get(0));
+        homRefAlleles.add( alleles.get(0));
+
+        final Map genotypes = new HashMap();
+        final List normalGenotypeAlleles = homRefN ? homRefAlleles : alleles;
+        final List tumorGenotypeAlleles = homRefT ? homRefAlleles : alleles;
+
+        for ( String sample : normalSamples ) {
+            genotypes.put(sample,new Genotype(sample, normalGenotypeAlleles,Genotype.NO_NEG_LOG_10PERROR,null,attrsNormal,false));
+        }
+
+        for ( String sample : tumorSamples ) {
+            genotypes.put(sample,new Genotype(sample, tumorGenotypeAlleles,Genotype.NO_NEG_LOG_10PERROR,null,attrsTumor,false) );
+        }
+
+        // collect all applicable filters; the record carries null rather than an empty set
+        final Set collectedFilters = new HashSet();
+        if ( ! homRefT && ! tCall.isCall() ) collectedFilters.add("NoCall");
+        if ( nCall.getCoverage() < minNormalCoverage ) collectedFilters.add("NCov");
+        if ( tCall.getCoverage() < minCoverage ) collectedFilters.add("TCov");
+        final Set filters = collectedFilters.isEmpty() ? null : collectedFilters;
+
+        final VariantContext vc = new VariantContext("IGv2_Indel_call", refName, start, stop, alleles, genotypes,
+            -1.0 /* log error */, filters, attrs, refBases[(int)start-1]);
+        vcf.add(vc);
+    }
+
+    /**
+     * Flushes whatever is still held in the window (forcing emission of all remaining events),
+     * writes the call counts to the metrics writer if one is configured, and closes the metrics,
+     * BED and verbose writers before delegating to the superclass.
+     *
+     * @param result the reduce result, passed through to the superclass
+     */
+    @Override
+    public void onTraversalDone(Integer result) {
+        if ( DEBUG ) {
+            System.out.println("DEBUG>> Emitting last window at "+normal_context.getStart()+"-"+normal_context.getStop());
+        }
+        if ( call_somatic ) emit_somatic(1000000000, true);
+        else emit(1000000000,true); // emit everything we might have left
+
+        if ( metricsWriter != null ) {
+            metricsWriter.println(String.format("Normal calls made     %d", normalCallsMade));
+            metricsWriter.println(String.format("Tumor calls made      %d", tumorCallsMade));
+            metricsWriter.close();
+        }
+
+        // Close the two writers independently: a failure to close the BED output must not
+        // prevent the verbose output from being closed (and flushed) as well.
+        try {
+            if ( bedWriter != null ) bedWriter.close();
+        } catch (IOException e) {
+            System.out.println("Failed to close output BED file gracefully, data may be lost");
+            e.printStackTrace();
+        }
+        try {
+            if ( verboseWriter != null ) verboseWriter.close();
+        } catch (IOException e) {
+            System.out.println("Failed to close verbose output file gracefully, data may be lost");
+            e.printStackTrace();
+        }
+        super.onTraversalDone(result);
+    }
+
+    /**
+     * Adds value to the running sum. A value of -1 is treated as an abort signal: the traversal
+     * is finalized via onTraversalDone(sum) and the JVM exits with status 1.
+     *
+     * @param value result for the current item
+     * @param sum running total so far
+     * @return the updated total
+     */
+    @Override
+    public Integer reduce(Integer value, Integer sum) {
+        final boolean abortRequested = ( value == -1 );
+        if ( abortRequested ) {
+            onTraversalDone(sum);
+            System.exit(1);
+        }
+        return sum + value;
+    }
+
+    /**
+     * Provides the initial value of the running sum used by reduce().
+     *
+     * @return zero
+     */
+    @Override
+    public Integer reduceInit() {
+        // Integer.valueOf avoids the deprecated boxing constructor and reuses the cached instance
+        return Integer.valueOf(0);
+    }
+
+
+        /**
+         * A single distinct indel (an insertion or a deletion with a specific base sequence) together
+         * with the reads and samples it was observed in and the offsets at which it was seen within
+         * those reads. Two instances are equal when their type and (upper-cased) bases match.
+         */
+        static class IndelVariant {
+            public static enum Type { I, D};
+            private String bases;
+            private Type type;
+            private ArrayList fromStartOffsets = null;
+            private ArrayList fromEndOffsets = null;
+
+            private Set reads = new HashSet(); // keep track of reads that have this indel
+            private Set samples = new HashSet();   // which samples had the indel described by this object
+
+            /** Creates a variant of the given type and bases (stored upper-cased) and registers
+             * read as its first observation.
+             * @param read read in which the indel was observed
+             * @param type insertion (I) or deletion (D)
+             * @param bases inserted or deleted bases
+             */
+            public IndelVariant(ExpandedSAMRecord read , Type type, String bases) {
+                this.type = type;
+                this.bases = bases.toUpperCase();
+                addObservation(read);
+                fromStartOffsets = new ArrayList();
+                fromEndOffsets = new ArrayList();
+            }
+
+            /** Adds another observation for the current indel. It is assumed that the read being registered
+             * does contain the observation, no checks are performed. Read's sample is added to the list of samples
+             * this indel was observed in as well. A read that was already registered is silently ignored.
+             * @param read
+             */
+            public void addObservation(ExpandedSAMRecord read) {
+                if ( reads.contains(read) ) {
+                    //TODO fix CleanedReadInjector and reinstate exception here: duplicate records may signal a problem with the bam
+                    // seeing the same read again can mean only one thing: the input bam file is corrupted and contains
+                    // duplicate records. We KNOW that this may happen for the time being due to bug in CleanedReadInjector
+                    // so this is a short-term patch: don't cry, but just ignore the duplicate record
+
+                    //throw new StingException("Attempting to add indel observation that was already registered");
+                    return;
+                }
+                reads.add(read);
+                String sample = null;
+                if ( read.getSAMRecord().getReadGroup() != null ) sample = read.getSAMRecord().getReadGroup().getSample();
+                if ( sample != null ) samples.add(sample);
+            }
+
+
+            /** Returns length of the event on the reference: number of deleted bases
+             * for deletions, 0 for insertions.
+             * @return
+             */
+            public int lengthOnRef() {
+                if ( type == Type.D ) return bases.length();
+                else return 0;
+            }
+
+
+            /** Records sample as one in which this indel was observed; null is ignored. */
+            public void addSample(String sample) {
+                if ( sample != null )
+                samples.add(sample);
+            }
+
+            /** Records one pair of offsets (from the start and from the end of a read) for this indel. */
+            public void addReadPositions(int fromStart, int fromEnd) {
+                fromStartOffsets.add(fromStart);
+                fromEndOffsets.add(fromEnd);
+            }
+
+            public List getOffsetsFromStart() { return fromStartOffsets ; }
+            public List getOffsetsFromEnd() { return fromEndOffsets; }
+
+            /** Returns the samples this indel was observed in as a comma-separated string. */
+            public String getSamples() {
+                // method-local buffer: the unsynchronized StringBuilder is sufficient
+                StringBuilder sb = new StringBuilder();
+                Iterator i = samples.iterator();
+                while ( i.hasNext() ) {
+                    sb.append(i.next());
+                    if ( i.hasNext() )
+                        sb.append(",");
+                }
+                return sb.toString();
+            }
+
+            public Set getReadSet() { return reads; }
+
+            /** Returns the number of distinct reads in which this indel was observed. */
+            public int getCount() { return reads.size(); }
+
+            public String getBases() { return bases; }
+
+            public Type getType() { return type; }
+
+            @Override
+            public boolean equals(Object o) {
+                if ( ! ( o instanceof IndelVariant ) ) return false;
+                IndelVariant that = (IndelVariant)o;
+                return ( this.type == that.type && this.bases.equals(that.bases) );
+            }
+
+            /** Hash code consistent with equals(Object): derived from the type and the bases only,
+             * so that equal variants land in the same bucket of hash-based collections.
+             */
+            @Override
+            public int hashCode() {
+                return 31 * bases.hashCode() + ( type == null ? 0 : type.ordinal() );
+            }
+
+            /** Returns true if this variant has the given type and the given bases (compared case-insensitively). */
+            public boolean equals(Type type, String bases) {
+                return ( this.type == type && this.bases.equals(bases.toUpperCase()) );
+            }
+        }
+
+    /**
+     * Utility class that encapsulates the logic related to collecting all the stats and counts required to
+     * make (or discard) a call, as well as the calling heuristics that uses those data.
+      */
+    class IndelPrecall {
+//        private boolean DEBUG = false;
+        private int NQS_MISMATCH_CUTOFF = 1000000;
+        private double AV_MISMATCHES_PER_READ = 1.5;
+
+        private int nqs = 0;
+        private IndelVariant consensus_indel = null; // indel we are going to call
+        private long pos = -1 ; // position on the ref
+        private int total_coverage = 0; // total number of reads overlapping with the event
+        private int consensus_indel_count = 0; // number of reads, in which consensus indel was observed
+        private int all_indel_count = 0 ; // number of reads, in which any indel was observed at current position
+
+        private int total_mismatches_in_nqs_window = 0; // total number of mismatches in the nqs window around the indel
+        private int total_bases_in_nqs_window = 0; // total number of bases in the nqs window (some reads may not fully span the window so it's not coverage*nqs_size)
+        private int total_base_qual_in_nqs_window = 0; // sum of qualitites of all the bases in the nqs window
+        private int total_mismatching_base_qual_in_nqs_window = 0; // sum of qualitites of all mismatching bases in the nqs window
+
+        private int indel_read_mismatches_in_nqs_window = 0;   // mismatches inside the nqs window in indel-containing reads only
+        private int indel_read_bases_in_nqs_window = 0;  // number of bases in the nqs window from indel-containing reads only
+        private int indel_read_base_qual_in_nqs_window = 0; // sum of qualitites of bases in nqs window from indel-containing reads only
+        private int indel_read_mismatching_base_qual_in_nqs_window = 0; // sum of qualitites of mismatching bases in the nqs window from indel-containing reads only
+
+
+        private int consensus_indel_read_mismatches_in_nqs_window = 0; // mismatches within the nqs window from consensus indel reads only
+        private int consensus_indel_read_bases_in_nqs_window = 0;  // number of bases in the nqs window from consensus indel-containing reads only
+        private int consensus_indel_read_base_qual_in_nqs_window = 0; // sum of qualitites of bases in nqs window from consensus indel-containing reads only
+        private int consensus_indel_read_mismatching_base_qual_in_nqs_window = 0; // sum of qualitites of mismatching bases in the nqs window from consensus indel-containing reads only
+
+
+        private double consensus_indel_read_total_mm = 0.0; // sum of all mismatches in reads that contain consensus indel
+        private double all_indel_read_total_mm = 0.0; // sum of all mismatches in reads that contain any indel at given position
+        private double all_read_total_mm = 0.0; // sum of all mismatches in all reads
+
+        private double consensus_indel_read_total_mapq = 0.0; // sum of mapping qualitites of all reads with consensus indel
+        private double all_indel_read_total_mapq = 0.0 ; // sum of mapping qualitites of all reads with (any) indel at current position
+        private double all_read_total_mapq = 0.0; // sum of all mapping qualities of all reads
+
+        private PrimitivePair.Int consensus_indel_read_orientation_cnt = new PrimitivePair.Int();
+        private PrimitivePair.Int all_indel_read_orientation_cnt = new PrimitivePair.Int();
+        private PrimitivePair.Int all_read_orientation_cnt = new PrimitivePair.Int();
+
+        private int from_start_median = 0;
+        private int from_start_mad = 0;
+        private int from_end_median = 0;
+        private int from_end_mad = 0;
+
+        /** Creates an empty pre-call (a no-call) at the given position; every statistic keeps
+         * its initial value of 0 and no consensus indel is set.
+         *
+         * @param position position on the reference this pre-call refers to
+         */
+        public IndelPrecall(long position) {
+            pos = position;
+        }
+
+        /** Builds a pre-call for the given position by collecting, from the reads held in the supplied
+         * window, the coverage, the per-read statistics (mismatch counts, mapping qualities, strand
+         * counts) and the base/mismatch/quality counts inside the NQS window around the event.
+         * Each statistic is accumulated three times: over all reads overlapping the position, over reads
+         * carrying any indel at the position, and over reads carrying the consensus indel.
+         *
+         * @param context window providing the reads and the indels observed at the position
+         * @param position position on the reference (first base after the event, see below)
+         * @param nqs_width number of bases on each side of the event that make up the NQS window
+         */
+        public IndelPrecall(WindowContext context, long position, int nqs_width) {
+            this.pos = position;
+            this.nqs = nqs_width;
+            // NOTE(review): the meaning of the boolean flag passed to coverageAt is not visible here - confirm
+            total_coverage = context.coverageAt(pos,true);
+            List variants = context.indelsAt(pos);
+            // presumably selects consensus_indel and fills the indel counts; defined elsewhere - verify
+            findConsensus(variants);
+
+            // pos is the first base after the event: first deleted base or first base after insertion.
+            // hence, [pos-nqs, pos+nqs-1] (inclusive) is the window with nqs bases on each side of a no-event or an insertion
+            // and [pos-nqs, pos+Ndeleted+nqs-1] is the window with nqs bases on each side of a deletion.
+            // we initialize the nqs window for no-event/insertion case
+            // (both ends are clipped to the boundaries of the context window)
+            long left = Math.max( pos-nqs, context.getStart() );
+            long right = Math.min(pos+nqs-1, context.getStop());
+//if ( pos == 3534096 ) System.out.println("pos="+pos +" total reads: "+context.getReads().size());
+            Iterator read_iter = context.getReads().iterator();
+
+
+            while ( read_iter.hasNext() ) {
+                ExpandedSAMRecord rec = read_iter.next();
+                SAMRecord read = rec.getSAMRecord();
+                byte[] flags = rec.getExpandedMMFlags();
+                byte[] quals = rec.getExpandedQuals();
+                int mm = rec.getMMCount();
+
+
+                // only reads whose alignment spans pos contribute to the statistics
+                if( read.getAlignmentStart() > pos || read.getAlignmentEnd() < pos ) continue;
+
+                long local_right = right; // end of nqs window for this particular read. May need to be advanced further right
+                                          // if read has a deletion. The gap in the middle of nqs window will be skipped
+                                          // automatically since flags/quals are set to -1 there
+
+                boolean read_has_a_variant = false;
+                boolean read_has_consensus = ( consensus_indel!= null && consensus_indel.getReadSet().contains(rec) );
+                // find the first variant observed in this read and extend the read's window by its
+                // length on the reference (non-zero for deletions only)
+                for ( IndelVariant v : variants ) {
+                    if ( v.getReadSet().contains(rec) ) {
+                        read_has_a_variant = true;
+                        local_right += v.lengthOnRef();
+                        break;
+                    }
+                }
+
+                // whole-read statistics; strand counts use 'first' for forward and 'second' for negative-strand reads
+                if ( read_has_consensus ) {
+                    consensus_indel_read_total_mm += mm;
+                    consensus_indel_read_total_mapq += read.getMappingQuality();
+                    if ( read.getReadNegativeStrandFlag() ) consensus_indel_read_orientation_cnt.second++;
+                    else consensus_indel_read_orientation_cnt.first++;
+                }
+                if ( read_has_a_variant ) {
+                    all_indel_read_total_mm += mm;
+                    all_indel_read_total_mapq += read.getMappingQuality();
+                    if ( read.getReadNegativeStrandFlag() ) all_indel_read_orientation_cnt.second++;
+                    else all_indel_read_orientation_cnt.first++;
+                }
+
+                all_read_total_mm+= mm;
+                all_read_total_mapq += read.getMappingQuality();
+                if ( read.getReadNegativeStrandFlag() ) all_read_orientation_cnt.second++;
+                else all_read_orientation_cnt.first++;
+
+                // walk the part of the nqs window covered by this read; flags/quals are indexed
+                // by the offset from the read's alignment start
+                for ( int pos_in_flags = Math.max((int)(left - read.getAlignmentStart()),0);
+                      pos_in_flags <= Math.min((int)local_right-read.getAlignmentStart(),flags.length - 1);
+                       pos_in_flags++) {
+
+                        if ( flags[pos_in_flags] == -1 ) continue; // gap (deletion), skip it; we count only bases aligned to the ref
+                        total_bases_in_nqs_window++;
+                        if ( read_has_consensus ) consensus_indel_read_bases_in_nqs_window++;
+                        if ( read_has_a_variant ) indel_read_bases_in_nqs_window++;
+
+                        // a quality of -1 is excluded from the quality sums
+                        if ( quals[pos_in_flags] != -1 ) {
+
+                            total_base_qual_in_nqs_window += quals[pos_in_flags];
+                            if ( read_has_a_variant ) indel_read_base_qual_in_nqs_window += quals[pos_in_flags];
+                            if ( read_has_consensus ) consensus_indel_read_base_qual_in_nqs_window += quals[pos_in_flags];
+                        }
+
+                        if ( flags[pos_in_flags] == 1 ) { // it's a mismatch
+                            total_mismatches_in_nqs_window++;
+                            total_mismatching_base_qual_in_nqs_window += quals[pos_in_flags];
+
+                            if ( read_has_consensus ) {
+                                consensus_indel_read_mismatches_in_nqs_window++;
+                                consensus_indel_read_mismatching_base_qual_in_nqs_window += quals[pos_in_flags];
+                            }
+                            
+                            if ( read_has_a_variant ) {
+                                indel_read_mismatches_in_nqs_window++;
+                                indel_read_mismatching_base_qual_in_nqs_window += quals[pos_in_flags];
+                            }
+                        }
+                }
+//         if ( pos == 3534096 ) {
+//             System.out.println(read.getReadName());
+//             System.out.println(" cons nqs bases="+consensus_indel_read_bases_in_nqs_window);
+//             System.out.println(" qual sum="+consensus_indel_read_base_qual_in_nqs_window);
+//         }
+
+            }
+
+            // compute median/mad for offsets from the read starts/ends
+            // (median() sorts the offset lists of the consensus indel in place)
+            if ( consensus_indel != null ) {
+                from_start_median = median(consensus_indel.getOffsetsFromStart()) ;
+                from_start_mad = mad(consensus_indel.getOffsetsFromStart(),from_start_median);
+                from_end_median = median(consensus_indel.getOffsetsFromEnd()) ;
+                from_end_mad = mad(consensus_indel.getOffsetsFromEnd(),from_end_median);   
+            }
+        }
+
+        /** Returns the median of the values in l (for an even number of values, the mean of the two
+         * middle ones using integer division), or 0 if l is empty.
+         * As a side effect will sort l!
+         *
+         * @param l values to take the median of; sorted in place
+         * @return median of l, or 0 for an empty list
+         */
+        private int median(List<Integer> l) {
+            if ( l.isEmpty() ) return 0; // no observations: report 0 instead of failing on get(-1)
+            Collections.sort(l);
+            int k = l.size()/2;
+            return ( l.size() % 2 == 0 ?
+                  (l.get(k-1).intValue()+l.get(k).intValue())/2 :
+                   l.get(k).intValue());
+        }
+
+        /** Returns the median of the values in l (for an even number of values, the mean of the two
+         * middle ones using integer division), or 0 if l is empty.
+         * As a side effect will sort l!
+         *
+         * @param l values to take the median of; sorted in place
+         * @return median of l, or 0 for an empty array
+         */
+        private int median(int[] l) {
+            if ( l.length == 0 ) return 0; // no observations: report 0 instead of indexing l[-1]
+            Arrays.sort(l);
+            int k = l.length/2;
+            return ( l.length % 2 == 0 ?
+                  (l[k-1]+l[k])/2 :
+                   l[k]);
+        }
+
+        /** Returns the median absolute deviation of the values in l around med, i.e. the median of
+         * |value - med| over all values. Unlike median(List), this method does not reorder l.
+         *
+         * @param l values to compute the deviation for
+         * @param med center (normally the median of l) the deviations are measured from
+         * @return median of the absolute deviations from med
+         */
+        private int mad(List<Integer> l, int med) {
+            // element type restored so that intValue() resolves on the list elements
+            int [] diff = new int[l.size()];
+            for ( int i = 0; i < l.size(); i++ ) {
+                   diff[i] = Math.abs(l.get(i).intValue() - med);
+            }
+            return median(diff);
+        }
+
+        public long getPosition() { return pos; }
+
+        public boolean hasObservation() { return consensus_indel != null; }
+
+        public int getCoverage() { return total_coverage; }
+
+        public double getTotalMismatches() { return all_read_total_mm; }
+        public double getConsensusMismatches() { return consensus_indel_read_total_mm; }
+        public double getAllVariantMismatches() { return all_indel_read_total_mm; }
+
+        /** Returns the average number of mismatches per consensus indel-containing read,
+         * or 0.0 when there are no such reads.
+         */
+        public double getAvConsensusMismatches() {
+            if ( consensus_indel_count == 0 ) {
+                return 0.0;
+            }
+            return consensus_indel_read_total_mm/consensus_indel_count;
+        }
+
+        /** Returns average number of mismatches per read across all reads matching the ref (not containing any indel variants); 0.0 if there are no such reads. */
+        public double getAvRefMismatches() {
+            int coverage_ref = total_coverage-all_indel_count; // reads at the site minus reads showing any indel
+            return ( coverage_ref != 0 ? (all_read_total_mm - all_indel_read_total_mm )/coverage_ref : 0.0 );
+        }
+
+        public PrimitivePair.Int getConsensusStrandCounts() { // strand counts for consensus-indel reads; makeStatsAttributes() passes first/second on as forward/reverse
+            return consensus_indel_read_orientation_cnt; // the internal counter object itself, not a copy
+        }
+
+        public PrimitivePair.Int getRefStrandCounts() { // new pair holding, per component, the all-reads count minus the all-indel-reads count
+            return new PrimitivePair.Int(all_read_orientation_cnt.first-all_indel_read_orientation_cnt.first,
+                                         all_read_orientation_cnt.second - all_indel_read_orientation_cnt.second);
+        }
+
+        /** Returns the sum of mapping qualities of all reads spanning the event. */
+        public double getTotalMapq() { return all_read_total_mapq; }
+
+        /** Returns the sum of mapping qualities of all reads in which the consensus variant is observed. */
+        public double getConsensusMapq() { return consensus_indel_read_total_mapq; }
+
+        /** Returns the sum of mapping qualities of all reads in which any variant is observed at the current event site. */
+        public double getAllVariantMapq() { return all_indel_read_total_mapq; }
+
+        /** Returns average mapping quality per consensus indel-containing read, or 0.0 if no such reads were counted. */
+        public double getAvConsensusMapq() {
+            return ( consensus_indel_count != 0 ? consensus_indel_read_total_mapq/consensus_indel_count : 0.0 ); // guard avoids dividing by a zero read count
+        }
+
+        /** Returns average mapping quality per read across all reads matching the ref (not containing any indel variants); 0.0 if there are no such reads. */
+        public double getAvRefMapq() {
+            int coverage_ref = total_coverage-all_indel_count; // reads at the site minus reads showing any indel
+            return ( coverage_ref != 0 ? (all_read_total_mapq - all_indel_read_total_mapq )/coverage_ref : 0.0 );
+        }
+
+        /** Returns fraction of bases in NQS window around the indel that are mismatches, across all reads
+         * in which the consensus indel is observed. NOTE: NQS window for indel containing reads is defined around
+         * the indel itself (e.g. for a 10-base deletion spanning [X,X+9], the 5-NQS window is {[X-5,X-1],[X+10,X+15]}).
+         * Returns 0 when no bases were counted in the window. */
+        public double getNQSConsensusMMRate() {
+            if ( consensus_indel_read_bases_in_nqs_window == 0 ) return 0;
+            return ((double)consensus_indel_read_mismatches_in_nqs_window)/consensus_indel_read_bases_in_nqs_window;
+        }
+
+        /** Returns fraction of bases in NQS window around the indel start position that are mismatches, across all reads
+         * that align to the ref (i.e. contain no indel observation at the current position). NOTE: NQS window for ref
+         * reads is defined around the event start position, NOT around the actual consensus indel.
+         * Computed as (total - indel-read) mismatches over (total - indel-read) bases; returns 0 when that base count is 0. */
+        public double getNQSRefMMRate() {
+            int num_ref_bases = total_bases_in_nqs_window - indel_read_bases_in_nqs_window;
+            if ( num_ref_bases == 0 ) return 0;
+            return ((double)(total_mismatches_in_nqs_window - indel_read_mismatches_in_nqs_window))/num_ref_bases;
+        }
+
+        /** Returns average base quality in NQS window around the indel, across all reads
+         * in which the consensus indel is observed. NOTE: NQS window for indel containing reads is defined around
+         * the indel itself (e.g. for a 10-base deletion spanning [X,X+9], the 5-NQS window is {[X-5,X-1],[X+10,X+15]}).
+         * Returns 0 when no bases were counted in the window. */
+        public double getNQSConsensusAvQual() {
+            if ( consensus_indel_read_bases_in_nqs_window == 0 ) return 0;
+            return ((double)consensus_indel_read_base_qual_in_nqs_window)/consensus_indel_read_bases_in_nqs_window;
+        }
+
+        /** Returns average base quality in NQS window around the indel start position, across all reads
+         * that align to the ref (i.e. contain no indel observation at the current position). NOTE: NQS window for ref
+         * reads is defined around the event start position, NOT around the actual consensus indel.
+         * Computed as (total - indel-read) summed base quality over (total - indel-read) bases; returns 0 when that base count is 0. */
+        public double getNQSRefAvQual() {
+            int num_ref_bases = total_bases_in_nqs_window - indel_read_bases_in_nqs_window;
+            if ( num_ref_bases == 0 ) return 0;
+            return ((double)(total_base_qual_in_nqs_window - indel_read_base_qual_in_nqs_window))/num_ref_bases;
+        }
+
+        public int getTotalNQSMismatches() { return total_mismatches_in_nqs_window; } // raw mismatch count that getNQSRefMMRate() starts from
+
+        public int getAllVariantCount() { return all_indel_count; } // sum of getCount() over every variant seen by findConsensus()
+        public int getConsensusVariantCount() { return consensus_indel_count; } // largest single-variant count found by findConsensus()
+
+//        public boolean failsNQSMismatch() {
+//            //TODO wrong fraction: mismatches are counted only in indel-containing reads, but total_coverage is used!
+//            return ( indel_read_mismatches_in_nqs_window > NQS_MISMATCH_CUTOFF ) ||
+//                    ( indel_read_mismatches_in_nqs_window > total_coverage * AV_MISMATCHES_PER_READ );
+//        }
+
+        public IndelVariant getVariant() { return consensus_indel; } // the variant chosen by findConsensus(); null exactly when hasObservation() is false
+
+        public boolean isCall() { // true only when all four thresholds below are satisfied
+            boolean ret =  ( consensus_indel_count >= minIndelCount && // enough reads carry the consensus indel
+                    (double)consensus_indel_count > minFraction * total_coverage && // consensus reads exceed the required fraction of all reads
+                    (double) consensus_indel_count > minConsensusFraction*all_indel_count && total_coverage >= minCoverage); // ...and of all indel reads; site is covered deeply enough
+            if ( DEBUG && ! ret ) System.out.println("DEBUG>>  NOT a call: count="+consensus_indel_count+
+                        " total_count="+all_indel_count+" cov="+total_coverage+
+                " minConsensusF="+((double)consensus_indel_count)/all_indel_count+ // observed fractions are printed, not the thresholds; double division, so a zero denominator cannot throw
+                    " minF="+((double)consensus_indel_count)/total_coverage);
+            return ret;
+
+        }
+
+        /** Utility method: finds the indel variant with the largest count (i.e. the consensus) among all the observed
+         * variants, and sets the counts of consensus observations and of all observations of any indels (including non-consensus).
+         * Nothing is returned: results go to consensus_indel, consensus_indel_count and all_indel_count (which is added to, not reset).
+         * @param variants indel variants observed at the site; on equal counts the variant seen first is kept (strict > comparison)
+         */
+        private void findConsensus(List variants) {
+            for ( IndelVariant var : variants ) {
+                if ( DEBUG ) System.out.println("DEBUG>> Variant "+var.getBases()+" (cnt="+var.getCount()+")");
+                int cnt = var.getCount();
+                all_indel_count +=cnt;
+                if ( cnt > consensus_indel_count ) {
+                    consensus_indel = var;
+                    consensus_indel_count = cnt;
+                }
+            }
+            if ( DEBUG && consensus_indel != null ) System.out.println("DEBUG>> Returning: "+consensus_indel.getBases()+
+                    " (cnt="+consensus_indel.getCount()+") with total count of "+all_indel_count);
+        }
+
+
+
+        public void printBedLine(Writer bed) {
+            int event_length;
+            if ( consensus_indel == null ) event_length = 0;
+            else {
+                event_length = consensus_indel.lengthOnRef();
+                if ( event_length < 0 ) event_length = 0;
+            }
+
+            StringBuffer message = new StringBuffer();
+            message.append(refName+"\t"+(pos-1)+"\t");
+            message.append((pos-1+event_length)+"\t");
+            if ( consensus_indel != null ) {
+                message.append((event_length>0? "-":"+")+consensus_indel.getBases());
+            } else {
+                message.append('.');
+            }
+            message.append(":"+all_indel_count+"/"+total_coverage);
+            try {
+                bed.write(message.toString()+"\n");
+            } catch (IOException e) {
+               throw new UserException.CouldNotCreateOutputFile(bedOutput, "Error encountered while writing into output BED file", e);
+            }
+        }
+
+        public String makeEventString() { // builds "refName<TAB>pos-1<TAB>pos-1+len<TAB>(sign+bases | .)" for this site
+            int event_length;
+            if ( consensus_indel == null ) event_length = 0; // no observation: zero-length interval
+            else {
+                event_length = consensus_indel.lengthOnRef();
+                if ( event_length < 0 ) event_length = 0; // negative lengths are clamped so the end never precedes the start
+            }
+            StringBuffer message = new StringBuffer();
+            message.append(refName);
+            message.append('\t');
+            message.append(pos-1);
+            message.append('\t');
+            message.append(pos-1+event_length);
+            message.append('\t');
+            if ( consensus_indel != null ) {
+                message.append((event_length>0?'-':'+')); // '-' when the event occupies reference bases (presumably a deletion), '+' otherwise
+                message.append(consensus_indel.getBases());
+            } else {
+                message.append('.'); // placeholder when there is no consensus indel
+            }
+            return message.toString();
+        }
+
+        public String makeStatsString(String prefix) { // tab-separated "KEY:values" summary of this site's statistics; prefix is prepended to every key
+             StringBuilder message = new StringBuilder();
+             message.append(prefix+"OBS_COUNTS[C/A/T]:"+getConsensusVariantCount()+"/"+getAllVariantCount()+"/"+getCoverage()); // consensus / all indels / total coverage
+             message.append('\t');
+             message.append(prefix+"AV_MM[C/R]:"+String.format("%.2f/%.2f",getAvConsensusMismatches(),
+                                 getAvRefMismatches()));
+             message.append('\t');
+             message.append(prefix+"AV_MAPQ[C/R]:"+String.format("%.2f/%.2f",getAvConsensusMapq(),
+                                getAvRefMapq()));
+             message.append('\t');
+             message.append(prefix+"NQS_MM_RATE[C/R]:"+String.format("%.2f/%.2f",getNQSConsensusMMRate(),getNQSRefMMRate()));
+             message.append('\t');
+             message.append(prefix+"NQS_AV_QUAL[C/R]:"+String.format("%.2f/%.2f",getNQSConsensusAvQual(),getNQSRefAvQual()));
+
+             PrimitivePair.Int strand_cons = getConsensusStrandCounts();
+             PrimitivePair.Int strand_ref = getRefStrandCounts();
+             message.append('\t');
+             message.append(prefix+"STRAND_COUNTS[C/C/R/R]:"+strand_cons.first+"/"+strand_cons.second+"/"+strand_ref.first+"/"+strand_ref.second); // consensus pair first, then reference pair
+
+             message.append('\t');
+             message.append(prefix+"OFFSET_RSTART:"+from_start_median+"/"+from_start_mad); // median/mad of indel offsets from read starts
+             message.append('\t');
+             message.append(prefix+"OFFSET_REND:"+from_end_median+"/"+from_end_mad); // median/mad of indel offsets from read ends
+
+             return message.toString();
+
+         }
+
+        /**
+         * Places alignment statistics into attribute map and returns the map. If attr parameter is null,
+         * a new map is allocated, filled and returned. If attr is not null, new attributes are added to that
+         * preexisting map, and the same instance of the (updated) map is returned.
+         *
+         * @param attr map to add the attributes to, or null to have a new HashMap allocated
+         * @return the map that was filled (attr itself when it was not null); keys are the VCFIndelAttributes *_KEY constants
+         */
+        public Map makeStatsAttributes(Map attr) {
+             if ( attr == null ) attr = new HashMap();
+
+             VCFIndelAttributes.recordDepth(getConsensusVariantCount(),getAllVariantCount(),getCoverage(),attr);
+
+             VCFIndelAttributes.recordAvMM(getAvConsensusMismatches(),getAvRefMismatches(),attr);
+
+             VCFIndelAttributes.recordAvMapQ(getAvConsensusMapq(),getAvRefMapq(),attr);
+
+             VCFIndelAttributes.recordNQSMMRate(getNQSConsensusMMRate(),getNQSRefMMRate(),attr);
+
+             VCFIndelAttributes.recordNQSAvQ(getNQSConsensusAvQual(),getNQSRefAvQual(),attr);
+
+             VCFIndelAttributes.recordOffsetFromStart(from_start_median,from_start_mad,attr);
+
+             VCFIndelAttributes.recordOffsetFromEnd(from_end_median,from_end_mad,attr);
+
+             PrimitivePair.Int strand_cons = getConsensusStrandCounts();
+             PrimitivePair.Int strand_ref = getRefStrandCounts();
+
+             VCFIndelAttributes.recordStrandCounts(strand_cons.first,strand_cons.second,strand_ref.first,strand_ref.second,attr);
+             return attr;
+         }
+    }
+
+    interface IndelListener { // callback through which the ExpandedSAMRecord constructor reports each indel found in a read's cigar
+        public void addObservation(int pos, IndelVariant.Type t, String bases, int fromStart, int fromEnd, ExpandedSAMRecord r); // pos is passed as (offset + position of the event along the read's reference span)
+    }
+
+    class WindowContext implements IndelListener {
+            private Set reads;
+            private int start=0; // where the window starts on the ref, 1-based
+            private CircularArray< List< IndelVariant > > indels;
+
+            private List emptyIndelList = new ArrayList();
+
+
+            public WindowContext(int start, int length) { // context for `length` reference positions beginning at `start`
+                this.start = start;
+                indels = new CircularArray< List >(length); // one slot per window position; slots are treated as possibly null (see indelsAt())
+//                reads = new LinkedList();
+                reads = new HashSet(); // set semantics back the duplicate check in add()
+            }
+
+            /** Returns 1-based reference start position of the interval this object keeps context for.
+             *
+             * @return the current value of start (0 after clear())
+             */
+            public int getStart() { return start; }
+
+            /** Returns 1-based reference stop position (inclusive) of the interval this object keeps context for.
+             *
+             * @return start + (length of the indel array) - 1
+             */
+            public int getStop() { return start + indels.length() - 1; }
+
+            /** Resets reference start position to 0 and clears the context.
+             * Both the stored reads and the per-position indel lists are cleared; the window length is not changed here.
+             */
+            public void clear() {
+                start = 0;
+                reads.clear();
+                indels.clear();
+            }
+
+        /**
+         * Returns true if any indel observations are present in the specified interval
+         * [begin,end] (1-based, inclusive). Interval can be partially or fully outside of the
+         * current context window: positions outside of the window will be ignored.
+         * @param begin first reference position to examine
+         * @param end last reference position to examine; like getStop() it is inclusive, so the loop bound is inclusive too
+         */
+            public boolean hasIndelsInInterval(long begin, long end) {
+                for ( long k = Math.max(start,begin); k <= Math.min(getStop(),end); k++ ) {
+                    if ( ! indelsAt(k).isEmpty() ) return true; // test emptiness, not identity: shift() can leave a cleared (empty but non-shared) list in a slot
+                }
+                return false;
+            }
+
+            public Set getReads() { return reads; } // the live internal set of reads, not a copy
+
+            /** Returns the number of reads spanning over the specified reference position
+             * (regardless of whether they have a base or indel at that specific location).
+             * The second argument controls whether to count with indels in mind (this is relevant for insertions only,
+             * deletions do not require any special treatment since they occupy non-zero length on the ref and since
+             * alignment can not start or end with a deletion). For insertions, note that, internally, we assign insertions
+             * to the reference position right after the actual event, and we count all events assigned to a given position.
+             * This count (reads with indels) should be contrasted to reads without indels, or more rigorously, reads
+             * that support the ref rather than the indel. Few special cases may occur here:
+             * 1) an alignment that ends (as per getAlignmentEnd()) right before the current position but has I as its
+             * last element: we have to count that read into the "coverage" at the current position for the purposes of indel
+             * assessment, as the indel in that read will be counted at the current position, so the total coverage
+             * should be consistent with that.
+             */
+             /* NOT IMPLEMENTED: 2) alignments that start exactly at the current position do not count for the purpose of insertion
+             * assessment since they do not contribute any evidence to either Ref or Alt=insertion hypothesis, unless
+             * the alignment starts with I (so that we do have evidence for an indel assigned to the current position and
+             * read should be counted). For deletions, reads starting at the current position should always be counted (as they
+             * show no deletion=ref).
+             * @param refPos position on the reference; must be within the bounds of the window
+             */
+            public int coverageAt(final long refPos, boolean countForIndels) {
+                int cov = 0;
+                for ( ExpandedSAMRecord read : reads ) {
+                    if ( read.getSAMRecord().getAlignmentStart() > refPos || read.getSAMRecord().getAlignmentEnd() < refPos ) { // read does not span refPos
+                        if ( countForIndels && read.getSAMRecord().getAlignmentEnd() == refPos - 1) { // special case 1: read ends immediately before refPos
+                            Cigar c = read.getSAMRecord().getCigar();
+                            if ( c.getCigarElement(c.numCigarElements()-1).getOperator() == CigarOperator.I ) cov++; // trailing insertion: count it (assumes a non-empty cigar)
+                        }
+                        continue;
+                    }
+                    cov++;
+                } 
+                return cov;
+            }
+
+
+            /** Shifts current window to the right along the reference contig by the specified number of bases.
+             * The context will be updated accordingly (indels and reads that go out of scope will be dropped).
+             * @param offset number of bases added to the window start and passed to the indel array's shiftData()
+             */
+            public void shift(int offset) {
+                start += offset;
+
+                indels.shiftData(offset);
+                if ( indels.get(0) != null && indels.get(0).size() != 0 ) { // indels landing on the new first position are not supported
+                    IndelVariant indel =  indels.get(0).get(0);
+
+                    System.out.println("WARNING: Indel(s) at first position in the window ("+refName+":"+start+"): currently not supported: "+
+                    (indel.getType()==IndelVariant.Type.I?"+":"-")+indel.getBases()+"; read: "+indel.getReadSet().iterator().next().getSAMRecord().getReadName()+"; site ignored");
+                    indels.get(0).clear(); // the list object stays in the slot, now empty
+//                    throw new StingException("Indel found at the first position ("+start+") after a shift was performed: currently not supported: "+
+//                    (indel.getType()==IndelVariant.Type.I?"+":"-")+indel.getBases()+"; reads: "+indel.getReadSet().iterator().next().getSAMRecord().getReadName());
+                }
+                
+                Iterator read_iter = reads.iterator();
+
+                while ( read_iter.hasNext() ) {
+                    ExpandedSAMRecord r = read_iter.next();
+                    if ( r.getSAMRecord().getAlignmentEnd() < start ) { // discard reads and associated data that went out of scope
+                        read_iter.remove();
+                    }
+                }
+            }
+
+            public void add(SAMRecord read, byte [] ref) { // wraps the read, letting it report its indels to this window, and stores it
+                // ref is handed to ExpandedSAMRecord, which indexes it from the read's alignment start
+                if ( read.getAlignmentStart() < start ) return; // silently ignore reads starting before the window start
+
+                ExpandedSAMRecord er = new ExpandedSAMRecord(read,ref,read.getAlignmentStart()-start,this); // NOTE(review): the constructor has already called addObservation() for this read's indels before the duplicate check below runs
+                //TODO duplicate records may actually indicate a problem with input bam file; throw an exception when the bug in CleanedReadInjector is fixed
+                if ( reads.contains(er)) return; // ignore duplicate records
+                reads.add(er);
+            }
+
+            public void addObservation(int pos, IndelVariant.Type type, String bases, int fromStart, int fromEnd, ExpandedSAMRecord rec) { // pos is an index into the window's indel array, not a reference coordinate
+                List indelsAtSite;
+                try {
+                    indelsAtSite = indels.get(pos);
+                } catch (IndexOutOfBoundsException e) { // log the offending read, then rethrow the same exception
+                    SAMRecord r = rec.getSAMRecord();
+                    System.out.println("Failed to add indel observation, probably out of coverage window bounds (trailing indel?):\nRead "+
+                            r.getReadName()+": "+
+                        "read length="+r.getReadLength()+"; cigar="+r.getCigarString()+"; start="+
+                        r.getAlignmentStart()+"; end="+r.getAlignmentEnd()+"; window start="+getStart()+
+                        "; window end="+getStop());
+                    throw e;
+                }
+
+                if ( indelsAtSite == null ) { // first observation at this position: create the list lazily
+                    indelsAtSite = new ArrayList();
+                    indels.set(pos, indelsAtSite);
+                }
+
+                IndelVariant indel = null;
+                for ( IndelVariant v : indelsAtSite ) { // look for an already recorded variant of the same type and bases
+                    if ( ! v.equals(type, bases) ) continue;
+
+                    indel = v;
+                    indel.addObservation(rec);
+                    break;
+                }
+                
+                if ( indel == null ) {  // not found:
+                    indel = new IndelVariant(rec, type, bases);
+                    indelsAtSite.add(indel);
+                }
+                indel.addReadPositions(fromStart,fromEnd); // offsets are recorded for new and existing variants alike
+            }
+
+            public List indelsAt( final long refPos ) { // indel variants recorded at reference position refPos; never returns null
+                List l = indels.get((int)( refPos - start )); // convert the reference position to a window index
+                if ( l == null ) return emptyIndelList; // shared empty-list instance stands in for "nothing recorded"
+                else return l;
+            }
+
+
+        }
+
+
+    class ExpandedSAMRecord {
+        private SAMRecord read;
+        private byte[] mismatch_flags;
+        private byte[] expanded_quals;
+        private int mms;
+
+        public ExpandedSAMRecord(SAMRecord r, byte [] ref, long offset, IndelListener l) {
+            // Walks the cigar once to fill per-reference-position mismatch flags and qualities, reporting every indel to l.
+            read = r;
+            final long rStart = read.getAlignmentStart();
+            final long rStop = read.getAlignmentEnd();
+            final byte[] readBases = read.getReadString().toUpperCase().getBytes();
+            // upper-case both read and reference so that the comparison below ignores case
+            ref = new String(ref).toUpperCase().getBytes();
+            // both arrays have one entry per reference position spanned by the alignment
+            mismatch_flags = new byte[(int)(rStop-rStart+1)];
+            expanded_quals = new byte[(int)(rStop-rStart+1)];
+
+            // now let's extract indels:
+
+            Cigar c = read.getCigar();
+            final int nCigarElems = c.numCigarElements();
+
+            // first pass: total number of read bases in M and I elements
+            int readLength = 0; // length of the aligned part of the read NOT counting clipped bases
+            for ( CigarElement cel : c.getCigarElements() ) {
+
+                switch(cel.getOperator()) {
+                case H:
+                case S:
+                case D:
+                case N:
+                case P:
+                    break; // do not count gaps or clipped bases
+                case I:
+                case M:
+                    readLength += cel.getLength();
+                    break; // advance along the gapless block in the alignment
+                default :
+                    throw new IllegalArgumentException("Unexpected operator in cigar string: "+cel.getOperator());
+                }
+            }
+
+            int fromStart = 0;
+            int posOnRead = 0;
+            int posOnRef = 0; // the chunk of reference ref[] that we have access to is aligned with the read:
+                                  // its start on the actual full reference contig is r.getAlignmentStart()
+            for ( int i = 0 ; i < nCigarElems ; i++ ) {
+
+                final CigarElement ce = c.getCigarElement(i);
+                IndelVariant.Type type = null;
+                String indel_bases = null;
+                int eventPosition = posOnRef; // captured before the element is consumed, so a deletion is reported at its first deleted base
+
+                switch(ce.getOperator()) {
+                case H: break; // hard clipped reads do not have clipped indel_bases in their sequence, so we just ignore the H element...
+                case I:
+                    type = IndelVariant.Type.I;
+                    indel_bases = read.getReadString().substring(posOnRead,posOnRead+ce.getLength());
+                    // will increment position on the read below, there's no 'break' statement yet...
+                case S:
+                    // here we also skip soft-clipped indel_bases on the read; according to SAM format specification,
+                    // alignment start position on the reference points to where the actually aligned
+                    // (not clipped) indel_bases go, so we do not need to increment reference position here
+                    posOnRead += ce.getLength();
+                    break;
+                case D:
+                    type = IndelVariant.Type.D;
+                    indel_bases = new String( ref, posOnRef, ce.getLength() );
+                    for( int k = 0 ; k < ce.getLength(); k++, posOnRef++ ) mismatch_flags[posOnRef] = expanded_quals[posOnRef] = -1; // -1 marks deleted reference positions in both arrays
+
+                    break;
+                case M:
+                    for ( int k = 0; k < ce.getLength(); k++, posOnRef++, posOnRead++ ) {
+                        if ( readBases[posOnRead] != ref[posOnRef] )  { // mismatch!
+                            mms++;
+                            mismatch_flags[posOnRef] = 1;
+                        }
+                        expanded_quals[posOnRef] = read.getBaseQualities()[posOnRead];
+                    }
+                    fromStart += ce.getLength();
+                    break; // advance along the gapless block in the alignment
+                default : // NOTE(review): N and P are accepted by the length-counting switch above but end up here and throw
+                    throw new IllegalArgumentException("Unexpected operator in cigar string: "+ce.getOperator());
+                }
+
+                if ( type == null ) continue; // element was not an indel, go grab next element...
+
+                // we got an indel if we are here...
+                if ( i == 0 ) logger.debug("Indel at the start of the read "+read.getReadName());
+                if ( i == nCigarElems - 1) logger.debug("Indel at the end of the read "+read.getReadName());
+
+                // note that here we will be assigning indels to the first deleted base or to the first
+                // base after insertion, not to the last base before the event!
+                int fromEnd = readLength - fromStart;
+                if ( type == IndelVariant.Type.I ) fromEnd -= ce.getLength(); // inserted bases are not counted in the distance to the read end
+
+                l.addObservation((int)(offset+eventPosition), type, indel_bases, fromStart, fromEnd, this);
+
+                if ( type == IndelVariant.Type.I ) fromStart += ce.getLength(); // later events are measured past the inserted bases
+
+            }
+        }
+
+        public SAMRecord getSAMRecord() { return read; } // the wrapped read
+
+        public byte [] getExpandedMMFlags() { return mismatch_flags; } // internal array, not a copy: 1 = mismatch, -1 = deleted position, 0 otherwise
+
+        public byte [] getExpandedQuals() { return expanded_quals; } // internal array, not a copy: base quality per reference position, -1 at deleted positions
+
+        public int getMMCount() { return mms; } // number of mismatching bases counted over the M elements of the cigar
+
+        public boolean equals(Object o) { // two wrappers are equal when their underlying SAMRecords are equal
+            if ( this == o ) return true;
+            if ( read == null ) return false;
+            if ( o instanceof SAMRecord ) return read.equals(o); // a bare SAMRecord is accepted too (presumably not symmetric from SAMRecord's side)
+            if ( o instanceof ExpandedSAMRecord ) return read.equals(((ExpandedSAMRecord)o).read);
+            return false;
+        }
+
+        public int hashCode() { return read == null ? 0 : read.hashCode(); } // must agree with equals(): instances are stored in a HashSet and looked up via contains(); assumes SAMRecord.hashCode() matches SAMRecord.equals() - TODO confirm
+    }
+
+}
+
+
+class VCFIndelAttributes { // keys, header lines and helper setters for the per-sample indel statistics
+    public static final String ALLELIC_DEPTH_KEY = "AD";
+    public static final String DEPTH_TOTAL_KEY = VCFConstants.DEPTH_KEY;
+
+    public static final String MAPQ_KEY = "MQS";
+
+    public static final String MM_KEY = "MM";
+
+    public static final String NQS_MMRATE_KEY = "NQSMM";
+
+    public static final String NQS_AVQ_KEY = "NQSBQ";
+
+    public static final String STRAND_COUNT_KEY = "SC";
+    public static final String RSTART_OFFSET_KEY = "RStart";
+    public static final String REND_OFFSET_KEY = "REnd";
+
+    public static Set getAttributeHeaderLines() { // one FORMAT header line per key written by the record* methods below
+        Set lines = new HashSet();
+
+        lines.add(new VCFFormatHeaderLine(ALLELIC_DEPTH_KEY, 2, VCFHeaderLineType.Integer, "# of reads supporting consensus indel/reference at the site"));
+        lines.add(new VCFFormatHeaderLine(DEPTH_TOTAL_KEY, 1, VCFHeaderLineType.Integer, "Total coverage at the site"));
+
+        lines.add(new VCFFormatHeaderLine(MAPQ_KEY, 2, VCFHeaderLineType.Float, "Average mapping qualities of consensus indel-supporting reads/reference-supporting reads"));
+
+        lines.add(new VCFFormatHeaderLine(MM_KEY, 2, VCFHeaderLineType.Float, "Average # of mismatches per consensus indel-supporting read/per reference-supporting read"));
+
+        lines.add(new VCFFormatHeaderLine(NQS_MMRATE_KEY, 2, VCFHeaderLineType.Float, "Within NQS window: fraction of mismatching bases in consensus indel-supporting reads/in reference-supporting reads"));
+
+        lines.add(new VCFFormatHeaderLine(NQS_AVQ_KEY, 2, VCFHeaderLineType.Float, "Within NQS window: average quality of bases from consensus indel-supporting reads/from reference-supporting reads"));
+
+        lines.add(new VCFFormatHeaderLine(STRAND_COUNT_KEY, 4, VCFHeaderLineType.Integer, "Strandness: counts of forward-/reverse-aligned indel-supporting reads / forward-/reverse-aligned reference supporting reads"));
+
+        lines.add(new VCFFormatHeaderLine(RSTART_OFFSET_KEY, 2, VCFHeaderLineType.Integer, "Median/mad of indel offsets from the starts of the reads"));
+        lines.add(new VCFFormatHeaderLine(REND_OFFSET_KEY, 2, VCFHeaderLineType.Integer, "Median/mad of indel offsets from the ends of the reads"));
+
+        return lines;
+    }
+
+    public static Map recordStrandCounts(int cnt_cons_fwd, int cnt_cons_rev, int cnt_ref_fwd, int cnt_ref_rev, Map attrs) { // stores the four counts, in this order, under STRAND_COUNT_KEY
+        attrs.put(STRAND_COUNT_KEY, new Integer[] {cnt_cons_fwd, cnt_cons_rev, cnt_ref_fwd, cnt_ref_rev} );
+        return attrs;
+    }
+
+    public static Map recordDepth(int cnt_cons, int cnt_indel, int cnt_total, Map attrs) { // AD = {consensus count, any-indel count}, DP = total coverage
+        attrs.put(ALLELIC_DEPTH_KEY, new Integer[] {cnt_cons, cnt_indel} ); // NOTE(review): the AD header text says "consensus indel/reference" but the second value stored is cnt_indel - confirm which is intended
+        attrs.put(DEPTH_TOTAL_KEY, cnt_total);
+        return attrs;
+    }
+
+    public static Map recordAvMapQ(double cons, double ref, Map attrs) { // values are narrowed to float before being stored
+        attrs.put(MAPQ_KEY, new Float[] {(float)cons, (float)ref} );
+        return attrs;
+    }
+
+    public static Map recordAvMM(double cons, double ref, Map attrs) { // values are narrowed to float before being stored
+        attrs.put(MM_KEY, new Float[] {(float)cons, (float)ref} );
+        return attrs;
+    }
+
+    public static Map recordNQSMMRate(double cons, double ref, Map attrs) { // values are narrowed to float before being stored
+        attrs.put(NQS_MMRATE_KEY, new Float[] {(float)cons, (float)ref} );
+        return attrs;
+    }
+
+    public static Map recordNQSAvQ(double cons, double ref, Map attrs) { // values are narrowed to float before being stored
+        attrs.put(NQS_AVQ_KEY, new Float[] {(float)cons, (float)ref} );
+        return attrs;
+    }
+
+    public static Map recordOffsetFromStart(int median, int mad, Map attrs) { // stores {median, mad} under RSTART_OFFSET_KEY
+        attrs.put(RSTART_OFFSET_KEY, new Integer[] {median, mad} );
+        return attrs;
+    }
+
+    public static Map recordOffsetFromEnd(int median, int mad, Map attrs) { // stores {median, mad} under REND_OFFSET_KEY
+        attrs.put(REND_OFFSET_KEY, new Integer[] {median, mad} );
+        return attrs;
+    }
+}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/AnnotateMNPsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/AnnotateMNPsWalker.java
deleted file mode 100755
index 9aa370d3f..000000000
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/AnnotateMNPsWalker.java
+++ /dev/null
@@ -1,890 +0,0 @@
-/*
- * Copyright (c) 2010, The Broad Institute
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-package org.broadinstitute.sting.gatk.walkers.phasing;
-
-import org.broadinstitute.sting.commandline.Argument;
-import org.broadinstitute.sting.commandline.Output;
-import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
-import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
-import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
-import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
-import org.broadinstitute.sting.gatk.refdata.features.annotator.AnnotatorInputTableFeature;
-import org.broadinstitute.sting.gatk.walkers.*;
-import org.broadinstitute.sting.gatk.walkers.annotator.genomicannotator.AminoAcid;
-import org.broadinstitute.sting.gatk.walkers.annotator.genomicannotator.AminoAcidTable;
-import org.broadinstitute.sting.utils.BaseUtils;
-import org.broadinstitute.sting.utils.GenomeLoc;
-import org.broadinstitute.sting.utils.GenomeLocParser;
-import org.broadinstitute.sting.utils.codecs.vcf.*;
-import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
-import org.broadinstitute.sting.utils.exceptions.UserException;
-import org.broadinstitute.sting.utils.variantcontext.Allele;
-import org.broadinstitute.sting.utils.variantcontext.VariantContext;
-import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
-
-import java.util.*;
-
-import static org.broadinstitute.sting.utils.codecs.vcf.VCFUtils.getVCFHeadersFromRods;
-
-
-/**
- * Walks along all variant ROD loci, and dynamically annotates alleles at MNP records.
- */
-@Allows(value = {DataSource.REFERENCE})
-@Requires(value = {DataSource.REFERENCE}, referenceMetaData = {@RMD(name = AnnotateMNPsWalker.REFSEQ_ROD_NAME, type = AnnotatorInputTableFeature.class), @RMD(name = AnnotateMNPsWalker.VARIANT_ROD_NAME, type = ReferenceOrderedDatum.class)})
-
-public class AnnotateMNPsWalker extends RodWalker {
-
-    @Output(doc = "File to which variants should be written", required = true)
-    protected VCFWriter writer = null;
-    private ManualSortingVCFWriter sortingWriter = null;
-
-    @Argument(fullName = "emitOnlyMNPs", shortName = "emitOnlyMNPs", doc = "Only output MNP records; [default:false]", required = false)
-    protected boolean emitOnlyMNPs = false;    
-
-    private LinkedList rodNames = null;
-    private GenomeLocParser locParser = null;
-    private TreeMap> MNPstartToStops = null; // Must be TreeMap sorted by START sites!
-
-    public final static String REFSEQ_ROD_NAME = "refseq";
-    public final static String VARIANT_ROD_NAME = "variant";
-
-    private LocusToFeatures locusToRefSeqFeatures = null;
-
-
-    protected final static String MNP_ANNOTATION_KEY_PREFIX = "MNP.refseq.";
-
-    protected final static String REFSEQ_NAME = "name";
-    protected final static String REFSEQ_NAME2 = "name2";
-
-    protected final static String REFSEQ_POSITION_TYPE = "positionType";
-    protected final static String REFSEQ_CDS = "CDS";
-
-    protected final static String REFSEQ_STRAND = "transcriptStrand";
-    protected final static String REFSEQ_POS_STRAND = "+";
-    protected final static String REFSEQ_NEG_STRAND = "-";
-
-    protected final static String REFSEQ_CODON_COORD = "codonCoord";
-    protected final static String REFSEQ_CODING_FRAME = "frame";
-
-    protected final static String REFSEQ_REF_CODON = "referenceCodon";
-    protected final static String REFSEQ_REF_AA = "referenceAA";
-
-    protected final static String REFSEQ_ALT_BASE = "haplotypeAlternate";
-
-    protected final static String REFSEQ_VARIANT_CODON = "variantCodon";
-    protected final static String REFSEQ_VARIANT_AA = "variantAA";
-    protected final static String REFSEQ_CHANGES_AA = "changesAA";
-    protected final static String REFSEQ_FUNCTIONAL_CLASS = "functionalClass";
-    protected final static String REFSEQ_PROTEIN_COORD_DESCRIPTION = "proteinCoordStr";
-
-    protected final static String REFSEQ_CODING_ANNOTATIONS = "codingVariants";
-    protected final static String REFSEQ_NUM_AA_CHANGES = "numAAchanges";
-    protected final static String REFSEQ_HAS_MULT_AA_CHANGES = "alleleHasMultAAchanges";
-
-    public void initialize() {
-        rodNames = new LinkedList();
-        rodNames.add(VARIANT_ROD_NAME);
-
-        locParser = getToolkit().getGenomeLocParser();
-        MNPstartToStops = new TreeMap>(); // sorted by start sites
-
-        initializeVcfWriter();
-
-        locusToRefSeqFeatures = new LocusToFeatures();
-    }
-
-    private void initializeVcfWriter() {
-        sortingWriter = new ManualSortingVCFWriter(writer);
-        writer = sortingWriter;
-
-        // setup the header fields:
-        Set hInfo = new HashSet();
-        hInfo.addAll(VCFUtils.getHeaderFields(getToolkit()));
-        hInfo.add(new VCFHeaderLine("reference", getToolkit().getArguments().referenceFile.getName()));
-
-        Map rodNameToHeader = getVCFHeadersFromRods(getToolkit(), rodNames);
-        writer.writeHeader(new VCFHeader(hInfo, new TreeSet(rodNameToHeader.get(rodNames.get(0)).getGenotypeSamples())));
-    }
-
-    public boolean generateExtendedEvents() {
-        return false;
-    }
-
-    public Integer reduceInit() {
-        return 0;
-    }
-
-    /**
-     * For each site of interest, annotate it if it's a MNP.
-     *
-     * @param tracker the meta-data tracker
-     * @param ref     the reference base
-     * @param context the context for the given locus
-     * @return count of MNPs observed
-     */
-    public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
-        if (tracker == null)
-            return null;
-
-        int numMNPsObserved = 0;
-        GenomeLoc curLocus = ref.getLocus();
-        clearOldLocusFeatures(curLocus);
-
-        boolean requireStartHere = false; // see EVERY site of the MNP
-        boolean takeFirstOnly = false; // take as many entries as the VCF file has
-        for (VariantContext vc : tracker.getVariantContexts(ref, rodNames, null, context.getLocation(), requireStartHere, takeFirstOnly)) {
-            GenomeLoc vcLoc = VariantContextUtils.getLocation(locParser, vc);
-            boolean atStartOfVc = curLocus.getStart() == vcLoc.getStart();
-            boolean atEndOfVc = curLocus.getStart() == vcLoc.getStop();
-
-            if (vc.isMNP()) {
-                logger.debug("Observed MNP at " + vcLoc);
-
-                if (isChrM(vc)) {
-                    if (atStartOfVc) {
-                        logger.warn("Skipping mitochondrial MNP at " + vcLoc + " due to complexity of coding table [need to know if first codon, etc.]...");
-                        writeVCF(vc);
-                    }
-                    continue;
-                }
-
-                GenomeLoc stopLoc = locParser.createGenomeLoc(curLocus.getContig(), vcLoc.getStop());
-                final List refSeqRODs = tracker.getReferenceMetaData(REFSEQ_ROD_NAME);
-                for (Object refSeqObject : refSeqRODs) {
-                    AnnotatorInputTableFeature refSeqAnnotation = (AnnotatorInputTableFeature) refSeqObject;
-                    locusToRefSeqFeatures.putLocusFeatures(curLocus, refSeqAnnotation, stopLoc);
-                }
-
-                if (atStartOfVc) { // MNP is starting here, so register that we're waiting for it
-                    Set stopLocs = MNPstartToStops.get(curLocus);
-                    if (stopLocs == null) {
-                        stopLocs = new HashSet();
-                        MNPstartToStops.put(curLocus, stopLocs);
-                    }
-                    stopLocs.add(stopLoc);
-                }
-
-                if (atEndOfVc) {
-                    numMNPsObserved++; // only count a MNP at its stop site
-                    logger.debug("Observed end of MNP at " + curLocus);
-                    logger.debug("Current list of per-locus features\n" + locusToRefSeqFeatures);
-
-                    Map MNPannotations = annotateMNP(vc);
-                    MNPannotations.putAll(RefSeqDataParser.removeRefSeqAttributes(vc.getAttributes())); // remove any RefSeq INFO, since adding it in more thoroughly here
-                    vc = VariantContext.modifyAttributes(vc, MNPannotations);
-                    writeVCF(vc);
-
-                    GenomeLoc startLoc = locParser.createGenomeLoc(curLocus.getContig(), vcLoc.getStart());
-                    Set stopLocs = MNPstartToStops.get(startLoc);
-                    if (stopLocs != null) { // otherwise, just removed stopLocs due to another MNP that has the same (start, stop)
-                        stopLocs.remove(stopLoc);
-                        if (stopLocs.isEmpty()) // no longer waiting for startLoc
-                            MNPstartToStops.remove(startLoc);
-                    }
-                }
-            }
-            else if (atStartOfVc && !emitOnlyMNPs) {// only want to write other VariantContexts records once (where they start):
-                writeVCF(vc);
-            }
-        }
-
-        Integer mostUpstreamWritableLoc = null;
-        if (!MNPstartToStops.isEmpty()) {
-            GenomeLoc waitingForLoc = MNPstartToStops.entrySet().iterator().next().getKey();
-            mostUpstreamWritableLoc = waitingForLoc.getStart() - 1;
-        }
-        sortingWriter.setmostUpstreamWritableLocus(mostUpstreamWritableLoc);
-
-        return numMNPsObserved;
-    }
-
-    private static boolean isChrM(final VariantContext vc) {
-        return vc.getChr().equals("chrM") || vc.getChr().equals("MT");
-    }
-
-    private Map annotateMNP(VariantContext vc) {
-        Map annotations = new HashMap();
-
-        RefSeqNameToFeatures nameToPositionalFeatures = new RefSeqNameToFeatures(vc);
-        MNPannotationKeyBuilder kb = new MNPannotationKeyBuilder(nameToPositionalFeatures);
-
-        for (Map.Entry nameToFeatureEntry : nameToPositionalFeatures.entrySet()) {
-            String featureName = nameToFeatureEntry.getKey();
-            RefSeqFeatureList feature = nameToFeatureEntry.getValue();
-            CodonAnnotationsForAltAlleles codonAnnotationsForAlleles = new CodonAnnotationsForAltAlleles(vc, feature);
-
-            annotations.put(kb.getKey(REFSEQ_CODING_ANNOTATIONS), codonAnnotationsForAlleles.getCodonAnnotationsString());
-            annotations.put(kb.getKey(REFSEQ_NUM_AA_CHANGES), codonAnnotationsForAlleles.getNumAAchangesString());
-            annotations.put(kb.getKey(REFSEQ_HAS_MULT_AA_CHANGES), codonAnnotationsForAlleles.hasAlleleWithMultipleAAchanges);
-            annotations.put(kb.getKey(REFSEQ_NAME), featureName);
-            annotations.put(kb.getKey(REFSEQ_NAME2), feature.name2);
-            annotations.put(kb.getKey(REFSEQ_POSITION_TYPE), REFSEQ_CDS);
-            annotations.put(kb.getKey(REFSEQ_STRAND), (feature.positiveStrand ? REFSEQ_POS_STRAND : REFSEQ_NEG_STRAND));
-            annotations.put(kb.getKey(REFSEQ_CODON_COORD), feature.getCodonCoordString());
-
-            kb.incrementFeatureIndex();
-        }
-
-        return annotations;
-    }
-
-    private static class MNPannotationKeyBuilder {
-        private int featureIndex;
-        private boolean multipleEntries;
-
-        public MNPannotationKeyBuilder(RefSeqNameToFeatures nameToPositionalFeatures) {
-            this.featureIndex = 1;
-            this.multipleEntries = nameToPositionalFeatures.nameToFeatures.size() > 1;
-        }
-
-        public void incrementFeatureIndex() {
-            featureIndex++;
-        }
-
-        public String getKey(String type) {
-            String annotationKey = MNP_ANNOTATION_KEY_PREFIX + type;
-            if (multipleEntries)
-                annotationKey += "_" + featureIndex;
-            return annotationKey;
-        }
-    }
-
-    private static byte[] ByteArrayToPrimitive(Byte[] nonNullArray) {
-        byte[] primArray = new byte[nonNullArray.length];
-
-        for (int i = 0; i < nonNullArray.length; i++) {
-            if (nonNullArray[i] == null)
-                throw new ReviewedStingException("nonNullArray[i] == null");
-            primArray[i] = nonNullArray[i];
-        }
-
-        return primArray;
-    }
-
-    private void clearOldLocusFeatures(GenomeLoc curLoc) {
-        Iterator> locusFeaturesIt = locusToRefSeqFeatures.entrySet().iterator();
-        while (locusFeaturesIt.hasNext()) {
-            Map.Entry locusFeaturesEntry = locusFeaturesIt.next();
-            if (curLoc.isPast(locusFeaturesEntry.getValue().getFurthestLocusUsingFeatures()))
-                locusFeaturesIt.remove();
-        }
-    }
-
-    public Integer reduce(Integer count, Integer total) {
-        if (count != null)
-            total = total + count;
-
-        return total;
-    }
-
-    /**
-     * @param result the number of MNPs processed.
-     */
-    public void onTraversalDone(Integer result) {
-        System.out.println("Number of MNPs observed: " + result);
-        writer.close();
-    }
-
-    private void writeVCF(VariantContext vc) {
-        WriteVCF.writeVCF(vc, writer, logger);
-    }
-
-    /*
-     Inner classes:
-     */
-
-    // Maps: RefSeq entry name -> features for ALL positions of a particular VariantContext MNP:
-
-    private class RefSeqNameToFeatures {
-        private Map nameToFeatures;
-
-        public RefSeqNameToFeatures(VariantContext vc) {
-            this.nameToFeatures = new HashMap();
-
-            int MNPstart = vc.getStart();
-            int MNPstop = vc.getEnd();
-            int MNPlength = MNPstop - MNPstart + 1;
-
-            for (int i = 0; i < MNPlength; i++) {
-                int genomicPosition = MNPstart + i;
-                GenomeLoc posLoc = locParser.createGenomeLoc(vc.getChr(), genomicPosition);
-
-                PositionalRefSeqFeatures locFeatures = locusToRefSeqFeatures.getLocusFeatures(posLoc);
-                if (locFeatures == null) // no features for posLoc
-                    continue;
-
-                for (Map.Entry nameToFeatureEntry : locFeatures.entrySet()) {
-                    String name = nameToFeatureEntry.getKey();
-                    PositionalRefSeqFeature posFeature = nameToFeatureEntry.getValue();
-
-                    RefSeqFeatureList featureList = nameToFeatures.get(name);
-                    if (featureList == null) {
-                        featureList = new RefSeqFeatureList(MNPlength);
-                        nameToFeatures.put(name, featureList);
-                    }
-                    featureList.updateFeatureAtPosition(i, posFeature);
-                }
-            }
-        }
-
-        public Set> entrySet() {
-            return nameToFeatures.entrySet();
-        }
-    }
-
-    // For a particular RefSeq entry, contains the features for ALL positions of a particular VariantContext MNP
-
-    private static class RefSeqFeatureList {
-        private final static String CODON_FRAME_START = "(";
-        private final static String CODON_FRAME_END = ")";
-        private final static String CODON_DELIM = "|";
-
-        private CodingRefSeqFeature[] refSeqFeatures;
-        private String name2;
-        private Boolean positiveStrand;
-
-        private Map> codonToIndices; // Map of: codon index -> MNP indices that refer to codon
-
-        public RefSeqFeatureList(int MNPlength) {
-            this.refSeqFeatures = new CodingRefSeqFeature[MNPlength];
-            for (int i = 0; i < MNPlength; i++)
-                this.refSeqFeatures[i] = null;
-
-            this.name2 = null;
-            this.positiveStrand = null;
-            this.codonToIndices = new TreeMap>();
-        }
-
-        public void updateFeatureAtPosition(int index, PositionalRefSeqFeature feature) {
-            if (name2 == null) {
-                name2 = feature.name2;
-                positiveStrand = feature.positiveStrand;
-            }
-            else if (!name2.equals(feature.name2) || positiveStrand != feature.positiveStrand) {
-                throw new UserException("Inconsistency between previous RefSeq entry and: " + feature);
-            }
-
-            CodingRefSeqFeature crsf = new CodingRefSeqFeature(feature);
-            refSeqFeatures[index] = crsf;
-
-            List indicesWithCodon = codonToIndices.get(crsf.codonCoord);
-            if (indicesWithCodon == null) {
-                indicesWithCodon = new LinkedList();
-                codonToIndices.put(crsf.codonCoord, indicesWithCodon);
-            }
-            indicesWithCodon.add(index);
-        }
-
-        public Set>> codonIndicesEntrySet() {
-            return codonToIndices.entrySet();
-        }
-
-        public String getCodonCoordString() {
-            StringBuilder sb = new StringBuilder();
-
-            for (int i = 0; i < refSeqFeatures.length; i++) {
-                CodingRefSeqFeature crsf = refSeqFeatures[i];
-                if (crsf != null)
-                    sb.append(crsf.codonCoord).append(CODON_FRAME_START).append(crsf.codingFrame).append(CODON_FRAME_END);
-                if (i < refSeqFeatures.length - 1)
-                    sb.append(CODON_DELIM);
-            }
-
-            return sb.toString();
-        }
-    }
-
-    private static class CodingRefSeqFeature {
-        protected int codonCoord;
-        protected int codingFrame;
-        protected String referenceCodon;
-        protected String referenceAA;
-
-        public CodingRefSeqFeature(PositionalRefSeqFeature feature) {
-            this.codonCoord = feature.codonCoord;
-            this.codingFrame = feature.codingFrame;
-            this.referenceCodon = feature.referenceCodon.toUpperCase();
-            this.referenceAA = feature.referenceAA;
-        }
-    }
-
-    private static class CodonAnnotationsForAltAlleles {
-        protected final static int MIN_CODON_INDEX = 0;
-        protected final static int NUM_CODON_INDICES = 3;
-        private final static String CODON_ANNOTATION_DELIM = ",";
-
-        private List alleleAnnotations;
-        private int[] alleleToNumAAchanges;
-        private boolean hasAlleleWithMultipleAAchanges;
-
-        public CodonAnnotationsForAltAlleles(VariantContext vc, RefSeqFeatureList feature) {
-            this.alleleAnnotations = new LinkedList();
-
-            Set altAlleles = vc.getAlternateAlleles();
-            int numAltAlleles = altAlleles.size();
-            this.alleleToNumAAchanges = new int[numAltAlleles];
-            for (int i = 0; i < numAltAlleles; i++)
-                this.alleleToNumAAchanges[i] = 0;
-
-            int MNPstart = vc.getStart();
-            int MNPstop = vc.getEnd();
-            int MNPlength = MNPstop - MNPstart + 1;
-
-            for (Map.Entry> codonToIndicesEntry : feature.codonIndicesEntrySet()) {
-                int codonIndex = codonToIndicesEntry.getKey();
-                List indices = codonToIndicesEntry.getValue();
-                if (indices.isEmpty())
-                    throw new ReviewedStingException("indices should not exist if it's empty!");
-
-                for (int index : indices) {
-                    int frame = feature.refSeqFeatures[index].codingFrame;
-                    if (feature.refSeqFeatures[index].codonCoord != codonIndex)
-                        throw new ReviewedStingException("LOGICAL ERROR: feature.refSeqFeatures[index].codonCoord != codonIndex");
-                    if (frame < MIN_CODON_INDEX || frame >= NUM_CODON_INDICES)
-                        throw new UserException("RefSeq codon frame not one of {0,1,2}");
-                }
-                CodingRefSeqFeature firstFeatureForCodon = feature.refSeqFeatures[indices.get(0)];
-                String refCodon = firstFeatureForCodon.referenceCodon;
-
-                SingleCodonAnnotationsForAlleles codonAnnotation = new SingleCodonAnnotationsForAlleles(codonIndex, altAlleles, MNPlength, refCodon, firstFeatureForCodon, indices, feature);
-                alleleAnnotations.add(codonAnnotation);
-
-                // From a single codon, summarize the data for ALL alleles:
-                for (int i = 0; i < numAltAlleles; i++) {
-                    if (codonAnnotation.annotationsForAlleles[i].codonFunc.changesAA) {
-                        alleleToNumAAchanges[i]++;
-                        if (alleleToNumAAchanges[i] > 1)
-                            this.hasAlleleWithMultipleAAchanges = true;
-                    }
-                }
-            }
-        }
-
-        public String getCodonAnnotationsString() {
-            StringBuilder sb = new StringBuilder();
-
-            int index = 0;
-            for (SingleCodonAnnotationsForAlleles codonToAlleles : alleleAnnotations) {
-                sb.append(codonToAlleles);
-                if (index < alleleAnnotations.size() - 1)
-                    sb.append(CODON_ANNOTATION_DELIM);
-                index++;
-            }
-
-            return sb.toString();
-        }
-
-        public String getNumAAchangesString() {
-            StringBuilder sb = new StringBuilder();
-
-            for (int index = 0; index < alleleToNumAAchanges.length; index++) {
-                sb.append(alleleToNumAAchanges[index]);
-                if (index < alleleToNumAAchanges.length - 1)
-                    sb.append(SingleCodonAnnotationsForAlleles.ALLELE_ANNOTATION_DELIM);
-            }
-
-            return sb.toString();
-        }
-    }
-
-    private static class SingleCodonAnnotationsForAlleles {
-        private final static String CODON_MAP_SYMBOL = "->";
-        private final static String CODON_ANNOTATION_START = "[";
-        private final static String CODON_ANNOTATION_END = "]";
-        private final static String REF_CODON_INFO_DELIM = "|";
-        private final static String ALLELE_ANNOTATION_DELIM = ",";
-        private final static String ASSIGNMENT = ":";
-
-        private int codonIndex;
-        private String refCodon;
-        private String refAA;
-
-        private SingleCodonAnnotationsForAllele[] annotationsForAlleles;
-
-        public SingleCodonAnnotationsForAlleles(int codonIndex, Collection altAlleles, int MNPlength, String refCodon, CodingRefSeqFeature firstFeatureForCodon, List indices, RefSeqFeatureList feature) {
-            if (refCodon.length() != CodonAnnotationsForAltAlleles.NUM_CODON_INDICES)
-                throw new UserException("RefSeq reference codon " + refCodon + " is not of length " + CodonAnnotationsForAltAlleles.NUM_CODON_INDICES);
-
-            AminoAcid refAA = AminoAcidTable.getEukaryoticAA(refCodon);
-            if (!refAA.getCode().equals(firstFeatureForCodon.referenceAA))
-                throw new UserException("RefSeq: translated reference codon= " + refAA + " != " + firstFeatureForCodon.referenceAA + " = reference AA");
-
-            this.codonIndex = codonIndex;
-            this.refCodon = refCodon;
-            this.refAA = refAA.getCode();
-            this.annotationsForAlleles = new SingleCodonAnnotationsForAllele[altAlleles.size()];
-
-            int altInd = 0;
-            for (Allele altAllele : altAlleles) {
-                if (altAllele.length() != MNPlength)
-                    throw new ReviewedStingException("length(altAllele) != length(MNP)");
-                byte[] altBases = altAllele.getBases();
-
-                Byte[] variantCodonArr = new Byte[CodonAnnotationsForAltAlleles.NUM_CODON_INDICES];
-                for (int i = CodonAnnotationsForAltAlleles.MIN_CODON_INDEX; i < CodonAnnotationsForAltAlleles.NUM_CODON_INDICES; i++)
-                    variantCodonArr[i] = null;
-
-                for (int index : indices) {
-                    int frame = feature.refSeqFeatures[index].codingFrame;
-                    if (variantCodonArr[frame] != null)
-                        throw new UserException("RefSeq assigns codon " + codonIndex + " twice at same frame: " + frame);
-
-                    byte base = altBases[index];
-                    if (!feature.positiveStrand) // negative strand codon
-                        base = BaseUtils.simpleComplement(base);
-
-                    variantCodonArr[frame] = base;
-                }
-
-                /* For missing frames, there MUST exist AT LEAST one index that refers to this codon,
-                  so use it to derive the missing bases [ALREADY complemented if on the negative strand]:
-                */
-                for (int frame = CodonAnnotationsForAltAlleles.MIN_CODON_INDEX; frame < CodonAnnotationsForAltAlleles.NUM_CODON_INDICES; frame++) {
-                    if (variantCodonArr[frame] == null)
-                        variantCodonArr[frame] = (byte) refCodon.charAt(frame);
-                }
-                String variantCodon = new String(ByteArrayToPrimitive(variantCodonArr)).toUpperCase();
-
-                SingleCodonAnnotationsForAllele alleleAnnotation = new SingleCodonAnnotationsForAllele(variantCodon, refCodon, refAA, codonIndex);
-                annotationsForAlleles[altInd] = alleleAnnotation;
-                altInd++;
-            }
-        }
-
-        public String toString() {
-            StringBuilder sb = new StringBuilder();
-
-            sb.append(codonIndex).append(CODON_MAP_SYMBOL).append(CODON_ANNOTATION_START);
-            sb.append(REFSEQ_REF_CODON).append(ASSIGNMENT).append(refCodon).append(REF_CODON_INFO_DELIM);
-            sb.append(REFSEQ_REF_AA).append(ASSIGNMENT).append(refAA).append(REF_CODON_INFO_DELIM);
-
-            int index = 0;
-            for (SingleCodonAnnotationsForAllele annotation : annotationsForAlleles) {
-                sb.append(annotation);
-                if (index < annotationsForAlleles.length - 1)
-                    sb.append(ALLELE_ANNOTATION_DELIM);
-                index++;
-            }
-            sb.append(CODON_ANNOTATION_END);
-
-            return sb.toString();
-        }
-    }
-
-    private static class SingleCodonAnnotationsForAllele {
-        private final static String ALLELE_START = "{";
-        private final static String ALLELE_END = "}";
-        private final static String CODON_INFO_DELIM = "|";
-        private final static String ASSIGNMENT = ":";
-        private final static String MNP_DEPENDENT_AA = "MNPdependentAA";
-
-        private CodonFunction codonFunc;
-        private String proteinCoordStr;
-        private boolean MNPdependentAA;
-        private String originalAA;
-
-        public SingleCodonAnnotationsForAllele(String variantCodon, String refCodon, AminoAcid refAA, int codonIndex) {
-            this.codonFunc = new CodonFunction(variantCodon, refCodon, refAA);
-            this.proteinCoordStr = "p." + refAA.getLetter() + codonIndex + codonFunc.variantAA.getLetter();
-
-            int refCodonLength = refCodon.length();
-            if (codonFunc.variantCodon.length() != refCodonLength)
-                throw new ReviewedStingException("codonFunc.variantCodon.length() != refCodonLength, but ALREADY checked that they're both 3");
-
-            this.MNPdependentAA = true;
-            this.originalAA = "(";
-            for (int i = 0; i < refCodonLength; i++) {
-                // Take [0,i-1] and [i+1, end] from refCodon, and i from variantCodon:
-                String singleBaseChangeCodon = refCodon.substring(0, i) + variantCodon.substring(i, i+1) + refCodon.substring(i+1, refCodonLength);
-                CodonFunction singleBaseChangeCodonFunc = new CodonFunction(singleBaseChangeCodon, refCodon, refAA);
-                if (singleBaseChangeCodonFunc.variantAA.equals(codonFunc.variantAA)) {
-                    this.MNPdependentAA = false;
-                    this.originalAA = "";
-                    break;
-                }
-
-                this.originalAA = this.originalAA + "" + singleBaseChangeCodonFunc.variantAA.getLetter();
-                if (i < refCodonLength - 1)
-                    this.originalAA = this.originalAA + ",";
-            }
-
-            if (this.MNPdependentAA)
-                this.originalAA = this.originalAA + ")";
-        }
-
-        private static class CodonFunction {
-            private String variantCodon;
-            private AminoAcid variantAA;
-            private boolean changesAA;
-            private String functionalClass;
-
-            public CodonFunction(String variantCodon, String refCodon, AminoAcid refAA) {
-                this.variantCodon = variantCodon;
-                this.variantAA = AminoAcidTable.getEukaryoticAA(this.variantCodon);
-                this.changesAA = !refAA.equals(variantAA);
-
-                if (!this.variantCodon.equals(refCodon)) {
-                    if (changesAA) {
-                        if (variantAA.isStop()) {
-                            functionalClass = "nonsense";
-                        }
-                        else if (refAA.isStop()) {
-                            functionalClass = "readthrough";
-                        }
-                        else {
-                            functionalClass = "missense";
-                        }
-                    }
-                    else { // the same aa:
-                        functionalClass = "silent";
-                    }
-                }
-                else { // the same codon:
-                    functionalClass = "no_change";
-                }
-            }
-        }
-
-        public String toString() {
-            StringBuilder sb = new StringBuilder();
-
-            sb.append(ALLELE_START);
-            sb.append(REFSEQ_VARIANT_CODON).append(ASSIGNMENT).append(codonFunc.variantCodon).append(CODON_INFO_DELIM);
-            sb.append(REFSEQ_VARIANT_AA).append(ASSIGNMENT).append(codonFunc.variantAA.getCode()).append(CODON_INFO_DELIM);
-            sb.append(REFSEQ_CHANGES_AA).append(ASSIGNMENT).append(codonFunc.changesAA).append(CODON_INFO_DELIM);
-            sb.append(REFSEQ_FUNCTIONAL_CLASS).append(ASSIGNMENT).append(codonFunc.functionalClass).append(CODON_INFO_DELIM);
-            sb.append(REFSEQ_PROTEIN_COORD_DESCRIPTION).append(ASSIGNMENT).append(proteinCoordStr).append(CODON_INFO_DELIM);
-            sb.append(MNP_DEPENDENT_AA).append(ASSIGNMENT).append(MNPdependentAA).append(originalAA);
-            sb.append(ALLELE_END);
-
-            return sb.toString();
-        }
-    }
-}
-
-
-// External classes:
-
-class LocusToFeatures {
-    private Map locusToFeatures;
-
-    public LocusToFeatures() {
-        this.locusToFeatures = new TreeMap();
-    }
-
-    public PositionalRefSeqFeatures getLocusFeatures(GenomeLoc loc) {
-        return locusToFeatures.get(loc);
-    }
-
-    public void putLocusFeatures(GenomeLoc loc, AnnotatorInputTableFeature refSeqAnnotation, GenomeLoc locusUsingThis) {
-        PositionalRefSeqFeatures locFeatures = locusToFeatures.get(loc);
-        if (locFeatures == null) {
-            locFeatures = new PositionalRefSeqFeatures(locusUsingThis);
-            locusToFeatures.put(loc, locFeatures);
-        }
-        locFeatures.putFeature(refSeqAnnotation, locusUsingThis);
-    }
-
-    public Set> entrySet() {
-        return locusToFeatures.entrySet();
-    }
-
-    public String toString() { // INTERNAL use only
-        StringBuilder sb = new StringBuilder();
-
-        for (Map.Entry locFeatures : entrySet()) {
-            GenomeLoc loc = locFeatures.getKey();
-            PositionalRefSeqFeatures features = locFeatures.getValue();
-            sb.append("Locus: ").append(loc).append("\n").append(features);
-        }
-
-        return sb.toString();
-    }
-}
-
-class PositionalRefSeqFeatures {
-    private final static String[] REQUIRE_COLUMNS =
-            {AnnotateMNPsWalker.REFSEQ_NAME, AnnotateMNPsWalker.REFSEQ_POSITION_TYPE};
-
-    private Map nameToFeature;
-    private GenomeLoc furthestLocusUsingFeatures;
-
-    public PositionalRefSeqFeatures(GenomeLoc locusUsingThis) {
-        this.nameToFeature = new HashMap();
-        this.furthestLocusUsingFeatures = locusUsingThis;
-    }
-
-    public void putFeature(AnnotatorInputTableFeature refSeqAnnotation, GenomeLoc locusUsingThis) {
-        for (String column : REQUIRE_COLUMNS) {
-            if (!refSeqAnnotation.containsColumnName(column))
-                throw new UserException("In RefSeq: " + refSeqAnnotation + " Missing column " + column);
-        }
-
-        if (locusUsingThis.isPast(furthestLocusUsingFeatures))
-            furthestLocusUsingFeatures = locusUsingThis;
-
-        String posType = refSeqAnnotation.getColumnValue(AnnotateMNPsWalker.REFSEQ_POSITION_TYPE);
-        if (!posType.equals(AnnotateMNPsWalker.REFSEQ_CDS)) // only interested in coding sequence annotations
-            return;
-
-        PositionalRefSeqFeature newLocusFeature = new PositionalRefSeqFeature(refSeqAnnotation);
-
-        String refSeqName = refSeqAnnotation.getColumnValue(AnnotateMNPsWalker.REFSEQ_NAME);
-        PositionalRefSeqFeature locusFeature = nameToFeature.get(refSeqName);
-        if (locusFeature == null) {
-            locusFeature = newLocusFeature;
-            nameToFeature.put(refSeqName, locusFeature);
-        }
-        else if (!locusFeature.equals(newLocusFeature)) {
-            throw new UserException("Inconsistency between previous RefSeq entry and: " + refSeqAnnotation);
-        }
-
-        locusFeature.updateFeature(refSeqAnnotation);
-    }
-
-    public GenomeLoc getFurthestLocusUsingFeatures() {
-        return furthestLocusUsingFeatures;
-    }
-
-    public Set> entrySet() {
-        return nameToFeature.entrySet();
-    }
-
-    public String toString() { // INTERNAL use only
-        StringBuilder sb = new StringBuilder();
-
-        for (Map.Entry nameFeatureEntry : entrySet()) {
-            String name = nameFeatureEntry.getKey();
-            PositionalRefSeqFeature feature = nameFeatureEntry.getValue();
-            sb.append(name).append(" -> [").append(feature).append("]\n");
-        }
-
-        return sb.toString();
-    }
-}
-
-class PositionalRefSeqFeature {
-    private final static String[] REQUIRE_COLUMNS =
-            {AnnotateMNPsWalker.REFSEQ_NAME2, AnnotateMNPsWalker.REFSEQ_STRAND,
-                    AnnotateMNPsWalker.REFSEQ_CODON_COORD, AnnotateMNPsWalker.REFSEQ_CODING_FRAME,
-                    AnnotateMNPsWalker.REFSEQ_REF_CODON, AnnotateMNPsWalker.REFSEQ_REF_AA};
-
-    protected String name2;
-    protected boolean positiveStrand;
-    protected int codonCoord;
-    protected int codingFrame;
-    protected String referenceCodon;
-    protected String referenceAA;
-
-    private Map baseToAnnotations;
-
-    public PositionalRefSeqFeature(AnnotatorInputTableFeature refSeqAnnotation) {
-        for (String column : REQUIRE_COLUMNS) {
-            if (!refSeqAnnotation.containsColumnName(column))
-                throw new UserException("In RefSeq: " + refSeqAnnotation + " Missing column " + column);
-        }
-        this.name2 = refSeqAnnotation.getColumnValue(AnnotateMNPsWalker.REFSEQ_NAME2);
-        this.positiveStrand = (refSeqAnnotation.getColumnValue(AnnotateMNPsWalker.REFSEQ_STRAND).equals(AnnotateMNPsWalker.REFSEQ_POS_STRAND));
-        this.codonCoord = Integer.parseInt(refSeqAnnotation.getColumnValue(AnnotateMNPsWalker.REFSEQ_CODON_COORD));
-        this.codingFrame = Integer.parseInt(refSeqAnnotation.getColumnValue(AnnotateMNPsWalker.REFSEQ_CODING_FRAME));
-        this.referenceCodon = refSeqAnnotation.getColumnValue(AnnotateMNPsWalker.REFSEQ_REF_CODON);
-        this.referenceAA = refSeqAnnotation.getColumnValue(AnnotateMNPsWalker.REFSEQ_REF_AA);
-
-        this.baseToAnnotations = new HashMap();
-    }
-
-    public boolean equals(PositionalRefSeqFeature that) {
-        return this.name2.equals(that.name2) && this.positiveStrand == that.positiveStrand && this.codonCoord == that.codonCoord && this.codingFrame == that.codingFrame
-                && this.referenceCodon.equals(that.referenceCodon) && this.referenceAA.equals(that.referenceAA);
-    }
-
-    public void updateFeature(AnnotatorInputTableFeature refSeqAnnotation) {
-        if (!refSeqAnnotation.containsColumnName(AnnotateMNPsWalker.REFSEQ_ALT_BASE))
-            throw new UserException("In RefSeq: " + refSeqAnnotation + " Missing column " + AnnotateMNPsWalker.REFSEQ_ALT_BASE);
-        String base = refSeqAnnotation.getColumnValue(AnnotateMNPsWalker.REFSEQ_ALT_BASE);
-
-        baseToAnnotations.put(base, new BaseAnnotations(refSeqAnnotation));
-    }
-
-    public String toString() { // INTERNAL use only
-        StringBuilder sb = new StringBuilder();
-
-        sb.append("name2= ").append(name2);
-        sb.append(", positiveStrand= ").append(positiveStrand);
-        sb.append(", codonCoord= ").append(codonCoord);
-        sb.append(", codingFrame= ").append(codingFrame);
-        sb.append(", referenceCodon= ").append(referenceCodon);
-        sb.append(", referenceAA= ").append(referenceAA);
-
-        sb.append(", baseAnnotations= {");
-        for (Map.Entry baseToAnnotationsEntry : baseToAnnotations.entrySet()) {
-            String base = baseToAnnotationsEntry.getKey();
-            BaseAnnotations annotations = baseToAnnotationsEntry.getValue();
-            sb.append(" ").append(base).append(" -> {").append(annotations).append("}");
-        }
-        sb.append(" }");
-
-        return sb.toString();
-    }
-}
-
-class BaseAnnotations {
-    private final static String[] REQUIRE_COLUMNS =
-            {AnnotateMNPsWalker.REFSEQ_VARIANT_CODON, AnnotateMNPsWalker.REFSEQ_VARIANT_AA,
-                    AnnotateMNPsWalker.REFSEQ_CHANGES_AA, AnnotateMNPsWalker.REFSEQ_FUNCTIONAL_CLASS,
-                    AnnotateMNPsWalker.REFSEQ_PROTEIN_COORD_DESCRIPTION};
-
-    protected String variantCodon;
-    protected String variantAA;
-    protected boolean changesAA;
-    protected String functionalClass;
-    protected String proteinCoordStr;
-
-    public BaseAnnotations(AnnotatorInputTableFeature refSeqAnnotation) {
-        for (String column : REQUIRE_COLUMNS) {
-            if (!refSeqAnnotation.containsColumnName(column))
-                throw new UserException("In RefSeq: " + refSeqAnnotation + " Missing column " + column);
-        }
-        this.variantCodon = refSeqAnnotation.getColumnValue(AnnotateMNPsWalker.REFSEQ_VARIANT_CODON);
-        this.variantAA = refSeqAnnotation.getColumnValue(AnnotateMNPsWalker.REFSEQ_VARIANT_AA);
-        this.changesAA = Boolean.parseBoolean(refSeqAnnotation.getColumnValue(AnnotateMNPsWalker.REFSEQ_CHANGES_AA));
-        this.functionalClass = refSeqAnnotation.getColumnValue(AnnotateMNPsWalker.REFSEQ_FUNCTIONAL_CLASS);
-        this.proteinCoordStr = refSeqAnnotation.getColumnValue(AnnotateMNPsWalker.REFSEQ_PROTEIN_COORD_DESCRIPTION);
-    }
-
-
-    public String toString() { // INTERNAL use only
-        StringBuilder sb = new StringBuilder();
-
-        sb.append("variantCodon= ").append(variantCodon);
-        sb.append(", variantAA= ").append(variantAA);
-        sb.append(", changesAA= ").append(changesAA);
-        sb.append(", functionalClass= ").append(functionalClass);
-        sb.append(", proteinCoordStr= ").append(proteinCoordStr);
-
-        return sb.toString();
-    }
-}
\ No newline at end of file
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeAndMatchHaplotypes.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeAndMatchHaplotypes.java
index 298d8d6c8..306509d0c 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeAndMatchHaplotypes.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeAndMatchHaplotypes.java
@@ -1,6 +1,8 @@
 package org.broadinstitute.sting.gatk.walkers.phasing;
 
+import org.broadinstitute.sting.commandline.Input;
 import org.broadinstitute.sting.commandline.Output;
+import org.broadinstitute.sting.commandline.RodBinding;
 import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
 import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
 import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@@ -24,6 +26,12 @@ public class MergeAndMatchHaplotypes extends RodWalker {
     @Output
     protected VCFWriter vcfWriter = null;
 
+    @Input(fullName="pbt", shortName = "pbt", doc="Input VCF truth file", required=true)
+    public RodBinding pbtTrack;
+
+    @Input(fullName="rbp", shortName = "rbp", doc="Input VCF truth file", required=true)
+    public RodBinding rbpTrack;
+
     private Map pbtCache = new HashMap();
     private Map rbpCache = new HashMap();
 
@@ -31,7 +39,7 @@ public class MergeAndMatchHaplotypes extends RodWalker {
 
     public void initialize() {
         ArrayList rodNames = new ArrayList();
-        rodNames.add("pbt");
+        rodNames.add(pbtTrack.getName());
 
         Map vcfRods = VCFUtils.getVCFHeadersFromRods(getToolkit(), rodNames);
         Set vcfSamples = SampleUtils.getSampleList(vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE);
@@ -44,8 +52,8 @@ public class MergeAndMatchHaplotypes extends RodWalker {
     @Override
     public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
         if (tracker != null) {
-            Collection pbts = tracker.getVariantContexts(ref, "pbt", null, ref.getLocus(), true, true);
-            Collection rbps = tracker.getVariantContexts(ref, "rbp", null, ref.getLocus(), true, true);
+            Collection pbts = tracker.getValues(pbtTrack, ref.getLocus());
+            Collection rbps = tracker.getValues(rbpTrack, ref.getLocus());
 
             VariantContext pbt = pbts.iterator().hasNext() ? pbts.iterator().next() : null;
             VariantContext rbp = rbps.iterator().hasNext() ? rbps.iterator().next() : null;
@@ -91,7 +99,7 @@ public class MergeAndMatchHaplotypes extends RodWalker {
                 }
 
                 VariantContext newvc = new VariantContext(SOURCE_NAME, pbt.getChr(), pbt.getStart(), pbt.getStart(), pbt.getAlleles(), genotypes, pbt.getNegLog10PError(), pbt.getFilters(), pbt.getAttributes());
-                vcfWriter.add(newvc, ref.getBase());
+                vcfWriter.add(newvc);
             }
         }
 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeMNPsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeMNPsWalker.java
index 5bd438605..14f54ec7d 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeMNPsWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeMNPsWalker.java
@@ -46,7 +46,7 @@ import static org.broadinstitute.sting.utils.codecs.vcf.VCFUtils.getVCFHeadersFr
  * Walks along all variant ROD loci, and merges consecutive sites if they segregate in all samples in the ROD.
  */
 @Allows(value = {DataSource.REFERENCE})
-@Requires(value = {DataSource.REFERENCE}, referenceMetaData = @RMD(name = "variant", type = ReferenceOrderedDatum.class))
+@Requires(value = {DataSource.REFERENCE})
 @By(DataSource.REFERENCE_ORDERED_DATA)
 
 public class MergeMNPsWalker extends RodWalker {
@@ -58,12 +58,9 @@ public class MergeMNPsWalker extends RodWalker {
     @Argument(fullName = "maxGenomicDistanceForMNP", shortName = "maxDistMNP", doc = "The maximum reference-genome distance between consecutive heterozygous sites to permit merging phased VCF records into a MNP record; [default:1]", required = false)
     protected int maxGenomicDistanceForMNP = 1;
 
-    private LinkedList rodNames = null;
+    private String rodName = "variant";
 
     public void initialize() {
-        rodNames = new LinkedList();
-        rodNames.add("variant");
-
         initializeVcfWriter();
     }
 
@@ -77,8 +74,8 @@ public class MergeMNPsWalker extends RodWalker {
         hInfo.addAll(VCFUtils.getHeaderFields(getToolkit()));
         hInfo.add(new VCFHeaderLine("reference", getToolkit().getArguments().referenceFile.getName()));
 
-        Map rodNameToHeader = getVCFHeadersFromRods(getToolkit(), rodNames);
-        vcMergerWriter.writeHeader(new VCFHeader(hInfo, new TreeSet(rodNameToHeader.get(rodNames.get(0)).getGenotypeSamples())));
+        Map rodNameToHeader = getVCFHeadersFromRods(getToolkit(), Arrays.asList(rodName));
+        vcMergerWriter.writeHeader(new VCFHeader(hInfo, new TreeSet(rodNameToHeader.get(rodName).getGenotypeSamples())));
     }
 
     public boolean generateExtendedEvents() {
@@ -101,9 +98,7 @@ public class MergeMNPsWalker extends RodWalker {
         if (tracker == null)
             return null;
 
-        boolean requireStartHere = true; // only see each VariantContext once
-        boolean takeFirstOnly = false; // take as many entries as the VCF file has
-        for (VariantContext vc : tracker.getVariantContexts(ref, rodNames, null, context.getLocation(), requireStartHere, takeFirstOnly))
+        for (VariantContext vc : tracker.getValues(VariantContext.class, rodName, context.getLocation()))
             writeVCF(vc);
 
         return 0;
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesVCFWriter.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesVCFWriter.java
index b0491a281..53cfaa3a9 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesVCFWriter.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesVCFWriter.java
@@ -118,7 +118,7 @@ public class MergeSegregatingAlternateAllelesVCFWriter implements VCFWriter {
             innerWriter.close();
     }
 
-    public void add(VariantContext vc, byte refBase) {
+    public void add(VariantContext vc) {
         if (useSingleSample != null) { // only want to output context for one sample
             Genotype sampGt = vc.getGenotype(useSingleSample);
             if (sampGt != null) // TODO: subContextFromGenotypes() does not handle any INFO fields [AB, HaplotypeScore, MQ, etc.].  Note that even SelectVariants.subsetRecord() only handles AC,AN,AF, and DP!
@@ -138,11 +138,11 @@ public class MergeSegregatingAlternateAllelesVCFWriter implements VCFWriter {
 
             if (curVcIsNotFiltered) { // still need to wait before can release vc
                 logger.debug("Waiting for new variant " + VariantContextUtils.getLocation(genomeLocParser, vc));
-                vcfrWaitingToMerge = new VCFRecord(vc, refBase, false);
+                vcfrWaitingToMerge = new VCFRecord(vc, false);
             }
             else if (!emitOnlyMergedRecords) { // filtered records are never merged
                 logger.debug("DIRECTLY output " + VariantContextUtils.getLocation(genomeLocParser, vc));
-                innerWriter.add(vc, refBase);
+                innerWriter.add(vc);
             }
         }
         else { // waiting to merge vcfrWaitingToMerge
@@ -151,7 +151,7 @@ public class MergeSegregatingAlternateAllelesVCFWriter implements VCFWriter {
             if (!curVcIsNotFiltered) {
                 if (!emitOnlyMergedRecords) { // filtered records are never merged
                     logger.debug("Caching unprocessed output " + VariantContextUtils.getLocation(genomeLocParser, vc));
-                    filteredVcfrList.add(new VCFRecord(vc, refBase, false));
+                    filteredVcfrList.add(new VCFRecord(vc, false));
                 }
             }
             else { // waiting to merge vcfrWaitingToMerge, and curVcIsNotFiltered. So, attempt to merge them:
@@ -188,14 +188,14 @@ public class MergeSegregatingAlternateAllelesVCFWriter implements VCFWriter {
                         addedAttribs.putAll(mergedVc.getAttributes());
                         mergedVc = VariantContext.modifyAttributes(mergedVc, addedAttribs);
 
-                        vcfrWaitingToMerge = new VCFRecord(mergedVc, vcfrWaitingToMerge.refBase, true);
+                        vcfrWaitingToMerge = new VCFRecord(mergedVc, true);
                         numMergedRecords++;
                     }
                 }
 
                 if (!mergedRecords) {
                     stopWaitingToMerge();
-                    vcfrWaitingToMerge = new VCFRecord(vc, refBase, false);
+                    vcfrWaitingToMerge = new VCFRecord(vc, false);
                 }
                 logger.debug("Merged? = " + mergedRecords);
             }
@@ -210,11 +210,11 @@ public class MergeSegregatingAlternateAllelesVCFWriter implements VCFWriter {
         }
 
         if (!emitOnlyMergedRecords || vcfrWaitingToMerge.resultedFromMerge)
-            innerWriter.add(vcfrWaitingToMerge.vc, vcfrWaitingToMerge.refBase);
+            innerWriter.add(vcfrWaitingToMerge.vc);
         vcfrWaitingToMerge = null;
 
         for (VCFRecord vcfr : filteredVcfrList)
-            innerWriter.add(vcfr.vc, vcfr.refBase);
+            innerWriter.add(vcfr.vc);
         filteredVcfrList.clear();
     }
 
@@ -257,12 +257,10 @@ public class MergeSegregatingAlternateAllelesVCFWriter implements VCFWriter {
 
     private static class VCFRecord {
         public VariantContext vc;
-        public byte refBase;
         public boolean resultedFromMerge;
 
-        public VCFRecord(VariantContext vc, byte refBase, boolean resultedFromMerge) {
+        public VCFRecord(VariantContext vc, boolean resultedFromMerge) {
             this.vc = vc;
-            this.refBase = refBase;
             this.resultedFromMerge = resultedFromMerge;
         }
     }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesWalker.java
index be15d4541..e1be5e5c5 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesWalker.java
@@ -49,7 +49,7 @@ import static org.broadinstitute.sting.utils.codecs.vcf.VCFUtils.getVCFHeadersFr
  * Walks along all variant ROD loci, and merges consecutive sites if some sample has segregating alt alleles in the ROD.
  */
 @Allows(value = {DataSource.REFERENCE})
-@Requires(value = {DataSource.REFERENCE}, referenceMetaData = @RMD(name = "variant", type = ReferenceOrderedDatum.class))
+@Requires(value = {DataSource.REFERENCE})
 @By(DataSource.REFERENCE_ORDERED_DATA)
 
 public class MergeSegregatingAlternateAllelesWalker extends RodWalker {
@@ -81,12 +81,9 @@ public class MergeSegregatingAlternateAllelesWalker extends RodWalker rodNames = null;
+    private String rodName = "variant";
 
     public void initialize() {
-        rodNames = new LinkedList();
-        rodNames.add("variant");
-
         initializeVcfWriter();
     }
 
@@ -114,8 +111,8 @@ public class MergeSegregatingAlternateAllelesWalker extends RodWalker rodNameToHeader = getVCFHeadersFromRods(getToolkit(), rodNames);
-        vcMergerWriter.writeHeader(new VCFHeader(hInfo, new TreeSet(rodNameToHeader.get(rodNames.get(0)).getGenotypeSamples())));
+        Map rodNameToHeader = getVCFHeadersFromRods(getToolkit(), Arrays.asList(rodName));
+        vcMergerWriter.writeHeader(new VCFHeader(hInfo, new TreeSet(rodNameToHeader.get(rodName).getGenotypeSamples())));
     }
 
     public boolean generateExtendedEvents() {
@@ -138,9 +135,7 @@ public class MergeSegregatingAlternateAllelesWalker extends RodWalker {
+
+    @ArgumentCollection
+    protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
+
     @Argument(shortName="f", fullName="familySpec", required=true, doc="Patterns for the family structure (usage: mom+dad=child).  Specify several trios by supplying this argument many times and/or a file containing many patterns.")
     public ArrayList familySpecs = null;
 
     @Output
     protected VCFWriter vcfWriter = null;
 
-    private final String ROD_NAME = "variant";
     private final String TRANSMISSION_PROBABILITY_TAG_NAME = "TP";
     private final String SOURCE_NAME = "PhaseByTransmission";
 
@@ -102,7 +107,7 @@ public class PhaseByTransmission extends RodWalker {
         trios = getFamilySpecsFromCommandLineInput(familySpecs);
 
         ArrayList rodNames = new ArrayList();
-        rodNames.add(ROD_NAME);
+        rodNames.add(variantCollection.variants.getName());
 
         Map vcfRods = VCFUtils.getVCFHeadersFromRods(getToolkit(), rodNames);
         Set vcfSamples = SampleUtils.getSampleList(vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE);
@@ -234,7 +239,7 @@ public class PhaseByTransmission extends RodWalker {
         finalGenotypes.add(father);
         finalGenotypes.add(child);
 
-        if (mother.isCalled() && father.isCalled() && child.isCalled() && !(mother.isHet() && father.isHet() && child.isHet())) {
+        if (mother.isCalled() && father.isCalled() && child.isCalled()) {
             ArrayList possibleMotherGenotypes = createAllThreeGenotypes(ref, alt, mother);
             ArrayList possibleFatherGenotypes = createAllThreeGenotypes(ref, alt, father);
             ArrayList possibleChildGenotypes = createAllThreeGenotypes(ref, alt, child);
@@ -265,12 +270,14 @@ public class PhaseByTransmission extends RodWalker {
                 }
             }
 
-            Map attributes = new HashMap();
-            attributes.putAll(bestChildGenotype.getAttributes());
-            attributes.put(TRANSMISSION_PROBABILITY_TAG_NAME, bestPrior*bestConfigurationLikelihood / norm);
-            bestChildGenotype = Genotype.modifyAttributes(bestChildGenotype, attributes);
+            if (!(bestMotherGenotype.isHet() && bestFatherGenotype.isHet() && bestChildGenotype.isHet())) {
+                Map attributes = new HashMap();
+                attributes.putAll(bestChildGenotype.getAttributes());
+                attributes.put(TRANSMISSION_PROBABILITY_TAG_NAME, bestPrior*bestConfigurationLikelihood / norm);
+                bestChildGenotype = Genotype.modifyAttributes(bestChildGenotype, attributes);
 
-            finalGenotypes = getPhasedGenotypes(bestMotherGenotype, bestFatherGenotype, bestChildGenotype);
+                finalGenotypes = getPhasedGenotypes(bestMotherGenotype, bestFatherGenotype, bestChildGenotype);
+            }
         }
 
         return finalGenotypes;
@@ -287,31 +294,29 @@ public class PhaseByTransmission extends RodWalker {
     @Override
     public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
         if (tracker != null) {
-            Collection vcs = tracker.getVariantContexts(ref, ROD_NAME, null, context.getLocation(), true, true);
+            VariantContext vc = tracker.getFirstValue(variantCollection.variants, context.getLocation());
 
-            for (VariantContext vc : vcs) {
-                Map genotypeMap = vc.getGenotypes();
+            Map genotypeMap = vc.getGenotypes();
 
-                for (Trio trio : trios) {
-                    Genotype mother = vc.getGenotype(trio.getMother());
-                    Genotype father = vc.getGenotype(trio.getFather());
-                    Genotype child = vc.getGenotype(trio.getChild());
+            for (Trio trio : trios) {
+                Genotype mother = vc.getGenotype(trio.getMother());
+                Genotype father = vc.getGenotype(trio.getFather());
+                Genotype child = vc.getGenotype(trio.getChild());
 
-                    ArrayList trioGenotypes = phaseTrioGenotypes(vc.getReference(), vc.getAltAlleleWithHighestAlleleCount(), mother, father, child);
+                ArrayList trioGenotypes = phaseTrioGenotypes(vc.getReference(), vc.getAltAlleleWithHighestAlleleCount(), mother, father, child);
 
-                    Genotype phasedMother = trioGenotypes.get(0);
-                    Genotype phasedFather = trioGenotypes.get(1);
-                    Genotype phasedChild = trioGenotypes.get(2);
+                Genotype phasedMother = trioGenotypes.get(0);
+                Genotype phasedFather = trioGenotypes.get(1);
+                Genotype phasedChild = trioGenotypes.get(2);
 
-                    genotypeMap.put(phasedMother.getSampleName(), phasedMother);
-                    genotypeMap.put(phasedFather.getSampleName(), phasedFather);
-                    genotypeMap.put(phasedChild.getSampleName(), phasedChild);
-                }
-
-                VariantContext newvc = VariantContext.modifyGenotypes(vc, genotypeMap);
-
-                vcfWriter.add(newvc, ref.getBase());
+                genotypeMap.put(phasedMother.getSampleName(), phasedMother);
+                genotypeMap.put(phasedFather.getSampleName(), phasedFather);
+                genotypeMap.put(phasedChild.getSampleName(), phasedChild);
             }
+
+            VariantContext newvc = VariantContext.modifyGenotypes(vc, genotypeMap);
+
+            vcfWriter.add(newvc);
         }
 
         return null;
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java
index 9702fd18c..ac4fba4b4 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java
@@ -23,15 +23,13 @@
  */
 package org.broadinstitute.sting.gatk.walkers.phasing;
 
-import org.broadinstitute.sting.commandline.Argument;
-import org.broadinstitute.sting.commandline.Hidden;
-import org.broadinstitute.sting.commandline.Output;
+import org.broadinstitute.sting.commandline.*;
+import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
 import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
 import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
 import org.broadinstitute.sting.gatk.datasources.sample.Sample;
 import org.broadinstitute.sting.gatk.filters.MappingQualityZeroReadFilter;
 import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
-import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
 import org.broadinstitute.sting.gatk.walkers.*;
 import org.broadinstitute.sting.utils.BaseUtils;
 import org.broadinstitute.sting.utils.DisjointSet;
@@ -56,7 +54,7 @@ import static org.broadinstitute.sting.utils.codecs.vcf.VCFUtils.getVCFHeadersFr
  * Walks along all variant ROD loci, caching a user-defined window of VariantContext sites, and then finishes phasing them when they go out of range (using upstream and downstream reads).
  */
 @Allows(value = {DataSource.READS, DataSource.REFERENCE})
-@Requires(value = {DataSource.READS, DataSource.REFERENCE}, referenceMetaData = @RMD(name = "variant", type = ReferenceOrderedDatum.class))
+@Requires(value = {DataSource.READS, DataSource.REFERENCE})
 @By(DataSource.READS)
 
 @ReadFilters({MappingQualityZeroReadFilter.class})
@@ -64,6 +62,13 @@ import static org.broadinstitute.sting.utils.codecs.vcf.VCFUtils.getVCFHeadersFr
 
 public class ReadBackedPhasingWalker extends RodWalker {
     private static final boolean DEBUG = false;
+    /**
+     * The VCF file we are phasing variants from.
+     *
+     * All heterozygous variants found in this VCF file will be phased, where possible
+     */
+    @ArgumentCollection
+    protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
 
     @Output(doc = "File to which variants should be written", required = true)
     protected VCFWriter writer = null;
@@ -98,8 +103,6 @@ public class ReadBackedPhasingWalker extends RodWalker rodNames = null;
-
     public static final String PQ_KEY = "PQ";
 
     // In order to detect phase inconsistencies:
@@ -123,9 +126,6 @@ public class ReadBackedPhasingWalker extends RodWalker();
-        rodNames.add("variant");
-
         /*
          Since we cap each base quality (BQ) by its read's mapping quality (MQ) [in Read.updateBaseAndQuality()], then:
          if minBQ > minMQ, then we require that MQ be >= minBQ as well.
@@ -175,8 +175,9 @@ public class ReadBackedPhasingWalker extends RodWalker rodNameToHeader = getVCFHeadersFromRods(getToolkit(), rodNames);
-        Set samples = new TreeSet(samplesToPhase == null ? rodNameToHeader.get(rodNames.get(0)).getGenotypeSamples() : samplesToPhase);
+        String trackName = variantCollection.variants.getName();
+        Map rodNameToHeader = getVCFHeadersFromRods(getToolkit(), Arrays.asList(trackName));
+        Set samples = new TreeSet(samplesToPhase == null ? rodNameToHeader.get(trackName).getGenotypeSamples() : samplesToPhase);
         writer.writeHeader(new VCFHeader(hInfo, samples));
     }
 
@@ -207,9 +208,7 @@ public class ReadBackedPhasingWalker extends RodWalker unprocessedList = new LinkedList();
 
-        boolean requireStartHere = true; // only see each VariantContext once
-        boolean takeFirstOnly = false; // take as many entries as the VCF file has
-        for (VariantContext vc : tracker.getVariantContexts(ref, rodNames, null, context.getLocation(), requireStartHere, takeFirstOnly)) {
+        for (VariantContext vc : tracker.getValues(variantCollection.variants, context.getLocation())) {
             if (samplesToPhase != null) vc = reduceVCToSamples(vc, samplesToPhase);
 
             if (ReadBackedPhasingWalker.processVariantInPhasing(vc)) {
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/WriteVCF.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/WriteVCF.java
index 2851ace0d..c10eaa2da 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/WriteVCF.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/WriteVCF.java
@@ -25,20 +25,10 @@ package org.broadinstitute.sting.gatk.walkers.phasing;
 
 import org.apache.log4j.Logger;
 import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter;
-import org.broadinstitute.sting.utils.variantcontext.Allele;
 import org.broadinstitute.sting.utils.variantcontext.VariantContext;
 
 public class WriteVCF {
     public static void writeVCF(VariantContext vc, VCFWriter writer, Logger logger) {
-        byte refBase;
-        if (!vc.isIndel()) {
-            Allele refAllele = vc.getReference();
-            refBase = SNPallelePair.getSingleBase(refAllele);
-        }
-        else {
-            refBase = vc.getReferenceBaseForIndel();
-        }
-
-        writer.add(vc, refBase);
+        writer.add(vc);
     }
 }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountIntervals.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountIntervals.java
index 2bdd4558f..640cff2ba 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountIntervals.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountIntervals.java
@@ -1,5 +1,6 @@
 package org.broadinstitute.sting.gatk.walkers.qc;
 
+import org.broad.tribble.Feature;
 import org.broadinstitute.sting.commandline.Argument;
 import org.broadinstitute.sting.commandline.Output;
 import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
@@ -36,7 +37,7 @@ public class CountIntervals extends RefWalker {
             return null;
         }
 
-        List checkIntervals = tracker.getGATKFeatureMetaData("check",false);
+        List checkIntervals = tracker.getValues(Feature.class, "check");
         return (long) checkIntervals.size();
     }
 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/RodSystemValidationWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/RodSystemValidationWalker.java
index 170630b77..1c24f3879 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/RodSystemValidationWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/RodSystemValidationWalker.java
@@ -1,12 +1,39 @@
+/*
+ * Copyright (c) 2011, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
 package org.broadinstitute.sting.gatk.walkers.qc;
 
 import org.broadinstitute.sting.commandline.Argument;
+import org.broadinstitute.sting.commandline.Input;
 import org.broadinstitute.sting.commandline.Output;
+import org.broadinstitute.sting.commandline.RodBinding;
 import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
 import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
 import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
 import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
 import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
+import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
 import org.broadinstitute.sting.gatk.walkers.Reference;
 import org.broadinstitute.sting.gatk.walkers.RodWalker;
 import org.broadinstitute.sting.gatk.walkers.Window;
@@ -29,6 +56,9 @@ public class RodSystemValidationWalker extends RodWalker {
     // the divider to use in some of the text output
     private static final String DIVIDER = ",";
 
+    @Input(fullName="eval", shortName = "eval", doc="Input VCF eval file", required=true)
+    public List> eval;
+
     @Output
     public PrintStream out;
 
@@ -73,18 +103,17 @@ public class RodSystemValidationWalker extends RodWalker {
     @Override
     public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
         int ret = 0;
-        if (tracker != null && tracker.getAllRods().size() > 0) {
+        if (tracker != null && tracker.getNTracksWithBoundFeatures() > 0) {
             out.print(context.getLocation() + DIVIDER);
-            Collection features = tracker.getAllRods();
-            for (GATKFeature feat : features)
-                out.print(feat.getName() + DIVIDER);
+            for (RODRecordList rod: tracker.getBoundRodTracks())
+                out.print(rod.getName() + DIVIDER);
             out.println(";");
             ret++;
         }
 
         // if the argument was set, check for equivalence
         if (allRecordsVariantContextEquivalent && tracker != null) {
-            Collection col = tracker.getAllVariantContexts(ref);
+            Collection col = tracker.getValues(eval);
             VariantContext con = null;
             for (VariantContext contextInList : col)
                 if (con == null) con = contextInList;
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ValidatingPileupWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ValidatingPileupWalker.java
index e1e6c4b69..bd25a73e0 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ValidatingPileupWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ValidatingPileupWalker.java
@@ -26,7 +26,9 @@
 package org.broadinstitute.sting.gatk.walkers.qc;
 
 import org.broadinstitute.sting.commandline.Argument;
+import org.broadinstitute.sting.commandline.Input;
 import org.broadinstitute.sting.commandline.Output;
+import org.broadinstitute.sting.commandline.RodBinding;
 import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
 import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
 import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@@ -45,8 +47,11 @@ import java.util.Arrays;
  * each overlapping read, and quality score) to the reference pileup data generated by samtools.  Samtools' pileup data
  * should be specified using the command-line argument '-B pileup,SAMPileup,'.
  */
-@Requires(value={DataSource.READS,DataSource.REFERENCE},referenceMetaData=@RMD(name="pileup",type=SAMPileupFeature.class))
+@Requires(value={DataSource.READS,DataSource.REFERENCE})
 public class ValidatingPileupWalker extends LocusWalker  implements TreeReducible {
+    @Input(fullName = "pileup", doc="The SAMPileup containing the expected output", required = true)
+    RodBinding pileup;
+
     @Output
     private PrintStream out;
 
@@ -130,17 +135,17 @@ public class ValidatingPileupWalker extends LocusWalker 0;
         
         // Only use data from non-dbsnp sites
         // Assume every mismatch at a non-dbsnp site is indicative of poor quality
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java
index fec7ee4e6..a34719b18 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java
@@ -32,7 +32,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
 import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
 import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
 import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
-import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;
+import org.broadinstitute.sting.gatk.refdata.features.DbSNPHelper;
 import org.broadinstitute.sting.gatk.walkers.*;
 import org.broadinstitute.sting.utils.QualityUtils;
 import org.broadinstitute.sting.utils.Utils;
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java
index cb03d4c61..7653f511f 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java
@@ -8,7 +8,9 @@ import org.broadinstitute.sting.alignment.bwa.BWAConfiguration;
 import org.broadinstitute.sting.alignment.bwa.BWTFiles;
 import org.broadinstitute.sting.alignment.bwa.c.BWACAligner;
 import org.broadinstitute.sting.commandline.Argument;
+import org.broadinstitute.sting.commandline.Input;
 import org.broadinstitute.sting.commandline.Output;
+import org.broadinstitute.sting.commandline.RodBinding;
 import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
 import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
 import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@@ -35,9 +37,17 @@ import java.util.List;
  * Time: 2:12 PM
  * To change this template use File | Settings | File Templates.
  */
-@Requires(value={DataSource.REFERENCE}, referenceMetaData={@RMD(name="ProbeIntervals",type=TableFeature.class),
-@RMD(name="ValidateAlleles",type=VariantContext.class),@RMD(name="MaskAlleles",type=VariantContext.class)})
+@Requires(value={DataSource.REFERENCE})
 public class ValidationAmplicons extends RodWalker {
+    @Input(fullName = "ProbeIntervals", doc="Chris document me", required=true)
+    RodBinding probeIntervals;
+
+    @Input(fullName = "ValidateAlleles", doc="Chris document me", required=true)
+    RodBinding validateAlleles;
+
+    @Input(fullName = "MaskAlleles", doc="Chris document me", required=true)
+    RodBinding maskAlleles;
+
 
     @Argument(doc="Lower case SNPs rather than replacing with 'N'",fullName="lowerCaseSNPs",required=false)
     boolean lowerCaseSNPs = false;
@@ -99,9 +109,10 @@ public class ValidationAmplicons extends RodWalker {
     }
 
     public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
-        if ( tracker == null || ! tracker.hasROD("ProbeIntervals")) { return null; }
+        if ( tracker == null || ! tracker.hasValues(probeIntervals)) { return null; }
 
-        GenomeLoc interval = ((TableFeature) tracker.getReferenceMetaData("ProbeIntervals",true).get(0)).getLocation();
+        TableFeature feature = tracker.getFirstValue(probeIntervals);
+        GenomeLoc interval = feature.getLocation();
         //logger.debug(interval);
         if ( prevInterval == null || ! interval.equals(prevInterval) ) {
             // we're in a new interval, we should:
@@ -129,16 +140,16 @@ public class ValidationAmplicons extends RodWalker {
             rawSequence = new StringBuilder();
             sequenceInvalid = false;
             invReason = new LinkedList();
-            logger.debug(Utils.join("\t",((TableFeature) tracker.getReferenceMetaData("ProbeIntervals",true).get(0)).getAllValues()));
-            probeName = ((TableFeature) tracker.getReferenceMetaData("ProbeIntervals",true).get(0)).getValue(1);
+            logger.debug(Utils.join("\t",feature.getAllValues()));
+            probeName = feature.getValue(1);
             indelCounter = 0;
         }
 
         // step 3 (or 1 if not new):
         // build up the sequence
 
-        VariantContext mask = tracker.getVariantContext(ref,"MaskAlleles",ref.getLocus());
-        VariantContext validate = tracker.getVariantContext(ref,"ValidateAlleles",ref.getLocus());
+        VariantContext mask = tracker.getFirstValue(maskAlleles, ref.getLocus());
+        VariantContext validate = tracker.getFirstValue(validateAlleles,ref.getLocus());
 
         if ( mask == null && validate == null ) {
             if ( indelCounter > 0 ) {
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java
index fe3173506..c26729ed3 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java
@@ -9,7 +9,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
 import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
 import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
 import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
-import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;
+import org.broadinstitute.sting.gatk.refdata.features.DbSNPHelper;
 import org.broadinstitute.sting.gatk.report.GATKReport;
 import org.broadinstitute.sting.gatk.report.GATKReportTable;
 import org.broadinstitute.sting.gatk.walkers.Reference;
@@ -67,7 +67,7 @@ public class VariantEvalWalker extends RodWalker implements Tr
     @Argument(fullName="stratificationModule", shortName="ST", doc="One or more specific stratification modules to apply to the eval track(s) (in addition to the standard stratifications, unless -noS is specified)", required=false)
     protected String[] STRATIFICATIONS_TO_USE = {};
 
-    @Argument(fullName="doNotUseAllStandardStratifications", shortName="noST", doc="Do not use the standard stratification modules by default (instead, only those that are specified with the -S option)")
+    @Argument(fullName="doNotUseAllStandardStratifications", shortName="noST", doc="Do not use the standard stratification modules by default (instead, only those that are specified with the -S option)", required=false)
     protected Boolean NO_STANDARD_STRATIFICATIONS = false;
 
     @Argument(fullName="onlyVariantsOfType", shortName="VT", doc="If provided, only variants of these types will be considered during the evaluation, in ", required=false)
@@ -77,7 +77,7 @@ public class VariantEvalWalker extends RodWalker implements Tr
     @Argument(fullName="evalModule", shortName="EV", doc="One or more specific eval modules to apply to the eval track(s) (in addition to the standard modules, unless -noE is specified)", required=false)
     protected String[] MODULES_TO_USE = {};
 
-    @Argument(fullName="doNotUseAllStandardModules", shortName="noEV", doc="Do not use the standard modules by default (instead, only those that are specified with the -E option)")
+    @Argument(fullName="doNotUseAllStandardModules", shortName="noEV", doc="Do not use the standard modules by default (instead, only those that are specified with the -E option)", required=false)
     protected Boolean NO_STANDARD_MODULES = false;
 
     // Other arguments
@@ -231,6 +231,22 @@ public class VariantEvalWalker extends RodWalker implements Tr
                     for ( String sampleName : sampleNamesForStratification ) {
                         VariantContext eval = vcs.containsKey(evalName) && vcs.get(evalName) != null ? vcs.get(evalName).get(sampleName) : null;
 
+                        // todo: Eric, this is really the problem.  We select single eval and comp VCs independently
+                        // todo: discarding multiple eval tracks at the sites and not providing matched comps
+                        // todo: where appropriate.  Really this loop should look like:
+                        // todo: for each eval track:
+                        // todo:   for each eval in track:
+                        // todo:     for each compTrack:
+                        // todo:       comp = findMatchingComp(eval, compTrack) // find the matching comp in compTrack
+                        // todo:       call evalModule(eval, comp)
+                        // todo:       // findMatchingComp may return null if no such comp exists, but proceed anyway, as eval modules may need to see the eval / null pair
+                        // todo:       for each comp not matched by an eval in compTrack:
+                        // todo:         call evalModule(null, comp)
+                        // todo:         // need to call with null eval, as modules may need to see the null / comp pair
+                        // todo: note that the reason Kiran pre-computed the possible VCs is to apply the modifiers
+                        // todo: like subset to sample, etc.  So you probably will want a master map that maps
+                        // todo: from special eval bindings to the digested VC for efficiency.
+
                         if ( typesToUse != null ) {
                             if ( eval != null && ! typesToUse.contains(eval.getType()) ) eval = null;
                             if ( comp != null && ! typesToUse.contains(comp.getType()) ) comp = null;
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CompOverlap.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CompOverlap.java
index 255a54737..2ea64c49c 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CompOverlap.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CompOverlap.java
@@ -76,9 +76,7 @@ public class CompOverlap extends VariantEvaluator implements StandardEval {
 
     public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
         boolean evalIsGood = eval != null && eval.isVariant();
-        boolean expectingIndels = eval != null && eval.isIndel();
-
-        boolean compIsGood = expectingIndels ? comp != null && comp.isNotFiltered() && comp.isIndel() : comp != null && comp.isNotFiltered() && comp.isSNP() ;
+        boolean compIsGood = comp != null && comp.isNotFiltered() && (eval == null || comp.getType() == eval.getType());
 
         if (compIsGood) nCompVariants++;           // count the number of comp events
         if (evalIsGood) nEvalVariants++;           // count the number of eval events
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantEvaluator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantEvaluator.java
index e29e7ed50..83a1c2f3b 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantEvaluator.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantEvaluator.java
@@ -8,6 +8,8 @@ import org.broadinstitute.sting.gatk.walkers.varianteval.util.NewEvaluationConte
 import org.broadinstitute.sting.gatk.walkers.varianteval.util.StateKey;
 import org.broadinstitute.sting.utils.variantcontext.VariantContext;
 
+import java.util.Collection;
+
 public abstract class VariantEvaluator {
     public void initialize(VariantEvalWalker walker) {}
 
@@ -17,25 +19,18 @@ public abstract class VariantEvaluator {
     public abstract int getComparisonOrder();
 
     // called at all sites, regardless of eval context itself; useful for counting processed bases
-    public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { }
+    public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
 
-    public String update1(VariantContext vc1, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
+    }
+
+    public String update1(VariantContext eval, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
         return null;
     }
 
-    public String update1(VariantContext vc1, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context, NewEvaluationContext group) {
-        return update1(vc1, tracker, ref, context);
-    }
-
-
-    public String update2(VariantContext vc1, VariantContext vc2, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
+    public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
         return null;
     }
 
-    public String update2(VariantContext vc1, VariantContext vc2, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context, NewEvaluationContext group) {
-        return update2(vc1, vc2, tracker, ref, context);
-    }
-
     public void finalizeEvaluation() {}
 
     protected double rate(long n, long d) {
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java
index a0973a088..d2e4392a5 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java
@@ -5,24 +5,17 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
 import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp;
 import org.broadinstitute.sting.utils.variantcontext.VariantContext;
 
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.EnumSet;
-import java.util.Set;
+import java.util.*;
 
 public class Novelty extends VariantStratifier implements StandardStratification {
     // needs the variant contexts and known names
     private Set knownNames;
-    private ArrayList states;
+    final private ArrayList states = new ArrayList(Arrays.asList("all", "known", "novel"));
+
 
     @Override
     public void initialize(Set jexlExpressions, Set compNames, Set knownNames, Set evalNames, Set sampleNames, Set contigNames) {
         this.knownNames = knownNames;
-
-        states = new ArrayList();
-        states.add("all");
-        states.add("known");
-        states.add("novel");
     }
 
     public ArrayList getAllStates() {
@@ -30,29 +23,18 @@ public class Novelty extends VariantStratifier implements StandardStratification
     }
 
     public ArrayList getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
-        boolean isNovel = true;
-
-        if (tracker != null) {
-            for (String knownName : knownNames) {
-                if (tracker.hasROD(knownName)) {
-                    EnumSet allowableTypes = EnumSet.of(VariantContext.Type.NO_VARIATION);
-                    if (eval != null) {
-                        allowableTypes.add(eval.getType());
+        if (tracker != null && eval != null) {
+            for (final String knownName : knownNames) {
+                final Collection knownComps = tracker.getValues(VariantContext.class, knownName, ref.getLocus());
+                for ( final VariantContext c : knownComps ) {
+                    // loop over the known variants at this locus, looking for one whose type matches that of eval
+                    if ( eval.getType() == c.getType() ) {
+                        return new ArrayList(Arrays.asList("all", "known"));
                     }
-
-                    Collection knownComps = tracker.getVariantContexts(ref, knownName, allowableTypes, ref.getLocus(), true, true);
-
-                    isNovel = knownComps.size() == 0;
-
-                    break;
                 }
             }
         }
 
-        ArrayList relevantStates = new ArrayList();
-        relevantStates.add("all");
-        relevantStates.add(isNovel ? "novel" : "known");
-
-        return relevantStates;
+        return new ArrayList(Arrays.asList("all", "novel"));
     }
 }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java
index 0a915db37..61a959c99 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java
@@ -270,30 +270,7 @@ public class VariantEvalUtils {
                                                                    Set compNames,
                                                                    Set evalNames,
                                                                    boolean dynamicSelectTypes ) {
-        if ( dynamicSelectTypes ) { // todo -- this code is really conceptually broken
-            EnumSet allowableTypes = EnumSet.of(VariantContext.Type.NO_VARIATION);
-
-            if (tracker != null) {
-                Collection evalvcs = tracker.getVariantContexts(ref, evalNames, null, ref.getLocus(), true, false);
-
-                for (VariantContext vc : evalvcs) {
-                    allowableTypes.add(vc.getType());
-                }
-
-                if (allowableTypes.size() == 1) {
-                    // We didn't find any variation in the eval track, so now let's look at the comp track for allowable types
-                    Collection compvcs = tracker.getVariantContexts(ref, compNames, null, ref.getLocus(), true, false);
-
-                    for (VariantContext vc : compvcs) {
-                        allowableTypes.add(vc.getType());
-                    }
-                }
-            }
-
-            return allowableTypes;
-        } else {
-            return EnumSet.allOf(VariantContext.Type.class);
-        }
+        return EnumSet.allOf(VariantContext.Type.class);
     }
 
     /**
@@ -353,14 +330,11 @@ public class VariantEvalUtils {
      *                       to do this)
      * @return a mapping of track names to a list of VariantContext objects
      */
-    public HashMap> bindVariantContexts(RefMetaDataTracker tracker, ReferenceContext ref, Set trackNames, EnumSet allowableTypes, boolean byFilter, boolean subsetBySample, boolean trackPerSample) {
-        HashMap> bindings = new HashMap>();
-
+    protected void bindVariantContexts(HashMap> bindings, RefMetaDataTracker tracker, ReferenceContext ref, Set trackNames, EnumSet allowableTypes, boolean byFilter, boolean subsetBySample, boolean trackPerSample) {
         for (String trackName : trackNames) {
             HashMap vcs = new HashMap();
 
-            Collection contexts = tracker == null ? null : tracker.getVariantContexts(ref, trackName, allowableTypes, ref.getLocus(), true, true);
-            VariantContext vc = contexts != null && contexts.size() == 1 ? contexts.iterator().next() : null;
+            VariantContext vc = tracker == null ? null : tracker.getFirstValue(VariantContext.class, trackName, ref.getLocus());
 
             // First, filter the VariantContext to represent only the samples for evaluation
             if (vc != null) {
@@ -388,8 +362,6 @@ public class VariantEvalUtils {
                 bindings.put(trackName, vcs);
             }
         }
-
-        return bindings;
     }
 
     /**
@@ -417,11 +389,8 @@ public class VariantEvalUtils {
             }
         }
 
-        HashMap> evalBindings = bindVariantContexts(tracker, ref, evalNames, allowableTypes, byFilter, true, perSampleIsEnabled);
-        HashMap> compBindings = bindVariantContexts(tracker, ref, compNames, allowableTypes, byFilter, false, false);
-
-        vcs.putAll(compBindings);
-        vcs.putAll(evalBindings);
+        bindVariantContexts(vcs, tracker, ref, evalNames, allowableTypes, byFilter, true, perSampleIsEnabled);
+        bindVariantContexts(vcs, tracker, ref, compNames, allowableTypes, byFilter, false, false);
 
         return vcs;
     }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java
index b195fd35f..abe27e483 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java
@@ -28,9 +28,9 @@ package org.broadinstitute.sting.gatk.walkers.variantrecalibration;
 import org.broadinstitute.sting.commandline.Argument;
 import org.broadinstitute.sting.commandline.Input;
 import org.broadinstitute.sting.commandline.Output;
+import org.broadinstitute.sting.commandline.RodBinding;
 import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
 import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
-import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
 import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
 import org.broadinstitute.sting.gatk.walkers.RodWalker;
 import org.broadinstitute.sting.utils.SampleUtils;
@@ -56,6 +56,11 @@ public class ApplyRecalibration extends RodWalker {
     /////////////////////////////
     // Inputs
     /////////////////////////////
+    /**
+     * The raw input variants to be recalibrated.
+     */
+    @Input(fullName="input", shortName = "input", doc="The raw input variants to be recalibrated", required=true)
+    public List> input;
     @Input(fullName="recal_file", shortName="recalFile", doc="The output recal file used by ApplyRecalibration", required=true)
     private File RECAL_FILE;
     @Input(fullName="tranches_file", shortName="tranchesFile", doc="The input tranches file describing where to cut the data", required=true)
@@ -101,17 +106,8 @@ public class ApplyRecalibration extends RodWalker {
         }
         Collections.reverse(tranches); // this algorithm wants the tranches ordered from best (lowest truth sensitivity) to worst (highest truth sensitivity)
 
-        for( final ReferenceOrderedDataSource d : this.getToolkit().getRodDataSources() ) {
-            if( d.getName().startsWith("input") ) {
-                inputNames.add(d.getName());
-                logger.info("Found input variant track with name " + d.getName());
-            } else {
-                logger.info("Not evaluating ROD binding " + d.getName());
-            }
-        }
-
-        if( inputNames.size() == 0 ) {
-            throw new UserException.BadInput( "No input variant tracks found. Input variant binding names must begin with 'input'." );
+        for( final RodBinding rod : input ) {
+            inputNames.add( rod.getName() );
         }
 
         if( IGNORE_INPUT_FILTERS != null ) {
@@ -168,7 +164,7 @@ public class ApplyRecalibration extends RodWalker {
             return 1;
         }
 
-        for( VariantContext vc : tracker.getVariantContexts(ref, inputNames, null, context.getLocation(), true, false) ) {
+        for( VariantContext vc : tracker.getValues(input, context.getLocation()) ) {
             if( vc != null ) {
                 if( VariantRecalibrator.checkRecalibrationMode( vc, MODE ) && (vc.isNotFiltered() || ignoreInputFilterSet.containsAll(vc.getFilters())) ) {
                     String filterString = null;
@@ -204,9 +200,9 @@ public class ApplyRecalibration extends RodWalker {
                         filters.add(filterString);
                         vc = VariantContext.modifyFilters(vc, filters);
                     }
-                    vcfWriter.add( VariantContext.modifyPErrorFiltersAndAttributes(vc, vc.getNegLog10PError(), vc.getFilters(), attrs), ref.getBase() );
+                    vcfWriter.add( VariantContext.modifyPErrorFiltersAndAttributes(vc, vc.getNegLog10PError(), vc.getFilters(), attrs) );
                 } else { // valid VC but not compatible with this mode, so just emit the variant untouched
-                    vcfWriter.add( vc, ref.getBase() );
+                    vcfWriter.add( vc );
                 }
             }
         }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/TrainingSet.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/TrainingSet.java
deleted file mode 100755
index 6c1a7ddbc..000000000
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/TrainingSet.java
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * Copyright (c) 2011 The Broad Institute
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
- * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-package org.broadinstitute.sting.gatk.walkers.variantrecalibration;
-
-import org.apache.log4j.Logger;
-import org.broadinstitute.sting.commandline.Tags;
-
-/**
- * Created by IntelliJ IDEA.
- * User: rpoplin
- * Date: 3/12/11
- */
-
-public class TrainingSet {
-
-    public String name;
-    public boolean isKnown = false;
-    public boolean isTraining = false;
-    public boolean isAntiTraining = false;
-    public boolean isTruth = false;
-    public boolean isConsensus = false;
-    public double prior = 0.0;
-
-    protected final static Logger logger = Logger.getLogger(TrainingSet.class);
-
-    public TrainingSet( final String name, final Tags tags ) {
-        this.name = name;
-
-        // Parse the tags to decide which tracks have which properties
-        if( tags != null ) {
-            isKnown = tags.containsKey("known") && tags.getValue("known").equals("true");
-            isTraining = tags.containsKey("training") && tags.getValue("training").equals("true");
-            isAntiTraining = tags.containsKey("bad") && tags.getValue("bad").equals("true");
-            isTruth = tags.containsKey("truth") && tags.getValue("truth").equals("true");
-            isConsensus = tags.containsKey("consensus") && tags.getValue("consensus").equals("true");
-            prior = ( tags.containsKey("prior") ? Double.parseDouble(tags.getValue("prior")) : prior );
-        }
-
-        // Report back to the user which tracks were found and the properties that were detected
-        if( !isConsensus && !isAntiTraining ) {
-            logger.info( String.format( "Found %s track: \tKnown = %s \tTraining = %s \tTruth = %s \tPrior = Q%.1f", this.name, isKnown, isTraining, isTruth, prior) );
-        } else if( isConsensus ) {
-            logger.info( String.format( "Found consensus track: %s", this.name) );
-        } else {
-            logger.info( String.format( "Found bad sites training track: %s", this.name) );
-        }
-    }
-}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java
index 67d54a408..cb4d94332 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java
@@ -26,10 +26,10 @@
 package org.broadinstitute.sting.gatk.walkers.variantrecalibration;
 
 import org.apache.log4j.Logger;
+import org.broadinstitute.sting.commandline.RodBinding;
 import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
-import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
-import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
 import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
+import org.broadinstitute.sting.utils.GenomeLoc;
 import org.broadinstitute.sting.utils.MathUtils;
 import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
 import org.broadinstitute.sting.utils.exceptions.UserException;
@@ -38,6 +38,7 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
 import java.io.PrintStream;
 import java.util.ArrayList;
 import java.util.Collections;
+import java.util.HashMap;
 import java.util.List;
 
 /**
@@ -51,7 +52,6 @@ public class VariantDataManager {
     private final double[] meanVector;
     private final double[] varianceVector; // this is really the standard deviation
     public final ArrayList annotationKeys;
-    private final ExpandingArrayList trainingSets;
     private final VariantRecalibratorArgumentCollection VRAC;
     protected final static Logger logger = Logger.getLogger(VariantDataManager.class);
 
@@ -62,7 +62,6 @@ public class VariantDataManager {
         this.VRAC = VRAC;
         meanVector = new double[this.annotationKeys.size()];
         varianceVector = new double[this.annotationKeys.size()];
-        trainingSets = new ExpandingArrayList();
     }
 
     public void setData( final ExpandingArrayList data ) {
@@ -105,31 +104,6 @@ public class VariantDataManager {
         }
     }
 
-    public void addTrainingSet( final TrainingSet trainingSet ) {
-        trainingSets.add( trainingSet );
-    }
-
-    public boolean checkHasTrainingSet() {
-        for( final TrainingSet trainingSet : trainingSets ) {
-            if( trainingSet.isTraining ) { return true; }
-        }
-        return false;
-    }
-
-    public boolean checkHasTruthSet() {
-        for( final TrainingSet trainingSet : trainingSets ) {
-            if( trainingSet.isTruth ) { return true; }
-        }
-        return false;
-    }
-
-    public boolean checkHasKnownSet() {
-        for( final TrainingSet trainingSet : trainingSets ) {
-            if( trainingSet.isKnown ) { return true; }
-        }
-        return false;
-    }
-
     public ExpandingArrayList getTrainingData() {
         final ExpandingArrayList trainingData = new ExpandingArrayList();
         for( final VariantDatum datum : data ) {
@@ -240,6 +214,15 @@ public class VariantDataManager {
             if( jitter && annotationKey.equalsIgnoreCase("HRUN") ) { // Integer valued annotations must be jittered a bit to work in this GMM
                   value += -0.25 + 0.5 * GenomeAnalysisEngine.getRandomGenerator().nextDouble();
             }
+
+            if (vc.isIndel() && annotationKey.equalsIgnoreCase("QD")) {
+            // normalize QD by event length for indel case
+                int eventLength = Math.abs(vc.getAlternateAllele(0).getBaseString().length() - vc.getReference().getBaseString().length()); // ignore multi-allelic complication here for now
+                if (eventLength > 0) { // sanity check
+                    value /= (double)eventLength;
+                }
+            }
+
             if( jitter && annotationKey.equalsIgnoreCase("HaplotypeScore") && MathUtils.compareDoubles(value, 0.0, 0.0001) == 0 ) { value = -0.2 + 0.4*GenomeAnalysisEngine.getRandomGenerator().nextDouble(); }
             if( jitter && annotationKey.equalsIgnoreCase("FS") && MathUtils.compareDoubles(value, 0.0, 0.001) == 0 ) { value = -0.2 + 0.4*GenomeAnalysisEngine.getRandomGenerator().nextDouble(); }
         } catch( Exception e ) {
@@ -249,30 +232,44 @@ public class VariantDataManager {
         return value;
     }
 
-    public void parseTrainingSets( final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context, final VariantContext evalVC, final VariantDatum datum, final boolean TRUST_ALL_POLYMORPHIC ) {
+    public void parseTrainingSets( final RefMetaDataTracker tracker, final GenomeLoc genomeLoc, final VariantContext evalVC, final VariantDatum datum, final boolean TRUST_ALL_POLYMORPHIC, final HashMap rodToPriorMap,
+                                   final List> training, final List> truth, final List> known, final List> badSites) {
         datum.isKnown = false;
         datum.atTruthSite = false;
         datum.atTrainingSite = false;
         datum.atAntiTrainingSite = false;
         datum.prior = 2.0;
-        datum.consensusCount = 0;
 
-        for( final TrainingSet trainingSet : trainingSets ) {
-            for( final VariantContext trainVC : tracker.getVariantContexts( ref, trainingSet.name, null, context.getLocation(), false, false ) ) {
-                if( trainVC != null && trainVC.isNotFiltered() && trainVC.isVariant() &&
-                        ((evalVC.isSNP() && trainVC.isSNP()) || ((evalVC.isIndel()||evalVC.isMixed()) && (trainVC.isIndel()||trainVC.isMixed()))) &&
-                        (TRUST_ALL_POLYMORPHIC || !trainVC.hasGenotypes() || trainVC.isPolymorphic()) ) {
-
-                    datum.isKnown = datum.isKnown || trainingSet.isKnown;
-                    datum.atTruthSite = datum.atTruthSite || trainingSet.isTruth;
-                    datum.atTrainingSite = datum.atTrainingSite || trainingSet.isTraining;
-                    datum.prior = Math.max( datum.prior, trainingSet.prior );
-                    datum.consensusCount += ( trainingSet.isConsensus ? 1 : 0 );
+        for( final RodBinding rod : training ) {
+            for( final VariantContext trainVC : tracker.getValues(rod, genomeLoc) ) {
+                if( isValidVariant( evalVC, trainVC, TRUST_ALL_POLYMORPHIC ) ) {
+                    datum.atTrainingSite = true;
+                    datum.prior = Math.max( datum.prior, (rodToPriorMap.containsKey(rod.getName()) ? rodToPriorMap.get(rod.getName()) : 0.0) );
                 }
+            }
+        }
+        for( final RodBinding rod : truth ) {
+            for( final VariantContext trainVC : tracker.getValues(rod, genomeLoc) ) {
+                if( isValidVariant( evalVC, trainVC, TRUST_ALL_POLYMORPHIC ) ) {
+                    datum.atTruthSite = true;
+                    datum.prior = Math.max( datum.prior, (rodToPriorMap.containsKey(rod.getName()) ? rodToPriorMap.get(rod.getName()) : 0.0) );
+                }
+            }
+        }
+        for( final RodBinding rod : known ) {
+            for( final VariantContext trainVC : tracker.getValues(rod, genomeLoc) ) {
+                if( isValidVariant( evalVC, trainVC, TRUST_ALL_POLYMORPHIC ) ) {
+                    datum.isKnown = true;
+                    datum.prior = Math.max( datum.prior, (rodToPriorMap.containsKey(rod.getName()) ? rodToPriorMap.get(rod.getName()) : 0.0) );
+                }
+            }
+        }
+        for( final RodBinding rod : badSites ) {
+            for( final VariantContext trainVC : tracker.getValues(rod, genomeLoc) ) {
                 if( trainVC != null ) {
-                    datum.atAntiTrainingSite = datum.atAntiTrainingSite || trainingSet.isAntiTraining;
+                    datum.atAntiTrainingSite = true;
+                    datum.prior = Math.max( datum.prior, (rodToPriorMap.containsKey(rod.getName()) ? rodToPriorMap.get(rod.getName()) : 0.0) );
                 }
-
             }
         }
     }
@@ -284,4 +281,10 @@ public class VariantDataManager {
                     (datum.worstAnnotation != -1 ? annotationKeys.get(datum.worstAnnotation) : "NULL")));
         }
     }
+
+    private boolean isValidVariant( final VariantContext evalVC, final VariantContext trainVC, final boolean TRUST_ALL_POLYMORPHIC) { // decides whether trainVC counts as a match for evalVC when flagging training/truth/known sites
+        return trainVC != null && trainVC.isNotFiltered() && trainVC.isVariant() && // the resource record must exist, be unfiltered, and be variant
+                        ((evalVC.isSNP() && trainVC.isSNP()) || ((evalVC.isIndel()||evalVC.isMixed()) && (trainVC.isIndel()||trainVC.isMixed()))) && // types must agree: SNP with SNP, or indel/mixed with indel/mixed
+                        (TRUST_ALL_POLYMORPHIC || !trainVC.hasGenotypes() || trainVC.isPolymorphic()); // a record with genotypes must be polymorphic unless TRUST_ALL_POLYMORPHIC is set
+    }
 }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java
index 76c888640..da9da936b 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java
@@ -25,13 +25,9 @@
 
 package org.broadinstitute.sting.gatk.walkers.variantrecalibration;
 
-import org.broadinstitute.sting.commandline.Argument;
-import org.broadinstitute.sting.commandline.ArgumentCollection;
-import org.broadinstitute.sting.commandline.Hidden;
-import org.broadinstitute.sting.commandline.Output;
+import org.broadinstitute.sting.commandline.*;
 import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
 import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
-import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
 import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
 import org.broadinstitute.sting.gatk.walkers.RodWalker;
 import org.broadinstitute.sting.gatk.walkers.TreeReducible;
@@ -57,11 +53,51 @@ import java.util.*;
 
 public class VariantRecalibrator extends RodWalker, ExpandingArrayList> implements TreeReducible> {
 
-    public static final String VQS_LOD_KEY = "VQSLOD";
-    public static final String CULPRIT_KEY = "culprit";
+    public static final String VQS_LOD_KEY = "VQSLOD"; // Log odds ratio of being a true variant versus being false under the trained gaussian mixture model
+    public static final String CULPRIT_KEY = "culprit"; // The annotation which was the worst performing in the Gaussian mixture model, likely the reason why the variant was filtered out
 
     @ArgumentCollection private VariantRecalibratorArgumentCollection VRAC = new VariantRecalibratorArgumentCollection();
 
+    /////////////////////////////
+    // Inputs
+    /////////////////////////////
+    /**
+     * The raw input variants to be recalibrated.
+     */
+    @Input(fullName="input", shortName = "input", doc="The raw input variants to be recalibrated", required=true)
+    public List> input;
+    /**
+     * A list of training variants used to train the Gaussian mixture model.
+     *
+     * Input variants which are found to overlap with these training sites are used to build the Gaussian mixture model.
+     */
+    @Input(fullName="training", shortName = "training", doc="A list of training variants used to train the Gaussian mixture model", required=true)
+    public List> training;
+    /**
+     * A list of true variants to be used when deciding the truth sensitivity cut of the final callset.
+     *
+     * When deciding where to set the cutoff in VQSLOD, sensitivity to these truth sites is used.
+     * Typically one might want to say, "I dropped my threshold until I got back 99% of HapMap sites," for example.
+     */
+    @Input(fullName="truth", shortName = "truth", doc="A list of true variants to be used when deciding the truth sensitivity cut of the final callset", required=true)
+    public List> truth;
+    /**
+     * A list of known variants to be used for metric comparison purposes.
+     *
+     * The known / novel status of a variant isn't used by the algorithm itself and is only used for reporting / display purposes.
+     * The output metrics are stratified by known status in order to aid in comparisons with other call sets.
+     */
+    @Input(fullName="known", shortName = "known", doc="A list of known variants to be used for metric comparison purposes", required=false)
+    public List> known = Collections.emptyList();
+    /**
+     * A list of known bad variants used to supplement training the negative model.
+     *
+     * In addition to using the worst 3% of variants as compared to the Gaussian mixture model, we can also supplement the list
+     * with a database of known bad variants. Maybe these are loci which are frequently filtered out in many projects (centromere, for example).
+     */
+    @Input(fullName="badSites", shortName = "badSites", doc="A list of known bad variants used to supplement training the negative model", required=false)
+    public List> badSites = Collections.emptyList();
+
     /////////////////////////////
     // Outputs
     /////////////////////////////
@@ -96,9 +132,9 @@ public class VariantRecalibrator extends RodWalker ignoreInputFilterSet = new TreeSet();
-    private final Set inputNames = new HashSet();
     private final VariantRecalibratorEngine engine = new VariantRecalibratorEngine( VRAC );
+    private final HashMap rodToPriorMap = new HashMap();
 
     //---------------------------------------------------------------------------------------------------------------
     //
@@ -123,31 +159,24 @@ public class VariantRecalibrator extends RodWalker> allInputBindings = new ArrayList>();
+        allInputBindings.addAll(truth);
+        allInputBindings.addAll(training);
+        allInputBindings.addAll(known);
+        allInputBindings.addAll(badSites);
+        for( final RodBinding rod : allInputBindings ) {
+            try {
+                rodToPriorMap.put(rod.getName(), (rod.getTags().containsKey("prior") ? Double.parseDouble(rod.getTags().getValue("prior")) : 0.0) );
+            } catch( NumberFormatException e ) {
+                throw new UserException.BadInput("Bad rod binding syntax. Prior key-value tag detected but isn't parsable. Expecting something like -training:prior=12.0 my.set.vcf");
+            }
+        }
     }
 
     //---------------------------------------------------------------------------------------------------------------
@@ -163,10 +192,12 @@ public class VariantRecalibrator extends RodWalker {
+    /**
+     * The VCF files to merge together
+     *
+     * variants can take any number of arguments on the command line.  Each -V argument
+     * will be included in the final merged output VCF.  If no explicit name is provided,
+     * the -V arguments will be named using the default algorithm: variants, variants2, variants3, etc.
+     * The user can override this by providing an explicit name -V:name,vcf for each -V argument,
+     * and each named argument will be labeled as such in the output (i.e., set=name rather than
+     * set=variants2).  The order of arguments does not matter except for the naming, so
+     * if you provide a rod priority list and no explicit names then variants, variants2, etc.
+     * are technically order dependent.  It is strongly recommended to provide explicit names when
+     * a rod priority list is provided.
+     */
+    @Input(fullName="variant", shortName = "V", doc="Input VCF file", required=true)
+    public List> variants;
 
     @Output(doc="File to which variants should be written",required=true)
     protected VCFWriter vcfWriter = null;
@@ -87,10 +97,6 @@ public class CombineVariants extends RodWalker {
     @Argument(fullName="minimumN", shortName="minN", doc="Combine variants and output site only if variant is present in at least N input files.", required=false)
     public int minimumN = 1;
 
-    @Hidden
-    @Argument(fullName="masterMerge", shortName="master", doc="Master merge mode -- experts only.  You need to look at the code to understand it", required=false)
-    public boolean master = false;
-
     @Hidden
     @Argument(fullName="mergeInfoWithMaxAC", shortName="mergeInfoWithMaxAC", doc="If true, when VCF records overlap the info field is taken from the one with the max AC instead of only taking the fields which are identical across the overlapping records.", required=false)
     public boolean MERGE_INFO_WITH_MAX_AC = false;
@@ -150,7 +156,7 @@ public class CombineVariants extends RodWalker {
 
         // get all of the vcf rods at this locus
         // Need to provide reference bases to simpleMerge starting at current locus
-        Collection vcs = tracker.getAllVariantContexts(ref, null, context.getLocation(), true, false);
+        Collection vcs = tracker.getValues(variants, context.getLocation());
 
         if ( sitesOnlyVCF ) {
             vcs = VariantContextUtils.sitesOnlyVariantContexts(vcs);
@@ -158,7 +164,7 @@ public class CombineVariants extends RodWalker {
 
         if ( ASSUME_IDENTICAL_SAMPLES ) {
             for ( final VariantContext vc : vcs ) {
-                vcfWriter.add( vc, ref.getBase() );
+                vcfWriter.add(vc);
             }
             
             return vcs.isEmpty() ? 0 : 1;
@@ -174,17 +180,13 @@ public class CombineVariants extends RodWalker {
             return 0;
         
         List mergedVCs = new ArrayList();
-        if ( master ) {
-            mergedVCs.add(VariantContextUtils.masterMerge(vcs, "master"));
-        } else {
-            Map> VCsByType = VariantContextUtils.separateVariantContextsByType(vcs);
-            // iterate over the types so that it's deterministic
-            for ( VariantContext.Type type : VariantContext.Type.values() ) {
-                if ( VCsByType.containsKey(type) )
-                    mergedVCs.add(VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(), VCsByType.get(type),
-                            priority, filteredRecordsMergeType, genotypeMergeOption, true, printComplexMerges,
-                            ref.getBase(), SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC));
-            }
+        Map> VCsByType = VariantContextUtils.separateVariantContextsByType(vcs);
+        // iterate over the types so that it's deterministic
+        for ( VariantContext.Type type : VariantContext.Type.values() ) {
+            if ( VCsByType.containsKey(type) )
+                mergedVCs.add(VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(), VCsByType.get(type),
+                        priority, filteredRecordsMergeType, genotypeMergeOption, true, printComplexMerges,
+                        SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC));
         }
 
         for ( VariantContext mergedVC : mergedVCs ) {
@@ -198,7 +200,7 @@ public class CombineVariants extends RodWalker {
             VariantContext annotatedMergedVC = VariantContext.modifyAttributes(mergedVC, attributes);
             if ( minimalVCF )
                 annotatedMergedVC = VariantContextUtils.pruneVariantContext(annotatedMergedVC, Arrays.asList(SET_KEY));
-            vcfWriter.add(annotatedMergedVC, ref.getBase());
+            vcfWriter.add(annotatedMergedVC);
         }
 
         return vcs.isEmpty() ? 0 : 1;
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java
index b45ee1b67..4c2222f3a 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java
@@ -24,7 +24,9 @@
 
 package org.broadinstitute.sting.gatk.walkers.variantutils;
 
+import org.broadinstitute.sting.commandline.ArgumentCollection;
 import org.broadinstitute.sting.commandline.Output;
+import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
 import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
 import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
 import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@@ -44,9 +46,11 @@ import java.util.Set;
  * Filters a lifted-over VCF file for ref bases that have been changed.
  */
 @Reference(window=@Window(start=0,stop=100))
-@Requires(value={},referenceMetaData=@RMD(name="variant",type= VariantContext.class))
 public class FilterLiftedVariants extends RodWalker {
 
+    @ArgumentCollection
+    protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
+
     private static final int MAX_VARIANT_SIZE = 100;
 
     @Output(doc="File to which variants should be written",required=true)
@@ -55,10 +59,11 @@ public class FilterLiftedVariants extends RodWalker {
     private long failedLocs = 0, totalLocs = 0;
 
     public void initialize() {
-        Set samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList("variant"));
-        Map vcfHeaders = VCFUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList("variant"));
+        String trackName = variantCollection.variants.getName();
+        Set samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(trackName));
+        Map vcfHeaders = VCFUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList(trackName));
 
-        final VCFHeader vcfHeader = new VCFHeader(vcfHeaders.containsKey("variant") ? vcfHeaders.get("variant").getMetaData() : null, samples);
+        final VCFHeader vcfHeader = new VCFHeader(vcfHeaders.containsKey(trackName) ? vcfHeaders.get(trackName).getMetaData() : null, samples);
         writer.writeHeader(vcfHeader);
     }
 
@@ -78,14 +83,14 @@ public class FilterLiftedVariants extends RodWalker {
         if ( failed )
             failedLocs++;
         else
-            writer.add(vc, ref[0]);
+            writer.add(vc);
     }
 
     public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
         if ( tracker == null )
             return 0;
 
-        Collection VCs = tracker.getVariantContexts(ref, "variant", null, context.getLocation(), true, false);
+        Collection VCs = tracker.getValues(variantCollection.variants, context.getLocation());
         for ( VariantContext vc : VCs )
             filterAndWrite(ref.getBases(), vc);
 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java
index 2ebd183f4..c47a015c6 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java
@@ -28,7 +28,9 @@ package org.broadinstitute.sting.gatk.walkers.variantutils;
 import net.sf.samtools.Cigar;
 import net.sf.samtools.CigarElement;
 import net.sf.samtools.CigarOperator;
+import org.broadinstitute.sting.commandline.ArgumentCollection;
 import org.broadinstitute.sting.commandline.Output;
+import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
 import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
 import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
 import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@@ -46,19 +48,22 @@ import java.util.*;
  * Left-aligns indels from a variants file.
  */
 @Reference(window=@Window(start=-200,stop=200))
-@Requires(value={},referenceMetaData=@RMD(name="variant", type=VariantContext.class))
 public class LeftAlignVariants extends RodWalker {
 
+    @ArgumentCollection
+    protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
+
     @Output(doc="File to which variants should be written",required=true)
     protected VCFWriter baseWriter = null;
 
     private SortingVCFWriter writer;
 
     public void initialize() {
-        Set samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList("variant"));
-        Map vcfHeaders = VCFUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList("variant"));
+        String trackName = variantCollection.variants.getName();
+        Set samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(trackName));
+        Map vcfHeaders = VCFUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList(trackName));
 
-        Set headerLines = vcfHeaders.get("variant").getMetaData();
+        Set headerLines = vcfHeaders.get(trackName).getMetaData();
         baseWriter.writeHeader(new VCFHeader(headerLines, samples));
 
         writer = new SortingVCFWriter(baseWriter, 200);
@@ -68,7 +73,7 @@ public class LeftAlignVariants extends RodWalker {
         if ( tracker == null )
             return 0;
 
-        Collection VCs = tracker.getVariantContexts(ref, "variant", null, context.getLocation(), true, false);
+        Collection VCs = tracker.getValues(variantCollection.variants, context.getLocation());
 
         int changedSites = 0;
         for ( VariantContext vc : VCs )
@@ -90,10 +95,10 @@ public class LeftAlignVariants extends RodWalker {
 
 
     private int alignAndWrite(VariantContext vc, final ReferenceContext ref) {
-        if ( vc.isBiallelic() && vc.isIndel() )
+        if ( vc.isBiallelic() && vc.isIndel() && !vc.isComplexIndel() )
             return writeLeftAlignedIndel(vc, ref);
         else {
-            writer.add(vc, ref.getBase());
+            writer.add(vc);
             return 0;
         }
     }
@@ -109,7 +114,7 @@ public class LeftAlignVariants extends RodWalker {
             indelLength = vc.getAlternateAllele(0).length();
 
         if ( indelLength > 200 ) {
-            writer.add(vc, ref.getBase());
+            writer.add(vc);
             return 0;
         }
 
@@ -137,17 +142,12 @@ public class LeftAlignVariants extends RodWalker {
             byte[] newBases = new byte[indelLength];
             System.arraycopy((vc.isDeletion() ? refSeq : originalIndel), indelIndex, newBases, 0, indelLength);
             Allele newAllele = Allele.create(newBases, vc.isDeletion());
-            newVC = updateAllele(newVC, newAllele);
+            newVC = updateAllele(newVC, newAllele, refSeq[indelIndex-1]);
 
-	    // we need to update the reference base just in case it changed
-	    Map attrs = new HashMap(newVC.getAttributes());
-	    attrs.put(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY, refSeq[indelIndex-1]);
-	    newVC = VariantContext.modifyAttributes(newVC, attrs);
-
-            writer.add(newVC, refSeq[indelIndex-1]);
+            writer.add(newVC);
             return 1;
         } else {
-            writer.add(vc, ref.getBase());
+            writer.add(vc);
             return 0;
         }
     }
@@ -173,7 +173,7 @@ public class LeftAlignVariants extends RodWalker {
         return hap;
     }
 
-    public static VariantContext updateAllele(VariantContext vc, Allele newAllele) {
+    public static VariantContext updateAllele(VariantContext vc, Allele newAllele, Byte refBaseForIndel) {
         // create a mapping from original allele to new allele
         HashMap alleleMap = new HashMap(vc.getAlleles().size());
         if ( newAllele.isReference() ) {
@@ -197,6 +197,6 @@ public class LeftAlignVariants extends RodWalker {
             newGenotypes.put(genotype.getKey(), Genotype.modifyAlleles(genotype.getValue(), newAlleles));
         }
 
-        return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), alleleMap.values(), newGenotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, vc.getAttributes());
+        return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), alleleMap.values(), newGenotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, vc.getAttributes(), refBaseForIndel);
     }
 }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java
index 4f05c8aac..1c76a21ea 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java
@@ -29,13 +29,11 @@ import net.sf.picard.liftover.LiftOver;
 import net.sf.picard.util.Interval;
 import net.sf.samtools.SAMFileHeader;
 import net.sf.samtools.SAMFileReader;
-import org.broadinstitute.sting.commandline.Argument;
-import org.broadinstitute.sting.commandline.Output;
+import org.broadinstitute.sting.commandline.*;
+import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
 import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
 import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
 import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
-import org.broadinstitute.sting.gatk.walkers.RMD;
-import org.broadinstitute.sting.gatk.walkers.Requires;
 import org.broadinstitute.sting.gatk.walkers.RodWalker;
 import org.broadinstitute.sting.utils.SampleUtils;
 import org.broadinstitute.sting.utils.codecs.vcf.*;
@@ -49,9 +47,11 @@ import java.util.*;
 /**
  * Lifts a VCF file over from one build to another.  Note that the resulting VCF could be mis-sorted.
  */
-@Requires(value={},referenceMetaData=@RMD(name="variant", type=VariantContext.class))
 public class LiftoverVariants extends RodWalker {
 
+    @ArgumentCollection
+    protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
+
     @Output(doc="File to which variants should be written",required=true)
     protected File file = null;
     protected StandardVCFWriter writer = null;
@@ -85,12 +85,13 @@ public class LiftoverVariants extends RodWalker {
             throw new UserException.BadInput("the chain file you are using is not compatible with the reference you are trying to lift over to; please use the appropriate chain file for the given reference");    
         }
 
-        Set samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList("variant"));
-        Map vcfHeaders = VCFUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList("variant"));
+        String trackName = variantCollection.variants.getName();
+        Set samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(trackName));
+        Map vcfHeaders = VCFUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList(trackName));
 
         Set metaData = new HashSet();
-        if ( vcfHeaders.containsKey("variant") )
-            metaData.addAll(vcfHeaders.get("variant").getMetaData());
+        if ( vcfHeaders.containsKey(trackName) )
+            metaData.addAll(vcfHeaders.get(trackName).getMetaData());
         if ( RECORD_ORIGINAL_LOCATION ) {
             metaData.add(new VCFInfoHeaderLine("OriginalChr", 1, VCFHeaderLineType.String, "Original contig name for the record"));
             metaData.add(new VCFInfoHeaderLine("OriginalStart", 1, VCFHeaderLineType.Integer, "Original start position for the record"));
@@ -125,14 +126,14 @@ public class LiftoverVariants extends RodWalker {
                 vc = VariantContext.modifyAttributes(vc, attrs);
             }
 
-            VariantContext newVC = VariantContext.createVariantContextWithPaddedAlleles(vc, ref.getBase(), false);
+            VariantContext newVC = VariantContext.createVariantContextWithPaddedAlleles(vc, false);
             if ( originalVC.isSNP() && originalVC.isBiallelic() && VariantContextUtils.getSNPSubstitutionType(originalVC) != VariantContextUtils.getSNPSubstitutionType(newVC) ) {
                 logger.warn(String.format("VCF at %s / %d => %s / %d is switching substitution type %s/%s to %s/%s",
                         originalVC.getChr(), originalVC.getStart(), newVC.getChr(), newVC.getStart(),
                         originalVC.getReference(), originalVC.getAlternateAllele(0), newVC.getReference(), newVC.getAlternateAllele(0)));
             }
 
-            writer.add(vc, ref.getBase());
+            writer.add(vc);
             successfulIntervals++;
         } else {
             failedIntervals++;
@@ -143,7 +144,7 @@ public class LiftoverVariants extends RodWalker {
         if ( tracker == null )
             return 0;
 
-        Collection VCs = tracker.getVariantContexts(ref, "variant", null, context.getLocation(), true, false);
+        Collection VCs = tracker.getValues(variantCollection.variants, context.getLocation());
         for ( VariantContext vc : VCs )
             convertAndWrite(vc, ref);
 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/RandomlySplitVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/RandomlySplitVariants.java
index f0756d884..1fefd20fc 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/RandomlySplitVariants.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/RandomlySplitVariants.java
@@ -24,14 +24,12 @@
 
 package org.broadinstitute.sting.gatk.walkers.variantutils;
 
-import org.broadinstitute.sting.commandline.Argument;
-import org.broadinstitute.sting.commandline.Output;
+import org.broadinstitute.sting.commandline.*;
 import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
+import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
 import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
 import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
 import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
-import org.broadinstitute.sting.gatk.walkers.RMD;
-import org.broadinstitute.sting.gatk.walkers.Requires;
 import org.broadinstitute.sting.gatk.walkers.RodWalker;
 import org.broadinstitute.sting.utils.SampleUtils;
 import org.broadinstitute.sting.utils.codecs.vcf.*;
@@ -39,17 +37,16 @@ import org.broadinstitute.sting.utils.exceptions.UserException;
 import org.broadinstitute.sting.utils.variantcontext.VariantContext;
 
 import java.io.File;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.HashSet;
-import java.util.Set;
+import java.util.*;
 
 /**
  * Takes a VCF file, randomly splits variants into two different sets, and outputs 2 new VCFs with the results.
  */
-@Requires(value={},referenceMetaData=@RMD(name="variant", type=VariantContext.class))
 public class RandomlySplitVariants extends RodWalker {
 
+    @ArgumentCollection
+    protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
+
     @Output(fullName="out1", shortName="o1", doc="File #1 to which variants should be written", required=true)
     protected VCFWriter vcfWriter1 = null;
 
@@ -61,8 +58,6 @@ public class RandomlySplitVariants extends RodWalker {
     @Argument(fullName="fractionToOut1", shortName="fraction", doc="Fraction of records to be placed in out1 (must be 0 >= fraction <= 1); all other records are placed in out2", required=false)
     protected double fraction = 0.5;
 
-    protected static final String INPUT_VARIANT_ROD_BINDING_NAME = "variant";
-
     protected int iFraction;
 
     /**
@@ -74,8 +69,7 @@ public class RandomlySplitVariants extends RodWalker {
         iFraction = (int)(fraction * 1000.0);
 
         // setup the header info
-        final ArrayList inputNames = new ArrayList();
-        inputNames.add( INPUT_VARIANT_ROD_BINDING_NAME );
+        final List inputNames = Arrays.asList(variantCollection.variants.getName());
         Set samples = SampleUtils.getUniqueSamplesFromRods(getToolkit(), inputNames);
         Set hInfo = new HashSet();
         hInfo.addAll(VCFUtils.getHeaderFields(getToolkit(), inputNames));
@@ -97,13 +91,13 @@ public class RandomlySplitVariants extends RodWalker {
         if ( tracker == null )
             return 0;
 
-        Collection vcs = tracker.getVariantContexts(ref, INPUT_VARIANT_ROD_BINDING_NAME, null, context.getLocation(), true, false);
+        Collection vcs = tracker.getValues(variantCollection.variants, context.getLocation());
         for ( VariantContext vc : vcs ) {
             int random = GenomeAnalysisEngine.getRandomGenerator().nextInt(1000);
             if ( random < iFraction )
-                vcfWriter1.add(vc, ref.getBase());
+                vcfWriter1.add(vc);
             else
-                vcfWriter2.add(vc, ref.getBase());
+                vcfWriter2.add(vc);
         }
 
         return 1;
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java
index e1a3659b8..c45ff280b 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java
@@ -24,47 +24,63 @@
 
 package org.broadinstitute.sting.gatk.walkers.variantutils;
 
-import org.broadinstitute.sting.commandline.Hidden;
-import org.broadinstitute.sting.commandline.Input;
+import org.broadinstitute.sting.commandline.*;
+import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
 import org.broadinstitute.sting.utils.MathUtils;
 import org.broadinstitute.sting.utils.codecs.vcf.*;
 import org.broadinstitute.sting.utils.exceptions.UserException;
 import org.broadinstitute.sting.utils.text.XReadLines;
-import org.broadinstitute.sting.utils.variantcontext.*;
 import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
 import org.broadinstitute.sting.utils.MendelianViolation;
 import org.broadinstitute.sting.utils.variantcontext.VariantContext;
 import org.broadinstitute.sting.commandline.Argument;
-import org.broadinstitute.sting.commandline.Hidden;
 import org.broadinstitute.sting.commandline.Output;
-import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
 import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
 import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
 import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
-import org.broadinstitute.sting.gatk.walkers.RMD;
-import org.broadinstitute.sting.gatk.walkers.Requires;
 import org.broadinstitute.sting.gatk.walkers.RodWalker;
-import org.broadinstitute.sting.utils.MathUtils;
-import org.broadinstitute.sting.utils.MendelianViolation;
 import org.broadinstitute.sting.utils.SampleUtils;
-import org.broadinstitute.sting.utils.codecs.vcf.*;
 import org.broadinstitute.sting.utils.variantcontext.Allele;
 import org.broadinstitute.sting.utils.variantcontext.Genotype;
-import org.broadinstitute.sting.utils.variantcontext.VariantContext;
 import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
 
 import java.io.File;
 import java.io.FileNotFoundException;
 import java.io.PrintStream;
-import java.lang.annotation.AnnotationFormatError;
 import java.util.*;
 
 /**
  * Takes a VCF file, selects variants based on sample(s) in which it was found and/or on various annotation criteria,
  * recompute the value of certain annotations based on the new sample set, and output a new VCF with the results.
  */
-@Requires(value={},referenceMetaData=@RMD(name="variant", type=VariantContext.class))
 public class SelectVariants extends RodWalker {
+    /**
+     * The VCF file we are selecting variants from.
+     *
+     * Variants from this file are sent through the filtering and modifying routines as directed
+     * by the arguments to SelectVariants, and finally are emitted.
+     */
+    @ArgumentCollection protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
+
+    /**
+     * If provided, we will output only variants that are "discordant" with the variants in this track.
+     *
+     * A site is considered discordant if there exists some sample in the input variants that has a non-reference genotype
+     * and either the site isn't present in this track, the sample isn't present in this track,
+     * or the sample is called reference in this track.
+     */
+    @Input(fullName="discordance", shortName = "disc", doc="Output variants that were not called in this Feature comparison track", required=false)
+    private RodBinding discordanceTrack = RodBinding.makeUnbound(VariantContext.class);
+
+    /**
+     * If provided, we will filter out any variant in variants that isn't "concordant" with the variants in this track.
+     *
+     * A site is considered concordant if (1) we are not looking for specific samples and there is a variant called
+     * in both variants and concordance tracks or (2) every sample present in the input variants is present in the concordance
+     * track and they have the same genotype call.
+     */
+    @Input(fullName="concordance", shortName = "conc", doc="Output variants that were also called in this Feature comparison track", required=false)
+    private RodBinding concordanceTrack = RodBinding.makeUnbound(VariantContext.class);
 
     @Output(doc="File to which variants should be written",required=true)
     protected VCFWriter vcfWriter = null;
@@ -90,16 +106,6 @@ public class SelectVariants extends RodWalker {
     @Argument(fullName="keepOriginalAC", shortName="keepOriginalAC", doc="Don't include filtered loci.", required=false)
     private boolean KEEP_ORIGINAL_CHR_COUNTS = false;
 
-    @Argument(fullName="discordance", shortName =  "disc", doc="Output variants that were not called on a ROD comparison track. Use -disc ROD_NAME", required=false)
-    private String discordanceRodName = "";
-
-    @Argument(fullName="concordance", shortName =  "conc", doc="Output variants that were also called on a ROD comparison track. Use -conc ROD_NAME", required=false)
-    private String concordanceRodName = "";
-
-    @Hidden
-    @Argument(fullName="inputAF", shortName =  "inputAF", doc="", required=false)
-    private String inputAFRodName = "";
-
     @Hidden
     @Argument(fullName="keepAFSpectrum", shortName="keepAF", doc="Don't include loci found to be non-variant after the subsetting procedure.", required=false)
     private boolean KEEP_AF_SPECTRUM = false;
@@ -140,16 +146,13 @@ public class SelectVariants extends RodWalker {
     /* Private class used to store the intermediate variants in the integer random selection process */
     private class RandomVariantStructure {
         private VariantContext vc;
-        private byte refBase;
 
-        RandomVariantStructure(VariantContext vcP, byte refBaseP) {
+        RandomVariantStructure(VariantContext vcP) {
             vc = vcP;
-            refBase = refBaseP;
         }
 
-        public void set (VariantContext vcP, byte refBaseP) {
+        public void set (VariantContext vcP) {
             vc = vcP;
-            refBase = refBaseP;
         }
 
     }
@@ -165,9 +168,6 @@ public class SelectVariants extends RodWalker {
 
     private Set mvSet = new HashSet();
 
-    /* default name for the variant dataset (VCF) */
-    private final String variantRodName = "variant";
-
 
     /* variables used by the SELECT RANDOM modules */
     private boolean SELECT_RANDOM_NUMBER = false;
@@ -192,8 +192,7 @@ public class SelectVariants extends RodWalker {
      */
     public void initialize() {
         // Get list of samples to include in the output
-        ArrayList rodNames = new ArrayList();
-        rodNames.add(variantRodName);
+        List rodNames = Arrays.asList(variantCollection.variants.getName());
 
         Map vcfRods = VCFUtils.getVCFHeadersFromRods(getToolkit(), rodNames);
         TreeSet vcfSamples = new TreeSet(SampleUtils.getSampleList(vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE));
@@ -235,11 +234,11 @@ public class SelectVariants extends RodWalker {
         jexls = VariantContextUtils.initializeMatchExps(selectNames, SELECT_EXPRESSIONS);
 
         // Look at the parameters to decide which analysis to perform
-        DISCORDANCE_ONLY = discordanceRodName.length() > 0;
-        if (DISCORDANCE_ONLY) logger.info("Selecting only variants discordant with the track: " + discordanceRodName);
+        DISCORDANCE_ONLY = discordanceTrack.isBound();
+        if (DISCORDANCE_ONLY) logger.info("Selecting only variants discordant with the track: " + discordanceTrack.getName());
 
-        CONCORDANCE_ONLY = concordanceRodName.length() > 0;
-        if (CONCORDANCE_ONLY) logger.info("Selecting only variants concordant with the track: " + concordanceRodName);
+        CONCORDANCE_ONLY = concordanceTrack.isBound();
+        if (CONCORDANCE_ONLY) logger.info("Selecting only variants concordant with the track: " + concordanceTrack.getName());
 
         if (MENDELIAN_VIOLATIONS) {
             if ( FAMILY_STRUCTURE_FILE != null) {
@@ -317,7 +316,7 @@ public class SelectVariants extends RodWalker {
         if ( tracker == null )
             return 0;
 
-        Collection vcs = tracker.getVariantContexts(ref, variantRodName, null, context.getLocation(), true, false);
+        Collection vcs = tracker.getValues(variantCollection.variants, context.getLocation());
 
         if ( vcs == null || vcs.size() == 0) {
             return 0;
@@ -345,12 +344,12 @@ public class SelectVariants extends RodWalker {
                     break;
             }
             if (DISCORDANCE_ONLY) {
-                Collection compVCs = tracker.getVariantContexts(ref, discordanceRodName, null, context.getLocation(), true, false);
+                Collection compVCs = tracker.getValues(discordanceTrack, context.getLocation());
                 if (!isDiscordant(vc, compVCs))
                     return 0;
             }
             if (CONCORDANCE_ONLY) {
-                Collection compVCs = tracker.getVariantContexts(ref, concordanceRodName, null, context.getLocation(), true, false);
+                Collection compVCs = tracker.getValues(concordanceTrack, context.getLocation());
                 if (!isConcordant(vc, compVCs))
                     return 0;
             }
@@ -374,7 +373,7 @@ public class SelectVariants extends RodWalker {
                     randomlyAddVariant(++variantNumber, sub, ref.getBase());
                 }
                 else if (!SELECT_RANDOM_FRACTION || (!KEEP_AF_SPECTRUM && GenomeAnalysisEngine.getRandomGenerator().nextDouble() < fractionRandom)) {
-                    vcfWriter.add(sub, ref.getBase());
+                    vcfWriter.add(sub);
                 }
                 else {
                     if (SELECT_RANDOM_FRACTION && KEEP_AF_SPECTRUM ) {
@@ -422,7 +421,7 @@ public class SelectVariants extends RodWalker {
 
                             //System.out.format("%s .. %4.4f\n",afo.toString(), af);
                             if (GenomeAnalysisEngine.getRandomGenerator().nextDouble() < fractionRandom * afBoost *   afBoost)
-                                vcfWriter.add(sub, ref.getBase());
+                                vcfWriter.add(sub);
                         }
 
 
@@ -529,7 +528,7 @@ public class SelectVariants extends RodWalker {
         if (SELECT_RANDOM_NUMBER) {
             int positionToPrint = positionToAdd;
             for (int i=0; i {
 
         VariantContext sub = vc.subContextFromGenotypes(genotypes, vc.getAlleles());
 
+        // if the selected VC has fewer alleles than the original VC, the GL/PLs are no longer accurate: strip them from the subsetted genotypes (not the original ones, which would re-add unselected samples)
+        if ( vc.getAlleles().size() != sub.getAlleles().size() )
+            sub = VariantContext.modifyGenotypes(sub, VariantContextUtils.stripPLs(sub.getGenotypes()));
+
         HashMap attributes = new HashMap(sub.getAttributes());
 
         int depth = 0;
@@ -592,13 +595,13 @@ public class SelectVariants extends RodWalker {
 
     private void randomlyAddVariant(int rank, VariantContext vc, byte refBase) {
         if (nVariantsAdded < numRandom)
-            variantArray[nVariantsAdded++] = new RandomVariantStructure(vc, refBase);
+            variantArray[nVariantsAdded++] = new RandomVariantStructure(vc);
 
         else {
             double v = GenomeAnalysisEngine.getRandomGenerator().nextDouble();
             double t = (1.0/(rank-numRandom+1));
             if ( v < t) {
-                variantArray[positionToAdd].set(vc, refBase);
+                variantArray[positionToAdd].set(vc);
                 nVariantsAdded++;
                 positionToAdd = nextCircularPosition(positionToAdd);
             }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java
index 0644c669b..5c7fb268c 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java
@@ -25,15 +25,16 @@
 
 package org.broadinstitute.sting.gatk.walkers.variantutils;
 
+import org.broad.tribble.Feature;
 import org.broad.tribble.TribbleException;
 import org.broad.tribble.dbsnp.DbSNPFeature;
-import org.broadinstitute.sting.commandline.Argument;
-import org.broadinstitute.sting.commandline.Hidden;
+import org.broadinstitute.sting.commandline.*;
+import org.broadinstitute.sting.gatk.arguments.DbsnpArgumentCollection;
+import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
 import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
 import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
-import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
 import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
-import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;
+import org.broadinstitute.sting.gatk.refdata.features.DbSNPHelper;
 import org.broadinstitute.sting.gatk.walkers.*;
 import org.broadinstitute.sting.utils.exceptions.UserException;
 import org.broadinstitute.sting.utils.variantcontext.Allele;
@@ -50,10 +51,13 @@ import java.util.Set;
  * Validates a variants file.
  */
 @Reference(window=@Window(start=0,stop=100))
-@Requires(value={},referenceMetaData=@RMD(name=ValidateVariants.TARGET_ROD_NAME, type=VariantContext.class))
 public class ValidateVariants extends RodWalker {
 
-    protected static final String TARGET_ROD_NAME = "variant";
+    @ArgumentCollection
+    protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
+
+    @ArgumentCollection
+    protected DbsnpArgumentCollection dbsnp = new DbsnpArgumentCollection();
 
     public enum ValidationType {
         ALL, REF, IDS, ALLELES, CHR_COUNTS
@@ -74,19 +78,14 @@ public class ValidateVariants extends RodWalker {
     private File file = null;
 
     public void initialize() {
-        for ( ReferenceOrderedDataSource source : getToolkit().getRodDataSources() ) {
-            if ( source.getName().equals(TARGET_ROD_NAME) ) {
-                file = source.getFile();
-                break;
-            }
-        }
+        file = new File(variantCollection.variants.getSource());
     }
 
     public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
         if ( tracker == null )
             return 0;
 
-        Collection VCs = tracker.getVariantContexts(ref, "variant", null, context.getLocation(), true, false);
+        Collection VCs = tracker.getValues(variantCollection.variants, context.getLocation());
         for ( VariantContext vc : VCs )
             validate(vc, tracker, ref);
 
@@ -142,22 +141,24 @@ public class ValidateVariants extends RodWalker {
 
         // get the RS IDs
         Set rsIDs = null;
-        if ( tracker.hasROD(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME) ) {
-            List dbsnpList = tracker.getReferenceMetaData(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME);
+        if ( tracker.hasValues(dbsnp.dbsnp) ) {
+            List dbsnpList = tracker.getValues(dbsnp.dbsnp, ref.getLocus());
             rsIDs = new HashSet();
             for ( Object d : dbsnpList ) {
                 if (d instanceof DbSNPFeature )
                     rsIDs.add(((DbSNPFeature)d).getRsID());
+                else if (d instanceof VariantContext )
+                    rsIDs.add(((VariantContext)d).getID());
             }
         }
 
         try {
             switch( type ) {
                 case ALL:
-                    vc.extraStrictValidation(observedRefAllele, rsIDs);
+                    vc.extraStrictValidation(observedRefAllele, ref.getBase(), rsIDs);
                     break;
                 case REF:
-                    vc.validateReferenceBases(observedRefAllele);
+                    vc.validateReferenceBases(observedRefAllele, ref.getBase());
                     break;
                 case IDS:
                     vc.validateRSIDs(rsIDs);
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java
index 86bb3b0e8..6ed0bbd16 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java
@@ -26,7 +26,9 @@
 package org.broadinstitute.sting.gatk.walkers.variantutils;
 
 import org.broadinstitute.sting.commandline.Argument;
+import org.broadinstitute.sting.commandline.Input;
 import org.broadinstitute.sting.commandline.Output;
+import org.broadinstitute.sting.commandline.RodBinding;
 import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
 import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
 import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@@ -34,7 +36,6 @@ import org.broadinstitute.sting.gatk.walkers.*;
 import org.broadinstitute.sting.utils.QualityUtils;
 import org.broadinstitute.sting.utils.SampleUtils;
 import org.broadinstitute.sting.utils.codecs.vcf.*;
-import org.broadinstitute.sting.utils.collections.Pair;
 import org.broadinstitute.sting.utils.variantcontext.Allele;
 import org.broadinstitute.sting.utils.variantcontext.VariantContext;
 import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
@@ -45,10 +46,9 @@ import java.util.*;
  * Converts Sequenom files to a VCF annotated with QC metrics (HW-equilibrium, % failed probes)
  */
 @Reference(window=@Window(start=0,stop=40))
-@Requires(value={},referenceMetaData=@RMD(name=VariantValidationAssessor.INPUT_VARIANT_ROD_BINDING_NAME, type=VariantContext.class))
-public class VariantValidationAssessor extends RodWalker,Integer> {
-
-    public static final String INPUT_VARIANT_ROD_BINDING_NAME = "variant";
+public class VariantValidationAssessor extends RodWalker {
+    @Input(fullName="variants", shortName = "V", doc="Input VCF file", required=true)
+    public RodBinding variants;
 
     @Output(doc="File to which variants should be written",required=true)
     protected VCFWriter vcfwriter = null;
@@ -68,7 +68,7 @@ public class VariantValidationAssessor extends RodWalker sampleNames = null;
 
     // variant context records
-    private ArrayList> records = new ArrayList>();
+    private ArrayList records = new ArrayList();
 
     // statistics
     private int numRecords = 0;
@@ -89,11 +89,11 @@ public class VariantValidationAssessor extends RodWalker map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
+    public VariantContext map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
         if ( tracker == null )
             return null;
 
-        VariantContext vc = tracker.getVariantContext(ref, INPUT_VARIANT_ROD_BINDING_NAME, ref.getLocus());
+        VariantContext vc = tracker.getFirstValue(variants, ref.getLocus());
         // ignore places where we don't have a variant
         if ( vc == null )
             return null;
@@ -104,7 +104,7 @@ public class VariantValidationAssessor extends RodWalker call, Integer numVariants) {
+    public Integer reduce(VariantContext call, Integer numVariants) {
         if ( call != null ) {
             numVariants++;
             records.add(call);
@@ -113,8 +113,7 @@ public class VariantValidationAssessor extends RodWalker inputNames = new ArrayList();
-        inputNames.add( INPUT_VARIANT_ROD_BINDING_NAME );
+        final List inputNames = Arrays.asList(variants.getName());
 
         // setup the header fields
         Set hInfo = new HashSet();
@@ -155,12 +154,12 @@ public class VariantValidationAssessor extends RodWalker record : records )
-            vcfwriter.add(record.first, record.second);
+        for ( VariantContext record : records )
+            vcfwriter.add(record);
     }
 
 
-    private Pair addVariantInformationToCall(ReferenceContext ref, VariantContext vContext) {
+    private VariantContext addVariantInformationToCall(ReferenceContext ref, VariantContext vContext) {
 
         // check possible filters
         double hwPvalue = hardyWeinbergCalculation(vContext);
@@ -202,9 +201,7 @@ public class VariantValidationAssessor extends RodWalker(vContext, ref.getBase());
+        return VariantContext.modifyAttributes(vContext, infoMap);
     }
 
     private double hardyWeinbergCalculation(VariantContext vc) {
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java
index 39358dad5..af3593ce4 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java
@@ -24,14 +24,13 @@
 
 package org.broadinstitute.sting.gatk.walkers.variantutils;
 
+import org.broadinstitute.sting.commandline.*;
+import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
 import org.broadinstitute.sting.utils.MathUtils;
 import org.broadinstitute.sting.utils.variantcontext.VariantContext;
-import org.broadinstitute.sting.commandline.Argument;
-import org.broadinstitute.sting.commandline.Output;
 import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
 import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
 import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
-import org.broadinstitute.sting.gatk.walkers.Requires;
 import org.broadinstitute.sting.gatk.walkers.RodWalker;
 import org.broadinstitute.sting.utils.Utils;
 import org.broadinstitute.sting.utils.exceptions.UserException;
@@ -43,8 +42,11 @@ import java.util.*;
 /**
  * Emits specific fields as dictated by the user from one or more VCF files.
  */
-@Requires(value={})
 public class VariantsToTable extends RodWalker {
+
+    @ArgumentCollection
+    protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
+
     @Output(doc="File to which results should be written",required=true)
     protected PrintStream out;
 
@@ -78,8 +80,8 @@ public class VariantsToTable extends RodWalker {
         getters.put("REF", new Getter() {
             public String get(VariantContext vc) {
                 String x = "";
-                if (vc.hasAttribute(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY)) {
-                    Byte refByte = (Byte)(vc.getAttribute(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY));
+                if ( vc.hasReferenceBaseForIndel() ) {
+                    Byte refByte = vc.getReferenceBaseForIndel();
                     x=x+new String(new byte[]{refByte});
                 }
                 return x+vc.getReference().getDisplayString();
@@ -90,8 +92,8 @@ public class VariantsToTable extends RodWalker {
                 StringBuilder x = new StringBuilder();
                 int n = vc.getAlternateAlleles().size();
                 if ( n == 0 ) return ".";
-                if (vc.hasAttribute(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY)) {
-                    Byte refByte = (Byte)(vc.getAttribute(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY));
+                if ( vc.hasReferenceBaseForIndel() ) {
+                    Byte refByte = vc.getReferenceBaseForIndel();
                     x.append(new String(new byte[]{refByte}));                    
                 }
 
@@ -132,8 +134,7 @@ public class VariantsToTable extends RodWalker {
             return 0;
 
         if ( ++nRecords < MAX_RECORDS || MAX_RECORDS == -1 ) {
-            Collection vcs = tracker.getAllVariantContexts(ref, context.getLocation());
-            for ( VariantContext vc : vcs) {
+            for ( VariantContext vc : tracker.getValues(variantCollection.variants, context.getLocation())) {
                 if ( (keepMultiAllelic || vc.isBiallelic()) && ( showFiltered || vc.isNotFiltered() ) ) {
                     List vals = extractFields(vc, fieldsToTake, ALLOW_MISSING_DATA);
                     out.println(Utils.join("\t", vals));
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java
index aa0e5987f..f9e9562ca 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java
@@ -26,20 +26,21 @@
 package org.broadinstitute.sting.gatk.walkers.variantutils;
 
 import net.sf.samtools.util.CloseableIterator;
-import org.broad.tribble.dbsnp.DbSNPCodec;
-import org.broad.tribble.dbsnp.DbSNPFeature;
+import org.broad.tribble.Feature;
 import org.broadinstitute.sting.commandline.Argument;
+import org.broadinstitute.sting.commandline.Input;
 import org.broadinstitute.sting.commandline.Output;
+import org.broadinstitute.sting.commandline.RodBinding;
 import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
 import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
-import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
 import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
 import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors;
-import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder;
+import org.broadinstitute.sting.gatk.refdata.features.DbSNPHelper;
+import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder;
 import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
-import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;
 import org.broadinstitute.sting.gatk.walkers.*;
 import org.broadinstitute.sting.utils.BaseUtils;
+import org.broadinstitute.sting.utils.GenomeLoc;
 import org.broadinstitute.sting.utils.SampleUtils;
 import org.broadinstitute.sting.utils.codecs.hapmap.HapMapFeature;
 import org.broadinstitute.sting.utils.codecs.vcf.*;
@@ -49,31 +50,34 @@ import org.broadinstitute.sting.utils.variantcontext.Genotype;
 import org.broadinstitute.sting.utils.variantcontext.VariantContext;
 import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
 
+import java.io.File;
 import java.util.*;
 
 /**
  * Converts variants from other file formats to VCF format.
  */
-@Requires(value={},referenceMetaData=@RMD(name=VariantsToVCF.INPUT_ROD_NAME, type=VariantContext.class))
 @Reference(window=@Window(start=-40,stop=40))
 public class VariantsToVCF extends RodWalker {
 
     @Output(doc="File to which variants should be written",required=true)
     protected VCFWriter baseWriter = null;
-    private SortingVCFWriter vcfwriter; // needed because hapmap indel records move
+    private SortingVCFWriter vcfwriter; // needed because hapmap/dbsnp indel records move
 
-    public static final String INPUT_ROD_NAME = "variant";
+    @Input(fullName="variant", shortName = "V", doc="Input variant file", required=true)
+    public RodBinding variants;
+
+    @Input(fullName="dbsnp", shortName = "D", doc="dbSNP VCF for populating rsIDs", required=false)
+    public RodBinding dbsnp;
 
     @Argument(fullName="sample", shortName="sample", doc="The sample name represented by the variant rod (for data like GELI with genotypes)", required=false)
     protected String sampleName = null;
 
+    @Argument(fullName="fixRef", shortName="fixRef", doc="Fix common reference base in case there's an indel without padding", required=false)
+    protected boolean fixReferenceBase = false;
+
     private Set allowedGenotypeFormatStrings = new HashSet();
     private boolean wroteHeader = false;
 
-    // Don't allow mixed types for now
-    private EnumSet ALLOWED_VARIANT_CONTEXT_TYPES = EnumSet.of(VariantContext.Type.SNP,
-            VariantContext.Type.NO_VARIATION, VariantContext.Type.INDEL, VariantContext.Type.MNP);
-
     // for dealing with indels in hapmap
     CloseableIterator dbsnpIterator = null;
 
@@ -85,7 +89,7 @@ public class VariantsToVCF extends RodWalker {
         if ( tracker == null || !BaseUtils.isRegularBase(ref.getBase()) )
             return 0;
 
-        String rsID = DbSNPHelper.rsIDOfFirstRealSNP(tracker.getReferenceMetaData(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME));
+        String rsID = dbsnp == null ? null : DbSNPHelper.rsIDOfFirstRealSNP(tracker.getValues(dbsnp, context.getLocation()));
 
         Collection contexts = getVariantContexts(tracker, ref);
 
@@ -97,108 +101,96 @@ public class VariantsToVCF extends RodWalker {
             }
 
             // set the appropriate sample name if necessary
-            if ( sampleName != null && vc.hasGenotypes() && vc.hasGenotype(INPUT_ROD_NAME) ) {
-                Genotype g = Genotype.modifyName(vc.getGenotype(INPUT_ROD_NAME), sampleName);
+            if ( sampleName != null && vc.hasGenotypes() && vc.hasGenotype(variants.getName()) ) {
+                Genotype g = Genotype.modifyName(vc.getGenotype(variants.getName()), sampleName);
                 Map genotypes = new HashMap();
                 genotypes.put(sampleName, g);
                 vc = VariantContext.modifyGenotypes(vc, genotypes);
             }
 
-            writeRecord(vc, tracker, ref.getBase());
+            if ( fixReferenceBase ) {
+                vc = VariantContext.modifyReferencePadding(vc, ref.getBase());
+            }
+
+            writeRecord(vc, tracker, ref.getLocus());
         }
 
         return 1;
     }
 
     private Collection getVariantContexts(RefMetaDataTracker tracker, ReferenceContext ref) {
-        // we need to special case the HapMap format because indels aren't handled correctly
-        List features = tracker.getReferenceMetaData(INPUT_ROD_NAME, true);
-        if ( features.size() > 0 && features.get(0) instanceof HapMapFeature ) {
-            ArrayList hapmapVCs = new ArrayList(features.size());
-            for ( Object feature : features ) {
-                HapMapFeature hapmap = (HapMapFeature)feature;
-                Byte refBase = null;
 
-                // if it's an indel, we need to figure out the alleles
-                if ( hapmap.getAlleles()[0].equals("-") ) {
-                    Map alleleMap = new HashMap(2);
+        List features = tracker.getValues(variants, ref.getLocus());
+        List VCs = new ArrayList(features.size());
 
-                    // get the dbsnp object corresponding to this record, so we can learn whether this is an insertion or deletion
-                    DbSNPFeature dbsnp = getDbsnpFeature(hapmap.getName());
-                    if ( dbsnp == null || dbsnp.getVariantType().equalsIgnoreCase("mixed") )
-                        continue;
+        for ( Feature record : features ) {
+            if ( VariantContextAdaptors.canBeConvertedToVariantContext(record) ) {
+                // we need to special case the HapMap format because indels aren't handled correctly
+                if ( record instanceof HapMapFeature) {
 
-                    boolean isInsertion = dbsnp.getVariantType().equalsIgnoreCase("insertion");
+                    // is it an indel?
+                    HapMapFeature hapmap = (HapMapFeature)record;
+                    if ( hapmap.getAlleles()[0].equals(HapMapFeature.NULL_ALLELE_STRING) || hapmap.getAlleles()[1].equals(HapMapFeature.NULL_ALLELE_STRING) ) {
+                        // get the dbsnp object corresponding to this record (needed to help us distinguish between insertions and deletions)
+                        VariantContext dbsnpVC = getDbsnp(hapmap.getName());
+                        if ( dbsnpVC == null || dbsnpVC.isMixed() )
+                            continue;
 
-                    alleleMap.put(HapMapFeature.DELETION, Allele.create(Allele.NULL_ALLELE_STRING, isInsertion));
-                    alleleMap.put(HapMapFeature.INSERTION, Allele.create(hapmap.getAlleles()[1], !isInsertion));
-                    hapmap.setActualAlleles(alleleMap);
+                        Map alleleMap = new HashMap(2);
+                        alleleMap.put(HapMapFeature.DELETION, Allele.create(Allele.NULL_ALLELE_STRING, dbsnpVC.isInsertion()));
+                        alleleMap.put(HapMapFeature.INSERTION, Allele.create(((HapMapFeature)record).getAlleles()[1], !dbsnpVC.isInsertion()));
+                        hapmap.setActualAlleles(alleleMap);
 
-                    // also, use the correct positioning for insertions
-                    if ( isInsertion )
-                        hapmap.updatePosition(dbsnp.getStart());                        
-                    else
-                        hapmap.updatePosition(dbsnp.getStart() - 1);
+                        // also, use the dbSNP record's start as the position of the indel
+                        hapmap.updatePosition(dbsnpVC.getStart());
 
-                    if ( hapmap.getStart() < ref.getWindow().getStart() ) {
-                        logger.warn("Hapmap record at " + ref.getLocus() + " represents an indel too large to be converted; skipping...");
-                        continue;
+                        if ( hapmap.getStart() < ref.getWindow().getStart() ) {
+                            logger.warn("Hapmap record at " + ref.getLocus() + " represents an indel too large to be converted; skipping...");
+                            continue;
+                        }
                     }
-                    refBase = ref.getBases()[hapmap.getStart() - ref.getWindow().getStart()];
-                }
-                VariantContext vc = VariantContextAdaptors.toVariantContext(INPUT_ROD_NAME, hapmap, ref);
-                if ( vc != null ) {
-                    if ( refBase != null ) {
-                        Map attrs = new HashMap(vc.getAttributes());
-                        attrs.put(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY, refBase);
-                        vc = VariantContext.modifyAttributes(vc, attrs);
-                    }
-                    hapmapVCs.add(vc);
                 }
+
+                // ok, we might actually be able to turn this record into a variant context
+                VariantContext vc = VariantContextAdaptors.toVariantContext(variants.getName(), record, ref);
+
+                if ( vc != null ) // sometimes the track has odd stuff in it that can't be converted
+                    VCs.add(vc);
             }
-            return hapmapVCs;
         }
 
-        // for everything else, we can just convert to VariantContext
-        return tracker.getVariantContexts(ref, INPUT_ROD_NAME, ALLOWED_VARIANT_CONTEXT_TYPES, ref.getLocus(), true, false);
+        return VCs;
     }
 
-    private DbSNPFeature getDbsnpFeature(String rsID) {
+    private VariantContext getDbsnp(String rsID) {
         if ( dbsnpIterator == null ) {
-            ReferenceOrderedDataSource dbsnpDataSource = null;
-            for ( ReferenceOrderedDataSource ds : getToolkit().getRodDataSources() ) {
-                if ( ds.getName().equals(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME) ) {
-                    dbsnpDataSource = ds;
-                    break;
-                }
-            }
 
-            if ( dbsnpDataSource == null )
+            if ( dbsnp == null )
                 throw new UserException.BadInput("No dbSNP rod was provided, but one is needed to decipher the correct indel alleles from the HapMap records");
 
             RMDTrackBuilder builder = new RMDTrackBuilder(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(),getToolkit().getGenomeLocParser(),getToolkit().getArguments().unsafe);
-            dbsnpIterator = builder.createInstanceOfTrack(DbSNPCodec.class, dbsnpDataSource.getFile()).getIterator();
+            dbsnpIterator = builder.createInstanceOfTrack(VCFCodec.class, new File(dbsnp.getSource())).getIterator();
             // Note that we should really use some sort of seekable iterator here so that the search doesn't take forever
             // (but it's complicated because the hapmap location doesn't match the dbsnp location, so we don't know where to seek to)
         }
 
         while ( dbsnpIterator.hasNext() ) {
             GATKFeature feature = dbsnpIterator.next();
-            DbSNPFeature dbsnp = (DbSNPFeature)feature.getUnderlyingObject();
-            if ( dbsnp.getRsID().equals(rsID) )
-                return dbsnp;
+            VariantContext vc = (VariantContext)feature.getUnderlyingObject();
+            if ( vc.hasID() && vc.getID().equals(rsID) )
+                return vc;
         }
 
         return null;
     }
 
-    private void writeRecord(VariantContext vc, RefMetaDataTracker tracker, byte ref) {
+    private void writeRecord(VariantContext vc, RefMetaDataTracker tracker, GenomeLoc loc) {
         if ( !wroteHeader ) {
             wroteHeader = true;
 
             // setup the header fields
             Set hInfo = new HashSet();
-            hInfo.addAll(VCFUtils.getHeaderFields(getToolkit()));
+            hInfo.addAll(VCFUtils.getHeaderFields(getToolkit(), Arrays.asList(variants.getName())));
             //hInfo.add(new VCFHeaderLine("source", "VariantsToVCF"));
             //hInfo.add(new VCFHeaderLine("reference", getToolkit().getArguments().referenceFile.getName()));
 
@@ -214,16 +206,16 @@ public class VariantsToVCF extends RodWalker {
                 samples.add(sampleName);
             } else {
                 // try VCF first
-                samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(INPUT_ROD_NAME));
+                samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(variants.getName()));
 
                 if ( samples.isEmpty() ) {
-                    List rods = tracker.getReferenceMetaData(INPUT_ROD_NAME);
-                    if ( rods.size() == 0 )
-                        throw new IllegalStateException("No rod data is present");
+                    List features = tracker.getValues(variants, loc);
+                    if ( features.size() == 0 )
+                        throw new IllegalStateException("No rod data is present, but we just created a VariantContext");
 
-                    Object rod = rods.get(0);
-                    if ( rod instanceof HapMapFeature)
-                        samples.addAll(Arrays.asList(((HapMapFeature)rod).getSampleIDs()));
+                    Feature f = features.get(0);
+                    if ( f instanceof HapMapFeature )
+                        samples.addAll(Arrays.asList(((HapMapFeature)f).getSampleIDs()));
                     else
                         samples.addAll(vc.getSampleNames());
                 }
@@ -233,7 +225,7 @@ public class VariantsToVCF extends RodWalker {
         }
 
         vc = VariantContextUtils.purgeUnallowedGenotypeAttributes(vc, allowedGenotypeFormatStrings);
-        vcfwriter.add(vc, ref);
+        vcfwriter.add(vc);
     }
 
     public Integer reduceInit() {
diff --git a/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/GATKExtensionsGenerator.java b/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/GATKExtensionsGenerator.java
index 5095bd6e5..9578eda84 100644
--- a/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/GATKExtensionsGenerator.java
+++ b/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/GATKExtensionsGenerator.java
@@ -38,10 +38,9 @@ import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
 import org.broadinstitute.sting.gatk.filters.FilterManager;
 import org.broadinstitute.sting.gatk.filters.ReadFilter;
 import org.broadinstitute.sting.gatk.io.stubs.OutputStreamArgumentTypeDescriptor;
-import org.broadinstitute.sting.gatk.io.stubs.SAMFileReaderArgumentTypeDescriptor;
 import org.broadinstitute.sting.gatk.io.stubs.SAMFileWriterArgumentTypeDescriptor;
 import org.broadinstitute.sting.gatk.io.stubs.VCFWriterArgumentTypeDescriptor;
-import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder;
+import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder;
 import org.broadinstitute.sting.gatk.walkers.PartitionBy;
 import org.broadinstitute.sting.gatk.walkers.PartitionType;
 import org.broadinstitute.sting.gatk.walkers.Walker;
@@ -118,7 +117,6 @@ public class GATKExtensionsGenerator extends CommandLineProgram {
     protected Collection getArgumentTypeDescriptors() {
         List typeDescriptors = new ArrayList();
         typeDescriptors.add(new VCFWriterArgumentTypeDescriptor(GATKEngine,System.out,Collections.emptyList()));
-        typeDescriptors.add(new SAMFileReaderArgumentTypeDescriptor(GATKEngine));
         typeDescriptors.add(new SAMFileWriterArgumentTypeDescriptor(GATKEngine,System.out));
         typeDescriptors.add(new OutputStreamArgumentTypeDescriptor(GATKEngine,System.out));
         return typeDescriptors;
@@ -159,7 +157,7 @@ public class GATKExtensionsGenerator extends CommandLineProgram {
                                     List argumentFields = new ArrayList();
 
                                     argumentFields.addAll(ArgumentDefinitionField.getArgumentFields(parser,walkerType));
-                                    argumentFields.addAll(RodBindField.getRodArguments(walkerType, trackBuilder));
+                                    //argumentFields.addAll(RodBindField.getRodArguments(walkerType, trackBuilder));
                                     argumentFields.addAll(ReadFilterField.getFilterArguments(parser,walkerType));
 
                                     String constructor = String.format("analysisName = \"%1$s\"%nanalysis_type = \"%1$s\"%n", walkerName);
diff --git a/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/RodBindField.java b/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/RodBindField.java
index ea180d33c..baf083575 100644
--- a/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/RodBindField.java
+++ b/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/RodBindField.java
@@ -26,7 +26,7 @@ package org.broadinstitute.sting.queue.extensions.gatk;
 
 import org.broadinstitute.sting.commandline.Input;
 import org.broadinstitute.sting.gatk.WalkerManager;
-import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder;
+import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder;
 import org.broadinstitute.sting.gatk.walkers.RMD;
 import org.broadinstitute.sting.gatk.walkers.Walker;
 
@@ -91,39 +91,39 @@ public class RodBindField extends ArgumentField {
             }
         return exclusiveOf.toString();
     }
-
-    public static List getRodArguments(Class walkerClass, RMDTrackBuilder trackBuilder) {
-        List argumentFields = new ArrayList();
-
-        List requires = WalkerManager.getRequiredMetaData(walkerClass);
-        List allows = WalkerManager.getAllowsMetaData(walkerClass);
-
-        for (RMD required: requires) {
-            List fields = new ArrayList();
-            String trackName = required.name();
-            if ("*".equals(trackName)) {
-                // TODO: Add the field triplet for name=* after @Allows and @Requires are fixed on walkers
-                //fields.add(new RodBindArgumentField(argumentDefinition, true));
-            } else {
-                for (String typeName: trackBuilder.getTrackRecordTypeNames(required.type()))
-                    fields.add(new RodBindField(trackName, typeName, fields, true));
-            }
-            argumentFields.addAll(fields);
-        }
-
-        for (RMD allowed: allows) {
-            List fields = new ArrayList();
-            String trackName = allowed.name();
-            if ("*".equals(trackName)) {
-                // TODO: Add the field triplet for name=* after @Allows and @Requires are fixed on walkers
-                //fields.add(new RodBindArgumentField(argumentDefinition, false));
-            } else {
-                for (String typeName: trackBuilder.getTrackRecordTypeNames(allowed.type()))
-                    fields.add(new RodBindField(trackName, typeName, fields, true));
-            }
-            argumentFields.addAll(fields);
-        }
-
-        return argumentFields;
-    }
+//
+//    public static List getRodArguments(Class walkerClass, RMDTrackBuilder trackBuilder) {
+//        List argumentFields = new ArrayList();
+//
+//        List requires = WalkerManager.getRequiredMetaData(walkerClass);
+//        List allows = WalkerManager.getAllowsMetaData(walkerClass);
+//
+//        for (RMD required: requires) {
+//            List fields = new ArrayList();
+//            String trackName = required.name();
+//            if ("*".equals(trackName)) {
+//                // TODO: Add the field triplet for name=* after @Allows and @Requires are fixed on walkers
+//                //fields.add(new RodBindArgumentField(argumentDefinition, true));
+//            } else {
+//                for (String typeName: trackBuilder.getFeatureManager().getTrackRecordTypeNames(required.type()))
+//                    fields.add(new RodBindField(trackName, typeName, fields, true));
+//            }
+//            argumentFields.addAll(fields);
+//        }
+//
+//        for (RMD allowed: allows) {
+//            List fields = new ArrayList();
+//            String trackName = allowed.name();
+//            if ("*".equals(trackName)) {
+//                // TODO: Add the field triplet for name=* after @Allows and @Requires are fixed on walkers
+//                //fields.add(new RodBindArgumentField(argumentDefinition, false));
+//            } else {
+//                for (String typeName: trackBuilder.getFeatureManager().getTrackRecordTypeNames(allowed.type()))
+//                    fields.add(new RodBindField(trackName, typeName, fields, true));
+//            }
+//            argumentFields.addAll(fields);
+//        }
+//
+//        return argumentFields;
+//    }
 }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/AminoAcid.java b/public/java/src/org/broadinstitute/sting/utils/AminoAcid.java
similarity index 97%
rename from public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/AminoAcid.java
rename to public/java/src/org/broadinstitute/sting/utils/AminoAcid.java
index 0d0b906e0..0b47093fa 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/AminoAcid.java
+++ b/public/java/src/org/broadinstitute/sting/utils/AminoAcid.java
@@ -23,7 +23,7 @@
  * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  */
 
-package org.broadinstitute.sting.gatk.walkers.annotator.genomicannotator;
+package org.broadinstitute.sting.utils;
 
 /**
  * Represents a single amino acid.
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/AminoAcidTable.java b/public/java/src/org/broadinstitute/sting/utils/AminoAcidTable.java
similarity index 99%
rename from public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/AminoAcidTable.java
rename to public/java/src/org/broadinstitute/sting/utils/AminoAcidTable.java
index c10eb5dd7..1ae28ffb3 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/AminoAcidTable.java
+++ b/public/java/src/org/broadinstitute/sting/utils/AminoAcidTable.java
@@ -23,7 +23,7 @@
  * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  */
 
-package org.broadinstitute.sting.gatk.walkers.annotator.genomicannotator;
+package org.broadinstitute.sting.utils;
 
 import java.util.HashMap;
 
diff --git a/public/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java b/public/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java
index a5c6e0537..8d9768681 100644
--- a/public/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java
+++ b/public/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java
@@ -34,6 +34,7 @@ import net.sf.samtools.SAMRecord;
 import net.sf.samtools.SAMSequenceDictionary;
 import net.sf.samtools.SAMSequenceRecord;
 import org.apache.log4j.Logger;
+import org.broad.tribble.Feature;
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 import org.broadinstitute.sting.utils.exceptions.UserException;
 
@@ -443,6 +444,15 @@ public class GenomeLocParser {
         }
     }
 
+    /**
+     * Creates a GenomeLoc from a Tribble feature
+     * @param feature the Tribble feature whose getChr(), getStart() and getEnd() values are used
+     * @return the GenomeLoc produced by the three-argument createGenomeLoc for those values
+     */
+    public GenomeLoc createGenomeLoc(final Feature feature) {
+        return createGenomeLoc(feature.getChr(), feature.getStart(), feature.getEnd());
+    }
+
     /**
      * create a new genome loc, given the contig name, and a single position. Must be on the reference
      *
@@ -457,19 +467,6 @@ public class GenomeLocParser {
         return createGenomeLoc(contig, getContigIndex(contig), pos, pos);
     }
 
-//    /**
-//     * Creates a new GenomeLoc without performing any validation on its contig or bounds.
-//     * FOR UNIT TESTING PURPOSES ONLY!
-//     *
-//     * @param contig the contig name
-//     * @param start  start position of the interval
-//     * @param stop   stop position of the interval
-//     * @return a new GenomeLoc representing the specified location
-//     */
-//    public GenomeLoc createGenomeLocWithoutValidation( String contig, int start, int stop ) {
-//        return new GenomeLoc(contig, getContigIndexWithoutException(contig), start, stop);
-//    }
-
     /**
      * create a new genome loc from an existing loc, with a new start position
      * Note that this function will NOT explicitly check the ending offset, in case someone wants to
diff --git a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java
old mode 100755
new mode 100644
index a0b970dbc..e197bb973
--- a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java
@@ -162,13 +162,13 @@ public class MathUtils {
         return Math.log10(sum) + maxValue;
     }
 
-    public static double sum(List values) {
+    public static double sumDoubles(List values) {
         double s = 0.0;
         for ( double v : values) s += v;
         return s;
     }
 
-    public static int sum(List values) {
+    public static int sumIntegers(List values) {
         int s = 0;
         for ( int v : values) s += v;
         return s;
@@ -460,7 +460,7 @@ public class MathUtils {
 
         // for precision purposes, we need to add (or really subtract, since they're
         // all negative) the largest value; also, we need to convert to normal-space.
-        double maxValue = MathUtils.arrayMax( array );
+        double maxValue = MathUtils.arrayMaxDouble( array );
         for (int i = 0; i < array.size(); i++)
             normalized[i] = Math.pow(10, array.get(i) - maxValue);
 
@@ -551,7 +551,7 @@ public class MathUtils {
         return minI;
     }    
 
-    public static int arrayMax(List array) {
+    public static int arrayMaxInt(List array) {
         if ( array == null ) throw new IllegalArgumentException("Array cannot be null!");
         if ( array.size() == 0 ) throw new IllegalArgumentException("Array size cannot be 0!");
 
@@ -560,7 +560,7 @@ public class MathUtils {
         return m;
     }
 
-    public static double arrayMax(List array) {
+    public static double arrayMaxDouble(List array) {
         if ( array == null ) throw new IllegalArgumentException("Array cannot be null!");
         if ( array.size() == 0 ) throw new IllegalArgumentException("Array size cannot be 0!");
 
@@ -1355,5 +1355,4 @@ public class MathUtils {
     public static double log10Factorial (int x) {
         return log10Gamma(x+1);
     }
-
-}
\ No newline at end of file
+}
diff --git a/public/java/src/org/broadinstitute/sting/utils/Utils.java b/public/java/src/org/broadinstitute/sting/utils/Utils.java
index 6a50badce..015e5d6f6 100755
--- a/public/java/src/org/broadinstitute/sting/utils/Utils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/Utils.java
@@ -42,6 +42,21 @@ public class Utils {
     /** our log, which we want to capture anything from this class */
     private static Logger logger = Logger.getLogger(Utils.class);
 
+    public static final float JAVA_DEFAULT_HASH_LOAD_FACTOR = 0.75f;
+
+    /**
+     * Calculates the optimum initial size for a hash table given the maximum number
+     * of elements it will need to hold. The optimum size is the smallest size that
+     * is guaranteed not to result in any rehash/table-resize operations.
+     *
+     * @param maxElements  The maximum number of elements you expect the hash table
+     *                     will need to hold
+     * @return             The optimum initial size for the table, given maxElements
+     */
+    public static int optimumHashSize ( int maxElements ) {
+        return (int)(maxElements / JAVA_DEFAULT_HASH_LOAD_FACTOR) + 2;
+    }
+
     public static String getClassName(Class c) {
         String FQClassName = c.getName();
         int firstChar;
diff --git a/public/java/src/org/broadinstitute/sting/utils/classloader/PluginManager.java b/public/java/src/org/broadinstitute/sting/utils/classloader/PluginManager.java
index 8d37ff573..04cbef0c3 100644
--- a/public/java/src/org/broadinstitute/sting/utils/classloader/PluginManager.java
+++ b/public/java/src/org/broadinstitute/sting/utils/classloader/PluginManager.java
@@ -172,7 +172,7 @@ public class PluginManager {
       }
     }
     
-    protected Map> getPluginsByName() {
+    public Map> getPluginsByName() {
         return Collections.unmodifiableMap(pluginsByName);
     }
 
diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/hapmap/HapMapFeature.java b/public/java/src/org/broadinstitute/sting/utils/codecs/hapmap/HapMapFeature.java
index 7a47a4b8d..6a10d0203 100755
--- a/public/java/src/org/broadinstitute/sting/utils/codecs/hapmap/HapMapFeature.java
+++ b/public/java/src/org/broadinstitute/sting/utils/codecs/hapmap/HapMapFeature.java
@@ -37,6 +37,7 @@ import java.util.Map;
  */
 public class HapMapFeature implements Feature {
 
+    public static final String NULL_ALLELE_STRING = "-";
     public static final String INSERTION = "I";
     public static final String DELETION = "D";
 
diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffCodec.java
new file mode 100644
index 000000000..eada8521f
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffCodec.java
@@ -0,0 +1,275 @@
+/*
+ * Copyright (c) 2011, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.utils.codecs.snpEff;
+
+import org.broad.tribble.Feature;
+import org.broad.tribble.FeatureCodec;
+import org.broad.tribble.TribbleException;
+import org.broad.tribble.readers.AsciiLineReader;
+import org.broad.tribble.readers.LineReader;
+import org.broadinstitute.sting.gatk.refdata.SelfScopingFeatureCodec;
+
+import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.EffectType;
+import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.ChangeType;
+import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.Zygosity;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+
+/**
+ * Codec for decoding the output format of the SnpEff variant effect predictor tool
+ * (http://snpeff.sourceforge.net/).
+ *
+ * This format has 23 tab-delimited fields:
+ *
+ * Chromosome
+ * Position
+ * Reference
+ * Change
+ * Change Type: {SNP, MNP, INS, DEL}
+ * Zygosity: {Hom, Het}
+ * Quality
+ * Coverage
+ * Warnings
+ * Gene ID
+ * Gene Name
+ * Bio Type
+ * Transcript ID
+ * Exon ID
+ * Exon Rank
+ * Effect
+ * Old/New Amino Acid
+ * Old/New Codon
+ * Codon Num
+ * CDS Size
+ * Codons Around
+ * Amino Acids Around
+ * Custom Interval ID
+ *
+ * We treat all except the Chromosome, Position, and Effect fields as optional.
+ *
+ * @author David Roazen
+ */
+public class SnpEffCodec implements FeatureCodec, SelfScopingFeatureCodec {
+
+    public static final int EXPECTED_NUMBER_OF_FIELDS = 23;
+    public static final String FIELD_DELIMITER_PATTERN = "\\t";
+    public static final String EFFECT_FIELD_DELIMITER_PATTERN = "[,:]";
+    public static final String HEADER_LINE_START = "# ";
+    public static final String[] HEADER_FIELD_NAMES = { "Chromo",
+                                                        "Position",
+                                                        "Reference",
+                                                        "Change",
+                                                        "Change type",
+                                                        "Homozygous",
+                                                        "Quality",
+                                                        "Coverage",
+                                                        "Warnings",
+                                                        "Gene_ID",
+                                                        "Gene_name",
+                                                        "Bio_type",
+                                                        "Trancript_ID",   // yes, this is how it's spelled in the SnpEff output
+                                                        "Exon_ID",
+                                                        "Exon_Rank",
+                                                        "Effect",
+                                                        "old_AA/new_AA",
+                                                        "Old_codon/New_codon",
+                                                        "Codon_Num(CDS)",
+                                                        "CDS_size",
+                                                        "Codons around",
+                                                        "AAs around",
+                                                        "Custom_interval_ID"
+                                                      };
+
+    // The "Chromo", "Position", and "Effect" fields are required to be non-empty in every SnpEff output line:
+    public static final int[] REQUIRED_FIELDS = { 0, 1, 15 };
+
+    public static final String NON_CODING_GENE_FLAG = "WITHIN_NON_CODING_GENE";
+
+
+    public Feature decodeLoc ( String line ) {
+        return decode(line);   // no location-only fast path: performs the same full parse as decode()
+    }
+
+    public Feature decode ( String line ) {   // parses one line of SnpEff output into a SnpEffFeature
+        String[] tokens = line.split(FIELD_DELIMITER_PATTERN, -1);   // limit -1: trailing empty fields are kept
+
+        if ( tokens.length != EXPECTED_NUMBER_OF_FIELDS ) {
+            throw new TribbleException.InvalidDecodeLine("Line does not have the expected (" + EXPECTED_NUMBER_OF_FIELDS +
+                                                         ") number of fields: found " + tokens.length + " fields.", line);
+        }
+
+        try {
+            trimAllFields(tokens);
+            checkForRequiredFields(tokens, line);
+
+            String contig = tokens[0];
+            long position = Long.parseLong(tokens[1]);
+            // Every field below except the effect is optional: an empty token is stored as null.
+            String reference = tokens[2].isEmpty() ? null : tokens[2];
+            String change = tokens[3].isEmpty() ? null : tokens[3];
+            ChangeType changeType = tokens[4].isEmpty() ? null : ChangeType.valueOf(tokens[4]);
+            Zygosity zygosity = tokens[5].isEmpty() ? null : Zygosity.valueOf(tokens[5]);
+            Double quality = tokens[6].isEmpty() ? null : Double.parseDouble(tokens[6]);
+            Long coverage = tokens[7].isEmpty() ? null : Long.parseLong(tokens[7]);
+            String warnings = tokens[8].isEmpty() ? null : tokens[8];
+            String geneID = tokens[9].isEmpty() ? null : tokens[9];
+            String geneName = tokens[10].isEmpty() ? null : tokens[10];
+            String bioType = tokens[11].isEmpty() ? null : tokens[11];
+            String transcriptID = tokens[12].isEmpty() ? null : tokens[12];
+            String exonID = tokens[13].isEmpty() ? null : tokens[13];
+            Integer exonRank = tokens[14].isEmpty() ? null : Integer.parseInt(tokens[14]);
+
+            boolean isNonCodingGene = isNonCodingGene(tokens[15]);
+
+            // Split the effect field into three subfields if the WITHIN_NON_CODING_GENE flag is present,
+            // otherwise split it into two subfields. We need this limit to prevent the extra effect-related information
+            // in the final field (when present) from being inappropriately tokenized:
+
+            int effectFieldTokenLimit = isNonCodingGene ? 3 : 2;
+            String[] effectFieldTokens = tokens[15].split(EFFECT_FIELD_DELIMITER_PATTERN, effectFieldTokenLimit);
+            EffectType effect = parseEffect(effectFieldTokens, isNonCodingGene);
+            String effectExtraInformation = parseEffectExtraInformation(effectFieldTokens, isNonCodingGene);
+
+            String oldAndNewAA = tokens[16].isEmpty() ? null : tokens[16];
+            String oldAndNewCodon = tokens[17].isEmpty() ? null : tokens[17];
+            Integer codonNum = tokens[18].isEmpty() ? null : Integer.parseInt(tokens[18]);
+            Integer cdsSize = tokens[19].isEmpty() ? null : Integer.parseInt(tokens[19]);
+            String codonsAround = tokens[20].isEmpty() ? null : tokens[20];
+            String aasAround = tokens[21].isEmpty() ? null : tokens[21];
+            String customIntervalID = tokens[22].isEmpty() ? null : tokens[22];
+
+            return new SnpEffFeature(contig, position, reference, change, changeType, zygosity, quality, coverage,
+                                     warnings, geneID, geneName, bioType, transcriptID, exonID, exonRank, isNonCodingGene,
+                                     effect, effectExtraInformation, oldAndNewAA, oldAndNewCodon, codonNum, cdsSize,
+                                     codonsAround, aasAround, customIntervalID);
+        }
+        catch ( NumberFormatException e ) {   // listed first: NumberFormatException is a subclass of IllegalArgumentException
+            throw new TribbleException.InvalidDecodeLine("Error parsing a numeric field : " + e.getMessage(), line);
+        }
+        catch ( IllegalArgumentException e ) {   // e.g. an unrecognized name passed to one of the enum valueOf() calls above
+            throw new TribbleException.InvalidDecodeLine("Illegal value in field: " + e.getMessage(), line);
+        }
+    }
+
+    // Trims leading and trailing whitespace from every element of tokens, in place.
+    private void trimAllFields ( String[] tokens ) {
+        int index = 0;
+        while ( index < tokens.length ) { tokens[index] = tokens[index].trim(); index++; }
+    }
+
+    // Throws InvalidDecodeLine, naming the column, for the first REQUIRED_FIELDS entry whose token is empty.
+    private void checkForRequiredFields ( String[] tokens, String line ) {
+        for ( int i = 0; i < REQUIRED_FIELDS.length; i++ ) {
+            final int column = REQUIRED_FIELDS[i];
+            if ( ! tokens[column].isEmpty() ) continue;
+            final String problem = "Line is missing required field \"" + HEADER_FIELD_NAMES[column] + "\"";
+            throw new TribbleException.InvalidDecodeLine(problem, line);
+        }
+    }
+
+    private boolean isNonCodingGene ( String effectField ) {
+        return effectField.startsWith(NON_CODING_GENE_FLAG);   // true when the Effect field begins with the WITHIN_NON_CODING_GENE flag
+    }
+
+    /**
+     * Maps the effect name found in the tokenized Effect field to its EffectType constant.
+     *
+     * The name is taken from the second subfield when the WITHIN_NON_CODING_GENE flag is present
+     * (and a second subfield exists), and from the first subfield in every other case.
+     *
+     * @throws IllegalArgumentException from EffectType.valueOf() when the name is not a known effect
+     */
+    private EffectType parseEffect ( String[] effectFieldTokens, boolean isNonCodingGene ) {
+        final boolean nameFollowsFlag = isNonCodingGene && effectFieldTokens.length > 1;
+        final int nameIndex = nameFollowsFlag ? 1 : 0;
+        final String effectName = effectFieldTokens[nameIndex].trim();
+
+        return EffectType.valueOf(effectName);
+    }
+
+    // Returns the trimmed last subfield when it carries extra effect information, otherwise null.
+    private String parseEffectExtraInformation ( String[] effectFieldTokens, boolean isNonCodingGene ) {
+        final int subfieldCount = effectFieldTokens.length;
+        final boolean hasExtraInformation = subfieldCount == 3 || (subfieldCount == 2 && ! isNonCodingGene);
+
+        if ( ! hasExtraInformation ) {
+            return null;
+        }
+        return effectFieldTokens[subfieldCount - 1].trim();
+    }
+
+    public Class getFeatureType() {
+        return SnpEffFeature.class;   // the Feature implementation that decode() constructs
+    }
+
+    // Reads the first line from reader, validates it as a SnpEff header line, and returns that line.
+    public Object readHeader ( LineReader reader ) {
+        final String firstLine;
+        try {
+            firstLine = reader.readLine();
+        }
+        catch ( IOException e ) {
+            throw new TribbleException("Unable to read header line from input file.");
+        }
+
+        validateHeaderLine(firstLine);
+        return firstLine;
+    }
+
+    // Throws InvalidHeader unless headerLine starts with "# " and its tab-separated headings equal HEADER_FIELD_NAMES in order.
+    private void validateHeaderLine ( String headerLine ) {
+        final boolean hasHeaderPrefix = headerLine != null && headerLine.startsWith(HEADER_LINE_START);
+        if ( ! hasHeaderPrefix ) {
+            throw new TribbleException.InvalidHeader("Header line does not start with " + HEADER_LINE_START);
+        }
+        final String[] headings = headerLine.substring(HEADER_LINE_START.length()).split(FIELD_DELIMITER_PATTERN);
+        if ( headings.length != EXPECTED_NUMBER_OF_FIELDS ) {
+            throw new TribbleException.InvalidHeader("Header line does not contain headings for the expected number (" +
+                                                     EXPECTED_NUMBER_OF_FIELDS + ") of columns.");
+        }
+
+        for ( int column = 0; column < headings.length; column++ ) {
+            final String expectedHeading = HEADER_FIELD_NAMES[column];
+            if ( ! expectedHeading.equals(headings[column]) ) {
+                throw new TribbleException.InvalidHeader("Header field #" + column + ": Expected \"" + expectedHeading +
+                                                         "\" but found \"" + headings[column] + "\"");
+            }
+        }
+    }
+
+    // Returns true iff potentialInput can be opened and its first line is a valid SnpEff header line.
+    public boolean canDecode ( final File potentialInput ) {
+        FileInputStream inputStream = null;
+        try {
+            inputStream = new FileInputStream(potentialInput);
+            readHeader(new AsciiLineReader(inputStream));
+            return true;
+        }
+        catch ( Exception e ) { return false; }   // unreadable file or invalid header: not a SnpEff file
+        finally { try { if ( inputStream != null ) inputStream.close(); } catch ( IOException e ) { /* best effort */ } }
+    }
+}
diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffConstants.java b/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffConstants.java
new file mode 100644
index 000000000..270db470f
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffConstants.java
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2011, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.utils.codecs.snpEff;
+
+/**
+ * A set of constants associated with the SnpEff codec.
+ *
+ * @author David Roazen
+ */
+public class SnpEffConstants {
+
+    // Possible SnpEff biological effects and their associated impacts:
+    public enum EffectType {   // SnpEffCodec.parseEffect() resolves names with valueOf(), so constants must match the file's effect names
+        START_GAINED                      (EffectImpact.HIGH),
+        START_LOST                        (EffectImpact.HIGH),
+        EXON_DELETED                      (EffectImpact.HIGH),
+        FRAME_SHIFT                       (EffectImpact.HIGH),
+        STOP_GAINED                       (EffectImpact.HIGH),
+        STOP_LOST                         (EffectImpact.HIGH),
+        SPLICE_SITE_ACCEPTOR              (EffectImpact.HIGH),
+        SPLICE_SITE_DONOR                 (EffectImpact.HIGH),
+
+        NON_SYNONYMOUS_CODING             (EffectImpact.MODERATE),
+        UTR_5_DELETED                     (EffectImpact.MODERATE),
+        UTR_3_DELETED                     (EffectImpact.MODERATE),
+        CODON_INSERTION                   (EffectImpact.MODERATE),
+        CODON_CHANGE_PLUS_CODON_INSERTION (EffectImpact.MODERATE),
+        CODON_DELETION                    (EffectImpact.MODERATE),
+        CODON_CHANGE_PLUS_CODON_DELETION  (EffectImpact.MODERATE),
+
+        NONE                              (EffectImpact.LOW),
+        CHROMOSOME                        (EffectImpact.LOW),
+        INTERGENIC                        (EffectImpact.LOW),
+        UPSTREAM                          (EffectImpact.LOW),
+        UTR_5_PRIME                       (EffectImpact.LOW),
+        SYNONYMOUS_START                  (EffectImpact.LOW),
+        NON_SYNONYMOUS_START              (EffectImpact.LOW),
+        CDS                               (EffectImpact.LOW),
+        GENE                              (EffectImpact.LOW),
+        TRANSCRIPT                        (EffectImpact.LOW),
+        EXON                              (EffectImpact.LOW),
+        SYNONYMOUS_CODING                 (EffectImpact.LOW),
+        CODON_CHANGE                      (EffectImpact.LOW),
+        SYNONYMOUS_STOP                   (EffectImpact.LOW),
+        NON_SYNONYMOUS_STOP               (EffectImpact.LOW),
+        INTRON                            (EffectImpact.LOW),
+        UTR_3_PRIME                       (EffectImpact.LOW),
+        DOWNSTREAM                        (EffectImpact.LOW),
+        INTRON_CONSERVED                  (EffectImpact.LOW),
+        INTERGENIC_CONSERVED              (EffectImpact.LOW),
+        CUSTOM                            (EffectImpact.LOW);
+
+        private final EffectImpact impact;   // impact bucket assigned to this effect in the table above
+
+        EffectType ( EffectImpact impact ) {
+            this.impact = impact;
+        }
+
+        public EffectImpact getImpact() {   // never null: every constant above supplies an impact
+            return impact;
+        }
+    }
+
+    // Severity buckets for effects; a larger rating means a more severe impact.
+    public enum EffectImpact {
+        LOW(1), MODERATE(2), HIGH(3);
+
+        private final int severityRating;
+
+        EffectImpact ( int rating ) {
+            severityRating = rating;
+        }
+
+        // True only when this impact is strictly more severe than other; equal impacts give false.
+        public boolean isHigherImpactThan ( EffectImpact other ) {
+            return other.severityRating < severityRating;
+        }
+    }
+
+    // The kinds of variants supported by the SnpEff output format:
+    public enum ChangeType {   // SnpEffCodec.decode() parses this column with valueOf(), so names must match the file text exactly
+        SNP,
+        MNP,
+        INS,
+        DEL
+    }
+
+    // Possible zygosities of SnpEff variants:
+    public enum Zygosity {   // parsed with valueOf() in SnpEffCodec.decode(); the mixed-case names mirror the file's "Hom"/"Het"
+        Hom,
+        Het
+    }
+}
diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffFeature.java b/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffFeature.java
new file mode 100644
index 000000000..2f120b7d2
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffFeature.java
@@ -0,0 +1,423 @@
+/*
+ * Copyright (c) 2011, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.utils.codecs.snpEff;
+
+import org.broad.tribble.Feature;
+
+import java.util.NoSuchElementException;
+
+import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.EffectType;
+import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.EffectImpact;
+import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.ChangeType;
+import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.Zygosity;
+
+/**
+ * Feature returned by the SnpEff codec -- stores the parsed field values from a line of SnpEff output.
+ *
+ * Many fields are optional, and missing values are represented by nulls. You should always call the
+ * hasX() method before calling the corresponding getX() method. Required fields can never be null
+ * and do not have a hasX() method.
+ *
+ * @author David Roazen
+ */
+public class SnpEffFeature implements Feature {
+
+    private String contig;                   // REQUIRED FIELD
+    private long position;                   // REQUIRED FIELD
+    private String reference;
+    private String change;
+    private ChangeType changeType;
+    private Zygosity zygosity;
+    private Double quality;
+    private Long coverage;
+    private String warnings;
+    private String geneID;
+    private String geneName;
+    private String bioType;
+    private String transcriptID;
+    private String exonID;
+    private Integer exonRank;
+    private boolean isNonCodingGene;         // REQUIRED FIELD
+    private EffectType effect;               // REQUIRED FIELD
+    private String effectExtraInformation;
+    private String oldAndNewAA;
+    private String oldAndNewCodon;
+    private Integer codonNum;
+    private Integer cdsSize;
+    private String codonsAround;
+    private String aasAround;
+    private String customIntervalID;
+
+    public SnpEffFeature ( String contig,
+                           long position,
+                           String reference,
+                           String change,
+                           ChangeType changeType,
+                           Zygosity zygosity,
+                           Double quality,
+                           Long coverage,
+                           String warnings,
+                           String geneID,
+                           String geneName,
+                           String bioType,
+                           String transcriptID,
+                           String exonID,
+                           Integer exonRank,
+                           boolean isNonCodingGene,
+                           EffectType effect,
+                           String effectExtraInformation,
+                           String oldAndNewAA,
+                           String oldAndNewCodon,
+                           Integer codonNum,
+                           Integer cdsSize,
+                           String codonsAround,
+                           String aasAround,
+                           String customIntervalID ) {
+
+        if ( contig == null || effect == null ) {
+            throw new IllegalArgumentException("contig and effect cannot be null, as they are required fields");
+        }
+
+        this.contig = contig;
+        this.position = position;
+        this.reference = reference;
+        this.change = change;
+        this.changeType = changeType;
+        this.zygosity = zygosity;
+        this.quality = quality;
+        this.coverage = coverage;
+        this.warnings = warnings;
+        this.geneID = geneID;
+        this.geneName = geneName;
+        this.bioType = bioType;
+        this.transcriptID = transcriptID;
+        this.exonID = exonID;
+        this.exonRank = exonRank;
+        this.isNonCodingGene = isNonCodingGene;
+        this.effect = effect;
+        this.effectExtraInformation = effectExtraInformation;
+        this.oldAndNewAA = oldAndNewAA;
+        this.oldAndNewCodon = oldAndNewCodon;
+        this.codonNum = codonNum;
+        this.cdsSize = cdsSize;
+        this.codonsAround = codonsAround;
+        this.aasAround = aasAround;
+        this.customIntervalID = customIntervalID;
+    }
+
+    /**
+     * Ranks this feature against another: an effect outside a non-coding gene outranks one inside a
+     * non-coding gene, and when both share the same status the effect impacts decide.
+     *
+     * @param other the feature to compare against
+     * @return true if this feature has strictly higher impact than other
+     */
+    public boolean isHigherImpactThan ( SnpEffFeature other ) {
+        final boolean otherIsNonCoding = other.isNonCodingGene();
+
+        if ( isNonCodingGene() != otherIsNonCoding ) {
+            // Exactly one of the two is in a non-coding gene; the one that is NOT has higher impact:
+            return otherIsNonCoding;
+        }
+
+        return getEffectImpact().isHigherImpactThan(other.getEffectImpact());
+    }
+
+    public String getChr() {
+        return contig;
+    }
+
+    public int getStart() {
+        return (int)position;
+    }
+
+    public int getEnd() {
+        return (int)position;
+    }
+
+    public boolean hasReference() {
+        return reference != null;
+    }
+
+    public String getReference() {
+        if ( reference == null ) throw new NoSuchElementException("This feature has no reference field");
+        return reference;
+    }
+
+    public boolean hasChange() {
+        return change != null;
+    }
+
+    public String getChange() {
+        if ( change == null ) throw new NoSuchElementException("This feature has no change field");
+        return change;
+    }
+
+    public boolean hasChangeType() {
+        return changeType != null;
+    }
+
+    public ChangeType getChangeType() {
+        if ( changeType == null ) throw new NoSuchElementException("This feature has no changeType field");
+        return changeType;
+    }
+
+    public boolean hasZygosity() {
+        return zygosity != null;
+    }
+
+    public Zygosity getZygosity() {
+        if ( zygosity == null ) throw new NoSuchElementException("This feature has no zygosity field");
+        return zygosity;
+    }
+
+    public boolean hasQuality() {
+        return quality != null;
+    }
+
+    public Double getQuality() {
+        if ( quality == null ) throw new NoSuchElementException("This feature has no quality field");
+        return quality;
+    }
+
+    public boolean hasCoverage() {
+        return coverage != null;
+    }
+
+    public Long getCoverage() {
+        if ( coverage == null ) throw new NoSuchElementException("This feature has no coverage field");
+        return coverage;
+    }
+
+    public boolean hasWarnings() {
+        return warnings != null;
+    }
+
+    public String getWarnings() {
+        if ( warnings == null ) throw new NoSuchElementException("This feature has no warnings field");
+        return warnings;
+    }
+
+    public boolean hasGeneID() {
+        return geneID != null;
+    }
+
+    public String getGeneID() {
+        if ( geneID == null ) throw new NoSuchElementException("This feature has no geneID field");
+        return geneID;
+    }
+
+    public boolean hasGeneName() {
+        return geneName != null;
+    }
+
+    public String getGeneName() {
+        if ( geneName == null ) throw new NoSuchElementException("This feature has no geneName field");
+        return geneName;
+    }
+
+    public boolean hasBioType() {
+        return bioType != null;
+    }
+
+    public String getBioType() {
+        if ( bioType == null ) throw new NoSuchElementException("This feature has no bioType field");
+        return bioType;
+    }
+
+    public boolean hasTranscriptID() {
+        return transcriptID != null;
+    }
+
+    public String getTranscriptID() {
+        if ( transcriptID == null ) throw new NoSuchElementException("This feature has no transcriptID field");
+        return transcriptID;
+    }
+
+    public boolean hasExonID() {
+        return exonID != null;
+    }
+
+    public String getExonID() {
+        if ( exonID == null ) throw new NoSuchElementException("This feature has no exonID field");
+        return exonID;
+    }
+
+    public boolean hasExonRank() {
+        return exonRank != null;
+    }
+
+    public Integer getExonRank() {
+        if ( exonRank == null ) throw new NoSuchElementException("This feature has no exonRank field");
+        return exonRank;
+    }
+
+    public boolean isNonCodingGene() {
+        return isNonCodingGene;
+    }
+
+    public EffectType getEffect() {
+        return effect;
+    }
+
+    public EffectImpact getEffectImpact() {
+        return effect.getImpact();
+    }
+
+    public boolean hasEffectExtraInformation() {
+        return effectExtraInformation != null;
+    }
+
+    public String getEffectExtraInformation() {
+        if ( effectExtraInformation == null ) throw new NoSuchElementException("This feature has no effectExtraInformation field");
+        return effectExtraInformation;
+    }
+
+    public boolean hasOldAndNewAA() {
+        return oldAndNewAA != null;
+    }
+
+    public String getOldAndNewAA() {
+        if ( oldAndNewAA == null ) throw new NoSuchElementException("This feature has no oldAndNewAA field");
+        return oldAndNewAA;
+    }
+
+    public boolean hasOldAndNewCodon() {
+        return oldAndNewCodon != null;
+    }
+
+    public String getOldAndNewCodon() {
+        if ( oldAndNewCodon == null ) throw new NoSuchElementException("This feature has no oldAndNewCodon field");
+        return oldAndNewCodon;
+    }
+
+    public boolean hasCodonNum() {
+        return codonNum != null;
+    }
+
+    public Integer getCodonNum() {
+        if ( codonNum == null ) throw new NoSuchElementException("This feature has no codonNum field");
+        return codonNum;
+    }
+
+    public boolean hasCdsSize() {
+        return cdsSize != null;
+    }
+
+    public Integer getCdsSize() {
+        if ( cdsSize == null ) throw new NoSuchElementException("This feature has no cdsSize field");
+        return cdsSize;
+    }
+
+    public boolean hasCodonsAround() {
+        return codonsAround != null;
+    }
+
+    public String getCodonsAround() {
+        if ( codonsAround == null ) throw new NoSuchElementException("This feature has no codonsAround field");
+        return codonsAround;
+    }
+
+    public boolean hasAasAround() { return aasAround != null; }   // correctly spelled hasX() partner of getAasAround()
+    /** @deprecated misspelled name, kept so existing callers still compile; use {@link #hasAasAround()}. */
+    @Deprecated public boolean hadAasAround() { return hasAasAround(); }
+
+    public String getAasAround() {
+        if ( aasAround == null ) throw new NoSuchElementException("This feature has no aasAround field");
+        return aasAround;
+    }
+
+    public boolean hasCustomIntervalID() {
+        return customIntervalID != null;
+    }
+
+    public String getCustomIntervalID() {
+        if ( customIntervalID == null ) throw new NoSuchElementException("This feature has no customIntervalID field");
+        return customIntervalID;
+    }
+
+    /** Field-by-field equality; nullable fields match when both are null or both are equal. */
+    public boolean equals ( Object o ) {
+        if ( ! (o instanceof SnpEffFeature) ) {   // instanceof is false for null, so null is rejected here too
+            return false;
+        }
+
+        SnpEffFeature other = (SnpEffFeature)o;
+        return contig.equals(other.contig) && position == other.position &&
+               changeType == other.changeType && zygosity == other.zygosity &&
+               isNonCodingGene == other.isNonCodingGene && effect == other.effect &&
+               nullSafeEquals(reference, other.reference) && nullSafeEquals(change, other.change) &&
+               nullSafeEquals(quality, other.quality) && nullSafeEquals(coverage, other.coverage) &&
+               nullSafeEquals(warnings, other.warnings) && nullSafeEquals(geneID, other.geneID) &&
+               nullSafeEquals(geneName, other.geneName) && nullSafeEquals(bioType, other.bioType) &&
+               nullSafeEquals(transcriptID, other.transcriptID) && nullSafeEquals(exonID, other.exonID) &&
+               nullSafeEquals(exonRank, other.exonRank) &&
+               nullSafeEquals(effectExtraInformation, other.effectExtraInformation) &&
+               nullSafeEquals(oldAndNewAA, other.oldAndNewAA) && nullSafeEquals(oldAndNewCodon, other.oldAndNewCodon) &&
+               nullSafeEquals(codonNum, other.codonNum) && nullSafeEquals(cdsSize, other.cdsSize) &&
+               nullSafeEquals(codonsAround, other.codonsAround) && nullSafeEquals(aasAround, other.aasAround) &&
+               nullSafeEquals(customIntervalID, other.customIntervalID);
+    }
+
+    /** Consistent with equals(): built only from contig, position and effect, which the constructor keeps non-null. */
+    public int hashCode() {
+        int hash = 31 * contig.hashCode() + (int)(position ^ (position >>> 32));
+        return 31 * hash + effect.ordinal();
+    }
+
+    // True when both references are null, or both are non-null and equal.
+    private static boolean nullSafeEquals ( Object first, Object second ) {
+        return first == null ? second == null : first.equals(second);
+    }
+
+    public String toString() {
+        return "[Contig: " + contig +
+               " Position: " + position +
+               " Reference: " + reference +
+               " Change: " + change +
+               " Change Type: " + changeType +
+               " Zygosity: " + zygosity +
+               " Quality: " + quality +
+               " Coverage: " + coverage +
+               " Warnings: " + warnings +
+               " Gene ID: " + geneID +
+               " Gene Name: " + geneName +
+               " Bio Type: " + bioType +
+               " Transcript ID: " + transcriptID +
+               " Exon ID: " + exonID +
+               " Exon Rank: " + exonRank +
+               " Non-Coding Gene: " + isNonCodingGene +
+               " Effect: " + effect +
+               " Effect Extra Information: " + effectExtraInformation +
+               " Old/New AA: " + oldAndNewAA +
+               " Old/New Codon: " + oldAndNewCodon +
+               " Codon Num: " + codonNum +
+               " CDS Size: " + cdsSize +
+               " Codons Around: " + codonsAround +
+               " AAs Around: " + aasAround +
+               " Custom Interval ID: " + customIntervalID +
+               "]";
+    }
+}
diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java
index 710127f7a..cb505c717 100755
--- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java
+++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java
@@ -7,16 +7,20 @@ import org.broad.tribble.NameAwareCodec;
 import org.broad.tribble.TribbleException;
 import org.broad.tribble.readers.LineReader;
 import org.broad.tribble.util.ParsingUtils;
+import org.broadinstitute.sting.gatk.refdata.SelfScopingFeatureCodec;
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 import org.broadinstitute.sting.utils.exceptions.UserException;
 import org.broadinstitute.sting.utils.variantcontext.Allele;
 import org.broadinstitute.sting.utils.variantcontext.Genotype;
 import org.broadinstitute.sting.utils.variantcontext.VariantContext;
 
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
 import java.util.*;
 
 
-public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec, VCFParser {
+public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec, VCFParser, SelfScopingFeatureCodec {
 
     protected final static Logger log = Logger.getLogger(VCFCodec.class);
     protected final static int NUM_STANDARD_FIELDS = 8;  // INFO is the 8th column
@@ -567,7 +571,6 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
 
             // set the reference base for indels in the attributes
             Map attributes = new TreeMap(inputVC.getAttributes());
-            attributes.put(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY, new Byte(inputVC.getReference().getBases()[0]));
 
             Map originalToTrimmedAlleleMap = new HashMap();
 
@@ -611,10 +614,46 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
                 genotypes.put(sample.getKey(), Genotype.modifyAlleles(sample.getValue(), trimmedAlleles));
 
             }
-            return new VariantContext(inputVC.getSource(), inputVC.getChr(), inputVC.getStart(), inputVC.getEnd(), alleles, genotypes, inputVC.getNegLog10PError(), inputVC.filtersWereApplied() ? inputVC.getFilters() : null, attributes);
+            return new VariantContext(inputVC.getSource(), inputVC.getChr(), inputVC.getStart(), inputVC.getEnd(), alleles, genotypes, inputVC.getNegLog10PError(), inputVC.filtersWereApplied() ? inputVC.getFilters() : null, attributes, new Byte(inputVC.getReference().getBases()[0]));
 
         }
 
         return inputVC;
     }
+
+    /**
+     * Checks whether a file begins with the given magic header text.
+     *
+     * @param potentialInput    the file to probe
+     * @param MAGIC_HEADER_LINE the exact characters the file must start with
+     * @return true if the file's leading characters equal MAGIC_HEADER_LINE; false if they differ,
+     *         the file is shorter than the header, or the file cannot be opened or read
+     */
+    public final static boolean canDecodeFile(final File potentialInput, final String MAGIC_HEADER_LINE) {
+        FileReader reader = null;
+        try {
+            reader = new FileReader(potentialInput);
+            final char[] buff = new char[MAGIC_HEADER_LINE.length()];
+            // a single read() may legally return fewer chars than requested, so fill until full or EOF
+            int filled = 0;
+            while ( filled < buff.length ) {
+                final int nRead = reader.read(buff, filled, buff.length - filled);
+                if ( nRead < 0 )
+                    break;
+                filled += nRead;
+            }
+            return new String(buff, 0, filled).equals(MAGIC_HEADER_LINE);
+        } catch ( IOException e ) {
+            return false;
+        } finally {
+            // always release the file handle, even when opening or reading fails
+            if ( reader != null ) {
+                try {
+                    reader.close();
+                } catch ( IOException e ) {
+                    // nothing useful to do if close fails; the answer has already been computed
+                }
+            }
+        }
+    }
 }
diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/SortingVCFWriterBase.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/SortingVCFWriterBase.java
index 311aaecf7..c299511db 100755
--- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/SortingVCFWriterBase.java
+++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/SortingVCFWriterBase.java
@@ -105,9 +105,8 @@ public abstract class SortingVCFWriterBase implements VCFWriter {
      * add a record to the file
      *
      * @param vc      the Variant Context object
-     * @param refBase the ref base
      */
-    public void add(VariantContext vc, byte refBase) {
+    public void add(VariantContext vc) {
         /* Note that the code below does not prevent the successive add()-ing of: (chr1, 10), (chr20, 200), (chr15, 100)
            since there is no implicit ordering of chromosomes:
          */
@@ -122,7 +121,7 @@ public abstract class SortingVCFWriterBase implements VCFWriter {
 
         noteCurrentRecord(vc); // possibly overwritten
 
-        queue.add(new VCFRecord(vc, refBase));
+        queue.add(new VCFRecord(vc));
         emitSafeRecords();
     }
 
@@ -133,7 +132,7 @@ public abstract class SortingVCFWriterBase implements VCFWriter {
             // No need to wait, waiting for nothing, or before what we're waiting for:
             if (emitUnsafe || mostUpstreamWritableLoc == null || firstRec.vc.getStart() <= mostUpstreamWritableLoc) {
                 queue.poll();
-                innerWriter.add(firstRec.vc, firstRec.refBase);
+                innerWriter.add(firstRec.vc);
             }
             else {
                 break;
@@ -143,7 +142,7 @@ public abstract class SortingVCFWriterBase implements VCFWriter {
 
     /**
      * Gets a string representation of this object.
-     * @return
+     * @return a string representation of this object
      */
     @Override
     public String toString() {
@@ -158,11 +157,9 @@ public abstract class SortingVCFWriterBase implements VCFWriter {
 
     private static class VCFRecord {
         public VariantContext vc;
-        public byte refBase;
 
-        public VCFRecord(VariantContext vc, byte refBase) {
+        public VCFRecord(VariantContext vc) {
             this.vc = vc;
-            this.refBase = refBase;
         }
     }
 }
\ No newline at end of file
diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/StandardVCFWriter.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/StandardVCFWriter.java
index b7f4be39a..d3705813c 100755
--- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/StandardVCFWriter.java
+++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/StandardVCFWriter.java
@@ -202,20 +202,18 @@ public class StandardVCFWriter implements VCFWriter {
      * add a record to the file
      *
      * @param vc      the Variant Context object
-     * @param refBase the ref base used for indels
      */
-    public void add(VariantContext vc, byte refBase) {
-        add(vc, refBase, false);
+    public void add(VariantContext vc) {
+        add(vc, false);
     }
 
     /**
      * add a record to the file
      *
      * @param vc      the Variant Context object
-     * @param refBase the ref base used for indels
      * @param refBaseShouldBeAppliedToEndOfAlleles *** THIS SHOULD BE FALSE EXCEPT FOR AN INDEL AT THE EXTREME BEGINNING OF A CONTIG (WHERE THERE IS NO PREVIOUS BASE, SO WE USE THE BASE AFTER THE EVENT INSTEAD)
      */
-    public void add(VariantContext vc, byte refBase, boolean refBaseShouldBeAppliedToEndOfAlleles) {
+    public void add(VariantContext vc, boolean refBaseShouldBeAppliedToEndOfAlleles) {
         if ( mHeader == null )
             throw new IllegalStateException("The VCF Header must be written before records can be added: " + locationString());
 
@@ -223,7 +221,7 @@ public class StandardVCFWriter implements VCFWriter {
             vc = VariantContext.modifyGenotypes(vc, null);
 
         try {
-            vc = VariantContext.createVariantContextWithPaddedAlleles(vc, refBase, refBaseShouldBeAppliedToEndOfAlleles);
+            vc = VariantContext.createVariantContextWithPaddedAlleles(vc, refBaseShouldBeAppliedToEndOfAlleles);
 
             // if we are doing on the fly indexing, add the record ***before*** we write any bytes 
             if ( indexer != null ) indexer.addFeature(vc, positionalStream.getPosition());
@@ -285,7 +283,7 @@ public class StandardVCFWriter implements VCFWriter {
             Map infoFields = new TreeMap();
             for ( Map.Entry field : vc.getAttributes().entrySet() ) {
                 String key = field.getKey();
-                if ( key.equals(VariantContext.ID_KEY) || key.equals(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY) || key.equals(VariantContext.UNPARSED_GENOTYPE_MAP_KEY) || key.equals(VariantContext.UNPARSED_GENOTYPE_PARSER_KEY) )
+                if ( key.equals(VariantContext.ID_KEY) || key.equals(VariantContext.UNPARSED_GENOTYPE_MAP_KEY) || key.equals(VariantContext.UNPARSED_GENOTYPE_PARSER_KEY) )
                     continue;
 
                 String outputValue = formatVCFField(field.getValue());
diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCF3Codec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCF3Codec.java
index c29f2ba8b..ea16595bb 100755
--- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCF3Codec.java
+++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCF3Codec.java
@@ -7,6 +7,8 @@ import org.broadinstitute.sting.utils.variantcontext.Allele;
 import org.broadinstitute.sting.utils.variantcontext.Genotype;
 import org.broadinstitute.sting.utils.variantcontext.VariantContext;
 
+import java.io.File;
+import java.io.FileReader;
 import java.io.IOException;
 import java.util.*;
 
@@ -16,6 +18,8 @@ import java.util.*;
  * quickly as possible, relying on VariantContext to do the validation of any contradictory (or malformed) record parameters.
  */
 public class VCF3Codec extends AbstractVCFCodec {
+    public final static String VCF3_MAGIC_HEADER = "##fileformat=VCFv3";
+
 
     /**
      * @param reader the line reader to take header lines from
@@ -178,4 +182,8 @@ public class VCF3Codec extends AbstractVCFCodec {
         return genotypes;
     }
 
+    @Override
+    public boolean canDecode(final File potentialInput) { // delegates to canDecodeFile, probing for VCF3_MAGIC_HEADER
+        return canDecodeFile(potentialInput, VCF3_MAGIC_HEADER);
+    }
 }
diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCodec.java
index 05fff5d9e..55a0eb3f9 100755
--- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCodec.java
+++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCodec.java
@@ -7,6 +7,8 @@ import org.broadinstitute.sting.utils.variantcontext.Allele;
 import org.broadinstitute.sting.utils.variantcontext.Genotype;
 import org.broadinstitute.sting.utils.variantcontext.VariantContext;
 
+import java.io.File;
+import java.io.FileReader;
 import java.io.IOException;
 import java.util.*;
 
@@ -16,6 +18,7 @@ import java.util.*;
  * quickly as possible, relying on VariantContext to do the validation of any contradictory (or malformed) record parameters.
  */
 public class VCFCodec extends AbstractVCFCodec {
+    public final static String VCF4_MAGIC_HEADER = "##fileformat=VCFv4";
 
     /**
      * @param reader the line reader to take header lines from
@@ -184,5 +187,8 @@ public class VCFCodec extends AbstractVCFCodec {
         return genotypes;
     }
 
-
+    @Override
+    public boolean canDecode(final File potentialInput) { // delegates to canDecodeFile, probing for VCF4_MAGIC_HEADER
+        return canDecodeFile(potentialInput, VCF4_MAGIC_HEADER);
+    }
 }
diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFWriter.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFWriter.java
index 0d23fe455..55749d26e 100755
--- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFWriter.java
+++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFWriter.java
@@ -14,5 +14,5 @@ public interface VCFWriter {
      */
     public void close();
 
-    public void add(VariantContext vc, byte refBase);
+    public void add(VariantContext vc);
 }
\ No newline at end of file
diff --git a/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java b/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java
index 3c3299ff5..b3524c0d8 100755
--- a/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java
+++ b/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java
@@ -87,6 +87,13 @@ public class UserException extends ReviewedStingException {
         }
     }
 
+    public static class UnknownTribbleType extends CommandLineException { // reports a tribble type name that was not recognized
+        public UnknownTribbleType(String type, String message) { // type: the unrecognized type name; message: detail appended after it
+            super(String.format("Unknown tribble type %s: %s", type, message));
+        }
+    }
+
+
     public static class BadTmpDir extends UserException {
         public BadTmpDir(String message) {
             super(String.format("Failure working with the tmp directory %s. Override with -Djava.io.tmpdir=X on the command line to a bigger/better file system.  Exact error was %s", System.getProperties().get("java.io.tmpdir"), message));
diff --git a/public/java/src/org/broadinstitute/sting/utils/help/DocumentedGATKFeatureHandler.java b/public/java/src/org/broadinstitute/sting/utils/help/DocumentedGATKFeatureHandler.java
index 366df0c3a..ce03c8093 100644
--- a/public/java/src/org/broadinstitute/sting/utils/help/DocumentedGATKFeatureHandler.java
+++ b/public/java/src/org/broadinstitute/sting/utils/help/DocumentedGATKFeatureHandler.java
@@ -31,29 +31,70 @@ import java.io.*;
 import java.util.Set;
 
 /**
- *
+ * Extend this class to provide a documentation handler for GATKdocs
  */
 public abstract class DocumentedGATKFeatureHandler {
     private GATKDoclet doclet;
 
+    /**
+     * @return the javadoc RootDoc of this javadoc run
+     */
     protected RootDoc getRootDoc() {
         return this.doclet.rootDoc;
     }
 
+    /** Set the master doclet driving this handler */
     public void setDoclet(GATKDoclet doclet) {
         this.doclet = doclet;
     }
 
+    /**
+     * @return the GATKDoclet driving this documentation run
+     */
     public GATKDoclet getDoclet() {
         return doclet;
     }
 
-    public boolean shouldBeProcessed(ClassDoc doc) { return true; }
+    /**
+     * Should return false iff this handler wants GATKDoclet to skip documenting
+     * this ClassDoc.
+     * @param doc that is being considered for inclusion in the docs
+     * @return true if the doclet should document ClassDoc doc
+     */
+    public boolean includeInDocs(ClassDoc doc) { return true; }
 
-    public String getDestinationFilename(ClassDoc doc) {
-        return HelpUtils.getClassName(doc).replace(".", "_") + ".html";
+    /**
+     * Return the flat filename (no paths) to which the handler would like the Doclet to
+     * write the documentation for ClassDoc doc and its associated Class clazz
+     * @param doc
+     * @param clazz
+     * @return
+     */
+    public String getDestinationFilename(ClassDoc doc, Class clazz) {
+        return GATKDocUtils.htmlFilenameForClass(clazz);
     }
 
+    /**
+     * Return the name of the FreeMarker template we will use to process ClassDoc doc.
+     *
+     * Note this is a flat filename relative to settings/helpTemplates in the GATK source tree
+     * @param doc
+     * @return
+     * @throws IOException
+     */
     public abstract String getTemplateName(ClassDoc doc) throws IOException;
+
+    /**
+     * Actually generate the documentation map associated with toProcess
+     *
+     * Can use all to provide references and rootDoc for additional information, if necessary.
+     * Implementing methods should end with a call to setHandlerContent on toProcess, as in:
+     *
+     * toProcess.setHandlerContent(summary, rootMap);
+     *
+     * @param rootDoc
+     * @param toProcess
+     * @param all
+     */
     public abstract void processOne(RootDoc rootDoc, GATKDocWorkUnit toProcess, Set all);
 }
diff --git a/public/java/src/org/broadinstitute/sting/utils/help/GATKDocUtils.java b/public/java/src/org/broadinstitute/sting/utils/help/GATKDocUtils.java
new file mode 100644
index 000000000..8efeecd7b
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/utils/help/GATKDocUtils.java
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2011, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.utils.help;
+
+/**
+ * @author depristo
+ * @since 8/8/11
+ */
+public class GATKDocUtils {
+    private final static String URL_ROOT_FOR_RELEASE_GATKDOCS = "http://www.broadinstitute.org/gsa/gatkdocs/release/";
+    private final static String URL_ROOT_FOR_STABLE_GATKDOCS = "http://iwww.broadinstitute.org/gsa/gatkdocs/stable/";
+    private final static String URL_ROOT_FOR_UNSTABLE_GATKDOCS = "http://iwww.broadinstitute.org/gsa/gatkdocs/unstable/";
+
+    /** @return the flat HTML filename for c: its fully qualified name with every '.' turned into '_', plus ".html" */
+    public static String htmlFilenameForClass(Class c) {
+        return c.getName().replace('.', '_') + ".html";
+    }
+
+    /** @return three newline-terminated lines giving the release, stable and unstable GATKDocs URLs for class c */
+    public static String helpLinksToGATKDocs(Class c) {
+        final String page = htmlFilenameForClass(c) + "\n";
+        return "release  version: " + URL_ROOT_FOR_RELEASE_GATKDOCS + page
+             + "stable   version: " + URL_ROOT_FOR_STABLE_GATKDOCS + page
+             + "unstable version: " + URL_ROOT_FOR_UNSTABLE_GATKDOCS + page;
+    }
+}
diff --git a/public/java/src/org/broadinstitute/sting/utils/help/GATKDocWorkUnit.java b/public/java/src/org/broadinstitute/sting/utils/help/GATKDocWorkUnit.java
index 65c6624d5..1f6db2757 100644
--- a/public/java/src/org/broadinstitute/sting/utils/help/GATKDocWorkUnit.java
+++ b/public/java/src/org/broadinstitute/sting/utils/help/GATKDocWorkUnit.java
@@ -30,19 +30,29 @@ import java.util.HashMap;
 import java.util.Map;
 
 /**
-* Created by IntelliJ IDEA.
-* User: depristo
-* Date: 7/24/11
-* Time: 7:59 PM
-* To change this template use File | Settings | File Templates.
-*/
-public class GATKDocWorkUnit implements Comparable {
-    // known at the start
-    final String name, filename, group;
-    final DocumentedGATKFeatureHandler handler;
-    final ClassDoc classDoc;
+ * Simple collection of all relevant information about something the GATKDoclet can document
+ *
+ * Created by IntelliJ IDEA.
+ * User: depristo
+ * Date: 7/24/11
+ * Time: 7:59 PM
+ */
+class GATKDocWorkUnit implements Comparable {
+    /** The class that's being documented */
     final Class clazz;
+    /** The name of the thing we are documenting */
+    final String name;
+    /** the filename where we will be writing the docs for this class */
+    final String filename;
+    /** The name of the documentation group (e.g., walkers, read filters) class belongs to */
+    final String group;
+    /** The documentation handler for this class */
+    final DocumentedGATKFeatureHandler handler;
+    /** The javadoc documentation for clazz */
+    final ClassDoc classDoc;
+    /** The annotation that led to this Class being in GATKDoc */
     final DocumentedGATKFeature annotation;
+    /** When was this walker built, and what's the absolute version number */
     final String buildTimestamp, absoluteVersion;
 
     // set by the handler
@@ -64,12 +74,21 @@ public class GATKDocWorkUnit implements Comparable {
         this.absoluteVersion = absoluteVersion;
     }
 
+    /**
+     * Called by the GATKDoclet to set handler provided context for this work unit
+     * @param summary
+     * @param forTemplate
+     */
     public void setHandlerContent(String summary, Map forTemplate) {
         this.summary = summary;
         this.forTemplate = forTemplate;
     }
 
-    public Map toMap() {
+    /**
+     * Return a String -> String map suitable for FreeMarker to create an index to this WorkUnit
+     * @return
+     */
+    public Map indexDataMap() {
         Map data = new HashMap();
         data.put("name", name);
         data.put("summary", summary);
@@ -78,6 +97,11 @@ public class GATKDocWorkUnit implements Comparable {
         return data;
     }
 
+    /**
+     * Sort in order of the name of this WorkUnit
+     * @param other
+     * @return
+     */
     public int compareTo(GATKDocWorkUnit other) {
         return this.name.compareTo(other.name);
     }
diff --git a/public/java/src/org/broadinstitute/sting/utils/help/GATKDoclet.java b/public/java/src/org/broadinstitute/sting/utils/help/GATKDoclet.java
index 0b4c69e3c..f278e593d 100644
--- a/public/java/src/org/broadinstitute/sting/utils/help/GATKDoclet.java
+++ b/public/java/src/org/broadinstitute/sting/utils/help/GATKDoclet.java
@@ -57,7 +57,7 @@ public class GATKDoclet {
      * @throws java.io.IOException if output can't be written.
      */
     public static boolean start(RootDoc rootDoc) throws IOException {
-        logger.setLevel(Level.INFO);
+        logger.setLevel(Level.DEBUG);
         // load arguments
         for(String[] options: rootDoc.options()) {
             if(options[0].equals("-build-timestamp"))
@@ -95,11 +95,11 @@ public class GATKDoclet {
         for ( ClassDoc doc : rootDoc.classes() ) {
             logger.debug("Considering " + doc);
             Class clazz = getClassForClassDoc(doc);
             DocumentedGATKFeature feature = getFeatureForClassDoc(doc);
             DocumentedGATKFeatureHandler handler = createHandler(doc, feature);
-            if ( handler != null && handler.shouldBeProcessed(doc) ) {
+            if ( handler != null && handler.includeInDocs(doc) ) {
                 logger.info("Going to generate documentation for class " + doc);
-                String filename = handler.getDestinationFilename(doc);
+                String filename = handler.getDestinationFilename(doc, clazz);
                 GATKDocWorkUnit unit = new GATKDocWorkUnit(doc.name(),
                         filename, feature.groupName(),
                         feature, handler, doc, clazz,
@@ -216,7 +216,7 @@ public class GATKDoclet {
         Set docFeatures = new HashSet();
         List> data = new ArrayList>();
         for ( GATKDocWorkUnit workUnit : indexData ) {
-            data.add(workUnit.toMap());
+            data.add(workUnit.indexDataMap());
             docFeatures.add(workUnit.annotation);
         }
 
diff --git a/public/java/src/org/broadinstitute/sting/utils/help/GenericDocumentationHandler.java b/public/java/src/org/broadinstitute/sting/utils/help/GenericDocumentationHandler.java
index fd1048844..3ca24dc35 100644
--- a/public/java/src/org/broadinstitute/sting/utils/help/GenericDocumentationHandler.java
+++ b/public/java/src/org/broadinstitute/sting/utils/help/GenericDocumentationHandler.java
@@ -51,7 +51,7 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler {
     RootDoc rootDoc;
 
     @Override
-    public boolean shouldBeProcessed(ClassDoc doc) {
+    public boolean includeInDocs(ClassDoc doc) {
         return true;
 //        try {
 //            Class type = HelpUtils.getClassForDoc(doc);
@@ -92,7 +92,7 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler {
         for(Tag tag: classdoc.firstSentenceTags())
             summaryBuilder.append(tag.text());
         root.put("summary", summaryBuilder.toString());
-        root.put("description", classdoc.commentText());
+        root.put("description", classdoc.commentText().substring(summaryBuilder.toString().length()));
         root.put("timestamp", toProcess.buildTimestamp);
         root.put("version", toProcess.absoluteVersion);
 
diff --git a/public/java/src/org/broadinstitute/sting/utils/interval/IntervalFileMergingIterator.java b/public/java/src/org/broadinstitute/sting/utils/interval/IntervalFileMergingIterator.java
index 988240ef9..2bc3fa284 100644
--- a/public/java/src/org/broadinstitute/sting/utils/interval/IntervalFileMergingIterator.java
+++ b/public/java/src/org/broadinstitute/sting/utils/interval/IntervalFileMergingIterator.java
@@ -26,7 +26,6 @@
 package org.broadinstitute.sting.utils.interval;
 
 import org.broadinstitute.sting.gatk.iterators.PushbackIterator;
-import org.broadinstitute.sting.gatk.refdata.utils.StringToGenomeLocIteratorAdapter;
 import org.broadinstitute.sting.utils.GenomeLoc;
 import org.broadinstitute.sting.utils.GenomeLocParser;
 import org.broadinstitute.sting.utils.exceptions.UserException;
diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/StringToGenomeLocIteratorAdapter.java b/public/java/src/org/broadinstitute/sting/utils/interval/StringToGenomeLocIteratorAdapter.java
similarity index 95%
rename from public/java/src/org/broadinstitute/sting/gatk/refdata/utils/StringToGenomeLocIteratorAdapter.java
rename to public/java/src/org/broadinstitute/sting/utils/interval/StringToGenomeLocIteratorAdapter.java
index fc7f7c58f..659260345 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/StringToGenomeLocIteratorAdapter.java
+++ b/public/java/src/org/broadinstitute/sting/utils/interval/StringToGenomeLocIteratorAdapter.java
@@ -23,7 +23,7 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  */
 
-package org.broadinstitute.sting.gatk.refdata.utils;
+package org.broadinstitute.sting.utils.interval;
 
 import org.broadinstitute.sting.gatk.iterators.PushbackIterator;
 import org.broadinstitute.sting.utils.GenomeLoc;
diff --git a/public/java/src/org/broadinstitute/sting/utils/text/ListFileUtils.java b/public/java/src/org/broadinstitute/sting/utils/text/ListFileUtils.java
index f6aa882ad..79271464b 100644
--- a/public/java/src/org/broadinstitute/sting/utils/text/ListFileUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/text/ListFileUtils.java
@@ -25,10 +25,11 @@
 package org.broadinstitute.sting.utils.text;
 
 import org.broadinstitute.sting.commandline.ParsingEngine;
+import org.broadinstitute.sting.commandline.RodBinding;
 import org.broadinstitute.sting.commandline.Tags;
 import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
+import org.broadinstitute.sting.gatk.refdata.tracks.FeatureManager;
 import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
-import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;
 import org.broadinstitute.sting.utils.exceptions.UserException;
 
 import java.io.File;
@@ -92,7 +93,9 @@ public class ListFileUtils {
      * @param RODBindings a text equivale
      * @return a list of expanded, bound RODs.
      */
-    public static Collection unpackRODBindings(final List RODBindings, final String dbSNPFile, final ParsingEngine parser) {
+    @Deprecated
+    public static Collection unpackRODBindingsOldStyle(final Collection RODBindings, final ParsingEngine parser) {
+        // todo -- this is a strange home for this code.  Move into ROD system
         Collection rodBindings = new ArrayList();
 
         for (String fileName: RODBindings) {
@@ -120,21 +123,53 @@ public class ListFileUtils {
             rodBindings.add(new RMDTriplet(name,type,fileName,storageType,tags));
         }
 
-        if (dbSNPFile != null) {
-            if(dbSNPFile.toLowerCase().contains("vcf"))
-                throw new UserException("--DBSNP (-D) argument currently does not support VCF.  To use dbSNP in VCF format, please use -B:dbsnp,vcf .");
+        return rodBindings;
+    }
 
-            final Tags tags = parser.getTags(dbSNPFile);
-            String fileName = expandFileName(dbSNPFile);
-            RMDTriplet.RMDStorageType storageType = fileName.toLowerCase().endsWith("stdin") ? RMDTriplet.RMDStorageType.STREAM : RMDTriplet.RMDStorageType.FILE;
+    /**
+     * Convert command-line argument representation of ROD bindings to something more easily understandable by the engine.
+     * @param RODBindings the ROD bindings, as supplied on the command line, to convert
+     * @return a list of expanded, bound RODs.
+     */
+    public static Collection unpackRODBindings(final Collection RODBindings, final ParsingEngine parser) {
+        // todo -- this is a strange home for this code.  Move into ROD system
+        Collection rodBindings = new ArrayList();
+        FeatureManager builderForValidation = new FeatureManager();
 
-            rodBindings.add(new RMDTriplet(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME,"dbsnp",fileName,storageType,tags));
+        for (RodBinding rodBinding: RODBindings) {
+            String argValue = rodBinding.getSource();
+            String fileName = expandFileName(argValue);
+            String name = rodBinding.getName();
+            String type = rodBinding.getTribbleType();
+
+            RMDTriplet.RMDStorageType storageType = null;
+            if(rodBinding.getTags().getValue("storage") != null)
+                storageType = Enum.valueOf(RMDTriplet.RMDStorageType.class,rodBinding.getTags().getValue("storage"));
+            else if(fileName.toLowerCase().endsWith("stdin"))
+                storageType = RMDTriplet.RMDStorageType.STREAM;
+            else
+                storageType = RMDTriplet.RMDStorageType.FILE;
+
+            RMDTriplet triplet = new RMDTriplet(name,type,fileName,storageType,rodBinding.getTags());
+
+            // validate triplet type
+            FeatureManager.FeatureDescriptor descriptor = builderForValidation.getByTriplet(triplet);
+            if ( descriptor == null )
+                throw new UserException.UnknownTribbleType(rodBinding.getTribbleType(),
+                        String.format("Field %s had provided type %s but there's no such Tribble type.  Available types are %s",
+                                rodBinding.getName(), rodBinding.getTribbleType(), builderForValidation.userFriendlyListOfAvailableFeatures()));
+            if ( ! rodBinding.getType().isAssignableFrom(descriptor.getFeatureClass()) )
+                throw new UserException.BadArgumentValue(rodBinding.getName(),
+                        String.format("Field %s expected type %s, but the type of the input file provided on the command line was %s",
+                                rodBinding.getName(), rodBinding.getType(), descriptor.getName()));
+
+
+            rodBindings.add(triplet);
         }
 
         return rodBindings;
     }
 
-
     /**
      * Expand any special characters that appear in the filename.  Right now, '-' is expanded to
      * '/dev/stdin' only, but in the future, special characters like '~' and '*' that are passed
diff --git a/public/java/src/org/broadinstitute/sting/utils/text/TextFormattingUtils.java b/public/java/src/org/broadinstitute/sting/utils/text/TextFormattingUtils.java
index 1d4251542..3159f3fb7 100644
--- a/public/java/src/org/broadinstitute/sting/utils/text/TextFormattingUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/text/TextFormattingUtils.java
@@ -116,4 +116,57 @@ public class TextFormattingUtils {
         return bundle;
     }
 
+
+    /**
+     * Returns every index i >= 1 in line at which a word begins (a non-whitespace character preceded by whitespace).
+     * Position 0 is never reported.  The returned list is compatible with splitFixedWidth.
+     * @param line Text to parse; a null line is rejected with a ReviewedStingException.
+     * @return the word starting positions within line in ascending order, excluding the first position 0.
+     */
+    public static List getWordStarts(String line) {
+        if (line == null)
+            throw new ReviewedStingException("line is null");
+        List starts = new ArrayList();
+        int stop = line.length();
+        for (int i = 1; i < stop; i++) // begin at 1 so that position 0 is never reported
+            if (Character.isWhitespace(line.charAt(i-1))) // the previous character is whitespace ...
+                if(!Character.isWhitespace(line.charAt(i))) // ... and this one is not, so a word begins at i
+                    starts.add(i);
+        return starts;
+    }
+
+    /**
+     * Parses a fixed width line of text into columnStarts.size() + 1 fields; a null argument raises a ReviewedStingException.
+     * @param line Text to parse.
+     * @param columnStarts the column starting positions within line, excluding the first position 0.
+     * @return The parsed string array with each entry trimmed.
+     */
+    public static String[] splitFixedWidth(String line, List columnStarts) {
+        if (line == null)
+            throw new ReviewedStingException("line is null");
+        if (columnStarts == null)
+            throw new ReviewedStingException("columnStarts is null");
+        int startCount = columnStarts.size();
+        String[] row = new String[startCount + 1]; // one more field than there are start positions
+        if (startCount == 0) {
+            row[0] = line.trim(); // no column boundaries: the whole line is the single field
+        } else {
+            row[0] = line.substring(0, columnStarts.get(0)).trim(); // NOTE(review): substring throws if line is shorter than a start position -- confirm callers never pass truncated rows
+            for (int i = 1; i < startCount; i++)
+                row[i] = line.substring(columnStarts.get(i - 1), columnStarts.get(i)).trim(); // field i spans [start i-1, start i)
+            row[startCount] = line.substring(columnStarts.get(startCount - 1)).trim(); // the last field runs to the end of the line
+        }
+        return row;
+    }
+
+    /**
+     * Parses a line of text by whitespace, ignoring leading and trailing whitespace.
+     * @param line Text to parse; a null line is rejected with a ReviewedStingException.
+     * @return The parsed string array.
+     */
+    public static String[] splitWhiteSpace(String line) {
+        if (line == null)
+            throw new ReviewedStingException("line is null");
+        return line.trim().split("\\s+"); // a run of whitespace acts as one separator; a blank line yields a single empty string
+    }
 }
diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java
index 0b5976c3c..fdf3d97db 100755
--- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java
+++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java
@@ -57,6 +57,13 @@ public class Genotype {
         return new Genotype(g.getSampleName(), g.getAlleles(), g.getNegLog10PError(), g.filtersWereApplied() ? g.getFilters() : null, attributes, g.isPhased());
     }
 
+    public static Genotype removePLs(Genotype g) {
+        Map attrs = new HashMap(g.getAttributes()); // work on a copy so the attribute map of g is left untouched
+        attrs.remove(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY);
+        attrs.remove(VCFConstants.GENOTYPE_LIKELIHOODS_KEY);
+        return new Genotype(g.getSampleName(), g.getAlleles(), g.getNegLog10PError(), g.filtersWereApplied() ? g.getFilters() : null, attrs, g.isPhased()); // everything except the two removed attributes is carried over from g
+    }
+
     public static Genotype modifyAlleles(Genotype g, List alleles) {
         return new Genotype(g.getSampleName(), alleles, g.getNegLog10PError(), g.filtersWereApplied() ? g.getFilters() : null, g.getAttributes(), g.isPhased());
     }
diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/MutableVariantContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/MutableVariantContext.java
index a191670a4..a752f4a1b 100755
--- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/MutableVariantContext.java
+++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/MutableVariantContext.java
@@ -27,15 +27,15 @@ public class MutableVariantContext extends VariantContext {
     }
 
     public MutableVariantContext(String source, String contig, long start, long stop, Collection alleles) {
-        this(source, contig, start, stop, alleles, NO_GENOTYPES, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null);
+        super(source, contig, start, stop, alleles, NO_GENOTYPES, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null); // now delegates to the VariantContext superclass constructor instead of a sibling constructor
     }
 
     public MutableVariantContext(String source, String contig, long start, long stop, Collection alleles, Collection genotypes) {
-        this(source, contig, start, stop, alleles, genotypes, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null);
+        super(source, contig, start, stop, alleles, genotypes, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null); // now delegates to the VariantContext superclass constructor; no filters or attributes are supplied
     }
 
     public MutableVariantContext(VariantContext parent) {
-        this(parent.getSource(), parent.contig, parent.start, parent.stop, parent.getAlleles(), parent.getGenotypes(), parent.getNegLog10PError(), parent.getFilters(), parent.getAttributes());
+        super(parent.getSource(), parent.contig, parent.start, parent.stop, parent.getAlleles(), parent.getGenotypes(), parent.getNegLog10PError(), parent.getFilters(), parent.getAttributes(), parent.getReferenceBaseForIndel()); // copy constructor: also carries over the parent's padded reference base
     }
 
     /**
diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java
index eab392c4d..23478cc2b 100755
--- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java
+++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java
@@ -5,6 +5,7 @@ import org.broad.tribble.TribbleException;
 import org.broad.tribble.util.ParsingUtils;
 import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
 import org.broadinstitute.sting.utils.codecs.vcf.VCFParser;
+import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 
 import java.util.*;
 
@@ -163,11 +164,12 @@ import java.util.*;
 public class VariantContext implements Feature { // to enable tribble intergration
     protected InferredGeneticContext commonInfo = null;
     public final static double NO_NEG_LOG_10PERROR = InferredGeneticContext.NO_NEG_LOG_10PERROR;
-    public final static String REFERENCE_BASE_FOR_INDEL_KEY = "_REFERENCE_BASE_FOR_INDEL_";
     public final static String UNPARSED_GENOTYPE_MAP_KEY = "_UNPARSED_GENOTYPE_MAP_";
     public final static String UNPARSED_GENOTYPE_PARSER_KEY = "_UNPARSED_GENOTYPE_PARSER_";
     public final static String ID_KEY = "ID";
 
+    private final Byte REFERENCE_BASE_FOR_INDEL;
+
     public final static Set PASSES_FILTERS = Collections.unmodifiableSet(new LinkedHashSet());
 
     /** The location of this VariantContext */
@@ -205,6 +207,24 @@ public class VariantContext implements Feature { // to enable tribble intergrati
     // ---------------------------------------------------------------------------------------------------------
 
 
+    /**
+     * The complete constructor, including the padded reference base.  Makes a complete VariantContext from its arguments.
+     *
+     * @param source          source
+     * @param contig          the contig
+     * @param start           the start base (one based)
+     * @param stop            the stop reference base (one based)
+     * @param alleles         alleles
+     * @param genotypes       genotypes map
+     * @param negLog10PError  qual
+     * @param filters         filters: use null for unfiltered and empty set for passes filters
+     * @param attributes      attributes
+     * @param referenceBaseForIndel   padded reference base; null means no padding base is available
+     */
+    public VariantContext(String source, String contig, long start, long stop, Collection alleles, Map genotypes, double negLog10PError, Set filters, Map attributes, Byte referenceBaseForIndel) {
+        this(source, contig, start, stop, alleles, genotypes, negLog10PError, filters, attributes, referenceBaseForIndel, false); // genotypesAreUnparsed = false
+    }
+
     /**
      * the complete constructor.  Makes a complete VariantContext from its arguments
      *
@@ -219,7 +239,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
      * @param attributes      attributes
      */
     public VariantContext(String source, String contig, long start, long stop, Collection alleles, Map genotypes, double negLog10PError, Set filters, Map attributes) {
-        this(source, contig, start, stop, alleles, genotypes, negLog10PError, filters, attributes, false);
+        this(source, contig, start, stop, alleles, genotypes, negLog10PError, filters, attributes, null, false); // referenceBaseForIndel = null, genotypesAreUnparsed = false
     }
 
     /**
@@ -239,7 +259,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
      * @param attributes      attributes
      */
     public VariantContext(String source, String contig, long start, long stop, Collection alleles, double negLog10PError, Set filters, Map attributes) {
-        this(source, contig, start, stop, alleles, NO_GENOTYPES, negLog10PError, filters, attributes, true);
+        this(source, contig, start, stop, alleles, NO_GENOTYPES, negLog10PError, filters, attributes, null, true); // referenceBaseForIndel = null, genotypesAreUnparsed = true
     }
 
     /**
@@ -256,7 +276,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
      * @param attributes     attributes
      */
     public VariantContext(String source, String contig, long start, long stop, Collection alleles, Collection genotypes, double negLog10PError, Set filters, Map attributes) {
-        this(source, contig, start, stop, alleles, genotypes != null ? genotypeCollectionToMap(new TreeMap(), genotypes) : null, negLog10PError, filters, attributes, false);
+        this(source, contig, start, stop, alleles, genotypes != null ? genotypeCollectionToMap(new TreeMap(), genotypes) : null, negLog10PError, filters, attributes, null, false); // a non-null genotype collection is converted to a TreeMap; referenceBaseForIndel = null, genotypesAreUnparsed = false
     }
 
     /**
@@ -269,7 +289,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
      * @param alleles alleles
      */
     public VariantContext(String source, String contig, long start, long stop, Collection alleles) {
-        this(source, contig, start, stop, alleles, NO_GENOTYPES, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null, false);
+        this(source, contig, start, stop, alleles, NO_GENOTYPES, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null, null, false); // no filters, attributes or padded reference base; genotypesAreUnparsed = false
     }
 
     /**
@@ -292,7 +312,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
      * @param other the VariantContext to copy
      */
     public VariantContext(VariantContext other) {
-        this(other.getSource(), other.getChr(), other.getStart(), other.getEnd() , other.getAlleles(), other.getGenotypes(), other.getNegLog10PError(), other.filtersWereApplied() ? other.getFilters() : null, other.getAttributes(), false);
+        this(other.getSource(), other.getChr(), other.getStart(), other.getEnd() , other.getAlleles(), other.getGenotypes(), other.getNegLog10PError(), other.filtersWereApplied() ? other.getFilters() : null, other.getAttributes(), other.REFERENCE_BASE_FOR_INDEL, false); // copy constructor: the padded reference base of other is copied too
     }
 
     /**
@@ -307,8 +327,13 @@ public class VariantContext implements Feature { // to enable tribble intergrati
      * @param negLog10PError  qual
      * @param filters         filters: use null for unfiltered and empty set for passes filters
      * @param attributes      attributes
+     * @param referenceBaseForIndel   padded reference base
+     * @param genotypesAreUnparsed    true if the genotypes have not yet been parsed
      */
-    private VariantContext(String source, String contig, long start, long stop, Collection alleles, Map genotypes, double negLog10PError, Set filters, Map attributes, boolean genotypesAreUnparsed) {
+    private VariantContext(String source, String contig, long start, long stop,
+                           Collection alleles, Map genotypes,
+                           double negLog10PError, Set filters, Map attributes,
+                           Byte referenceBaseForIndel, boolean genotypesAreUnparsed) {
         if ( contig == null ) { throw new IllegalArgumentException("Contig cannot be null"); }
         this.contig = contig;
         this.start = start;
@@ -323,6 +348,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
 
         this.commonInfo = new InferredGeneticContext(source, negLog10PError, filters, attributes);
         filtersWereAppliedToContext = filters != null;
+        REFERENCE_BASE_FOR_INDEL = referenceBaseForIndel;
 
         if ( alleles == null ) { throw new IllegalArgumentException("Alleles cannot be null"); }
 
@@ -355,23 +381,27 @@ public class VariantContext implements Feature { // to enable tribble intergrati
     // ---------------------------------------------------------------------------------------------------------
 
     public static VariantContext modifyGenotypes(VariantContext vc, Map genotypes) {
-        return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, new HashMap(vc.getAttributes()), false);
+        return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, new HashMap(vc.getAttributes()), vc.getReferenceBaseForIndel(), false); // copy of vc with the given genotypes; the padded reference base of vc is preserved
     }
 
     public static VariantContext modifyLocation(VariantContext vc, String chr, int start, int end) {
-        return new VariantContext(vc.getSource(), chr, start, end, vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, new HashMap(vc.getAttributes()), true);
+        return new VariantContext(vc.getSource(), chr, start, end, vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, new HashMap(vc.getAttributes()), vc.getReferenceBaseForIndel(), true); // copy of vc at the new location; the padded reference base of vc is preserved
     }
 
     public static VariantContext modifyFilters(VariantContext vc, Set filters) {
-        return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd() , vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), filters, new HashMap(vc.getAttributes()), true);
+        return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd() , vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), filters, new HashMap(vc.getAttributes()), vc.getReferenceBaseForIndel(), true); // copy of vc with the given filters; the padded reference base of vc is preserved
     }
 
     public static VariantContext modifyAttributes(VariantContext vc, Map attributes) {
-        return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, attributes, true);
+        return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, attributes, vc.getReferenceBaseForIndel(), true); // copy of vc with the given attributes; the padded reference base of vc is preserved
+    }
+
+    public static VariantContext modifyReferencePadding(VariantContext vc, Byte b) { // returns a copy of vc whose padded reference base is b
+        return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, vc.getAttributes(), b, true); // NOTE(review): unlike modifyGenotypes/modifyLocation/modifyFilters, the attribute map is passed through rather than copied into a new HashMap -- confirm this is intended
     }
 
     public static VariantContext modifyPErrorFiltersAndAttributes(VariantContext vc, double negLog10PError, Set filters, Map attributes) {
-        return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), vc.genotypes, negLog10PError, filters, attributes, true);
+        return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), vc.genotypes, negLog10PError, filters, attributes, vc.getReferenceBaseForIndel(), true); // copy of vc with new error, filters and attributes; the padded reference base of vc is preserved
     }
 
     // ---------------------------------------------------------------------------------------------------------
@@ -414,7 +444,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
      * @return vc subcontext
      */
     public VariantContext subContextFromGenotypes(Collection genotypes, Set alleles) {
-        return new VariantContext(getSource(), contig, start, stop, alleles, genotypes, getNegLog10PError(), filtersWereApplied() ? getFilters() : null, getAttributes());
+        return new VariantContext(getSource(), contig, start, stop, alleles, genotypes != null ? genotypeCollectionToMap(new TreeMap(), genotypes) : null, getNegLog10PError(), filtersWereApplied() ? getFilters() : null, getAttributes(), getReferenceBaseForIndel()); // the genotype collection is converted to a map here so the constructor that accepts the padded reference base can be used
     }
 
 
@@ -603,6 +633,15 @@ public class VariantContext implements Feature { // to enable tribble intergrati
         return (String)commonInfo.getAttribute(ID_KEY);
     }
 
+    public boolean hasReferenceBaseForIndel() { // true when a padded reference base was supplied at construction
+        return REFERENCE_BASE_FOR_INDEL != null;
+    }
+
+    // the indel base that gets stripped off for indels; null when none was supplied (the attribute-based version this replaces returned 'N' in that case)
+    public Byte getReferenceBaseForIndel() {
+        return REFERENCE_BASE_FOR_INDEL;
+    }
+
     // ---------------------------------------------------------------------------------------------------------
     //
     // get routines to access context info fields
@@ -1020,11 +1059,12 @@ public class VariantContext implements Feature { // to enable tribble intergrati
      * Run all extra-strict validation tests on a Variant Context object
      *
      * @param reference        the true reference allele
+     * @param paddedRefBase    the reference base used for padding indels
      * @param rsIDs            the true dbSNP IDs
      */
-    public void extraStrictValidation(Allele reference, Set rsIDs) {
+    public void extraStrictValidation(Allele reference, Byte paddedRefBase, Set rsIDs) {
         // validate the reference
-        validateReferenceBases(reference);
+        validateReferenceBases(reference, paddedRefBase);
 
         // validate the RS IDs
         validateRSIDs(rsIDs);
@@ -1039,11 +1079,15 @@ public class VariantContext implements Feature { // to enable tribble intergrati
         //checkReferenceTrack();
     }
 
-    public void validateReferenceBases(Allele reference) {
+    public void validateReferenceBases(Allele reference, Byte paddedRefBase) { // paddedRefBase: the true reference base used for padding indels; may be null
         // don't validate if we're an insertion
         if ( !reference.isNull() && !reference.basesMatch(getReference()) ) {
             throw new TribbleException.InternalCodecException(String.format("the REF allele is incorrect for the record at position %s:%d, %s vs. %s", getChr(), getStart(), reference.getBaseString(), getReference().getBaseString()));
         }
+
+        // we also need to validate the padding base for simple indels (Byte.equals(null) is false, so a null paddedRefBase is reported as a mismatch)
+        if ( hasReferenceBaseForIndel() && !getReferenceBaseForIndel().equals(paddedRefBase) )
+            throw new TribbleException.InternalCodecException(String.format("the padded REF base is incorrect for the record at position %s:%d, %s vs. %s", getChr(), getStart(), (char)getReferenceBaseForIndel().byteValue(), paddedRefBase == null ? "null" : String.valueOf((char)paddedRefBase.byteValue()))); // null-safe formatting: a missing padding base must raise this exception, not a NullPointerException
     }
 
     public void validateRSIDs(Set rsIDs) {
@@ -1151,6 +1195,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
 
     private boolean validate(boolean throwException) {
         try {
+            validateReferencePadding();
             validateAlleles();
             validateGenotypes();
         } catch ( IllegalArgumentException e ) {
@@ -1163,6 +1208,13 @@ public class VariantContext implements Feature { // to enable tribble intergrati
         return true;
     }
 
+    private void validateReferencePadding() { // throws a ReviewedStingException when a padded reference base is required but missing
+        boolean needsPadding = hasSymbolicAlleles() || (getReference().length() == getEnd() - getStart()); // symbolic alleles always need padding; otherwise off by one because padded base was removed
+
+        if ( needsPadding && !hasReferenceBaseForIndel() ) // padding is required but no base was supplied
+            throw new ReviewedStingException("Badly formed variant context at location " + getChr() + ":" + getStart() + "; no padded reference base was provided.");
+    }
+
     private void validateAlleles() {
         // check alleles
         boolean alreadySeenRef = false, alreadySeenNull = false;
@@ -1221,16 +1273,6 @@ public class VariantContext implements Feature { // to enable tribble intergrati
     //
     // ---------------------------------------------------------------------------------------------------------
 
-    // the indel base that gets stripped off for indels
-    public boolean hasReferenceBaseForIndel() {
-        return hasAttribute(REFERENCE_BASE_FOR_INDEL_KEY);
-    }
-
-    // the indel base that gets stripped off for indels
-    public byte getReferenceBaseForIndel() {
-        return hasReferenceBaseForIndel() ? (Byte)getAttribute(REFERENCE_BASE_FOR_INDEL_KEY) : (byte)'N';
-    }
-
     private void determineType() {
         if ( type == null ) {
             switch ( getNAlleles() ) {
@@ -1357,8 +1399,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
         return false;
     }
 
-    public static VariantContext createVariantContextWithPaddedAlleles(VariantContext inputVC, byte inputRefBase, boolean refBaseShouldBeAppliedToEndOfAlleles) {
-        Allele refAllele = inputVC.getReference();
+    public static VariantContext createVariantContextWithPaddedAlleles(VariantContext inputVC, boolean refBaseShouldBeAppliedToEndOfAlleles) {
 
         // see if we need to pad common reference base from all alleles
         boolean padVC;
@@ -1368,31 +1409,20 @@ public class VariantContext implements Feature { // to enable tribble intergrati
         long locLength = (inputVC.getEnd() - inputVC.getStart()) + 1;
         if (inputVC.hasSymbolicAlleles())
             padVC = true;
-        else if (refAllele.length() == locLength)
+        else if (inputVC.getReference().length() == locLength)
             padVC = false;
-        else if (refAllele.length() == locLength-1)
+        else if (inputVC.getReference().length() == locLength-1)
             padVC = true;
         else throw new IllegalArgumentException("Badly formed variant context at location " + String.valueOf(inputVC.getStart()) +
                     " in contig " + inputVC.getChr() + ". Reference length must be at most one base shorter than location size");
 
-
         // nothing to do if we don't need to pad bases
         if (padVC) {
-            Byte refByte;
 
-            Map attributes = inputVC.getAttributes();
+            if ( !inputVC.hasReferenceBaseForIndel() )
+                throw new ReviewedStingException("Badly formed variant context at location " + inputVC.getChr() + ":" + inputVC.getStart() + "; no padded reference base is available.");
 
-            // upper-case for consistency; note that we can safely make these casts because the input is constrained to be a byte
-            inputRefBase = (byte)Character.toUpperCase((char)inputRefBase);
-            if (attributes.containsKey(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY))
-                refByte = (Byte)attributes.get(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY);
-            else if (inputRefBase == 'A' || inputRefBase == 'T' || inputRefBase == 'C' || inputRefBase == 'G' || inputRefBase == 'N')
-                refByte = inputRefBase;
-            else
-                throw new IllegalArgumentException("Error when trying to pad Variant Context at location " + String.valueOf(inputVC.getStart())
-                        + " in contig " + inputVC.getChr() +
-                        ". Either input reference base ("+(char)inputRefBase+
-                        ", ascii code="+inputRefBase+") must be a regular base, or input VC must contain reference base key");
+            Byte refByte = inputVC.getReferenceBaseForIndel();
 
             List alleles = new ArrayList();
             Map genotypes = new TreeMap();
@@ -1444,11 +1474,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
 
             // Do not change the filter state if filters were not applied to this context
             Set inputVCFilters = inputVC.filtersWereAppliedToContext ? inputVC.getFilters() : null;
-            return new VariantContext(inputVC.getSource(), inputVC.getChr(), inputVC.getStart(), inputVC.getEnd(), alleles, genotypes, inputVC.getNegLog10PError(),
-                    inputVCFilters, attributes);
-
-
-
+            return new VariantContext(inputVC.getSource(), inputVC.getChr(), inputVC.getStart(), inputVC.getEnd(), alleles, genotypes, inputVC.getNegLog10PError(), inputVCFilters, inputVC.getAttributes());
         }
         else
             return inputVC;
diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java
index 212600360..fa039b42e 100755
--- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java
@@ -295,10 +295,7 @@ public class VariantContextUtils {
     @Requires("vc != null")
     @Ensures("result != null")
     public static VariantContext sitesOnlyVariantContext(VariantContext vc) {
-        return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(),
-                vc.getAlleles(), vc.getNegLog10PError(),
-                vc.filtersWereApplied() ? vc.getFilters() : null,
-                vc.getAttributes());
+        return VariantContext.modifyGenotypes(vc, null); // NOTE(review): this hands a null genotype map to the VariantContext constructor, whereas the removed code used the genotype-less constructor -- confirm null is handled as "no genotypes"
     }
 
     /**
@@ -449,7 +446,7 @@ public class VariantContextUtils {
                                              FilteredRecordMergeType filteredRecordMergeType, GenotypeMergeType genotypeMergeOptions,
                                              boolean annotateOrigin, boolean printMessages, byte inputRefBase ) {
 
-        return simpleMerge(genomeLocParser, unsortedVCs, priorityListOfVCs, filteredRecordMergeType, genotypeMergeOptions, annotateOrigin, printMessages, inputRefBase, "set", false, false);
+        return simpleMerge(genomeLocParser, unsortedVCs, priorityListOfVCs, filteredRecordMergeType, genotypeMergeOptions, annotateOrigin, printMessages, "set", false, false);
     }
 
     /**
@@ -464,7 +461,6 @@ public class VariantContextUtils {
      * @param genotypeMergeOptions      merge option for genotypes
      * @param annotateOrigin            should we annotate the set it came from?
      * @param printMessages             should we print messages?
-     * @param inputRefBase              the ref base
      * @param setKey                    the key name of the set
      * @param filteredAreUncalled       are filtered records uncalled?
      * @param mergeInfoWithMaxAC        should we merge in info from the VC with maximum allele count?
@@ -472,7 +468,7 @@ public class VariantContextUtils {
      */
     public static VariantContext simpleMerge(GenomeLocParser genomeLocParser, Collection unsortedVCs, List priorityListOfVCs,
                                              FilteredRecordMergeType filteredRecordMergeType, GenotypeMergeType genotypeMergeOptions,
-                                             boolean annotateOrigin, boolean printMessages, byte inputRefBase, String setKey,
+                                             boolean annotateOrigin, boolean printMessages, String setKey,
                                              boolean filteredAreUncalled, boolean mergeInfoWithMaxAC ) {
         if ( unsortedVCs == null || unsortedVCs.size() == 0 )
             return null;
@@ -490,7 +486,7 @@ public class VariantContextUtils {
         for (VariantContext vc : prepaddedVCs) {
             // also a reasonable place to remove filtered calls, if needed
             if ( ! filteredAreUncalled || vc.isNotFiltered() )
-                VCs.add(VariantContext.createVariantContextWithPaddedAlleles(vc,inputRefBase,false));
+                VCs.add(VariantContext.createVariantContextWithPaddedAlleles(vc, false));
         }
         if ( VCs.size() == 0 ) // everything is filtered out and we're filteredAreUncalled
             return null;
@@ -592,6 +588,14 @@ public class VariantContextUtils {
             }
         }
 
+        // if we have more alternate alleles in the merged VC than in one or more of the original VCs, we need to strip out the GL/PLs (because they are no longer accurate)
+        for ( VariantContext vc : VCs ) {
+            if ( vc.alleles.size() != alleles.size() ) {
+                genotypes = stripPLs(genotypes);
+                break;
+            }
+        }
+
         // take the VC with the maxAC and pull the attributes into a modifiable map
         if ( mergeInfoWithMaxAC && vcWithMaxAC != null ) {
             attributesWithMaxAC.putAll(vcWithMaxAC.getAttributes());
@@ -637,6 +641,16 @@ public class VariantContextUtils {
         return merged;
     }
 
+    public static Map stripPLs(Map genotypes) { // returns a new map in which every genotype that has likelihoods is replaced by a copy without them
+        Map newGs = new HashMap(genotypes.size()); // the input map itself is never modified
+
+        for ( Map.Entry g : genotypes.entrySet() ) {
+            newGs.put(g.getKey(), g.getValue().hasLikelihoods() ? Genotype.removePLs(g.getValue()) : g.getValue()); // genotypes without likelihoods are reused as-is
+        }
+
+        return newGs;
+    }
+
     public static Map> separateVariantContextsByType(Collection VCs) {
         HashMap> mappedVCs = new HashMap>();
         for ( VariantContext vc : VCs ) {
diff --git a/public/java/test/net/sf/picard/reference/FastaSequenceIndexBuilderUnitTest.java b/public/java/test/net/sf/picard/reference/FastaSequenceIndexBuilderUnitTest.java
index 27b76537f..cf0f9051e 100644
--- a/public/java/test/net/sf/picard/reference/FastaSequenceIndexBuilderUnitTest.java
+++ b/public/java/test/net/sf/picard/reference/FastaSequenceIndexBuilderUnitTest.java
@@ -27,7 +27,6 @@ package net.sf.picard.reference;
 
 import org.testng.Assert;
 import org.broadinstitute.sting.BaseTest;
-import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSourceProgressListener;
 import org.testng.annotations.BeforeMethod;
 import org.testng.annotations.Test;
 
@@ -40,7 +39,6 @@ import java.io.FileNotFoundException;
 public class FastaSequenceIndexBuilderUnitTest extends BaseTest {
 
     private FastaSequenceIndexBuilder builder;
-    private ReferenceDataSourceProgressListener progress;
     private File fastaFile;
     private FastaSequenceIndex controlIndex;
 
@@ -58,7 +56,7 @@ public class FastaSequenceIndexBuilderUnitTest extends BaseTest {
         logger.warn("Executing unixFileTest");
 
         fastaFile = new File(validationDataLocation + "exampleFASTA.fasta");
-        builder = new FastaSequenceIndexBuilder(fastaFile, progress);
+        builder = new FastaSequenceIndexBuilder(fastaFile, false);
         FastaSequenceIndex index = builder.createIndex();
         controlIndex.add(new FastaSequenceIndexEntry("chr1", 6, 100000, 60, 61,0));
 
@@ -75,7 +73,7 @@ public class FastaSequenceIndexBuilderUnitTest extends BaseTest {
         logger.warn("Executing windowsFileTest");
 
         fastaFile = new File(validationDataLocation + "exampleFASTA-windows.fasta");
-        builder = new FastaSequenceIndexBuilder(fastaFile, progress);
+        builder = new FastaSequenceIndexBuilder(fastaFile, false);
         FastaSequenceIndex index = builder.createIndex();
         controlIndex.add(new FastaSequenceIndexEntry("chr2", 7, 29, 7, 9,0));
 
@@ -91,7 +89,7 @@ public class FastaSequenceIndexBuilderUnitTest extends BaseTest {
         logger.warn("Executing combinedWindowsUnix");
 
         fastaFile = new File(validationDataLocation + "exampleFASTA-combined.fasta");
-        builder = new FastaSequenceIndexBuilder(fastaFile, progress);
+        builder = new FastaSequenceIndexBuilder(fastaFile, false);
         FastaSequenceIndex index = builder.createIndex();
         controlIndex.add(new FastaSequenceIndexEntry("chr1", 6, 100000, 60, 61,0));
         controlIndex.add(new FastaSequenceIndexEntry("chr2", 101680, 29, 7, 9,1));
@@ -108,7 +106,7 @@ public class FastaSequenceIndexBuilderUnitTest extends BaseTest {
         logger.warn("Executing threeVariableLengthContigs");
 
         fastaFile = new File(validationDataLocation + "exampleFASTA-3contigs.fasta");
-        builder = new FastaSequenceIndexBuilder(fastaFile, progress);
+        builder = new FastaSequenceIndexBuilder(fastaFile, false);
         FastaSequenceIndex index = builder.createIndex();
         controlIndex.add(new FastaSequenceIndexEntry("chr1", 6, 17, 5, 6,0));
         controlIndex.add(new FastaSequenceIndexEntry("chr2", 35, 21, 7, 8,1));
diff --git a/public/java/test/org/broadinstitute/sting/BaseTest.java b/public/java/test/org/broadinstitute/sting/BaseTest.java
index ef46d4bff..7a749c0a2 100755
--- a/public/java/test/org/broadinstitute/sting/BaseTest.java
+++ b/public/java/test/org/broadinstitute/sting/BaseTest.java
@@ -64,9 +64,8 @@ public abstract class BaseTest {
     public static final String b37Refseq = refseqAnnotationLocation + "refGene-big-table-b37.txt";
 
     public static final String dbsnpDataLocation = GATKDataLocation;
-    public static final String hg18dbSNP129 = dbsnpDataLocation + "dbsnp_129_hg18.rod";
-    public static final String b36dbSNP129 = dbsnpDataLocation + "dbsnp_129_b36.rod";
-    public static final String b37dbSNP129 = dbsnpDataLocation + "dbsnp_129_b37.rod";
+    public static final String b36dbSNP129 = dbsnpDataLocation + "dbsnp_129_b36.vcf";
+    public static final String b37dbSNP129 = dbsnpDataLocation + "dbsnp_129_b37.vcf";
     public static final String b37dbSNP132 = dbsnpDataLocation + "dbsnp_132_b37.vcf";
 
     public static final String hapmapDataLocation = comparisonDataLocation + "Validated/HapMap/3.3/";
diff --git a/public/java/test/org/broadinstitute/sting/MD5DB.java b/public/java/test/org/broadinstitute/sting/MD5DB.java
index bea9eaec5..0194e114a 100644
--- a/public/java/test/org/broadinstitute/sting/MD5DB.java
+++ b/public/java/test/org/broadinstitute/sting/MD5DB.java
@@ -47,6 +47,7 @@ public class MD5DB {
     /**
      * Subdirectory under the ant build directory where we store integration test md5 results
      */
+    private static final int MAX_RECORDS_TO_READ = 10000;
     public static final String LOCAL_MD5_DB_DIR = "integrationtests";
     public static final String GLOBAL_MD5_DB_DIR = "/humgen/gsa-hpprojects/GATK/data/integrationtests";
 
@@ -78,8 +79,8 @@ public class MD5DB {
      * @return
      */
     public static String getMD5FilePath(final String md5, final String valueIfNotFound) {
-        // we prefer the local db to the global DB, so match it first
-        for ( String dir : Arrays.asList(LOCAL_MD5_DB_DIR, GLOBAL_MD5_DB_DIR)) {
+        // we prefer the global db to the local DB, so match it first
+        for ( String dir : Arrays.asList(GLOBAL_MD5_DB_DIR, LOCAL_MD5_DB_DIR)) {
             File f = getFileForMD5(md5, dir);
             if ( f.exists() && f.canRead() )
                 return f.getPath();
@@ -232,7 +233,7 @@ public class MD5DB {
 
                     // inline differences
                     DiffEngine.SummaryReportParams params = new DiffEngine.SummaryReportParams(System.out, 20, 10, 0);
-                    boolean success = DiffEngine.simpleDiffFiles(new File(pathToExpectedMD5File), new File(pathToFileMD5File), params);
+                    boolean success = DiffEngine.simpleDiffFiles(new File(pathToExpectedMD5File), new File(pathToFileMD5File), MAX_RECORDS_TO_READ, params);
                     if ( success )
                         System.out.printf("Note that the above list is not comprehensive.  At most 20 lines of output, and 10 specific differences will be listed.  Please use -T DiffObjects -R public/testdata/exampleFASTA.fasta -m %s -t %s to explore the differences more freely%n",
                                 pathToExpectedMD5File, pathToFileMD5File);
diff --git a/public/java/test/org/broadinstitute/sting/commandline/ParsingEngineUnitTest.java b/public/java/test/org/broadinstitute/sting/commandline/ParsingEngineUnitTest.java
index 54e3b35bc..366401ad6 100755
--- a/public/java/test/org/broadinstitute/sting/commandline/ParsingEngineUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/commandline/ParsingEngineUnitTest.java
@@ -25,12 +25,17 @@
 
 package org.broadinstitute.sting.commandline;
 
+import org.broad.tribble.Feature;
+import org.broadinstitute.sting.gatk.refdata.features.beagle.BeagleFeature;
+import org.broadinstitute.sting.utils.exceptions.UserException;
+import org.broadinstitute.sting.utils.variantcontext.VariantContext;
 import org.testng.Assert;
 import org.broadinstitute.sting.BaseTest;
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 import org.testng.annotations.BeforeMethod;
 import org.testng.annotations.Test;
 
+import javax.script.Bindings;
 import java.util.List;
 import java.util.EnumSet;
 /**
@@ -42,6 +47,7 @@ public class ParsingEngineUnitTest extends BaseTest {
     @BeforeMethod
     public void setUp() {
         parsingEngine = new ParsingEngine(null);
+        RodBinding.resetNameCounter();
     }
 
     private class InputFileArgProvider {
@@ -62,7 +68,7 @@ public class ParsingEngineUnitTest extends BaseTest {
 
         Assert.assertEquals(argProvider.inputFile,"na12878.bam","Argument is not correctly initialized");
     }
-    
+
     @Test
     public void multiCharShortNameArgumentTest() {
         final String[] commandLine = new String[] {"-out","out.txt"};
@@ -211,7 +217,7 @@ public class ParsingEngineUnitTest extends BaseTest {
 
         Assert.assertEquals(argProvider.testEnum, TestEnum.ONE, "Enum value is not correct");
     }
-    
+
     @Test
     public void enumDefaultTest() {
         final String[] commandLine = new String[] {};
@@ -552,7 +558,7 @@ public class ParsingEngineUnitTest extends BaseTest {
         commandLine = new String[] {"--foo","5","--bar","6"};
 
         parsingEngine.parse( commandLine );
-        parsingEngine.validate();        
+        parsingEngine.validate();
     }
 
     private class MutuallyExclusiveArgProvider {
@@ -618,4 +624,301 @@ public class ParsingEngineUnitTest extends BaseTest {
         @ArgumentCollection
         RequiredArgProvider rap2 = new RequiredArgProvider();
     }
+
+    // --------------------------------------------------------------------------------
+    //
+    // Tests of the RodBinding system
+    //
+    // --------------------------------------------------------------------------------
+
+    private class SingleRodBindingArgProvider {   // one optional binding, declared as fullName "binding" / shortName "V"
+        @Input(fullName="binding", shortName="V", required=false)
+        public RodBinding binding = RodBinding.makeUnbound(Feature.class);   // starts out unbound
+    }
+
+    /** A type-tagged "-V:vcf foo.vcf" argument must be loaded as a bound Feature binding called "binding". */
+    @Test
+    public void basicRodBindingArgumentTest() {
+        parsingEngine.addArgumentSource( SingleRodBindingArgProvider.class );
+        parsingEngine.parse( new String[] {"-V:vcf","foo.vcf"} );
+        parsingEngine.validate();
+
+        final SingleRodBindingArgProvider provider = new SingleRodBindingArgProvider();
+        parsingEngine.loadArgumentsIntoObject( provider );
+
+        // "vcf" is the only tag on the command line, hence exactly one positional tag
+        Assert.assertEquals(provider.binding.getName(), "binding", "Name isn't set properly");
+        Assert.assertEquals(provider.binding.getSource(), "foo.vcf", "Source isn't set to its expected value");
+        Assert.assertEquals(provider.binding.getType(), Feature.class, "Type isn't set to its expected value");
+        Assert.assertTrue(provider.binding.isBound(), "Bound() isn't returning its expected value");
+        Assert.assertEquals(provider.binding.getTags().getPositionalTags().size(), 1, "Tags aren't correctly set");
+    }
+
+    private class ShortNameOnlyRodBindingArgProvider {   // declares only a short name ("short"), no fullName
+        @Input(shortName="short", required=false)
+        public RodBinding binding = RodBinding.makeUnbound(Feature.class);   // starts out unbound
+    }
+
+    /** An argument declared with only a short name must still load as a bound binding called "binding". */
+    @Test
+    public void shortNameOnlyRodBindingArgumentTest() {
+        parsingEngine.addArgumentSource( ShortNameOnlyRodBindingArgProvider.class );
+        parsingEngine.parse( new String[] {"-short:vcf","foo.vcf"} );
+        parsingEngine.validate();
+
+        final ShortNameOnlyRodBindingArgProvider provider = new ShortNameOnlyRodBindingArgProvider();
+        parsingEngine.loadArgumentsIntoObject( provider );
+
+        // same expectations as the full-name case: the name is "binding" even though only "short" was declared
+        Assert.assertEquals(provider.binding.getName(), "binding", "Name isn't set properly");
+        Assert.assertEquals(provider.binding.getSource(), "foo.vcf", "Source isn't set to its expected value");
+        Assert.assertEquals(provider.binding.getType(), Feature.class, "Type isn't set to its expected value");
+        Assert.assertTrue(provider.binding.isBound(), "Bound() isn't returning its expected value");
+        Assert.assertEquals(provider.binding.getTags().getPositionalTags().size(), 1, "Tags aren't correctly set");
+    }
+
+    /** With no arguments the optional binding must come back unbound, with placeholder name/source and no tags. */
+    @Test
+    public void unbasicRodBindingArgumentTest() {
+        parsingEngine.addArgumentSource( SingleRodBindingArgProvider.class );
+        parsingEngine.parse( new String[] {} );
+        parsingEngine.validate();
+
+        final SingleRodBindingArgProvider provider = new SingleRodBindingArgProvider();
+        parsingEngine.loadArgumentsIntoObject( provider );
+
+        // name and source are compared against the UNBOUND_* constants exposed by RodBinding
+        Assert.assertEquals(provider.binding.getName(), RodBinding.UNBOUND_VARIABLE_NAME, "Name isn't set properly");
+        Assert.assertEquals(provider.binding.getSource(), RodBinding.UNBOUND_SOURCE, "Source isn't set to its expected value");
+        Assert.assertEquals(provider.binding.getType(), Feature.class, "Type isn't set to its expected value");
+        Assert.assertFalse(provider.binding.isBound(), "Bound() isn't returning its expected value");
+        Assert.assertEquals(provider.binding.getTags().getPositionalTags().size(), 0, "Tags aren't correctly set");
+    }
+
+    /** "-V foo.vcf" without any type tag is expected to end in a UserException. */
+    @Test(expectedExceptions = UserException.class)
+    public void rodBindingArgumentTestMissingType() {
+        parsingEngine.addArgumentSource( SingleRodBindingArgProvider.class );
+        parsingEngine.parse( new String[] {"-V","foo.vcf"} );
+        parsingEngine.validate();
+
+        // no assertions: the test passes only if one of the engine calls throws
+        final SingleRodBindingArgProvider provider = new SingleRodBindingArgProvider();
+        parsingEngine.loadArgumentsIntoObject(provider);
+    }
+
+    /** Three positional tags ("-V:x,y,z") are expected to end in a UserException. */
+    @Test(expectedExceptions = UserException.class)
+    public void rodBindingArgumentTestTooManyTags() {
+        parsingEngine.addArgumentSource( SingleRodBindingArgProvider.class );
+        parsingEngine.parse( new String[] {"-V:x,y,z","foo.vcf"} );
+        parsingEngine.validate();
+
+        // no assertions: the test passes only if one of the engine calls throws
+        final SingleRodBindingArgProvider provider = new SingleRodBindingArgProvider();
+        parsingEngine.loadArgumentsIntoObject(provider);
+    }
+
+    private class VariantContextRodBindingArgProvider {   // NOTE(review): tests expect getType() == VariantContext.class, so the field presumably carried a <VariantContext> type parameter — confirm
+        @Input(fullName = "binding", shortName="V")
+        public RodBinding binding;   // no initial value; the tests read it after loadArgumentsIntoObject()
+    }
+
+    /** "-V:vcf foo.vcf" loaded into the VariantContext provider must report VariantContext as its type. */
+    @Test
+    public void variantContextBindingArgumentTest() {
+        parsingEngine.addArgumentSource( VariantContextRodBindingArgProvider.class );
+        parsingEngine.parse( new String[] {"-V:vcf","foo.vcf"} );
+        parsingEngine.validate();
+
+        final VariantContextRodBindingArgProvider provider = new VariantContextRodBindingArgProvider();
+        parsingEngine.loadArgumentsIntoObject( provider );
+
+        // "vcf" is the only tag, so exactly one positional tag is expected
+        Assert.assertEquals(provider.binding.getName(), "binding", "Name isn't set properly");
+        Assert.assertEquals(provider.binding.getSource(), "foo.vcf", "Source isn't set to its expected value");
+        Assert.assertEquals(provider.binding.getType(), VariantContext.class, "Type isn't set to its expected value");
+        Assert.assertEquals(provider.binding.getTags().getPositionalTags().size(), 1, "Tags aren't correctly set");
+    }
+
+    /** Same as the "vcf" case, but with the "vcf3" type tag; the binding type must still be VariantContext. */
+    @Test
+    public void variantContextBindingArgumentTestVCF3() {
+        parsingEngine.addArgumentSource( VariantContextRodBindingArgProvider.class );
+        parsingEngine.parse( new String[] {"-V:vcf3","foo.vcf"} );
+        parsingEngine.validate();
+
+        final VariantContextRodBindingArgProvider provider = new VariantContextRodBindingArgProvider();
+        parsingEngine.loadArgumentsIntoObject( provider );
+
+        // "vcf3" is the only tag, so exactly one positional tag is expected
+        Assert.assertEquals(provider.binding.getName(), "binding", "Name isn't set properly");
+        Assert.assertEquals(provider.binding.getSource(), "foo.vcf", "Source isn't set to its expected value");
+        Assert.assertEquals(provider.binding.getType(), VariantContext.class, "Type isn't set to its expected value");
+        Assert.assertEquals(provider.binding.getTags().getPositionalTags().size(), 1, "Tags aren't correctly set");
+    }
+
+    private class ListRodBindingArgProvider {   // optional -V argument collected into a list; left null when -V is absent
+        @Input(fullName = "binding", shortName="V", required=false)
+        public List<RodBinding<Feature>> bindings;   // element type Feature matches the getType() assertions in the list tests
+    }
+
+    /** A single "-V:vcf foo.vcf" must populate the list with exactly one Feature binding. */
+    @Test
+    public void listRodBindingArgumentTest() {
+        parsingEngine.addArgumentSource( ListRodBindingArgProvider.class );
+        parsingEngine.parse( new String[] {"-V:vcf","foo.vcf"} );
+        parsingEngine.validate();
+
+        final ListRodBindingArgProvider provider = new ListRodBindingArgProvider();
+        parsingEngine.loadArgumentsIntoObject( provider );
+
+        Assert.assertEquals(provider.bindings.size(), 1, "Unexpected number of bindings");
+        // the lone element is checked field by field
+        RodBinding only = provider.bindings.get(0);
+        Assert.assertEquals(only.getName(), "binding", "Name isn't set properly");
+        Assert.assertEquals(only.getSource(), "foo.vcf", "Source isn't set to its expected value");
+        Assert.assertEquals(only.getType(), Feature.class, "Type isn't set to its expected value");
+        Assert.assertEquals(only.getTags().getPositionalTags().size(), 1, "Tags aren't correctly set");
+    }
+
+    /** Two "-V:vcf" arguments must produce two list entries, in command-line order. */
+    @Test
+    public void listRodBindingArgumentTest2Args() {
+        parsingEngine.addArgumentSource( ListRodBindingArgProvider.class );
+        parsingEngine.parse( new String[] {"-V:vcf","foo.vcf", "-V:vcf", "bar.vcf"} );
+        parsingEngine.validate();
+
+        final ListRodBindingArgProvider provider = new ListRodBindingArgProvider();
+        parsingEngine.loadArgumentsIntoObject( provider );
+
+        Assert.assertEquals(provider.bindings.size(), 2, "Unexpected number of bindings");
+
+        RodBinding first = provider.bindings.get(0);
+        Assert.assertEquals(first.getName(), "binding", "Name isn't set properly");
+        Assert.assertEquals(first.getSource(), "foo.vcf", "Source isn't set to its expected value");
+        Assert.assertEquals(first.getType(), Feature.class, "Type isn't set to its expected value");
+        Assert.assertEquals(first.getTags().getPositionalTags().size(), 1, "Tags aren't correctly set");
+
+        // the second entry is expected under the name "binding2" rather than a repeated "binding"
+        RodBinding second = provider.bindings.get(1);
+        Assert.assertEquals(second.getName(), "binding2", "Name isn't set properly");
+        Assert.assertEquals(second.getSource(), "bar.vcf", "Source isn't set to its expected value");
+        Assert.assertEquals(second.getType(), Feature.class, "Type isn't set to its expected value");
+        Assert.assertEquals(second.getTags().getPositionalTags().size(), 1, "Tags aren't correctly set");
+    }
+
+    /** With no -V argument at all, the optional list field must be left null. */
+    @Test
+    public void listRodBindingArgumentTest0Args() {
+        parsingEngine.addArgumentSource( ListRodBindingArgProvider.class );
+        parsingEngine.parse( new String[] {} );
+        parsingEngine.validate();
+
+        final ListRodBindingArgProvider provider = new ListRodBindingArgProvider();
+        parsingEngine.loadArgumentsIntoObject( provider );
+
+        // null, not an empty list, is the expected outcome here
+        Assert.assertNull(provider.bindings, "Bindings were not null");
+    }
+
+    /** Two bindings explicitly tagged with the same name "foo" must come back as "foo" and "foo2". */
+    @Test
+    public void listRodBindingArgumentTestExplicitlyNamed() {
+        parsingEngine.addArgumentSource( ListRodBindingArgProvider.class );
+        parsingEngine.parse( new String[] {"-V:foo,vcf","foo.vcf", "-V:foo,vcf", "bar.vcf"} );
+        parsingEngine.validate();
+
+        final ListRodBindingArgProvider provider = new ListRodBindingArgProvider();
+        parsingEngine.loadArgumentsIntoObject( provider );
+
+        // only count and names are checked; source, type and tags are covered by the other list tests
+        Assert.assertEquals(provider.bindings.size(), 2, "Unexpected number of bindings");
+        Assert.assertEquals(provider.bindings.get(0).getName(), "foo", "Name isn't set properly");
+        Assert.assertEquals(provider.bindings.get(1).getName(), "foo2", "Name isn't set properly");
+    }
+
+    private final static String HISEQ_VCF = testDir + "HiSeq.10000.vcf";   // input for the dynamic-typing tests below
+    private final static String TRANCHES_FILE = testDir + "tranches.6.txt";   // input for the test that expects a UserException
+
+    /** An untagged "-V" pointing at the HiSeq VCF must still load as a VariantContext binding with zero tags. */
+    @Test
+    public void variantContextBindingTestDynamicTyping1() {
+        parsingEngine.addArgumentSource( VariantContextRodBindingArgProvider.class );
+        parsingEngine.parse( new String[] {"-V", HISEQ_VCF} );
+        parsingEngine.validate();
+
+        final VariantContextRodBindingArgProvider provider = new VariantContextRodBindingArgProvider();
+        parsingEngine.loadArgumentsIntoObject( provider );
+
+        // nothing follows "-V" after a colon, so no positional tags are expected
+        Assert.assertEquals(provider.binding.getName(), "binding", "Name isn't set properly");
+        Assert.assertEquals(provider.binding.getSource(), HISEQ_VCF, "Source isn't set to its expected value");
+        Assert.assertEquals(provider.binding.getType(), VariantContext.class, "Type isn't set to its expected value");
+        Assert.assertEquals(provider.binding.getTags().getPositionalTags().size(), 0, "Tags aren't correctly set");
+    }
+
+    /** A single tag ("-V:name") on the HiSeq VCF must be taken as the binding's name. */
+    @Test
+    public void variantContextBindingTestDynamicTypingNameAsSingleArgument() {
+        parsingEngine.addArgumentSource( VariantContextRodBindingArgProvider.class );
+        parsingEngine.parse( new String[] {"-V:name", HISEQ_VCF} );
+        parsingEngine.validate();
+
+        final VariantContextRodBindingArgProvider provider = new VariantContextRodBindingArgProvider();
+        parsingEngine.loadArgumentsIntoObject( provider );
+
+        // the one tag supplies the name "name" and is still counted as a positional tag
+        Assert.assertEquals(provider.binding.getName(), "name", "Name isn't set properly");
+        Assert.assertEquals(provider.binding.getSource(), HISEQ_VCF, "Source isn't set to its expected value");
+        Assert.assertEquals(provider.binding.getType(), VariantContext.class, "Type isn't set to its expected value");
+        Assert.assertEquals(provider.binding.getTags().getPositionalTags().size(), 1, "Tags aren't correctly set");
+    }
+
+    /** Two tags ("-V:name,vcf") on the HiSeq VCF: the first must become the name, and both count as tags. */
+    @Test
+    public void variantContextBindingTestDynamicTypingTwoTagsPassing() {
+        parsingEngine.addArgumentSource( VariantContextRodBindingArgProvider.class );
+        parsingEngine.parse( new String[] {"-V:name,vcf", HISEQ_VCF} );
+        parsingEngine.validate();
+
+        final VariantContextRodBindingArgProvider provider = new VariantContextRodBindingArgProvider();
+        parsingEngine.loadArgumentsIntoObject( provider );
+
+        // "name" and "vcf" are both reported back as positional tags
+        Assert.assertEquals(provider.binding.getName(), "name", "Name isn't set properly");
+        Assert.assertEquals(provider.binding.getSource(), HISEQ_VCF, "Source isn't set to its expected value");
+        Assert.assertEquals(provider.binding.getType(), VariantContext.class, "Type isn't set to its expected value");
+        Assert.assertEquals(provider.binding.getTags().getPositionalTags().size(), 2, "Tags aren't correctly set");
+    }
+
+    /** Despite its name, this test expects loading to succeed, with "beagle" recorded as the tribble type. */
+    @Test
+    public void variantContextBindingTestDynamicTypingTwoTagsCausingTypeFailure() {
+        parsingEngine.addArgumentSource( VariantContextRodBindingArgProvider.class );
+        parsingEngine.parse( new String[] {"-V:name,beagle", HISEQ_VCF} );
+        parsingEngine.validate();
+
+        final VariantContextRodBindingArgProvider provider = new VariantContextRodBindingArgProvider();
+        parsingEngine.loadArgumentsIntoObject(provider);
+
+        // getType() stays VariantContext while getTribbleType() reflects the explicit "beagle" tag
+        Assert.assertEquals(provider.binding.getName(), "name", "Name isn't set properly");
+        Assert.assertEquals(provider.binding.getSource(), HISEQ_VCF, "Source isn't set to its expected value");
+        Assert.assertEquals(provider.binding.getType(), VariantContext.class, "Type isn't set to its expected value");
+        Assert.assertEquals(provider.binding.getTribbleType(), "beagle", "Type isn't set to its expected value");
+        Assert.assertEquals(provider.binding.getTags().getPositionalTags().size(), 2, "Tags aren't correctly set");
+    }
+
+    /** An untagged "-V" pointing at the tranches text file is expected to end in a UserException. */
+    @Test(expectedExceptions = UserException.class)
+    public void variantContextBindingTestDynamicTypingUnknownTribbleType() {
+        parsingEngine.addArgumentSource( VariantContextRodBindingArgProvider.class );
+        parsingEngine.parse( new String[] {"-V", TRANCHES_FILE} );
+        parsingEngine.validate();
+
+        // no assertions: the test passes only if one of the engine calls throws
+        final VariantContextRodBindingArgProvider provider = new VariantContextRodBindingArgProvider();
+        parsingEngine.loadArgumentsIntoObject( provider );
+    }
 }
diff --git a/public/java/test/org/broadinstitute/sting/commandline/RodBindingUnitTest.java b/public/java/test/org/broadinstitute/sting/commandline/RodBindingUnitTest.java
new file mode 100644
index 000000000..206f32532
--- /dev/null
+++ b/public/java/test/org/broadinstitute/sting/commandline/RodBindingUnitTest.java
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2011, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.commandline;
+
+import org.broadinstitute.sting.BaseTest;
+import org.broadinstitute.sting.utils.variantcontext.VariantContext;
+import org.testng.Assert;
+import org.testng.annotations.Test;
+import org.testng.annotations.BeforeMethod;
+
+/**
+ * Test suite for RodBinding: explicit construction, unbound bindings, and repeated names.
+ */
+public class RodBindingUnitTest extends BaseTest {
+    Tags mytags = new Tags();   // default-constructed tags reused by every binding created in these tests
+
+    @BeforeMethod
+    public void setUp() {
+        RodBinding.resetNameCounter();   // presumably keeps suffixed names such as "binding2" independent of test order
+    }
+
+    @Test   // a binding built from explicit values must echo them back and report itself bound
+    public void testStandardRodBinding() {
+        RodBinding standard = new RodBinding(VariantContext.class, "b", "foo", "vcf", mytags);
+        Assert.assertEquals(standard.getName(), "b");
+        Assert.assertEquals(standard.getType(), VariantContext.class);
+        Assert.assertEquals(standard.getSource(), "foo");
+        Assert.assertEquals(standard.getTribbleType(), "vcf");
+        Assert.assertTrue(standard.isBound());
+    }
+
+    @Test   // makeUnbound() must yield the UNBOUND_* placeholders and report itself unbound
+    public void testUnboundRodBinding() {
+        RodBinding unbound = RodBinding.makeUnbound(VariantContext.class);
+        Assert.assertEquals(unbound.getName(), RodBinding.UNBOUND_VARIABLE_NAME);
+        Assert.assertEquals(unbound.getSource(), RodBinding.UNBOUND_SOURCE);
+        Assert.assertEquals(unbound.getType(), VariantContext.class);
+        Assert.assertEquals(unbound.getTribbleType(), RodBinding.UNBOUND_TRIBBLE_TYPE);
+        Assert.assertFalse(unbound.isBound());
+    }
+
+    @Test   // a second binding created under an already-used name must come back with the suffix "2"
+    public void testMultipleBindings() {
+        final String sharedName = "binding";
+        RodBinding first = new RodBinding(VariantContext.class, sharedName, "foo", "vcf", mytags);
+        Assert.assertEquals(first.getName(), sharedName);
+        Assert.assertEquals(first.getType(), VariantContext.class);
+        Assert.assertEquals(first.getSource(), "foo");
+        Assert.assertEquals(first.getTribbleType(), "vcf");
+        Assert.assertTrue(first.isBound());
+
+        RodBinding second = new RodBinding(VariantContext.class, sharedName, "foo", "vcf", mytags);
+        Assert.assertEquals(second.getName(), sharedName + "2");
+        Assert.assertEquals(second.getType(), VariantContext.class);
+        Assert.assertEquals(second.getSource(), "foo");
+        Assert.assertEquals(second.getTribbleType(), "vcf");
+        Assert.assertTrue(second.isBound());
+    }
+}
diff --git a/public/java/test/org/broadinstitute/sting/gatk/EngineFeaturesIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/EngineFeaturesIntegrationTest.java
new file mode 100644
index 000000000..5b5083ef3
--- /dev/null
+++ b/public/java/test/org/broadinstitute/sting/gatk/EngineFeaturesIntegrationTest.java
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2011, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.gatk;
+
+import org.broadinstitute.sting.WalkerTest;
+import org.broadinstitute.sting.utils.exceptions.UserException;
+import org.testng.annotations.Test;
+
+/**
+ * Runs SelectVariants with a --variant binding tagged beagle, vcf3, bed or bedXXX, handing WalkerTestSpec an exception class for each.
+ */
+public class EngineFeaturesIntegrationTest extends WalkerTest {
+    private void testBadRODBindingInput(String type, String name, Class c) {
+        // c is presumably the exception the run is expected to raise; the literal 1 is passed to WalkerTestSpec as-is
+        final String walkerArgs = "-T SelectVariants -L 1:1 --variant:variant," + type + " "
+                + b37dbSNP132 + " -R " + b37KGReference + " -o %s";
+        executeTest(name, new WalkerTestSpec(walkerArgs, 1, c));
+    }
+
+    @Test private void testBadRODBindingInputType1() {
+        testBadRODBindingInput("beagle", "BEAGLE input to VCF expecting walker", UserException.BadArgumentValue.class);
+    }
+
+    @Test private void testBadRODBindingInputType2() {
+        testBadRODBindingInput("vcf3", "VCF3 input to VCF expecting walker", UserException.class);
+    }
+
+    @Test private void testBadRODBindingInputType3() {
+        testBadRODBindingInput("bed", "Bed input to VCF expecting walker", UserException.BadArgumentValue.class);
+    }
+
+    @Test private void testBadRODBindingInputTypeUnknownType() {
+        testBadRODBindingInput("bedXXX", "Unknown input to VCF expecting walker", UserException.UnknownTribbleType.class);
+    }
+}
\ No newline at end of file
diff --git a/public/java/test/org/broadinstitute/sting/gatk/WalkerManagerUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/WalkerManagerUnitTest.java
index cd43927a4..6149a1e51 100644
--- a/public/java/test/org/broadinstitute/sting/gatk/WalkerManagerUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/WalkerManagerUnitTest.java
@@ -26,7 +26,6 @@ package org.broadinstitute.sting.gatk;
 
 import org.testng.Assert;
 import org.broadinstitute.sting.commandline.Hidden;
-import org.broadinstitute.sting.gatk.walkers.Requires;
 import org.broadinstitute.sting.gatk.walkers.Walker;
 import org.broadinstitute.sting.gatk.walkers.qc.CountLociWalker;
 import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException;
@@ -64,7 +63,6 @@ public class WalkerManagerUnitTest {
 }
 
 @Hidden
-@Requires(value={})
 class UninstantiableWalker extends Walker {
     // Private constructor will generate uninstantiable message
     private UninstantiableWalker() {}
diff --git a/public/java/test/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollectionUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollectionUnitTest.java
index 59edf934e..f3e868474 100755
--- a/public/java/test/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollectionUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollectionUnitTest.java
@@ -81,7 +81,6 @@ public class GATKArgumentCollectionUnitTest extends BaseTest {
         collect.samFiles = input;
         collect.strictnessLevel = SAMFileReader.ValidationStringency.STRICT;
         collect.referenceFile = new File("referenceFile".toLowerCase());
-        collect.DBSNPFile = "DBSNPFile".toLowerCase();
         collect.unsafe = ValidationExclusion.TYPE.ALL;
         collect.downsampleFraction = null;
         collect.downsampleCoverage = null;
diff --git a/public/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedViewUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedViewUnitTest.java
index b32473b9d..f782580e2 100755
--- a/public/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedViewUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedViewUnitTest.java
@@ -1,15 +1,16 @@
 package org.broadinstitute.sting.gatk.datasources.providers;
 
+import org.broad.tribble.Feature;
 import org.broadinstitute.sting.commandline.Tags;
 import org.broadinstitute.sting.gatk.datasources.reads.MockLocusShard;
 import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
+import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder;
 import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
 import org.testng.Assert;
 import org.broadinstitute.sting.BaseTest;
 import org.broadinstitute.sting.gatk.datasources.reads.Shard;
 import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
 import org.broadinstitute.sting.gatk.refdata.features.table.TableFeature;
-import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder;
 import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet.RMDStorageType;
 import org.broadinstitute.sting.utils.GenomeLocParser;
 import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
@@ -69,8 +70,8 @@ public class ReferenceOrderedViewUnitTest extends BaseTest {
         LocusShardDataProvider provider = new LocusShardDataProvider(shard, null, genomeLocParser, shard.getGenomeLocs().get(0), null, seq, Collections.emptyList());
         ReferenceOrderedView view = new ManagingReferenceOrderedView( provider );
 
-        RefMetaDataTracker tracker = view.getReferenceOrderedDataAtLocus(genomeLocParser.createGenomeLoc("chrM",10));
-        Assert.assertEquals(tracker.getAllRods().size(), 0, "The tracker should not have produced any data");
+        RefMetaDataTracker tracker = view.getReferenceOrderedDataAtLocus(genomeLocParser.createGenomeLoc("chrM",10), null);
+        Assert.assertEquals(tracker.getValues(Feature.class).size(), 0, "The tracker should not have produced any data");
     }
 
     /**
@@ -87,8 +88,8 @@ public class ReferenceOrderedViewUnitTest extends BaseTest {
         LocusShardDataProvider provider = new LocusShardDataProvider(shard, null, genomeLocParser, shard.getGenomeLocs().get(0), null, seq, Collections.singletonList(dataSource));
         ReferenceOrderedView view = new ManagingReferenceOrderedView( provider );
 
-        RefMetaDataTracker tracker = view.getReferenceOrderedDataAtLocus(genomeLocParser.createGenomeLoc("chrM",20));
-        TableFeature datum = tracker.lookup("tableTest",TableFeature.class);
+        RefMetaDataTracker tracker = view.getReferenceOrderedDataAtLocus(genomeLocParser.createGenomeLoc("chrM",20), null);
+        TableFeature datum = tracker.getFirstValue(TableFeature.class, "tableTest");
 
         Assert.assertEquals(datum.get("COL1"),"C","datum parameter for COL1 is incorrect");
         Assert.assertEquals(datum.get("COL2"),"D","datum parameter for COL2 is incorrect");
@@ -113,14 +114,14 @@ public class ReferenceOrderedViewUnitTest extends BaseTest {
         LocusShardDataProvider provider = new LocusShardDataProvider(shard, null, genomeLocParser, shard.getGenomeLocs().get(0), null, seq, Arrays.asList(dataSource1,dataSource2));
         ReferenceOrderedView view = new ManagingReferenceOrderedView( provider );
 
-        RefMetaDataTracker tracker = view.getReferenceOrderedDataAtLocus(genomeLocParser.createGenomeLoc("chrM",20));
-        TableFeature datum1 = tracker.lookup("tableTest1",TableFeature.class);
+        RefMetaDataTracker tracker = view.getReferenceOrderedDataAtLocus(genomeLocParser.createGenomeLoc("chrM",20), null);
+        TableFeature datum1 = tracker.getFirstValue(TableFeature.class, "tableTest1");
 
         Assert.assertEquals(datum1.get("COL1"),"C","datum1 parameter for COL1 is incorrect");
         Assert.assertEquals(datum1.get("COL2"),"D","datum1 parameter for COL2 is incorrect");
         Assert.assertEquals(datum1.get("COL3"),"E","datum1 parameter for COL3 is incorrect");
 
-        TableFeature datum2 = tracker.lookup("tableTest2", TableFeature.class);
+        TableFeature datum2 = tracker.getFirstValue(TableFeature.class, "tableTest2");
 
         Assert.assertEquals(datum2.get("COL1"),"C","datum2 parameter for COL1 is incorrect");
         Assert.assertEquals(datum2.get("COL2"),"D","datum2 parameter for COL2 is incorrect");
diff --git a/public/java/test/org/broadinstitute/sting/gatk/datasources/rmd/ReferenceOrderedDataPoolUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/datasources/rmd/ReferenceOrderedDataPoolUnitTest.java
index 5b0d67e88..bd4f93d24 100755
--- a/public/java/test/org/broadinstitute/sting/gatk/datasources/rmd/ReferenceOrderedDataPoolUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/datasources/rmd/ReferenceOrderedDataPoolUnitTest.java
@@ -1,10 +1,10 @@
 package org.broadinstitute.sting.gatk.datasources.rmd;
 
 import org.broadinstitute.sting.commandline.Tags;
+import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder;
 import org.testng.Assert;
 import org.broadinstitute.sting.BaseTest;
 import org.broadinstitute.sting.gatk.refdata.features.table.TableFeature;
-import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder;
 import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
 import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
 import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet.RMDStorageType;
diff --git a/public/java/test/org/broadinstitute/sting/gatk/refdata/RefMetaDataTrackerUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/refdata/RefMetaDataTrackerUnitTest.java
new file mode 100644
index 000000000..fbd30bc8a
--- /dev/null
+++ b/public/java/test/org/broadinstitute/sting/gatk/refdata/RefMetaDataTrackerUnitTest.java
@@ -0,0 +1,288 @@
+/*
+ * Copyright (c) 2011, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.gatk.refdata;
+
+import net.sf.samtools.SAMFileHeader;
+import org.apache.log4j.Logger;
+import org.broad.tribble.Feature;
+import org.broadinstitute.sting.BaseTest;
+import org.broadinstitute.sting.commandline.RodBinding;
+import org.broadinstitute.sting.commandline.Tags;
+import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
+import org.broadinstitute.sting.gatk.refdata.features.table.TableFeature;
+import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
+import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
+import org.broadinstitute.sting.utils.GenomeLoc;
+import org.broadinstitute.sting.utils.GenomeLocParser;
+import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
+import org.broadinstitute.sting.utils.variantcontext.Allele;
+import org.broadinstitute.sting.utils.variantcontext.VariantContext;
+import org.testng.Assert;
+import org.testng.annotations.*;
+import java.util.*;
+import java.util.List;
+
+public class RefMetaDataTrackerUnitTest {
+    final protected static Logger logger = Logger.getLogger(RefMetaDataTrackerUnitTest.class);
+    private static SAMFileHeader header;
+    private ReferenceContext context;
+    private GenomeLocParser genomeLocParser;
+    private GenomeLoc locus;
+    private final static int START_POS = 10;
+    Allele A,C,G,T;
+    VariantContext AC_SNP, AG_SNP, AT_SNP;
+    TableFeature span10_10, span1_20, span10_20;
+
+    @BeforeClass
+    public void beforeClass() { // builds the fixtures shared by every test in this class
+        header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 100);
+        genomeLocParser = new GenomeLocParser(header.getSequenceDictionary());
+        locus = genomeLocParser.createGenomeLoc("chr1", START_POS, START_POS); // single-base locus that all locus-restricted queries use
+        context = new ReferenceContext(genomeLocParser, locus, (byte)'A');
+        A = Allele.create("A", true); // NOTE(review): the 'true' flag presumably marks A as the reference allele -- confirm in Allele.create
+        C = Allele.create("C");
+        G = Allele.create("G");
+        T = Allele.create("T");
+        AC_SNP = new VariantContext("x", "chr1", START_POS, START_POS, Arrays.asList(A, C)); // the three variants all start and stop at START_POS
+        AG_SNP = new VariantContext("x", "chr1", START_POS, START_POS, Arrays.asList(A, G));
+        AT_SNP = new VariantContext("x", "chr1", START_POS, START_POS, Arrays.asList(A, T));
+        span10_10 = makeSpan(10, 10); // spans are named start_stop; span1_20 is the only one starting before START_POS
+        span1_20 = makeSpan(1, 20);
+        span10_20 = makeSpan(10, 20);
+    }
+
+    @BeforeMethod
+    public void reset() { // runs before every test method
+        RodBinding.resetNameCounter(); // NOTE(review): presumably keeps auto-generated binding names independent of test order -- confirm in RodBinding
+    }
+
+    /**
+     * One test case: the features bound to track "A" and to track "B".
+     * A null list means the corresponding track is not given to the tracker at all.
+     */
+    private class MyTest extends BaseTest.TestDataProvider {
+        public RODRecordList AValues, BValues;
+
+        private MyTest(Class c, final List AValues, final List BValues) {
+            super(c);
+            this.AValues = AValues == null ? null : makeRODRecord("A", AValues);
+            this.BValues = BValues == null ? null : makeRODRecord("B", BValues);
+        }
+
+        private MyTest(final List AValues, final List BValues) {
+            this(MyTest.class, AValues, BValues);
+        }
+
+        @Override
+        public String toString() {
+            return String.format("A=%s, B=%s", AValues, BValues);
+        }
+
+        // wraps each raw feature in a GATKFeature tagged with the track name
+        private final RODRecordList makeRODRecord(String name, List features) {
+            List wrapped = new ArrayList();
+            for ( Feature raw : features )
+                wrapped.add(new GATKFeature.TribbleGATKFeature(genomeLocParser, raw, name));
+            return new RODRecordListImpl(name, wrapped, locus);
+        }
+
+        public List expected(String name) {
+            if ( name.equals("A+B") ) return allValues();
+            if ( name.equals("A") ) return expectedAValues();
+            if ( name.equals("B") ) return expectedBValues();
+            throw new RuntimeException("FAIL");
+        }
+
+        public List allValues() {
+            List both = new ArrayList(expectedAValues());
+            both.addAll(expectedBValues());
+            return both;
+        }
+
+        public List expectedAValues() {
+            return AValues != null ? AValues : Collections.emptyList();
+        }
+
+        public List expectedBValues() {
+            return BValues != null ? BValues : Collections.emptyList();
+        }
+
+        // only the non-null tracks are handed to the tracker, A before B
+        public RefMetaDataTracker makeTracker() {
+            List bound = new ArrayList();
+            if ( AValues != null ) bound.add(AValues);
+            if ( BValues != null ) bound.add(BValues);
+            return new RefMetaDataTracker(bound, context);
+        }
+
+        public int nBoundTracks() {
+            return ( AValues == null ? 0 : 1 ) + ( BValues == null ? 0 : 1 );
+        }
+    }
+
+    private final TableFeature makeSpan(int start, int stop) { // table feature on chr1:start-stop with empty value and header lists
+        final GenomeLoc span = genomeLocParser.createGenomeLoc("chr1", start, stop);
+        return new TableFeature(span, Collections.emptyList(), Collections.emptyList());
+    }
+
+    @DataProvider(name = "tests")
+    public Object[][] createTests() { // the MyTest objects are not stored here; NOTE(review): presumably each registers itself for getTests() -- confirm in TestDataProvider
+        new MyTest(null, null);
+        new MyTest(Arrays.asList(AC_SNP), null);
+        new MyTest(Arrays.asList(AC_SNP, AT_SNP), null);
+        new MyTest(Arrays.asList(AC_SNP), Arrays.asList(AG_SNP));
+        new MyTest(Arrays.asList(AC_SNP, AT_SNP), Arrays.asList(AG_SNP));
+        new MyTest(Arrays.asList(AC_SNP, AT_SNP), Arrays.asList(span10_10));
+        new MyTest(Arrays.asList(AC_SNP, AT_SNP), Arrays.asList(span10_10, span10_20));
+        new MyTest(Arrays.asList(AC_SNP, AT_SNP), Arrays.asList(span10_10, span10_20, span1_20));
+
+        // span-only cases, presumably aimed at the getters that only return features starting at the locus (compare startingHere)
+        new MyTest(Arrays.asList(span1_20), null);
+        new MyTest(Arrays.asList(span10_10, span10_20), null);
+        new MyTest(Arrays.asList(span10_10, span10_20, span1_20), null);
+
+        return MyTest.getTests(MyTest.class);
+    }
+
+    @Test(enabled = true, dataProvider = "tests")
+    public void testRawBindings(MyTest test) { // checks the tracker's bound-track count, then each track's raw values
+        logger.warn("Testing " + test + " for number of bound tracks");
+        RefMetaDataTracker tracker = test.makeTracker();
+        Assert.assertEquals(tracker.getNTracksWithBoundFeatures(), test.nBoundTracks());
+        // AValues / BValues are null for a track that was not bound; testSimpleBindings handles both situations
+        testSimpleBindings("A", tracker, test.AValues);
+        testSimpleBindings("B", tracker, test.BValues);
+    }
+
+    private void testSimpleBindings(String name, RefMetaDataTracker tracker, RODRecordList expected) {
+        // the track must report values exactly when a record list was supplied, and as many as were supplied
+        List asValues = tracker.getValues(Feature.class, name);
+        Assert.assertEquals(tracker.hasValues(name), expected != null);
+        Assert.assertEquals(asValues.size(), expected == null ? 0 : expected.size());
+        if ( expected == null )
+            return;
+
+        // every expected feature must come back as the very same object (identity, not equals())
+        for ( GATKFeature wanted : expected ) {
+            boolean seen = false;
+            for ( Feature candidate : asValues )
+                if ( wanted.getUnderlyingObject() == candidate ) seen = true;
+            Assert.assertTrue(seen, "Never found expected value of " + wanted.getUnderlyingObject() + " bound to " + name + " in " + tracker);
+        }
+    }
+
+    @Test(enabled = true, dataProvider = "tests")
+    public void testGettersAsString(MyTest test) {
+        logger.warn("Testing " + test + " for get() methods");
+        RefMetaDataTracker tracker = test.makeTracker();
+
+        for ( String name : Arrays.asList("A+B", "A", "B") ) {
+            // "A+B" means "all tracks": it is queried through the overloads that take no track name
+            final boolean allTracks = name.equals("A+B");
+            List everything = allTracks ? tracker.getValues(Feature.class) : tracker.getValues(Feature.class, name);
+            testGetter(name, everything, test.expected(name), true, tracker);
+            List atLocus = allTracks ? tracker.getValues(Feature.class, locus) : tracker.getValues(Feature.class, name, locus);
+            testGetter(name, atLocus, startingHere(test.expected(name)), true, tracker);
+
+            Feature first = allTracks ? tracker.getFirstValue(Feature.class) : tracker.getFirstValue(Feature.class, name);
+            testGetter(name, Arrays.asList(first), test.expected(name), false, tracker);
+            Feature firstAtLocus = allTracks ? tracker.getFirstValue(Feature.class, locus) : tracker.getFirstValue(Feature.class, name, locus);
+            testGetter(name, Arrays.asList(firstAtLocus), startingHere(test.expected(name)), false, tracker);
+        }
+    }
+
+    @Test(enabled = true, dataProvider = "tests")
+    public void testGettersAsRodBindings(MyTest test) {
+        logger.warn("Testing " + test + " for get() methods as RodBindings");
+        RefMetaDataTracker tracker = test.makeTracker();
+
+        for ( String trackName : Arrays.asList("A", "B") ) {
+            RodBinding binding = new RodBinding(Feature.class, trackName, "none", "vcf", new Tags());
+            // list getters: checked against all expected values, then against those starting at the locus
+            List everything = tracker.getValues(binding);
+            testGetter(trackName, everything, test.expected(trackName), true, tracker);
+            List atLocus = tracker.getValues(binding, locus);
+            testGetter(trackName, atLocus, startingHere(test.expected(trackName)), true, tracker);
+
+            // "first value" getters: the result only has to be one of the expected values (requireExact = false)
+            Feature first = tracker.getFirstValue(binding);
+            testGetter(trackName, Arrays.asList(first), test.expected(trackName), false, tracker);
+            Feature firstAtLocus = tracker.getFirstValue(binding, locus);
+            testGetter(trackName, Arrays.asList(firstAtLocus), startingHere(test.expected(trackName)), false, tracker);
+        }
+    }
+
+    @Test(enabled = true, dataProvider = "tests")
+    public void testGettersAsListOfRodBindings(MyTest test) {
+        logger.warn("Testing " + test + " for get() methods for List");
+        RefMetaDataTracker tracker = test.makeTracker();
+
+        // query both tracks at once through a list of bindings; "A+B" selects the combined expectations
+        String nameAsString = "A+B";
+        RodBinding<Feature> bindingA = new RodBinding<Feature>(Feature.class, "A", "none", "vcf", new Tags());
+        RodBinding<Feature> bindingB = new RodBinding<Feature>(Feature.class, "B", "none", "vcf", new Tags());
+        List<RodBinding<Feature>> binding = Arrays.asList(bindingA, bindingB);
+
+        List v1 = tracker.getValues(binding);
+        testGetter(nameAsString, v1, test.expected(nameAsString), true, tracker);
+        List v2 = tracker.getValues(binding, locus);
+        testGetter(nameAsString, v2, startingHere(test.expected(nameAsString)), true, tracker);
+
+        // the "first value" getters only need to return one of the expected features (requireExact = false)
+        Feature v3 = tracker.getFirstValue(binding);
+        testGetter(nameAsString, Arrays.asList(v3), test.expected(nameAsString), false, tracker);
+        Feature v4 = tracker.getFirstValue(binding, locus);
+        testGetter(nameAsString, Arrays.asList(v4), startingHere(test.expected(nameAsString)), false, tracker);
+    }
+
+    private List startingHere(List l) {
+        List atLocus = new ArrayList(); // the members of l whose start equals the test locus' start
+        for ( GATKFeature candidate : l ) if ( candidate.getStart() == locus.getStart() ) atLocus.add(candidate);
+        return atLocus;
+    }
+
+    private void testGetter(String name, List got, List expected, boolean requireExact, RefMetaDataTracker tracker) {
+        // callers wrap getFirstValue() results with Arrays.asList, so a lone null element means nothing was returned
+        if ( got.size() == 1 && got.get(0) == null )
+            got = Collections.emptyList();
+        if ( requireExact ) Assert.assertEquals(got.size(), expected.size());
+
+        // look for each expected feature, by identity, among the returned values
+        boolean anyMatched = false;
+        for ( GATKFeature wanted : expected ) {
+            boolean matched = false;
+            for ( Feature candidate : got )
+                if ( wanted.getUnderlyingObject() == candidate ) matched = true;
+            if ( requireExact )
+                Assert.assertTrue(matched, "Never found expected GATKFeature " + wanted + " bound to " + name + " in " + tracker);
+            anyMatched |= matched;
+        }
+
+        // relaxed mode: one hit is enough, and nothing is required when nothing was expected
+        if ( requireExact || expected.isEmpty() )
+            return;
+        Assert.assertTrue(anyMatched, "Never found any got values matching one of the expected values bound to " + name + " in " + tracker);
+    }
+}
diff --git a/public/java/test/org/broadinstitute/sting/gatk/refdata/ReferenceOrderedDataUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/refdata/ReferenceOrderedDataUnitTest.java
deleted file mode 100644
index fa20ea913..000000000
--- a/public/java/test/org/broadinstitute/sting/gatk/refdata/ReferenceOrderedDataUnitTest.java
+++ /dev/null
@@ -1,48 +0,0 @@
-package org.broadinstitute.sting.gatk.refdata;
-
-import org.testng.Assert;
-import org.broadinstitute.sting.BaseTest;
-
-import org.testng.annotations.Test;
-
-import java.util.ArrayList;
-import java.util.List;
-
-
-/**
- * 
- * @author aaron 
- * 
- * Class ReferenceOrderedDataUnitTest
- *
- * some functionality to test parts of the reference ordered data system that I've added.  This is by NO MEANS
- * a complete test suite, but additions would be extremely welcome
- */
-public class ReferenceOrderedDataUnitTest extends BaseTest {
-    @Test
-    public void extractRodsFromFileTest() {
-        String file = validationDataLocation + "testRODFileImpl.csv";
-        List lst = new ArrayList();
-        ReferenceOrderedData.extractRodsFromFile(lst,file);
-        Assert.assertEquals(lst.size(), 6);
-        int index = 0;
-        for (String entry: lst) {
-            String first = entry.subSequence(0,entry.indexOf(",")).toString();            
-            Assert.assertTrue(first.equals("rod" + String.valueOf(++index)));
-        }
-    }
-    @Test
-    public void extractRodsFromMultiFileTest() {
-        String file = validationDataLocation + "testRODFileImpl.csv";
-        String file2 = validationDataLocation + "testRODFileImpl2.csv";
-        List lst = new ArrayList();
-        ReferenceOrderedData.extractRodsFromFile(lst,file);
-        ReferenceOrderedData.extractRodsFromFile(lst,file2);
-        Assert.assertEquals(lst.size(), 12);
-        int index = 0;
-        for (String entry: lst) {
-            String first = entry.subSequence(0,entry.indexOf(",")).toString();
-            Assert.assertTrue(first.equals("rod" + String.valueOf(++index)));
-        }
-    }
-}
diff --git a/public/java/test/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManagerUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManagerUnitTest.java
new file mode 100644
index 000000000..5d662ffed
--- /dev/null
+++ b/public/java/test/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManagerUnitTest.java
@@ -0,0 +1,157 @@
+/*
+ * Copyright (c) 2011, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.gatk.refdata.tracks;
+
+
+import net.sf.picard.reference.IndexedFastaSequenceFile;
+import org.broad.tribble.Feature;
+import org.broad.tribble.FeatureCodec;
+import org.broadinstitute.sting.BaseTest;
+import org.broadinstitute.sting.gatk.refdata.features.table.BedTableCodec;
+import org.broadinstitute.sting.gatk.refdata.features.table.TableFeature;
+import org.broadinstitute.sting.utils.GenomeLocParser;
+import org.broadinstitute.sting.utils.codecs.vcf.VCF3Codec;
+import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec;
+import org.broadinstitute.sting.utils.exceptions.UserException;
+import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
+import org.broadinstitute.sting.utils.variantcontext.VariantContext;
+import org.testng.Assert;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+import java.io.*;
+import java.util.*;
+
+
+/**
+ * @author depristo
+ *
+ * UnitTests for RMD FeatureManager
+ */
+public class FeatureManagerUnitTest extends BaseTest {
+    private static final File RANDOM_FILE = new File(validationDataLocation + "exampleGATKReport.eval");
+    private static final File VCF3_FILE = new File(validationDataLocation + "vcfexample3.vcf");
+    private static final File VCF4_FILE = new File(validationDataLocation + "vcf4.1.example.vcf");
+
+    private FeatureManager manager;
+    private GenomeLocParser genomeLocParser;
+
+    @BeforeMethod
+    public void setup() { // runs before every test: new GenomeLocParser from the b36KGReference file, and a new FeatureManager
+        final File referenceFile = new File(b36KGReference);
+        try {
+            final IndexedFastaSequenceFile seq = new CachingIndexedFastaSequenceFile(referenceFile);
+            genomeLocParser = new GenomeLocParser(seq);
+        }
+        catch(FileNotFoundException ex) {
+            throw new UserException.CouldNotReadInputFile(referenceFile,ex);
+        }
+        manager = new FeatureManager();
+    }
+
+    @Test
+    public void testManagerCreation() { // a freshly constructed manager must already expose at least one feature descriptor
+        Assert.assertTrue(manager.getFeatureDescriptors().size() > 0);
+    }
+
+    private class FMTest extends BaseTest.TestDataProvider { // one expected (feature class, codec class, name) triple plus an optional example file
+        public Class codec;
+        public Class feature;
+        public String name;
+        public File associatedFile; // may be null; testGetByFile does nothing for such a case
+        // NOTE(review): super(FMTest.class) presumably registers the case so getTests(FMTest.class) can return it -- confirm in BaseTest.TestDataProvider
+        private FMTest(final Class feature, final Class codec, final String name, final File file) {
+            super(FMTest.class);
+            this.codec = codec;
+            this.feature = feature;
+            this.name = name;
+            this.associatedFile = file;
+        }
+        // asserts that the descriptor carries this case's codec class, feature class and (ignoring case) name
+        public void assertExpected(FeatureManager.FeatureDescriptor featureDescriptor) {
+            Assert.assertEquals(featureDescriptor.getCodecClass(), codec);
+            Assert.assertEquals(featureDescriptor.getFeatureClass(), feature);
+            Assert.assertEquals(featureDescriptor.getName().toLowerCase(), name.toLowerCase());
+        }
+
+        public String toString() { // used when the case is printed, e.g. as a test parameter
+            return String.format("FMTest name=%s codec=%s feature=%s file=%s", name, codec, feature, associatedFile);
+        }
+    }
+
+    @DataProvider(name = "tests")
+    public Object[][] createTests() { // arguments are (feature class, codec class, name, example file)
+        new FMTest(VariantContext.class, VCF3Codec.class, "VCF3", VCF3_FILE);
+        new FMTest(VariantContext.class, VCFCodec.class, "VCF", VCF4_FILE);
+        new FMTest(TableFeature.class, BedTableCodec.class, "bedtable", null); // no example file: testGetByFile skips this case, testGetters still covers it
+        return FMTest.getTests(FMTest.class);
+    }
+
+    @Test(dataProvider = "tests")
+    public void testGetByFile(FMTest params) {
+        if ( params.associatedFile == null )
+            return; // nothing to look up for a case without an example file
+        FeatureManager.FeatureDescriptor found = manager.getByFiletype(params.associatedFile);
+        Assert.assertNotNull(found, "Couldn't find any type associated with file " + params.associatedFile);
+        params.assertExpected(found);
+    }
+
+    @Test
+    public void testGetByFileNoMatch() { // RANDOM_FILE (a GATK report) must yield null rather than some descriptor
+        FeatureManager.FeatureDescriptor byFile = manager.getByFiletype(RANDOM_FILE);
+        Assert.assertNull(byFile, "Found type " + byFile + " associated with RANDOM, non-Tribble file " + RANDOM_FILE);
+    }
+
+    @Test(dataProvider = "tests")
+    public void testGetters(FMTest params) {
+        params.assertExpected(manager.getByCodec(params.codec));
+        // lookup by name must succeed for the name as given, in lower case and in upper case
+        for ( String spelling : Arrays.asList(params.name, params.name.toLowerCase(), params.name.toUpperCase()) )
+            params.assertExpected(manager.getByName(spelling));
+
+        Collection byFeature = manager.getByFeature(params.feature);
+        Assert.assertTrue(byFeature.size() > 0, "Look up by FeatureClass failed");
+    }
+
+    @Test
+    public void testUserFriendlyList() { // split(",") yields >= 1 element for any non-empty listing, so "at least two codecs" needs length > 1
+        Assert.assertTrue(manager.userFriendlyListOfAvailableFeatures().length() > 0, "Expected at least one codec to be listed");
+        Assert.assertTrue(manager.userFriendlyListOfAvailableFeatures().split(",").length > 1, "Expected at least two codecs, but only saw one");
+    }
+
+    @Test
+    public void testCodecCreation() {
+        FeatureManager.FeatureDescriptor vcfDescriptor = manager.getByName("vcf");
+        Assert.assertNotNull(vcfDescriptor, "Couldn't find VCF feature descriptor!");
+        // the created codec must have exactly the class, and report exactly the feature type, that the descriptor advertises
+        FeatureCodec codec = manager.createCodec(vcfDescriptor, "foo", genomeLocParser);
+        Assert.assertNotNull(codec, "Couldn't create codec");
+        Assert.assertEquals(codec.getClass(), vcfDescriptor.getCodecClass());
+        Assert.assertEquals(codec.getFeatureType(), vcfDescriptor.getFeatureClass());
+    }
+
+}
+
diff --git a/public/java/test/org/broadinstitute/sting/gatk/refdata/tracks/builders/RMDTrackBuilderUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilderUnitTest.java
similarity index 96%
rename from public/java/test/org/broadinstitute/sting/gatk/refdata/tracks/builders/RMDTrackBuilderUnitTest.java
rename to public/java/test/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilderUnitTest.java
index e475e732d..ae218e898 100644
--- a/public/java/test/org/broadinstitute/sting/gatk/refdata/tracks/builders/RMDTrackBuilderUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilderUnitTest.java
@@ -1,5 +1,6 @@
 /*
- * Copyright (c) 2010.  The Broad Institute
+ * Copyright (c) 2011, The Broad Institute
+ *
  * Permission is hereby granted, free of charge, to any person
  * obtaining a copy of this software and associated documentation
  * files (the "Software"), to deal in the Software without
@@ -11,7 +12,7 @@
  *
  * The above copyright notice and this permission notice shall be
  * included in all copies or substantial portions of the Software.
- * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
@@ -21,13 +22,14 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  */
 
-package org.broadinstitute.sting.gatk.refdata.tracks.builders;
+package org.broadinstitute.sting.gatk.refdata.tracks;
 
 
 import net.sf.picard.reference.IndexedFastaSequenceFile;
 import net.sf.samtools.SAMSequenceDictionary;
 import org.broad.tribble.Tribble;
 import org.broad.tribble.index.Index;
+import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder;
 import org.broadinstitute.sting.utils.codecs.vcf.VCF3Codec;
 import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec;
 import org.broadinstitute.sting.utils.exceptions.UserException;
@@ -73,8 +75,7 @@ public class RMDTrackBuilderUnitTest extends BaseTest {
 
     @Test
     public void testBuilder() {
-        Map classes = builder.getAvailableTrackNamesAndTypes();
-        Assert.assertTrue(classes.size() > 0);
+        Assert.assertTrue(builder.getFeatureManager().getFeatureDescriptors().size() > 0);
     }
 
     @Test
diff --git a/public/java/test/org/broadinstitute/sting/gatk/report/GATKReportParserUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/report/GATKReportParserUnitTest.java
deleted file mode 100644
index cfd75c41a..000000000
--- a/public/java/test/org/broadinstitute/sting/gatk/report/GATKReportParserUnitTest.java
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2011, The Broad Institute
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-package org.broadinstitute.sting.gatk.report;
-
-import org.broadinstitute.sting.BaseTest;
-import org.testng.Assert;
-import org.testng.annotations.Test;
-
-import java.io.File;
-
-public class GATKReportParserUnitTest extends BaseTest {
-    @Test
-    public void testParse() throws Exception {
-        GATKReportParser parser = new GATKReportParser();
-        parser.parse(new File(validationDataLocation + "exampleGATKReport.eval"));
-
-        Assert.assertEquals(parser.getValue("CountVariants", "none.eval.none.all", "nProcessedLoci"), "100000");
-        Assert.assertEquals(parser.getValue("CountVariants", "none.eval.none.all", "nNoCalls"), "99872");
-
-        Assert.assertEquals(parser.getValue("SimpleMetricsByAC.metrics", "none.eval.none.novel.ac2", "AC"), "2");
-        Assert.assertNull(parser.getValue("SimpleMetricsByAC.metrics", "none.eval.none.novel.ac2.bad", "AC"));
-        Assert.assertNull(parser.getValue("SimpleMetricsByAC.metrics", "none.eval.none.novel.ac2", "AC.bad"));
-        Assert.assertNull(parser.getValue("SimpleMetricsByAC.metrics.bad", "none.eval.none.novel.ac2", "AC"));
-
-        Assert.assertEquals(parser.getValue("ValidationReport", "none.eval.none.known", "sensitivity"), "NaN");
-    }
-}
diff --git a/public/java/test/org/broadinstitute/sting/gatk/report/GATKReportUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/report/GATKReportUnitTest.java
new file mode 100644
index 000000000..02e1ba99a
--- /dev/null
+++ b/public/java/test/org/broadinstitute/sting/gatk/report/GATKReportUnitTest.java
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2011, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.gatk.report;
+
+import org.broadinstitute.sting.BaseTest;
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+public class GATKReportUnitTest extends BaseTest {
+    /** Loads exampleGATKReport.eval and spot-checks the version, row lookup and cell values of three tables. */
+    @Test
+    public void testParse() throws Exception {
+        GATKReport report = new GATKReport(validationDataLocation + "exampleGATKReport.eval");
+        // CountVariants: report version plus two cells of the "none.eval.none.all" row.
+        GATKReportTable countVariants = report.getTable("CountVariants");
+        Assert.assertEquals(countVariants.getVersion(), GATKReportVersion.V0_1);
+        Object countVariantsPK = countVariants.getPrimaryKey("none.eval.none.all");
+        Assert.assertEquals(countVariants.get(countVariantsPK, "nProcessedLoci"), "100000");
+        Assert.assertEquals(countVariants.get(countVariantsPK, "nNoCalls"), "99872");
+        // ValidationReport: the row key is resolved against this table itself, not against another table.
+        GATKReportTable validationReport = report.getTable("ValidationReport");
+        Assert.assertEquals(validationReport.getVersion(), GATKReportVersion.V0_1);
+        Object validationReportPK = validationReport.getPrimaryKey("none.eval.none.known");
+        Assert.assertEquals(validationReport.get(validationReportPK, "sensitivity"), "NaN");
+        // SimpleMetricsByAC.metrics: one cell of the "none.eval.none.novel.ac2" row.
+        GATKReportTable simpleMetricsByAC = report.getTable("SimpleMetricsByAC.metrics");
+        Assert.assertEquals(simpleMetricsByAC.getVersion(), GATKReportVersion.V0_1);
+        Object simpleMetricsByACPK = simpleMetricsByAC.getPrimaryKey("none.eval.none.novel.ac2");
+        Assert.assertEquals(simpleMetricsByAC.get(simpleMetricsByACPK, "AC"), "2");
+        // A row key that does not occur in the table must be reported as absent.
+        Assert.assertFalse(simpleMetricsByAC.containsPrimaryKey("none.eval.none.novel.ac2.bad"));
+    }
+}
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java
index e6300e6c9..af29bd01f 100755
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java
@@ -14,7 +14,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
     @Test
     public void testHasAnnotsNotAsking1() {
         WalkerTestSpec spec = new WalkerTestSpec(
-                baseTestString() + " -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
+                baseTestString() + " --variant:VCF3 " + validationDataLocation + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
                 Arrays.asList("8a105fa5eebdfffe7326bc5b3d8ffd1c"));
         executeTest("test file has annotations, not asking for annotations, #1", spec);
     }
@@ -22,7 +22,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
     @Test
     public void testHasAnnotsNotAsking2() {
         WalkerTestSpec spec = new WalkerTestSpec(
-                baseTestString() + " -B:variant,VCF3 " + validationDataLocation + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
+                baseTestString() + " --variant:VCF3 " + validationDataLocation + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
                 Arrays.asList("964f1016ec9a3c55333f62dd834c14d6"));
         executeTest("test file has annotations, not asking for annotations, #2", spec);
     }
@@ -30,7 +30,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
     @Test
     public void testHasAnnotsAsking1() {
         WalkerTestSpec spec = new WalkerTestSpec(
-                baseTestString() + " -G \"Standard\" -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
+                baseTestString() + " -G \"Standard\" --variant:VCF3 " + validationDataLocation + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
                 Arrays.asList("8e7de435105499cd71ffc099e268a83e"));
         executeTest("test file has annotations, asking for annotations, #1", spec);
     }
@@ -38,7 +38,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
     @Test
     public void testHasAnnotsAsking2() {
         WalkerTestSpec spec = new WalkerTestSpec(
-                baseTestString() + " -G \"Standard\" -B:variant,VCF3 " + validationDataLocation + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
+                baseTestString() + " -G \"Standard\" --variant:VCF3 " + validationDataLocation + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
                 Arrays.asList("64b6804cb1e27826e3a47089349be581"));
         executeTest("test file has annotations, asking for annotations, #2", spec);
     }
@@ -46,7 +46,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
     @Test
     public void testNoAnnotsNotAsking1() {
         WalkerTestSpec spec = new WalkerTestSpec(
-                baseTestString() + " -B:variant,VCF3 " + validationDataLocation + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
+                baseTestString() + " --variant:VCF3 " + validationDataLocation + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
                 Arrays.asList("42ccee09fa9f8c58f4a0d4f1139c094f"));
         executeTest("test file doesn't have annotations, not asking for annotations, #1", spec);
     }
@@ -54,7 +54,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
     @Test
     public void testNoAnnotsNotAsking2() {
         WalkerTestSpec spec = new WalkerTestSpec(
-                baseTestString() + " -B:variant,VCF3 " + validationDataLocation + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
+                baseTestString() + " --variant:VCF3 " + validationDataLocation + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
                 Arrays.asList("f2ddfa8105c290b1f34b7a261a02a1ac"));
         executeTest("test file doesn't have annotations, not asking for annotations, #2", spec);
     }
@@ -62,7 +62,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
     @Test
     public void testNoAnnotsAsking1() {
         WalkerTestSpec spec = new WalkerTestSpec(
-                baseTestString() + " -G \"Standard\" -B:variant,VCF3 " + validationDataLocation + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
+                baseTestString() + " -G \"Standard\" --variant:VCF3 " + validationDataLocation + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
                 Arrays.asList("fd1ffb669800c2e07df1e2719aa38e49"));
         executeTest("test file doesn't have annotations, asking for annotations, #1", spec);
     }
@@ -70,7 +70,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
     @Test
     public void testNoAnnotsAsking2() {
         WalkerTestSpec spec = new WalkerTestSpec(
-                baseTestString() + " -G \"Standard\" -B:variant,VCF3 " + validationDataLocation + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
+                baseTestString() + " -G \"Standard\" --variant:VCF3 " + validationDataLocation + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
                 Arrays.asList("09f8e840770a9411ff77508e0ed0837f"));
         executeTest("test file doesn't have annotations, asking for annotations, #2", spec);
     }
@@ -78,7 +78,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
     @Test
     public void testOverwritingHeader() {
         WalkerTestSpec spec = new WalkerTestSpec(
-                baseTestString() + " -G \"Standard\" -B:variant,VCF " + validationDataLocation + "vcfexample4.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,001,292", 1,
+                baseTestString() + " -G \"Standard\" --variant:VCF " + validationDataLocation + "vcfexample4.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,001,292", 1,
                 Arrays.asList("78d2c19f8107d865970dbaf3e12edd92"));
         executeTest("test overwriting header", spec);
     }
@@ -86,7 +86,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
     @Test
     public void testNoReads() {
         WalkerTestSpec spec = new WalkerTestSpec(
-                baseTestString() + " -G \"Standard\" -B:variant,VCF3 " + validationDataLocation + "vcfexample3empty.vcf -BTI variant", 1,
+                baseTestString() + " -G \"Standard\" --variant:VCF3 " + validationDataLocation + "vcfexample3empty.vcf -BTI variant", 1,
                 Arrays.asList("16e3a1403fc376320d7c69492cad9345"));
         executeTest("not passing it any reads", spec);
     }
@@ -94,7 +94,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
     @Test
     public void testDBTagWithDbsnp() {
         WalkerTestSpec spec = new WalkerTestSpec(
-                baseTestString() + " -D " + GATKDataLocation + "dbsnp_129_b36.rod -G \"Standard\" -B:variant,VCF3 " + validationDataLocation + "vcfexample3empty.vcf -BTI variant", 1,
+                baseTestString() + " -B:dbsnp,vcf " + b36dbSNP129 + " -G \"Standard\" --variant:VCF3 " + validationDataLocation + "vcfexample3empty.vcf -BTI variant", 1,
                 Arrays.asList("3da8ca2b6bdaf6e92d94a8c77a71313d"));
         executeTest("getting DB tag with dbSNP", spec);
     }
@@ -102,7 +102,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
     @Test
     public void testDBTagWithHapMap() {
         WalkerTestSpec spec = new WalkerTestSpec(
-                baseTestString() + " -B:compH3,VCF " + validationDataLocation + "fakeHM3.vcf -G \"Standard\" -B:variant,VCF3 " + validationDataLocation + "vcfexample3empty.vcf -BTI variant", 1,
+                baseTestString() + " -B:compH3,VCF " + validationDataLocation + "fakeHM3.vcf -G \"Standard\" --variant:VCF3 " + validationDataLocation + "vcfexample3empty.vcf -BTI variant", 1,
                 Arrays.asList("1bc01c5b3bd0b7aef75230310c3ce688"));
         executeTest("getting DB tag with HM3", spec);
     }
@@ -110,7 +110,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
     @Test
     public void testUsingExpression() {
         WalkerTestSpec spec = new WalkerTestSpec(
-                baseTestString() + " -B:foo,VCF " + validationDataLocation + "targetAnnotations.vcf -G \"Standard\" -B:variant,VCF3 " + validationDataLocation + "vcfexample3empty.vcf -E foo.AF -BTI variant", 1,
+                baseTestString() + " -B:foo,VCF " + validationDataLocation + "targetAnnotations.vcf -G \"Standard\" --variant:VCF3 " + validationDataLocation + "vcfexample3empty.vcf -E foo.AF -BTI variant", 1,
                 Arrays.asList("e9c0d832dc6b4ed06c955060f830c140"));
         executeTest("using expression", spec);
     }
@@ -120,9 +120,21 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
         final String MD5 = "13269d5a2e16f06fd755cc0fb9271acf";
         for ( String file : Arrays.asList("CEU.exon.2010_03.sites.vcf", "CEU.exon.2010_03.sites.vcf.gz")) {
             WalkerTestSpec spec = new WalkerTestSpec(
-                    baseTestString() + " -A HomopolymerRun -B:variant,VCF " + validationDataLocation + "/" + file + " -BTI variant -NO_HEADER", 1,
+                    baseTestString() + " -A HomopolymerRun --variant:VCF " + validationDataLocation + "/" + file + " -BTI variant -NO_HEADER", 1,
                     Arrays.asList(MD5));
             executeTest("Testing lookup vcf tabix vs. vcf tribble", spec);
         }
     }
+
+    @Test
+    public void testSnpEffAnnotations() {
+        // Runs VariantAnnotator with the SnpEff annotation (-A SnpEff) over 1:26,000,000-26,500,000,
+        // passing the 1000G exomes VCF as --variant and a SnpEff output file as --snpEffFile.
+        final String snpEffArgs = "-T VariantAnnotator -R " + b37KGReference + " -NO_HEADER -o %s -A SnpEff --variant " +
+            validationDataLocation + "1000G.exomes.vcf --snpEffFile  " + validationDataLocation +
+            "snpEff_1.9.6_1000G.exomes.vcf_hg37.61.out -L 1:26,000,000-26,500,000";
+        final WalkerTestSpec snpEffSpec =
+            new WalkerTestSpec(snpEffArgs, 1, Arrays.asList("c08648a078368c80530bff004b3157f1"));
+        executeTest("Testing SnpEff annotations", snpEffSpec);
+    }
 }
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotatorIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotatorIntegrationTest.java
deleted file mode 100755
index c75a5b2dc..000000000
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotatorIntegrationTest.java
+++ /dev/null
@@ -1,83 +0,0 @@
-package org.broadinstitute.sting.gatk.walkers.annotator.genomicannotator;
-
-
-import java.util.Arrays;
-
-import org.broadinstitute.sting.WalkerTest;
-import org.testng.annotations.Test;
-
-public class GenomicAnnotatorIntegrationTest extends WalkerTest {
-    String testFileWithIndels = validationDataLocation + "/GenomicAnnotatorValidation/1KGBroadWEx.cleaned.indels.vcf";
-    String testFileWithSNPsAndIndels = validationDataLocation + "/GenomicAnnotatorValidation/1KGBroadWEx.variants.vcf";
-
-    @Test
-    public void testGenomicAnnotatorOnDbSNP() {
-
-        /*
-        TODO put this test back in once it gets faster.
-        String[] md5 = {"d19d6d1eb52fb09e7493653dc645d92a"};
-        WalkerTestSpec spec = new WalkerTestSpec(
-                "-T GenomicAnnotator -R " + b36KGReference + " " +
-                "-B:variant,vcf /humgen/gsa-hpprojects/GATK/data/Annotations/examples/CEU_hapmap_nogt_23_subset.vcf " +
-                "-B:dbsnp,AnnotatorInputTable /humgen/gsa-hpprojects/GATK/data/Annotations/dbsnp/b130/snp130-b36-only-the-SNPs.txt " +
-                "-m " + //generate many records from one input record if necessary
-                "-o %s " +
-                "-BTI variant",
-                 1,
-                 Arrays.asList(md5));
-        executeTest("test with dbSNP", spec);
-        */
-
-
-        String[] md5WithDashSArg = {"efba4ce1641cfa2ef88a64395f2ebce8"};
-        WalkerTestSpec specWithSArg = new WalkerTestSpec(
-                "-T GenomicAnnotator -R " + b36KGReference +
-                " -B:variant,vcf3 /humgen/gsa-hpprojects/GATK/data/Annotations/examples/CEU_hapmap_nogt_23_subset.vcf" +
-                " -B:dbsnp,AnnotatorInputTable /humgen/gsa-hpprojects/GATK/data/Annotations/dbsnp/b130/snp130-b36-only-the-SNPs.txt" +
-                " -m" + //generate many records from one input record if necessary
-                " -o %s" +
-                " -BTI variant" +
-                " -s dbsnp.name,dbsnp.refUCSC,dbsnp.strand,dbsnp.observed,dbsnp.avHet" +
-                " -NO_HEADER",
-                 1,
-                 Arrays.asList(md5WithDashSArg));
-        executeTest("test with dbSNP and -s arg", specWithSArg);
-
-    }
-
-    @Test
-    public void testGenomicAnnotatorOnIndels() {
-        WalkerTestSpec testOnIndels = new WalkerTestSpec(
-                buildCommandLine(
-                        "-T GenomicAnnotator",
-                        "-R " + b37KGReference,
-                        "-L 22:10000000-20000000",
-                        "-B:refseq,AnnotatorInputTable " + b37Refseq,
-                        "-B:variant,VCF " + testFileWithIndels,
-                        "-NO_HEADER",
-                        "-o %s"
-                ),
-                1,
-                Arrays.asList("772fc3f43b70770ec6c6acbb8bbbd4c0")
-        );
-        executeTest("testGenomicAnnotatorOnIndels", testOnIndels);
-    }
-
-    @Test
-    public void testGenomicAnnotatorOnSNPsAndIndels() {
-        WalkerTestSpec testOnSNPsAndIndels = new WalkerTestSpec(
-                buildCommandLine(
-                        "-T GenomicAnnotator",
-                        "-R " + b37KGReference,
-                        "-L 22:10000000-20000000",
-                        "-B:refseq,AnnotatorInputTable " + b37Refseq,
-                        "-B:variant,VCF " + testFileWithSNPsAndIndels,
-                        "-NO_HEADER",
-                        "-o %s"
-                ),
-                1,
-                Arrays.asList("081ade7f3d2d3c5f19cb1e8651a626f3")
-        );
-        executeTest("testGenomicAnnotatorOnSNPsAndIndels", testOnSNPsAndIndels);
-    }
-}
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/beagle/BeagleIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/beagle/BeagleIntegrationTest.java
index fef1b6e64..5f759fdbf 100755
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/beagle/BeagleIntegrationTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/beagle/BeagleIntegrationTest.java
@@ -37,10 +37,10 @@ public class BeagleIntegrationTest extends WalkerTest {
     public void testBeagleOutput() {
         WalkerTestSpec spec = new WalkerTestSpec(
                 "-T BeagleOutputToVCF -R " + hg19Reference + " " +
-                        "-B:variant,VCF3 " + beagleValidationDataLocation + "inttestbgl.input.vcf " +
-                        "-B:beagleR2,BEAGLE " + beagleValidationDataLocation + "inttestbgl.r2 " +
-                        "-B:beagleProbs,BEAGLE " + beagleValidationDataLocation + "inttestbgl.gprobs " +
-                        "-B:beaglePhased,BEAGLE " + beagleValidationDataLocation + "inttestbgl.phased " +
+                        "--variant:VCF3 " + beagleValidationDataLocation + "inttestbgl.input.vcf " +
+                        "--beagleR2:BEAGLE " + beagleValidationDataLocation + "inttestbgl.r2 " +
+                        "--beagleProbs:BEAGLE " + beagleValidationDataLocation + "inttestbgl.gprobs " +
+                        "--beaglePhased:BEAGLE " + beagleValidationDataLocation + "inttestbgl.phased " +
                         "-o %s -NO_HEADER", 1, Arrays.asList("3531451e84208264104040993889aaf4"));
         executeTest("test BeagleOutputToVCF", spec);
     }
@@ -49,7 +49,7 @@ public class BeagleIntegrationTest extends WalkerTest {
     public void testBeagleInput() {
         WalkerTestSpec spec = new WalkerTestSpec(
                 "-T ProduceBeagleInput -R " + hg19Reference + " " +
-                        "-B:variant,VCF3 " + beagleValidationDataLocation + "inttestbgl.input.vcf " +
+                        "--variant:VCF3 " + beagleValidationDataLocation + "inttestbgl.input.vcf " +
                          "-o %s", 1, Arrays.asList("a01c704246f3dd1b9c65774007e51e69"));
         executeTest("test BeagleInput", spec);
     }
@@ -57,8 +57,8 @@ public class BeagleIntegrationTest extends WalkerTest {
     @Test
     public void testBeagleInput2() {
         WalkerTestSpec spec = new WalkerTestSpec(
-                "-T ProduceBeagleInput -B:variant,VCF /humgen/gsa-hpprojects/GATK/data/Validation_Data/NA12878_HSQ_chr22_14-16m.vcf "+
-                        "-B:validation,VCF /humgen/gsa-hpprojects/GATK/data/Validation_Data/NA12878_OMNI_chr22_14-16m.vcf "+
+                "-T ProduceBeagleInput --variant:VCF /humgen/gsa-hpprojects/GATK/data/Validation_Data/NA12878_HSQ_chr22_14-16m.vcf "+
+                        "--validation:VCF /humgen/gsa-hpprojects/GATK/data/Validation_Data/NA12878_OMNI_chr22_14-16m.vcf "+
                         "-L 22:14000000-16000000 -o %s -bvcf %s -bs 0.8 -valp 0.98 -R /humgen/1kg/reference/human_g1k_v37.fasta -NO_HEADER ",2,
                 Arrays.asList("660986891b30cdc937e0f2a3a5743faa","e96ddd51da9f4a797b2aa8c20e404166"));
         executeTest("test BeagleInputWithBootstrap",spec);
@@ -68,10 +68,10 @@ public class BeagleIntegrationTest extends WalkerTest {
     public void testBeagleOutput2() {
         WalkerTestSpec spec = new WalkerTestSpec(
                 "-T BeagleOutputToVCF -R "+hg19Reference+" "+
-                "-B:variant,VCF /humgen/gsa-hpprojects/GATK/data/Validation_Data/EUR_beagle_in_test.vcf "+
-                "-B:beagleR2,beagle /humgen/gsa-hpprojects/GATK/data/Validation_Data/EUR_beagle_in_test.r2 "+
-                "-B:beagleProbs,beagle /humgen/gsa-hpprojects/GATK/data/Validation_Data/EUR_beagle_in_test.gprobs.bgl "+
-                "-B:beaglePhased,beagle /humgen/gsa-hpprojects/GATK/data/Validation_Data/EUR_beagle_in_test.phased.bgl "+
+                "--variant:VCF /humgen/gsa-hpprojects/GATK/data/Validation_Data/EUR_beagle_in_test.vcf "+
+                "--beagleR2:beagle /humgen/gsa-hpprojects/GATK/data/Validation_Data/EUR_beagle_in_test.r2 "+
+                "--beagleProbs:beagle /humgen/gsa-hpprojects/GATK/data/Validation_Data/EUR_beagle_in_test.gprobs.bgl "+
+                "--beaglePhased:beagle /humgen/gsa-hpprojects/GATK/data/Validation_Data/EUR_beagle_in_test.phased.bgl "+
                 "-L 20:1-70000 -o %s -NO_HEADER ",1,Arrays.asList("8dd6ec53994fb46c5c22af8535d22965"));
 
         executeTest("testBeagleChangesSitesToRef",spec);
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/coverage/CompareCallableLociWalkerIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/coverage/CompareCallableLociWalkerIntegrationTest.java
index 4a32d6701..1ba7a5e85 100755
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/coverage/CompareCallableLociWalkerIntegrationTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/coverage/CompareCallableLociWalkerIntegrationTest.java
@@ -30,7 +30,7 @@ import org.testng.annotations.Test;
 import java.util.Arrays;
 
 public class CompareCallableLociWalkerIntegrationTest extends WalkerTest {
-    final static String commonArgs = "-R " + hg18Reference + " -T CompareCallableLoci -B:comp1,Bed " + validationDataLocation + "1kg_slx.chr1_10mb.callable.bed -B:comp2,Bed " + validationDataLocation + "ga2_slx.chr1_10mb.callable.bed -o %s";
+    final static String commonArgs = "-R " + hg18Reference + " -T CompareCallableLoci --comp1:Bed " + validationDataLocation + "1kg_slx.chr1_10mb.callable.bed --comp2:Bed " + validationDataLocation + "ga2_slx.chr1_10mb.callable.bed -o %s";
 
     @Test
     public void testCompareCallableLociWalker1() {
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsIntegrationTest.java
index 77159d9c2..f9aaaecc1 100644
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsIntegrationTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsIntegrationTest.java
@@ -30,8 +30,6 @@ import org.testng.annotations.Test;
 
 import java.io.File;
 import java.util.Arrays;
-import java.util.Collections;
-import java.util.List;
 
 public class DiffObjectsIntegrationTest extends WalkerTest {
     private class TestParams extends TestDataProvider {
@@ -52,8 +50,8 @@ public class DiffObjectsIntegrationTest extends WalkerTest {
 
     @DataProvider(name = "data")
     public Object[][] createData() {
-        new TestParams(testDir + "diffTestMaster.vcf", testDir + "diffTestTest.vcf", "4d9f4636de05b93c354d05011264546e");
-        new TestParams(testDir + "exampleBAM.bam", testDir + "exampleBAM.simple.bam", "37e6efd833b5cd6d860a9df3df9713fc");
+        new TestParams(testDir + "diffTestMaster.vcf", testDir + "diffTestTest.vcf", "92311de76dda3f38aac289d807ef23d0");
+        new TestParams(testDir + "exampleBAM.bam", testDir + "exampleBAM.simple.bam", "0c69412c385fda50210f2a612e1ffe4a");
         return TestParams.getTests(TestParams.class);
     }
 
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceIntegrationTest.java
index 0c034eba9..9af39e92c 100755
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceIntegrationTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceIntegrationTest.java
@@ -24,15 +24,15 @@ public class FastaAlternateReferenceIntegrationTest extends WalkerTest {
         executeTest("testFastaReference", spec1b);
 
         WalkerTestSpec spec2 = new WalkerTestSpec(
-                "-T FastaAlternateReferenceMaker -R " + b36KGReference + " -B:indels,VCF " + validationDataLocation + "NA12878.chr1_10mb_11mb.slx.indels.vcf4 -B:snpmask,dbsnp " + GATKDataLocation + "dbsnp_129_b36.rod -L 1:10,075,000-10,075,380;1:10,093,447-10,093,847;1:10,271,252-10,271,452 -o %s",
+                "-T FastaAlternateReferenceMaker -R " + b36KGReference + " -V " + validationDataLocation + "NA12878.chr1_10mb_11mb.slx.indels.vcf4 --snpmask:vcf " + b36dbSNP129 + " -L 1:10,075,000-10,075,380;1:10,093,447-10,093,847;1:10,271,252-10,271,452 -o %s",
                  1,
-                 Arrays.asList("3a48986c3832a768b478c3e95f994b0f"));
+                 Arrays.asList("0567b32ebdc26604ddf2a390de4579ac"));
         executeTest("testFastaAlternateReferenceIndels", spec2);
 
         WalkerTestSpec spec3 = new WalkerTestSpec(
-                "-T FastaAlternateReferenceMaker -R " + b36KGReference + " -B:snps,GeliText " + validationDataLocation + "NA12878.chr1_10mb_11mb.slx.geli.calls -B:snpmask,dbsnp " + GATKDataLocation + "dbsnp_129_b36.rod -L 1:10,023,400-10,023,500;1:10,029,200-10,029,500 -o %s",
+                "-T FastaAlternateReferenceMaker -R " + b36KGReference + " -V " + GATKDataLocation + "dbsnp_129_b36.vcf -L 1:10,023,400-10,023,500;1:10,029,200-10,029,500 -o %s",
                  1,
-                 Arrays.asList("82705a88f6fc25880dd2331183531d9a"));
+                 Arrays.asList("8b6cd2e20c381f9819aab2d270f5e641"));
         executeTest("testFastaAlternateReferenceSnps", spec3);
     }
 }
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java
index 7bec67d2e..1cb43ceb1 100755
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java
@@ -15,7 +15,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
     @Test
     public void testNoAction() {
         WalkerTestSpec spec = new WalkerTestSpec(
-                baseTestString() + " -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
+                baseTestString() + " --variant:VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
                 Arrays.asList("8a105fa5eebdfffe7326bc5b3d8ffd1c"));
         executeTest("test no action", spec);
     }
@@ -23,7 +23,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
     @Test
     public void testClusteredSnps() {
         WalkerTestSpec spec = new WalkerTestSpec(
-                baseTestString() + " -window 10 -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
+                baseTestString() + " -window 10 --variant:VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
                 Arrays.asList("27b13f179bb4920615dff3a32730d845"));
         executeTest("test clustered SNPs", spec);
     }
@@ -31,17 +31,17 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
     @Test
     public void testMasks() {
         WalkerTestSpec spec1 = new WalkerTestSpec(
-                baseTestString() + " -mask foo -B:mask,VCF3 " + validationDataLocation + "vcfexample2.vcf -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
+                baseTestString() + " -maskName foo --mask:VCF3 " + validationDataLocation + "vcfexample2.vcf --variant:VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
                 Arrays.asList("578f9e774784c25871678e6464fd212b"));
         executeTest("test mask all", spec1);
 
         WalkerTestSpec spec2 = new WalkerTestSpec(
-                baseTestString() + " -mask foo -B:mask,VCF " + validationDataLocation + "vcfMask.vcf -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
+                baseTestString() + " -maskName foo --mask:VCF " + validationDataLocation + "vcfMask.vcf --variant:VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
                 Arrays.asList("bfa86a674aefca1b13d341cb14ab3c4f"));
         executeTest("test mask some", spec2);
 
         WalkerTestSpec spec3 = new WalkerTestSpec(
-                baseTestString() + " -mask foo -maskExtend 10 -B:mask,VCF " + validationDataLocation + "vcfMask.vcf -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
+                baseTestString() + " -maskName foo -maskExtend 10 --mask:VCF " + validationDataLocation + "vcfMask.vcf --variant:VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
                 Arrays.asList("5939f80d14b32d88587373532d7b90e5"));
         executeTest("test mask extend", spec3);
     }
@@ -49,7 +49,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
     @Test
     public void testFilter1() {
         WalkerTestSpec spec = new WalkerTestSpec(
-                baseTestString() + " -filter 'DoC < 20 || FisherStrand > 20.0' -filterName foo -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
+                baseTestString() + " -filter 'DoC < 20 || FisherStrand > 20.0' -filterName foo --variant:VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
                 Arrays.asList("45219dbcfb6f81bba2ea0c35f5bfd368"));
         executeTest("test filter #1", spec);
     }
@@ -57,7 +57,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
     @Test
     public void testFilter2() {
         WalkerTestSpec spec = new WalkerTestSpec(
-                baseTestString() + " -filter 'AlleleBalance < 70.0 && FisherStrand == 1.4' -filterName bar -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
+                baseTestString() + " -filter 'AlleleBalance < 70.0 && FisherStrand == 1.4' -filterName bar --variant:VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
                 Arrays.asList("c95845e817da7352b9b72bc9794f18fb"));
         executeTest("test filter #2", spec);
     }
@@ -65,7 +65,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
     @Test
     public void testFilterWithSeparateNames() {
         WalkerTestSpec spec = new WalkerTestSpec(
-                baseTestString() + " --filterName ABF -filter 'AlleleBalance < 0.7' --filterName FSF -filter 'FisherStrand == 1.4' -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
+                baseTestString() + " --filterName ABF -filter 'AlleleBalance < 0.7' --filterName FSF -filter 'FisherStrand == 1.4' --variant:VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
                 Arrays.asList("b8cdd7f44ff1a395e0a9b06a87e1e530"));
         executeTest("test filter with separate names #2", spec);
     }
@@ -73,12 +73,12 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
     @Test
     public void testGenotypeFilters() {
         WalkerTestSpec spec1 = new WalkerTestSpec(
-                baseTestString() + " -G_filter 'GQ == 0.60' -G_filterName foo -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
+                baseTestString() + " -G_filter 'GQ == 0.60' -G_filterName foo --variant:VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
                 Arrays.asList("96b61e4543a73fe725e433f007260039"));
         executeTest("test genotype filter #1", spec1);
 
         WalkerTestSpec spec2 = new WalkerTestSpec(
-                baseTestString() + " -G_filter 'AF == 0.04 && isHomVar == 1' -G_filterName foo -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
+                baseTestString() + " -G_filter 'AF == 0.04 && isHomVar == 1' -G_filterName foo --variant:VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
                 Arrays.asList("6c8112ab17ce39c8022c891ae73bf38e"));
         executeTest("test genotype filter #2", spec2);
     }
@@ -86,7 +86,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
     @Test
     public void testDeletions() {
         WalkerTestSpec spec = new WalkerTestSpec(
-                baseTestString() + " --filterExpression 'QUAL < 100' --filterName foo -B:variant,VCF " + validationDataLocation + "twoDeletions.vcf", 1,
+                baseTestString() + " --filterExpression 'QUAL < 100' --filterName foo --variant:VCF " + validationDataLocation + "twoDeletions.vcf", 1,
                 Arrays.asList("569546fd798afa0e65c5b61b440d07ac"));
         executeTest("test deletions", spec);
     }
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java
index 1f23d262e..88c5116b1 100755
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java
@@ -28,7 +28,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
     public void testMultiSamplePilot1() {
         WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
                 baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1,
-                Arrays.asList("c97829259463d04b0159591bb6fb44af"));
+                Arrays.asList("16b0c7b47745abcd1ddaa2e261719530"));
         executeTest("test MultiSample Pilot1", spec);
     }
 
@@ -54,12 +54,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
     public void testWithAllelesPassedIn() {
         WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
                 baseCommand + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -B:alleles,vcf " + validationDataLocation + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
-                Arrays.asList("2b69667f4770e8c0c894066b7f27e440"));
+                Arrays.asList("811ddc0bd8322b14f14f58df8c627aa9"));
         executeTest("test MultiSample Pilot2 with alleles passed in", spec1);
 
         WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
                 baseCommand + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -B:alleles,vcf " + validationDataLocation + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
-                Arrays.asList("b77fe007c2a97fcd59dfd5eef94d8b95"));
+                Arrays.asList("5cf08dd7ac3d218082f7be3915ce0b15"));
         executeTest("test MultiSample Pilot2 with alleles passed in and emitting all sites", spec2);
     }
 
@@ -67,7 +67,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
     public void testSingleSamplePilot2() {
         WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
                 baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1,
-                Arrays.asList("ee8a5e63ddd470726a749e69c0c20f60"));
+                Arrays.asList("75156264696563c2f47620fef9424f7c"));
         executeTest("test SingleSample Pilot2", spec);
     }
 
@@ -77,7 +77,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
     //
     // --------------------------------------------------------------------------------------------------------------
 
-    private final static String COMPRESSED_OUTPUT_MD5 = "ef31654a2b85b9b2d3bba4f4a75a17b6";
+    private final static String COMPRESSED_OUTPUT_MD5 = "7255e03430549cb97d8fcae34cbffb02";
 
     @Test
     public void testCompressedOutput() {
@@ -107,7 +107,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
 
         // Note that we need to turn off any randomization for this to work, so no downsampling and no annotations
 
-        String md5 = "46868a9c4134651c54535fb46b408aee";
+        String md5 = "7912109e83fda21dae90ef8d5dd0140d";
 
         WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
                 baseCommand + " -dt NONE -G none -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000", 1,
@@ -138,9 +138,10 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
     @Test
     public void testCallingParameters() {
         HashMap e = new HashMap();
-        e.put( "--min_base_quality_score 26", "5043c9a101e691602eb7a3f9704bdf20" );
-        e.put( "--min_mapping_quality_score 26", "71a833eb8fd93ee62ae0d5a430f27940" );
-        e.put( "--p_nonref_model GRID_SEARCH", "ddf443e9dcadef367476b26b4d52c134" );
+        e.put( "--min_base_quality_score 26", "6d3aa9f783ca63f37c952f83eeda593c" );
+        e.put( "--min_mapping_quality_score 26", "51bfdf777123bf49de5d92ffde5c74e7" );
+        e.put( "--p_nonref_model GRID_SEARCH", "333328ab2c8da2875fade599e80a271f" );
+        e.put( "--computeSLOD", "226caa28a4fa9fe34f3beb8a23f3d53d" );
 
         for ( Map.Entry entry : e.entrySet() ) {
             WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
@@ -153,9 +154,9 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
     @Test
     public void testOutputParameter() {
         HashMap e = new HashMap();
-        e.put( "-sites_only", "eaad6ceb71ab94290650a70bea5ab951" );
-        e.put( "--output_mode EMIT_ALL_CONFIDENT_SITES", "05bf7db8a3d19ef4a3d14772c90b732f" );
-        e.put( "--output_mode EMIT_ALL_SITES", "e4b86740468d7369f0156550855586c7" );
+        e.put( "-sites_only", "5f659dee408710d3709ed72005cd863a" );
+        e.put( "--output_mode EMIT_ALL_CONFIDENT_SITES", "55d09bf13149bddc06cc36be0801507b" );
+        e.put( "--output_mode EMIT_ALL_SITES", "727f49dcb2439b18446829efc3b1561c" );
 
         for ( Map.Entry entry : e.entrySet() ) {
             WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
@@ -169,12 +170,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
     public void testConfidence() {
         WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
                 baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 ", 1,
-                Arrays.asList("71a833eb8fd93ee62ae0d5a430f27940"));
+                Arrays.asList("51bfdf777123bf49de5d92ffde5c74e7"));
         executeTest("test confidence 1", spec1);
 
         WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
                 baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_emit_conf 10 ", 1,
-                Arrays.asList("79968844dc3ddecb97748c1acf2984c7"));
+                Arrays.asList("c67c285e70fd4457c9f9ce7bd878ddca"));
         executeTest("test confidence 2", spec2);
     }
 
@@ -186,8 +187,8 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
     @Test
     public void testHeterozyosity() {
         HashMap e = new HashMap();
-        e.put( 0.01, "4e878664f61d2d800146d3762303fde1" );
-        e.put( 1.0 / 1850, "9204caec095ff5e63ca21a10b6fab453" );
+        e.put( 0.01, "7ecc564d4db97d5932cef2e558550ed2" );
+        e.put( 1.0 / 1850, "aa9e101bb9f9e111fe292fec467d915a" );
 
         for ( Map.Entry entry : e.entrySet() ) {
             WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
@@ -211,7 +212,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
                         " -o %s" +
                         " -L 1:10,000,000-10,100,000",
                 1,
-                Arrays.asList("1a58ec52df545f946f80cc16c5736a91"));
+                Arrays.asList("2efd686186b2c5129be4cf89274a24dd"));
 
         executeTest(String.format("test multiple technologies"), spec);
     }
@@ -230,25 +231,11 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
                         " -L 1:10,000,000-10,100,000" +
                         " -baq CALCULATE_AS_NECESSARY",
                 1,
-                Arrays.asList("62d0f6d9de344ce68ce121c13b1e78b1"));
+                Arrays.asList("2892d35331fe9fc141ba19269ec7caed"));
 
         executeTest(String.format("test calling with BAQ"), spec);
     }
 
-    @Test
-    public void testCallingWithBAQOff() {
-        WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
-                baseCommand +
-                        " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam" +
-                        " -o %s" +
-                        " -L 1:10,000,000-10,100,000" +
-                        " -baq OFF",
-                1,
-                Arrays.asList("1a58ec52df545f946f80cc16c5736a91"));
-
-        executeTest(String.format("test calling with BAQ OFF"), spec);
-    }
-
     // --------------------------------------------------------------------------------------------------------------
     //
     // testing indel caller
@@ -263,7 +250,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
                         " -o %s" +
                         " -L 1:10,000,000-10,500,000",
                 1,
-                Arrays.asList("631ae1f1eb6bc4c1a4136b8495250536"));
+                Arrays.asList("8c2afb4289ed44521933d1a74c8d6c7f"));
 
         executeTest(String.format("test indel caller in SLX"), spec);
     }
@@ -278,7 +265,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
                         " -minIndelCnt 1" +
                         " -L 1:10,000,000-10,100,000",
                 1,
-                Arrays.asList("fd556585c79e2b892a5976668f45aa43"));
+                Arrays.asList("b6fb70590a10e1c27fb611732916f27d"));
 
         executeTest(String.format("test indel caller in SLX witn low min allele count"), spec);
     }
@@ -291,7 +278,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
                          " -o %s" +
                          " -L 1:10,000,000-10,500,000",
                  1,
-                 Arrays.asList("9cd56feedd2787919e571383889fde70"));
+                 Arrays.asList("61642502bd08cc03cdaaeb83a5426b46"));
 
          executeTest(String.format("test indel calling, multiple technologies"), spec);
      }
@@ -301,14 +288,14 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
         WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
                 baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -B:alleles,vcf " + validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation +
                         "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
-                Arrays.asList("315e1b78d7a403d7fcbcf0caa8c496b8"));
+                Arrays.asList("69b0b3f089c80b9864294d838a061336"));
         executeTest("test MultiSample Pilot2 indels with alleles passed in", spec1);
 
         WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
                 baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -B:alleles,vcf "
                         + validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation +
                         "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
-                Arrays.asList("cf89e0c54f14482a23c105b73a333d8a"));
+                Arrays.asList("c90174cfd7dd68bdef36fe2c60145e10"));
         executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec2);
     }
 
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperPerformanceTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperPerformanceTest.java
index 866c27f8d..fb7e84d22 100755
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperPerformanceTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperPerformanceTest.java
@@ -15,7 +15,7 @@ public class UnifiedGenotyperPerformanceTest extends WalkerTest {
                         " -glm BOTH" +
                         " -I " + evaluationDataLocation + "NA12878.GAII.chr1.50MB.bam" +
                         " -L chr1:1-50,000,000" +
-                        " -D " + GATKDataLocation + "dbsnp_129_hg18.rod" +
+                        " --dbsnp:VCF " + b36dbSNP129 +
                         " -o /dev/null",
                 0,
                 new ArrayList(0));
@@ -30,7 +30,7 @@ public class UnifiedGenotyperPerformanceTest extends WalkerTest {
                         " -glm BOTH" +
                         " -I " + evaluationDataLocation + "NA12878.ESP.WEx.chr1.bam" +
                         " -L " + evaluationDataLocation + "whole_exome_agilent_designed_120.targets.chr1.interval_list" +
-                        " -D " + GATKDataLocation + "dbsnp_129_hg18.rod" +
+                        " --dbsnp:vcf " + b36dbSNP129 +
                         " -o /dev/null",
                 0,
                 new ArrayList(0));
@@ -46,7 +46,7 @@ public class UnifiedGenotyperPerformanceTest extends WalkerTest {
                         " -glm BOTH" +
                         " -L chr1:1-50,000,000" +
                         " -nt 10" +
-                        " -D " + GATKDataLocation + "dbsnp_129_hg18.rod" +
+                        " --dbsnp:vcf " + b36dbSNP129 +
                         " -o /dev/null",
                 0,
                 new ArrayList(0));
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerIntegrationTest.java
index 2676f7067..0ff6fc244 100755
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerIntegrationTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerIntegrationTest.java
@@ -1,7 +1,6 @@
 package org.broadinstitute.sting.gatk.walkers.indels;
 
 import org.broadinstitute.sting.WalkerTest;
-import org.broadinstitute.sting.utils.exceptions.UserException;
 import org.testng.annotations.Test;
 
 import java.util.Arrays;
@@ -28,47 +27,28 @@ public class IndelRealignerIntegrationTest extends WalkerTest {
         executeTest("test realigner defaults", spec1);
 
         WalkerTestSpec spec2 = new WalkerTestSpec(
-                baseCommand + "-B:indels,vcf " + knownIndels,
+                baseCommand + "-known " + knownIndels,
                 1,
                 Arrays.asList(base_md5_with_SW_or_VCF));
         executeTest("test realigner defaults with VCF", spec2);
-
-        WalkerTestSpec spec3 = new WalkerTestSpec(
-                baseCommand + "-D " + GATKDataLocation + "dbsnp_129_b36.rod",
-                1,
-                Arrays.asList(base_md5));
-        executeTest("realigner defaults with dbsnp", spec3);
-
     }
 
     @Test
     public void testKnownsOnly() {
         WalkerTestSpec spec1 = new WalkerTestSpec(
-                baseCommand + "--consensusDeterminationModel KNOWNS_ONLY -B:indels,vcf " + knownIndels,
+                baseCommand + "--consensusDeterminationModel KNOWNS_ONLY -known " + knownIndels,
                 1,
                 Arrays.asList("3dd5d2c9931b375455af0bff1a2c4888"));
         executeTest("realigner known indels only from VCF", spec1);
-
-        WalkerTestSpec spec2 = new WalkerTestSpec(
-                baseCommand + "--consensusDeterminationModel KNOWNS_ONLY -D " + GATKDataLocation + "dbsnp_129_b36.rod",
-                1,
-                Arrays.asList("05a114623c126b0398fbc1703437461e"));
-        executeTest("realigner known indels only from dbsnp", spec2);
     }
 
     @Test
     public void testUseSW() {
         WalkerTestSpec spec1 = new WalkerTestSpec(
-                baseCommand + "--consensusDeterminationModel USE_SW -B:indels,vcf " + knownIndels,
+                baseCommand + "--consensusDeterminationModel USE_SW -known " + knownIndels,
                 1,
                 Arrays.asList(base_md5_with_SW_or_VCF));
         executeTest("realigner use SW from VCF", spec1);
-
-        WalkerTestSpec spec2 = new WalkerTestSpec(
-                baseCommand + "--consensusDeterminationModel USE_SW -D " + GATKDataLocation + "dbsnp_129_b36.rod",
-                1,
-                Arrays.asList(base_md5_with_SW_or_VCF));
-        executeTest("realigner use SW from dbsnp", spec2);
     }
 
     @Test
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerPerformanceTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerPerformanceTest.java
index fd5ad0b22..77675b0f4 100755
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerPerformanceTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerPerformanceTest.java
@@ -30,7 +30,7 @@ public class IndelRealignerPerformanceTest extends WalkerTest {
                         " -LOD 5" +
                         " -maxConsensuses 100" +
                         " -greedy 100" +
-                        " -D /humgen/gsa-hpprojects/GATK/data/dbsnp_129_hg18.rod" +
+                        " -known " + GATKDataLocation + "dbsnp_132.hg18.vcf" +
                         " -o /dev/null" +
                         " -I " + evaluationDataLocation + "NA12878.GAII.chr1.50MB.bam" +
                         " -L chr1:1-5,650,000" +
@@ -45,7 +45,7 @@ public class IndelRealignerPerformanceTest extends WalkerTest {
                         " -LOD 5" +
                         " -maxConsensuses 100" +
                         " -greedy 100" +
-                        " -D /humgen/gsa-hpprojects/GATK/data/dbsnp_129_hg18.rod" +
+                        " -known " + GATKDataLocation + "dbsnp_132.hg18.vcf" +
                         " -o /dev/null" +
                         " -I " + evaluationDataLocation + "NA12878.ESP.WEx.chr1.bam" +
                         " -L chr1:1-150,000,000" +
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreatorIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreatorIntegrationTest.java
index 4b225aaea..1873ccbe2 100755
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreatorIntegrationTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreatorIntegrationTest.java
@@ -11,19 +11,19 @@ public class RealignerTargetCreatorIntegrationTest extends WalkerTest {
     public void testIntervals() {
 
         WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
-                "-T RealignerTargetCreator -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000 -o %s",
+                "-T RealignerTargetCreator -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam --mismatchFraction 0.15 -L 1:10,000,000-10,050,000 -o %s",
                  1,
                  Arrays.asList("e7accfa58415d6da80383953b1a3a986"));
         executeTest("test standard", spec1);
 
         WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
-                "-T RealignerTargetCreator -D /humgen/gsa-hpprojects/GATK/data/dbsnp_129_b36.rod -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000 -o %s",
+                "-T RealignerTargetCreator --known " + b36dbSNP129 + " -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000 -o %s",
                  1,
-                 Arrays.asList("f23ba17ee0f9573dd307708175d90cd2"));
+                 Arrays.asList("0367d39a122c8ac0899fb868a82ef728"));
         executeTest("test dbsnp", spec2);
 
         WalkerTest.WalkerTestSpec spec3 = new WalkerTest.WalkerTestSpec(
-                "-T RealignerTargetCreator -R " + b36KGReference + " -B:indels,VCF " + validationDataLocation + "NA12878.chr1_10mb_11mb.slx.indels.vcf4 -BTI indels -o %s",
+                "-T RealignerTargetCreator -R " + b36KGReference + " --known " + validationDataLocation + "NA12878.chr1_10mb_11mb.slx.indels.vcf4 -BTI known -o %s",
                  1,
                  Arrays.asList("5206cee6c01b299417bf2feeb8b3dc96"));
         executeTest("test rods only", spec3);
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreatorPerformanceTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreatorPerformanceTest.java
index 0b6694fd9..cc37cc191 100755
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreatorPerformanceTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreatorPerformanceTest.java
@@ -12,7 +12,7 @@ public class RealignerTargetCreatorPerformanceTest extends WalkerTest {
         WalkerTestSpec spec1 = new WalkerTestSpec(
                 "-R " + hg18Reference +
                         " -T RealignerTargetCreator" +
-                        " -D /humgen/gsa-hpprojects/GATK/data/dbsnp_129_hg18.rod" +
+                        " --known " + GATKDataLocation + "dbsnp_132.hg18.vcf" +
                         " -I " + evaluationDataLocation + "NA12878.GAII.chr1.50MB.bam" +
                         " -L chr1:1-50,000,000" +
                         " -o /dev/null",
@@ -23,7 +23,7 @@ public class RealignerTargetCreatorPerformanceTest extends WalkerTest {
         WalkerTestSpec spec2 = new WalkerTestSpec(
                 "-R " + hg18Reference +
                         " -T RealignerTargetCreator" +
-                        " -D /humgen/gsa-hpprojects/GATK/data/dbsnp_129_hg18.rod" +
+                        " --known " + GATKDataLocation + "dbsnp_132.hg18.vcf" +
                         " -I " + evaluationDataLocation + "NA12878.ESP.WEx.chr1.bam" +
                         " -L " + evaluationDataLocation + "whole_exome_agilent_designed_120.targets.chr1.interval_list" +
                         " -o /dev/null",
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/MergeAndMatchHaplotypesIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/MergeAndMatchHaplotypesIntegrationTest.java
index 21435dd7d..cf6b4e581 100644
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/MergeAndMatchHaplotypesIntegrationTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/MergeAndMatchHaplotypesIntegrationTest.java
@@ -16,8 +16,8 @@ public class MergeAndMatchHaplotypesIntegrationTest extends WalkerTest {
                 buildCommandLine(
                         "-T MergeAndMatchHaplotypes",
                         "-R " + b37KGReference,
-                        "-B:pbt,VCF " + fundamentalTestPBTVCF,
-                        "-B:rbp,VCF " + fundamentalTestRBPVCF,
+                        "--pbt " + fundamentalTestPBTVCF,
+                        "--rbp " + fundamentalTestRBPVCF,
                         "-o %s"
                 ),
                 1,
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmissionIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmissionIntegrationTest.java
index 69f98b700..c663c1dd7 100644
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmissionIntegrationTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmissionIntegrationTest.java
@@ -14,13 +14,14 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest {
         WalkerTestSpec spec = new WalkerTestSpec(
                 buildCommandLine(
                         "-T PhaseByTransmission",
+                        "-NO_HEADER",
                         "-R " + b37KGReference,
-                        "-B:variant,VCF " + fundamentalTestVCF,
+                        "--variant " + fundamentalTestVCF,
                         "-f NA12892+NA12891=NA12878",
                         "-o %s"
                 ),
                 1,
-                Arrays.asList("ff02b1583ee3a12ed66a9c0e08e346b2")
+                Arrays.asList("")
         );
         executeTest("testBasicFunctionality", spec);
     }
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingIntegrationTest.java
index 1bf3e579f..e1d22f107 100644
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingIntegrationTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingIntegrationTest.java
@@ -11,7 +11,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest {
         return "-T ReadBackedPhasing" +
                 " -R " + reference +
                 " -I " + validationDataLocation + reads +
-                " -B:variant,VCF " + validationDataLocation + VCF +
+                " --variant " + validationDataLocation + VCF +
                 " --cacheWindowSize " + cacheWindowSize +
                 " --maxPhaseSites " + maxPhaseSites +
                 " --phaseQualityThresh " + phaseQualityThresh +
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/qc/DictionaryConsistencyIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/qc/DictionaryConsistencyIntegrationTest.java
index fc4e5ac66..4be848164 100755
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/qc/DictionaryConsistencyIntegrationTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/qc/DictionaryConsistencyIntegrationTest.java
@@ -56,7 +56,7 @@ public class DictionaryConsistencyIntegrationTest extends WalkerTest {
     }
 
     private WalkerTest.WalkerTestSpec testVCF(String ref, String vcf, Class c) {
-        return new WalkerTest.WalkerTestSpec("-T VariantsToTable -M 10 -B:two,vcf "
+        return new WalkerTest.WalkerTestSpec("-T VariantsToTable -M 10 --variant:vcf "
                 + vcf + " -F POS,CHROM -R "
                 + ref +  " -o %s",
                 1, c);
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/qc/ValidatingPileupIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/qc/ValidatingPileupIntegrationTest.java
index c5cdf9f02..ad190fae6 100644
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/qc/ValidatingPileupIntegrationTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/qc/ValidatingPileupIntegrationTest.java
@@ -18,7 +18,7 @@ public class ValidatingPileupIntegrationTest extends WalkerTest {
                 "-T ValidatingPileup" +
                 " -I " + validationDataLocation + "MV1994.selected.bam" +
                 " -R " + validationDataLocation + "Escherichia_coli_K12_MG1655.fasta" +
-                " -B:pileup,SAMPileup "+ validationDataLocation + "MV1994.selected.pileup" +
+                " --pileup:SAMPileup "+ validationDataLocation + "MV1994.selected.pileup" +
                 " -S SILENT -nt 8",0, Collections.emptyList());
         executeTest("testEcoliThreaded",spec);
     }
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java
index 129161da3..74f803ac6 100755
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java
@@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.walkers.recalibration;
 
 import org.broadinstitute.sting.WalkerTest;
 import org.broadinstitute.sting.utils.exceptions.UserException;
+import org.testng.annotations.DataProvider;
 import org.testng.annotations.Test;
 
 import java.util.HashMap;
@@ -12,78 +13,115 @@ import java.io.File;
 
 public class RecalibrationWalkersIntegrationTest extends WalkerTest {
     static HashMap paramsFiles = new HashMap();
-    static HashMap paramsFilesNoReadGroupTest = new HashMap();
     static HashMap paramsFilesSolidIndels = new HashMap();
 
-    @Test
-    public void testCountCovariates1() {
-        HashMap e = new HashMap();
-        e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "7b5832d4b2a23b8ef2bb639eb59bfa88" );
-        e.put( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "9c006f8e9fb5752b1c139f5a8cc7ea88");
-        e.put( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "e6f7b4ab9aa291022e0ba8b7dbe4c77e" );
-        e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "e6b98af01c5a08e4954b79ec42db6fc3" );
+    private static final class CCTest extends TestDataProvider { // one CountCovariates case: an input BAM plus the MD5 string given to the WalkerTestSpec
+        String file, md5; // file: BAM path used for -I in testCC; md5: string passed as Arrays.asList(md5) in testCC
 
-        for ( String parallelism : Arrays.asList("", " -nt 4")) {
-            for ( Map.Entry entry : e.entrySet() ) {
-                String bam = entry.getKey();
-                String md5 = entry.getValue();
+        private CCTest(final String file, final String md5) {
+            super(CCTest.class); // NOTE(review): presumably records this instance in TestDataProvider under CCTest.class - confirm
+            this.file = file;
+            this.md5 = md5;
+        }
 
-                WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
-                        "-R " + b36KGReference +
-                                " --DBSNP " + GATKDataLocation + "dbsnp_129_b36.rod" +
-                                " -T CountCovariates" +
-                                " -I " + bam +
-                                ( bam.equals( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam" )
-                                        ? " -L 1:10,800,000-10,810,000" : " -L 1:10,000,000-10,200,000" ) +
-                                " -cov ReadGroupCovariate" +
-                                " -cov QualityScoreCovariate" +
-                                " -cov CycleCovariate" +
-                                " -cov DinucCovariate" +
-                                " --solid_recal_mode SET_Q_ZERO" +
-                                " -recalFile %s" + parallelism,
-                        1, // just one output file
-                        Arrays.asList(md5));
-                List result = executeTest("testCountCovariates1" + parallelism, spec).getFirst();
-                paramsFiles.put(bam, result.get(0).getAbsolutePath());
-            }
+        public String toString() { // describes the case by its input file only; md5 is not included
+            return "CCTest: " + file;
         }
     }
-    
-    @Test
-    public void testTableRecalibrator1() {
-        HashMap e = new HashMap();
-        e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "0278cce4cfdab869dc0c11d6852a984b" );
-        e.put( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "6797d7ffa4ef6c48413719ba32696ccf");
-        e.put( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "2bb3374dde131791d7638031ae3b3e10" );
-        e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "1f9d8944b73169b367cb83b0d22e5432" );
 
-        for ( Map.Entry entry : e.entrySet() ) {
-            String bam = entry.getKey();
-            String md5 = entry.getValue();
-            String paramsFile = paramsFiles.get(bam);
-            System.out.printf("PARAMS FOR %s is %s%n", bam, paramsFile);
-            if ( paramsFile != null ) {
-                WalkerTestSpec spec = new WalkerTestSpec(
-                        "-R " + b36KGReference +
-                                " -T TableRecalibration" +
-                                " -I " + bam +
-                                ( bam.equals( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam" )
+    @DataProvider(name = "cctestdata") // data provider named by testCountCovariates1 and testCountCovariates4
+    public Object[][] createCCTestData() {
+        new CCTest( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "5a52b00d9794d27af723bcf93366681e" ); // instance is not kept here; presumably the TestDataProvider constructor records it - confirm
+        new CCTest( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "17d4b8001c982a70185e344929cf3941");
+        new CCTest( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "714e65d6cb51ae32221a77ce84cbbcdc" );
+        new CCTest( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "64e9f17a1cf6fc04c1f2717c2d2eca67" ); // testCC gives this BAM a different -L interval
+        return CCTest.getTests(CCTest.class); // returns whatever TestDataProvider holds for CCTest.class
+    }
+
+    @Test(dataProvider = "cctestdata") // invoked with the cases supplied by createCCTestData()
+    public void testCountCovariates1(CCTest test) {
+        testCC(test, ""); // empty string: nothing is appended to the CountCovariates command line
+    }
+
+    @Test(dataProvider = "cctestdata") // same cases as testCountCovariates1
+    public void testCountCovariates4(CCTest test) {
+        testCC(test, " -nt 4"); // appends " -nt 4" to the command line; the same md5 from the test case is still used
+    }
+
+    private final void testCC(CCTest test, String parallelism) { // shared body of testCountCovariates1/4; parallelism is appended verbatim to the command line
+        String bam = test.file;
+        String md5 = test.md5;
+
+        WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
+                "-R " + b36KGReference +
+                        " -B:dbsnp,vcf " + b36dbSNP129 +
+                        " -T CountCovariates" +
+                        " -I " + bam +
+                        ( bam.equals( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam" ) // the allTechs BAM gets a different interval from the other BAMs
+                                ? " -L 1:10,800,000-10,810,000" : " -L 1:10,000,000-10,200,000" ) +
+                        " -cov ReadGroupCovariate" +
+                        " -cov QualityScoreCovariate" +
+                        " -cov CycleCovariate" +
+                        " -cov DinucCovariate" +
+                        " --solid_recal_mode SET_Q_ZERO" +
+                        " -recalFile %s" + parallelism,
+                1, // just one output file
+                Arrays.asList(md5));
+        List<File> result = executeTest("testCountCovariates1" + parallelism, spec).getFirst(); // element type restored: a raw List yields Object, which has no getAbsolutePath()
+        paramsFiles.put(bam, result.get(0).getAbsolutePath()); // stored per BAM; testTableRecalibrator1 reads it back as its -recalFile
+    }
+
+
+    private static final class TRTest extends TestDataProvider { // one TableRecalibration case: an input BAM plus the MD5 string given to the WalkerTestSpec
+        String file, md5; // file: BAM path used for -I and as the paramsFiles lookup key; md5: string passed as Arrays.asList(md5)
+
+        private TRTest(final String file, final String md5) {
+            super(TRTest.class); // NOTE(review): presumably records this instance in TestDataProvider under TRTest.class - confirm
+            this.file = file;
+            this.md5 = md5;
+        }
+
+        public String toString() { // describes the case by its input file only; md5 is not included
+            return "TRTest: " + file;
+        }
+    }
+
+    @DataProvider(name = "trtestdata") // data provider named by testTableRecalibrator1
+    public Object[][] createTRTestData() {
+        new TRTest( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "2864f231fab7030377f3c8826796e48f" ); // instance is not kept here; presumably the TestDataProvider constructor records it - confirm
+        new TRTest( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "c164dd635721ba6df3f06dac1877c32d");
+        new TRTest( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "74314e5562c1a65547bb0edaacffe602" );
+        new TRTest( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "2a37c6001826bfabf87063b1dfcf594f" ); // testTableRecalibrator1 gives this BAM a different -L interval
+        return TRTest.getTests(TRTest.class); // returns whatever TestDataProvider holds for TRTest.class
+    }
+
+    @Test(dataProvider = "trtestdata", dependsOnMethods = "testCountCovariates1") // ordered after testCountCovariates1, whose testCC calls fill paramsFiles
+    public void testTableRecalibrator1(TRTest test) {
+        String bam = test.file;
+        String md5 = test.md5;
+        String paramsFile = paramsFiles.get(bam); // path that testCC stored for this BAM, or null if none was stored
+        System.out.printf("PARAMS FOR %s is %s%n", bam, paramsFile);
+        if ( paramsFile != null ) { // NOTE(review): with no stored params file nothing is executed and the case passes silently - confirm this is intended
+            WalkerTestSpec spec = new WalkerTestSpec(
+                    "-R " + b36KGReference +
+                            " -T TableRecalibration" +
+                            " -I " + bam +
+                            ( bam.equals( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam" ) // the allTechs BAM gets a different interval from the other BAMs
                                     ? " -L 1:10,800,000-10,810,000" : " -L 1:10,100,000-10,300,000" ) +
-                                " -o %s" +
-                                " --no_pg_tag" +
-                                " --solid_recal_mode SET_Q_ZERO" +
-                                " -recalFile " + paramsFile,
-                        1, // just one output file
-                        Arrays.asList(md5));
-                executeTest("testTableRecalibrator1", spec);
-            }
+                            " -o %s" +
+                            " --no_pg_tag" +
+                            " --solid_recal_mode SET_Q_ZERO" +
+                            " -recalFile " + paramsFile, // recal file is an input here, not a %s output
+                    1, // just one output file
+                    Arrays.asList(md5));
+            executeTest("testTableRecalibrator1", spec);
         }
     }
 
     @Test
     public void testCountCovariatesUseOriginalQuals() {
         HashMap e = new HashMap();
-        e.put( validationDataLocation + "originalQuals.1kg.chr1.1-1K.bam", "3404965ec4fa99873fe6a44521944fd5");
+        e.put( validationDataLocation + "originalQuals.1kg.chr1.1-1K.bam", "278846c55d97bd9812b758468a83f559");
 
         for ( Map.Entry entry : e.entrySet() ) {
             String bam = entry.getKey();
@@ -97,7 +135,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
                             " -standard" +
                             " -OQ" +
                             " -recalFile %s" +
-                            " --DBSNP " + GATKDataLocation + "dbsnp_129_b36.rod",
+                            " -B:dbsnp,vcf " + b36dbSNP129,
                     1, // just one output file
                     Arrays.asList(md5));
             executeTest("testCountCovariatesUseOriginalQuals", spec);
@@ -107,7 +145,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
     @Test
     public void testTableRecalibratorMaxQ70() {
         HashMap e = new HashMap();
-        e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "0278cce4cfdab869dc0c11d6852a984b" );
+        e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "2864f231fab7030377f3c8826796e48f" );
 
         for ( Map.Entry entry : e.entrySet() ) {
             String bam = entry.getKey();
@@ -136,7 +174,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
     @Test
     public void testCountCovariatesSolidIndelsRemoveRefBias() {
         HashMap e = new HashMap();
-        e.put( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "c9ea5f995e1e2b7a5688533e678dcedc" );
+        e.put( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "8379f24cf5312587a1f92c162ecc220f" );
 
         for ( Map.Entry entry : e.entrySet() ) {
             String bam = entry.getKey();
@@ -144,7 +182,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
 
             WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
                     "-R " + b36KGReference +
-                            " --DBSNP " + GATKDataLocation + "dbsnp_129_b36.rod" +
+                            " -B:dbsnp,vcf " + b36dbSNP129 +
                             " -T CountCovariates" +
                             " -I " + bam +
                             " -standard" +
@@ -162,7 +200,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
     @Test
     public void testTableRecalibratorSolidIndelsRemoveRefBias() {
         HashMap e = new HashMap();
-        e.put( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "993fae4270e7e1e15986f270acf247af" );
+        e.put( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "7d5edb75b176e4151de225f699719ee4" );
 
         for ( Map.Entry entry : e.entrySet() ) {
             String bam = entry.getKey();
@@ -238,7 +276,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
     @Test
     public void testCountCovariatesVCFPlusDBsnp() {
         HashMap e = new HashMap();
-        e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "a3d892bd60d8f679affda3c1e3af96c1");
+        e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "9131d96f39badbf9753653f55b148012");
 
         for ( Map.Entry entry : e.entrySet() ) {
             String bam = entry.getKey();
@@ -249,7 +287,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
                             " -B:anyNameABCD,VCF3 " + validationDataLocation + "vcfexample3.vcf" +
                             " -T CountCovariates" +
                             " -I " + bam +
-                            " --DBSNP " + GATKDataLocation + "dbsnp_129_b36.rod" +
+                            " -B:dbsnp,vcf " + b36dbSNP129 +
                             " -L 1:10,000,000-10,200,000" +
                             " -cov ReadGroupCovariate" +
                             " -cov QualityScoreCovariate" +
@@ -266,7 +304,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
     @Test
     public void testCountCovariatesNoIndex() {
         HashMap e = new HashMap();
-        e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.noindex.bam", "284ccac1f8fe485e52c86333cac7c2d4" );
+        e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.noindex.bam", "8993d32df5cb66c7149f59eccbd57f4c" );
 
         for ( Map.Entry entry : e.entrySet() ) {
             String bam = entry.getKey();
@@ -274,7 +312,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
 
             WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
                     "-R " + b36KGReference +
-                            " --DBSNP " + GATKDataLocation + "dbsnp_129_b36.rod" +
+                            " -B:dbsnp,vcf " + b36dbSNP129 +
                             " -T CountCovariates" +
                             " -I " + bam +
                             " -cov ReadGroupCovariate" +
@@ -292,7 +330,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
     @Test
     public void testTableRecalibratorNoIndex() {
         HashMap e = new HashMap();
-        e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.noindex.bam", "c167799c2d9cab815d7c9b23337f162e" );
+        e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.noindex.bam", "5f913c98ca99754902e9d34f99df468f" );
 
         for ( Map.Entry entry : e.entrySet() ) {
             String bam = entry.getKey();
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersPerformanceTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersPerformanceTest.java
index ade34c964..43ea401f7 100755
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersPerformanceTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersPerformanceTest.java
@@ -16,7 +16,7 @@ public class RecalibrationWalkersPerformanceTest extends WalkerTest {
                         " -L chr1:1-50,000,000" +
                         " -standard" +
                         " -OQ" +
-                        " --DBSNP " + GATKDataLocation + "dbsnp_129_hg18.rod" +
+                        " -B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132.hg18.vcf" +
                         " -recalFile /dev/null" + moreArgs,
                 0,
                 new ArrayList(0));
@@ -31,7 +31,7 @@ public class RecalibrationWalkersPerformanceTest extends WalkerTest {
                         " -L " + evaluationDataLocation + "whole_exome_agilent_designed_120.targets.chr1.interval_list" +
                         " -standard" +
                         " -OQ" +
-                        " --DBSNP " + GATKDataLocation + "dbsnp_129_hg18.rod" +
+                        " -B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132.hg18.vcf" +
                         " -recalFile /dev/null" + moreArgs,
                 0,
                 new ArrayList(0));
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmpliconsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmpliconsIntegrationTest.java
index 95f4ac0ae..0a0d8c5b2 100755
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmpliconsIntegrationTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmpliconsIntegrationTest.java
@@ -19,8 +19,8 @@ public class ValidationAmpliconsIntegrationTest extends WalkerTest {
         String siteVCF = validationDataLocation + "sites_to_validate.vcf";
         String maskVCF = validationDataLocation + "amplicon_mask_sites.vcf";
         String intervalTable = validationDataLocation + "amplicon_interval_table1.table";
-        String testArgs = "-R " + b37KGReference + " -T ValidationAmplicons -B:ValidateAlleles,VCF "+siteVCF+" -o %s";
-        testArgs += " -B:ProbeIntervals,table "+intervalTable+" -BTI ProbeIntervals -B:MaskAlleles,VCF "+maskVCF;
+        String testArgs = "-R " + b37KGReference + " -T ValidationAmplicons --ValidateAlleles:VCF "+siteVCF+" -o %s";
+        testArgs += " --ProbeIntervals:table "+intervalTable+" -BTI ProbeIntervals --MaskAlleles:VCF "+maskVCF;
         testArgs += " --virtualPrimerSize 30";
         WalkerTestSpec spec = new WalkerTestSpec(testArgs, 1,
                 Arrays.asList("27f9450afa132888a8994167f0035fd7"));
@@ -32,8 +32,8 @@ public class ValidationAmpliconsIntegrationTest extends WalkerTest {
         String siteVCF = validationDataLocation + "sites_to_validate.vcf";
         String maskVCF = validationDataLocation + "amplicon_mask_sites.vcf";
         String intervalTable = validationDataLocation + "amplicon_interval_table1.table";
-        String testArgs = "-R " + b37KGReference + " -T ValidationAmplicons -B:ValidateAlleles,VCF "+siteVCF+" -o %s";
-        testArgs += " -B:ProbeIntervals,table "+intervalTable+" -BTI ProbeIntervals -B:MaskAlleles,VCF "+maskVCF;
+        String testArgs = "-R " + b37KGReference + " -T ValidationAmplicons --ValidateAlleles:VCF "+siteVCF+" -o %s";
+        testArgs += " --ProbeIntervals:table "+intervalTable+" -BTI ProbeIntervals --MaskAlleles:VCF "+maskVCF;
         testArgs += " --virtualPrimerSize 30 --doNotUseBWA";
         WalkerTestSpec spec = new WalkerTestSpec(testArgs, 1,
                 Arrays.asList("f2611ff1d9cd5bedaad003251fed8bc1"));
@@ -45,8 +45,8 @@ public class ValidationAmpliconsIntegrationTest extends WalkerTest {
         String siteVCF = validationDataLocation + "sites_to_validate.vcf";
         String maskVCF = validationDataLocation + "amplicon_mask_sites.vcf";
         String intervalTable = validationDataLocation + "amplicon_interval_table1.table";
-        String testArgs = "-R " + b37KGReference + " -T ValidationAmplicons -B:ValidateAlleles,VCF "+siteVCF+" -o %s";
-        testArgs += " -B:ProbeIntervals,table "+intervalTable+" -BTI ProbeIntervals -B:MaskAlleles,VCF "+maskVCF;
+        String testArgs = "-R " + b37KGReference + " -T ValidationAmplicons --ValidateAlleles:VCF "+siteVCF+" -o %s";
+        testArgs += " --ProbeIntervals:table "+intervalTable+" -BTI ProbeIntervals --MaskAlleles:VCF "+maskVCF;
         testArgs += " --virtualPrimerSize 30 --filterMonomorphic";
         WalkerTestSpec spec = new WalkerTestSpec(testArgs, 1,
                 Arrays.asList("77b3f30e38fedad812125bdf6cf3255f"));
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java
index 23c606ad0..8fa5f0c29 100755
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java
@@ -4,8 +4,6 @@ import org.broadinstitute.sting.WalkerTest;
 import org.testng.annotations.Test;
 
 import java.util.Arrays;
-import java.util.HashMap;
-import java.util.Map;
 
 public class VariantEvalIntegrationTest extends WalkerTest {
     private static String variantEvalTestDataRoot = validationDataLocation + "/VariantEval";
@@ -16,27 +14,13 @@ public class VariantEvalIntegrationTest extends WalkerTest {
     private static String cmdRoot = "-T VariantEval" +
             " -R " + b36KGReference;
 
-    private static String root = cmdRoot +
-            " -D " + GATKDataLocation + "dbsnp_129_b36.rod" +
-            " -B:eval,VCF3 " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf" +
-            " -B:comp_genotypes,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.head.vcf";
-
-    private static String rootGZ = cmdRoot +
-            " -D " + GATKDataLocation + "dbsnp_129_b36.rod" +
-            " -B:eval,VCF3 " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf.gz" +
-            " -B:comp_genotypes,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.head.vcf.gz";
-
-    // TODO -- I can't seem to reindex this VCF using Tabix without it causing failures.  Looking into it.  [EB]
-    // private static String[] testsEnumerations = {root, rootGZ};
-    private static String[] testsEnumerations = {root};
-
     @Test
     public void testFundamentalsCountVariantsSNPsAndIndels() {
         WalkerTestSpec spec = new WalkerTestSpec(
                                 buildCommandLine(
                                         "-T VariantEval",
                                         "-R " + b37KGReference,
-                                        "-D " + b37dbSNP129,
+                                        "-B:dbsnp,VCF " + b37dbSNP132,
                                         "-B:eval,VCF " + fundamentalTestVCF,
                                         "-noEV",
                                         "-EV CountVariants",
@@ -45,7 +29,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
                                         "-o %s"
                                 ),
                                 1,
-                                Arrays.asList("48b8417c1f8bd74ff7b9808580abd2a2")
+                                Arrays.asList("bced1842c78fbabb089dd12b7087050d")
                               );
         executeTest("testFundamentalsCountVariantsSNPsandIndels", spec);
     }
@@ -56,7 +40,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
                 buildCommandLine(
                         "-T VariantEval",
                         "-R " + b37KGReference,
-                        "-D " + b37dbSNP129,
+                        "-B:dbsnp,VCF " + b37dbSNP132,
                         "-B:eval,VCF " + fundamentalTestVCF,
                         "-noEV",
                         "-EV CountVariants",
@@ -66,7 +50,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
                         "-o %s"
                 ),
                 1,
-                Arrays.asList("86d45ecefdf5849c55b3ca8f82a3d525")
+                Arrays.asList("06510bd37ffaa39e817ca0dcaf8f8ac2")
         );
         executeTest("testFundamentalsCountVariantsSNPsandIndelsWithNovelty", spec);
     }
@@ -77,7 +61,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
                 buildCommandLine(
                         "-T VariantEval",
                         "-R " + b37KGReference,
-                        "-D " + b37dbSNP129,
+                        "-B:dbsnp,VCF " + b37dbSNP132,
                         "-B:eval,VCF " + fundamentalTestVCF,
                         "-noEV",
                         "-EV CountVariants",
@@ -88,7 +72,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
                         "-o %s"
                 ),
                 1,
-                Arrays.asList("3d18901ec1766aa2e748eac913f5ddcd")
+                Arrays.asList("19c5b1b6396921c5b1059a2849ae4fcc")
         );
         executeTest("testFundamentalsCountVariantsSNPsandIndelsWithNoveltyAndFilter", spec);
     }
@@ -99,7 +83,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
                 buildCommandLine(
                         "-T VariantEval",
                         "-R " + b37KGReference,
-                        "-D " + b37dbSNP129,
+                        "-B:dbsnp,VCF " + b37dbSNP132,
                         "-B:eval,VCF " + fundamentalTestVCF,
                         "-noEV",
                         "-EV CountVariants",
@@ -109,7 +93,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
                         "-o %s"
                 ),
                 1,
-                Arrays.asList("677fe398643e62a10d6739d36a720a12")
+                Arrays.asList("a71f8d81cf166cd97ac628092650964a")
         );
         executeTest("testFundamentalsCountVariantsSNPsandIndelsWithCpG", spec);
     }
@@ -120,7 +104,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
                 buildCommandLine(
                         "-T VariantEval",
                         "-R " + b37KGReference,
-                        "-D " + b37dbSNP129,
+                        "-B:dbsnp,VCF " + b37dbSNP132,
                         "-B:eval,VCF " + fundamentalTestVCF,
                         "-noEV",
                         "-EV CountVariants",
@@ -130,7 +114,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
                         "-o %s"
                 ),
                 1,
-                Arrays.asList("5fb44fd7cb00941c986a9941e43e44cd")
+                Arrays.asList("4dabe0658232f6174188515db6dfe112")
         );
         executeTest("testFundamentalsCountVariantsSNPsandIndelsWithFunctionalClass", spec);
     }
@@ -141,7 +125,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
                 buildCommandLine(
                         "-T VariantEval",
                         "-R " + b37KGReference,
-                        "-D " + b37dbSNP129,
+                        "-B:dbsnp,VCF " + b37dbSNP132,
                         "-B:eval,VCF " + fundamentalTestVCF,
                         "-noEV",
                         "-EV CountVariants",
@@ -151,7 +135,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
                         "-o %s"
                 ),
                 1,
-                Arrays.asList("daaca7ef3b7313e5af217cbc6f37c9e2")
+                Arrays.asList("3340587f10ceff83e5567ddfd1a9a60e")
         );
         executeTest("testFundamentalsCountVariantsSNPsandIndelsWithDegeneracy", spec);
     }
@@ -162,7 +146,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
                 buildCommandLine(
                         "-T VariantEval",
                         "-R " + b37KGReference,
-                        "-D " + b37dbSNP129,
+                        "-B:dbsnp,VCF " + b37dbSNP132,
                         "-B:eval,VCF " + fundamentalTestVCF,
                         "-noEV",
                         "-EV CountVariants",
@@ -172,7 +156,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
                         "-o %s"
                 ),
                 1,
-                Arrays.asList("97c466f8ffd0fcf2c30ef08669d213d9")
+                Arrays.asList("c730c7ee31c8138cef6efd8dd04fbbfc")
         );
         executeTest("testFundamentalsCountVariantsSNPsandIndelsWithSample", spec);
     }
@@ -183,7 +167,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
                 buildCommandLine(
                         "-T VariantEval",
                         "-R " + b37KGReference,
-                        "-D " + b37dbSNP129,
+                        "-B:dbsnp,VCF " + b37dbSNP132,
                         "-B:eval,VCF " + fundamentalTestVCF,
                         "-noEV",
                         "-EV CountVariants",
@@ -195,7 +179,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
                         "-o %s"
                 ),
                 1,
-                Arrays.asList("df8cdfcf3d0c2fc795812c6eae6a76f8")
+                Arrays.asList("2559ca8f454b03e81561f6947f79df18")
         );
         executeTest("testFundamentalsCountVariantsSNPsandIndelsWithJexlExpression", spec);
     }
@@ -206,7 +190,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
                 buildCommandLine(
                         "-T VariantEval",
                         "-R " + b37KGReference,
-                        "-D " + b37dbSNP129,
+                        "-B:dbsnp,VCF " + b37dbSNP132,
                         "-B:eval,VCF " + fundamentalTestVCF,
                         "-noEV",
                         "-EV CountVariants",
@@ -220,7 +204,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
                         "-o %s"
                 ),
                 1,
-                Arrays.asList("c7aed12265e2b2311d17a0cc8a29f6aa")
+                Arrays.asList("23aa5f97641d2fd033095f21c51d2f37")
         );
         executeTest("testFundamentalsCountVariantsSNPsandIndelsWithMultipleJexlExpressions", spec);
     }
@@ -239,7 +223,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
                         "-o %s"
                 ),
                 1,
-                Arrays.asList("d44c8f44384189a09eea85a8e89d7299")
+                Arrays.asList("a69dd3f06903b3f374c6d6f010c653e0")
         );
         executeTest("testFundamentalsCountVariantsNoCompRod", spec);
     }
@@ -247,11 +231,13 @@ public class VariantEvalIntegrationTest extends WalkerTest {
     @Test
     public void testSelect1() {
         String extraArgs = "-L 1:1-10,000,000";
-        for (String tests : testsEnumerations) {
-            WalkerTestSpec spec = new WalkerTestSpec(withSelect(tests, "DP < 50", "DP50") + " " + extraArgs + " -ST CpG -o %s",
-                    1, Arrays.asList("cdbe47ea01b9dd79ff1c5ce6f5fa8bec"));
-            executeTestParallel("testSelect1", spec);
-        }
+        String tests = cmdRoot + // base command line for this test: cmdRoot plus the dbsnp, eval and comp_genotypes bindings below
+                " -B:dbsnp,VCF " + b36dbSNP129 +
+                " -B:eval,VCF3 " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf" +
+                " -B:comp_genotypes,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.head.vcf";
+        WalkerTestSpec spec = new WalkerTestSpec(withSelect(tests, "DP < 50", "DP50") + " " + extraArgs + " -ST CpG -o %s", // withSelect(...) output, then the interval, then the remaining flags
+                1, Arrays.asList("14054badcd89b24c2375e1d09918f681")); // 1 and a one-element MD5 list, matching the single %s in the command
+        executeTestParallel("testSelect1", spec);
     }
 
     @Test
@@ -260,14 +246,14 @@ public class VariantEvalIntegrationTest extends WalkerTest {
 
         WalkerTestSpec spec = new WalkerTestSpec(cmdRoot + " -ST CpG -B:eval,VCF3 " + validationDataLocation + vcfFile + " -B:comp,VCF3 " + validationDataLocation + "GenotypeConcordanceComp.vcf -noEV -EV GenotypeConcordance -o %s",
                 1,
-                Arrays.asList("e4c981f7f5d78680c71310fc9be9a1c1"));
+                Arrays.asList("96f27163f16bb945f19c6623cd6db34e"));
         executeTestParallel("testVEGenotypeConcordance" + vcfFile, spec);
     }
 
     @Test
     public void testCompVsEvalAC() {
         String extraArgs = "-T VariantEval -R "+b36KGReference+" -o %s -ST CpG -EV GenotypeConcordance -B:evalYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.very.few.lines.vcf -B:compYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.fake.genotypes.ac.test.vcf";
-        WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("162daa5039e1965eb2423a8589339a69"));
+        WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("d1932be3748fcf6da77dc51aec323710")); // 1 and a one-element MD5 list, matching the single %s in extraArgs
         executeTestParallel("testCompVsEvalAC",spec);
     }
 
@@ -278,42 +264,38 @@ public class VariantEvalIntegrationTest extends WalkerTest {
     @Test
     public void testTranches() {
         String extraArgs = "-T VariantEval -R "+ hg18Reference +" -B:eval,vcf " + validationDataLocation + "GA2.WEx.cleaned.ug.snpfiltered.indelfiltered.optimized.vcf -o %s -EV TiTvVariantEvaluator -L chr1 -noEV -ST CpG -tf " + testDir + "tranches.6.txt";
-        WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("90cd98044e754b80034a9f4e6d2c55b9"));
+        WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("984df6e94a546294fc7e0846cbac2dfe"));
         executeTestParallel("testTranches",spec);
     }
 
     @Test
     public void testCompOverlap() {
         String extraArgs = "-T VariantEval -R " + b37KGReference + " -L " + validationDataLocation + "VariantEval/pacbio.hg19.intervals -B:comphapmap,vcf " + comparisonDataLocation + "Validated/HapMap/3.3/genotypes_r27_nr.b37_fwd.vcf -B:eval,vcf " + validationDataLocation + "VariantEval/pacbio.ts.recalibrated.vcf -noEV -EV CompOverlap -sn NA12878 -noST -ST Novelty -o %s";
-        WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("70aa420929de7f888a6f48c2d01bbcda"));
+        WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("462d4784dd55294ef9d5118217b157a5"));
         executeTestParallel("testCompOverlap",spec);
     }
 
     @Test
     public void testEvalTrackWithoutGenotypes() {
-        String dbsnp = GATKDataLocation + "dbsnp_129_b37.rod";
-
         String extraArgs = "-T VariantEval -R " +
                            b37KGReference +
                            " -L 20" +
-                           " -D " + dbsnp +
+                           " -B:dbsnp,VCF " + b37dbSNP132 +
                            " -B:evalBI,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" +
                            " -noST -ST Novelty -o %s";
-        WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("5b1fc9a4066aca61f1b5f7b933ad37d9"));
+        WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("0897dfba2f4a245faddce38000555cce"));
         executeTestParallel("testEvalTrackWithoutGenotypes",spec);
     }
 
     @Test
     public void testMultipleEvalTracksWithoutGenotypes() {
-        String dbsnp = GATKDataLocation + "dbsnp_129_b37.rod";
-
         String extraArgs = "-T VariantEval -R " + b37KGReference +
                 " -L 20" +
-                " -D " + dbsnp +
+                " -B:dbsnp,VCF " + b37dbSNP132 +
                 " -B:evalBI,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" +
                 " -B:evalBC,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bc.sites.vcf" +
                 " -noST -ST Novelty -o %s";
-        WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("6d902d9d4d8fef5219a43e416a51cee6"));
+        WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("ead3602e14ec2944b5d9e4dacc08c819"));
         executeTestParallel("testMultipleEvalTracksWithoutGenotypes",spec);
     }
 
@@ -330,19 +312,19 @@ public class VariantEvalIntegrationTest extends WalkerTest {
                            " -noST -noEV -ST Novelty -EV CompOverlap" +
                            " -o %s";
 
-        WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("55a1c53bced20701c56accfc3eb782a7"));
+        WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("167a347ce0729d1bc3d4fd5069ebd674"));
         executeTestParallel("testMultipleCompTracks",spec);
     }
 
     @Test
     public void testPerSampleAndSubsettedSampleHaveSameResults() {
-        String md5 = "454a1750fd36525f24172b21af5f49de";
+        String md5 = "40471a84b501eb440ee2d42e3081f228";
 
         WalkerTestSpec spec = new WalkerTestSpec(
                 buildCommandLine(
                         "-T VariantEval",
                         "-R " + b37KGReference,
-                        "-D " + b37dbSNP129,
+                        "-B:dbsnp,VCF " + b37dbSNP132,
                         "-B:eval,VCF " + fundamentalTestSNPsVCF,
                         "-noEV",
                         "-EV CompOverlap",
@@ -360,7 +342,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
                 buildCommandLine(
                         "-T VariantEval",
                         "-R " + b37KGReference,
-                        "-D " + b37dbSNP129,
+                        "-B:dbsnp,VCF " + b37dbSNP132,
                         "-B:eval,VCF " + fundamentalTestSNPsOneSampleVCF,
                         "-noEV",
                         "-EV CompOverlap",
@@ -381,7 +363,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
                                 buildCommandLine(
                                         "-T VariantEval",
                                         "-R " + b37KGReference,
-                                        "-D " + b37dbSNP129,
+                                        "-B:dbsnp,VCF " + b37dbSNP132,
                                         "-B:eval,VCF " + fundamentalTestSNPsVCF,
                                         "-noEV",
                                         "-EV CountVariants",
@@ -391,7 +373,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
                                         "-o %s"
                                 ),
                                 1,
-                                Arrays.asList("bf324e4c87fe0d21170fcd2a67a20371")
+                                Arrays.asList("44464fe7c89a56cf128a932ef640f7da")
                               );
         executeTest("testAlleleCountStrat", spec);
     }
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java
index 057053a1c..a5b0412e8 100755
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java
@@ -26,9 +26,9 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
     }
 
     VRTest lowPass = new VRTest("phase1.projectConsensus.chr20.raw.snps.vcf",
-            "d33212a84368e821cbedecd4f59756d6",  // tranches
-            "4652dca41222bebdf9d9fda343b2a835",  // recal file
-            "243a397a33a935fcaccd5deb6d16f0c0"); // cut VCF
+            "0ddd1e0e483d2eaf56004615cea23ec7",  // tranches
+            "58780f63182e139fdbe17f6c18b5b774",  // recal file
+            "f67d844b6252a55452cf4167b77530b1"); // cut VCF
 
     @DataProvider(name = "VRTest")
     public Object[][] createData1() {
@@ -41,11 +41,13 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
         //System.out.printf("PARAMS FOR %s is %s%n", vcf, clusterFile);
         WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
                 "-R " + b37KGReference +
-                        " -B:dbsnp,VCF,known=true,training=false,truth=false,prior=10.0 " + GATKDataLocation + "dbsnp_132_b37.leftAligned.vcf" +
-                        " -B:hapmap,VCF,known=false,training=true,truth=true,prior=15.0 " + comparisonDataLocation + "Validated/HapMap/3.3/sites_r27_nr.b37_fwd.vcf" +
-                        " -B:omni,VCF,known=false,training=true,truth=true,prior=12.0 " + comparisonDataLocation + "Validated/Omni2.5_chip/Omni25_sites_1525_samples.b37.vcf" +
+                        " -known:prior=10.0 " + GATKDataLocation + "dbsnp_132_b37.leftAligned.vcf" +
+                        " -training:prior=15.0 " + comparisonDataLocation + "Validated/HapMap/3.3/sites_r27_nr.b37_fwd.vcf" +
+                        " -truth:prior=15.0 " + comparisonDataLocation + "Validated/HapMap/3.3/sites_r27_nr.b37_fwd.vcf" +
+                        " -training:prior=12.0 " + comparisonDataLocation + "Validated/Omni2.5_chip/Omni25_sites_1525_samples.b37.vcf" +
+                        " -truth:prior=12.0 " + comparisonDataLocation + "Validated/Omni2.5_chip/Omni25_sites_1525_samples.b37.vcf" +
                         " -T VariantRecalibrator" +
-                        " -B:input,VCF " + params.inVCF +
+                        " -input " + params.inVCF +
                         " -L 20:1,000,000-40,000,000" +
                         " -an QD -an HaplotypeScore -an HRun" +
                         " -percentBad 0.07" +
@@ -64,7 +66,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
                         " -T ApplyRecalibration" +
                         " -L 20:12,000,000-30,000,000" +
                         " -NO_HEADER" +
-                        " -B:input,VCF " + params.inVCF +
+                        " -input " + params.inVCF +
                         " -o %s" +
                         " -tranchesFile " + MD5DB.getMD5FilePath(params.tranchesMD5, null) +
                         " -recalFile " + MD5DB.getMD5FilePath(params.recalMD5, null),
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java
index 904a5b29b..4abf0a102 100755
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java
@@ -44,7 +44,7 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
 
     public void test1InOut(String file, String md5, String args, boolean vcf3) {
          WalkerTestSpec spec = new WalkerTestSpec(
-                 baseTestString(" -priority v1 -B:v1,VCF" + (vcf3 ? "3 " : " ") + validationDataLocation + file + args),
+                 baseTestString(" -priority v1 -V:v1,VCF" + (vcf3 ? "3 " : " ") + validationDataLocation + file + args),
                  1,
                  Arrays.asList(md5));
          executeTest("testInOut1--" + file, spec);
@@ -52,7 +52,7 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
 
     public void combine2(String file1, String file2, String args, String md5, boolean vcf3) {
          WalkerTestSpec spec = new WalkerTestSpec(
-                 baseTestString(" -priority v1,v2 -B:v1,VCF" + (vcf3 ? "3 " : " ") + validationDataLocation + file1 + " -B:v2,VCF" + (vcf3 ? "3 " : " ") + validationDataLocation + file2 + args),
+                 baseTestString(" -priority v1,v2 -V:v1,VCF" + (vcf3 ? "3 " : " ") + validationDataLocation + file1 + " -V:v2,VCF" + (vcf3 ? "3 " : " ") + validationDataLocation + file2 + args),
                  1,
                  Arrays.asList(md5));
          executeTest("combine2 1:" + new File(file1).getName() + " 2:" + new File(file2).getName(), spec);
@@ -63,13 +63,21 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
         String file2 = "hapmap_3.3.b37.sites.vcf";
         WalkerTestSpec spec = new WalkerTestSpec(
                 "-T CombineVariants -NO_HEADER -o %s -R " + b37KGReference
-                        + " -L 1:1-10,000,000 -B:omni,VCF " + validationDataLocation + file1
-                        + " -B:hm3,VCF " + validationDataLocation + file2 + args,
+                        + " -L 1:1-10,000,000 -V:omni " + validationDataLocation + file1
+                        + " -V:hm3 " + validationDataLocation + file2 + args,
                 1,
                 Arrays.asList(md5));
         executeTest("combineSites 1:" + new File(file1).getName() + " 2:" + new File(file2).getName() + " args = " + args, spec);
     }
 
+    public void combinePLs(String file1, String file2, String md5) {
+         WalkerTestSpec spec = new WalkerTestSpec(
+                 "-T CombineVariants -NO_HEADER -o %s -R " + b36KGReference + " -priority v1,v2 -V:v1 " + validationDataLocation + file1 + " -V:v2 " + validationDataLocation + file2,
+                 1,
+                 Arrays.asList(md5));
+         executeTest("combine PLs 1:" + new File(file1).getName() + " 2:" + new File(file2).getName(), spec);
+    }
+
     @Test public void test1SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "c608b9fc1e36dba6cebb4f259883f9f0", true); }
     @Test public void test2SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "20caad94411d6ab48153b214de916df8", " -setKey foo", true); }
     @Test public void test3SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "004f3065cb1bc2ce2f9afd695caf0b48", " -setKey null", true); }
@@ -78,6 +86,8 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
     @Test public void test1Indel1() { test1InOut("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "7593be578d4274d672fc22fced38012b", false); }
     @Test public void test1Indel2() { test1InOut("CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "1cd467863c4e948fadd970681552d57e", false); }
 
+    @Test public void combineWithPLs() { combinePLs("combine.3.vcf", "combine.4.vcf", "0f873fed02aa99db5b140bcd6282c10a"); }
+
     @Test public void combineTrioCalls() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", "", "1d5a021387a8a86554db45a29f66140f", false); } // official project VCF files in tabix format
     @Test public void combineTrioCallsMin() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", " -minimalVCF", "20163d60f18a46496f6da744ab5cc0f9", false); } // official project VCF files in tabix format
     @Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "f1cf095c2fe9641b7ca1f8ee2c46fd4a", false); }
@@ -91,10 +101,10 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
 
     @Test public void threeWayWithRefs() {
         WalkerTestSpec spec = new WalkerTestSpec(
-                baseTestString(" -B:NA19240_BGI,VCF "+validationDataLocation+"NA19240.BGI.RG.vcf" +
-                        " -B:NA19240_ILLUMINA,VCF "+validationDataLocation+"NA19240.ILLUMINA.RG.vcf" +
-                        " -B:NA19240_WUGSC,VCF "+validationDataLocation+"NA19240.WUGSC.RG.vcf" +
-                        " -B:denovoInfo,VCF "+validationDataLocation+"yri_merged_validation_data_240610.annotated.b36.vcf" +
+                baseTestString(" -V:NA19240_BGI "+validationDataLocation+"NA19240.BGI.RG.vcf" +
+                        " -V:NA19240_ILLUMINA "+validationDataLocation+"NA19240.ILLUMINA.RG.vcf" +
+                        " -V:NA19240_WUGSC "+validationDataLocation+"NA19240.WUGSC.RG.vcf" +
+                        " -V:denovoInfo "+validationDataLocation+"yri_merged_validation_data_240610.annotated.b36.vcf" +
                         " -setKey centerSet" +
                         " -filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED" +
                         " -priority NA19240_BGI,NA19240_ILLUMINA,NA19240_WUGSC,denovoInfo" +
@@ -104,15 +114,14 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
         executeTest("threeWayWithRefs", spec);
     }
 
-
     // complex examples with filtering, indels, and multiple alleles
     public void combineComplexSites(String args, String md5) {
         String file1 = "combine.1.vcf";
         String file2 = "combine.2.vcf";
         WalkerTestSpec spec = new WalkerTestSpec(
                 "-T CombineVariants -NO_HEADER -o %s -R " + b37KGReference
-                        + " -B:one,VCF " + validationDataLocation + file1
-                        + " -B:two,VCF " + validationDataLocation + file2 + args,
+                        + " -V:one " + validationDataLocation + file1
+                        + " -V:two " + validationDataLocation + file2 + args,
                 1,
                 Arrays.asList(md5));
         executeTest("combineComplexSites 1:" + new File(file1).getName() + " 2:" + new File(file2).getName() + " args = " + args, spec);
@@ -120,6 +129,6 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
 
     @Test public void complexTestFull() { combineComplexSites("", "b5a53ee92bdaacd2bb3327e9004ae058"); }
     @Test public void complexTestMinimal() { combineComplexSites(" -minimalVCF", "df96cb3beb2dbb5e02f80abec7d3571e"); }
-    @Test public void complexTestSitesOnly() { combineComplexSites(" -sites_only", "f72a178137e25dbe0b931934cdc0079d"); }
+    @Test public void complexTestSitesOnly() { combineComplexSites(" -sites_only", "f704caeaaaed6711943014b847fe381a"); }
     @Test public void complexTestSitesOnlyMinimal() { combineComplexSites(" -sites_only -minimalVCF", "f704caeaaaed6711943014b847fe381a"); }
 }
\ No newline at end of file
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariantsIntegrationTest.java
new file mode 100644
index 000000000..2139a53e7
--- /dev/null
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariantsIntegrationTest.java
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2010.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
+ * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.gatk.walkers.variantutils;
+
+import org.broadinstitute.sting.WalkerTest;
+import org.testng.annotations.Test;
+
+import java.util.Arrays;
+
+/**
+ * Tests LeftAlignVariants
+ */
+public class LeftAlignVariantsIntegrationTest extends WalkerTest {
+
+    @Test
+    public void testLeftAlignment() {
+         WalkerTestSpec spec = new WalkerTestSpec(
+                 "-T LeftAlignVariants -o %s -R " + b37KGReference + " --variant:vcf " + validationDataLocation + "forLeftAlignVariantsTest.vcf -NO_HEADER",
+                 1,
+                 Arrays.asList("158b1d71b28c52e2789f164500b53732"));
+         executeTest("test left alignment", spec);
+    }
+}
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariantsIntegrationTest.java
index 82c894c6f..d10bb4452 100755
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariantsIntegrationTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariantsIntegrationTest.java
@@ -38,7 +38,7 @@ public class LiftoverVariantsIntegrationTest extends WalkerTest {
     @Test
     public void testb36Tohg19() {
          WalkerTestSpec spec = new WalkerTestSpec(
-                 "-T LiftoverVariants -o %s -R " + b36KGReference + " -B:variant,vcf3 " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.500.noheader.vcf -chain " + validationDataLocation + "b36ToHg19.broad.over.chain -dict /seq/references/Homo_sapiens_assembly19/v0/Homo_sapiens_assembly19.dict",
+                 "-T LiftoverVariants -o %s -R " + b36KGReference + " --variant:vcf3 " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.500.noheader.vcf -chain " + validationDataLocation + "b36ToHg19.broad.over.chain -dict /seq/references/Homo_sapiens_assembly19/v0/Homo_sapiens_assembly19.dict",
                  1,
                  Arrays.asList("70aeaca5b74cc7ba8e2da7b71ff0fbfd"));
          executeTest("test b36 to hg19", spec);
@@ -47,7 +47,7 @@ public class LiftoverVariantsIntegrationTest extends WalkerTest {
     @Test
     public void testb36Tohg19UnsortedSamples() {
          WalkerTestSpec spec = new WalkerTestSpec(
-                 "-T LiftoverVariants -o %s -R " + b36KGReference + " -B:variant,vcf3 " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.500.noheader.unsortedSamples.vcf -chain " + validationDataLocation + "b36ToHg19.broad.over.chain -dict /seq/references/Homo_sapiens_assembly19/v0/Homo_sapiens_assembly19.dict",
+                 "-T LiftoverVariants -o %s -R " + b36KGReference + " --variant:vcf3 " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.500.noheader.unsortedSamples.vcf -chain " + validationDataLocation + "b36ToHg19.broad.over.chain -dict /seq/references/Homo_sapiens_assembly19/v0/Homo_sapiens_assembly19.dict",
                  1,
                  Arrays.asList("3fd7ec2dc4064ef410786276b0dc9d08"));
          executeTest("test b36 to hg19, unsorted samples", spec);
@@ -56,7 +56,7 @@ public class LiftoverVariantsIntegrationTest extends WalkerTest {
     @Test
     public void testhg18Tohg19Unsorted() {
          WalkerTestSpec spec = new WalkerTestSpec(
-                 "-T LiftoverVariants -o %s -R " + hg18Reference + " -B:variant,vcf " + validationDataLocation + "liftover_test.vcf -chain " + validationDataLocation + "hg18ToHg19.broad.over.chain -dict /seq/references/Homo_sapiens_assembly19/v0/Homo_sapiens_assembly19.dict",
+                 "-T LiftoverVariants -o %s -R " + hg18Reference + " --variant:vcf " + validationDataLocation + "liftover_test.vcf -chain " + validationDataLocation + "hg18ToHg19.broad.over.chain -dict /seq/references/Homo_sapiens_assembly19/v0/Homo_sapiens_assembly19.dict",
                  1,
                  Arrays.asList("ab2c6254225d7e2ecf52eee604d5673b"));
          executeTest("test hg18 to hg19, unsorted", spec);
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java
index b5f41542e..bec0d5dd4 100755
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java
@@ -16,7 +16,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
         String samplesFile = validationDataLocation + "SelectVariants.samples.txt";
 
         WalkerTestSpec spec = new WalkerTestSpec(
-            baseTestString(" -sn A -se '[CDH]' -sf " + samplesFile + " -env -ef -select 'DP < 250' -B:variant,VCF3 " + testfile + " -NO_HEADER"),
+            baseTestString(" -sn A -se '[CDH]' -sf " + samplesFile + " -env -ef -select 'DP < 250' --variant:VCF3 " + testfile + " -NO_HEADER"),
             1,
             Arrays.asList("d18516c1963802e92cb9e425c0b75fd6")
         );
@@ -29,7 +29,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
         String testfile = validationDataLocation + "test.dup.vcf";
 
         WalkerTestSpec spec = new WalkerTestSpec(
-                baseTestString(" -sn A -sn B -sn C -B:variant,VCF3 " + testfile + " -NO_HEADER"),
+                baseTestString(" -sn A -sn B -sn C --variant:VCF3 " + testfile + " -NO_HEADER"),
                 1,
                 Arrays.asList("b74038779fe6485dbb8734ae48178356")
         );
@@ -42,7 +42,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
         String testFile = validationDataLocation + "NA12878.hg19.example1.vcf";
 
         WalkerTestSpec spec = new WalkerTestSpec(
-                "-T SelectVariants -R " + hg19Reference + " -sn NA12878 -disc myvar -L 20:1012700-1020000 -B:variant,VCF " + b37hapmapGenotypes + " -B:myvar,VCF " + testFile + " -o %s -NO_HEADER",
+                "-T SelectVariants -R " + hg19Reference + " -sn NA12878 -L 20:1012700-1020000 --variant:VCF " + b37hapmapGenotypes + " -disc:VCF " + testFile + " -o %s -NO_HEADER",
                 1,
                 Arrays.asList("78e6842325f1f1bc9ab30d5e7737ee6e")
         );
@@ -55,7 +55,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
         String testFile = validationDataLocation + "NA12878.hg19.example1.vcf";
 
         WalkerTestSpec spec = new WalkerTestSpec(
-                "-T SelectVariants -R " + hg19Reference + " -sn NA12878 -conc hapmap -L 20:1012700-1020000 -B:hapmap,VCF " + b37hapmapGenotypes + " -B:variant,VCF " + testFile + " -o %s -NO_HEADER",
+                "-T SelectVariants -R " + hg19Reference + " -sn NA12878 -L 20:1012700-1020000 -conc:VCF " + b37hapmapGenotypes + " --variant " + testFile + " -o %s -NO_HEADER",
                 1,
                 Arrays.asList("d2ba3ea30a810f6f0fbfb1b643292b6a")
         );
@@ -63,4 +63,16 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
         executeTest("testConcordance--" + testFile, spec);
     }
 
+    @Test(enabled=false)
+    public void testRemovePLs() {
+        String testFile = validationDataLocation + "combine.3.vcf";
+
+        WalkerTestSpec spec = new WalkerTestSpec(
+                "-T SelectVariants -R " + b36KGReference + " -sn NA12892 --variant " + testFile + " -o %s -NO_HEADER",
+                1,
+                Arrays.asList("")
+        );
+
+        executeTest("testWithPLs--" + testFile, spec);
+    }
 }
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VCFStreamingIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VCFStreamingIntegrationTest.java
index d7efe4212..ec3d1f580 100644
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VCFStreamingIntegrationTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VCFStreamingIntegrationTest.java
@@ -56,7 +56,7 @@ public class VCFStreamingIntegrationTest extends WalkerTest {
         WalkerTestSpec spec = new WalkerTestSpec(
             "-T SelectVariants" +
                     " -R " + b36KGReference +
-                    " -B:variant,vcf3,storage=STREAM " + tmpFifo.getAbsolutePath() +
+                    " --variant:vcf3,storage=STREAM " + tmpFifo.getAbsolutePath() +
                     " --NO_HEADER" +
                     " -o %s",
             1,
@@ -80,7 +80,7 @@ public class VCFStreamingIntegrationTest extends WalkerTest {
         WalkerTestSpec selectTestSpec = new WalkerTestSpec(
             "-T SelectVariants" +
             " -R " + b36KGReference +
-            " -B:variant,vcf3,storage=STREAM " + testFile +
+            " --variant:vcf3,storage=STREAM " + testFile +
             " --NO_HEADER" +
             " -select 'QD > 2.0'" +
             " -o " + tmpFifo.getAbsolutePath(),
@@ -98,7 +98,7 @@ public class VCFStreamingIntegrationTest extends WalkerTest {
             " -EV CompOverlap -noEV -noST" +
             " -o %s",
             1,
-            Arrays.asList("f60729c900bc8368717653b3fad80d1e")           //"f60729c900bc8368717653b3fad80d1e"
+            Arrays.asList("ea09bf764adba9765b99921c5ba2c709")
         );
         executeTest("testVCFStreamingChain", selectTestSpec);
 
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariantsIntegrationTest.java
index 67c4297b1..adf3b21a8 100755
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariantsIntegrationTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariantsIntegrationTest.java
@@ -34,7 +34,7 @@ import java.util.Arrays;
 public class ValidateVariantsIntegrationTest extends WalkerTest {
 
     public static String baseTestString(String file, String type) {
-        return "-T ValidateVariants -R " + b36KGReference + " -L 1:10001292-10001303 -B:variant,VCF " + validationDataLocation + file + " --validationType " + type;
+        return "-T ValidateVariants -R " + b36KGReference + " -L 1:10001292-10001303 --variant:vcf " + validationDataLocation + file + " --validationType " + type;
     }
 
     @Test
@@ -95,7 +95,7 @@ public class ValidateVariantsIntegrationTest extends WalkerTest {
     @Test
     public void testBadID() {
         WalkerTestSpec spec = new WalkerTestSpec(
-                baseTestString("validationExampleBad.vcf", "IDS") + " -D " + GATKDataLocation + "dbsnp_129_b36.rod",
+                baseTestString("validationExampleBad.vcf", "IDS") + " --dbsnp " + b36dbSNP129,
                 0,
                 UserException.MalformedFile.class
         );
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTableIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTableIntegrationTest.java
index 1db712353..19021c1c2 100755
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTableIntegrationTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTableIntegrationTest.java
@@ -35,7 +35,7 @@ import java.io.File;
 public class VariantsToTableIntegrationTest extends WalkerTest {
     private String variantsToTableCmd(String moreArgs) {
         return "-R " + hg18Reference +
-                " -B:eval,vcf " + validationDataLocation + "/soap_gatk_annotated.vcf" +
+                " --variant:vcf " + validationDataLocation + "/soap_gatk_annotated.vcf" +
                 " -T VariantsToTable" +
                 " -F CHROM -F POS -F ID -F REF -F ALT -F QUAL -F FILTER -F TRANSITION -F DP -F SB -F set -F RankSumP -F refseq.functionalClass*" +
                 " -L chr1 -KMA -o %s" + moreArgs;
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCFIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCFIntegrationTest.java
index 8c96c1e11..df247aed5 100755
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCFIntegrationTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCFIntegrationTest.java
@@ -16,6 +16,22 @@ import java.util.ArrayList;
  */
 public class VariantsToVCFIntegrationTest extends WalkerTest {
 
+    @Test
+    public void testVariantsToVCFUsingDbsnpInput() {
+        List md5 = new ArrayList();
+        md5.add("d64942fed2a5b7b407f9537dd2b4832e");
+
+        WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
+                "-R " + b36KGReference +
+                        " --variant:dbsnp " + GATKDataLocation + "Comparisons/Validated/dbSNP/dbsnp_129_b36.rod" +
+                        " -T VariantsToVCF" +
+                        " -L 1:1-30,000,000" +
+                        " -o %s" +
+                        " -NO_HEADER",
+                1, // just one output file
+                md5);
+        executeTest("testVariantsToVCFUsingDbsnpInput", spec).getFirst();
+    }
 
     @Test
     public void testVariantsToVCFUsingGeliInput() {
@@ -24,7 +40,7 @@ public class VariantsToVCFIntegrationTest extends WalkerTest {
 
         WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
                 "-R " + b36KGReference +
-                        " -B:variant,GeliText " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.lod5.variants.geli.calls" +
+                        " --variant:GeliText " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.lod5.variants.geli.calls" +
                         " -T VariantsToVCF" +
                         " -L 1:10,000,000-11,000,000" +
                         " -sample NA123AB" +
@@ -32,25 +48,25 @@ public class VariantsToVCFIntegrationTest extends WalkerTest {
                         " -NO_HEADER",
                 1, // just one output file
                 md5);
-        executeTest("testVariantsToVCFUsingGeliInput #1", spec).getFirst();
+        executeTest("testVariantsToVCFUsingGeliInput - calls", spec).getFirst();
     }
 
     @Test
     public void testGenotypesToVCFUsingGeliInput() {
         List md5 = new ArrayList();
-        md5.add("71e8c98d7c3a73b6287ecc339086fe03");
+        md5.add("2413f036ec4100b8d5db179946159a82");
 
         WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
                 "-R " + b36KGReference +
-                        " -B:variant,GeliText " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.lod5.genotypes.geli.calls" +
+                        " --variant:GeliText " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.lod5.genotypes.geli.calls" +
                         " -T VariantsToVCF" +
-                        " -L 1:10,000,000-11,000,000" +
+                        " -L 1:10,100,000-10,200,000" +
                         " -sample NA123AB" +
                         " -o %s" +
                         " -NO_HEADER",
                 1, // just one output file
                 md5);
-        executeTest("testVariantsToVCFUsingGeliInput #2", spec).getFirst();
+        executeTest("testVariantsToVCFUsingGeliInput - genotypes", spec).getFirst();
     }
 
     @Test
@@ -60,7 +76,7 @@ public class VariantsToVCFIntegrationTest extends WalkerTest {
 
         WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
                 "-R " + b36KGReference +
-                        " -B:variant,HapMap " + validationDataLocation + "rawHapMap.yri.chr1.txt" +
+                        " --variant:HapMap " + validationDataLocation + "rawHapMap.yri.chr1.txt" +
                         " -T VariantsToVCF" +
                         " -L 1:1-1,000,000" +
                         " -o %s" +
@@ -77,7 +93,7 @@ public class VariantsToVCFIntegrationTest extends WalkerTest {
 
         WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
                 "-R " + b36KGReference +
-                        " -B:variant,VCF " + validationDataLocation + "complexExample.vcf4" +
+                        " --variant:VCF " + validationDataLocation + "complexExample.vcf4" +
                         " -T VariantsToVCF" +
                         " -o %s" +
                         " -NO_HEADER",
diff --git a/public/java/test/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffCodecUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffCodecUnitTest.java
new file mode 100644
index 000000000..6d492565b
--- /dev/null
+++ b/public/java/test/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffCodecUnitTest.java
@@ -0,0 +1,259 @@
+/*
+ * Copyright (c) 2011, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.utils.codecs.snpEff;
+
+import org.apache.commons.io.input.ReaderInputStream;
+import org.broad.tribble.TribbleException;
+import org.broad.tribble.readers.AsciiLineReader;
+import org.broad.tribble.readers.LineReader;
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.EffectType;
+import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.ChangeType;
+import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.Zygosity;
+
+import java.io.StringReader;
+
+public class SnpEffCodecUnitTest {
+
+    @Test
+    public void testParseWellFormedSnpEffHeaderLine() {
+        // A 23-column header line; readHeader() is expected to hand the same line back.
+        final String header = "# Chromo\tPosition\tReference\tChange\tChange type\t" +
+                  "Homozygous\tQuality\tCoverage\tWarnings\tGene_ID\tGene_name\tBio_type\tTrancript_ID\tExon_ID\t" +
+                  "Exon_Rank\tEffect\told_AA/new_AA\tOld_codon/New_codon\tCodon_Num(CDS)\tCDS_size\tCodons around\t" +
+                  "AAs around\tCustom_interval_ID";
+        final StringReader source = new StringReader(header);
+        final LineReader lineReader = new AsciiLineReader(new ReaderInputStream(source));
+
+        final Object parsedHeader = new SnpEffCodec().readHeader(lineReader);
+        Assert.assertEquals((String)parsedHeader, header);
+    }
+
+    @Test(expectedExceptions = TribbleException.InvalidHeader.class)
+    public void testParseWrongNumberOfFieldsSnpEffHeaderLine() {
+        // Only 22 columns: the trailing Custom_interval_ID column is missing, so the header must be rejected.
+        final String truncatedHeader = "# Chromo\tPosition\tReference\tChange\tChange type\t" +
+                  "Homozygous\tQuality\tCoverage\tWarnings\tGene_ID\tGene_name\tBio_type\tTrancript_ID\tExon_ID\t" +
+                  "Exon_Rank\tEffect\told_AA/new_AA\tOld_codon/New_codon\tCodon_Num(CDS)\tCDS_size\tCodons around\t" +
+                  "AAs around";
+        final StringReader source = new StringReader(truncatedHeader);
+        final LineReader lineReader = new AsciiLineReader(new ReaderInputStream(source));
+        new SnpEffCodec().readHeader(lineReader);
+    }
+
+    @Test(expectedExceptions = TribbleException.InvalidHeader.class)
+    public void testParseMisnamedColumnSnpEffHeaderLine() {
+        // Right column count, but the third column is spelled "Ref" instead of "Reference".
+        final String misnamedHeader = "# Chromo\tPosition\tRef\tChange\tChange type\t" +
+                  "Homozygous\tQuality\tCoverage\tWarnings\tGene_ID\tGene_name\tBio_type\tTrancript_ID\tExon_ID\t" +
+                  "Exon_Rank\tEffect\told_AA/new_AA\tOld_codon/New_codon\tCodon_Num(CDS)\tCDS_size\tCodons around\t" +
+                  "AAs around\tCustom_interval_ID";
+        final StringReader source = new StringReader(misnamedHeader);
+        final LineReader lineReader = new AsciiLineReader(new ReaderInputStream(source));
+        new SnpEffCodec().readHeader(lineReader);
+    }
+
+    @Test
+    public void testParseSimpleEffectSnpEffLine() {
+        String simpleEffectSnpEffLine = "1\t69428\tT\tG\tSNP\tHom\t6049.69\t61573\t\tENSG00000177693\t" +
+                  "OR4F5\tmRNA\tENST00000326183\texon_1_69055_70108\t1\tNON_SYNONYMOUS_CODING\tF/C\tTTT/TGT\t113\t918\t\t\t";
+        // Expected decoding of the line above; its blank columns are expected to come back as null.
+        SnpEffFeature expectedFeature = new SnpEffFeature("1",
+                                                          69428L,
+                                                          "T",
+                                                          "G",
+                                                          ChangeType.SNP,
+                                                          Zygosity.Hom,
+                                                          6049.69,
+                                                          61573L,
+                                                          null,
+                                                          "ENSG00000177693",
+                                                          "OR4F5",
+                                                          "mRNA",
+                                                          "ENST00000326183",
+                                                          "exon_1_69055_70108",
+                                                          1,
+                                                          false,
+                                                          EffectType.NON_SYNONYMOUS_CODING,
+                                                          null,
+                                                          "F/C",
+                                                          "TTT/TGT",
+                                                          113,
+                                                          918,
+                                                          null,
+                                                          null,
+                                                          null
+                                                         );
+
+        SnpEffCodec codec = new SnpEffCodec();
+        SnpEffFeature feature = (SnpEffFeature)codec.decode(simpleEffectSnpEffLine);
+
+        Assert.assertEquals(feature, expectedFeature);
+    }
+
+    @Test
+    public void testParseNonCodingRegionSnpEffLine() {
+        String nonCodingRegionSnpEffLine = "1\t1337592\tG\tC\tSNP\tHom\t1935.52\t21885\t\tENSG00000250188\t" +
+                  "RP4-758J18.5\tmRNA\tENST00000514958\texon_1_1337454_1338076\t2\tWITHIN_NON_CODING_GENE, NON_SYNONYMOUS_CODING\t" +
+                  "L/V\tCTA/GTA\t272\t952\t\t\t";
+        // Expected: the WITHIN_NON_CODING_GENE prefix shows up as the boolean argument (true), not as the effect type.
+        SnpEffFeature expectedFeature = new SnpEffFeature("1",
+                                                          1337592L,
+                                                          "G",
+                                                          "C",
+                                                          ChangeType.SNP,
+                                                          Zygosity.Hom,
+                                                          1935.52,
+                                                          21885L,
+                                                          null,
+                                                          "ENSG00000250188",
+                                                          "RP4-758J18.5",
+                                                          "mRNA",
+                                                          "ENST00000514958",
+                                                          "exon_1_1337454_1338076",
+                                                          2,
+                                                          true,
+                                                          EffectType.NON_SYNONYMOUS_CODING,
+                                                          null,
+                                                          "L/V",
+                                                          "CTA/GTA",
+                                                          272,
+                                                          952,
+                                                          null,
+                                                          null,
+                                                          null
+                                                         );
+
+        SnpEffCodec codec = new SnpEffCodec();
+        SnpEffFeature feature = (SnpEffFeature)codec.decode(nonCodingRegionSnpEffLine);
+
+        Assert.assertEquals(feature, expectedFeature);
+    }
+
+    @Test
+    public void testParseExtraEffectInformationSnpEffLine() {
+        String extraEffectInformationSnpEffLine = "1\t879537\tT\tC\tSNP\tHom\t341.58\t13733\t\tENSG00000187634\tSAMD11\t" +
+                  "mRNA\tENST00000341065\t\t\tUTR_3_PRIME: 4 bases from transcript end\t\t\t\t\t\t\t";
+        // Expected: the text after "UTR_3_PRIME: " is carried as a separate string next to the effect type.
+        SnpEffFeature expectedFeature = new SnpEffFeature("1",
+                                                          879537L,
+                                                          "T",
+                                                          "C",
+                                                          ChangeType.SNP,
+                                                          Zygosity.Hom,
+                                                          341.58,
+                                                          13733L,
+                                                          null,
+                                                          "ENSG00000187634",
+                                                          "SAMD11",
+                                                          "mRNA",
+                                                          "ENST00000341065",
+                                                          null,
+                                                          null,
+                                                          false,
+                                                          EffectType.UTR_3_PRIME,
+                                                          "4 bases from transcript end",
+                                                          null,
+                                                          null,
+                                                          null,
+                                                          null,
+                                                          null,
+                                                          null,
+                                                          null
+                                                         );
+
+        SnpEffCodec codec = new SnpEffCodec();
+        SnpEffFeature feature = (SnpEffFeature)codec.decode(extraEffectInformationSnpEffLine);
+
+        Assert.assertEquals(feature, expectedFeature);
+    }
+
+    @Test
+    public void testParseMultiEffectSnpEffLine() {
+        String multiEffectSnpEffLine = "1\t901901\tC\tT\tSNP\tHom\t162.91\t4646\t\tENSG00000187583\tPLEKHN1\tmRNA\t" +
+                  "ENST00000379410\texon_1_901877_901994\t1\tSTART_GAINED: ATG, UTR_5_PRIME: 11 bases from TSS\t\t\t\t\t\t\t";
+        // Expected: only the first effect (START_GAINED) becomes the effect type; the rest stays in the extra string.
+        SnpEffFeature expectedFeature = new SnpEffFeature("1",
+                                                          901901L,
+                                                          "C",
+                                                          "T",
+                                                          ChangeType.SNP,
+                                                          Zygosity.Hom,
+                                                          162.91,
+                                                          4646L,
+                                                          null,
+                                                          "ENSG00000187583",
+                                                          "PLEKHN1",
+                                                          "mRNA",
+                                                          "ENST00000379410",
+                                                          "exon_1_901877_901994",
+                                                          1,
+                                                          false,
+                                                          EffectType.START_GAINED,
+                                                          "ATG, UTR_5_PRIME: 11 bases from TSS",
+                                                          null,
+                                                          null,
+                                                          null,
+                                                          null,
+                                                          null,
+                                                          null,
+                                                          null
+                                                         );
+
+        SnpEffCodec codec = new SnpEffCodec();
+        SnpEffFeature feature = (SnpEffFeature)codec.decode(multiEffectSnpEffLine);
+
+        Assert.assertEquals(feature, expectedFeature);
+    }
+
+    @Test(expectedExceptions = TribbleException.InvalidDecodeLine.class)
+    public void testParseWrongNumberOfFieldsSnpEffLine() {
+        String wrongNumberOfFieldsSnpEffLine = "1\t69428\tT\tG\tSNP\tHom\t6049.69\t61573\t\tENSG00000177693\t" +
+                  "OR4F5\tmRNA\tENST00000326183\texon_1_69055_70108\t1\tNON_SYNONYMOUS_CODING\tF/C\tTTT/TGT\t113\t918\t\t";
+        // Only 22 tab-separated fields (the valid lines in this class have 23); decode() must reject the line.
+        SnpEffCodec codec = new SnpEffCodec();
+        codec.decode(wrongNumberOfFieldsSnpEffLine);
+    }
+
+    @Test(expectedExceptions = TribbleException.InvalidDecodeLine.class)
+    public void testParseBlankEffectFieldSnpEffLine() {
+        String blankEffectFieldSnpEffLine = "1\t69428\tT\tG\tSNP\tHom\t6049.69\t61573\t\tENSG00000177693\t" +
+                  "OR4F5\tmRNA\tENST00000326183\texon_1_69055_70108\t1\t\tF/C\tTTT/TGT\t113\t918\t\t\t";
+        // The 16th field (the Effect column) is empty; decode() must reject the line.
+        SnpEffCodec codec = new SnpEffCodec();
+        codec.decode(blankEffectFieldSnpEffLine);
+    }
+
+    @Test(expectedExceptions = TribbleException.InvalidDecodeLine.class)
+    public void testParseInvalidNumericFieldSnpEffLine() {
+        String invalidNumericFieldSnpEffLine = "1\t69428\tT\tG\tSNP\tHom\t6049.69\t61573\t\tENSG00000177693\t" +
+                  "OR4F5\tmRNA\tENST00000326183\texon_1_69055_70108\t1\tNON_SYNONYMOUS_CODING\tF/C\tTTT/TGT\t113\tfoo\t\t\t";
+        // The 20th field (the CDS_size column) holds "foo" instead of a number; decode() must reject the line.
+        SnpEffCodec codec = new SnpEffCodec();
+        codec.decode(invalidNumericFieldSnpEffLine);
+    }
+}
diff --git a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/IndexFactoryUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/IndexFactoryUnitTest.java
index 68a2ecf8d..d08cda949 100755
--- a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/IndexFactoryUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/IndexFactoryUnitTest.java
@@ -70,7 +70,7 @@ public class IndexFactoryUnitTest {
             CloseableTribbleIterator it = source.iterator();
             while (it.hasNext() && (counter++ < maxRecords || maxRecords == -1) ) {
                 VariantContext vc = it.next();
-                writer.add(vc, vc.getReferenceBaseForIndel());
+                writer.add(vc);
             }
             writer.close();
 
diff --git a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java
index 32ff25c7b..2ef116708 100644
--- a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java
+++ b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java
@@ -17,11 +17,11 @@ public class VCFIntegrationTest extends WalkerTest {
 
         String baseCommand = "-R " + b37KGReference + " -NO_HEADER -o %s ";
 
-        String test1 = baseCommand + "-T VariantAnnotator -BTI variant -B:variant,vcf " + testVCF;
+        String test1 = baseCommand + "-T VariantAnnotator --variant " + testVCF + " -BTI variant";
         WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList(md5ofInputVCF));
         List result = executeTest("Test Variant Annotator with no changes", spec1).getFirst();
 
-        String test2 = baseCommand + "-T VariantsToVCF -B:variant,vcf " + result.get(0).getAbsolutePath();
+        String test2 = baseCommand + "-T VariantsToVCF --variant " + result.get(0).getAbsolutePath();
         WalkerTestSpec spec2 = new WalkerTestSpec(test2, 1, Arrays.asList(md5ofInputVCF));
         executeTest("Test Variants To VCF from new output", spec2);
     }
diff --git a/public/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterUnitTest.java
index 34a2e616a..e3a926fb9 100644
--- a/public/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterUnitTest.java
@@ -57,8 +57,8 @@ public class VCFWriterUnitTest extends BaseTest {
         VCFHeader header = createFakeHeader(metaData,additionalColumns);
         VCFWriter writer = new StandardVCFWriter(fakeVCFFile);
         writer.writeHeader(header);
-        writer.add(createVC(header),"A".getBytes()[0]);
-        writer.add(createVC(header),"A".getBytes()[0]);
+        writer.add(createVC(header));
+        writer.add(createVC(header));
         writer.close();
         VCFCodec reader = new VCFCodec();
         AsciiLineReader lineReader;
@@ -135,7 +135,7 @@ public class VCFWriterUnitTest extends BaseTest {
             genotypes.put(name,gt);
             
         }
-        return new VariantContext("RANDOM",loc.getContig(), loc.getStart(), loc.getStop(), alleles, genotypes, 0, filters, attributes);
+        return new VariantContext("RANDOM",loc.getContig(), loc.getStart(), loc.getStop(), alleles, genotypes, 0, filters, attributes, (byte)'A');
 
 
     }
diff --git a/public/java/test/org/broadinstitute/sting/utils/text/TextFormattingUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/text/TextFormattingUtilsUnitTest.java
new file mode 100644
index 000000000..45a618f71
--- /dev/null
+++ b/public/java/test/org/broadinstitute/sting/utils/text/TextFormattingUtilsUnitTest.java
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2011, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.utils.text;
+
+import org.broadinstitute.sting.BaseTest;
+import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+import java.util.Arrays;
+import java.util.Collections;
+
+public class TextFormattingUtilsUnitTest extends BaseTest { // unit tests for splitWhiteSpace, getWordStarts and splitFixedWidth
+    @Test(expectedExceptions = ReviewedStingException.class)
+    public void testSplitWhiteSpaceNullLine() { // a null line must raise ReviewedStingException
+        TextFormattingUtils.splitWhiteSpace(null);
+    }
+
+    @Test
+    public void testSplitWhiteSpace() { // every spacing variant below is expected to yield the same three tokens
+        Assert.assertEquals(TextFormattingUtils.splitWhiteSpace("foo bar baz"), new String[] { "foo", "bar", "baz" });
+        Assert.assertEquals(TextFormattingUtils.splitWhiteSpace("foo  bar  baz"), new String[] { "foo", "bar", "baz" });
+        Assert.assertEquals(TextFormattingUtils.splitWhiteSpace(" foo bar baz"), new String[] { "foo", "bar", "baz" });
+        Assert.assertEquals(TextFormattingUtils.splitWhiteSpace(" foo bar baz "), new String[] { "foo", "bar", "baz" });
+        Assert.assertEquals(TextFormattingUtils.splitWhiteSpace("foo bar baz "), new String[] { "foo", "bar", "baz" });
+        Assert.assertEquals(TextFormattingUtils.splitWhiteSpace("\tfoo\tbar\tbaz\t"), new String[]{"foo", "bar", "baz"});
+    }
+
+    @Test(expectedExceptions = ReviewedStingException.class)
+    public void testGetWordStartsNullLine() { // a null line must raise ReviewedStingException
+        TextFormattingUtils.getWordStarts(null);
+    }
+
+    @Test
+    public void testGetWordStarts() { // expected values are 0-based character offsets
+        Assert.assertEquals(TextFormattingUtils.getWordStarts("foo bar baz"), Arrays.asList(4, 8)); // the word at offset 0 is not reported
+        Assert.assertEquals(TextFormattingUtils.getWordStarts("foo  bar  baz"), Arrays.asList(5, 10));
+        Assert.assertEquals(TextFormattingUtils.getWordStarts(" foo bar baz"), Arrays.asList(1, 5, 9)); // after leading whitespace the first word is reported too
+        Assert.assertEquals(TextFormattingUtils.getWordStarts(" foo bar baz "), Arrays.asList(1, 5, 9));
+        Assert.assertEquals(TextFormattingUtils.getWordStarts("foo bar baz "), Arrays.asList(4, 8));
+        Assert.assertEquals(TextFormattingUtils.getWordStarts("\tfoo\tbar\tbaz\t"), Arrays.asList(1, 5, 9));
+    }
+
+    @Test(expectedExceptions = ReviewedStingException.class)
+    public void testSplitFixedWidthNullLine() { // a null line must raise ReviewedStingException
+        TextFormattingUtils.splitFixedWidth(null, Collections.emptyList());
+    }
+
+    @Test(expectedExceptions = ReviewedStingException.class)
+    public void testSplitFixedWidthNullColumnStarts() { // a null column-start list must raise ReviewedStingException
+        TextFormattingUtils.splitFixedWidth("foo bar baz", null);
+    }
+
+    @Test
+    public void testSplitFixedWidth() { // the second argument lists the offsets at which the 2nd, 3rd, ... columns begin
+        Assert.assertEquals(TextFormattingUtils.splitFixedWidth("foo bar baz", Arrays.asList(4, 8)), new String[] { "foo", "bar", "baz" });
+        Assert.assertEquals(TextFormattingUtils.splitFixedWidth("foo  bar  baz", Arrays.asList(5, 10)), new String[] { "foo", "bar", "baz" });
+        Assert.assertEquals(TextFormattingUtils.splitFixedWidth(" foo bar baz", Arrays.asList(5, 9)), new String[] { "foo", "bar", "baz" });
+        Assert.assertEquals(TextFormattingUtils.splitFixedWidth(" foo bar baz ", Arrays.asList(5, 9)), new String[] { "foo", "bar", "baz" });
+        Assert.assertEquals(TextFormattingUtils.splitFixedWidth("foo bar baz ", Arrays.asList(4, 8)), new String[] { "foo", "bar", "baz" });
+        Assert.assertEquals(TextFormattingUtils.splitFixedWidth("\tfoo\tbar\tbaz\t", Arrays.asList(5, 9)), new String[] { "foo", "bar", "baz" });
+        Assert.assertEquals(TextFormattingUtils.splitFixedWidth("f o b r b z", Arrays.asList(4, 8)), new String[] { "f o", "b r", "b z" }); // inner spaces are kept, only the ends are trimmed
+        Assert.assertEquals(TextFormattingUtils.splitFixedWidth(" f o b r b z", Arrays.asList(4, 8)), new String[] { "f o", "b r", "b z" });
+        Assert.assertEquals(TextFormattingUtils.splitFixedWidth("  f o b r b z", Arrays.asList(4, 8)), new String[] { "f", "o b", "r b z" }); // offsets are applied as given, so a shifted line splits differently
+    }
+}
diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextIntegrationTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextIntegrationTest.java
index a344817a0..67fe7d012 100755
--- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextIntegrationTest.java
+++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextIntegrationTest.java
@@ -3,6 +3,7 @@
 package org.broadinstitute.sting.utils.variantcontext;
 
 import org.broadinstitute.sting.WalkerTest;
+import org.testng.annotations.DataProvider;
 import org.testng.annotations.Test;
 
 import java.util.HashMap;
@@ -14,51 +15,51 @@ public class VariantContextIntegrationTest extends WalkerTest {
             " -R " + b36KGReference;
 
     private static String root = cmdRoot +
-            " -D " + GATKDataLocation + "dbsnp_129_b36.rod" +
-            " -B:vcf,VCF3 " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf";
+            " -L 1:1-1,000,000 -V " + b36dbSNP129;
 
-    static HashMap expectations = new HashMap();
-    static {
-        expectations.put("-L 1:1-10000 --printPerLocus", "e4ee2eaa3114888e918a1c82df7a027a");
-        expectations.put("-L 1:1-10000 --printPerLocus --takeFirstOnly", "5b5635e4877d82e8a27d70dac24bda2f");
-        expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsStartinAtCurrentPosition", "ceced3f270b4fe407ee83bc9028becde");
-        expectations.put("-L 1:1-10000 --printPerLocus --takeFirstOnly --onlyContextsStartinAtCurrentPosition", "9a9b9e283553c28bf58de1cafa38fe92");
-        expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType SNP", "2097e32988d603d3b353b50218c86d3b");
-        expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType INDEL", "033bd952fca048fe1a4f6422b57ab2ed");
-        expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType INDEL --onlyContextsStartinAtCurrentPosition", "5e40980c02797f90821317874426a87a");
-        expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType MIXED", "e5a00766f8c1ff9cf92310bafdec3126");
-        expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType NO_VARIATION", "39335acdb34c8a2af433dc50d619bcbc");
+    private static final class VCITTest extends TestDataProvider {
+        final String args, md5; // extra walker arguments for one run, and the md5 handed to its WalkerTestSpec
+
+        private VCITTest(final String args, final String md5) {
+            super(VCITTest.class); // NOTE(review): presumably files this instance under VCITTest for getTests() - confirm
+            this.args = args;
+            this.md5 = md5;
+        }
     }
 
-    @Test
-    public void testConversionSelection() {
-        for ( Map.Entry entry : expectations.entrySet() ) {
-            String extraArgs = entry.getKey();
-            String md5 = entry.getValue();
+    @DataProvider(name = "VCITTestData")
+    public Object[][] createVCITTestData() { // one VCITTest per (extra walker arguments, md5) pair
+        new VCITTest("--printPerLocus", "e9d0f1fe80659bb55b40aa6c3a2e921e");
+        new VCITTest("--printPerLocus --onlyContextsOfType SNP", "0e620db3e45771df42c54a9c0ae4a29f");
+        new VCITTest("--printPerLocus --onlyContextsOfType INDEL", "b725c204fefe3814644d50e7c20f9dfe");
+        new VCITTest("--printPerLocus --onlyContextsOfType MIXED", "3ccc33f496a1718df55722d11cc14334");
+        new VCITTest("--printPerLocus --onlyContextsOfType NO_VARIATION", "39335acdb34c8a2af433dc50d619bcbc");
+        new VCITTest("--printPerLocus --takeFirstOnly", "3a45561da042b2b44b6a679744f16103");
+        new VCITTest("--printPerLocus --onlyContextsOfType INDEL --onlyContextsStartinAtCurrentPosition", "4746f269ecc377103f83eb61cc162c39");
+        new VCITTest("--printPerLocus --onlyContextsStartinAtCurrentPosition", "2749e3fae458650a85a2317e346dc44c");
+        new VCITTest("--printPerLocus --takeFirstOnly --onlyContextsStartinAtCurrentPosition", "9bd48c2a40813023e29ffaa23d59d382");
+
+        return VCITTest.getTests(VCITTest.class); // NOTE(review): presumably returns the instances constructed above - confirm against TestDataProvider
+    }
+
+    @Test(dataProvider = "VCITTestData")
+    public void testConversionSelection(VCITTest test) {
+        // One walker run per data-provider row: the shared root command plus that row's extra arguments.
+        String extraArgs = test.args;
+        String md5 = test.md5;
+        WalkerTestSpec spec = new WalkerTestSpec( root + " " + extraArgs + " -o %s",
+                1, // just one output file
+                Arrays.asList(md5));
+        executeTest("testSelectors", spec);
     }
 
     @Test
     public void testToVCF() {
         // this really just tests that we are seeing the same number of objects over all of chr1
 
-        WalkerTestSpec spec = new WalkerTestSpec( cmdRoot + " -NO_HEADER -B:vcf,VCF3 " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.500.vcf -L 1:1-1000000 -o %s --outputVCF %s",
+        WalkerTestSpec spec = new WalkerTestSpec( cmdRoot + " -NO_HEADER -V:VCF3 " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.500.vcf -L 1:1-1000000 -o %s --outputVCF %s",
                 2, // just one output file
                 Arrays.asList("e3c35d0c4b5d4935c84a270f9df0951f", "ff91731213fd0bbdc200ab6fd1c93e63"));
          executeTest("testToVCF", spec);
     }
-
-    @Test
-    public void testLargeScaleConversion() {
-        // this really just tests that we are seeing the same number of objects over all of chr1
-        WalkerTestSpec spec = new WalkerTestSpec( root + " -L 1" + " -o %s",
-                1, // just one output file
-                Arrays.asList("529f936aa6c303658b23caf4e527782f"));
-         executeTest("testLargeScaleConversion", spec);
-    }
 }
diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java
index e82817714..d8fa0eae4 100755
--- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java
@@ -92,45 +92,45 @@ public class VariantContextUnitTest {
 
         // test INDELs
         alleles = Arrays.asList(Aref, ATC);
-        vc = new VariantContext("test", snpLoc,snpLocStart, snpLocStop, alleles);
+        vc = new VariantContext("test", snpLoc,snpLocStart, snpLocStop, alleles, null, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null, (byte)'A');
         Assert.assertEquals(vc.getType(), VariantContext.Type.INDEL);
 
         alleles = Arrays.asList(ATCref, A);
-        vc = new VariantContext("test", snpLoc,snpLocStart, snpLocStop+2, alleles);
+        vc = new VariantContext("test", snpLoc,snpLocStart, snpLocStop+2, alleles, null, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null, (byte)'A');
         Assert.assertEquals(vc.getType(), VariantContext.Type.INDEL);
 
         alleles = Arrays.asList(Tref, TA, TC);
-        vc = new VariantContext("test", snpLoc,snpLocStart, snpLocStop, alleles);
+        vc = new VariantContext("test", snpLoc,snpLocStart, snpLocStop, alleles, null, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null, (byte)'A');
         Assert.assertEquals(vc.getType(), VariantContext.Type.INDEL);
 
         alleles = Arrays.asList(ATCref, A, AC);
-        vc = new VariantContext("test", snpLoc,snpLocStart, snpLocStop+2, alleles);
+        vc = new VariantContext("test", snpLoc,snpLocStart, snpLocStop+2, alleles, null, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null, (byte)'A');
         Assert.assertEquals(vc.getType(), VariantContext.Type.INDEL);
 
         alleles = Arrays.asList(ATCref, A, Allele.create("ATCTC"));
-        vc = new VariantContext("test", snpLoc,snpLocStart, snpLocStop+2, alleles);
+        vc = new VariantContext("test", snpLoc,snpLocStart, snpLocStop+2, alleles, null, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null, (byte)'A');
         Assert.assertEquals(vc.getType(), VariantContext.Type.INDEL);
 
         // test MIXED
         alleles = Arrays.asList(TAref, T, TC);
-        vc = new VariantContext("test", snpLoc,snpLocStart, snpLocStop+1, alleles);
+        vc = new VariantContext("test", snpLoc,snpLocStart, snpLocStop+1, alleles, null, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null, (byte)'A');
         Assert.assertEquals(vc.getType(), VariantContext.Type.MIXED);
 
         alleles = Arrays.asList(TAref, T, AC);
-        vc = new VariantContext("test", snpLoc,snpLocStart, snpLocStop+1, alleles);
+        vc = new VariantContext("test", snpLoc,snpLocStart, snpLocStop+1, alleles, null, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null, (byte)'A');
         Assert.assertEquals(vc.getType(), VariantContext.Type.MIXED);
 
         alleles = Arrays.asList(ACref, ATC, AT);
-        vc = new VariantContext("test", snpLoc,snpLocStart, snpLocStop+1, alleles);
+        vc = new VariantContext("test", snpLoc,snpLocStart, snpLocStop+1, alleles, null, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null, (byte)'A');
         Assert.assertEquals(vc.getType(), VariantContext.Type.MIXED);
 
         alleles = Arrays.asList(Aref, T, symbolic);
-        vc = new VariantContext("test", snpLoc,snpLocStart, snpLocStop, alleles);
+        vc = new VariantContext("test", snpLoc,snpLocStart, snpLocStop, alleles, null, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null, (byte)'A');
         Assert.assertEquals(vc.getType(), VariantContext.Type.MIXED);
 
         // test SYMBOLIC
         alleles = Arrays.asList(Tref, symbolic);
-        vc = new VariantContext("test", snpLoc,snpLocStart, snpLocStop, alleles);
+        vc = new VariantContext("test", snpLoc,snpLocStart, snpLocStop, alleles, null, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null, (byte)'A');
         Assert.assertEquals(vc.getType(), VariantContext.Type.SYMBOLIC);
     }
 
@@ -191,7 +191,7 @@ public class VariantContextUnitTest {
     @Test
     public void testCreatingDeletionVariantContext() {
         List alleles = Arrays.asList(ATCref, del);
-        VariantContext vc = new VariantContext("test", delLoc, delLocStart, delLocStop, alleles);
+        VariantContext vc = new VariantContext("test", delLoc, delLocStart, delLocStop, alleles, null, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null, (byte)'A');
 
         Assert.assertEquals(vc.getChr(), delLoc);
         Assert.assertEquals(vc.getStart(), delLocStart);
@@ -218,7 +218,7 @@ public class VariantContextUnitTest {
     @Test
     public void testCreatingInsertionVariantContext() {
         List alleles = Arrays.asList(delRef, ATC);
-        VariantContext vc = new VariantContext("test", insLoc, insLocStart, insLocStop, alleles);
+        VariantContext vc = new VariantContext("test", insLoc, insLocStart, insLocStop, alleles, null, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null, (byte)'A');
 
         Assert.assertEquals(vc.getChr(), insLoc);
         Assert.assertEquals(vc.getStart(), insLocStart);
@@ -251,7 +251,7 @@ public class VariantContextUnitTest {
         new VariantContext("test", insLoc, insLocStart, insLocStop, Arrays.asList(delRef, del));
     }
 
-    @Test (expectedExceptions = IllegalArgumentException.class)
+    @Test (expectedExceptions = IllegalStateException.class)
     public void testBadConstructorArgs3() {
         new VariantContext("test", insLoc, insLocStart, insLocStop, Arrays.asList(del));
     }
diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala
index 1f4f79993..47ba0220f 100755
--- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala
+++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala
@@ -2,7 +2,6 @@ package org.broadinstitute.sting.queue.qscripts
 
 import org.broadinstitute.sting.queue.extensions.gatk._
 import org.broadinstitute.sting.queue.QScript
-import org.broadinstitute.sting.queue.function.ListWriterFunction
 import org.broadinstitute.sting.queue.extensions.picard._
 import org.broadinstitute.sting.gatk.walkers.indels.IndelRealigner.ConsensusDeterminationModel
 import org.broadinstitute.sting.utils.baq.BAQ.CalculationMode
@@ -12,6 +11,7 @@ import net.sf.samtools.SAMFileReader
 import net.sf.samtools.SAMFileHeader.SortOrder
 
 import org.broadinstitute.sting.queue.util.QScriptUtils
+import org.broadinstitute.sting.queue.function.{CommandLineFunction, ListWriterFunction}
 
 class DataProcessingPipeline extends QScript {
   qscript =>
@@ -283,12 +283,6 @@ class DataProcessingPipeline extends QScript {
   ****************************************************************************/
 
 
-  // General arguments to GATK walkers
-  trait CommandLineGATKArgs extends CommandLineGATK {
-    this.reference_sequence = qscript.reference
-    this.memoryLimit = 4
-    this.isIntermediate = true
-  }
 
   // General arguments to non-GATK tools
   trait ExternalCommonArgs extends CommandLineFunction {
@@ -296,6 +290,14 @@ class DataProcessingPipeline extends QScript {
     this.isIntermediate = true
   }
 
+  // General arguments to GATK walkers: mixes in ExternalCommonArgs and points each walker at the script-wide reference
+  trait CommandLineGATKArgs extends CommandLineGATK with ExternalCommonArgs {
+    this.reference_sequence = qscript.reference  // qscript is the self-alias of the enclosing QScript
+  }
+
+  trait SAMargs extends PicardBamFunction with ExternalCommonArgs {  // shared arguments for Picard BAM tools
+    this.maxRecordsInRam = 100000  // NOTE(review): no job in the visible hunks mixes SAMargs in, and validate no longer sets this itself — confirm
+  }
 
   case class target (inBams: File, outIntervals: File) extends RealignerTargetCreator with CommandLineGATKArgs {
     if (cleaningModel != ConsensusDeterminationModel.KNOWNS_ONLY)
@@ -303,7 +305,7 @@ class DataProcessingPipeline extends QScript {
     this.out = outIntervals
     this.mismatchFraction = 0.0
     this.rodBind :+= RodBind("dbsnp", "VCF", dbSNP)
-    if (!indels.isEmpty)
+    if (indels != null)
       this.rodBind :+= RodBind("indels", "VCF", indels)
     this.scatterCount = nContigs
     this.analysisName = queueLogDir + outIntervals + ".target"
@@ -311,11 +313,12 @@ class DataProcessingPipeline extends QScript {
   }
 
   case class clean (inBams: File, tIntervals: File, outBam: File) extends IndelRealigner with CommandLineGATKArgs {
+    @Output(doc="output bai file") var bai = swapExt(outBam, ".bam", ".bai")
     this.input_file :+= inBams
     this.targetIntervals = tIntervals
     this.out = outBam
     this.rodBind :+= RodBind("dbsnp", "VCF", dbSNP)
-    if (!qscript.indels.isEmpty)
+    if (qscript.indels != null)
       this.rodBind :+= RodBind("indels", "VCF", qscript.indels)
     this.consensusDeterminationModel =  consensusDeterminationModel
     this.compress = 0
@@ -365,6 +368,7 @@ class DataProcessingPipeline extends QScript {
   }
 
   case class dedup (inBam: File, outBam: File, metricsFile: File) extends MarkDuplicates with ExternalCommonArgs {
+    @Output(doc="output bai file") var bai = swapExt(outBam, ".bam", ".bai")
     this.input = List(inBam)
     this.output = outBam
     this.metrics = metricsFile
@@ -373,6 +377,7 @@ class DataProcessingPipeline extends QScript {
   }
 
   case class joinBams (inBams: List[File], outBam: File) extends MergeSamFiles with ExternalCommonArgs {
+    @Output(doc="output bai file") var bai = swapExt(outBam, ".bam", ".bai")
     this.input = inBams
     this.output = outBam
     this.analysisName = queueLogDir + outBam + ".joinBams"
@@ -380,6 +385,7 @@ class DataProcessingPipeline extends QScript {
   }
 
   case class sortSam (inSam: File, outBam: File, sortOrderP: SortOrder) extends SortSam with ExternalCommonArgs {
+    @Output(doc="output bai file") var bai = swapExt(outBam, ".bam", ".bai")
     this.input = List(inSam)
     this.output = outBam
     this.sortOrder = sortOrderP
@@ -390,7 +396,6 @@ class DataProcessingPipeline extends QScript {
   case class validate (inBam: File, outLog: File) extends ValidateSamFile with ExternalCommonArgs {
     this.input = List(inBam)
     this.output = outLog
-    this.maxRecordsInRam = 100000
     this.REFERENCE_SEQUENCE = qscript.reference
     this.isIntermediate = false
     this.analysisName = queueLogDir + outLog + ".validate"
@@ -399,6 +404,7 @@ class DataProcessingPipeline extends QScript {
 
 
   case class addReadGroup (inBam: File, outBam: File, readGroup: ReadGroup) extends AddOrReplaceReadGroups with ExternalCommonArgs {
+    @Output(doc="output bai file") var bai = swapExt(outBam, ".bam", ".bai")
     this.input = List(inBam)
     this.output = outBam
     this.RGID = readGroup.id
@@ -408,8 +414,6 @@ class DataProcessingPipeline extends QScript {
     this.RGPL = readGroup.pl
     this.RGPU = readGroup.pu
     this.RGSM = readGroup.sm
-    this.memoryLimit = 4
-    this.isIntermediate = true
     this.analysisName = queueLogDir + outBam + ".rg"
     this.jobName = queueLogDir + outBam + ".rg"
   }
@@ -435,6 +439,7 @@ class DataProcessingPipeline extends QScript {
     @Input(doc="bwa alignment index file") var sai = inSai
     @Output(doc="output aligned bam file") var alignedBam = outBam
     def commandLine = bwaPath + " samse " + reference + " " + sai + " " + bam + " > " + alignedBam
+    this.memoryLimit = 6
     this.analysisName = queueLogDir + outBam + ".bwa_sam_se"
     this.jobName = queueLogDir + outBam + ".bwa_sam_se"
   }
@@ -445,6 +450,7 @@ class DataProcessingPipeline extends QScript {
     @Input(doc="bwa alignment index file for 2nd mating pair") var sai2 = inSai2
     @Output(doc="output aligned bam file") var alignedBam = outBam
     def commandLine = bwaPath + " sampe " + reference + " " + sai1 + " " + sai2 + " " + bam + " " + bam + " > " + alignedBam
+    this.memoryLimit = 6
     this.analysisName = queueLogDir + outBam + ".bwa_sam_pe"
     this.jobName = queueLogDir + outBam + ".bwa_sam_pe"
   }
@@ -455,6 +461,4 @@ class DataProcessingPipeline extends QScript {
     this.analysisName = queueLogDir + outBamList + ".bamList"
     this.jobName = queueLogDir + outBamList + ".bamList"
   }
-
-
 }
diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala
index 934cf2a3c..59c00b8cd 100755
--- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala
+++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala
@@ -13,7 +13,7 @@ class GATKResourcesBundle extends QScript {
   var gatkJarFile: File = new File("dist/GenomeAnalysisTK.jar")
 
   @Argument(doc="liftOverPerl", required=false)
-  var liftOverPerl: File = new File("./perl/liftOverVCF.pl")
+  var liftOverPerl: File = new File("./public/perl/liftOverVCF.pl")
 
   @Argument(shortName = "ver", doc="The SVN version of this release", required=true)
   var VERSION: String = _
@@ -57,11 +57,11 @@ class GATKResourcesBundle extends QScript {
     //Console.printf("liftover(%s => %s)%n", inRef.name, outRef.name)
     (inRef.name, outRef.name) match {
       case ("b37", "hg19") =>
-        return new LiftOverPerl(in, out, new File("chainFiles/b37tohg19.chain"), inRef, outRef)
+        return new LiftOverPerl(in, out, new File("public/chainFiles/b37tohg19.chain"), inRef, outRef)
       case ("b37", "hg18") =>
-        return new LiftOverPerl(in, out, new File("chainFiles/b37tohg18.chain"), inRef, outRef)
+        return new LiftOverPerl(in, out, new File("public/chainFiles/b37tohg18.chain"), inRef, outRef)
       case ("b37", "b36") =>
-        return new LiftOverPerl(in, out, new File("chainFiles/b37tob36.chain"), inRef, outRef)
+        return new LiftOverPerl(in, out, new File("public/chainFiles/b37tob36.chain"), inRef, outRef)
       case _ => return null
     }
   }
@@ -85,7 +85,7 @@ class GATKResourcesBundle extends QScript {
     //
     b37 = new Reference("b37", new File("/Users/depristo/Desktop/broadLocal/localData/human_g1k_v37.fasta"))
     hg18 = new Reference("hg18", new File("/Users/depristo/Desktop/broadLocal/localData/Homo_sapiens_assembly18.fasta"))
-    exampleFASTA = new Reference("exampleFASTA", new File("testdata/exampleFASTA.fasta"))
+    exampleFASTA = new Reference("exampleFASTA", new File("public/testdata/exampleFASTA.fasta"))
     refs = List(b37, hg18, exampleFASTA)
 
     val DATAROOT = "/Users/depristo/Desktop/broadLocal/localData/"
@@ -94,7 +94,7 @@ class GATKResourcesBundle extends QScript {
     addResource(new Resource(DATAROOT + "dbsnp_132_b37.vcf", "dbsnp_132", b37, true, false))
 
     addResource(new Resource(exampleFASTA.file, "exampleFASTA", exampleFASTA, false))
-    addResource(new Resource("testdata/exampleBAM.bam", "exampleBAM", exampleFASTA, false))
+    addResource(new Resource("public/testdata/exampleBAM.bam", "exampleBAM", exampleFASTA, false))
   }
 
   def initializeStandardDataFiles() = {
@@ -105,7 +105,7 @@ class GATKResourcesBundle extends QScript {
     b37 = new Reference("b37", new File("/humgen/1kg/reference/human_g1k_v37.fasta"))
     hg18 = new Reference("hg18", new File("/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta"))
     b36 = new Reference("b36", new File("/humgen/1kg/reference/human_b36_both.fasta"))
-    exampleFASTA = new Reference("exampleFASTA", new File("testdata/exampleFASTA.fasta"))
+    exampleFASTA = new Reference("exampleFASTA", new File("public/testdata/exampleFASTA.fasta"))
     refs = List(hg19, b37, hg18, b36, exampleFASTA)
 
     addResource(new Resource(b37.file, "", b37, false))
@@ -134,6 +134,9 @@ class GATKResourcesBundle extends QScript {
     addResource(new Resource("/humgen/gsa-hpprojects/GATK/data/Comparisons/Unvalidated/AFR+EUR+ASN+1KG.dindel_august_release_merged_pilot1.20110126.sites.vcf",
       "1000G_indels_for_realignment", b37, true, false))
 
+    addResource(new Resource("/humgen/gsa-hpprojects/GATK/data/Comparisons/Validated/Mills_Devine_Indels_2011/ALL.wgs.indels_mills_devine_hg19_leftAligned_collapsed_double_hit.sites.vcf",
+      "indels_mills_devine", b37, true, true))
+    
     //
     // example call set for wiki tutorial
     //
@@ -152,8 +155,8 @@ class GATKResourcesBundle extends QScript {
     addResource(new Resource("/humgen/gsa-hpprojects/GATK/data/refGene_b37.sorted.txt",
       "refGene", b37, true, false))
 
-    addResource(new Resource("chainFiles/hg18tob37.chain", "", hg18, false, false))
-    addResource(new Resource("chainFiles/b36tob37.chain", "", b36, false, false))
+    addResource(new Resource("public/chainFiles/hg18tob37.chain", "", hg18, false, false))
+    addResource(new Resource("public/chainFiles/b36tob37.chain", "", b36, false, false))
 
     // todo -- chain files?
     // todo 1000G SNP and indel call sets?
@@ -162,7 +165,7 @@ class GATKResourcesBundle extends QScript {
     // exampleFASTA file
     //
     addResource(new Resource(exampleFASTA.file, "exampleFASTA", exampleFASTA, false))
-    addResource(new Resource("testdata/exampleBAM.bam", "exampleBAM", exampleFASTA, false))
+    addResource(new Resource("public/testdata/exampleBAM.bam", "exampleBAM", exampleFASTA, false))
   }
 
   def createBundleDirectories(dir: File) = {
diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/PacbioProcessingPipeline.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/PacbioProcessingPipeline.scala
new file mode 100755
index 000000000..9f3dd9a2c
--- /dev/null
+++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/PacbioProcessingPipeline.scala
@@ -0,0 +1,174 @@
+package org.broadinstitute.sting.queue.qscripts
+
+import org.broadinstitute.sting.queue.QScript
+import org.broadinstitute.sting.queue.extensions.gatk._
+import org.broadinstitute.sting.queue.util.QScriptUtils
+import net.sf.samtools.SAMFileHeader.SortOrder
+import org.broadinstitute.sting.queue.extensions.picard.{SortSam, AddOrReplaceReadGroups}
+import org.broadinstitute.sting.utils.exceptions.UserException
+import org.broadinstitute.sting.commandline.Hidden
+
+/**
+ * Created by IntelliJ IDEA.
+ * User: carneiro
+ * Date: 4/20/11
+ * Time: 16:29 PM
+ */
+
+
+class RecalibrateBaseQualities extends QScript {
+
+  @Input(doc="input FASTA/FASTQ/BAM file - or list of FASTA/FASTQ/BAM files. ", shortName="i", required=true)
+  var input: File = _  // expanded into individual files by QScriptUtils.createListFromFile in script
+
+  @Input(doc="path to R resources folder inside the Sting repository", fullName="path_to_r", shortName="r", required=true)
+  var R: String = _  // handed to AnalyzeCovariates as its resources setting
+
+  @Input(doc="Reference fasta file", shortName="R", required=true)
+  var reference: File = _  // reference_sequence for the GATK jobs and the reference passed to bwa in align
+
+  @Input(doc="dbsnp VCF file to use ", shortName="D", required=true)
+  var dbSNP: File = _  // bound as the "dbsnp" VCF rod in cov
+
+  @Input(doc="Number of jobs to scatter/gather. Default: 0." , shortName = "sg", required=false)
+  var threads: Int = 0  // used as scatterCount by cov and recal
+
+  @Input(doc="Sample Name to fill in the Read Group information (only necessary if using fasta/fastq)" , shortName = "sn", required=false)
+  var sample: String = "NA"  // written as RGSM by addReadGroup
+
+  @Input(doc="The path to the binary of bwa to align fasta/fastq files", fullName="path_to_bwa", shortName="bwa", required=false)
+  var bwaPath: File = _  // script throws a UserException if this is still null when an input ends in .fasta or .fq
+
+  @Hidden
+  @Input(doc="The default base qualities to use before recalibration. Default is Q20 (should be good for every dataset)." , shortName = "dbq", required=false)
+  var dbq: Int = 20  // passed to PrintReads as DBQ in addQuals
+
+
+
+
+  // Prefix for the analysisName and jobName of the jobs defined in this script.
+  val queueLogDir: String = ".qlog/"
+
+  def script = {
+
+    // Expand the input argument into the individual files to process.
+    val inputs: List[File] = QScriptUtils.createListFromFile(input)
+
+    for (file: File <- inputs) {
+
+      println("DEBUG: processing " + file + "\nDEBUG: name -- " + file.getName)
+
+      // Inputs ending in .fasta or .fq are raw reads: they go through the alignment
+      // steps first, and that requires the path to a bwa binary.
+      val needsAlignment: Boolean = file.endsWith(".fasta") || file.endsWith(".fq")
+      if (needsAlignment && bwaPath == null)
+        throw new UserException("You provided a fasta/fastq file but didn't provide the path for BWA")
+
+      // FASTA -> BAM steps
+      val alignedSam: File = file.getName + ".aligned.sam"
+      val sortedBam: File = swapExt(alignedSam, ".sam", ".bam")
+      val qualBam: File = swapExt(sortedBam, ".bam", ".q.bam")
+      val rgBam: File = swapExt(file, ".bam", ".rg.bam")
+
+      // Recalibration starts from the read-grouped BAM when we aligned, else from the input file itself.
+      val bamBase = if (needsAlignment) rgBam else file
+
+      // BAM Steps
+      val recalFile1: File = swapExt(bamBase, ".bam", ".recal1.csv")
+      val recalFile2: File = swapExt(bamBase, ".bam", ".recal2.csv")
+      val recalBam: File   = swapExt(bamBase, ".bam", ".recal.bam")
+      val beforeDir: String = recalBam + ".before"
+      val afterDir: String  = recalBam + ".after"
+
+      if (needsAlignment) {
+        add(align(file, alignedSam),
+            sortSam(alignedSam, sortedBam),
+            addQuals(sortedBam, qualBam, dbq),
+            addReadGroup(qualBam, rgBam, sample))
+      }
+
+      // Covariates are counted on the starting BAM and again on the recalibrated BAM,
+      // and AnalyzeCovariates runs on both resulting tables.
+      add(cov(bamBase, recalFile1),
+          recal(bamBase, recalFile1, recalBam),
+          cov(recalBam, recalFile2),
+          analyzeCovariates(recalFile1, beforeDir),
+          analyzeCovariates(recalFile2, afterDir))
+    }
+  }
+
+
+  // General arguments to non-GATK tools; the GATK walker jobs also pick these up through CommandLineGATKArgs
+  trait ExternalCommonArgs extends CommandLineFunction {
+    this.memoryLimit = 4  // presumably gigabytes — confirm against CommandLineFunction.memoryLimit
+    this.isIntermediate = true  // recal overrides this to false for its output BAM
+  }
+
+  // General arguments to GATK walkers: the ExternalCommonArgs defaults plus the script's reference
+  trait CommandLineGATKArgs extends CommandLineGATK with ExternalCommonArgs {
+    this.reference_sequence = reference
+  }
+
+  case class align(@Input inFastq: File, @Output outSam: File) extends ExternalCommonArgs {  // bwa bwasw job for one input file
+    def commandLine = bwaPath + " bwasw -b5 -q2 -r1 -z10 -t8 " + reference + " " + inFastq + " > " + outSam  // bwa's stdout is redirected into outSam
+    this.analysisName = queueLogDir + outSam + ".bwa_sam_se"
+    this.jobName = queueLogDir + outSam + ".bwa_sam_se"
+  }
+
+  case class sortSam (@Input inSam: File, @Output outBam: File) extends SortSam with ExternalCommonArgs {  // coordinate-sorts inSam into outBam
+    @Output(doc="output bai file") var bai = swapExt(outBam, ".bam", ".bai")  // declared as an additional output alongside outBam
+    this.input = List(inSam)
+    this.output = outBam
+    this.sortOrder = SortOrder.coordinate
+    this.analysisName = queueLogDir + outBam + ".sortSam"
+    this.jobName = queueLogDir + outBam + ".sortSam"
+  }
+
+  case class addQuals(inBam: File, outBam: File, qual: Int) extends PrintReads with CommandLineGATKArgs {  // PrintReads from inBam to outBam with DBQ set to qual
+    this.input_file :+= inBam
+    this.out = outBam
+    this.DBQ = qual  // NOTE(review): unlike the sibling jobs, no analysisName/jobName is set here — confirm that is intended
+  }
+
+  case class addReadGroup (inBam: File, outBam: File, sample: String) extends AddOrReplaceReadGroups with ExternalCommonArgs {  // stamps a fixed read group onto inBam
+    @Output(doc="output bai file") var bai = swapExt(outBam, ".bam", ".bai")  // declared as an additional output alongside outBam
+    this.input = List(inBam)
+    this.output = outBam
+    this.RGID = "1"  // every read-group field except RGSM is a hard-coded constant
+    this.RGCN = "BI"
+    this.RGPL = "PacBio_RS"
+    this.RGSM = sample  // the only field taken from the caller
+    this.RGLB = "default_library"
+    this.RGPU = "default_pu"
+    this.analysisName = queueLogDir + outBam + ".rg"
+    this.jobName = queueLogDir + outBam + ".rg"
+  }
+
+  case class cov (inBam: File, outRecalFile: File) extends CountCovariates with CommandLineGATKArgs {  // CountCovariates over inBam, table written to outRecalFile
+    this.rodBind :+= RodBind("dbsnp", "VCF", dbSNP)  // script-level dbSNP argument
+    this.covariate ++= List("ReadGroupCovariate", "QualityScoreCovariate", "CycleCovariate", "DinucCovariate")
+    this.input_file :+= inBam
+    this.recal_file = outRecalFile
+    this.analysisName = queueLogDir + outRecalFile + ".covariates"
+    this.jobName = queueLogDir + outRecalFile + ".covariates"
+    this.scatterCount = threads  // script-level "sg" argument, 0 unless the user overrides it
+  }
+
+  case class recal (inBam: File, inRecalFile: File, outBam: File) extends TableRecalibration with CommandLineGATKArgs {  // TableRecalibration of inBam using inRecalFile
+    this.input_file :+= inBam
+    this.recal_file = inRecalFile
+    this.out = outBam
+    this.isIntermediate = false  // overrides the ExternalCommonArgs default of true for this job's output
+    this.analysisName = queueLogDir + outBam + ".recalibration"
+    this.jobName = queueLogDir + outBam + ".recalibration"
+    this.scatterCount = threads  // script-level "sg" argument, 0 unless the user overrides it
+  }
+
+  case class analyzeCovariates (inRecalFile: File, outPath: String) extends AnalyzeCovariates {  // note: mixes in neither common-args trait, so none of their defaults apply
+    this.resources = R  // script-level path_to_r argument
+    this.recal_file = inRecalFile
+    this.output_dir = outPath
+    this.analysisName = queueLogDir + inRecalFile + ".analyze_covariates"
+    this.jobName = queueLogDir + inRecalFile + ".analyze_covariates"
+  }
+}
diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/RecalibrateBaseQualities.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/RecalibrateBaseQualities.scala
deleted file mode 100755
index cbe53db8d..000000000
--- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/RecalibrateBaseQualities.scala
+++ /dev/null
@@ -1,90 +0,0 @@
-package org.broadinstitute.sting.queue.qscripts
-
-import org.broadinstitute.sting.queue.QScript
-import org.broadinstitute.sting.queue.extensions.gatk._
-import org.broadinstitute.sting.queue.util.QScriptUtils
-
-/**
- * Created by IntelliJ IDEA.
- * User: carneiro
- * Date: 4/20/11
- * Time: 16:29 PM
- */
-
-
-class RecalibrateBaseQualities extends QScript {
-
-  @Input(doc="path to GenomeAnalysisTK.jar", shortName="gatk", required=true)
-  var GATKjar: File = _
-
-  @Input(doc="input BAM file - or list of BAM files", shortName="i", required=true)
-  var input: File = _
-
-  @Input(doc="path to R resources folder inside the Sting repository", fullName="path_to_r", shortName="r", required=true)
-  var R: String = _
-
-  @Input(doc="Reference fasta file", shortName="R", required=true)
-  var reference: File = _ // new File("/seq/references/Homo_sapiens_assembly19/v1/Homo_sapiens_assembly19.fasta")
-
-  @Input(doc="dbsnp ROD to use (VCF)", shortName="D", required=true)
-  var dbSNP: File = _     // new File("/humgen/gsa-hpprojects/GATK/data/dbsnp_132_b37.leftAligned.vcf")
-
-  val queueLogDir: String = ".qlog/"
-  var nContigs: Int = 0
-
-  def script = {
-
-    val bamList = QScriptUtils.createListFromFile(input)
-    nContigs = QScriptUtils.getNumberOfContigs(bamList(0))
-
-    for (bam <- bamList) {
-
-      val recalFile1: File = swapExt(bam, ".bam", ".recal1.csv")
-      val recalFile2: File = swapExt(bam, ".bam", ".recal2.csv")
-      val recalBam: File   = swapExt(bam, ".bam", ".recal.bam")
-      val path1: String    = recalBam + ".before"
-      val path2: String    = recalBam + ".after"
-
-      add(cov(bam, recalFile1),
-          recal(bam, recalFile1, recalBam),
-          cov(recalBam, recalFile2),
-          analyzeCovariates(recalFile1, path1),
-          analyzeCovariates(recalFile2, path2))
-    }
-  }
-
-  trait CommandLineGATKArgs extends CommandLineGATK {
-    this.jarFile = GATKjar
-    this.reference_sequence = reference
-    this.memoryLimit = 4
-    this.isIntermediate = true
-  }
-
-  case class cov (inBam: File, outRecalFile: File) extends CountCovariates with CommandLineGATKArgs {
-    this.rodBind :+= RodBind("dbsnp", "VCF", dbSNP)
-    this.covariate ++= List("ReadGroupCovariate", "QualityScoreCovariate", "CycleCovariate", "DinucCovariate")
-    this.input_file :+= inBam
-    this.recal_file = outRecalFile
-    this.analysisName = queueLogDir + outRecalFile + ".covariates"
-    this.jobName = queueLogDir + outRecalFile + ".covariates"
-    this.scatterCount = nContigs
-  }
-
-  case class recal (inBam: File, inRecalFile: File, outBam: File) extends TableRecalibration with CommandLineGATKArgs {
-    this.input_file :+= inBam
-    this.recal_file = inRecalFile
-    this.out = outBam
-    this.isIntermediate = false
-    this.analysisName = queueLogDir + outBam + ".recalibration"
-    this.jobName = queueLogDir + outBam + ".recalibration"
-    this.scatterCount = nContigs
-  }
-
-  case class analyzeCovariates (inRecalFile: File, outPath: String) extends AnalyzeCovariates {
-    this.resources = R
-    this.recal_file = inRecalFile
-    this.output_dir = outPath
-    this.analysisName = queueLogDir + inRecalFile + ".analyze_covariates"
-    this.jobName = queueLogDir + inRecalFile + ".analyze_covariates"
-  }
-}
diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleUnifiedGenotyper.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleUnifiedGenotyper.scala
index 4a93233eb..1d473b210 100644
--- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleUnifiedGenotyper.scala
+++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleUnifiedGenotyper.scala
@@ -59,10 +59,10 @@ class ExampleUnifiedGenotyper extends QScript {
     evalUnfiltered.rodBind :+= RodBind("eval", "VCF", genotyper.out)
     evalUnfiltered.out = swapExt(genotyper.out, "vcf", "eval")
 
-    variantFilter.rodBind :+= RodBind("vcf", "VCF", genotyper.out)
+    variantFilter.rodBind :+= RodBind("variant", "VCF", genotyper.out)
     variantFilter.out = swapExt(qscript.bamFile, "bam", "filtered.vcf")
     variantFilter.filterName = filterNames
-    variantFilter.filterExpression = filterExpressions
+    variantFilter.filterExpression = filterExpressions.map("\"" + _ + "\"")
 
     evalFiltered.rodBind :+= RodBind("eval", "VCF", variantFilter.out)
     evalFiltered.out = swapExt(variantFilter.out, "vcf", "eval")
diff --git a/public/scala/src/org/broadinstitute/sting/queue/engine/shell/ShellJobRunner.scala b/public/scala/src/org/broadinstitute/sting/queue/engine/shell/ShellJobRunner.scala
index 128d8773c..03f9d3315 100755
--- a/public/scala/src/org/broadinstitute/sting/queue/engine/shell/ShellJobRunner.scala
+++ b/public/scala/src/org/broadinstitute/sting/queue/engine/shell/ShellJobRunner.scala
@@ -52,7 +52,7 @@ class ShellJobRunner(val function: CommandLineFunction) extends CommandLineJobRu
 
     updateStatus(RunnerStatus.RUNNING)
     job.run()
-    updateStatus(RunnerStatus.FAILED)
+    updateStatus(RunnerStatus.DONE)
   }
 
   override def checkUnknownStatus() {}
diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/VcfGatherFunction.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/VcfGatherFunction.scala
index 27e186585..d70022147 100644
--- a/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/VcfGatherFunction.scala
+++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/VcfGatherFunction.scala
@@ -43,8 +43,7 @@ class VcfGatherFunction extends CombineVariants with GatherFunction {
     this.intervals = this.originalGATK.intervals
     this.intervalsString = this.originalGATK.intervalsString
 
-    this.rodBind = this.gatherParts.zipWithIndex map { case (input, index) => new RodBind("input"+index, "VCF", input) }
-    this.rod_priority_list = (0 until this.gatherParts.size).map("input"+_).mkString(",")
+    this.variant = this.gatherParts.zipWithIndex map { case (input, index) => new TaggedFile(input, "input"+index) }
     this.out = this.originalOutput
     this.assumeIdenticalSamples = true
 
diff --git a/public/scala/src/org/broadinstitute/sting/queue/util/QScriptUtils.scala b/public/scala/src/org/broadinstitute/sting/queue/util/QScriptUtils.scala
index 99aaa9474..12bd880d8 100644
--- a/public/scala/src/org/broadinstitute/sting/queue/util/QScriptUtils.scala
+++ b/public/scala/src/org/broadinstitute/sting/queue/util/QScriptUtils.scala
@@ -22,15 +22,15 @@ object QScriptUtils {
    * to have empty lines and comment lines (lines starting with #).
    */
   def createListFromFile(in: File):List[File] = {
-    // If the file provided ends with .bam, it is not a bam list, we treat it as a single file.
+    // If the file provided ends with .bam, .fasta or .fq, it is not a list of files: we treat it as a single file
     // and return a list with only this file.
-    if (in.toString.endsWith(".bam"))
+    if (in.toString.endsWith(".bam") || in.toString.endsWith(".fasta") || in.toString.endsWith(".fq"))
       return List(in)
 
     var list: List[File] = List()
-    for (bam <- fromFile(in).getLines)
-      if (!bam.startsWith("#") && !bam.isEmpty )
-        list :+= new File(bam.trim())
+    for (file <- fromFile(in).getLines.map(_.trim))  // trim first, so indented comments and whitespace-only lines are skipped too
+      if (!file.startsWith("#") && !file.isEmpty )
+        list :+= new File(file)
     list.sortWith(_.compareTo(_) < 0)
   }
 
diff --git a/public/scala/src/org/broadinstitute/sting/queue/util/ScalaCompoundArgumentTypeDescriptor.scala b/public/scala/src/org/broadinstitute/sting/queue/util/ScalaCompoundArgumentTypeDescriptor.scala
index 40a296022..58341a0a5 100644
--- a/public/scala/src/org/broadinstitute/sting/queue/util/ScalaCompoundArgumentTypeDescriptor.scala
+++ b/public/scala/src/org/broadinstitute/sting/queue/util/ScalaCompoundArgumentTypeDescriptor.scala
@@ -4,6 +4,7 @@ import collection.JavaConversions._
 import org.broadinstitute.sting.queue.QException
 import java.lang.Class
 import org.broadinstitute.sting.commandline.{ArgumentMatches, ArgumentSource, ArgumentTypeDescriptor, ParsingEngine}
+import java.lang.reflect.Type
 
 /**
  * An ArgumentTypeDescriptor that can parse the scala collections.
@@ -42,6 +43,12 @@ class ScalaCompoundArgumentTypeDescriptor extends ArgumentTypeDescriptor {
    * @param argumentMatches The argument match strings that were found for this argument source.
    * @return The parsed object.
    */
+  def parse(parsingEngine: ParsingEngine, source: ArgumentSource, typeType: Type, argumentMatches: ArgumentMatches) = {
+    // Reduce the reflective Type first. NOTE(review): presumably this yields a Class, so the call below binds to the Class[_] overload -- confirm.
+    val rawType = makeRawTypeIfNecessary(typeType)
+    parse(parsingEngine, source, rawType, argumentMatches)
+  }
+
   def parse(parsingEngine: ParsingEngine, source: ArgumentSource, classType: Class[_], argumentMatches: ArgumentMatches) = {
     val componentType = ReflectionUtils.getCollectionType(source.field)
     val componentArgumentParser = parsingEngine.selectBestTypeDescriptor(componentType)
diff --git a/public/scala/test/org/broadinstitute/sting/queue/pipeline/PipelineTest.scala b/public/scala/test/org/broadinstitute/sting/queue/pipeline/PipelineTest.scala
index c2c956118..27ac559c5 100644
--- a/public/scala/test/org/broadinstitute/sting/queue/pipeline/PipelineTest.scala
+++ b/public/scala/test/org/broadinstitute/sting/queue/pipeline/PipelineTest.scala
@@ -34,8 +34,8 @@ import org.broadinstitute.sting.BaseTest
 import org.broadinstitute.sting.MD5DB
 import org.broadinstitute.sting.queue.QCommandLine
 import org.broadinstitute.sting.queue.util.{Logging, ProcessController}
-import java.io.{FileNotFoundException, File}
-import org.broadinstitute.sting.gatk.report.GATKReportParser
+import java.io.File
+import org.broadinstitute.sting.gatk.report.GATKReport
 import org.apache.commons.io.FileUtils
 import org.broadinstitute.sting.queue.engine.CommandLinePluginManager
 
@@ -118,12 +118,11 @@ object PipelineTest extends BaseTest with Logging {
     // write the report to the shared validation data location
     val formatter = new SimpleDateFormat("yyyy.MM.dd.HH.mm.ss")
     val reportLocation = "%s%s/%s/validation.%s.eval".format(validationReportsDataLocation, jobRunner, name, formatter.format(new Date))
-    val report = new File(reportLocation)
+    val reportFile = new File(reportLocation)
 
-    FileUtils.copyFile(new File(runDir(name, jobRunner) + evalSpec.evalReport), report);
+    FileUtils.copyFile(new File(runDir(name, jobRunner) + evalSpec.evalReport), reportFile);
 
-    val parser = new GATKReportParser
-    parser.parse(report)
+    val report = new GATKReport(reportFile);
 
     var allInRange = true
 
@@ -131,7 +130,10 @@ object PipelineTest extends BaseTest with Logging {
     println(name + " validation values:")
     println("    value (min,target,max) table key metric")
     for (validation <- evalSpec.validations) {
-      val value = parser.getValue(validation.table, validation.key, validation.metric)
+      val table = report.getTable(validation.table)
+      val key = table.getPrimaryKey(validation.key)
+      // String.valueOf on a null Object reference yields the text "null", never null; go through Option so a missing cell stays null for the check below.
+      val value = Option(table.get(key, validation.metric)).map(_.toString).orNull
       val inRange = if (value == null) false else validation.inRange(value)
       val flag = if (!inRange) "*" else " "
       println("  %s %s (%s,%s,%s) %s %s %s".format(flag, value, validation.min, validation.target, validation.max, validation.table, validation.key, validation.metric))
diff --git a/settings/helpTemplates/common.html b/settings/helpTemplates/common.html
index ebc060d0a..1554a1d40 100644
--- a/settings/helpTemplates/common.html
+++ b/settings/helpTemplates/common.html
@@ -6,10 +6,10 @@
 
 
 <#macro headerInfo>
+
+
+<#macro footerInfo>
     

See also Main index | GATK wiki | GATK support forum

GATK version ${version} built at ${timestamp}.

-<#macro footerInfo> - - diff --git a/settings/helpTemplates/generic.template.html b/settings/helpTemplates/generic.template.html index ca0d1e76f..032407164 100644 --- a/settings/helpTemplates/generic.template.html +++ b/settings/helpTemplates/generic.template.html @@ -53,19 +53,18 @@

${name}

<@headerInfo /> -

Brief summary

- ${summary} +

${summary}

<#if author??>

Author

${author} -

Detailed description

+

Introduction

${description} <#-- Create the argument summary --> <#if arguments.all?size != 0>
-

Feature specific arguments

+

${name} specific arguments

diff --git a/settings/helpTemplates/style.css b/settings/helpTemplates/style.css index 79f409f55..1d7bcc576 100644 --- a/settings/helpTemplates/style.css +++ b/settings/helpTemplates/style.css @@ -14,29 +14,67 @@ p, ul, ol, dl, dt, dd, td font-size: 12pt; } -p.version, p.see-also +p { - font-size: 8pt; + margin-left: 1em; } -h1, h2, h3 +p.summary +{ + margin-left: 2em; + margin-top: -20pt; + font-style: italic; +} + +p.see-also +{ + font-size: 10pt; + margin-left: 0em; + margin-top: 3em; + text-align: center; +} + +p.version +{ + font-size: 8pt; + margin-left: 0em; + margin-top: -8pt; + text-align: center; +} + + +h1, h2, h3, h4 { font-family: Corbel, Arial, Helvetica, Sans-Serif; font-weight: bold; text-align: left; - color: #669; } h1 { font-size: 32pt; letter-spacing: -2px; + color: #669; } -h3 +h2 { - font-size: 16pt; - font-weight: normal; + font-size: 16pt; + font-weight: bold; + margin-top: 2em; + color: #669; +} + +h3 +{ + font-size: 12pt; + margin-left: 1em; + color: #000; +} + +hr +{ + margin-top: 4em; } /*