diff --git a/build.xml b/build.xml index fe4c7a3f4..438e9c90c 100644 --- a/build.xml +++ b/build.xml @@ -79,6 +79,17 @@ + + + + + + + + + + + @@ -457,6 +468,26 @@ + + + + + + + + + + + + + + + @@ -489,6 +520,8 @@ + + @@ -957,6 +990,12 @@ + + @@ -1050,7 +1089,7 @@ - + diff --git a/ivy.xml b/ivy.xml index 3f3d1c97f..a394aa6d7 100644 --- a/ivy.xml +++ b/ivy.xml @@ -30,6 +30,9 @@ + + + diff --git a/public/R/src/gsalib/DESCRIPTION b/public/R/src/gsalib/DESCRIPTION new file mode 100644 index 000000000..6116e8c66 --- /dev/null +++ b/public/R/src/gsalib/DESCRIPTION @@ -0,0 +1,10 @@ +Package: gsalib +Type: Package +Title: Utility functions +Version: 1.0 +Date: 2010-10-02 +Author: Kiran Garimella +Maintainer: Kiran Garimella +Description: Utility functions for GATK NGS analyses +License: BSD +LazyLoad: yes diff --git a/public/R/src/gsalib/R/gsa.error.R b/public/R/src/gsalib/R/gsa.error.R new file mode 100644 index 000000000..1c6a56046 --- /dev/null +++ b/public/R/src/gsalib/R/gsa.error.R @@ -0,0 +1,12 @@ +gsa.error <- function(message) { + message(""); + gsa.message("Error: **********"); + gsa.message(sprintf("Error: %s", message)); + gsa.message("Error: **********"); + message(""); + + traceback(); + + message(""); + stop(message, call. = FALSE); +} diff --git a/public/R/src/gsalib/R/gsa.getargs.R b/public/R/src/gsalib/R/gsa.getargs.R new file mode 100644 index 000000000..94613bf93 --- /dev/null +++ b/public/R/src/gsalib/R/gsa.getargs.R @@ -0,0 +1,116 @@ +.gsa.getargs.usage <- function(argspec, doc) { + cargs = commandArgs(); + + usage = "Usage:"; + + fileIndex = grep("--file=", cargs); + if (length(fileIndex) > 0) { + progname = gsub("--file=", "", cargs[fileIndex[1]]); + + usage = sprintf("Usage: Rscript %s [arguments]", progname); + + if (!is.na(doc)) { + message(sprintf("%s: %s\n", progname, doc)); + } + } + + message(usage); + + for (argname in names(argspec)) { + key = argname; + defaultValue = 0; + doc = ""; + + if (is.list(argspec[[argname]])) { + defaultValue = argspec[[argname]]$value; + doc = argspec[[argname]]$doc; + } + + message(sprintf(" -%-10s\t[default: %s]\t%s", key, defaultValue, doc)); + } + + message(""); + + stop(call. = FALSE); +} + +gsa.getargs <- function(argspec, doc = NA) { + argsenv = new.env(); + + for (argname in names(argspec)) { + value = 0; + if (is.list(argspec[[argname]])) { + value = argspec[[argname]]$value; + } else { + value = argspec[[argname]]; + } + + assign(argname, value, envir=argsenv); + } + + if (interactive()) { + for (argname in names(argspec)) { + value = get(argname, envir=argsenv); + + if (is.na(value) | is.null(value)) { + if (exists("cmdargs")) { + assign(argname, cmdargs[[argname]], envir=argsenv); + } else { + assign(argname, readline(sprintf("Please enter a value for '%s': ", argname)), envir=argsenv); + } + } else { + assign(argname, value, envir=argsenv); + } + } + } else { + cargs = commandArgs(TRUE); + + if (length(cargs) == 0) { + .gsa.getargs.usage(argspec, doc); + } + + for (i in 1:length(cargs)) { + if (length(grep("^-", cargs[i], ignore.case=TRUE)) > 0) { + key = gsub("-", "", cargs[i]); + value = cargs[i+1]; + + if (key == "h" | key == "help") { + .gsa.getargs.usage(argspec, doc); + } + + if (length(grep("^[\\d\\.e\\+\\-]+$", value, perl=TRUE, ignore.case=TRUE)) > 0) { + value = as.numeric(value); + } + + assign(key, value, envir=argsenv); + } + } + } + + args = as.list(argsenv); + + isMissingArgs = 0; + missingArgs = c(); + + for (arg in names(argspec)) { + if (is.na(args[[arg]]) | is.null(args[[arg]])) { + gsa.warn(sprintf("Value for required argument '-%s' was not specified", arg)); + + isMissingArgs = 1; + missingArgs = c(missingArgs, arg); + } + } + + if (isMissingArgs) { + gsa.error( + paste( + "Missing required arguments: -", + paste(missingArgs, collapse=" -"), + ". Specify -h or -help to this script for a list of available arguments.", + sep="" + ) + ); + } + + args; +} diff --git a/public/R/src/gsalib/R/gsa.message.R b/public/R/src/gsalib/R/gsa.message.R new file mode 100644 index 000000000..a2b909d3d --- /dev/null +++ b/public/R/src/gsalib/R/gsa.message.R @@ -0,0 +1,3 @@ +gsa.message <- function(message) { + message(sprintf("[gsalib] %s", message)); +} diff --git a/public/R/src/gsalib/R/gsa.plot.venn.R b/public/R/src/gsalib/R/gsa.plot.venn.R new file mode 100644 index 000000000..b1353ccc1 --- /dev/null +++ b/public/R/src/gsalib/R/gsa.plot.venn.R @@ -0,0 +1,50 @@ +gsa.plot.venn <- +function(a, b, c=0, a_and_b, a_and_c=0, b_and_c=0, + col=c("#FF6342", "#63C6DE", "#ADDE63"), + pos=c(0.20, 0.20, 0.80, 0.82), + debug=0 + ) { + library(png); + library(graphics); + + # Set up properties + for (i in 1:length(col)) { + rgbcol = col2rgb(col[i]); + col[i] = sprintf("%02X%02X%02X", rgbcol[1], rgbcol[2], rgbcol[3]); + } + + chco = paste(col[1], col[2], col[3], sep=","); + chd = paste(a, b, c, a_and_b, a_and_c, b_and_c, sep=","); + + props = c( + 'cht=v', + 'chs=525x525', + 'chds=0,10000000000', + paste('chco=', chco, sep=""), + paste('chd=t:', chd, sep="") + ); + proplist = paste(props[1], props[2], props[3], props[4], props[5], sep='&'); + + # Get the venn diagram (as a temporary file) + filename = tempfile("venn"); + cmd = paste("wget -O ", filename, " 'http://chart.apis.google.com/chart?", proplist, "' > /dev/null 2>&1", sep=""); + + if (debug == 1) { + print(cmd); + } + system(cmd); + + # Render the temp png file into a plotting frame + a = readPNG(filename); + + plot(0, 0, type="n", xaxt="n", yaxt="n", bty="n", xlim=c(0, 1), ylim=c(0, 1), xlab="", ylab=""); + if (c == 0 || a >= b) { + rasterImage(a, pos[1], pos[2], pos[3], pos[4]); + } else { + rasterImage(a, 0.37+pos[1], 0.37+pos[2], 0.37+pos[3], 0.37+pos[4], angle=180); + } + + # Clean up! + unlink(filename); +} + diff --git a/public/R/src/gsalib/R/gsa.read.eval.R b/public/R/src/gsalib/R/gsa.read.eval.R new file mode 100644 index 000000000..f1d49092b --- /dev/null +++ b/public/R/src/gsalib/R/gsa.read.eval.R @@ -0,0 +1,83 @@ +.gsa.attemptToLoadFile <- function(filename) { + file = NA; + + if (file.exists(filename) & file.info(filename)$size > 500) { + file = read.csv(filename, header=TRUE, comment.char="#"); + } + + file; +} + +gsa.read.eval <- +function(evalRoot) { + fileAlleleCountStats = paste(evalRoot, ".AlleleCountStats.csv", sep=""); + fileCompOverlap = paste(evalRoot, ".Comp_Overlap.csv", sep=""); + fileCountVariants = paste(evalRoot, ".Count_Variants.csv", sep=""); + fileGenotypeConcordance = paste(evalRoot, ".Genotype_Concordance.csv", sep=""); + fileMetricsByAc = paste(evalRoot, ".MetricsByAc.csv", sep=""); + fileMetricsBySample = paste(evalRoot, ".MetricsBySample.csv", sep=""); + fileQuality_Metrics_by_allele_count = paste(evalRoot, ".Quality_Metrics_by_allele_count.csv", sep=""); + fileQualityScoreHistogram = paste(evalRoot, ".QualityScoreHistogram.csv", sep=""); + fileSampleStatistics = paste(evalRoot, ".Sample_Statistics.csv", sep=""); + fileSampleSummaryStatistics = paste(evalRoot, ".Sample_Summary_Statistics.csv", sep=""); + fileSimpleMetricsBySample = paste(evalRoot, ".SimpleMetricsBySample.csv", sep=""); + fileTi_slash_Tv_Variant_Evaluator = paste(evalRoot, ".Ti_slash_Tv_Variant_Evaluator.csv", sep=""); + fileTiTvStats = paste(evalRoot, ".TiTvStats.csv", sep=""); + fileVariant_Quality_Score = paste(evalRoot, ".Variant_Quality_Score.csv", sep=""); + + eval = list( + AlleleCountStats = NA, + CompOverlap = NA, + CountVariants = NA, + GenotypeConcordance = NA, + MetricsByAc = NA, + MetricsBySample = NA, + Quality_Metrics_by_allele_count = NA, + QualityScoreHistogram = NA, + SampleStatistics = NA, + SampleSummaryStatistics = NA, + SimpleMetricsBySample = NA, + TiTv = NA, + TiTvStats = NA, + Variant_Quality_Score = NA, + + CallsetNames = c(), + CallsetOnlyNames = c(), + CallsetFilteredNames = c() + ); + + eval$AlleleCountStats = .gsa.attemptToLoadFile(fileAlleleCountStats); + eval$CompOverlap = .gsa.attemptToLoadFile(fileCompOverlap); + eval$CountVariants = .gsa.attemptToLoadFile(fileCountVariants); + eval$GenotypeConcordance = .gsa.attemptToLoadFile(fileGenotypeConcordance); + eval$MetricsByAc = .gsa.attemptToLoadFile(fileMetricsByAc); + eval$MetricsBySample = .gsa.attemptToLoadFile(fileMetricsBySample); + eval$Quality_Metrics_by_allele_count = .gsa.attemptToLoadFile(fileQuality_Metrics_by_allele_count); + eval$QualityScoreHistogram = .gsa.attemptToLoadFile(fileQualityScoreHistogram); + eval$SampleStatistics = .gsa.attemptToLoadFile(fileSampleStatistics); + eval$SampleSummaryStatistics = .gsa.attemptToLoadFile(fileSampleSummaryStatistics); + eval$SimpleMetricsBySample = .gsa.attemptToLoadFile(fileSimpleMetricsBySample); + eval$TiTv = .gsa.attemptToLoadFile(fileTi_slash_Tv_Variant_Evaluator); + eval$TiTvStats = .gsa.attemptToLoadFile(fileTiTvStats); + eval$Variant_Quality_Score = .gsa.attemptToLoadFile(fileVariant_Quality_Score); + + uniqueJexlExpressions = unique(eval$TiTv$jexl_expression); + eval$CallsetOnlyNames = as.vector(uniqueJexlExpressions[grep("FilteredIn|Intersection|none", uniqueJexlExpressions, invert=TRUE, ignore.case=TRUE)]); + eval$CallsetNames = as.vector(gsub("-only", "", eval$CallsetOnlyNames)); + eval$CallsetFilteredNames = as.vector(c( + paste(gsub("^(\\w)", "In\\U\\1", eval$CallsetNames[1], perl=TRUE), "-Filtered", gsub("^(\\w)", "In\\U\\1", eval$CallsetNames[2], perl=TRUE), sep=""), + paste(gsub("^(\\w)", "In\\U\\1", eval$CallsetNames[2], perl=TRUE), "-Filtered", gsub("^(\\w)", "In\\U\\1", eval$CallsetNames[1], perl=TRUE), sep="")) + ); + + if (!(eval$CallsetFilteredNames[1] %in% unique(eval$TiTv$jexl_expression))) { + eval$CallsetFilteredNames[1] = paste("In", eval$CallsetNames[1], "-FilteredIn", eval$CallsetNames[2], sep=""); + } + + if (!(eval$CallsetFilteredNames[2] %in% unique(eval$TiTv$jexl_expression))) { + eval$CallsetFilteredNames[2] = paste("In", eval$CallsetNames[2], "-FilteredIn", eval$CallsetNames[1], sep=""); + #eval$CallsetFilteredNames[2] = paste(gsub("^(\\w)", "In", eval$CallsetNames[2], perl=TRUE), "-Filtered", gsub("^(\\w)", "In", eval$CallsetNames[1], perl=TRUE), sep=""); + } + + eval; +} + diff --git a/public/R/src/gsalib/R/gsa.read.gatkreport.R b/public/R/src/gsalib/R/gsa.read.gatkreport.R new file mode 100644 index 000000000..9b3ef1ad1 --- /dev/null +++ b/public/R/src/gsalib/R/gsa.read.gatkreport.R @@ -0,0 +1,64 @@ +# Load a table into the specified environment. Make sure that each new table gets a unique name (this allows one to cat a bunch of tables with the same name together and load them into R without each table overwriting the last. +.gsa.assignGATKTableToEnvironment <- function(tableName, tableHeader, tableRows, tableEnv) { + d = data.frame(tableRows, row.names=NULL, stringsAsFactors=FALSE); + colnames(d) = tableHeader; + + for (i in 1:ncol(d)) { + v = suppressWarnings(as.numeric(d[,i])); + + if (length(na.omit(as.numeric(v))) == length(d[,i])) { + d[,i] = v; + } + } + + usedNames = ls(envir=tableEnv, pattern=tableName); + + if (length(usedNames) > 0) { + tableName = paste(tableName, ".", length(usedNames), sep=""); + } + + assign(tableName, d, envir=tableEnv); +} + +# Load all GATKReport tables from a file +gsa.read.gatkreport <- function(filename) { + con = file(filename, "r", blocking = TRUE); + lines = readLines(con); + close(con); + + tableEnv = new.env(); + + tableName = NA; + tableHeader = c(); + tableRows = c(); + + for (line in lines) { + if (length(grep("^##:GATKReport.v0.1[[:space:]]+", line, ignore.case=TRUE)) > 0) { + headerFields = unlist(strsplit(line, "[[:space:]]+")); + + if (!is.na(tableName)) { + .gsa.assignGATKTableToEnvironment(tableName, tableHeader, tableRows, tableEnv); + } + + tableName = headerFields[2]; + tableHeader = c(); + tableRows = c(); + } else if (length(grep("^[[:space:]]*$", line)) > 0 | length(grep("^[[:space:]]*#", line)) > 0) { + # do nothing + } else if (!is.na(tableName)) { + row = unlist(strsplit(line, "[[:space:]]+")); + + if (length(tableHeader) == 0) { + tableHeader = row; + } else { + tableRows = rbind(tableRows, row); + } + } + } + + if (!is.na(tableName)) { + .gsa.assignGATKTableToEnvironment(tableName, tableHeader, tableRows, tableEnv); + } + + gatkreport = as.list(tableEnv); +} diff --git a/public/R/src/gsalib/R/gsa.read.squidmetrics.R b/public/R/src/gsalib/R/gsa.read.squidmetrics.R new file mode 100644 index 000000000..39fa1ad32 --- /dev/null +++ b/public/R/src/gsalib/R/gsa.read.squidmetrics.R @@ -0,0 +1,28 @@ +gsa.read.squidmetrics = function(project, bylane = FALSE) { + suppressMessages(library(ROracle)); + + drv = dbDriver("Oracle"); + con = dbConnect(drv, "REPORTING/REPORTING@ora01:1521/SEQPROD"); + + if (bylane) { + statement = paste("SELECT * FROM ILLUMINA_PICARD_METRICS WHERE \"Project\" = '", project, "'", sep=""); + print(statement); + + rs = dbSendQuery(con, statement = statement); + d = fetch(rs, n=-1); + dbHasCompleted(rs); + dbClearResult(rs); + } else { + statement = paste("SELECT * FROM ILLUMINA_SAMPLE_STATUS_AGG WHERE \"Project\" = '", project, "'", sep=""); + print(statement); + + rs = dbSendQuery(con, statement = statement); + d = fetch(rs, n=-1); + dbHasCompleted(rs); + dbClearResult(rs); + } + + oraCloseDriver(drv); + + subset(d, Project == project); +} diff --git a/public/R/src/gsalib/R/gsa.read.vcf.R b/public/R/src/gsalib/R/gsa.read.vcf.R new file mode 100644 index 000000000..5beb6455d --- /dev/null +++ b/public/R/src/gsalib/R/gsa.read.vcf.R @@ -0,0 +1,23 @@ +gsa.read.vcf <- function(vcffile, skip=0, nrows=-1, expandGenotypeFields = FALSE) { + headers = readLines(vcffile, n=100); + headerline = headers[grep("#CHROM", headers)]; + header = unlist(strsplit(gsub("#", "", headerline), "\t")) + + d = read.table(vcffile, header=FALSE, skip=skip, nrows=nrows, stringsAsFactors=FALSE); + colnames(d) = header; + + if (expandGenotypeFields) { + columns = ncol(d); + + offset = columns + 1; + for (sampleIndex in 10:columns) { + gt = unlist(lapply(strsplit(d[,sampleIndex], ":"), function(x) x[1])); + d[,offset] = gt; + colnames(d)[offset] = sprintf("%s.GT", colnames(d)[sampleIndex]); + + offset = offset + 1; + } + } + + return(d); +} diff --git a/public/R/src/gsalib/R/gsa.warn.R b/public/R/src/gsalib/R/gsa.warn.R new file mode 100644 index 000000000..7ee08ce65 --- /dev/null +++ b/public/R/src/gsalib/R/gsa.warn.R @@ -0,0 +1,3 @@ +gsa.warn <- function(message) { + gsa.message(sprintf("Warning: %s", message)); +} diff --git a/public/R/src/gsalib/Read-and-delete-me b/public/R/src/gsalib/Read-and-delete-me new file mode 100644 index 000000000..d04323a6e --- /dev/null +++ b/public/R/src/gsalib/Read-and-delete-me @@ -0,0 +1,9 @@ +* Edit the help file skeletons in 'man', possibly combining help files + for multiple functions. +* Put any C/C++/Fortran code in 'src'. +* If you have compiled code, add a .First.lib() function in 'R' to load + the shared library. +* Run R CMD build to build the package tarball. +* Run R CMD check to check the package tarball. + +Read "Writing R Extensions" for more information. diff --git a/public/R/src/gsalib/data/tearsheetdrop.jpg b/public/R/src/gsalib/data/tearsheetdrop.jpg new file mode 100755 index 000000000..c9d480fa0 Binary files /dev/null and b/public/R/src/gsalib/data/tearsheetdrop.jpg differ diff --git a/public/R/src/gsalib/man/gsa.error.Rd b/public/R/src/gsalib/man/gsa.error.Rd new file mode 100644 index 000000000..df7c0cbde --- /dev/null +++ b/public/R/src/gsalib/man/gsa.error.Rd @@ -0,0 +1,49 @@ +\name{gsa.error} +\alias{gsa.error} +\title{ +GSA error +} +\description{ +Write an error message to standard out with the prefix '[gsalib] Error:', print a traceback, and exit. +} +\usage{ +gsa.error(message) +} +%- maybe also 'usage' for other objects documented here. +\arguments{ + \item{message}{ +The error message to write. +} +} +\details{ +%% ~~ If necessary, more details than the description above ~~ +} +\value{ +%% ~Describe the value returned +%% If it is a LIST, use +%% \item{comp1 }{Description of 'comp1'} +%% \item{comp2 }{Description of 'comp2'} +%% ... +} +\references{ +%% ~put references to the literature/web site here ~ +} +\author{ +Kiran Garimella +} +\note{ +%% ~~further notes~~ +} + +%% ~Make other sections like Warning with \section{Warning }{....} ~ + +\seealso{ +%% ~~objects to See Also as \code{\link{help}}, ~~~ +} +\examples{ +gsa.error("This is a message"); +} +% Add one or more standard keywords, see file 'KEYWORDS' in the +% R documentation directory. +\keyword{ ~kwd1 } +\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line diff --git a/public/R/src/gsalib/man/gsa.getargs.Rd b/public/R/src/gsalib/man/gsa.getargs.Rd new file mode 100644 index 000000000..27aa1b05a --- /dev/null +++ b/public/R/src/gsalib/man/gsa.getargs.Rd @@ -0,0 +1,57 @@ +\name{gsa.getargs} +\alias{gsa.getargs} +\title{ +Get script arguments +} +\description{ +Get script arguments given a list object specifying arguments and documentation. Can be used in command-line or interactive mode. This is helpful when developing scripts in interactive mode that will eventually become command-line programs. If no arguments are specified or help is requested in command-line mode, the script will print out a usage statement with available arguments and exit. +} +\usage{ +gsa.getargs(argspec, doc = NA) +} +\arguments{ + \item{argspec}{ +A list object. Each key is an argument name. The value is another list object with a 'value' and 'doc' keys. For example: +\preformatted{argspec = list( + arg1 = list(value=10, doc="Info for optional arg1"), + arg2 = list(value=NA, doc="Info for required arg2") +); +} + +If the value provided is NA, the argument is considered required and must be specified when the script is invoked. For command-line mode, this means the argument must be specified on the command-line. In interactive mode, there are two ways of specifying these arguments. First, if a properly formatted list argument called 'cmdargs' is present in the current environment (i.e. the object returned by gsa.getargs() from a previous invocation), the value is taken from this object. Otherwise, the argument is prompted for. +} + + \item{doc}{ +An optional string succinctly documenting the purpose of the script. +} +} +\details{ +Interactive scripts typically make use of hardcoded filepaths and parameter settings. This makes testing easy, but generalization to non-interactive mode more difficult. This utility provides a mechanism for writing scripts that work properly in both interactive and command-line modes. + +To use this method, specify a list with key-value pairs representing the arguments as specified above. In command-line mode, if no arguments are specified or the user specifies '-h' or '-help' anywhere on the command string, a help message indicating available arguments, their default values, and some documentation about the argument are provided. +} +\value{ +Returns a list with keys matching the argspec and values representing the specified arguments. + +\item{arg1 }{Value for argument 1} +\item{arg2 }{Value for argument 2} +...etc. +} +\references{ +%% ~put references to the literature/web site here ~ +} +\author{ +Kiran Garimella +} +\examples{ +argspec = list( + file = list(value="/my/test.vcf", doc="VCF file"), + verbose = list(value=0, doc="If 1, set verbose mode"), + test2 = list(value=2.3e9, doc="Another argument that does stuff") +); + +cmdargs = gsa.getargs(argspec, doc="My test program"); + +print(cmdargs$file); # will print '[1] "/my/test.vcf"' +} +\keyword{ ~kwd1 } diff --git a/public/R/src/gsalib/man/gsa.message.Rd b/public/R/src/gsalib/man/gsa.message.Rd new file mode 100644 index 000000000..9752de8a9 --- /dev/null +++ b/public/R/src/gsalib/man/gsa.message.Rd @@ -0,0 +1,44 @@ +\name{gsa.message} +\alias{gsa.message} +\title{ +GSA message +} +\description{ +Write a message to standard out with the prefix '[gsalib]'. +} +\usage{ +gsa.message(message) +} +\arguments{ + \item{message}{ +The message to write. +} +} +\details{ +%% ~~ If necessary, more details than the description above ~~ +} +\value{ +%% ~Describe the value returned +%% If it is a LIST, use +%% \item{comp1 }{Description of 'comp1'} +%% \item{comp2 }{Description of 'comp2'} +%% ... +} +\references{ +%% ~put references to the literature/web site here ~ +} +\author{ +Kiran Garimella +} +\note{ +%% ~~further notes~~ +} + +\seealso{ +%% ~~objects to See Also as \code{\link{help}}, ~~~ +} +\examples{ +## Write message to stdout +gsa.message("This is a message"); +} +\keyword{ ~kwd1 } diff --git a/public/R/src/gsalib/man/gsa.plot.venn.Rd b/public/R/src/gsalib/man/gsa.plot.venn.Rd new file mode 100644 index 000000000..bf4feb5bc --- /dev/null +++ b/public/R/src/gsalib/man/gsa.plot.venn.Rd @@ -0,0 +1,75 @@ +\name{gsa.plot.venn} +\alias{gsa.plot.venn} +\title{ +Plot a proportional venn diagram +} +\description{ +Plot a proportional venn diagram (two or three-way venns allowed) +} +\usage{ +gsa.plot.venn(a, b, c = 0, a_and_b, a_and_c = 0, b_and_c = 0, col = c("#FF6342", "#63C6DE", "#ADDE63"), pos = c(0.2, 0.2, 0.8, 0.82), debug = 0) +} +\arguments{ + \item{a}{ +size of 'a' circle +} + \item{b}{ +size of 'b' circle +} + \item{c}{ +size of 'c' circle +} + \item{a_and_b}{ +size of a and b overlap +} + \item{a_and_c}{ +size of a and c overlap +} + \item{b_and_c}{ +size of b and c overlap +} + \item{col}{ +vector of colors for each venn piece +} + \item{pos}{ +vector of positional elements +} + \item{debug}{ +if 1, set debug mode and print useful information +} +} +\details{ +Plots a two-way or three-way proportional Venn diagram. Internally, this method uses the Google Chart API to generate the diagram, then renders it into the plot window where it can be annotated in interesting ways. +} +\value{ +%% ~Describe the value returned +%% If it is a LIST, use +%% \item{comp1 }{Description of 'comp1'} +%% \item{comp2 }{Description of 'comp2'} +%% ... +} +\references{ +} +\author{ +Kiran Garimella +} +\note{ +%% ~~further notes~~ +} + +%% ~Make other sections like Warning with \section{Warning }{....} ~ + +\seealso{ +%% ~~objects to See Also as \code{\link{help}}, ~~~ +} +\examples{ +## Plot a two-way Venn diagram +gsa.plot.venn(1000, 750, 0, 400); + +## Plot a three-way Venn diagram +gsa.plot.venn(1000, 750, 900, 400, 650, 500); +} +% Add one or more standard keywords, see file 'KEYWORDS' in the +% R documentation directory. +\keyword{ ~kwd1 } +\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line diff --git a/public/R/src/gsalib/man/gsa.read.eval.Rd b/public/R/src/gsalib/man/gsa.read.eval.Rd new file mode 100644 index 000000000..0e2baba73 --- /dev/null +++ b/public/R/src/gsalib/man/gsa.read.eval.Rd @@ -0,0 +1,111 @@ +\name{gsa.read.eval} +\alias{gsa.read.eval} +\title{ +Read a VariantEval file +} +\description{ +Read a VariantEval file that's output in R format. +} +\usage{ +gsa.read.eval(evalRoot) +} +%- maybe also 'usage' for other objects documented here. +\arguments{ + \item{evalRoot}{ +%% ~~Describe \code{evalRoot} here~~ +} +} +\details{ +%% ~~ If necessary, more details than the description above ~~ +} +\value{ +%% ~Describe the value returned +%% If it is a LIST, use +%% \item{comp1 }{Description of 'comp1'} +%% \item{comp2 }{Description of 'comp2'} +%% ... +} +\references{ +%% ~put references to the literature/web site here ~ +} +\author{ +%% ~~who you are~~ +} +\note{ +%% ~~further notes~~ +} + +%% ~Make other sections like Warning with \section{Warning }{....} ~ + +\seealso{ +%% ~~objects to See Also as \code{\link{help}}, ~~~ +} +\examples{ +##---- Should be DIRECTLY executable !! ---- +##-- ==> Define data, use random, +##-- or do help(data=index) for the standard data sets. + +## The function is currently defined as +function(evalRoot) { + fileAlleleCountStats = paste(evalRoot, ".AlleleCountStats.csv", sep=""); + fileCompOverlap = paste(evalRoot, ".Comp_Overlap.csv", sep=""); + fileCountVariants = paste(evalRoot, ".Count_Variants.csv", sep=""); + fileGenotypeConcordance = paste(evalRoot, ".Genotype_Concordance.csv", sep=""); + fileMetricsByAc = paste(evalRoot, ".MetricsByAc.csv", sep=""); + fileMetricsBySample = paste(evalRoot, ".MetricsBySample.csv", sep=""); + fileQuality_Metrics_by_allele_count = paste(evalRoot, ".Quality_Metrics_by_allele_count.csv", sep=""); + fileQualityScoreHistogram = paste(evalRoot, ".QualityScoreHistogram.csv", sep=""); + fileSampleStatistics = paste(evalRoot, ".Sample_Statistics.csv", sep=""); + fileSampleSummaryStatistics = paste(evalRoot, ".Sample_Summary_Statistics.csv", sep=""); + fileSimpleMetricsBySample = paste(evalRoot, ".SimpleMetricsBySample.csv", sep=""); + fileTi_slash_Tv_Variant_Evaluator = paste(evalRoot, ".Ti_slash_Tv_Variant_Evaluator.csv", sep=""); + fileTiTvStats = paste(evalRoot, ".TiTvStats.csv", sep=""); + fileVariant_Quality_Score = paste(evalRoot, ".Variant_Quality_Score.csv", sep=""); + + eval = list( + AlleleCountStats = NA, + CompOverlap = NA, + CountVariants = NA, + GenotypeConcordance = NA, + MetricsByAc = NA, + MetricsBySample = NA, + Quality_Metrics_by_allele_count = NA, + QualityScoreHistogram = NA, + SampleStatistics = NA, + SampleSummaryStatistics = NA, + SimpleMetricsBySample = NA, + TiTv = NA, + TiTvStats = NA, + Variant_Quality_Score = NA, + + CallsetNames = c(), + CallsetOnlyNames = c(), + CallsetFilteredNames = c() + ); + + eval$AlleleCountStats = .attemptToLoadFile(fileAlleleCountStats); + eval$CompOverlap = .attemptToLoadFile(fileCompOverlap); + eval$CountVariants = .attemptToLoadFile(fileCountVariants); + eval$GenotypeConcordance = .attemptToLoadFile(fileGenotypeConcordance); + eval$MetricsByAc = .attemptToLoadFile(fileMetricsByAc); + eval$MetricsBySample = .attemptToLoadFile(fileMetricsBySample); + eval$Quality_Metrics_by_allele_count = .attemptToLoadFile(fileQuality_Metrics_by_allele_count); + eval$QualityScoreHistogram = .attemptToLoadFile(fileQualityScoreHistogram); + eval$SampleStatistics = .attemptToLoadFile(fileSampleStatistics); + eval$SampleSummaryStatistics = .attemptToLoadFile(fileSampleSummaryStatistics); + eval$SimpleMetricsBySample = .attemptToLoadFile(fileSimpleMetricsBySample); + eval$TiTv = .attemptToLoadFile(fileTi_slash_Tv_Variant_Evaluator); + eval$TiTvStats = .attemptToLoadFile(fileTiTvStats); + eval$Variant_Quality_Score = .attemptToLoadFile(fileVariant_Quality_Score); + + uniqueJexlExpressions = unique(eval$TiTv$jexl_expression); + eval$CallsetOnlyNames = as.vector(uniqueJexlExpressions[grep("FilteredIn|Intersection|none", uniqueJexlExpressions, invert=TRUE, ignore.case=TRUE)]); + eval$CallsetNames = as.vector(gsub("-only", "", eval$CallsetOnlyNames)); + eval$CallsetFilteredNames = as.vector(c()); + eval; + } +} +% Add one or more standard keywords, see file 'KEYWORDS' in the +% R documentation directory. +\keyword{ ~kwd1 } +\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line diff --git a/public/R/src/gsalib/man/gsa.read.gatkreport.Rd b/public/R/src/gsalib/man/gsa.read.gatkreport.Rd new file mode 100644 index 000000000..67c2c7b28 --- /dev/null +++ b/public/R/src/gsalib/man/gsa.read.gatkreport.Rd @@ -0,0 +1,55 @@ +\name{gsa.read.gatkreport} +\alias{gsa.read.gatkreport} +\title{ +gsa.read.gatkreport +} +\description{ +Reads a GATKReport file - a multi-table document - and loads each table as a separate data.frame object in a list. +} +\usage{ +gsa.read.gatkreport(filename) +} +\arguments{ + \item{filename}{ +The path to the GATKReport file. +} +} +\details{ +The GATKReport format replaces the multi-file output format used by many GATK tools and provides a single, consolidated file format. This format accomodates multiple tables and is still R-loadable - through this function. + +The file format looks like this: +\preformatted{##:GATKReport.v0.1 TableName : The description of the table +col1 col2 col3 +0 0.007451835696110506 25.474613284804366 +1 0.002362777171937477 29.844949954504095 +2 9.087604507451836E-4 32.87590975254731 +3 5.452562704471102E-4 34.498999090081895 +4 9.087604507451836E-4 35.14831665150137 +} + +} +\value{ +Returns a list object, where each key is the TableName and the value is the data.frame object with the contents of the table. If multiple tables with the same name exist, each one after the first will be given names of "TableName.v1", "TableName.v2", ..., "TableName.vN". +%% ~Describe the value returned +%% If it is a LIST, use +%% \item{comp1 }{Description of 'comp1'} +%% \item{comp2 }{Description of 'comp2'} +%% ... +} +\references{ +%% ~put references to the literature/web site here ~ +} +\author{ +Kiran Garimella +} +\note{ +%% ~~further notes~~ +} + +\seealso{ +%% ~~objects to See Also as \code{\link{help}}, ~~~ +} +\examples{ +report = gsa.read.gatkreport("/path/to/my/output.gatkreport"); +} +\keyword{ ~kwd1 } diff --git a/public/R/src/gsalib/man/gsa.read.squidmetrics.Rd b/public/R/src/gsalib/man/gsa.read.squidmetrics.Rd new file mode 100644 index 000000000..0a8b37843 --- /dev/null +++ b/public/R/src/gsalib/man/gsa.read.squidmetrics.Rd @@ -0,0 +1,48 @@ +\name{gsa.read.squidmetrics} +\alias{gsa.read.squidmetrics} +\title{ +gsa.read.squidmetrics +} +\description{ +Reads metrics for a specified SQUID project into a dataframe. +} +\usage{ +gsa.read.squidmetrics("C315") +} +\arguments{ + \item{project}{ +The project for which metrics should be obtained. +} + \item{bylane}{ +If TRUE, obtains per-lane metrics rather than the default per-sample metrics. +} +} +\details{ +%% ~~ If necessary, more details than the description above ~~ +} +\value{ +%% ~Describe the value returned +%% If it is a LIST, use +%% \item{comp1 }{Description of 'comp1'} +%% \item{comp2 }{Description of 'comp2'} +%% ... +Returns a data frame with samples (or lanes) as the row and the metric as the column. +} +\references{ +%% ~put references to the literature/web site here ~ +} +\author{ +Kiran Garimella +} +\note{ +This method will only work within the Broad Institute internal network. +} + +\seealso{ +%% ~~objects to See Also as \code{\link{help}}, ~~~ +} +\examples{ +## Obtain metrics for project C315. +d = gsa.read.squidmetrics("C315"); +} +\keyword{ ~kwd1 } diff --git a/public/R/src/gsalib/man/gsa.read.vcf.Rd b/public/R/src/gsalib/man/gsa.read.vcf.Rd new file mode 100644 index 000000000..cffd35e8f --- /dev/null +++ b/public/R/src/gsalib/man/gsa.read.vcf.Rd @@ -0,0 +1,53 @@ +\name{gsa.read.vcf} +\alias{gsa.read.vcf} +\title{ +gsa.read.vcf +} +\description{ +Reads a VCF file into a table. Optionally expands genotype columns into separate columns containing the genotype, separate from the other fields specified in the FORMAT field. +} +\usage{ +gsa.read.vcf(vcffile, skip=0, nrows=-1, expandGenotypeFields = FALSE) +} +\arguments{ + \item{vcffile}{ +The path to the vcf file. +} + \item{skip}{ +The number of lines of the data file to skip before beginning to read data. +} + \item{nrows}{ +The maximum number of rows to read in. Negative and other invalid values are ignored. +} + \item{expandGenotypeFields}{ +If TRUE, adds an additional column per sample containing just the genotype. +} +} +\details{ +The VCF format is the standard variant call file format used in the GATK. This function reads that data in as a table for easy analysis. +} +\value{ +Returns a data.frame object, where each column corresponds to the columns in the VCF file. +%% ~Describe the value returned +%% If it is a LIST, use +%% \item{comp1 }{Description of 'comp1'} +%% \item{comp2 }{Description of 'comp2'} +%% ... +} +\references{ +%% ~put references to the literature/web site here ~ +} +\author{ +Kiran Garimella +} +\note{ +%% ~~further notes~~ +} + +\seealso{ +%% ~~objects to See Also as \code{\link{help}}, ~~~ +} +\examples{ +vcf = gsa.read.vcf("/path/to/my/output.vcf"); +} +\keyword{ ~kwd1 } diff --git a/public/R/src/gsalib/man/gsa.warn.Rd b/public/R/src/gsalib/man/gsa.warn.Rd new file mode 100644 index 000000000..0b9770b5c --- /dev/null +++ b/public/R/src/gsalib/man/gsa.warn.Rd @@ -0,0 +1,46 @@ +\name{gsa.warn} +\alias{gsa.warn} +\title{ +GSA warn +} +\description{ +Write a warning message to standard out with the prefix '[gsalib] Warning:'. +} +\usage{ +gsa.warn(message) +} +%- maybe also 'usage' for other objects documented here. +\arguments{ + \item{message}{ +The warning message to write. +} +} +\details{ +%% ~~ If necessary, more details than the description above ~~ +} +\value{ +%% ~Describe the value returned +%% If it is a LIST, use +%% \item{comp1 }{Description of 'comp1'} +%% \item{comp2 }{Description of 'comp2'} +%% ... +} +\references{ +%% ~put references to the literature/web site here ~ +} +\author{ +Kiran Garimella +} +\note{ +%% ~~further notes~~ +} + +\seealso{ +%% ~~objects to See Also as \code{\link{help}}, ~~~ +} +\examples{ +## Write message to stdout +gsa.warn("This is a warning message"); +} +\keyword{ ~kwd1 } +\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line diff --git a/public/R/src/gsalib/man/gsalib-package.Rd b/public/R/src/gsalib/man/gsalib-package.Rd new file mode 100644 index 000000000..2b8d6db9f --- /dev/null +++ b/public/R/src/gsalib/man/gsalib-package.Rd @@ -0,0 +1,68 @@ +\name{gsalib-package} +\alias{gsalib-package} +\alias{gsalib} +\docType{package} +\title{ +GATK utility analysis functions +} +\description{ +Utility functions for analyzing GATK-processed NGS data +} +\details{ +This package contains functions for working with GATK-processed NGS data. These functions include a command-line parser that also allows a script to be used in interactive mode (good for developing scripts that will eventually be automated), a proportional Venn diagram generator, convenience methods for parsing VariantEval output, and more. +} +\author{ +Genome Sequencing and Analysis Group + +Medical and Population Genetics Program + +Maintainer: Kiran Garimella +} +\references{ +GSA wiki page: http://www.broadinstitute.org/gsa/wiki + +GATK help forum: http://www.getsatisfaction.com/gsa +} +\examples{ +## get script arguments in interactive and non-interactive mode +cmdargs = gsa.getargs( list( + requiredArg1 = list( + value = NA, + doc = "Documentation for requiredArg1" + ), + + optionalArg1 = list( + value = 3e9, + doc = "Documentation for optionalArg1" + ) +) ); + +## plot a proportional Venn diagram +gsa.plot.venn(500, 250, 0, 100); + +## read a GATKReport file +report = gsa.gatk.report("/path/to/my/output.gatkreport"); + +## emit a message +gsa.message("This is a message"); + +## emit a warning message +gsa.message("This is a warning message"); + +## emit an error message +gsa.message("This is an error message"); + +## read the SQUID metrics for a given sequencing project (internal to the Broad only) +s = gsa.read.squidmetrics("C427"); + +## read command-line arguments +cmdargs = gsa.getargs( + list( + file = list(value="/my/test.vcf", doc="VCF file"), + verbose = list(value=0, doc="If 1, set verbose mode"), + test2 = list(value=2.3e9, doc="Another argument that does stuff") + ), + doc="My test program" +); +} +\keyword{ package } diff --git a/public/c/SeparateQltout.cc b/public/c/SeparateQltout.cc new file mode 100644 index 000000000..7644c9603 --- /dev/null +++ b/public/c/SeparateQltout.cc @@ -0,0 +1,70 @@ +#include "MainTools.h" +#include "Basevector.h" +#include "lookup/LookAlign.h" +#include "lookup/SerialQltout.h" + +unsigned int MatchingEnd(look_align &la, vecbasevector &candidates, vecbasevector &ref) { + //la.PrintParseable(cout); + + for (int i = 0; i < candidates.size(); i++) { + look_align newla = la; + + if (newla.rc1) { candidates[i].ReverseComplement(); } + newla.ResetFromAlign(newla.a, candidates[i], ref[la.target_id]); + + //newla.PrintParseable(cout, &candidates[i], &ref[newla.target_id]); + //cout << newla.Errors() << " " << la.Errors() << endl; + + if (newla.Errors() == la.Errors()) { + return i; + } + } + + //FatalErr("Query id " + ToString(la.query_id) + " had no matches."); + + return candidates.size() + 1; +} + +int main(int argc, char **argv) { + RunTime(); + + BeginCommandArguments; + CommandArgument_String(ALIGNS); + CommandArgument_String(FASTB_END_1); + CommandArgument_String(FASTB_END_2); + CommandArgument_String(REFERENCE); + + CommandArgument_String(ALIGNS_END_1_OUT); + CommandArgument_String(ALIGNS_END_2_OUT); + EndCommandArguments; + + vecbasevector ref(REFERENCE); + vecbasevector reads1(FASTB_END_1); + vecbasevector reads2(FASTB_END_2); + + ofstream aligns1stream(ALIGNS_END_1_OUT.c_str()); + ofstream aligns2stream(ALIGNS_END_2_OUT.c_str()); + + basevector bv; + + SerialQltout sqltout(ALIGNS); + look_align la; + while (sqltout.Next(la)) { + vecbasevector candidates(2); + candidates[0] = reads1[la.query_id]; + candidates[1] = reads2[la.query_id]; + + unsigned int matchingend = MatchingEnd(la, candidates, ref); + if (matchingend < 2) { + bv = (matchingend == 0) ? reads1[la.query_id] : reads2[la.query_id]; + + //la.PrintParseable(cout, &bv, &ref[la.target_id]); + la.PrintParseable(((matchingend == 0) ? aligns1stream : aligns2stream), &bv, &ref[la.target_id]); + } + } + + aligns1stream.close(); + aligns2stream.close(); + + return 0; +} diff --git a/public/c/bwa/Makefile b/public/c/bwa/Makefile new file mode 100644 index 000000000..6399a0e6d --- /dev/null +++ b/public/c/bwa/Makefile @@ -0,0 +1,21 @@ +CXX=g++ +CXXFLAGS=-g -Wall -O2 -m64 -fPIC + +.cpp.o: + $(CXX) -c $(CXXFLAGS) -I$(BWA_HOME) -I$(JAVA_INCLUDE) $< -o $@ + +all: init lib + +init: + @echo Please make sure the following platforms are set correctly on your machine. + @echo BWA_HOME=$(BWA_HOME) + @echo JAVA_INCLUDE=$(JAVA_INCLUDE) + @echo TARGET_LIB=$(TARGET_LIB) + @echo EXTRA_LIBS=$(EXTRA_LIBS) + @echo LIBTOOL_COMMAND=$(LIBTOOL_COMMAND) + +lib: org_broadinstitute_sting_alignment_bwa_c_BWACAligner.o bwa_gateway.o + $(LIBTOOL_COMMAND) $? -o $(TARGET_LIB) -L$(BWA_HOME) -lbwacore $(EXTRA_LIBS) + +clean: + rm *.o libbwa.* diff --git a/public/c/bwa/build_linux.sh b/public/c/bwa/build_linux.sh new file mode 100755 index 000000000..b3631a28d --- /dev/null +++ b/public/c/bwa/build_linux.sh @@ -0,0 +1,7 @@ +#!/bin/sh +export BWA_HOME="/humgen/gsa-scr1/hanna/src/bwa-trunk/bwa" +export JAVA_INCLUDE="/broad/tools/Linux/x86_64/pkgs/jdk_1.6.0_12/include -I/broad/tools/Linux/x86_64/pkgs/jdk_1.6.0_12/include/linux" +export TARGET_LIB="libbwa.so" +export EXTRA_LIBS="-lc -lz -lstdc++ -lpthread" +export LIBTOOL_COMMAND="g++ -shared -Wl,-soname,libbwa.so" +make diff --git a/public/c/bwa/build_mac.sh b/public/c/bwa/build_mac.sh new file mode 100644 index 000000000..bfed900bb --- /dev/null +++ b/public/c/bwa/build_mac.sh @@ -0,0 +1,7 @@ +#!/bin/sh +export BWA_HOME="/Users/mhanna/src/bwa" +export JAVA_INCLUDE="/System/Library/Frameworks/JavaVM.framework/Headers" +export TARGET_LIB="libbwa.dylib" +export EXTRA_LIBS="-lc -lz -lsupc++" +export LIBTOOL_COMMAND="libtool -dynamic" +make diff --git a/public/c/bwa/bwa_gateway.cpp b/public/c/bwa/bwa_gateway.cpp new file mode 100644 index 000000000..00f5aa5bc --- /dev/null +++ b/public/c/bwa/bwa_gateway.cpp @@ -0,0 +1,277 @@ +#include +#include +#include + +#include "bwase.h" +#include "bwa_gateway.h" + +BWA::BWA(const char* ann_filename, + const char* amb_filename, + const char* pac_filename, + const char* forward_bwt_filename, + const char* forward_sa_filename, + const char* reverse_bwt_filename, + const char* reverse_sa_filename) +{ + // Load the bns (?) and reference + bns = bns_restore_core(ann_filename,amb_filename,pac_filename); + reference = new ubyte_t[bns->l_pac/4+1]; + rewind(bns->fp_pac); + fread(reference, 1, bns->l_pac/4+1, bns->fp_pac); + fclose(bns->fp_pac); + bns->fp_pac = NULL; + + // Load the BWTs (both directions) and suffix arrays (both directions) + bwts[0] = bwt_restore_bwt(forward_bwt_filename); + bwt_restore_sa(forward_sa_filename, bwts[0]); + bwts[1] = bwt_restore_bwt(reverse_bwt_filename); + bwt_restore_sa(reverse_sa_filename, bwts[1]); + load_default_options(); + + // Always reinitialize the random seed whenever a new set of files are loaded. + initialize_random_seed(); + + // initialize the bwase subsystem + bwase_initialize(); +} + +BWA::~BWA() { + delete[] reference; + bns_destroy(bns); + bwt_destroy(bwts[0]); + bwt_destroy(bwts[1]); +} + +void BWA::find_paths(const char* bases, const unsigned read_length, bwt_aln1_t*& paths, unsigned& num_paths, unsigned& best_path_count, unsigned& second_best_path_count) +{ + bwa_seq_t* sequence = create_sequence(bases, read_length); + + // Calculate the suffix array interval for each sequence, storing the result in sequence->aln (and sequence->n_aln). + // This method will destroy the contents of seq and rseq. + bwa_cal_sa_reg_gap(0,bwts,1,sequence,&options); + + paths = new bwt_aln1_t[sequence->n_aln]; + memcpy(paths,sequence->aln,sequence->n_aln*sizeof(bwt_aln1_t)); + num_paths = sequence->n_aln; + + // Call aln2seq to initialize the type of match present. + bwa_aln2seq(sequence->n_aln,sequence->aln,sequence); + best_path_count = sequence->c1; + second_best_path_count = sequence->c2; + + bwa_free_read_seq(1,sequence); +} + +Alignment* BWA::generate_single_alignment(const char* bases, const unsigned read_length) { + bwa_seq_t* sequence = create_sequence(bases,read_length); + + // Calculate paths. + bwa_cal_sa_reg_gap(0,bwts,1,sequence,&options); + + // Check for no alignments found and return null. + if(sequence->n_aln == 0) { + bwa_free_read_seq(1,sequence); + return NULL; + } + + // bwa_cal_sa_reg_gap destroys the bases / read length. Copy them back in. + copy_bases_into_sequence(sequence,bases,read_length); + + // Pick best alignment and propagate its information into the sequence. + bwa_aln2seq(sequence->n_aln,sequence->aln,sequence); + + // Generate the best alignment from the sequence. + Alignment* alignment = new Alignment; + *alignment = generate_final_alignment_from_sequence(sequence); + + bwa_free_read_seq(1,sequence); + + return alignment; +} + +void BWA::generate_alignments_from_paths(const char* bases, + const unsigned read_length, + bwt_aln1_t* paths, + const unsigned num_paths, + const unsigned best_count, + const unsigned second_best_count, + Alignment*& alignments, + unsigned& num_alignments) +{ + bwa_seq_t* sequence = create_sequence(bases,read_length); + + sequence->aln = paths; + sequence->n_aln = num_paths; + + // (Ab)use bwa_aln2seq to propagate values stored in the path out into the sequence itself. + bwa_aln2seq(sequence->n_aln,sequence->aln,sequence); + + // But overwrite key parts of the sequence in case the user passed back only a smaller subset + // of the paths. + sequence->c1 = best_count; + sequence->c2 = second_best_count; + sequence->type = sequence->c1 > 1 ? BWA_TYPE_REPEAT : BWA_TYPE_UNIQUE; + + num_alignments = 0; + for(unsigned i = 0; i < (unsigned)sequence->n_aln; i++) + num_alignments += (sequence->aln + i)->l - (sequence->aln + i)->k + 1; + + alignments = new Alignment[num_alignments]; + unsigned alignment_idx = 0; + + for(unsigned path_idx = 0; path_idx < (unsigned)num_paths; path_idx++) { + // Stub in a 'working' path, so that only the desired alignment is local-aligned. + const bwt_aln1_t* path = paths + path_idx; + bwt_aln1_t working_path = *path; + + // Loop through all alignments, aligning each one individually. + for(unsigned sa_idx = path->k; sa_idx <= path->l; sa_idx++) { + working_path.k = working_path.l = sa_idx; + sequence->aln = &working_path; + sequence->n_aln = 1; + + sequence->sa = sa_idx; + sequence->strand = path->a; + sequence->score = path->score; + + // Each time through bwa_refine_gapped, seq gets reversed. Revert the reverse. + // TODO: Fix the interface to bwa_refine_gapped so its easier to work with. + if(alignment_idx > 0) + seq_reverse(sequence->len, sequence->seq, 0); + + // Copy the local alignment data into the alignment object. + *(alignments + alignment_idx) = generate_final_alignment_from_sequence(sequence); + + alignment_idx++; + } + } + + sequence->aln = NULL; + sequence->n_aln = 0; + + bwa_free_read_seq(1,sequence); +} + +Alignment BWA::generate_final_alignment_from_sequence(bwa_seq_t* sequence) { + // Calculate the local coordinate and local alignment. + bwa_cal_pac_pos_core(bwts[0],bwts[1],sequence,options.max_diff,options.fnr); + bwa_refine_gapped(bns, 1, sequence, reference, NULL); + + // Copy the local alignment data into the alignment object. + Alignment alignment; + + // Populate basic path info + alignment.edit_distance = sequence->nm; + alignment.num_mismatches = sequence->n_mm; + alignment.num_gap_opens = sequence->n_gapo; + alignment.num_gap_extensions = sequence->n_gape; + alignment.num_best = sequence->c1; + alignment.num_second_best = sequence->c2; + + // Final alignment position. + alignment.type = sequence->type; + bns_coor_pac2real(bns, sequence->pos, pos_end(sequence) - sequence->pos, &alignment.contig); + alignment.pos = sequence->pos - bns->anns[alignment.contig].offset + 1; + alignment.negative_strand = sequence->strand; + alignment.mapping_quality = sequence->mapQ; + + // Cigar step. + alignment.cigar = NULL; + if(sequence->cigar) { + alignment.cigar = new uint16_t[sequence->n_cigar]; + memcpy(alignment.cigar,sequence->cigar,sequence->n_cigar*sizeof(uint16_t)); + } + alignment.n_cigar = sequence->n_cigar; + + // MD tag with a better breakdown of differences in the cigar + alignment.md = strdup(sequence->md); + delete[] sequence->md; + sequence->md = NULL; + + return alignment; +} + +void BWA::load_default_options() +{ + options.s_mm = 3; + options.s_gapo = 11; + options.s_gape = 4; + options.mode = 3; + options.indel_end_skip = 5; + options.max_del_occ = 10; + options.max_entries = 2000000; + options.fnr = 0.04; + options.max_diff = -1; + options.max_gapo = 1; + options.max_gape = 6; + options.max_seed_diff = 2; + options.seed_len = 2147483647; + options.n_threads = 1; + options.max_top2 = 30; + options.trim_qual = 0; +} + +void BWA::initialize_random_seed() +{ + srand48(bns->seed); +} + +void BWA::set_max_edit_distance(float edit_distance) { + if(edit_distance > 0 && edit_distance < 1) { + options.fnr = edit_distance; + options.max_diff = -1; + } + else { + options.fnr = -1.0; + options.max_diff = (int)edit_distance; + } +} + +void BWA::set_max_gap_opens(int max_gap_opens) { options.max_gapo = max_gap_opens; } +void BWA::set_max_gap_extensions(int max_gap_extensions) { options.max_gape = max_gap_extensions; } +void BWA::set_disallow_indel_within_range(int indel_range) { options.indel_end_skip = indel_range; } +void BWA::set_mismatch_penalty(int penalty) { options.s_mm = penalty; } +void BWA::set_gap_open_penalty(int penalty) { options.s_gapo = penalty; } +void BWA::set_gap_extension_penalty(int penalty) { options.s_gape = penalty; } + +/** + * Create a sequence with a set of reasonable initial defaults. + * Will leave seq and rseq empty. + */ +bwa_seq_t* BWA::create_sequence(const char* bases, const unsigned read_length) +{ + bwa_seq_t* sequence = new bwa_seq_t; + + sequence->tid = -1; + + sequence->name = 0; + + copy_bases_into_sequence(sequence, bases, read_length); + + sequence->qual = 0; + sequence->aln = 0; + sequence->md = 0; + + sequence->cigar = NULL; + sequence->n_cigar = 0; + + sequence->multi = NULL; + sequence->n_multi = 0; + + return sequence; +} + +void BWA::copy_bases_into_sequence(bwa_seq_t* sequence, const char* bases, const unsigned read_length) +{ + // seq, rseq will ultimately be freed by bwa_cal_sa_reg_gap + sequence->seq = new ubyte_t[read_length]; + sequence->rseq = new ubyte_t[read_length]; + for(unsigned i = 0; i < read_length; i++) sequence->seq[i] = nst_nt4_table[(unsigned)bases[i]]; + memcpy(sequence->rseq,sequence->seq,read_length); + + // BWA expects the read bases to arrive reversed. + seq_reverse(read_length,sequence->seq,0); + seq_reverse(read_length,sequence->rseq,1); + + sequence->full_len = sequence->len = read_length; +} diff --git a/public/c/bwa/bwa_gateway.h b/public/c/bwa/bwa_gateway.h new file mode 100644 index 000000000..2d26ec650 --- /dev/null +++ b/public/c/bwa/bwa_gateway.h @@ -0,0 +1,83 @@ +#ifndef BWA_GATEWAY +#define BWA_GATEWAY + +#include + +#include "bntseq.h" +#include "bwt.h" +#include "bwtaln.h" + +class Alignment { + public: + uint32_t type; + int contig; + bwtint_t pos; + bool negative_strand; + uint32_t mapping_quality; + + uint16_t *cigar; + int n_cigar; + + uint8_t num_mismatches; + uint8_t num_gap_opens; + uint8_t num_gap_extensions; + uint16_t edit_distance; + + uint32_t num_best; + uint32_t num_second_best; + + char* md; +}; + +class BWA { + private: + bntseq_t *bns; + ubyte_t* reference; + bwt_t* bwts[2]; + gap_opt_t options; + + void load_default_options(); + void initialize_random_seed(); + bwa_seq_t* create_sequence(const char* bases, const unsigned read_length); + void copy_bases_into_sequence(bwa_seq_t* sequence, const char* bases, const unsigned read_length); + Alignment generate_final_alignment_from_sequence(bwa_seq_t* sequence); + + public: + BWA(const char* ann_filename, + const char* amb_filename, + const char* pac_filename, + const char* forward_bwt_filename, + const char* forward_sa_filename, + const char* reverse_bwt_filename, + const char* reverse_sa_filename); + ~BWA(); + + // Parameterize the aligner. + void set_max_edit_distance(float edit_distance); + void set_max_gap_opens(int max_gap_opens); + void set_max_gap_extensions(int max_gap_extensions); + void set_disallow_indel_within_range(int indel_range); + void set_mismatch_penalty(int penalty); + void set_gap_open_penalty(int penalty); + void set_gap_extension_penalty(int penalty); + + // Perform the alignment + Alignment* generate_single_alignment(const char* bases, + const unsigned read_length); + void find_paths(const char* bases, + const unsigned read_length, + bwt_aln1_t*& paths, + unsigned& num_paths, + unsigned& best_path_count, + unsigned& second_best_path_count); + void generate_alignments_from_paths(const char* bases, + const unsigned read_length, + bwt_aln1_t* paths, + const unsigned num_paths, + const unsigned best_count, + const unsigned second_best_count, + Alignment*& alignments, + unsigned& num_alignments); +}; + +#endif // BWA_GATEWAY diff --git a/public/c/bwa/org_broadinstitute_sting_alignment_bwa_c_BWACAligner.cpp b/public/c/bwa/org_broadinstitute_sting_alignment_bwa_c_BWACAligner.cpp new file mode 100644 index 000000000..1ccbef0d4 --- /dev/null +++ b/public/c/bwa/org_broadinstitute_sting_alignment_bwa_c_BWACAligner.cpp @@ -0,0 +1,437 @@ +#include +#include +#include + +#include "bntseq.h" +#include "bwt.h" +#include "bwtaln.h" +#include "bwa_gateway.h" +#include "org_broadinstitute_sting_alignment_bwa_c_BWACAligner.h" + +typedef void (BWA::*int_setter)(int value); +typedef void (BWA::*float_setter)(float value); + +static jobject convert_to_java_alignment(JNIEnv* env, const jbyte* read_bases, const jsize read_length, const Alignment& alignment); +static jstring get_configuration_file(JNIEnv* env, jobject configuration, const char* field_name); +static void set_int_configuration_param(JNIEnv* env, jobject configuration, const char* field_name, BWA* bwa, int_setter setter); +static void set_float_configuration_param(JNIEnv* env, jobject configuration, const char* field_name, BWA* bwa, float_setter setter); +static void throw_config_value_exception(JNIEnv* env, const char* field_name, const char* message); + +JNIEXPORT jlong JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_create(JNIEnv* env, jobject instance, jobject bwtFiles, jobject configuration) +{ + jstring java_ann = get_configuration_file(env,bwtFiles,"annFile"); + if(java_ann == NULL) return 0L; + jstring java_amb = get_configuration_file(env,bwtFiles,"ambFile"); + if(java_amb == NULL) return 0L; + jstring java_pac = get_configuration_file(env,bwtFiles,"pacFile"); + if(java_pac == NULL) return 0L; + jstring java_forward_bwt = get_configuration_file(env,bwtFiles,"forwardBWTFile"); + if(java_forward_bwt == NULL) return 0L; + jstring java_forward_sa = get_configuration_file(env,bwtFiles,"forwardSAFile"); + if(java_forward_sa == NULL) return 0L; + jstring java_reverse_bwt = get_configuration_file(env,bwtFiles,"reverseBWTFile"); + if(java_reverse_bwt == NULL) return 0L; + jstring java_reverse_sa = get_configuration_file(env,bwtFiles,"reverseSAFile"); + if(java_reverse_sa == NULL) return 0L; + + const char* ann_filename = env->GetStringUTFChars(java_ann,JNI_FALSE); + if(env->ExceptionCheck()) return 0L; + const char* amb_filename = env->GetStringUTFChars(java_amb,JNI_FALSE); + if(env->ExceptionCheck()) return 0L; + const char* pac_filename = env->GetStringUTFChars(java_pac,JNI_FALSE); + if(env->ExceptionCheck()) return 0L; + const char* forward_bwt_filename = env->GetStringUTFChars(java_forward_bwt,JNI_FALSE); + if(env->ExceptionCheck()) return 0L; + const char* forward_sa_filename = env->GetStringUTFChars(java_forward_sa,JNI_FALSE); + if(env->ExceptionCheck()) return 0L; + const char* reverse_bwt_filename = env->GetStringUTFChars(java_reverse_bwt,JNI_FALSE); + if(env->ExceptionCheck()) return 0L; + const char* reverse_sa_filename = env->GetStringUTFChars(java_reverse_sa,JNI_FALSE); + if(env->ExceptionCheck()) return 0L; + + BWA* bwa = new BWA(ann_filename, + amb_filename, + pac_filename, + forward_bwt_filename, + forward_sa_filename, + reverse_bwt_filename, + reverse_sa_filename); + + Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_updateConfiguration(env,instance,(jlong)bwa,configuration); + if(env->ExceptionCheck()) return 0L; + + env->ReleaseStringUTFChars(java_ann,ann_filename); + if(env->ExceptionCheck()) return 0L; + env->ReleaseStringUTFChars(java_amb,amb_filename); + if(env->ExceptionCheck()) return 0L; + env->ReleaseStringUTFChars(java_pac,pac_filename); + if(env->ExceptionCheck()) return 0L; + env->ReleaseStringUTFChars(java_forward_bwt,forward_bwt_filename); + if(env->ExceptionCheck()) return 0L; + env->ReleaseStringUTFChars(java_forward_sa,forward_sa_filename); + if(env->ExceptionCheck()) return 0L; + env->ReleaseStringUTFChars(java_reverse_bwt,reverse_bwt_filename); + if(env->ExceptionCheck()) return 0L; + env->ReleaseStringUTFChars(java_reverse_sa,reverse_sa_filename); + if(env->ExceptionCheck()) return 0L; + + return (jlong)bwa; +} + +JNIEXPORT void JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_destroy(JNIEnv* env, jobject instance, jlong java_bwa) +{ + BWA* bwa = (BWA*)java_bwa; + delete bwa; +} + +JNIEXPORT void JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_updateConfiguration(JNIEnv *env, jobject instance, jlong java_bwa, jobject configuration) { + BWA* bwa = (BWA*)java_bwa; + set_float_configuration_param(env, configuration, "maximumEditDistance", bwa, &BWA::set_max_edit_distance); + if(env->ExceptionCheck()) return; + set_int_configuration_param(env, configuration, "maximumGapOpens", bwa, &BWA::set_max_gap_opens); + if(env->ExceptionCheck()) return; + set_int_configuration_param(env, configuration, "maximumGapExtensions", bwa, &BWA::set_max_gap_extensions); + if(env->ExceptionCheck()) return; + set_int_configuration_param(env, configuration, "disallowIndelWithinRange", bwa, &BWA::set_disallow_indel_within_range); + if(env->ExceptionCheck()) return; + set_int_configuration_param(env, configuration, "mismatchPenalty", bwa, &BWA::set_mismatch_penalty); + if(env->ExceptionCheck()) return; + set_int_configuration_param(env, configuration, "gapOpenPenalty", bwa, &BWA::set_gap_open_penalty); + if(env->ExceptionCheck()) return; + set_int_configuration_param(env, configuration, "gapExtensionPenalty", bwa, &BWA::set_gap_extension_penalty); + if(env->ExceptionCheck()) return; +} + +JNIEXPORT jobjectArray JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_getPaths(JNIEnv *env, jobject instance, jlong java_bwa, jbyteArray java_bases) +{ + BWA* bwa = (BWA*)java_bwa; + + const jsize read_length = env->GetArrayLength(java_bases); + if(env->ExceptionCheck()) return NULL; + + jbyte *read_bases = env->GetByteArrayElements(java_bases,JNI_FALSE); + if(read_bases == NULL) return NULL; + + bwt_aln1_t* paths = NULL; + unsigned num_paths = 0; + + unsigned best_path_count, second_best_path_count; + bwa->find_paths((const char*)read_bases,read_length,paths,num_paths,best_path_count,second_best_path_count); + + jobjectArray java_paths = env->NewObjectArray(num_paths, env->FindClass("org/broadinstitute/sting/alignment/bwa/c/BWAPath"), NULL); + if(java_paths == NULL) return NULL; + + for(unsigned path_idx = 0; path_idx < (unsigned)num_paths; path_idx++) { + bwt_aln1_t& path = *(paths + path_idx); + + jclass java_path_class = env->FindClass("org/broadinstitute/sting/alignment/bwa/c/BWAPath"); + if(java_path_class == NULL) return NULL; + + jmethodID java_path_constructor = env->GetMethodID(java_path_class, "", "(IIIZJJIII)V"); + if(java_path_constructor == NULL) return NULL; + + // Note that k/l are being cast to long. Bad things will happen if JNI assumes that they're ints. + jobject java_path = env->NewObject(java_path_class, + java_path_constructor, + path.n_mm, + path.n_gapo, + path.n_gape, + path.a, + (jlong)path.k, + (jlong)path.l, + path.score, + best_path_count, + second_best_path_count); + if(java_path == NULL) return NULL; + + env->SetObjectArrayElement(java_paths,path_idx,java_path); + if(env->ExceptionCheck()) return NULL; + + env->DeleteLocalRef(java_path_class); + if(env->ExceptionCheck()) return NULL; + } + + delete[] paths; + + env->ReleaseByteArrayElements(java_bases,read_bases,JNI_FALSE); + + return env->ExceptionCheck() ? NULL : java_paths; +} + +JNIEXPORT jobjectArray JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_convertPathsToAlignments(JNIEnv *env, jobject instance, jlong java_bwa, jbyteArray java_bases, jobjectArray java_paths) +{ + BWA* bwa = (BWA*)java_bwa; + + const jsize read_length = env->GetArrayLength(java_bases); + if(env->ExceptionCheck()) return NULL; + + jbyte *read_bases = env->GetByteArrayElements(java_bases,JNI_FALSE); + if(read_bases == NULL) return NULL; + + const jsize num_paths = env->GetArrayLength(java_paths); + bwt_aln1_t* paths = new bwt_aln1_t[num_paths]; + unsigned best_count = 0, second_best_count = 0; + + for(unsigned path_idx = 0; path_idx < (unsigned)num_paths; path_idx++) { + jobject java_path = env->GetObjectArrayElement(java_paths,path_idx); + jclass java_path_class = env->GetObjectClass(java_path); + if(java_path_class == NULL) return NULL; + + bwt_aln1_t& path = *(paths + path_idx); + + jfieldID mismatches_field = env->GetFieldID(java_path_class, "numMismatches", "I"); + if(mismatches_field == NULL) return NULL; + path.n_mm = env->GetIntField(java_path,mismatches_field); + if(env->ExceptionCheck()) return NULL; + + jfieldID gap_opens_field = env->GetFieldID(java_path_class, "numGapOpens", "I"); + if(gap_opens_field == NULL) return NULL; + path.n_gapo = env->GetIntField(java_path,gap_opens_field); + if(env->ExceptionCheck()) return NULL; + + jfieldID gap_extensions_field = env->GetFieldID(java_path_class, "numGapExtensions", "I"); + if(gap_extensions_field == NULL) return NULL; + path.n_gape = env->GetIntField(java_path,gap_extensions_field); + if(env->ExceptionCheck()) return NULL; + + jfieldID negative_strand_field = env->GetFieldID(java_path_class, "negativeStrand", "Z"); + if(negative_strand_field == NULL) return NULL; + path.a = env->GetBooleanField(java_path,negative_strand_field); + if(env->ExceptionCheck()) return NULL; + + jfieldID k_field = env->GetFieldID(java_path_class, "k", "J"); + if(k_field == NULL) return NULL; + path.k = env->GetLongField(java_path,k_field); + if(env->ExceptionCheck()) return NULL; + + jfieldID l_field = env->GetFieldID(java_path_class, "l", "J"); + if(l_field == NULL) return NULL; + path.l = env->GetLongField(java_path,l_field); + if(env->ExceptionCheck()) return NULL; + + jfieldID score_field = env->GetFieldID(java_path_class, "score", "I"); + if(score_field == NULL) return NULL; + path.score = env->GetIntField(java_path,score_field); + if(env->ExceptionCheck()) return NULL; + + jfieldID best_count_field = env->GetFieldID(java_path_class, "bestCount", "I"); + if(best_count_field == NULL) return NULL; + best_count = env->GetIntField(java_path,best_count_field); + if(env->ExceptionCheck()) return NULL; + + jfieldID second_best_count_field = env->GetFieldID(java_path_class, "secondBestCount", "I"); + if(second_best_count_field == NULL) return NULL; + second_best_count = env->GetIntField(java_path,second_best_count_field); + if(env->ExceptionCheck()) return NULL; + } + + Alignment* alignments = NULL; + unsigned num_alignments = 0; + bwa->generate_alignments_from_paths((const char*)read_bases,read_length,paths,num_paths,best_count,second_best_count,alignments,num_alignments); + + jobjectArray java_alignments = env->NewObjectArray(num_alignments, env->FindClass("org/broadinstitute/sting/alignment/Alignment"), NULL); + if(java_alignments == NULL) return NULL; + + for(unsigned alignment_idx = 0; alignment_idx < (unsigned)num_alignments; alignment_idx++) { + Alignment& alignment = *(alignments + alignment_idx); + jobject java_alignment = convert_to_java_alignment(env,read_bases,read_length,alignment); + if(java_alignment == NULL) return NULL; + env->SetObjectArrayElement(java_alignments,alignment_idx,java_alignment); + if(env->ExceptionCheck()) return NULL; + } + + delete[] alignments; + delete[] paths; + + env->ReleaseByteArrayElements(java_bases,read_bases,JNI_FALSE); + + return env->ExceptionCheck() ? NULL : java_alignments; +} + +JNIEXPORT jobject JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_getBestAlignment(JNIEnv *env, jobject instance, jlong java_bwa, jbyteArray java_bases) { + BWA* bwa = (BWA*)java_bwa; + + const jsize read_length = env->GetArrayLength(java_bases); + if(env->ExceptionCheck()) return NULL; + + jbyte *read_bases = env->GetByteArrayElements(java_bases,JNI_FALSE); + if(read_bases == NULL) return NULL; + + Alignment* best_alignment = bwa->generate_single_alignment((const char*)read_bases,read_length); + jobject java_best_alignment = (best_alignment != NULL) ? convert_to_java_alignment(env,read_bases,read_length,*best_alignment) : NULL; + delete best_alignment; + + env->ReleaseByteArrayElements(java_bases,read_bases,JNI_FALSE); + + return java_best_alignment; +} + +static jobject convert_to_java_alignment(JNIEnv *env, const jbyte* read_bases, const jsize read_length, const Alignment& alignment) { + unsigned cigar_length; + if(alignment.type == BWA_TYPE_NO_MATCH) cigar_length = 0; + else if(!alignment.cigar) cigar_length = 1; + else cigar_length = alignment.n_cigar; + + jcharArray java_cigar_operators = env->NewCharArray(cigar_length); + if(java_cigar_operators == NULL) return NULL; + jintArray java_cigar_lengths = env->NewIntArray(cigar_length); + if(java_cigar_lengths == NULL) return NULL; + + if(alignment.cigar) { + for(unsigned cigar_idx = 0; cigar_idx < (unsigned)alignment.n_cigar; ++cigar_idx) { + jchar cigar_operator = "MIDS"[alignment.cigar[cigar_idx]>>14]; + jint cigar_length = alignment.cigar[cigar_idx]&0x3fff; + + env->SetCharArrayRegion(java_cigar_operators,cigar_idx,1,&cigar_operator); + if(env->ExceptionCheck()) return NULL; + env->SetIntArrayRegion(java_cigar_lengths,cigar_idx,1,&cigar_length); + if(env->ExceptionCheck()) return NULL; + } + } + else { + if(alignment.type != BWA_TYPE_NO_MATCH) { + jchar cigar_operator = 'M'; + env->SetCharArrayRegion(java_cigar_operators,0,1,&cigar_operator); + if(env->ExceptionCheck()) return NULL; + env->SetIntArrayRegion(java_cigar_lengths,0,1,&read_length); + if(env->ExceptionCheck()) return NULL; + } + } + delete[] alignment.cigar; + + jclass java_alignment_class = env->FindClass("org/broadinstitute/sting/alignment/Alignment"); + if(java_alignment_class == NULL) return NULL; + + jmethodID java_alignment_constructor = env->GetMethodID(java_alignment_class, "", "(IIZI[C[IILjava/lang/String;IIIII)V"); + if(java_alignment_constructor == NULL) return NULL; + + jstring java_md = env->NewStringUTF(alignment.md); + if(java_md == NULL) return NULL; + delete[] alignment.md; + + jobject java_alignment = env->NewObject(java_alignment_class, + java_alignment_constructor, + alignment.contig, + alignment.pos, + alignment.negative_strand, + alignment.mapping_quality, + java_cigar_operators, + java_cigar_lengths, + alignment.edit_distance, + java_md, + alignment.num_mismatches, + alignment.num_gap_opens, + alignment.num_gap_extensions, + alignment.num_best, + alignment.num_second_best); + if(java_alignment == NULL) return NULL; + + env->DeleteLocalRef(java_alignment_class); + if(env->ExceptionCheck()) return NULL; + + return java_alignment; +} + +static jstring get_configuration_file(JNIEnv* env, jobject configuration, const char* field_name) { + jclass configuration_class = env->GetObjectClass(configuration); + if(configuration_class == NULL) return NULL; + + jfieldID configuration_field = env->GetFieldID(configuration_class, field_name, "Ljava/io/File;"); + if(configuration_field == NULL) return NULL; + + jobject configuration_file = (jobject)env->GetObjectField(configuration,configuration_field); + + jclass file_class = env->FindClass("java/io/File"); + if(file_class == NULL) return NULL; + + jmethodID path_extractor = env->GetMethodID(file_class,"getAbsolutePath", "()Ljava/lang/String;"); + if(path_extractor == NULL) return NULL; + + jstring path = (jstring)env->CallObjectMethod(configuration_file,path_extractor); + if(path == NULL) return NULL; + + env->DeleteLocalRef(configuration_class); + env->DeleteLocalRef(file_class); + env->DeleteLocalRef(configuration_file); + + return path; +} + +static void set_int_configuration_param(JNIEnv* env, jobject configuration, const char* field_name, BWA* bwa, int_setter setter) { + jclass configuration_class = env->GetObjectClass(configuration); + if(configuration_class == NULL) return; + + jfieldID configuration_field = env->GetFieldID(configuration_class, field_name, "Ljava/lang/Integer;"); + if(configuration_field == NULL) return; + + jobject boxed_value = env->GetObjectField(configuration,configuration_field); + if(env->ExceptionCheck()) return; + + if(boxed_value != NULL) { + jclass int_box_class = env->FindClass("java/lang/Integer"); + if(int_box_class == NULL) return; + + jmethodID int_extractor = env->GetMethodID(int_box_class,"intValue", "()I"); + if(int_extractor == NULL) return; + + jint value = env->CallIntMethod(boxed_value,int_extractor); + if(env->ExceptionCheck()) return; + + if(value < 0) + { + throw_config_value_exception(env,field_name,"cannot be set to a negative value"); + return; + } + + (bwa->*setter)(value); + + env->DeleteLocalRef(int_box_class); + } + + env->DeleteLocalRef(boxed_value); + env->DeleteLocalRef(configuration_class); +} + +static void set_float_configuration_param(JNIEnv* env, jobject configuration, const char* field_name, BWA* bwa, float_setter setter) +{ + jclass configuration_class = env->GetObjectClass(configuration); + if(configuration_class == NULL) return; + + jfieldID configuration_field = env->GetFieldID(configuration_class, field_name, "Ljava/lang/Float;"); + if(configuration_field == NULL) return; + + jobject boxed_value = env->GetObjectField(configuration,configuration_field); + if(boxed_value != NULL) { + jclass float_box_class = env->FindClass("java/lang/Float"); + if(float_box_class == NULL) return; + + jmethodID float_extractor = env->GetMethodID(float_box_class,"floatValue", "()F"); + if(float_extractor == NULL) return; + + jfloat value = env->CallFloatMethod(boxed_value,float_extractor); + if(env->ExceptionCheck()) return; + + if(value < 0) + { + throw_config_value_exception(env,field_name,"cannot be set to a negative value"); + return; + } + + (bwa->*setter)(value); + + env->DeleteLocalRef(float_box_class); + } + + env->DeleteLocalRef(boxed_value); + env->DeleteLocalRef(configuration_class); +} + +static void throw_config_value_exception(JNIEnv* env, const char* field_name, const char* message) +{ + char* buffer = new char[strlen(field_name)+1+strlen(message)+1]; + sprintf(buffer,"%s %s",field_name,message); + jclass sting_exception_class = env->FindClass("org/broadinstitute/sting/utils/StingException"); + if(sting_exception_class == NULL) return; + env->ThrowNew(sting_exception_class, buffer); + delete[] buffer; +} diff --git a/public/c/bwa/org_broadinstitute_sting_alignment_bwa_c_BWACAligner.h b/public/c/bwa/org_broadinstitute_sting_alignment_bwa_c_BWACAligner.h new file mode 100644 index 000000000..0c44e430a --- /dev/null +++ b/public/c/bwa/org_broadinstitute_sting_alignment_bwa_c_BWACAligner.h @@ -0,0 +1,61 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_broadinstitute_sting_alignment_bwa_c_BWACAligner */ + +#ifndef _Included_org_broadinstitute_sting_alignment_bwa_c_BWACAligner +#define _Included_org_broadinstitute_sting_alignment_bwa_c_BWACAligner +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_broadinstitute_sting_alignment_bwa_c_BWACAligner + * Method: create + * Signature: (Lorg/broadinstitute/sting/alignment/bwa/BWTFiles;Lorg/broadinstitute/sting/alignment/bwa/BWAConfiguration;)J + */ +JNIEXPORT jlong JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_create + (JNIEnv *, jobject, jobject, jobject); + +/* + * Class: org_broadinstitute_sting_alignment_bwa_c_BWACAligner + * Method: updateConfiguration + * Signature: (JLorg/broadinstitute/sting/alignment/bwa/BWAConfiguration;)V + */ +JNIEXPORT void JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_updateConfiguration + (JNIEnv *, jobject, jlong, jobject); + +/* + * Class: org_broadinstitute_sting_alignment_bwa_c_BWACAligner + * Method: destroy + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_destroy + (JNIEnv *, jobject, jlong); + +/* + * Class: org_broadinstitute_sting_alignment_bwa_c_BWACAligner + * Method: getPaths + * Signature: (J[B)[Lorg/broadinstitute/sting/alignment/bwa/c/BWAPath; + */ +JNIEXPORT jobjectArray JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_getPaths + (JNIEnv *, jobject, jlong, jbyteArray); + +/* + * Class: org_broadinstitute_sting_alignment_bwa_c_BWACAligner + * Method: convertPathsToAlignments + * Signature: (J[B[Lorg/broadinstitute/sting/alignment/bwa/c/BWAPath;)[Lorg/broadinstitute/sting/alignment/Alignment; + */ +JNIEXPORT jobjectArray JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_convertPathsToAlignments + (JNIEnv *, jobject, jlong, jbyteArray, jobjectArray); + +/* + * Class: org_broadinstitute_sting_alignment_bwa_c_BWACAligner + * Method: getBestAlignment + * Signature: (J[B)Lorg/broadinstitute/sting/alignment/Alignment; + */ +JNIEXPORT jobject JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_getBestAlignment + (JNIEnv *, jobject, jlong, jbyteArray); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/public/c/libenvironhack/Makefile b/public/c/libenvironhack/Makefile new file mode 100644 index 000000000..302ff8e31 --- /dev/null +++ b/public/c/libenvironhack/Makefile @@ -0,0 +1,10 @@ +CC=gcc +CCFLAGS=-Wall -dynamiclib -arch i386 -arch x86_64 + +libenvironhack.dylib: libenvironhack.c + $(CC) $(CCFLAGS) -init _init_environ $< -o $@ + +all: libenvironhack.dylib + +clean: + rm -f libenvironhack.dylib diff --git a/public/c/libenvironhack/libenvironhack.c b/public/c/libenvironhack/libenvironhack.c new file mode 100644 index 000000000..8b2a2640e --- /dev/null +++ b/public/c/libenvironhack/libenvironhack.c @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2010, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +/* +LSF 7.0.6 on the mac is missing the unsatisfied exported symbol for environ which was removed on MacOS X 10.5+. +nm $LSF_LIBDIR/liblsf.dylib | grep environ +See "man environ" for more info, along with http://lists.apple.com/archives/java-dev/2007/Dec/msg00096.html +*/ + +#include + +char **environ = (char **)0; + +void init_environ(void) { + environ = (*_NSGetEnviron()); +} diff --git a/public/c/libenvironhack/libenvironhack.dylib b/public/c/libenvironhack/libenvironhack.dylib new file mode 100755 index 000000000..a45e038b4 Binary files /dev/null and b/public/c/libenvironhack/libenvironhack.dylib differ diff --git a/public/java/src/org/broadinstitute/sting/alignment/Aligner.java b/public/java/src/org/broadinstitute/sting/alignment/Aligner.java new file mode 100644 index 000000000..4bf05cb75 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/Aligner.java @@ -0,0 +1,49 @@ +package org.broadinstitute.sting.alignment; + +import net.sf.samtools.SAMFileHeader; +import net.sf.samtools.SAMRecord; + +/** + * Create perfect alignments from the read to the genome represented by the given BWT / suffix array. + * + * @author mhanna + * @version 0.1 + */ +public interface Aligner { + /** + * Close this instance of the BWA pointer and delete its resources. + */ + public void close(); + + /** + * Allow the aligner to choose one alignment randomly from the pile of best alignments. + * @param bases Bases to align. + * @return An align + */ + public Alignment getBestAlignment(final byte[] bases); + + /** + * Align the read to the reference. + * @param read Read to align. + * @param header Optional header to drop in place. + * @return A list of the alignments. + */ + public SAMRecord align(final SAMRecord read, final SAMFileHeader header); + + /** + * Get a iterator of alignments, batched by mapping quality. + * @param bases List of bases. + * @return Iterator to alignments. + */ + public Iterable getAllAlignments(final byte[] bases); + + /** + * Get a iterator of aligned reads, batched by mapping quality. + * @param read Read to align. + * @param newHeader Optional new header to use when aligning the read. If present, it must be null. + * @return Iterator to alignments. + */ + public Iterable alignAll(final SAMRecord read, final SAMFileHeader newHeader); +} + + diff --git a/public/java/src/org/broadinstitute/sting/alignment/Alignment.java b/public/java/src/org/broadinstitute/sting/alignment/Alignment.java new file mode 100644 index 000000000..c63f5615f --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/Alignment.java @@ -0,0 +1,221 @@ +package org.broadinstitute.sting.alignment; + +import net.sf.samtools.*; +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + +/** + * Represents an alignment of a read to a site in the reference genome. + * + * @author mhanna + * @version 0.1 + */ +public class Alignment { + protected int contigIndex; + protected long alignmentStart; + protected boolean negativeStrand; + protected int mappingQuality; + + protected char[] cigarOperators; + protected int[] cigarLengths; + + protected int editDistance; + protected String mismatchingPositions; + + protected int numMismatches; + protected int numGapOpens; + protected int numGapExtensions; + protected int bestCount; + protected int secondBestCount; + + /** + * Gets the index of the given contig. + * @return the inde + */ + public int getContigIndex() { return contigIndex; } + + /** + * Gets the starting position for the given alignment. + * @return Starting position. + */ + public long getAlignmentStart() { return alignmentStart; } + + /** + * Is the given alignment on the reverse strand? + * @return True if the alignment is on the reverse strand. + */ + public boolean isNegativeStrand() { return negativeStrand; } + + /** + * Gets the score of this alignment. + * @return The score. + */ + public int getMappingQuality() { return mappingQuality; } + + /** + * Gets the edit distance; will eventually end up in the NM SAM tag + * if this alignment makes it that far. + * @return The edit distance. + */ + public int getEditDistance() { return editDistance; } + + /** + * A string representation of which positions mismatch; contents of MD tag. + * @return String representation of mismatching positions. + */ + public String getMismatchingPositions() { return mismatchingPositions; } + + /** + * Gets the number of mismatches in the read. + * @return Number of mismatches. + */ + public int getNumMismatches() { return numMismatches; } + + /** + * Get the number of gap opens. + * @return Number of gap opens. + */ + public int getNumGapOpens() { return numGapOpens; } + + /** + * Get the number of gap extensions. + * @return Number of gap extensions. + */ + public int getNumGapExtensions() { return numGapExtensions; } + + /** + * Get the number of best alignments. + * @return Number of top scoring alignments. + */ + public int getBestCount() { return bestCount; } + + /** + * Get the number of second best alignments. + * @return Number of second best scoring alignments. + */ + public int getSecondBestCount() { return secondBestCount; } + + /** + * Gets the cigar for this alignment. + * @return sam-jdk formatted alignment. + */ + public Cigar getCigar() { + Cigar cigar = new Cigar(); + for(int i = 0; i < cigarOperators.length; i++) { + CigarOperator operator = CigarOperator.characterToEnum(cigarOperators[i]); + cigar.add(new CigarElement(cigarLengths[i],operator)); + } + return cigar; + } + + /** + * Temporarily implement getCigarString() for debugging; the TextCigarCodec is unfortunately + * package-protected. + * @return + */ + public String getCigarString() { + Cigar cigar = getCigar(); + if(cigar.isEmpty()) return "*"; + + StringBuilder cigarString = new StringBuilder(); + for(CigarElement element: cigar.getCigarElements()) { + cigarString.append(element.getLength()); + cigarString.append(element.getOperator()); + } + return cigarString.toString(); + } + + /** + * Stub for inheritance. + */ + public Alignment() {} + + /** + * Create a new alignment object. + * @param contigIndex The contig to which this read aligned. + * @param alignmentStart The point within the contig to which this read aligned. + * @param negativeStrand Forward or reverse alignment of the given read. + * @param mappingQuality How good does BWA think this mapping is? + * @param cigarOperators The ordered operators in the cigar string. + * @param cigarLengths The lengths to which each operator applies. + * @param editDistance The edit distance (cumulative) of the read. + * @param mismatchingPositions String representation of which bases in the read mismatch. + * @param numMismatches Number of total mismatches in the read. + * @param numGapOpens Number of gap opens in the read. + * @param numGapExtensions Number of gap extensions in the read. + * @param bestCount Number of best alignments in the read. + * @param secondBestCount Number of second best alignments in the read. + */ + public Alignment(int contigIndex, + int alignmentStart, + boolean negativeStrand, + int mappingQuality, + char[] cigarOperators, + int[] cigarLengths, + int editDistance, + String mismatchingPositions, + int numMismatches, + int numGapOpens, + int numGapExtensions, + int bestCount, + int secondBestCount) { + this.contigIndex = contigIndex; + this.alignmentStart = alignmentStart; + this.negativeStrand = negativeStrand; + this.mappingQuality = mappingQuality; + this.cigarOperators = cigarOperators; + this.cigarLengths = cigarLengths; + this.editDistance = editDistance; + this.mismatchingPositions = mismatchingPositions; + this.numMismatches = numMismatches; + this.numGapOpens = numGapOpens; + this.numGapExtensions = numGapExtensions; + this.bestCount = bestCount; + this.secondBestCount = secondBestCount; + } + + /** + * Creates a read directly from an alignment. + * @param alignment The alignment to convert to a read. + * @param unmappedRead Source of the unmapped read. Should have bases, quality scores, and flags. + * @param newSAMHeader The new SAM header to use in creating this read. Can be null, but if so, the sequence + * dictionary in the + * @return A mapped alignment. + */ + public static SAMRecord convertToRead(Alignment alignment, SAMRecord unmappedRead, SAMFileHeader newSAMHeader) { + SAMRecord read; + try { + read = (SAMRecord)unmappedRead.clone(); + } + catch(CloneNotSupportedException ex) { + throw new ReviewedStingException("Unable to create aligned read from template."); + } + + if(newSAMHeader != null) + read.setHeader(newSAMHeader); + + // If we're realigning a previously aligned record, strip out the placement of the alignment. + read.setReferenceName(SAMRecord.NO_ALIGNMENT_REFERENCE_NAME); + read.setAlignmentStart(SAMRecord.NO_ALIGNMENT_START); + read.setMateReferenceName(SAMRecord.NO_ALIGNMENT_REFERENCE_NAME); + read.setMateAlignmentStart(SAMRecord.NO_ALIGNMENT_START); + + if(alignment != null) { + read.setReadUnmappedFlag(false); + read.setReferenceIndex(alignment.getContigIndex()); + read.setAlignmentStart((int)alignment.getAlignmentStart()); + read.setReadNegativeStrandFlag(alignment.isNegativeStrand()); + read.setMappingQuality(alignment.getMappingQuality()); + read.setCigar(alignment.getCigar()); + if(alignment.isNegativeStrand()) { + read.setReadBases(BaseUtils.simpleReverseComplement(read.getReadBases())); + read.setBaseQualities(Utils.reverse(read.getBaseQualities())); + } + read.setAttribute("NM",alignment.getEditDistance()); + read.setAttribute("MD",alignment.getMismatchingPositions()); + } + + return read; + } +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/AlignmentValidationWalker.java b/public/java/src/org/broadinstitute/sting/alignment/AlignmentValidationWalker.java new file mode 100644 index 000000000..c6755e878 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/AlignmentValidationWalker.java @@ -0,0 +1,157 @@ +/* + * Copyright (c) 2010 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.alignment; + +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.alignment.bwa.BWAConfiguration; +import org.broadinstitute.sting.alignment.bwa.BWTFiles; +import org.broadinstitute.sting.alignment.bwa.c.BWACAligner; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.ReadWalker; +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + +import java.util.Iterator; + +/** + * Validates consistency of the aligner interface by taking reads already aligned by BWA in a BAM file, stripping them + * of their alignment data, realigning them, and making sure one of the best resulting realignments matches the original + * alignment from the input file. + * + * @author mhanna + * @version 0.1 + */ +public class AlignmentValidationWalker extends ReadWalker { + /** + * The supporting BWT index generated using BWT. + */ + @Argument(fullName="BWTPrefix",shortName="BWT",doc="Index files generated by bwa index -d bwtsw",required=false) + private String prefix = null; + + /** + * The instance used to generate alignments. + */ + private BWACAligner aligner = null; + + /** + * Create an aligner object. The aligner object will load and hold the BWT until close() is called. + */ + @Override + public void initialize() { + if(prefix == null) + prefix = getToolkit().getArguments().referenceFile.getAbsolutePath(); + BWTFiles bwtFiles = new BWTFiles(prefix); + BWAConfiguration configuration = new BWAConfiguration(); + aligner = new BWACAligner(bwtFiles,configuration); + } + + /** + * Aligns a read to the given reference. + * @param ref Reference over the read. Read will most likely be unmapped, so ref will be null. + * @param read Read to align. + * @return Number of reads aligned by this map (aka 1). + */ + @Override + public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) { + //logger.info(String.format("examining read %s", read.getReadName())); + + byte[] bases = read.getReadBases(); + if(read.getReadNegativeStrandFlag()) bases = BaseUtils.simpleReverseComplement(bases); + + boolean matches = true; + Iterable alignments = aligner.getAllAlignments(bases); + Iterator alignmentIterator = alignments.iterator(); + + if(!alignmentIterator.hasNext()) { + matches = read.getReadUnmappedFlag(); + } + else { + Alignment[] alignmentsOfBestQuality = alignmentIterator.next(); + for(Alignment alignment: alignmentsOfBestQuality) { + matches = (alignment.getContigIndex() == read.getReferenceIndex()); + matches &= (alignment.getAlignmentStart() == read.getAlignmentStart()); + matches &= (alignment.isNegativeStrand() == read.getReadNegativeStrandFlag()); + matches &= (alignment.getCigar().equals(read.getCigar())); + matches &= (alignment.getMappingQuality() == read.getMappingQuality()); + if(matches) break; + } + } + + if(!matches) { + logger.error("Found mismatch!"); + logger.error(String.format("Read %s:",read.getReadName())); + logger.error(String.format(" Contig index: %d",read.getReferenceIndex())); + logger.error(String.format(" Alignment start: %d", read.getAlignmentStart())); + logger.error(String.format(" Negative strand: %b", read.getReadNegativeStrandFlag())); + logger.error(String.format(" Cigar: %s%n", read.getCigarString())); + logger.error(String.format(" Mapping quality: %s%n", read.getMappingQuality())); + for(Alignment[] alignmentsByScore: alignments) { + for(int i = 0; i < alignmentsByScore.length; i++) { + logger.error(String.format("Alignment %d:",i)); + logger.error(String.format(" Contig index: %d",alignmentsByScore[i].getContigIndex())); + logger.error(String.format(" Alignment start: %d", alignmentsByScore[i].getAlignmentStart())); + logger.error(String.format(" Negative strand: %b", alignmentsByScore[i].isNegativeStrand())); + logger.error(String.format(" Cigar: %s", alignmentsByScore[i].getCigarString())); + logger.error(String.format(" Mapping quality: %s%n", alignmentsByScore[i].getMappingQuality())); + } + } + throw new ReviewedStingException(String.format("Read %s mismatches!", read.getReadName())); + } + + return 1; + } + + /** + * Initial value for reduce. In this case, validated reads will be counted. + * @return 0, indicating no reads yet validated. + */ + @Override + public Integer reduceInit() { return 0; } + + /** + * Calculates the number of reads processed. + * @param value Number of reads processed by this map. + * @param sum Number of reads processed before this map. + * @return Number of reads processed up to and including this map. + */ + @Override + public Integer reduce(Integer value, Integer sum) { + return value + sum; + } + + /** + * Cleanup. + * @param result Number of reads processed. + */ + @Override + public void onTraversalDone(Integer result) { + aligner.close(); + super.onTraversalDone(result); + } + +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/AlignmentWalker.java b/public/java/src/org/broadinstitute/sting/alignment/AlignmentWalker.java new file mode 100644 index 000000000..7064e637f --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/AlignmentWalker.java @@ -0,0 +1,134 @@ +/* + * Copyright (c) 2010 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.alignment; + +import net.sf.picard.reference.ReferenceSequenceFileFactory; +import net.sf.samtools.SAMFileHeader; +import net.sf.samtools.SAMRecord; +import net.sf.samtools.SAMSequenceDictionary; +import org.broadinstitute.sting.alignment.bwa.BWAConfiguration; +import org.broadinstitute.sting.alignment.bwa.BWTFiles; +import org.broadinstitute.sting.alignment.bwa.c.BWACAligner; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.io.StingSAMFileWriter; +import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.ReadWalker; +import org.broadinstitute.sting.gatk.walkers.WalkerName; + +import java.io.File; + +/** + * Aligns reads to a given reference using Heng Li's BWA aligner, presenting the resulting alignments in SAM or BAM format. + * Mimics the steps 'bwa aln' followed by 'bwa samse' using the BWA/C implementation. + * + * @author mhanna + * @version 0.1 + */ +@WalkerName("Align") +public class AlignmentWalker extends ReadWalker { + @Argument(fullName="target_reference",shortName="target_ref",doc="The reference to which reads in the source file should be aligned. Alongside this reference should sit index files " + + "generated by bwa index -d bwtsw. If unspecified, will default " + + "to the reference specified via the -R argument.",required=false) + private File targetReferenceFile = null; + + @Output + private StingSAMFileWriter out = null; + + /** + * The actual aligner. + */ + private BWACAligner aligner = null; + + /** + * New header to use, if desired. + */ + private SAMFileHeader header; + + /** + * Create an aligner object. The aligner object will load and hold the BWT until close() is called. + */ + @Override + public void initialize() { + if(targetReferenceFile == null) + targetReferenceFile = getToolkit().getArguments().referenceFile; + BWTFiles bwtFiles = new BWTFiles(targetReferenceFile.getAbsolutePath()); + BWAConfiguration configuration = new BWAConfiguration(); + aligner = new BWACAligner(bwtFiles,configuration); + + // Take the header of the SAM file, tweak it by adding in the reference dictionary and specifying that the target file is unsorted. + header = getToolkit().getSAMFileHeader().clone(); + SAMSequenceDictionary referenceDictionary = + ReferenceSequenceFileFactory.getReferenceSequenceFile(targetReferenceFile).getSequenceDictionary(); + header.setSequenceDictionary(referenceDictionary); + header.setSortOrder(SAMFileHeader.SortOrder.unsorted); + + out.writeHeader(header); + } + + /** + * Aligns a read to the given reference. + * @param ref Reference over the read. Read will most likely be unmapped, so ref will be null. + * @param read Read to align. + * @return Number of alignments found for this read. + */ + @Override + public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) { + SAMRecord alignedRead = aligner.align(read,header); + out.addAlignment(alignedRead); + return 1; + } + + /** + * Initial value for reduce. In this case, alignments will be counted. + * @return 0, indicating no alignments yet found. + */ + @Override + public Integer reduceInit() { return 0; } + + /** + * Calculates the number of alignments found. + * @param value Number of alignments found by this map. + * @param sum Number of alignments found before this map. + * @return Number of alignments found up to and including this map. + */ + @Override + public Integer reduce(Integer value, Integer sum) { + return value + sum; + } + + /** + * Cleanup. + * @param result Number of reads processed. + */ + @Override + public void onTraversalDone(Integer result) { + aligner.close(); + super.onTraversalDone(result); + } + +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/CountBestAlignmentsWalker.java b/public/java/src/org/broadinstitute/sting/alignment/CountBestAlignmentsWalker.java new file mode 100644 index 000000000..57d92319f --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/CountBestAlignmentsWalker.java @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2010 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.alignment; + +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.alignment.bwa.BWAConfiguration; +import org.broadinstitute.sting.alignment.bwa.BWTFiles; +import org.broadinstitute.sting.alignment.bwa.c.BWACAligner; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.ReadWalker; + +import java.io.PrintStream; +import java.util.Iterator; +import java.util.Map; +import java.util.SortedMap; +import java.util.TreeMap; + +/** + * Counts the number of best alignments as presented by BWA and outputs a histogram of number of placements vs. the + * frequency of that number of placements. + * + * @author mhanna + * @version 0.1 + */ +public class CountBestAlignmentsWalker extends ReadWalker { + /** + * The supporting BWT index generated using BWT. + */ + @Argument(fullName="BWTPrefix",shortName="BWT",doc="Index files generated by bwa index -d bwtsw",required=false) + private String prefix = null; + + @Output + private PrintStream out = null; + + /** + * The actual aligner. + */ + private Aligner aligner = null; + + private SortedMap alignmentFrequencies = new TreeMap(); + + /** + * Create an aligner object. The aligner object will load and hold the BWT until close() is called. + */ + @Override + public void initialize() { + if(prefix == null) + prefix = getToolkit().getArguments().referenceFile.getAbsolutePath(); + BWTFiles bwtFiles = new BWTFiles(prefix); + BWAConfiguration configuration = new BWAConfiguration(); + aligner = new BWACAligner(bwtFiles,configuration); + } + + /** + * Aligns a read to the given reference. + * @param ref Reference over the read. Read will most likely be unmapped, so ref will be null. + * @param read Read to align. + * @return Number of alignments found for this read. + */ + @Override + public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) { + Iterator alignmentIterator = aligner.getAllAlignments(read.getReadBases()).iterator(); + if(alignmentIterator.hasNext()) { + int numAlignments = alignmentIterator.next().length; + if(alignmentFrequencies.containsKey(numAlignments)) + alignmentFrequencies.put(numAlignments,alignmentFrequencies.get(numAlignments)+1); + else + alignmentFrequencies.put(numAlignments,1); + } + return 1; + } + + /** + * Initial value for reduce. In this case, validated reads will be counted. + * @return 0, indicating no reads yet validated. + */ + @Override + public Integer reduceInit() { return 0; } + + /** + * Calculates the number of reads processed. + * @param value Number of reads processed by this map. + * @param sum Number of reads processed before this map. + * @return Number of reads processed up to and including this map. + */ + @Override + public Integer reduce(Integer value, Integer sum) { + return value + sum; + } + + /** + * Cleanup. + * @param result Number of reads processed. + */ + @Override + public void onTraversalDone(Integer result) { + aligner.close(); + for(Map.Entry alignmentFrequency: alignmentFrequencies.entrySet()) + out.printf("%d\t%d%n", alignmentFrequency.getKey(), alignmentFrequency.getValue()); + super.onTraversalDone(result); + } +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/bwa/BWAAligner.java b/public/java/src/org/broadinstitute/sting/alignment/bwa/BWAAligner.java new file mode 100644 index 000000000..ddbf784f5 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/bwa/BWAAligner.java @@ -0,0 +1,38 @@ +package org.broadinstitute.sting.alignment.bwa; + +import org.broadinstitute.sting.alignment.Aligner; + +/** + * Align reads using BWA. + * + * @author mhanna + * @version 0.1 + */ +public abstract class BWAAligner implements Aligner { + /** + * The supporting files used by BWA. + */ + protected BWTFiles bwtFiles; + + /** + * The current configuration for the BWA aligner. + */ + protected BWAConfiguration configuration; + + /** + * Create a new BWAAligner. Purpose of this call is to ensure that all BWA constructors accept the correct + * parameters. + * @param bwtFiles The many files representing BWTs persisted to disk. + * @param configuration Configuration parameters for the alignment. + */ + public BWAAligner(BWTFiles bwtFiles, BWAConfiguration configuration) { + this.bwtFiles = bwtFiles; + this.configuration = configuration; + } + + /** + * Update the configuration passed to the BWA aligner. + * @param configuration New configuration to set. + */ + public abstract void updateConfiguration(BWAConfiguration configuration); +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/bwa/BWAConfiguration.java b/public/java/src/org/broadinstitute/sting/alignment/bwa/BWAConfiguration.java new file mode 100644 index 000000000..73441cb6a --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/bwa/BWAConfiguration.java @@ -0,0 +1,44 @@ +package org.broadinstitute.sting.alignment.bwa; + +/** + * Configuration for the BWA/C aligner. + * + * @author mhanna + * @version 0.1 + */ +public class BWAConfiguration { + /** + * The maximum edit distance used by BWA. + */ + public Float maximumEditDistance = null; + + /** + * How many gap opens are acceptable within this alignment? + */ + public Integer maximumGapOpens = null; + + /** + * How many gap extensions are acceptable within this alignment? + */ + public Integer maximumGapExtensions = null; + + /** + * Do we disallow indels within a certain range from the start / end? + */ + public Integer disallowIndelWithinRange = null; + + /** + * What is the scoring penalty for a mismatch? + */ + public Integer mismatchPenalty = null; + + /** + * What is the scoring penalty for a gap open? + */ + public Integer gapOpenPenalty = null; + + /** + * What is the scoring penalty for a gap extension? + */ + public Integer gapExtensionPenalty = null; +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/bwa/BWTFiles.java b/public/java/src/org/broadinstitute/sting/alignment/bwa/BWTFiles.java new file mode 100644 index 000000000..a0589ac84 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/bwa/BWTFiles.java @@ -0,0 +1,234 @@ +package org.broadinstitute.sting.alignment.bwa; + +import net.sf.samtools.SAMSequenceDictionary; +import net.sf.samtools.SAMSequenceRecord; +import net.sf.samtools.util.StringUtil; +import org.broadinstitute.sting.alignment.reference.bwt.*; +import org.broadinstitute.sting.alignment.reference.packing.PackUtils; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + +import java.io.File; +import java.io.IOException; + +/** + * Support files for BWT. + * + * @author mhanna + * @version 0.1 + */ +public class BWTFiles { + /** + * ANN (?) file name. + */ + public final File annFile; + + /** + * AMB (?) file name. + */ + public final File ambFile; + + /** + * Packed reference sequence file. + */ + public final File pacFile; + + /** + * Reverse of packed reference sequence file. + */ + public final File rpacFile; + + /** + * Forward BWT file. + */ + public final File forwardBWTFile; + + /** + * Forward suffix array file. + */ + public final File forwardSAFile; + + /** + * Reverse BWT file. + */ + public final File reverseBWTFile; + + /** + * Reverse suffix array file. + */ + public final File reverseSAFile; + + /** + * Where these files autogenerated on the fly? + */ + public final boolean autogenerated; + + /** + * Create a new BWA configuration file using the given prefix. + * @param prefix Prefix to use when creating the configuration. Must not be null. + */ + public BWTFiles(String prefix) { + if(prefix == null) + throw new ReviewedStingException("Prefix must not be null."); + annFile = new File(prefix + ".ann"); + ambFile = new File(prefix + ".amb"); + pacFile = new File(prefix + ".pac"); + rpacFile = new File(prefix + ".rpac"); + forwardBWTFile = new File(prefix + ".bwt"); + forwardSAFile = new File(prefix + ".sa"); + reverseBWTFile = new File(prefix + ".rbwt"); + reverseSAFile = new File(prefix + ".rsa"); + autogenerated = false; + } + + /** + * Hand-create a new BWTFiles object, specifying a unique file object for each type. + * @param annFile ANN (alternate dictionary) file. + * @param ambFile AMB (holes) files. + * @param pacFile Packed representation of the forward reference sequence. + * @param forwardBWTFile BWT representation of the forward reference sequence. + * @param forwardSAFile SA representation of the forward reference sequence. + * @param rpacFile Packed representation of the reversed reference sequence. + * @param reverseBWTFile BWT representation of the reversed reference sequence. + * @param reverseSAFile SA representation of the reversed reference sequence. + */ + private BWTFiles(File annFile, + File ambFile, + File pacFile, + File forwardBWTFile, + File forwardSAFile, + File rpacFile, + File reverseBWTFile, + File reverseSAFile) { + this.annFile = annFile; + this.ambFile = ambFile; + this.pacFile = pacFile; + this.forwardBWTFile = forwardBWTFile; + this.forwardSAFile = forwardSAFile; + this.rpacFile = rpacFile; + this.reverseBWTFile = reverseBWTFile; + this.reverseSAFile = reverseSAFile; + autogenerated = true; + } + + /** + * Close out this files object, in the process deleting any temporary filse + * that were created. + */ + public void close() { + if(autogenerated) { + boolean success = true; + success = annFile.delete(); + success &= ambFile.delete(); + success &= pacFile.delete(); + success &= forwardBWTFile.delete(); + success &= forwardSAFile.delete(); + success &= rpacFile.delete(); + success &= reverseBWTFile.delete(); + success &= reverseSAFile.delete(); + + if(!success) + throw new ReviewedStingException("Unable to clean up autogenerated representation"); + } + } + + /** + * Create a new set of BWT files from the given reference sequence. + * @param referenceSequence Sequence from which to build metadata. + * @return A new object representing encoded representations of each sequence. + */ + public static BWTFiles createFromReferenceSequence(byte[] referenceSequence) { + byte[] normalizedReferenceSequence = new byte[referenceSequence.length]; + System.arraycopy(referenceSequence,0,normalizedReferenceSequence,0,referenceSequence.length); + normalizeReferenceSequence(normalizedReferenceSequence); + + File annFile,ambFile,pacFile,bwtFile,saFile,rpacFile,rbwtFile,rsaFile; + try { + // Write the ann and amb for this reference sequence. + annFile = File.createTempFile("bwt",".ann"); + ambFile = File.createTempFile("bwt",".amb"); + + SAMSequenceDictionary dictionary = new SAMSequenceDictionary(); + dictionary.addSequence(new SAMSequenceRecord("autogenerated",normalizedReferenceSequence.length)); + + ANNWriter annWriter = new ANNWriter(annFile); + annWriter.write(dictionary); + annWriter.close(); + + AMBWriter ambWriter = new AMBWriter(ambFile); + ambWriter.writeEmpty(dictionary); + ambWriter.close(); + + // Write the encoded files for the forward version of this reference sequence. + pacFile = File.createTempFile("bwt",".pac"); + bwtFile = File.createTempFile("bwt",".bwt"); + saFile = File.createTempFile("bwt",".sa"); + + writeEncodedReferenceSequence(normalizedReferenceSequence,pacFile,bwtFile,saFile); + + // Write the encoded files for the reverse version of this reference sequence. + byte[] reverseReferenceSequence = Utils.reverse(normalizedReferenceSequence); + + rpacFile = File.createTempFile("bwt",".rpac"); + rbwtFile = File.createTempFile("bwt",".rbwt"); + rsaFile = File.createTempFile("bwt",".rsa"); + + writeEncodedReferenceSequence(reverseReferenceSequence,rpacFile,rbwtFile,rsaFile); + } + catch(IOException ex) { + throw new ReviewedStingException("Unable to write autogenerated reference sequence to temporary files"); + } + + // Make sure that, at the very least, all temporary files are deleted on exit. + annFile.deleteOnExit(); + ambFile.deleteOnExit(); + pacFile.deleteOnExit(); + bwtFile.deleteOnExit(); + saFile.deleteOnExit(); + rpacFile.deleteOnExit(); + rbwtFile.deleteOnExit(); + rsaFile.deleteOnExit(); + + return new BWTFiles(annFile,ambFile,pacFile,bwtFile,saFile,rpacFile,rbwtFile,rsaFile); + } + + /** + * Write the encoded form of the reference sequence. In the case of BWA, the encoded reference + * sequence is the reference itself in PAC format, the BWT, and the suffix array. + * @param referenceSequence The reference sequence to encode. + * @param pacFile Target for the PAC-encoded reference. + * @param bwtFile Target for the BWT representation of the reference. + * @param suffixArrayFile Target for the suffix array encoding of the reference. + * @throws java.io.IOException In case of issues writing to the file. + */ + private static void writeEncodedReferenceSequence(byte[] referenceSequence, + File pacFile, + File bwtFile, + File suffixArrayFile) throws IOException { + PackUtils.writeReferenceSequence(pacFile,referenceSequence); + + BWT bwt = BWT.createFromReferenceSequence(referenceSequence); + BWTWriter bwtWriter = new BWTWriter(bwtFile); + bwtWriter.write(bwt); + bwtWriter.close(); + + SuffixArray suffixArray = SuffixArray.createFromReferenceSequence(referenceSequence); + SuffixArrayWriter suffixArrayWriter = new SuffixArrayWriter(suffixArrayFile); + suffixArrayWriter.write(suffixArray); + suffixArrayWriter.close(); + } + + /** + * Convert the given reference sequence into a form suitable for building into + * on-the-fly sequences. + * @param referenceSequence The reference sequence to normalize. + * @throws org.broadinstitute.sting.utils.exceptions.ReviewedStingException if normalized sequence cannot be generated. + */ + private static void normalizeReferenceSequence(byte[] referenceSequence) { + StringUtil.toUpperCase(referenceSequence); + for(byte base: referenceSequence) { + if(base != 'A' && base != 'C' && base != 'G' && base != 'T') + throw new ReviewedStingException(String.format("Base type %c is not supported when building references on-the-fly",(char)base)); + } + } +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/bwa/c/BWACAligner.java b/public/java/src/org/broadinstitute/sting/alignment/bwa/c/BWACAligner.java new file mode 100644 index 000000000..165314259 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/bwa/c/BWACAligner.java @@ -0,0 +1,259 @@ +package org.broadinstitute.sting.alignment.bwa.c; + +import net.sf.samtools.SAMFileHeader; +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.alignment.Alignment; +import org.broadinstitute.sting.alignment.bwa.BWAAligner; +import org.broadinstitute.sting.alignment.bwa.BWAConfiguration; +import org.broadinstitute.sting.alignment.bwa.BWTFiles; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + +import java.util.Arrays; +import java.util.Iterator; + +/** + * An aligner using the BWA/C implementation. + * + * @author mhanna + * @version 0.1 + */ +public class BWACAligner extends BWAAligner { + static { + System.loadLibrary("bwa"); + } + + /** + * A pointer to the C++ object representing the BWA engine. + */ + private long thunkPointer = 0; + + public BWACAligner(BWTFiles bwtFiles, BWAConfiguration configuration) { + super(bwtFiles,configuration); + if(thunkPointer != 0) + throw new ReviewedStingException("BWA/C attempting to reinitialize."); + + if(!bwtFiles.annFile.exists()) throw new ReviewedStingException("ANN file is missing; please rerun 'bwa aln' to regenerate it."); + if(!bwtFiles.ambFile.exists()) throw new ReviewedStingException("AMB file is missing; please rerun 'bwa aln' to regenerate it."); + if(!bwtFiles.pacFile.exists()) throw new ReviewedStingException("PAC file is missing; please rerun 'bwa aln' to regenerate it."); + if(!bwtFiles.forwardBWTFile.exists()) throw new ReviewedStingException("Forward BWT file is missing; please rerun 'bwa aln' to regenerate it."); + if(!bwtFiles.forwardSAFile.exists()) throw new ReviewedStingException("Forward SA file is missing; please rerun 'bwa aln' to regenerate it."); + if(!bwtFiles.reverseBWTFile.exists()) throw new ReviewedStingException("Reverse BWT file is missing; please rerun 'bwa aln' to regenerate it."); + if(!bwtFiles.reverseSAFile.exists()) throw new ReviewedStingException("Reverse SA file is missing; please rerun 'bwa aln' to regenerate it."); + + thunkPointer = create(bwtFiles,configuration); + } + + /** + * Create an aligner object using an array of bytes as a reference. + * @param referenceSequence Reference sequence to encode ad-hoc. + * @param configuration Configuration for the given aligner. + */ + public BWACAligner(byte[] referenceSequence, BWAConfiguration configuration) { + this(BWTFiles.createFromReferenceSequence(referenceSequence),configuration); + // Now that the temporary files are created, the temporary files can be destroyed. + bwtFiles.close(); + } + + /** + * Update the configuration passed to the BWA aligner. + * @param configuration New configuration to set. + */ + @Override + public void updateConfiguration(BWAConfiguration configuration) { + if(thunkPointer == 0) + throw new ReviewedStingException("BWA/C: attempting to update configuration of uninitialized aligner."); + updateConfiguration(thunkPointer,configuration); + } + + /** + * Close this instance of the BWA pointer and delete its resources. + */ + @Override + public void close() { + if(thunkPointer == 0) + throw new ReviewedStingException("BWA/C close attempted, but BWA/C is not properly initialized."); + destroy(thunkPointer); + } + + /** + * Allow the aligner to choose one alignment randomly from the pile of best alignments. + * @param bases Bases to align. + * @return An align + */ + @Override + public Alignment getBestAlignment(final byte[] bases) { + if(thunkPointer == 0) + throw new ReviewedStingException("BWA/C getBestAlignment attempted, but BWA/C is not properly initialized."); + return getBestAlignment(thunkPointer,bases); + } + + /** + * Get the best aligned read, chosen randomly from the pile of best alignments. + * @param read Read to align. + * @param newHeader New header to apply to this SAM file. Can be null, but if so, read header must be valid. + * @return Read with injected alignment data. + */ + @Override + public SAMRecord align(final SAMRecord read, final SAMFileHeader newHeader) { + if(bwtFiles.autogenerated) + throw new UnsupportedOperationException("Cannot create target alignment; source contig was generated ad-hoc and is not reliable"); + return Alignment.convertToRead(getBestAlignment(read.getReadBases()),read,newHeader); + } + + /** + * Get a iterator of alignments, batched by mapping quality. + * @param bases List of bases. + * @return Iterator to alignments. + */ + @Override + public Iterable getAllAlignments(final byte[] bases) { + final BWAPath[] paths = getPaths(bases); + return new Iterable() { + public Iterator iterator() { + return new Iterator() { + /** + * The last position accessed. + */ + private int position = 0; + + /** + * Whether all alignments have been seen based on the current position. + * @return True if any more alignments are pending. False otherwise. + */ + public boolean hasNext() { return position < paths.length; } + + /** + * Return the next cross-section of alignments, based on mapping quality. + * @return Array of the next set of alignments of a given mapping quality. + */ + public Alignment[] next() { + if(position >= paths.length) + throw new UnsupportedOperationException("Out of alignments to return."); + int score = paths[position].score; + int startingPosition = position; + while(position < paths.length && paths[position].score == score) position++; + return convertPathsToAlignments(bases,Arrays.copyOfRange(paths,startingPosition,position)); + } + + /** + * Unsupported. + */ + public void remove() { throw new UnsupportedOperationException("Cannot remove from an alignment iterator"); } + }; + } + }; + } + + /** + * Get a iterator of aligned reads, batched by mapping quality. + * @param read Read to align. + * @param newHeader Optional new header to use when aligning the read. If present, it must be null. + * @return Iterator to alignments. + */ + @Override + public Iterable alignAll(final SAMRecord read, final SAMFileHeader newHeader) { + if(bwtFiles.autogenerated) + throw new UnsupportedOperationException("Cannot create target alignment; source contig was generated ad-hoc and is not reliable"); + final Iterable alignments = getAllAlignments(read.getReadBases()); + return new Iterable() { + public Iterator iterator() { + final Iterator alignmentIterator = alignments.iterator(); + return new Iterator() { + /** + * Whether all alignments have been seen based on the current position. + * @return True if any more alignments are pending. False otherwise. + */ + public boolean hasNext() { return alignmentIterator.hasNext(); } + + /** + * Return the next cross-section of alignments, based on mapping quality. + * @return Array of the next set of alignments of a given mapping quality. + */ + public SAMRecord[] next() { + Alignment[] alignmentsOfQuality = alignmentIterator.next(); + SAMRecord[] reads = new SAMRecord[alignmentsOfQuality.length]; + for(int i = 0; i < alignmentsOfQuality.length; i++) { + reads[i] = Alignment.convertToRead(alignmentsOfQuality[i],read,newHeader); + } + return reads; + } + + /** + * Unsupported. + */ + public void remove() { throw new UnsupportedOperationException("Cannot remove from an alignment iterator"); } + }; + } + }; + } + + /** + * Get the paths associated with the given base string. + * @param bases List of bases. + * @return A set of paths through the BWA. + */ + public BWAPath[] getPaths(byte[] bases) { + if(thunkPointer == 0) + throw new ReviewedStingException("BWA/C getPaths attempted, but BWA/C is not properly initialized."); + return getPaths(thunkPointer,bases); + } + + /** + * Create a pointer to the BWA/C thunk. + * @param files BWT source files. + * @param configuration Configuration of the aligner. + * @return Pointer to the BWA/C thunk. + */ + protected native long create(BWTFiles files, BWAConfiguration configuration); + + /** + * Update the configuration passed to the BWA aligner. For internal use only. + * @param thunkPointer pointer to BWA object. + * @param configuration New configuration to set. + */ + protected native void updateConfiguration(long thunkPointer, BWAConfiguration configuration); + + /** + * Destroy the BWA/C thunk. + * @param thunkPointer Pointer to the allocated thunk. + */ + protected native void destroy(long thunkPointer); + + /** + * Do the extra steps involved in converting a local alignment to a global alignment. + * @param bases ASCII representation of byte array. + * @param paths Paths through the current BWT. + * @return A list of alignments. + */ + protected Alignment[] convertPathsToAlignments(byte[] bases, BWAPath[] paths) { + if(thunkPointer == 0) + throw new ReviewedStingException("BWA/C convertPathsToAlignments attempted, but BWA/C is not properly initialized."); + return convertPathsToAlignments(thunkPointer,bases,paths); + } + + /** + * Caller to the path generation functionality within BWA/C. Call this method's getPaths() wrapper (above) instead. + * @param thunkPointer pointer to the C++ object managing BWA/C. + * @param bases ASCII representation of byte array. + * @return A list of paths through the specified BWT. + */ + protected native BWAPath[] getPaths(long thunkPointer, byte[] bases); + + /** + * Do the extra steps involved in converting a local alignment to a global alignment. + * Call this method's convertPathsToAlignments() wrapper (above) instead. + * @param thunkPointer pointer to the C++ object managing BWA/C. + * @param bases ASCII representation of byte array. + * @param paths Paths through the current BWT. + * @return A list of alignments. + */ + protected native Alignment[] convertPathsToAlignments(long thunkPointer, byte[] bases, BWAPath[] paths); + + /** + * Gets the best alignment from BWA/C, randomly selected from all best-aligned reads. + * @param thunkPointer Pointer to BWA thunk. + * @param bases bases to align. + * @return The best alignment from BWA/C. + */ + protected native Alignment getBestAlignment(long thunkPointer, byte[] bases); +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/bwa/c/BWAPath.java b/public/java/src/org/broadinstitute/sting/alignment/bwa/c/BWAPath.java new file mode 100755 index 000000000..347d4344f --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/bwa/c/BWAPath.java @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2009 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.alignment.bwa.c; + +/** + * Models a BWA path. + * + * @author mhanna + * @version 0.1 + */ +public class BWAPath { + /** + * Number of mismatches encountered along this path. + */ + public final int numMismatches; + + /** + * Number of gap opens encountered along this path. + */ + public final int numGapOpens; + + /** + * Number of gap extensions along this path. + */ + public final int numGapExtensions; + + /** + * Whether this alignment was found on the positive or negative strand. + */ + public final boolean negativeStrand; + + /** + * Starting coordinate in the BWT. + */ + public final long k; + + /** + * Ending coordinate in the BWT. + */ + public final long l; + + /** + * The score of this path. + */ + public final int score; + + /** + * The number of best alignments seen along this path. + */ + public final int bestCount; + + /** + * The number of second best alignments seen along this path. + */ + public final int secondBestCount; + + /** + * Create a new path with the given attributes. + * @param numMismatches Number of mismatches along path. + * @param numGapOpens Number of gap opens along path. + * @param numGapExtensions Number of gap extensions along path. + * @param k Index to first coordinate within BWT. + * @param l Index to last coordinate within BWT. + * @param score Score of this alignment. Not the mapping quality. + */ + public BWAPath(int numMismatches, int numGapOpens, int numGapExtensions, boolean negativeStrand, long k, long l, int score, int bestCount, int secondBestCount) { + this.numMismatches = numMismatches; + this.numGapOpens = numGapOpens; + this.numGapExtensions = numGapExtensions; + this.negativeStrand = negativeStrand; + this.k = k; + this.l = l; + this.score = score; + this.bestCount = bestCount; + this.secondBestCount = secondBestCount; + } + +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/bwa/java/AlignerTestHarness.java b/public/java/src/org/broadinstitute/sting/alignment/bwa/java/AlignerTestHarness.java new file mode 100644 index 000000000..2d568a96a --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/bwa/java/AlignerTestHarness.java @@ -0,0 +1,164 @@ +package org.broadinstitute.sting.alignment.bwa.java; + +import net.sf.picard.reference.IndexedFastaSequenceFile; +import net.sf.samtools.*; +import org.broadinstitute.sting.alignment.Aligner; +import org.broadinstitute.sting.alignment.Alignment; +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + +import java.io.File; +import java.io.FileNotFoundException; + +/** + * A test harness to ensure that the perfect aligner works. + * + * @author mhanna + * @version 0.1 + */ +public class AlignerTestHarness { + public static void main( String argv[] ) throws FileNotFoundException { + if( argv.length != 6 ) { + System.out.println("PerfectAlignerTestHarness "); + System.exit(1); + } + + File referenceFile = new File(argv[0]); + File bwtFile = new File(argv[1]); + File rbwtFile = new File(argv[2]); + File suffixArrayFile = new File(argv[3]); + File reverseSuffixArrayFile = new File(argv[4]); + File bamFile = new File(argv[5]); + + align(referenceFile,bwtFile,rbwtFile,suffixArrayFile,reverseSuffixArrayFile,bamFile); + } + + private static void align(File referenceFile, File bwtFile, File rbwtFile, File suffixArrayFile, File reverseSuffixArrayFile, File bamFile) throws FileNotFoundException { + Aligner aligner = new BWAJavaAligner(bwtFile,rbwtFile,suffixArrayFile,reverseSuffixArrayFile); + int count = 0; + + SAMFileReader reader = new SAMFileReader(bamFile); + reader.setValidationStringency(SAMFileReader.ValidationStringency.SILENT); + + int mismatches = 0; + int failures = 0; + + for(SAMRecord read: reader) { + count++; + if( count > 200000 ) break; + //if( count < 366000 ) continue; + //if( count > 2 ) break; + //if( !read.getReadName().endsWith("SL-XBC:1:82:506:404#0") ) + // continue; + //if( !read.getReadName().endsWith("SL-XBC:1:36:30:1926#0") ) + // continue; + //if( !read.getReadName().endsWith("SL-XBC:1:60:1342:1340#0") ) + // continue; + + SAMRecord alignmentCleaned = null; + try { + alignmentCleaned = (SAMRecord)read.clone(); + } + catch( CloneNotSupportedException ex ) { + throw new ReviewedStingException("SAMRecord clone not supported", ex); + } + + if( alignmentCleaned.getReadNegativeStrandFlag() ) + alignmentCleaned.setReadBases(BaseUtils.simpleReverseComplement(alignmentCleaned.getReadBases())); + + alignmentCleaned.setReferenceIndex(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX); + alignmentCleaned.setAlignmentStart(SAMRecord.NO_ALIGNMENT_START); + alignmentCleaned.setMappingQuality(SAMRecord.NO_MAPPING_QUALITY); + alignmentCleaned.setCigarString(SAMRecord.NO_ALIGNMENT_CIGAR); + + // Clear everything except flags pertaining to pairing and set 'unmapped' status to true. + alignmentCleaned.setFlags(alignmentCleaned.getFlags() & 0x00A1 | 0x000C); + + Iterable alignments = aligner.getAllAlignments(alignmentCleaned.getReadBases()); + if(!alignments.iterator().hasNext() ) { + //throw new StingException(String.format("Unable to align read %s to reference; count = %d",read.getReadName(),count)); + System.out.printf("Unable to align read %s to reference; count = %d%n",read.getReadName(),count); + failures++; + } + + Alignment foundAlignment = null; + for(Alignment[] alignmentsOfQuality: alignments) { + for(Alignment alignment: alignmentsOfQuality) { + if( read.getReadNegativeStrandFlag() != alignment.isNegativeStrand() ) + continue; + if( read.getAlignmentStart() != alignment.getAlignmentStart() ) + continue; + + foundAlignment = alignment; + } + } + + if( foundAlignment != null ) { + //System.out.printf("%s: Aligned read to reference at position %d with %d mismatches, %d gap opens, and %d gap extensions.%n", read.getReadName(), foundAlignment.getAlignmentStart(), foundAlignment.getMismatches(), foundAlignment.getGapOpens(), foundAlignment.getGapExtensions()); + } + else { + System.out.printf("Error aligning read %s%n", read.getReadName()); + + mismatches++; + + IndexedFastaSequenceFile reference = new IndexedFastaSequenceFile(referenceFile); + + System.out.printf("read = %s, position = %d, negative strand = %b%n", formatBasesBasedOnCigar(read.getReadString(),read.getCigar(),CigarOperator.DELETION), + read.getAlignmentStart(), + read.getReadNegativeStrandFlag()); + int numDeletions = numDeletionsInCigar(read.getCigar()); + String expectedRef = new String(reference.getSubsequenceAt(reference.getSequenceDictionary().getSequences().get(0).getSequenceName(),read.getAlignmentStart(),read.getAlignmentStart()+read.getReadLength()+numDeletions-1).getBases()); + System.out.printf("expected ref = %s%n", formatBasesBasedOnCigar(expectedRef,read.getCigar(),CigarOperator.INSERTION)); + + for(Alignment[] alignmentsOfQuality: alignments) { + for(Alignment alignment: alignmentsOfQuality) { + System.out.println(); + + Cigar cigar = ((BWAAlignment)alignment).getCigar(); + + System.out.printf("read = %s%n", formatBasesBasedOnCigar(read.getReadString(),cigar,CigarOperator.DELETION)); + + int deletionCount = ((BWAAlignment)alignment).getNumberOfBasesMatchingState(AlignmentState.DELETION); + String alignedRef = new String(reference.getSubsequenceAt(reference.getSequenceDictionary().getSequences().get(0).getSequenceName(),alignment.getAlignmentStart(),alignment.getAlignmentStart()+read.getReadLength()+deletionCount-1).getBases()); + System.out.printf("actual ref = %s, position = %d, negative strand = %b%n", formatBasesBasedOnCigar(alignedRef,cigar,CigarOperator.INSERTION), + alignment.getAlignmentStart(), + alignment.isNegativeStrand()); + } + } + + //throw new StingException(String.format("Read %s was placed at incorrect location; count = %d%n",read.getReadName(),count)); + } + + + if( count % 1000 == 0 ) + System.out.printf("%d reads examined.%n",count); + } + + System.out.printf("%d reads examined; %d mismatches; %d failures.%n",count,mismatches,failures); + } + + private static String formatBasesBasedOnCigar( String bases, Cigar cigar, CigarOperator toBlank ) { + StringBuilder formatted = new StringBuilder(); + int readIndex = 0; + for(CigarElement cigarElement: cigar.getCigarElements()) { + if(cigarElement.getOperator() == toBlank) { + int number = cigarElement.getLength(); + while( number-- > 0 ) formatted.append(' '); + } + else { + int number = cigarElement.getLength(); + while( number-- > 0 ) formatted.append(bases.charAt(readIndex++)); + } + } + return formatted.toString(); + } + + private static int numDeletionsInCigar( Cigar cigar ) { + int numDeletions = 0; + for(CigarElement cigarElement: cigar.getCigarElements()) { + if(cigarElement.getOperator() == CigarOperator.DELETION) + numDeletions += cigarElement.getLength(); + } + return numDeletions; + } +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/bwa/java/AlignmentMatchSequence.java b/public/java/src/org/broadinstitute/sting/alignment/bwa/java/AlignmentMatchSequence.java new file mode 100644 index 000000000..f1e3c31b6 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/bwa/java/AlignmentMatchSequence.java @@ -0,0 +1,150 @@ +package org.broadinstitute.sting.alignment.bwa.java; + +import net.sf.samtools.Cigar; +import net.sf.samtools.CigarElement; +import net.sf.samtools.CigarOperator; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + +import java.util.ArrayDeque; +import java.util.Deque; +import java.util.Iterator; + +/** + * Represents a sequence of matches. + * + * @author mhanna + * @version 0.1 + */ +public class AlignmentMatchSequence implements Cloneable { + /** + * Stores the particular match entries in the order they occur. + */ + private Deque entries = new ArrayDeque(); + + /** + * Clone the given match sequence. + * @return A deep copy of the current match sequence. + */ + public AlignmentMatchSequence clone() { + AlignmentMatchSequence copy = null; + try { + copy = (AlignmentMatchSequence)super.clone(); + } + catch( CloneNotSupportedException ex ) { + throw new ReviewedStingException("Unable to clone AlignmentMatchSequence."); + } + + copy.entries = new ArrayDeque(); + for( AlignmentMatchSequenceEntry entry: entries ) + copy.entries.add(entry.clone()); + + return copy; + } + + public Cigar convertToCigar(boolean negativeStrand) { + Cigar cigar = new Cigar(); + Iterator iterator = negativeStrand ? entries.descendingIterator() : entries.iterator(); + while( iterator.hasNext() ) { + AlignmentMatchSequenceEntry entry = iterator.next(); + CigarOperator operator; + switch( entry.getAlignmentState() ) { + case MATCH_MISMATCH: operator = CigarOperator.MATCH_OR_MISMATCH; break; + case INSERTION: operator = CigarOperator.INSERTION; break; + case DELETION: operator = CigarOperator.DELETION; break; + default: throw new ReviewedStingException("convertToCigar: cannot process state: " + entry.getAlignmentState()); + } + cigar.add( new CigarElement(entry.count,operator) ); + } + return cigar; + } + + /** + * All a new alignment of the given state. + * @param state State to add to the sequence. + */ + public void addNext( AlignmentState state ) { + AlignmentMatchSequenceEntry last = entries.peekLast(); + // If the last entry is the same as this one, increment it. Otherwise, add a new entry. + if( last != null && last.alignmentState == state ) + last.increment(); + else + entries.add(new AlignmentMatchSequenceEntry(state)); + } + + /** + * Gets the current state of this alignment (what's the state of the last base?) + * @return State of the most recently aligned base. + */ + public AlignmentState getCurrentState() { + if( entries.size() == 0 ) + return AlignmentState.MATCH_MISMATCH; + return entries.peekLast().getAlignmentState(); + } + + /** + * How many bases in the read match the given state. + * @param state State to test. + * @return number of bases which match that state. + */ + public int getNumberOfBasesMatchingState(AlignmentState state) { + int matches = 0; + for( AlignmentMatchSequenceEntry entry: entries ) { + if( entry.getAlignmentState() == state ) + matches += entry.count; + } + return matches; + } + + /** + * Stores an individual match sequence entry. + */ + private class AlignmentMatchSequenceEntry implements Cloneable { + /** + * The state of the alignment throughout a given point in the sequence. + */ + private final AlignmentState alignmentState; + + /** + * The number of bases having this particular state. + */ + private int count; + + /** + * Create a new sequence entry with the given state. + * @param alignmentState The state that this sequence should contain. + */ + AlignmentMatchSequenceEntry( AlignmentState alignmentState ) { + this.alignmentState = alignmentState; + this.count = 1; + } + + /** + * Clone the given match sequence entry. + * @return A deep copy of the current match sequence entry. + */ + public AlignmentMatchSequenceEntry clone() { + try { + return (AlignmentMatchSequenceEntry)super.clone(); + } + catch( CloneNotSupportedException ex ) { + throw new ReviewedStingException("Unable to clone AlignmentMatchSequenceEntry."); + } + } + + /** + * Retrieves the current state of the alignment. + * @return The state of the current sequence. + */ + AlignmentState getAlignmentState() { + return alignmentState; + } + + /** + * Increment the count of alignments having this particular state. + */ + void increment() { + count++; + } + } +} + diff --git a/public/java/src/org/broadinstitute/sting/alignment/bwa/java/AlignmentState.java b/public/java/src/org/broadinstitute/sting/alignment/bwa/java/AlignmentState.java new file mode 100644 index 000000000..92c603335 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/bwa/java/AlignmentState.java @@ -0,0 +1,13 @@ +package org.broadinstitute.sting.alignment.bwa.java; + +/** + * The state of a given base in the alignment. + * + * @author mhanna + * @version 0.1 + */ +public enum AlignmentState { + MATCH_MISMATCH, + INSERTION, + DELETION +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/bwa/java/BWAAlignment.java b/public/java/src/org/broadinstitute/sting/alignment/bwa/java/BWAAlignment.java new file mode 100644 index 000000000..f3b515dba --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/bwa/java/BWAAlignment.java @@ -0,0 +1,190 @@ +package org.broadinstitute.sting.alignment.bwa.java; + +import net.sf.samtools.Cigar; +import org.broadinstitute.sting.alignment.Alignment; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + +/** + * An alignment object to be used incrementally as the BWA aligner + * inspects the read. + * + * @author mhanna + * @version 0.1 + */ +public class BWAAlignment extends Alignment implements Cloneable { + /** + * Track the number of alignments that have been created. + */ + private static long numCreated; + + /** + * Which number alignment is this? + */ + private long creationNumber; + + /** + * The aligner performing the alignments. + */ + protected BWAJavaAligner aligner; + + /** + * The sequence of matches/mismatches/insertions/deletions. + */ + private AlignmentMatchSequence alignmentMatchSequence = new AlignmentMatchSequence(); + + /** + * Working variable. How many bases have been matched at this point. + */ + protected int position; + + /** + * Working variable. How many mismatches have been encountered at this point. + */ + private int mismatches; + + /** + * Number of gap opens in alignment. + */ + private int gapOpens; + + /** + * Number of gap extensions in alignment. + */ + private int gapExtensions; + + /** + * Working variable. The lower bound of the alignment within the BWT. + */ + protected long loBound; + + /** + * Working variable. The upper bound of the alignment within the BWT. + */ + protected long hiBound; + + protected void setAlignmentStart(long position) { + this.alignmentStart = position; + } + + protected void setNegativeStrand(boolean negativeStrand) { + this.negativeStrand = negativeStrand; + } + + /** + * Cache the score. + */ + private int score; + + public Cigar getCigar() { + return alignmentMatchSequence.convertToCigar(isNegativeStrand()); + } + + /** + * Gets the current state of this alignment (state of the last base viewed).. + * @return Current state of the alignment. + */ + public AlignmentState getCurrentState() { + return alignmentMatchSequence.getCurrentState(); + } + + /** + * Adds the given state to the current alignment. + * @param state State to add to the given alignment. + */ + public void addState( AlignmentState state ) { + alignmentMatchSequence.addNext(state); + } + + /** + * Gets the BWA score of this alignment. + * @return BWA-style scores. 0 is best. + */ + public int getScore() { + return score; + } + + public int getMismatches() { return mismatches; } + public int getGapOpens() { return gapOpens; } + public int getGapExtensions() { return gapExtensions; } + + public void incrementMismatches() { + this.mismatches++; + updateScore(); + } + + public void incrementGapOpens() { + this.gapOpens++; + updateScore(); + } + + public void incrementGapExtensions() { + this.gapExtensions++; + updateScore(); + } + + /** + * Updates the score based on new information about matches / mismatches. + */ + private void updateScore() { + score = mismatches*aligner.MISMATCH_PENALTY + gapOpens*aligner.GAP_OPEN_PENALTY + gapExtensions*aligner.GAP_EXTENSION_PENALTY; + } + + /** + * Create a new alignment with the given parent aligner. + * @param aligner Aligner being used. + */ + public BWAAlignment( BWAJavaAligner aligner ) { + this.aligner = aligner; + this.creationNumber = numCreated++; + } + + /** + * Clone the alignment. + * @return New instance of the alignment. + */ + public BWAAlignment clone() { + BWAAlignment newAlignment = null; + try { + newAlignment = (BWAAlignment)super.clone(); + } + catch( CloneNotSupportedException ex ) { + throw new ReviewedStingException("Unable to clone BWAAlignment."); + } + newAlignment.creationNumber = numCreated++; + newAlignment.alignmentMatchSequence = alignmentMatchSequence.clone(); + + return newAlignment; + } + + /** + * How many bases in the read match the given state. + * @param state State to test. + * @return number of bases which match that state. + */ + public int getNumberOfBasesMatchingState(AlignmentState state) { + return alignmentMatchSequence.getNumberOfBasesMatchingState(state); + } + + /** + * Compare this alignment to another alignment. + * @param rhs Other alignment to which to compare. + * @return < 0 if this < other, == 0 if this == other, > 0 if this > other + */ + public int compareTo(Alignment rhs) { + BWAAlignment other = (BWAAlignment)rhs; + + // If the scores are different, disambiguate using the score. + if(score != other.score) + return score > other.score ? 1 : -1; + + // Otherwise, use the order in which the elements were created. + if(creationNumber != other.creationNumber) + return creationNumber > other.creationNumber ? -1 : 1; + + return 0; + } + + public String toString() { + return String.format("position: %d, strand: %b, state: %s, mismatches: %d, gap opens: %d, gap extensions: %d, loBound: %d, hiBound: %d, score: %d, creationNumber: %d", position, negativeStrand, alignmentMatchSequence.getCurrentState(), mismatches, gapOpens, gapExtensions, loBound, hiBound, getScore(), creationNumber); + } +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/bwa/java/BWAJavaAligner.java b/public/java/src/org/broadinstitute/sting/alignment/bwa/java/BWAJavaAligner.java new file mode 100644 index 000000000..fbeac9192 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/bwa/java/BWAJavaAligner.java @@ -0,0 +1,393 @@ +package org.broadinstitute.sting.alignment.bwa.java; + +import net.sf.samtools.SAMFileHeader; +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.alignment.Alignment; +import org.broadinstitute.sting.alignment.bwa.BWAAligner; +import org.broadinstitute.sting.alignment.bwa.BWAConfiguration; +import org.broadinstitute.sting.alignment.reference.bwt.*; +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.Utils; + +import java.io.File; +import java.util.ArrayList; +import java.util.List; +import java.util.PriorityQueue; + +/** + * Create imperfect alignments from the read to the genome represented by the given BWT / suffix array. + * + * @author mhanna + * @version 0.1 + */ +public class BWAJavaAligner extends BWAAligner { + /** + * BWT in the forward direction. + */ + private BWT forwardBWT; + + /** + * BWT in the reverse direction. + */ + private BWT reverseBWT; + + /** + * Suffix array in the forward direction. + */ + private SuffixArray forwardSuffixArray; + + /** + * Suffix array in the reverse direction. + */ + private SuffixArray reverseSuffixArray; + + /** + * Maximum edit distance (-n option from original BWA). + */ + private final int MAXIMUM_EDIT_DISTANCE = 4; + + /** + * Maximum number of gap opens (-o option from original BWA). + */ + private final int MAXIMUM_GAP_OPENS = 1; + + /** + * Maximum number of gap extensions (-e option from original BWA). + */ + private final int MAXIMUM_GAP_EXTENSIONS = 6; + + /** + * Penalty for straight mismatches (-M option from original BWA). + */ + public final int MISMATCH_PENALTY = 3; + + /** + * Penalty for gap opens (-O option from original BWA). + */ + public final int GAP_OPEN_PENALTY = 11; + + /** + * Penalty for gap extensions (-E option from original BWA). + */ + public final int GAP_EXTENSION_PENALTY = 4; + + /** + * Skip the ends of indels. + */ + public final int INDEL_END_SKIP = 5; + + public BWAJavaAligner( File forwardBWTFile, File reverseBWTFile, File forwardSuffixArrayFile, File reverseSuffixArrayFile ) { + super(null,null); + forwardBWT = new BWTReader(forwardBWTFile).read(); + reverseBWT = new BWTReader(reverseBWTFile).read(); + forwardSuffixArray = new SuffixArrayReader(forwardSuffixArrayFile,forwardBWT).read(); + reverseSuffixArray = new SuffixArrayReader(reverseSuffixArrayFile,reverseBWT).read(); + } + + /** + * Close this instance of the BWA pointer and delete its resources. + */ + @Override + public void close() { + throw new UnsupportedOperationException("BWA aligner can't currently be closed."); + } + + /** + * Update the current parameters of this aligner. + * @param configuration New configuration to set. + */ + public void updateConfiguration(BWAConfiguration configuration) { + throw new UnsupportedOperationException("Configuration of the BWA aligner can't currently be changed."); + } + + /** + * Allow the aligner to choose one alignment randomly from the pile of best alignments. + * @param bases Bases to align. + * @return An align + */ + public Alignment getBestAlignment(final byte[] bases) { throw new UnsupportedOperationException("BWAJavaAligner does not yet support the standard Aligner interface."); } + + /** + * Align the read to the reference. + * @param read Read to align. + * @param header Optional header to drop in place. + * @return A list of the alignments. + */ + public SAMRecord align(final SAMRecord read, final SAMFileHeader header) { throw new UnsupportedOperationException("BWAJavaAligner does not yet support the standard Aligner interface."); } + + /** + * Get a iterator of alignments, batched by mapping quality. + * @param bases List of bases. + * @return Iterator to alignments. + */ + public Iterable getAllAlignments(final byte[] bases) { throw new UnsupportedOperationException("BWAJavaAligner does not yet support the standard Aligner interface."); } + + /** + * Get a iterator of aligned reads, batched by mapping quality. + * @param read Read to align. + * @param newHeader Optional new header to use when aligning the read. If present, it must be null. + * @return Iterator to alignments. + */ + public Iterable alignAll(final SAMRecord read, final SAMFileHeader newHeader) { throw new UnsupportedOperationException("BWAJavaAligner does not yet support the standard Aligner interface."); } + + + public List align( SAMRecord read ) { + List successfulMatches = new ArrayList(); + + Byte[] uncomplementedBases = normalizeBases(read.getReadBases()); + Byte[] complementedBases = normalizeBases(Utils.reverse(BaseUtils.simpleReverseComplement(read.getReadBases()))); + + List forwardLowerBounds = LowerBound.create(uncomplementedBases,forwardBWT); + List reverseLowerBounds = LowerBound.create(complementedBases,reverseBWT); + + // Seed the best score with any score that won't overflow on comparison. + int bestScore = Integer.MAX_VALUE - MISMATCH_PENALTY; + int bestDiff = MAXIMUM_EDIT_DISTANCE+1; + int maxDiff = MAXIMUM_EDIT_DISTANCE; + + PriorityQueue alignments = new PriorityQueue(); + + // Create a fictional initial alignment, with the position just off the end of the read, and the limits + // set as the entire BWT. + alignments.add(createSeedAlignment(reverseBWT)); + alignments.add(createSeedAlignment(forwardBWT)); + + while(!alignments.isEmpty()) { + BWAAlignment alignment = alignments.remove(); + + // From bwtgap.c in the original BWT; if the rank is worse than the best score + the mismatch PENALTY, move on. + if( alignment.getScore() > bestScore + MISMATCH_PENALTY ) + break; + + Byte[] bases = alignment.isNegativeStrand() ? complementedBases : uncomplementedBases; + BWT bwt = alignment.isNegativeStrand() ? forwardBWT : reverseBWT; + List lowerBounds = alignment.isNegativeStrand() ? reverseLowerBounds : forwardLowerBounds; + + // if z < D(i) then return {} + int mismatches = maxDiff - alignment.getMismatches() - alignment.getGapOpens() - alignment.getGapExtensions(); + if( alignment.position < lowerBounds.size()-1 && mismatches < lowerBounds.get(alignment.position+1).value ) + continue; + + if(mismatches == 0) { + exactMatch(alignment,bases,bwt); + if(alignment.loBound > alignment.hiBound) + continue; + } + + // Found a valid alignment; store it and move on. + if(alignment.position >= read.getReadLength()-1) { + for(long bwtIndex = alignment.loBound; bwtIndex <= alignment.hiBound; bwtIndex++) { + BWAAlignment finalAlignment = alignment.clone(); + + if( finalAlignment.isNegativeStrand() ) + finalAlignment.setAlignmentStart(forwardSuffixArray.get(bwtIndex) + 1); + else { + int sizeAlongReference = read.getReadLength() - + finalAlignment.getNumberOfBasesMatchingState(AlignmentState.INSERTION) + + finalAlignment.getNumberOfBasesMatchingState(AlignmentState.DELETION); + finalAlignment.setAlignmentStart(reverseBWT.length() - reverseSuffixArray.get(bwtIndex) - sizeAlongReference + 1); + } + + successfulMatches.add(finalAlignment); + + bestScore = Math.min(finalAlignment.getScore(),bestScore); + bestDiff = Math.min(finalAlignment.getMismatches()+finalAlignment.getGapOpens()+finalAlignment.getGapExtensions(),bestDiff); + maxDiff = bestDiff + 1; + } + + continue; + } + + //System.out.printf("Processing alignments; queue size = %d, alignment = %s, bound = %d, base = %s%n", alignments.size(), alignment, lowerBounds.get(alignment.position+1).value, alignment.position >= 0 ? (char)bases[alignment.position].byteValue() : ""); + /* + System.out.printf("#1\t[%d,%d,%d,%c]\t[%d,%d,%d]\t[%d,%d]\t[%d,%d]%n",alignments.size(), + alignment.negativeStrand?1:0, + bases.length-alignment.position-1, + alignment.getCurrentState().toString().charAt(0), + alignment.getMismatches(), + alignment.getGapOpens(), + alignment.getGapExtensions(), + lowerBounds.get(alignment.position+1).value, + lowerBounds.get(alignment.position+1).width, + alignment.loBound, + alignment.hiBound); + */ + + // Temporary -- look ahead to see if the next alignment is bounded. + boolean allowDifferences = mismatches > 0; + boolean allowMismatches = mismatches > 0; + + if( allowDifferences && + alignment.position+1 >= INDEL_END_SKIP-1+alignment.getGapOpens()+alignment.getGapExtensions() && + read.getReadLength()-1-(alignment.position+1) >= INDEL_END_SKIP+alignment.getGapOpens()+alignment.getGapExtensions() ) { + if( alignment.getCurrentState() == AlignmentState.MATCH_MISMATCH ) { + if( alignment.getGapOpens() < MAXIMUM_GAP_OPENS ) { + // Add a potential insertion extension. + BWAAlignment insertionAlignment = createInsertionAlignment(alignment); + insertionAlignment.incrementGapOpens(); + alignments.add(insertionAlignment); + + // Add a potential deletion by marking a deletion and augmenting the position. + List deletionAlignments = createDeletionAlignments(bwt,alignment); + for( BWAAlignment deletionAlignment: deletionAlignments ) + deletionAlignment.incrementGapOpens(); + alignments.addAll(deletionAlignments); + } + } + else if( alignment.getCurrentState() == AlignmentState.INSERTION ) { + if( alignment.getGapExtensions() < MAXIMUM_GAP_EXTENSIONS && mismatches > 0 ) { + // Add a potential insertion extension. + BWAAlignment insertionAlignment = createInsertionAlignment(alignment); + insertionAlignment.incrementGapExtensions(); + alignments.add(insertionAlignment); + } + } + else if( alignment.getCurrentState() == AlignmentState.DELETION ) { + if( alignment.getGapExtensions() < MAXIMUM_GAP_EXTENSIONS && mismatches > 0 ) { + // Add a potential deletion by marking a deletion and augmenting the position. + List deletionAlignments = createDeletionAlignments(bwt,alignment); + for( BWAAlignment deletionAlignment: deletionAlignments ) + deletionAlignment.incrementGapExtensions(); + alignments.addAll(deletionAlignments); + } + } + } + + // Mismatches + alignments.addAll(createMatchedAlignments(bwt,alignment,bases,allowDifferences&&allowMismatches)); + } + + return successfulMatches; + } + + /** + * Create an seeding alignment to use as a starting point when traversing. + * @param bwt source BWT. + * @return Seed alignment. + */ + private BWAAlignment createSeedAlignment(BWT bwt) { + BWAAlignment seed = new BWAAlignment(this); + seed.setNegativeStrand(bwt == forwardBWT); + seed.position = -1; + seed.loBound = 0; + seed.hiBound = bwt.length(); + return seed; + } + + /** + * Creates a new alignments representing direct matches / mismatches. + * @param bwt Source BWT with which to work. + * @param alignment Alignment for the previous position. + * @param bases The bases in the read. + * @param allowMismatch Should mismatching bases be allowed? + * @return New alignment representing this position if valid; null otherwise. + */ + private List createMatchedAlignments( BWT bwt, BWAAlignment alignment, Byte[] bases, boolean allowMismatch ) { + List newAlignments = new ArrayList(); + + List baseChoices = new ArrayList(); + Byte thisBase = bases[alignment.position+1]; + + if( allowMismatch ) + baseChoices.addAll(Bases.allOf()); + else + baseChoices.add(thisBase); + + if( thisBase != null ) { + // Keep rotating the current base to the last position until we've hit the current base. + for( ;; ) { + baseChoices.add(baseChoices.remove(0)); + if( thisBase.equals(baseChoices.get(baseChoices.size()-1)) ) + break; + + } + } + + for(byte base: baseChoices) { + BWAAlignment newAlignment = alignment.clone(); + + newAlignment.loBound = bwt.counts(base) + bwt.occurrences(base,alignment.loBound-1) + 1; + newAlignment.hiBound = bwt.counts(base) + bwt.occurrences(base,alignment.hiBound); + + // If this alignment is valid, skip it. + if( newAlignment.loBound > newAlignment.hiBound ) + continue; + + newAlignment.position++; + newAlignment.addState(AlignmentState.MATCH_MISMATCH); + if( bases[newAlignment.position] == null || base != bases[newAlignment.position] ) + newAlignment.incrementMismatches(); + + newAlignments.add(newAlignment); + } + + return newAlignments; + } + + /** + * Create a new alignment representing an insertion at this point in the read. + * @param alignment Alignment from which to derive the insertion. + * @return New alignment reflecting the insertion. + */ + private BWAAlignment createInsertionAlignment( BWAAlignment alignment ) { + // Add a potential insertion extension. + BWAAlignment newAlignment = alignment.clone(); + newAlignment.position++; + newAlignment.addState(AlignmentState.INSERTION); + return newAlignment; + } + + /** + * Create new alignments representing a deletion at this point in the read. + * @param bwt source BWT for inferring deletion info. + * @param alignment Alignment from which to derive the deletion. + * @return New alignments reflecting all possible deletions. + */ + private List createDeletionAlignments( BWT bwt, BWAAlignment alignment) { + List newAlignments = new ArrayList(); + for(byte base: Bases.instance) { + BWAAlignment newAlignment = alignment.clone(); + + newAlignment.loBound = bwt.counts(base) + bwt.occurrences(base,alignment.loBound-1) + 1; + newAlignment.hiBound = bwt.counts(base) + bwt.occurrences(base,alignment.hiBound); + + // If this alignment is valid, skip it. + if( newAlignment.loBound > newAlignment.hiBound ) + continue; + + newAlignment.addState(AlignmentState.DELETION); + + newAlignments.add(newAlignment); + } + + return newAlignments; + } + + /** + * Exactly match the given alignment against the given BWT. + * @param alignment Alignment to match. + * @param bases Bases to use. + * @param bwt BWT to use. + */ + private void exactMatch( BWAAlignment alignment, Byte[] bases, BWT bwt ) { + while( ++alignment.position < bases.length ) { + byte base = bases[alignment.position]; + alignment.loBound = bwt.counts(base) + bwt.occurrences(base,alignment.loBound-1) + 1; + alignment.hiBound = bwt.counts(base) + bwt.occurrences(base,alignment.hiBound); + if( alignment.loBound > alignment.hiBound ) + return; + } + } + + /** + * Make each base into A/C/G/T or null if unknown. + * @param bases Base string to normalize. + * @return Array of normalized bases. + */ + private Byte[] normalizeBases( byte[] bases ) { + Byte[] normalBases = new Byte[bases.length]; + for(int i = 0; i < bases.length; i++) + normalBases[i] = Bases.fromASCII(bases[i]); + return normalBases; + } +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/bwa/java/LowerBound.java b/public/java/src/org/broadinstitute/sting/alignment/bwa/java/LowerBound.java new file mode 100644 index 000000000..be7514255 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/bwa/java/LowerBound.java @@ -0,0 +1,88 @@ +package org.broadinstitute.sting.alignment.bwa.java; + +import org.broadinstitute.sting.alignment.reference.bwt.BWT; + +import java.util.ArrayList; +import java.util.List; + +/** + * At any point along the given read, what is a good lower bound for the + * total number of differences? + * + * @author mhanna + * @version 0.1 + */ +public class LowerBound { + /** + * Lower bound of the suffix array. + */ + public final long loIndex; + + /** + * Upper bound of the suffix array. + */ + public final long hiIndex; + + /** + * Width of the bwt from loIndex -> hiIndex, inclusive. + */ + public final long width; + + /** + * The lower bound at the given point. + */ + public final int value; + + /** + * Create a new lower bound with the given value. + * @param loIndex The lower bound of the BWT. + * @param hiIndex The upper bound of the BWT. + * @param value Value for the lower bound at this site. + */ + private LowerBound(long loIndex, long hiIndex, int value) { + this.loIndex = loIndex; + this.hiIndex = hiIndex; + this.width = hiIndex - loIndex + 1; + this.value = value; + } + + /** + * Create a non-optimal bound according to the algorithm specified in Figure 3 of the BWA paper. + * @param bases Bases of the read to use when creating a new BWT. + * @param bwt BWT to check against. + * @return A list of lower bounds at every point in the reference. + * + */ + public static List create(Byte[] bases, BWT bwt) { + List bounds = new ArrayList(); + + long loIndex = 0, hiIndex = bwt.length(); + int mismatches = 0; + for( int i = bases.length-1; i >= 0; i-- ) { + Byte base = bases[i]; + + // Ignore non-ACGT bases. + if( base != null ) { + loIndex = bwt.counts(base) + bwt.occurrences(base,loIndex-1) + 1; + hiIndex = bwt.counts(base) + bwt.occurrences(base,hiIndex); + } + + if( base == null || loIndex > hiIndex ) { + loIndex = 0; + hiIndex = bwt.length(); + mismatches++; + } + bounds.add(0,new LowerBound(loIndex,hiIndex,mismatches)); + } + + return bounds; + } + + /** + * Create a string representation of this bound. + * @return String version of this bound. + */ + public String toString() { + return String.format("LowerBound: w = %d, value = %d",width,value); + } +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/package-info.java b/public/java/src/org/broadinstitute/sting/alignment/package-info.java new file mode 100644 index 000000000..60cf1e425 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/package-info.java @@ -0,0 +1,4 @@ +/** + * Analyses used to validate the correctness and performance the BWA Java bindings. + */ +package org.broadinstitute.sting.alignment; \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/AMBWriter.java b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/AMBWriter.java new file mode 100644 index 000000000..ec10415dd --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/AMBWriter.java @@ -0,0 +1,68 @@ +package org.broadinstitute.sting.alignment.reference.bwt; + +import net.sf.samtools.SAMSequenceDictionary; +import net.sf.samtools.SAMSequenceRecord; + +import java.io.File; +import java.io.IOException; +import java.io.OutputStream; +import java.io.PrintStream; + +/** + * Writes .amb files - a file indicating where 'holes' (indeterminant bases) + * exist in the contig. Currently, only empty, placeholder AMBs are supported. + * + * @author mhanna + * @version 0.1 + */ +public class AMBWriter { + /** + * Number of holes is fixed at zero. + */ + private static final int NUM_HOLES = 0; + + /** + * Input stream from which to read BWT data. + */ + private final PrintStream out; + + /** + * Create a new ANNWriter targeting the given file. + * @param file file into which ANN data should be written. + * @throws java.io.IOException if there is a problem opening the output file. + */ + public AMBWriter(File file) throws IOException { + out = new PrintStream(file); + } + + /** + * Create a new ANNWriter targeting the given OutputStream. + * @param stream Stream into which ANN data should be written. + */ + public AMBWriter(OutputStream stream) { + out = new PrintStream(stream); + } + + /** + * Write the contents of the given dictionary into the AMB file. + * Assumes that there are no holes in the dictionary. + * @param dictionary Dictionary to write. + */ + public void writeEmpty(SAMSequenceDictionary dictionary) { + long genomeLength = 0L; + for(SAMSequenceRecord sequence: dictionary.getSequences()) + genomeLength += sequence.getSequenceLength(); + + int sequences = dictionary.getSequences().size(); + + // Write the header + out.printf("%d %d %d%n",genomeLength,sequences,NUM_HOLES); + } + + /** + * Close the given output stream. + */ + public void close() { + out.close(); + } +} \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/ANNWriter.java b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/ANNWriter.java new file mode 100644 index 000000000..8d692a9e7 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/ANNWriter.java @@ -0,0 +1,95 @@ +package org.broadinstitute.sting.alignment.reference.bwt; + +import net.sf.samtools.SAMSequenceDictionary; +import net.sf.samtools.SAMSequenceRecord; + +import java.io.File; +import java.io.IOException; +import java.io.OutputStream; +import java.io.PrintStream; + +/** + * Writes .ann files - an alternate sequence dictionary format + * used by BWA/C. For best results, the input sequence dictionary + * should be created with Picard's CreateSequenceDictionary.jar, + * TRUNCATE_NAMES_AT_WHITESPACE=false. + * + * @author mhanna + * @version 0.1 + */ +public class ANNWriter { + /** + * BWA uses a fixed seed of 11, written into every file. + */ + private static final int BNS_SEED = 11; + + /** + * A seemingly unused value that appears in every contig in the ANN. + */ + private static final int GI = 0; + + /** + * Input stream from which to read BWT data. + */ + private final PrintStream out; + + /** + * Create a new ANNWriter targeting the given file. + * @param file file into which ANN data should be written. + * @throws IOException if there is a problem opening the output file. + */ + public ANNWriter(File file) throws IOException { + out = new PrintStream(file); + } + + /** + * Create a new ANNWriter targeting the given OutputStream. + * @param stream Stream into which ANN data should be written. + */ + public ANNWriter(OutputStream stream) { + out = new PrintStream(stream); + } + + /** + * Write the contents of the given dictionary into the ANN file. + * Assumes that no ambs (blocks of indeterminate base) are present in the dictionary. + * @param dictionary Dictionary to write. + */ + public void write(SAMSequenceDictionary dictionary) { + long genomeLength = 0L; + for(SAMSequenceRecord sequence: dictionary.getSequences()) + genomeLength += sequence.getSequenceLength(); + + int sequences = dictionary.getSequences().size(); + + // Write the header + out.printf("%d %d %d%n",genomeLength,sequences,BNS_SEED); + + for(SAMSequenceRecord sequence: dictionary.getSequences()) { + String fullSequenceName = sequence.getSequenceName(); + String trimmedSequenceName = fullSequenceName; + String sequenceComment = "(null)"; + + long offset = 0; + + // Separate the sequence name from the sequence comment, based on BWA's definition. + // BWA's definition appears to accept a zero-length contig name, so mimic that behavior. + if(fullSequenceName.indexOf(' ') >= 0) { + trimmedSequenceName = fullSequenceName.substring(0,fullSequenceName.indexOf(' ')); + sequenceComment = fullSequenceName.substring(fullSequenceName.indexOf(' ')+1); + } + + // Write the sequence GI (?), name, and comment. + out.printf("%d %s %s%n",GI,trimmedSequenceName,sequenceComment); + // Write the sequence offset, length, and ambs (currently fixed at 0). + out.printf("%d %d %d%n",offset,sequence.getSequenceLength(),0); + } + } + + /** + * Close the given output stream. + */ + public void close() { + out.close(); + } +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/BWT.java b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/BWT.java new file mode 100644 index 000000000..7f8c48253 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/BWT.java @@ -0,0 +1,172 @@ +package org.broadinstitute.sting.alignment.reference.bwt; + +import org.broadinstitute.sting.alignment.reference.packing.PackUtils; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + +/** + * Represents the Burrows-Wheeler Transform of a reference sequence. + * + * @author mhanna + * @version 0.1 + */ +public class BWT { + /** + * Write an occurrence table after every SEQUENCE_BLOCK_SIZE bases. + * For this implementation to behave correctly, SEQUENCE_BLOCK_SIZE % 8 == 0 + */ + public static final int SEQUENCE_BLOCK_SIZE = 128; + + /** + * The inverse SA, used as a placeholder for determining where the special EOL character sits. + */ + protected final long inverseSA0; + + /** + * Cumulative counts for the entire BWT. + */ + protected final Counts counts; + + /** + * The individual sequence blocks, modelling how they appear on disk. + */ + protected final SequenceBlock[] sequenceBlocks; + + /** + * Creates a new BWT with the given inverse SA, counts, and sequence (in ASCII). + * @param inverseSA0 Inverse SA entry for the first element. Will be missing from the BWT sequence. + * @param counts Cumulative count of bases, in A,C,G,T order. + * @param sequenceBlocks The full BWT sequence, sans the '$'. + */ + public BWT( long inverseSA0, Counts counts, SequenceBlock[] sequenceBlocks ) { + this.inverseSA0 = inverseSA0; + this.counts = counts; + this.sequenceBlocks = sequenceBlocks; + } + + /** + * Creates a new BWT with the given inverse SA, occurrences, and sequence (in ASCII). + * @param inverseSA0 Inverse SA entry for the first element. Will be missing from the BWT sequence. + * @param counts Count of bases, in A,C,G,T order. + * @param sequence The full BWT sequence, sans the '$'. + */ + public BWT( long inverseSA0, Counts counts, byte[] sequence ) { + this(inverseSA0,counts,generateSequenceBlocks(sequence)); + } + + /** + * Extract the full sequence from the list of block. + * @return The full BWT string as a byte array. + */ + public byte[] getSequence() { + byte[] sequence = new byte[(int)counts.getTotal()]; + for( SequenceBlock block: sequenceBlocks ) + System.arraycopy(block.sequence,0,sequence,block.sequenceStart,block.sequenceLength); + return sequence; + } + + /** + * Get the total counts of bases lexicographically smaller than the given base, for Ferragina and Manzini's search. + * @param base The base. + * @return Total counts for all bases lexicographically smaller than this base. + */ + public long counts(byte base) { + return counts.getCumulative(base); + } + + /** + * Get the total counts of bases lexicographically smaller than the given base, for Ferragina and Manzini's search. + * @param base The base. + * @param index The position to search within the BWT. + * @return Total counts for all bases lexicographically smaller than this base. + */ + public long occurrences(byte base,long index) { + SequenceBlock block = getSequenceBlock(index); + int position = getSequencePosition(index); + long accumulator = block.occurrences.get(base); + for(int i = 0; i <= position; i++) { + if(base == block.sequence[i]) + accumulator++; + } + return accumulator; + } + + /** + * The number of bases in the BWT as a whole. + * @return Number of bases. + */ + public long length() { + return counts.getTotal(); + } + + /** + * Create a new BWT from the given reference sequence. + * @param referenceSequence Sequence from which to derive the BWT. + * @return reference sequence-derived BWT. + */ + public static BWT createFromReferenceSequence(byte[] referenceSequence) { + SuffixArray suffixArray = SuffixArray.createFromReferenceSequence(referenceSequence); + + byte[] bwt = new byte[(int)suffixArray.length()-1]; + int bwtIndex = 0; + for(long suffixArrayIndex = 0; suffixArrayIndex < suffixArray.length(); suffixArrayIndex++) { + if(suffixArray.get(suffixArrayIndex) == 0) + continue; + bwt[bwtIndex++] = referenceSequence[(int)suffixArray.get(suffixArrayIndex)-1]; + } + + return new BWT(suffixArray.inverseSA0,suffixArray.occurrences,bwt); + } + + /** + * Gets the base at a given position in the BWT. + * @param index The index to use. + * @return The base at that location. + */ + protected byte getBase(long index) { + if(index == inverseSA0) + throw new ReviewedStingException(String.format("Base at index %d does not have a text representation",index)); + + SequenceBlock block = getSequenceBlock(index); + int position = getSequencePosition(index); + return block.sequence[position]; + } + + private SequenceBlock getSequenceBlock(long index) { + // If the index is above the SA-1[0], remap it to the appropriate coordinate space. + if(index > inverseSA0) index--; + return sequenceBlocks[(int)(index/SEQUENCE_BLOCK_SIZE)]; + } + + private int getSequencePosition(long index) { + // If the index is above the SA-1[0], remap it to the appropriate coordinate space. + if(index > inverseSA0) index--; + return (int)(index%SEQUENCE_BLOCK_SIZE); + } + + /** + * Create a set of sequence blocks from one long sequence. + * @param sequence Sequence from which to derive blocks. + * @return Array of sequence blocks containing data from the sequence. + */ + private static SequenceBlock[] generateSequenceBlocks( byte[] sequence ) { + Counts occurrences = new Counts(); + + int numSequenceBlocks = PackUtils.numberOfPartitions(sequence.length,SEQUENCE_BLOCK_SIZE); + SequenceBlock[] sequenceBlocks = new SequenceBlock[numSequenceBlocks]; + + for( int block = 0; block < numSequenceBlocks; block++ ) { + int blockStart = block*SEQUENCE_BLOCK_SIZE; + int blockLength = Math.min(SEQUENCE_BLOCK_SIZE, sequence.length-blockStart); + byte[] subsequence = new byte[blockLength]; + + System.arraycopy(sequence,blockStart,subsequence,0,blockLength); + + sequenceBlocks[block] = new SequenceBlock(blockStart,blockLength,occurrences.clone(),subsequence); + + for( byte base: subsequence ) + occurrences.increment(base); + } + + return sequenceBlocks; + } +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/BWTReader.java b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/BWTReader.java new file mode 100644 index 000000000..5c4f6d39d --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/BWTReader.java @@ -0,0 +1,89 @@ +package org.broadinstitute.sting.alignment.reference.bwt; + +import org.broadinstitute.sting.alignment.reference.packing.BasePackedInputStream; +import org.broadinstitute.sting.alignment.reference.packing.PackUtils; +import org.broadinstitute.sting.alignment.reference.packing.UnsignedIntPackedInputStream; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.nio.ByteOrder; +/** + * Reads a BWT from a given file. + * + * @author mhanna + * @version 0.1 + */ +public class BWTReader { + /** + * Input stream from which to read BWT data. + */ + private FileInputStream inputStream; + + /** + * Create a new BWT reader. + * @param inputFile File in which the BWT is stored. + */ + public BWTReader( File inputFile ) { + try { + this.inputStream = new FileInputStream(inputFile); + } + catch( FileNotFoundException ex ) { + throw new ReviewedStingException("Unable to open input file", ex); + } + } + + /** + * Read a BWT from the input stream. + * @return The BWT stored in the input stream. + */ + public BWT read() { + UnsignedIntPackedInputStream uintPackedInputStream = new UnsignedIntPackedInputStream(inputStream, ByteOrder.LITTLE_ENDIAN); + BasePackedInputStream basePackedInputStream = new BasePackedInputStream(Integer.class, inputStream, ByteOrder.LITTLE_ENDIAN); + + long inverseSA0; + long[] count; + SequenceBlock[] sequenceBlocks; + + try { + inverseSA0 = uintPackedInputStream.read(); + count = new long[PackUtils.ALPHABET_SIZE]; + uintPackedInputStream.read(count); + + long bwtSize = count[PackUtils.ALPHABET_SIZE-1]; + sequenceBlocks = new SequenceBlock[PackUtils.numberOfPartitions(bwtSize,BWT.SEQUENCE_BLOCK_SIZE)]; + + for( int block = 0; block < sequenceBlocks.length; block++ ) { + int sequenceStart = block* BWT.SEQUENCE_BLOCK_SIZE; + int sequenceLength = (int)Math.min(BWT.SEQUENCE_BLOCK_SIZE,bwtSize-sequenceStart); + + long[] occurrences = new long[PackUtils.ALPHABET_SIZE]; + byte[] bwt = new byte[sequenceLength]; + + uintPackedInputStream.read(occurrences); + basePackedInputStream.read(bwt); + + sequenceBlocks[block] = new SequenceBlock(sequenceStart,sequenceLength,new Counts(occurrences,false),bwt); + } + } + catch( IOException ex ) { + throw new ReviewedStingException("Unable to read BWT from input stream.", ex); + } + + return new BWT(inverseSA0, new Counts(count,true), sequenceBlocks); + } + + /** + * Close the input stream. + */ + public void close() { + try { + inputStream.close(); + } + catch( IOException ex ) { + throw new ReviewedStingException("Unable to close input file", ex); + } + } +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/BWTSupplementaryFileGenerator.java b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/BWTSupplementaryFileGenerator.java new file mode 100644 index 000000000..3370f79c8 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/BWTSupplementaryFileGenerator.java @@ -0,0 +1,60 @@ +package org.broadinstitute.sting.alignment.reference.bwt; + +import net.sf.picard.reference.ReferenceSequenceFile; +import net.sf.picard.reference.ReferenceSequenceFileFactory; +import net.sf.samtools.SAMSequenceDictionary; + +import java.io.File; +import java.io.IOException; + +/** + * Generate BWA supplementary files (.ann, .amb) from the command line. + * + * @author mhanna + * @version 0.1 + */ +public class BWTSupplementaryFileGenerator { + enum SupplementaryFileType { ANN, AMB } + + public static void main(String[] args) throws IOException { + if(args.length < 3) + usage("Incorrect number of arguments supplied"); + + File fastaFile = new File(args[0]); + File outputFile = new File(args[1]); + SupplementaryFileType outputType = null; + try { + outputType = Enum.valueOf(SupplementaryFileType.class,args[2]); + } + catch(IllegalArgumentException ex) { + usage("Invalid output type: " + args[2]); + } + + ReferenceSequenceFile sequenceFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(fastaFile); + SAMSequenceDictionary dictionary = sequenceFile.getSequenceDictionary(); + + switch(outputType) { + case ANN: + ANNWriter annWriter = new ANNWriter(outputFile); + annWriter.write(dictionary); + annWriter.close(); + break; + case AMB: + AMBWriter ambWriter = new AMBWriter(outputFile); + ambWriter.writeEmpty(dictionary); + ambWriter.close(); + break; + default: + usage("Unsupported output type: " + outputType); + } + } + + /** + * Print usage information and exit. + */ + private static void usage(String message) { + System.err.println(message); + System.err.println("Usage: BWTSupplementaryFileGenerator "); + System.exit(1); + } +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/BWTWriter.java b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/BWTWriter.java new file mode 100644 index 000000000..a813cdc9a --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/BWTWriter.java @@ -0,0 +1,71 @@ +package org.broadinstitute.sting.alignment.reference.bwt; + +import org.broadinstitute.sting.alignment.reference.packing.BasePackedOutputStream; +import org.broadinstitute.sting.alignment.reference.packing.UnsignedIntPackedOutputStream; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + +import java.io.*; +import java.nio.ByteOrder; + +/** + * Writes an in-memory BWT to an outputstream. + * + * @author mhanna + * @version 0.1 + */ +public class BWTWriter { + /** + * Input stream from which to read BWT data. + */ + private final OutputStream outputStream; + + /** + * Create a new BWT writer. + * @param outputFile File in which the BWT is stored. + */ + public BWTWriter( File outputFile ) { + try { + this.outputStream = new BufferedOutputStream(new FileOutputStream(outputFile)); + } + catch( FileNotFoundException ex ) { + throw new ReviewedStingException("Unable to open output file", ex); + } + } + + /** + * Write a BWT to the output stream. + * @param bwt Transform to be written to the output stream. + */ + public void write( BWT bwt ) { + UnsignedIntPackedOutputStream intPackedOutputStream = new UnsignedIntPackedOutputStream(outputStream, ByteOrder.LITTLE_ENDIAN); + BasePackedOutputStream basePackedOutputStream = new BasePackedOutputStream(Integer.class, outputStream, ByteOrder.LITTLE_ENDIAN); + + try { + intPackedOutputStream.write(bwt.inverseSA0); + intPackedOutputStream.write(bwt.counts.toArray(true)); + + for( SequenceBlock block: bwt.sequenceBlocks ) { + intPackedOutputStream.write(block.occurrences.toArray(false)); + basePackedOutputStream.write(block.sequence); + } + + // The last block is the last set of counts in the structure. + intPackedOutputStream.write(bwt.counts.toArray(false)); + } + catch( IOException ex ) { + throw new ReviewedStingException("Unable to read BWT from input stream.", ex); + } + } + + /** + * Close the input stream. + */ + public void close() { + try { + outputStream.close(); + } + catch( IOException ex ) { + throw new ReviewedStingException("Unable to close input file", ex); + } + } +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/Bases.java b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/Bases.java new file mode 100644 index 000000000..bc0a5b63d --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/Bases.java @@ -0,0 +1,108 @@ +package org.broadinstitute.sting.alignment.reference.bwt; + +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + +import java.util.*; + +/** + * Enhanced enum representation of a base. + * + * @author mhanna + * @version 0.1 + */ +public class Bases implements Iterable +{ + public static byte A = 'A'; + public static byte C = 'C'; + public static byte G = 'G'; + public static byte T = 'T'; + + public static final Bases instance = new Bases(); + + private static final List allBases; + + /** + * Representation of the base broken down by packed value. + */ + private static final Map basesByPack = new HashMap(); + + static { + List bases = new ArrayList(); + bases.add(A); + bases.add(C); + bases.add(G); + bases.add(T); + allBases = Collections.unmodifiableList(bases); + + for(int i = 0; i < allBases.size(); i++) + basesByPack.put(i,allBases.get(i)); + } + + /** + * Create a new base with the given ascii representation and + * pack value. + */ + private Bases() { + } + + /** + * Return all possible bases. + * @return Byte representation of all bases. + */ + public static Collection allOf() { + return allBases; + } + + /** + * Gets the number of known bases. + * @return The number of known bases. + */ + public static int size() { + return allBases.size(); + } + + /** + * Gets an iterator over the total number of known base types. + * @return Iterator over all known bases. + */ + public Iterator iterator() { + return basesByPack.values().iterator(); + } + + /** + * Get the given base from the packed representation. + * @param pack Packed representation. + * @return base. + */ + public static byte fromPack( int pack ) { return basesByPack.get(pack); } + + /** + * Convert the given base to its packed value. + * @param ascii ASCII representation of the base. + * @return Packed value. + */ + public static int toPack( byte ascii ) + { + for( Map.Entry entry: basesByPack.entrySet() ) { + if( entry.getValue().equals(ascii) ) + return entry.getKey(); + } + throw new ReviewedStingException(String.format("Base %c is an invalid base to pack", (char)ascii)); + } + + /** + * Convert the ASCII representation of a base to its 'normalized' representation. + * @param base The base itself. + * @return The byte, if present. Null if unknown. + */ + public static Byte fromASCII( byte base ) { + Byte found = null; + for( Byte normalized: allBases ) { + if( normalized.equals(base) ) { + found = normalized; + break; + } + } + return found; + } +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/Counts.java b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/Counts.java new file mode 100644 index 000000000..268b11ac4 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/Counts.java @@ -0,0 +1,151 @@ +package org.broadinstitute.sting.alignment.reference.bwt; + +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + +import java.util.HashMap; +import java.util.Map; + +/** + * Counts of how many bases of each type have been seen. + * + * @author mhanna + * @version 0.1 + */ +public class Counts implements Cloneable { + /** + * Internal representation of counts, broken down by ASCII value. + */ + private Map counts = new HashMap(); + + /** + * Internal representation of cumulative counts, broken down by ASCII value. + */ + private Map cumulativeCounts = new HashMap(); + + /** + * Create an empty Counts object with values A=0,C=0,G=0,T=0. + */ + public Counts() + { + for(byte base: Bases.instance) { + counts.put(base,0L); + cumulativeCounts.put(base,0L); + } + } + + /** + * Create a counts data structure with the given initial values. + * @param data Count data, broken down by base. + * @param cumulative Whether the counts are cumulative, (count_G=numA+numC+numG,for example). + */ + public Counts( long[] data, boolean cumulative ) { + if(cumulative) { + long priorCount = 0; + for(byte base: Bases.instance) { + long count = data[Bases.toPack(base)]; + counts.put(base,count-priorCount); + cumulativeCounts.put(base,priorCount); + priorCount = count; + } + } + else { + long priorCount = 0; + for(byte base: Bases.instance) { + long count = data[Bases.toPack(base)]; + counts.put(base,count); + cumulativeCounts.put(base,priorCount); + priorCount += count; + } + } + } + + /** + * Convert to an array for persistence. + * @param cumulative Use a cumulative representation. + * @return Array of count values. + */ + public long[] toArray(boolean cumulative) { + long[] countArray = new long[counts.size()]; + if(cumulative) { + int index = 0; + boolean first = true; + for(byte base: Bases.instance) { + if(first) { + first = false; + continue; + } + countArray[index++] = getCumulative(base); + } + countArray[countArray.length-1] = getTotal(); + } + else { + int index = 0; + for(byte base: Bases.instance) + countArray[index++] = counts.get(base); + } + return countArray; + } + + /** + * Create a unique copy of the current object. + * @return A duplicate of this object. + */ + public Counts clone() { + Counts other; + try { + other = (Counts)super.clone(); + } + catch(CloneNotSupportedException ex) { + throw new ReviewedStingException("Unable to clone counts object", ex); + } + other.counts = new HashMap(counts); + other.cumulativeCounts = new HashMap(cumulativeCounts); + return other; + } + + /** + * Increment the number of bases seen at the given location. + * @param base Base to increment. + */ + public void increment(byte base) { + counts.put(base,counts.get(base)+1); + boolean increment = false; + for(byte cumulative: Bases.instance) { + if(increment) cumulativeCounts.put(cumulative,cumulativeCounts.get(cumulative)+1); + increment |= (cumulative == base); + } + } + + /** + * Gets a count of the number of bases seen at a given location. + * Note that counts in this case are not cumulative (counts for A,C,G,T + * are independent). + * @param base Base for which to query counts. + * @return Number of bases of this type seen. + */ + public long get(byte base) { + return counts.get(base); + } + + /** + * Gets a count of the number of bases seen before this base. + * Note that counts in this case are cumulative. + * @param base Base for which to query counts. + * @return Number of bases of this type seen. + */ + public long getCumulative(byte base) { + return cumulativeCounts.get(base); + } + + /** + * How many total bases are represented by this count structure? + * @return Total bases represented. + */ + public long getTotal() { + int accumulator = 0; + for(byte base: Bases.instance) { + accumulator += get(base); + } + return accumulator; + } +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/CreateBWTFromReference.java b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/CreateBWTFromReference.java new file mode 100755 index 000000000..801ab3a0b --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/CreateBWTFromReference.java @@ -0,0 +1,200 @@ +/* + * Copyright (c) 2009 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITHoc THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.alignment.reference.bwt; + +import net.sf.picard.reference.ReferenceSequence; +import net.sf.picard.reference.ReferenceSequenceFile; +import net.sf.picard.reference.ReferenceSequenceFileFactory; +import org.broadinstitute.sting.alignment.reference.packing.PackUtils; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + +import java.io.File; +import java.io.IOException; + +/** + * Create a suffix array data structure. + * + * @author mhanna + * @version 0.1 + */ +public class CreateBWTFromReference { + private byte[] loadReference( File inputFile ) { + // Read in the first sequence in the input file + ReferenceSequenceFile reference = ReferenceSequenceFileFactory.getReferenceSequenceFile(inputFile); + ReferenceSequence sequence = reference.nextSequence(); + return sequence.getBases(); + } + + private byte[] loadReverseReference( File inputFile ) { + ReferenceSequenceFile reference = ReferenceSequenceFileFactory.getReferenceSequenceFile(inputFile); + ReferenceSequence sequence = reference.nextSequence(); + PackUtils.reverse(sequence.getBases()); + return sequence.getBases(); + } + + private Counts countOccurrences( byte[] sequence ) { + Counts occurrences = new Counts(); + for( byte base: sequence ) + occurrences.increment(base); + return occurrences; + } + + private long[] createSuffixArray( byte[] sequence ) { + return SuffixArray.createFromReferenceSequence(sequence).sequence; + } + + private long[] invertSuffixArray( long[] suffixArray ) { + long[] inverseSuffixArray = new long[suffixArray.length]; + for( int i = 0; i < suffixArray.length; i++ ) + inverseSuffixArray[(int)suffixArray[i]] = i; + return inverseSuffixArray; + } + + private long[] createCompressedSuffixArray( int[] suffixArray, int[] inverseSuffixArray ) { + long[] compressedSuffixArray = new long[suffixArray.length]; + compressedSuffixArray[0] = inverseSuffixArray[0]; + for( int i = 1; i < suffixArray.length; i++ ) + compressedSuffixArray[i] = inverseSuffixArray[suffixArray[i]+1]; + return compressedSuffixArray; + } + + private long[] createInversedCompressedSuffixArray( int[] compressedSuffixArray ) { + long[] inverseCompressedSuffixArray = new long[compressedSuffixArray.length]; + for( int i = 0; i < compressedSuffixArray.length; i++ ) + inverseCompressedSuffixArray[compressedSuffixArray[i]] = i; + return inverseCompressedSuffixArray; + } + + public static void main( String argv[] ) throws IOException { + if( argv.length != 5 ) { + System.out.println("USAGE: CreateBWTFromReference .fasta "); + return; + } + + String inputFileName = argv[0]; + File inputFile = new File(inputFileName); + + String bwtFileName = argv[1]; + File bwtFile = new File(bwtFileName); + + String rbwtFileName = argv[2]; + File rbwtFile = new File(rbwtFileName); + + String saFileName = argv[3]; + File saFile = new File(saFileName); + + String rsaFileName = argv[4]; + File rsaFile = new File(rsaFileName); + + CreateBWTFromReference creator = new CreateBWTFromReference(); + + byte[] sequence = creator.loadReference(inputFile); + byte[] reverseSequence = creator.loadReverseReference(inputFile); + + // Count the occurences of each given base. + Counts occurrences = creator.countOccurrences(sequence); + System.out.printf("Occurrences: a=%d, c=%d, g=%d, t=%d%n",occurrences.getCumulative(Bases.A), + occurrences.getCumulative(Bases.C), + occurrences.getCumulative(Bases.G), + occurrences.getCumulative(Bases.T)); + + // Generate the suffix array and print diagnostics. + long[] suffixArrayData = creator.createSuffixArray(sequence); + long[] reverseSuffixArrayData = creator.createSuffixArray(reverseSequence); + + // Invert the suffix array and print diagnostics. + long[] inverseSuffixArray = creator.invertSuffixArray(suffixArrayData); + long[] reverseInverseSuffixArray = creator.invertSuffixArray(reverseSuffixArrayData); + + SuffixArray suffixArray = new SuffixArray( inverseSuffixArray[0], occurrences, suffixArrayData ); + SuffixArray reverseSuffixArray = new SuffixArray( reverseInverseSuffixArray[0], occurrences, reverseSuffixArrayData ); + + /* + // Create the data structure for the compressed suffix array and print diagnostics. + int[] compressedSuffixArray = creator.createCompressedSuffixArray(suffixArray.sequence,inverseSuffixArray); + int reconstructedInverseSA = compressedSuffixArray[0]; + for( int i = 0; i < 8; i++ ) { + System.out.printf("compressedSuffixArray[%d] = %d (SA-1[%d] = %d)%n", i, compressedSuffixArray[i], i, reconstructedInverseSA); + reconstructedInverseSA = compressedSuffixArray[reconstructedInverseSA]; + } + + // Create the data structure for the inverse compressed suffix array and print diagnostics. + int[] inverseCompressedSuffixArray = creator.createInversedCompressedSuffixArray(compressedSuffixArray); + for( int i = 0; i < 8; i++ ) { + System.out.printf("inverseCompressedSuffixArray[%d] = %d%n", i, inverseCompressedSuffixArray[i]); + } + */ + + // Create the BWT. + BWT bwt = BWT.createFromReferenceSequence(sequence); + BWT reverseBWT = BWT.createFromReferenceSequence(reverseSequence); + + byte[] bwtSequence = bwt.getSequence(); + System.out.printf("BWT: %s... (length = %d)%n", new String(bwtSequence,0,80),bwt.length()); + + BWTWriter bwtWriter = new BWTWriter(bwtFile); + bwtWriter.write(bwt); + bwtWriter.close(); + + BWTWriter reverseBWTWriter = new BWTWriter(rbwtFile); + reverseBWTWriter.write(reverseBWT); + reverseBWTWriter.close(); + + /* + SuffixArrayWriter saWriter = new SuffixArrayWriter(saFile); + saWriter.write(suffixArray); + saWriter.close(); + + SuffixArrayWriter reverseSAWriter = new SuffixArrayWriter(rsaFile); + reverseSAWriter.write(reverseSuffixArray); + reverseSAWriter.close(); + */ + + File existingBWTFile = new File(inputFileName+".bwt"); + BWTReader existingBWTReader = new BWTReader(existingBWTFile); + BWT existingBWT = existingBWTReader.read(); + + byte[] existingBWTSequence = existingBWT.getSequence(); + System.out.printf("Existing BWT: %s... (length = %d)%n",new String(existingBWTSequence,0,80),existingBWT.length()); + + for( int i = 0; i < bwt.length(); i++ ) { + if( bwtSequence[i] != existingBWTSequence[i] ) + throw new ReviewedStingException("BWT mismatch at " + i); + } + + File existingSAFile = new File(inputFileName+".sa"); + SuffixArrayReader existingSuffixArrayReader = new SuffixArrayReader(existingSAFile,existingBWT); + SuffixArray existingSuffixArray = existingSuffixArrayReader.read(); + + for(int i = 0; i < suffixArray.length(); i++) { + if( i % 10000 == 0 ) + System.out.printf("Validating suffix array entry %d%n", i); + if( suffixArray.get(i) != existingSuffixArray.get(i) ) + throw new ReviewedStingException(String.format("Suffix array mismatch at %d; SA is %d; should be %d",i,existingSuffixArray.get(i),suffixArray.get(i))); + } + } + +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/SequenceBlock.java b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/SequenceBlock.java new file mode 100644 index 000000000..13714de1e --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/SequenceBlock.java @@ -0,0 +1,41 @@ +package org.broadinstitute.sting.alignment.reference.bwt; + +/** + * Models a block of bases within the BWT. + */ +public class SequenceBlock { + /** + * Start position of this sequence within the BWT. + */ + public final int sequenceStart; + + /** + * Length of this sequence within the BWT. + */ + public final int sequenceLength; + + + /** + * Occurrences of each letter up to this sequence block. + */ + public final Counts occurrences; + + /** + * Sequence for this segment. + */ + public final byte[] sequence; + + /** + * Create a new block within this BWT. + * @param sequenceStart Starting position of this sequence within the BWT. + * @param sequenceLength Length of this sequence. + * @param occurrences How many of each base has been seen before this sequence began. + * @param sequence The actual sequence from the BWT. + */ + public SequenceBlock( int sequenceStart, int sequenceLength, Counts occurrences, byte[] sequence ) { + this.sequenceStart = sequenceStart; + this.sequenceLength = sequenceLength; + this.occurrences = occurrences; + this.sequence = sequence; + } +} \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/SuffixArray.java b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/SuffixArray.java new file mode 100644 index 000000000..49af98bb9 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/SuffixArray.java @@ -0,0 +1,158 @@ +package org.broadinstitute.sting.alignment.reference.bwt; + +import net.sf.samtools.util.StringUtil; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + +import java.util.Comparator; +import java.util.TreeSet; + +/** + * An in-memory representation of a suffix array. + * + * @author mhanna + * @version 0.1 + */ +public class SuffixArray { + public final long inverseSA0; + public final Counts occurrences; + + /** + * The elements of the sequence actually stored in memory. + */ + protected final long[] sequence; + + /** + * How often are individual elements in the sequence actually stored + * in memory, as opposed to being calculated on the fly? + */ + protected final int sequenceInterval; + + /** + * The BWT used to calculate missing portions of the sequence. + */ + protected final BWT bwt; + + public SuffixArray(long inverseSA0, Counts occurrences, long[] sequence) { + this(inverseSA0,occurrences,sequence,1,null); + } + + /** + * Creates a new sequence array with the given inverse SA, occurrences, and values. + * @param inverseSA0 Inverse SA entry for the first element. + * @param occurrences Cumulative number of occurrences of A,C,G,T, in order. + * @param sequence The full suffix array. + * @param sequenceInterval How frequently is the sequence interval stored. + * @param bwt bwt used to infer the remaining entries in the BWT. + */ + public SuffixArray(long inverseSA0, Counts occurrences, long[] sequence, int sequenceInterval, BWT bwt) { + this.inverseSA0 = inverseSA0; + this.occurrences = occurrences; + this.sequence = sequence; + this.sequenceInterval = sequenceInterval; + this.bwt = bwt; + + if(sequenceInterval != 1 && bwt == null) + throw new ReviewedStingException("A BWT must be provided if the sequence interval is not 1"); + } + + /** + * Retrieves the length of the sequence array. + * @return Length of the suffix array. + */ + public long length() { + if( bwt != null ) + return bwt.length()+1; + else + return sequence.length; + } + + /** + * Get the suffix array value at a given sequence. + * @param index Index at which to retrieve the suffix array vaule. + * @return The suffix array value at that entry. + */ + public long get(long index) { + int iterations = 0; + while(index%sequenceInterval != 0) { + // The inverseSA0 ('$') doesn't have a usable ASCII representation; it must be treated as a special case. + if(index == inverseSA0) + index = 0; + else { + byte base = bwt.getBase(index); + index = bwt.counts(base) + bwt.occurrences(base,index); + } + iterations++; + } + return (sequence[(int)(index/sequenceInterval)]+iterations) % length(); + } + + /** + * Create a suffix array from a given reference sequence. + * @param sequence The reference sequence to use when building the suffix array. + * @return a constructed suffix array. + */ + public static SuffixArray createFromReferenceSequence(byte[] sequence) { + // The builder for the suffix array. Use an integer in this case because + // Java arrays can only hold an integer. + TreeSet suffixArrayBuilder = new TreeSet(new SuffixArrayComparator(sequence)); + + Counts occurrences = new Counts(); + for( byte base: sequence ) + occurrences.increment(base); + + // Build out the suffix array using a custom comparator. + for( int i = 0; i <= sequence.length; i++ ) + suffixArrayBuilder.add(i); + + // Copy the suffix array into an array. + long[] suffixArray = new long[suffixArrayBuilder.size()]; + int i = 0; + for( Integer element: suffixArrayBuilder ) + suffixArray[i++] = element; + + // Find the first element in the inverse suffix array. + long inverseSA0 = -1; + for(i = 0; i < suffixArray.length; i++) { + if(suffixArray[i] == 0) + inverseSA0 = i; + } + if(inverseSA0 < 0) + throw new ReviewedStingException("Unable to find first inverse SA entry in generated suffix array."); + + return new SuffixArray(inverseSA0,occurrences,suffixArray); + } + + /** + * Compares two suffix arrays of the given sequence. Will return whichever string appears + * first in lexicographic order. + */ + private static class SuffixArrayComparator implements Comparator { + /** + * The data source for all suffix arrays. + */ + private final String sequence; + + /** + * Create a new comparator. + * @param sequence Reference sequence to use as basis for comparison. + */ + public SuffixArrayComparator( byte[] sequence ) { + // Processing the suffix array tends to be easier as a string. + this.sequence = StringUtil.bytesToString(sequence); + } + + /** + * Compare the two given suffix arrays. Criteria for comparison is the lexicographic order of + * the two substrings sequence[lhs:], sequence[rhs:]. + * @param lhs Left-hand side of comparison. + * @param rhs Right-hand side of comparison. + * @return How the suffix arrays represented by lhs, rhs compare. + */ + public int compare( Integer lhs, Integer rhs ) { + String lhsSuffixArray = sequence.substring(lhs); + String rhsSuffixArray = sequence.substring(rhs); + return lhsSuffixArray.compareTo(rhsSuffixArray); + } + } + +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/SuffixArrayReader.java b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/SuffixArrayReader.java new file mode 100644 index 000000000..b48e4c69c --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/SuffixArrayReader.java @@ -0,0 +1,85 @@ +package org.broadinstitute.sting.alignment.reference.bwt; + +import org.broadinstitute.sting.alignment.reference.packing.PackUtils; +import org.broadinstitute.sting.alignment.reference.packing.UnsignedIntPackedInputStream; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.nio.ByteOrder; + +/** + * A reader for suffix arrays in permanent storage. + * + * @author mhanna + * @version 0.1 + */ +public class SuffixArrayReader { + /** + * Input stream from which to read suffix array data. + */ + private FileInputStream inputStream; + + /** + * BWT to use to fill in missing data. + */ + private BWT bwt; + + /** + * Create a new suffix array reader. + * @param inputFile File in which the suffix array is stored. + * @param bwt BWT to use when filling in missing data. + */ + public SuffixArrayReader(File inputFile, BWT bwt) { + try { + this.inputStream = new FileInputStream(inputFile); + this.bwt = bwt; + } + catch( FileNotFoundException ex ) { + throw new ReviewedStingException("Unable to open input file", ex); + } + } + + /** + * Read a suffix array from the input stream. + * @return The suffix array stored in the input stream. + */ + public SuffixArray read() { + UnsignedIntPackedInputStream uintPackedInputStream = new UnsignedIntPackedInputStream(inputStream, ByteOrder.LITTLE_ENDIAN); + + long inverseSA0; + long[] occurrences; + long[] suffixArray; + int suffixArrayInterval; + + try { + inverseSA0 = uintPackedInputStream.read(); + occurrences = new long[PackUtils.ALPHABET_SIZE]; + uintPackedInputStream.read(occurrences); + // Throw away the suffix array size in bytes and use the occurrences table directly. + suffixArrayInterval = (int)uintPackedInputStream.read(); + suffixArray = new long[(int)((occurrences[occurrences.length-1]+suffixArrayInterval-1)/suffixArrayInterval)]; + uintPackedInputStream.read(suffixArray); + } + catch( IOException ex ) { + throw new ReviewedStingException("Unable to read BWT from input stream.", ex); + } + + return new SuffixArray(inverseSA0, new Counts(occurrences,true), suffixArray, suffixArrayInterval, bwt); + } + + + /** + * Close the input stream. + */ + public void close() { + try { + inputStream.close(); + } + catch( IOException ex ) { + throw new ReviewedStingException("Unable to close input file", ex); + } + } +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/SuffixArrayWriter.java b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/SuffixArrayWriter.java new file mode 100644 index 000000000..b6f79be2f --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/SuffixArrayWriter.java @@ -0,0 +1,67 @@ +package org.broadinstitute.sting.alignment.reference.bwt; + +import org.broadinstitute.sting.alignment.reference.packing.UnsignedIntPackedOutputStream; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + +import java.io.*; +import java.nio.ByteOrder; + +/** + * Javadoc goes here. + * + * @author mhanna + * @version 0.1 + */ +public class SuffixArrayWriter { + /** + * Input stream from which to read suffix array data. + */ + private OutputStream outputStream; + + /** + * Create a new suffix array reader. + * @param outputFile File in which the suffix array is stored. + */ + public SuffixArrayWriter( File outputFile ) { + try { + this.outputStream = new BufferedOutputStream(new FileOutputStream(outputFile)); + } + catch( FileNotFoundException ex ) { + throw new ReviewedStingException("Unable to open input file", ex); + } + } + + /** + * Write a suffix array to the output stream. + * @param suffixArray suffix array to write. + */ + public void write(SuffixArray suffixArray) { + UnsignedIntPackedOutputStream uintPackedOutputStream = new UnsignedIntPackedOutputStream(outputStream, ByteOrder.LITTLE_ENDIAN); + + try { + uintPackedOutputStream.write(suffixArray.inverseSA0); + uintPackedOutputStream.write(suffixArray.occurrences.toArray(true)); + // How frequently the suffix array entry is placed. + uintPackedOutputStream.write(1); + // Length of the suffix array. + uintPackedOutputStream.write(suffixArray.length()-1); + uintPackedOutputStream.write(suffixArray.sequence,1,suffixArray.sequence.length-1); + } + catch( IOException ex ) { + throw new ReviewedStingException("Unable to read BWT from input stream.", ex); + } + } + + + /** + * Close the input stream. + */ + public void close() { + try { + outputStream.close(); + } + catch( IOException ex ) { + throw new ReviewedStingException("Unable to close input file", ex); + } + } +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/reference/packing/BasePackedInputStream.java b/public/java/src/org/broadinstitute/sting/alignment/reference/packing/BasePackedInputStream.java new file mode 100644 index 000000000..174a9853b --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/reference/packing/BasePackedInputStream.java @@ -0,0 +1,95 @@ +package org.broadinstitute.sting.alignment.reference.packing; + +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.channels.FileChannel; + +/** + * Reads a packed version of the input stream. + * + * @author mhanna + * @version 0.1 + */ +public class BasePackedInputStream { + /** + * Type of object to unpack. + */ + private final Class type; + + /** + * Ultimate source for packed bases. + */ + private final FileInputStream targetInputStream; + + /** + * Channel source for packed bases. + */ + private final FileChannel targetInputChannel; + + /** + * A fixed-size buffer for word-packed data. + */ + private final ByteOrder byteOrder; + + /** + * How many bases are in a given packed word. + */ + private final int basesPerPackedWord = PackUtils.bitsInType(Integer.class)/PackUtils.BITS_PER_BASE; + + /** + * How many bytes in an integer? + */ + private final int bytesPerInteger = PackUtils.bitsInType(Integer.class)/PackUtils.BITS_PER_BYTE; + + + public BasePackedInputStream( Class type, File inputFile, ByteOrder byteOrder ) throws FileNotFoundException { + this(type,new FileInputStream(inputFile),byteOrder); + } + + public BasePackedInputStream( Class type, FileInputStream inputStream, ByteOrder byteOrder ) { + if( type != Integer.class ) + throw new ReviewedStingException("Only bases packed into 32-bit words are currently supported by this input stream. Type specified: " + type.getName()); + this.type = type; + this.targetInputStream = inputStream; + this.targetInputChannel = inputStream.getChannel(); + this.byteOrder = byteOrder; + } + + /** + * Read the entire contents of the input stream. + * @param bwt array into which bases should be read. + * @throws IOException if an I/O error occurs. + */ + public void read(byte[] bwt) throws IOException { + read(bwt,0,bwt.length); + } + + /** + * Read the next length bases into the bwt array, starting at the given offset. + * @param bwt array holding the given data. + * @param offset target position in the bases array into which bytes should be written. + * @param length number of bases to read from the stream. + * @throws IOException if an I/O error occurs. + */ + public void read(byte[] bwt, int offset, int length) throws IOException { + int bufferWidth = ((bwt.length+basesPerPackedWord-1)/basesPerPackedWord)*bytesPerInteger; + ByteBuffer buffer = ByteBuffer.allocate(bufferWidth).order(byteOrder); + targetInputChannel.read(buffer); + targetInputChannel.position(targetInputChannel.position()+buffer.remaining()); + buffer.flip(); + + int packedWord = 0; + int i = 0; + while(i < length) { + if(i % basesPerPackedWord == 0) packedWord = buffer.getInt(); + int position = basesPerPackedWord - i%basesPerPackedWord - 1; + bwt[offset+i++] = PackUtils.unpackBase((byte)((packedWord >> position*PackUtils.BITS_PER_BASE) & 0x3)); + } + } +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/reference/packing/BasePackedOutputStream.java b/public/java/src/org/broadinstitute/sting/alignment/reference/packing/BasePackedOutputStream.java new file mode 100644 index 000000000..c62f40e51 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/reference/packing/BasePackedOutputStream.java @@ -0,0 +1,140 @@ +package org.broadinstitute.sting.alignment.reference.packing; + +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + +import java.io.*; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; + +/** + * A general-purpose stream for writing packed bases. + * + * @author mhanna + * @version 0.1 + */ +public class BasePackedOutputStream { + /** + * Type of object to pack. + */ + private final Class type; + + /** + * How many bases can be stored in the given data structure? + */ + private final int basesPerType; + + /** + * Ultimate target for the packed bases. + */ + private final OutputStream targetOutputStream; + + /** + * A fixed-size buffer for word-packed data. + */ + private final ByteBuffer buffer; + + public BasePackedOutputStream( Class type, File outputFile, ByteOrder byteOrder ) throws FileNotFoundException { + this(type,new BufferedOutputStream(new FileOutputStream(outputFile)),byteOrder); + } + + /** + * Write packed bases to the given output stream. + * @param type Type of data to pack bases into. + * @param outputStream Output stream to which to write packed bases. + * @param byteOrder Switch between big endian / little endian when reading / writing files. + */ + public BasePackedOutputStream( Class type, OutputStream outputStream, ByteOrder byteOrder) { + this.targetOutputStream = outputStream; + this.type = type; + basesPerType = PackUtils.bitsInType(type)/PackUtils.BITS_PER_BASE; + this.buffer = ByteBuffer.allocate(basesPerType/PackUtils.ALPHABET_SIZE).order(byteOrder); + } + + /** + * Writes the given base to the output stream. Will write only this base; no packing will be performed. + * @param base List of bases to write. + * @throws IOException if an I/O error occurs. + */ + public void write( int base ) throws IOException { + write( new byte[] { (byte)base } ); + } + + /** + * Writes an array of bases to the target output stream. + * @param bases List of bases to write. + * @throws IOException if an I/O error occurs. + */ + public void write( byte[] bases ) throws IOException { + write(bases,0,bases.length); + } + + /** + * Writes a subset of the array of bases to the output stream. + * @param bases List of bases to write. + * @param offset site at which to start writing. + * @param length number of bases to write. + * @throws IOException if an I/O error occurs. + */ + public void write( byte[] bases, int offset, int length ) throws IOException { + int packedBases = 0; + int positionInPack = 0; + + for( int base = offset; base < offset+length; base++ ) { + packedBases = packBase(bases[base], packedBases, positionInPack); + + // Increment the packed counter. If all possible bases have been squeezed into this byte, write it out. + positionInPack = ++positionInPack % basesPerType; + if( positionInPack == 0 ) { + writePackedBases(packedBases); + packedBases = 0; + } + } + + if( positionInPack > 0 ) + writePackedBases(packedBases); + } + + /** + * Flush the contents of the OutputStream to disk. + * @throws IOException if an I/O error occurs. + */ + public void flush() throws IOException { + targetOutputStream.flush(); + } + + /** + * Closes the given output stream. + * @throws IOException if an I/O error occurs. + */ + public void close() throws IOException { + targetOutputStream.close(); + } + + /** + * Pack the given base into the basepack. + * @param base The base to pack. + * @param basePack Target for the pack operation. + * @param position Position within the pack to which to add the base. + * @return The packed integer. + */ + private int packBase( byte base, int basePack, int position ) { + basePack |= (PackUtils.packBase(base) << 2*(basesPerType-position-1)); + return basePack; + } + + /** + * Write the given packed base structure to the output file. + * @param packedBases Packed bases to write. + * @throws IOException on error writing to the file. + */ + private void writePackedBases(int packedBases) throws IOException { + buffer.rewind(); + if( type == Integer.class ) + buffer.putInt(packedBases); + else if( type == Byte.class ) + buffer.put((byte)packedBases); + else + throw new ReviewedStingException("Cannot pack bases into type " + type.getName()); + targetOutputStream.write(buffer.array()); + } +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/reference/packing/CreatePACFromReference.java b/public/java/src/org/broadinstitute/sting/alignment/reference/packing/CreatePACFromReference.java new file mode 100755 index 000000000..561535e29 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/reference/packing/CreatePACFromReference.java @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2009 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.alignment.reference.packing; + +import net.sf.picard.reference.ReferenceSequence; +import net.sf.picard.reference.ReferenceSequenceFile; +import net.sf.picard.reference.ReferenceSequenceFileFactory; + +import java.io.File; +import java.io.IOException; + +/** + * Generate a .PAC file from a given reference. + * + * @author hanna + * @version 0.1 + */ + +public class CreatePACFromReference { + public static void main( String argv[] ) throws IOException { + if( argv.length != 3 ) { + System.out.println("USAGE: CreatePACFromReference .fasta "); + return; + } + + // Read in the first sequence in the input file + String inputFileName = argv[0]; + File inputFile = new File(inputFileName); + ReferenceSequenceFile reference = ReferenceSequenceFileFactory.getReferenceSequenceFile(inputFile); + ReferenceSequence sequence = reference.nextSequence(); + + // Target file for output + PackUtils.writeReferenceSequence( new File(argv[1]), sequence.getBases() ); + + // Reverse the bases in the reference + PackUtils.reverse(sequence.getBases()); + + // Target file for output + PackUtils.writeReferenceSequence( new File(argv[2]), sequence.getBases() ); + } +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/reference/packing/PackUtils.java b/public/java/src/org/broadinstitute/sting/alignment/reference/packing/PackUtils.java new file mode 100644 index 000000000..972e31cf0 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/reference/packing/PackUtils.java @@ -0,0 +1,135 @@ +package org.broadinstitute.sting.alignment.reference.packing; + +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.nio.ByteOrder; + +/** + * Utilities designed for packing / unpacking bases. + * + * @author mhanna + * @version 0.1 + */ +public class PackUtils { + /** + * How many possible bases can be encoded? + */ + public static final int ALPHABET_SIZE = 4; + + /** + * How many bits does it take to store a single base? + */ + public static final int BITS_PER_BASE = (int)(Math.log(ALPHABET_SIZE)/Math.log(2)); + + /** + * How many bits fit into a single byte? + */ + public static final int BITS_PER_BYTE = 8; + + /** + * Writes a reference sequence to a PAC file. + * @param outputFile Filename for the PAC file. + * @param referenceSequence Reference sequence to write. + * @throws IOException If there's a problem writing to the output file. + */ + public static void writeReferenceSequence( File outputFile, byte[] referenceSequence ) throws IOException { + OutputStream outputStream = new FileOutputStream(outputFile); + + BasePackedOutputStream basePackedOutputStream = new BasePackedOutputStream(Byte.class, outputStream, ByteOrder.BIG_ENDIAN); + basePackedOutputStream.write(referenceSequence); + + outputStream.write(referenceSequence.length%PackUtils.ALPHABET_SIZE); + + outputStream.close(); + } + + + /** + * How many bits can a given type hold? + * @param type Type to test. + * @return Number of bits that the given type can hold. + */ + public static int bitsInType( Class type ) { + try { + long typeSize = type.getField("MAX_VALUE").getLong(null) - type.getField("MIN_VALUE").getLong(null)+1; + long intTypeSize = (long)Integer.MAX_VALUE - (long)Integer.MIN_VALUE + 1; + if( typeSize > intTypeSize ) + throw new ReviewedStingException("Cannot determine number of bits available in type: " + type.getName()); + return (int)(Math.log(typeSize)/Math.log(2)); + } + catch( NoSuchFieldException ex ) { + throw new ReviewedStingException("Cannot determine number of bits available in type: " + type.getName(),ex); + } + catch( IllegalAccessException ex ) { + throw new ReviewedStingException("Cannot determine number of bits available in type: " + type.getName(),ex); + } + } + + /** + * Gets the two-bit representation of a base. A=00b, C=01b, G=10b, T=11b. + * @param base ASCII value for the base to pack. + * @return A byte from 0-3 indicating the base's packed value. + */ + public static byte packBase(byte base) { + switch( base ) { + case 'A': + return 0; + case 'C': + return 1; + case 'G': + return 2; + case 'T': + return 3; + default: + throw new ReviewedStingException("Unknown base type: " + base); + } + } + + /** + * Converts a two-bit representation of a base into an ASCII representation of a base. + * @param pack Byte from 0-3 indicating which base is represented. + * @return An ASCII value representing the packed base. + */ + public static byte unpackBase(byte pack) { + switch( pack ) { + case 0: + return 'A'; + case 1: + return 'C'; + case 2: + return 'G'; + case 3: + return 'T'; + default: + throw new ReviewedStingException("Unknown pack type: " + pack); + } + } + + /** + * Reverses an unpacked sequence of bases. + * @param bases bases to reverse. + */ + public static void reverse( byte[] bases ) { + for( int i = 0, j = bases.length-1; i < j; i++, j-- ) { + byte temp = bases[j]; + bases[j] = bases[i]; + bases[i] = temp; + } + } + + /** + * Given a structure of size size that should be split + * into partitionSize partitions, how many partitions should + * be created? Size of last partition will be <= partitionSize. + * @param size Total size of the data structure. + * @param partitionSize Size of an individual partition. + * @return Number of partitions that would be created. + */ + public static int numberOfPartitions( long size, long partitionSize ) { + return (int)((size+partitionSize-1) / partitionSize); + } +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/reference/packing/UnsignedIntPackedInputStream.java b/public/java/src/org/broadinstitute/sting/alignment/reference/packing/UnsignedIntPackedInputStream.java new file mode 100644 index 000000000..999e54451 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/reference/packing/UnsignedIntPackedInputStream.java @@ -0,0 +1,104 @@ +package org.broadinstitute.sting.alignment.reference.packing; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.channels.FileChannel; + +/** + * Read a set of integers packed into + * + * @author mhanna + * @version 0.1 + */ +public class UnsignedIntPackedInputStream { + /** + * Ultimate target for the occurrence array. + */ + private final FileInputStream targetInputStream; + + /** + * Target channel from which to pull file data. + */ + private final FileChannel targetInputChannel; + + /** + * The byte order in which integer input data appears. + */ + private final ByteOrder byteOrder; + + /** + * How many bytes are required to store an integer? + */ + private final int bytesPerInteger = PackUtils.bitsInType(Integer.class)/PackUtils.BITS_PER_BYTE; + + /** + * Create a new PackedIntInputStream, writing to the given target file. + * @param inputFile target input file. + * @param byteOrder Endianness to use when writing a list of integers. + * @throws java.io.IOException if an I/O error occurs. + */ + public UnsignedIntPackedInputStream(File inputFile, ByteOrder byteOrder) throws IOException { + this(new FileInputStream(inputFile),byteOrder); + } + + /** + * Read ints from the given InputStream. + * @param inputStream Input stream from which to read ints. + * @param byteOrder Endianness to use when writing a list of integers. + */ + public UnsignedIntPackedInputStream(FileInputStream inputStream, ByteOrder byteOrder) { + this.targetInputStream = inputStream; + this.targetInputChannel = inputStream.getChannel(); + this.byteOrder = byteOrder; + } + + /** + * Read a datum from the input stream. + * @return The next input datum in the stream. + * @throws IOException if an I/O error occurs. + */ + public long read() throws IOException { + long[] data = new long[1]; + read(data); + return data[0]; + } + + /** + * Read the data from the input stream. + * @param data placeholder for input data. + * @throws IOException if an I/O error occurs. + */ + public void read( long[] data ) throws IOException { + read( data, 0, data.length ); + } + + /** + * Read the data from the input stream, starting at the given offset. + * @param data placeholder for input data. + * @param offset place in the array to start reading in data. + * @param length number of ints to read in. + * @throws IOException if an I/O error occurs. + */ + public void read( long[] data, int offset, int length ) throws IOException { + ByteBuffer readBuffer = ByteBuffer.allocate(bytesPerInteger*length).order(byteOrder); + + targetInputChannel.read(readBuffer,targetInputChannel.position()); + readBuffer.flip(); + targetInputChannel.position(targetInputChannel.position()+readBuffer.remaining()); + + int i = 0; + while(i < length) + data[offset+i++] = readBuffer.getInt() & 0xFFFFFFFFL; + } + + /** + * Closes the given output stream. + * @throws IOException if an I/O error occurs. + */ + public void close() throws IOException { + targetInputStream.close(); + } +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/reference/packing/UnsignedIntPackedOutputStream.java b/public/java/src/org/broadinstitute/sting/alignment/reference/packing/UnsignedIntPackedOutputStream.java new file mode 100755 index 000000000..b02024366 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/reference/packing/UnsignedIntPackedOutputStream.java @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2009 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.alignment.reference.packing; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; + +/** + * Writes an list of integers to the output file. + * + * @author mhanna + * @version 0.1 + */ +public class UnsignedIntPackedOutputStream { + /** + * Ultimate target for the occurrence array. + */ + private final OutputStream targetOutputStream; + + /** + * A fixed-size buffer for int-packed data. + */ + private final ByteBuffer buffer; + + /** + * Create a new PackedIntOutputStream, writing to the given target file. + * @param outputFile target output file. + * @param byteOrder Endianness to use when writing a list of integers. + * @throws IOException if an I/O error occurs. + */ + public UnsignedIntPackedOutputStream(File outputFile, ByteOrder byteOrder) throws IOException { + this(new FileOutputStream(outputFile),byteOrder); + } + + /** + * Write packed ints to the given OutputStream. + * @param outputStream Output stream to which to write packed ints. + * @param byteOrder Endianness to use when writing a list of integers. + */ + public UnsignedIntPackedOutputStream(OutputStream outputStream, ByteOrder byteOrder) { + this.targetOutputStream = outputStream; + buffer = ByteBuffer.allocate(PackUtils.bitsInType(Integer.class)/PackUtils.BITS_PER_BYTE).order(byteOrder); + } + + /** + * Write the data to the output stream. + * @param datum datum to write. + * @throws IOException if an I/O error occurs. + */ + public void write( long datum ) throws IOException { + buffer.rewind(); + buffer.putInt((int)datum); + targetOutputStream.write(buffer.array()); + } + + /** + * Write the data to the output stream. + * @param data data to write. occurrences.length must match alphabet size. + * @throws IOException if an I/O error occurs. + */ + public void write( long[] data ) throws IOException { + for(long datum: data) + write(datum); + } + + /** + * Write the given chunk of data to the input stream. + * @param data data to write. + * @param offset position at which to start. + * @param length number of ints to write. + * @throws IOException if an I/O error occurs. + */ + public void write( long[] data, int offset, int length ) throws IOException { + for( int i = offset; i < offset+length; i++ ) + write(data[i]); + } + + /** + * Flush the contents of the OutputStream to disk. + * @throws IOException if an I/O error occurs. + */ + public void flush() throws IOException { + targetOutputStream.flush(); + } + + /** + * Closes the given output stream. + * @throws IOException if an I/O error occurs. + */ + public void close() throws IOException { + targetOutputStream.close(); + } + +} diff --git a/public/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java b/public/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java index f8e298d88..b9e380295 100755 --- a/public/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java +++ b/public/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java @@ -25,20 +25,21 @@ package org.broadinstitute.sting.analyzecovariates; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.CommandLineProgram; import org.broadinstitute.sting.commandline.Input; -import org.broadinstitute.sting.gatk.walkers.recalibration.*; +import org.broadinstitute.sting.gatk.walkers.recalibration.Covariate; +import org.broadinstitute.sting.gatk.walkers.recalibration.RecalDatum; +import org.broadinstitute.sting.gatk.walkers.recalibration.RecalibrationArgumentCollection; import org.broadinstitute.sting.utils.classloader.PluginManager; import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException; import org.broadinstitute.sting.utils.text.XReadLines; -import org.broadinstitute.sting.commandline.CommandLineProgram; -import org.broadinstitute.sting.commandline.Argument; +import java.io.*; import java.util.ArrayList; import java.util.Collection; -import java.util.List; import java.util.Map; import java.util.regex.Pattern; -import java.io.*; /** * Created by IntelliJ IDEA. diff --git a/public/java/src/org/broadinstitute/sting/commandline/Argument.java b/public/java/src/org/broadinstitute/sting/commandline/Argument.java index b2ee9d1fc..33592287d 100755 --- a/public/java/src/org/broadinstitute/sting/commandline/Argument.java +++ b/public/java/src/org/broadinstitute/sting/commandline/Argument.java @@ -25,12 +25,7 @@ package org.broadinstitute.sting.commandline; -import java.lang.annotation.Documented; -import java.lang.annotation.ElementType; -import java.lang.annotation.Inherited; -import java.lang.annotation.Retention; -import java.lang.annotation.RetentionPolicy; -import java.lang.annotation.Target; +import java.lang.annotation.*; /** * Created by IntelliJ IDEA. diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentDefinitionGroup.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentDefinitionGroup.java index c36a8e04f..b47677b08 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/ArgumentDefinitionGroup.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentDefinitionGroup.java @@ -27,10 +27,10 @@ package org.broadinstitute.sting.commandline; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import java.util.List; -import java.util.Collections; import java.util.ArrayList; +import java.util.Collections; import java.util.Iterator; +import java.util.List; /** * A group of argument definitions. diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentDefinitions.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentDefinitions.java index 39e698ca3..8e3f753a8 100755 --- a/public/java/src/org/broadinstitute/sting/commandline/ArgumentDefinitions.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentDefinitions.java @@ -27,10 +27,10 @@ package org.broadinstitute.sting.commandline; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import java.util.Set; -import java.util.HashSet; import java.util.Collection; +import java.util.HashSet; import java.util.Iterator; +import java.util.Set; /** * A collection of argument definitions. @@ -174,7 +174,8 @@ public class ArgumentDefinitions implements Iterable { static DefinitionMatcher VerifiableDefinitionMatcher = new DefinitionMatcher() { public boolean matches( ArgumentDefinition definition, Object key ) { - return definition.validation != null; + // We can perform some sort of validation for anything that isn't a flag. + return !definition.isFlag; } }; } diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatch.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatch.java index 60ed8c899..351583c07 100755 --- a/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatch.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatch.java @@ -44,7 +44,7 @@ public class ArgumentMatch implements Iterable { public final String label; /** - * Maps indicies of command line arguments to values paired with that argument. + * Maps indices of command line arguments to values paired with that argument. */ public final SortedMap> indices = new TreeMap>(); diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java index 6c50e1784..9c33e084d 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java @@ -25,12 +25,12 @@ package org.broadinstitute.sting.commandline; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.classloader.JVMUtils; +import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.walkers.Multiplex; import org.broadinstitute.sting.gatk.walkers.Multiplexer; -import org.apache.log4j.Logger; +import org.broadinstitute.sting.utils.classloader.JVMUtils; import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import java.lang.annotation.Annotation; diff --git a/public/java/src/org/broadinstitute/sting/commandline/CommandLineProgram.java b/public/java/src/org/broadinstitute/sting/commandline/CommandLineProgram.java index d404a2b6e..d88e7030e 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/CommandLineProgram.java +++ b/public/java/src/org/broadinstitute/sting/commandline/CommandLineProgram.java @@ -25,19 +25,25 @@ package org.broadinstitute.sting.commandline; -import org.apache.log4j.*; +import org.apache.log4j.FileAppender; +import org.apache.log4j.Level; +import org.apache.log4j.Logger; +import org.apache.log4j.PatternLayout; import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.help.ApplicationDetails; import org.broadinstitute.sting.utils.help.HelpFormatter; import java.io.IOException; -import java.util.*; +import java.util.Collection; +import java.util.Collections; +import java.util.EnumSet; +import java.util.Locale; public abstract class CommandLineProgram { /** The command-line program and the arguments it returned. */ - protected ParsingEngine parser = null; + public ParsingEngine parser = null; /** the default log level */ @Argument(fullName = "logging_level", @@ -138,6 +144,11 @@ public abstract class CommandLineProgram { public static int result = -1; + @SuppressWarnings("unchecked") + public static void start(CommandLineProgram clp, String[] args) throws Exception { + start(clp, args, false); + } + /** * This function is called to start processing the command line, and kick * off the execute message of the program. @@ -147,7 +158,7 @@ public abstract class CommandLineProgram { * @throws Exception when an exception occurs */ @SuppressWarnings("unchecked") - public static void start(CommandLineProgram clp, String[] args) throws Exception { + public static void start(CommandLineProgram clp, String[] args, boolean dryRun) throws Exception { try { // setup our log layout @@ -174,8 +185,9 @@ public abstract class CommandLineProgram { // - InvalidArgument in case these arguments are specified by plugins. // - MissingRequiredArgument in case the user requested help. Handle that later, once we've // determined the full complement of arguments. - parser.validate(EnumSet.of(ParsingEngine.ValidationType.MissingRequiredArgument, - ParsingEngine.ValidationType.InvalidArgument)); + if ( ! dryRun ) + parser.validate(EnumSet.of(ParsingEngine.ValidationType.MissingRequiredArgument, + ParsingEngine.ValidationType.InvalidArgument)); parser.loadArgumentsIntoObject(clp); // Initialize the logger using the loaded command line. @@ -189,36 +201,40 @@ public abstract class CommandLineProgram { if (isHelpPresent(parser)) printHelpAndExit(clp, parser); - parser.validate(); + if ( ! dryRun ) parser.validate(); } else { parser.parse(args); - if (isHelpPresent(parser)) - printHelpAndExit(clp, parser); + if ( ! dryRun ) { + if (isHelpPresent(parser)) + printHelpAndExit(clp, parser); - parser.validate(); + parser.validate(); + } parser.loadArgumentsIntoObject(clp); // Initialize the logger using the loaded command line. clp.setupLoggerLevel(layout); } - // if they specify a log location, output our data there - if (clp.toFile != null) { - FileAppender appender; - try { - appender = new FileAppender(layout, clp.toFile, false); - logger.addAppender(appender); - } catch (IOException e) { - throw new RuntimeException("Unable to re-route log output to " + clp.toFile + " make sure the destination exists"); + if ( ! dryRun ) { + // if they specify a log location, output our data there + if (clp.toFile != null) { + FileAppender appender; + try { + appender = new FileAppender(layout, clp.toFile, false); + logger.addAppender(appender); + } catch (IOException e) { + throw new RuntimeException("Unable to re-route log output to " + clp.toFile + " make sure the destination exists"); + } } + + // regardless of what happens next, generate the header information + HelpFormatter.generateHeaderInformation(clp.getApplicationDetails(), args); + + // call the execute + CommandLineProgram.result = clp.execute(); } - - // regardless of what happens next, generate the header information - HelpFormatter.generateHeaderInformation(clp.getApplicationDetails(), args); - - // call the execute - CommandLineProgram.result = clp.execute(); } catch (ArgumentException e) { clp.parser.printHelp(clp.getApplicationDetails()); diff --git a/public/java/src/org/broadinstitute/sting/commandline/CommandLineUtils.java b/public/java/src/org/broadinstitute/sting/commandline/CommandLineUtils.java index 99608f167..bd2006388 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/CommandLineUtils.java +++ b/public/java/src/org/broadinstitute/sting/commandline/CommandLineUtils.java @@ -25,11 +25,17 @@ package org.broadinstitute.sting.commandline; -import org.apache.log4j.*; +import org.apache.log4j.Appender; +import org.apache.log4j.ConsoleAppender; +import org.apache.log4j.Logger; +import org.apache.log4j.PatternLayout; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import java.util.*; import java.lang.annotation.Annotation; +import java.util.Collections; +import java.util.Enumeration; +import java.util.LinkedHashMap; +import java.util.Map; /** * Static utility methods for working with command-line arguments. diff --git a/public/java/src/org/broadinstitute/sting/commandline/MissingArgumentValueException.java b/public/java/src/org/broadinstitute/sting/commandline/MissingArgumentValueException.java index 8029db7b3..4e6c3a16f 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/MissingArgumentValueException.java +++ b/public/java/src/org/broadinstitute/sting/commandline/MissingArgumentValueException.java @@ -26,8 +26,6 @@ package org.broadinstitute.sting.commandline; import org.broadinstitute.sting.utils.Utils; -import java.util.Collection; - /** * Specifies that a value was missing when attempting to populate an argument. */ diff --git a/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java b/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java index 717c5c522..b7efcd278 100755 --- a/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java @@ -25,27 +25,32 @@ package org.broadinstitute.sting.commandline; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.collections.Pair; -import org.broadinstitute.sting.utils.classloader.JVMUtils; +import org.apache.log4j.Logger; import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.classloader.JVMUtils; +import org.broadinstitute.sting.utils.collections.Pair; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.help.ApplicationDetails; import org.broadinstitute.sting.utils.help.HelpFormatter; -import org.apache.log4j.Logger; -import java.lang.reflect.*; +import java.lang.reflect.Field; import java.util.*; /** * A parser for Sting command-line arguments. */ public class ParsingEngine { + /** + * The loaded argument sources along with their back definitions. + */ + private Map argumentSourcesByDefinition = new HashMap(); + /** * A list of defined arguments against which command lines are matched. * Package protected for testing access. */ - ArgumentDefinitions argumentDefinitions = new ArgumentDefinitions(); + public ArgumentDefinitions argumentDefinitions = new ArgumentDefinitions(); /** * A list of matches from defined arguments to command-line text. @@ -107,8 +112,13 @@ public class ParsingEngine { */ public void addArgumentSource( String sourceName, Class sourceClass ) { List argumentsFromSource = new ArrayList(); - for( ArgumentSource argumentSource: extractArgumentSources(sourceClass) ) - argumentsFromSource.addAll( argumentSource.createArgumentDefinitions() ); + for( ArgumentSource argumentSource: extractArgumentSources(sourceClass) ) { + List argumentDefinitions = argumentSource.createArgumentDefinitions(); + for(ArgumentDefinition argumentDefinition: argumentDefinitions) { + argumentSourcesByDefinition.put(argumentDefinition,argumentSource); + argumentsFromSource.add( argumentDefinition ); + } + } argumentDefinitions.add( new ArgumentDefinitionGroup(sourceName, argumentsFromSource) ); } @@ -199,16 +209,25 @@ public class ParsingEngine { throw new InvalidArgumentException( invalidArguments ); } - // Find invalid argument values (arguments that fail the regexp test. + // Find invalid argument values -- invalid arguments are either completely missing or fail the specified 'validation' regular expression. if( !skipValidationOf.contains(ValidationType.InvalidArgumentValue) ) { Collection verifiableArguments = argumentDefinitions.findArgumentDefinitions( null, ArgumentDefinitions.VerifiableDefinitionMatcher ); Collection> invalidValues = new ArrayList>(); for( ArgumentDefinition verifiableArgument: verifiableArguments ) { ArgumentMatches verifiableMatches = argumentMatches.findMatches( verifiableArgument ); + // Check to see whether an argument value was specified. Argument values must be provided + // when the argument name is specified and the argument is not a flag type. + for(ArgumentMatch verifiableMatch: verifiableMatches) { + ArgumentSource argumentSource = argumentSourcesByDefinition.get(verifiableArgument); + if(verifiableMatch.values().size() == 0 && !verifiableArgument.isFlag && argumentSource.createsTypeDefault()) + invalidValues.add(new Pair(verifiableArgument,null)); + } + + // Ensure that the field contents meet the validation criteria specified by the regular expression. for( ArgumentMatch verifiableMatch: verifiableMatches ) { for( String value: verifiableMatch.values() ) { - if( !value.matches(verifiableArgument.validation) ) + if( verifiableArgument.validation != null && !value.matches(verifiableArgument.validation) ) invalidValues.add( new Pair(verifiableArgument, value) ); } } @@ -515,10 +534,14 @@ class InvalidArgumentValueException extends ArgumentException { private static String formatArguments( Collection> invalidArgumentValues ) { StringBuilder sb = new StringBuilder(); for( Pair invalidValue: invalidArgumentValues ) { - sb.append( String.format("%nArgument '--%s' has value of incorrect format: %s (should match %s)", - invalidValue.first.fullName, - invalidValue.second, - invalidValue.first.validation) ); + if(invalidValue.getSecond() == null) + sb.append( String.format("%nArgument '--%s' requires a value but none was provided", + invalidValue.first.fullName) ); + else + sb.append( String.format("%nArgument '--%s' has value of incorrect format: %s (should match %s)", + invalidValue.first.fullName, + invalidValue.second, + invalidValue.first.validation) ); } return sb.toString(); } diff --git a/public/java/src/org/broadinstitute/sting/commandline/ParsingMethod.java b/public/java/src/org/broadinstitute/sting/commandline/ParsingMethod.java index d1cda3ed9..a070cb5a1 100755 --- a/public/java/src/org/broadinstitute/sting/commandline/ParsingMethod.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ParsingMethod.java @@ -27,10 +27,8 @@ package org.broadinstitute.sting.commandline; import org.broadinstitute.sting.utils.Utils; -import java.util.regex.Pattern; import java.util.regex.Matcher; -import java.util.List; -import java.util.ArrayList; +import java.util.regex.Pattern; /** * Holds a pattern, along with how to get to the argument definitions that could match that pattern. diff --git a/public/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java b/public/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java index fd7e749c3..a080ab439 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java +++ b/public/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java @@ -25,30 +25,21 @@ package org.broadinstitute.sting.gatk; -import org.broadinstitute.sting.commandline.Tags; -import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection; -import org.broadinstitute.sting.commandline.CommandLineProgram; import org.broadinstitute.sting.commandline.ArgumentTypeDescriptor; -import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID; +import org.broadinstitute.sting.commandline.CommandLineProgram; +import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection; import org.broadinstitute.sting.gatk.filters.ReadFilter; import org.broadinstitute.sting.gatk.io.stubs.OutputStreamArgumentTypeDescriptor; import org.broadinstitute.sting.gatk.io.stubs.SAMFileReaderArgumentTypeDescriptor; import org.broadinstitute.sting.gatk.io.stubs.SAMFileWriterArgumentTypeDescriptor; import org.broadinstitute.sting.gatk.io.stubs.VCFWriterArgumentTypeDescriptor; import org.broadinstitute.sting.gatk.phonehome.GATKRunReport; -import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet; -import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet.RMDStorageType; -import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; import org.broadinstitute.sting.gatk.walkers.Walker; - -import java.io.File; -import java.io.FileNotFoundException; -import java.util.*; - -import net.sf.picard.filter.SamRecordFilter; -import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.text.ListFileUtils; -import org.broadinstitute.sting.utils.text.XReadLines; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; /** * @author aaron diff --git a/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java b/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java index 7982f61e2..2af29ea70 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java +++ b/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java @@ -26,27 +26,31 @@ package org.broadinstitute.sting.gatk; import org.broad.tribble.TribbleException; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.ArgumentCollection; +import org.broadinstitute.sting.commandline.CommandLineProgram; import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection; +import org.broadinstitute.sting.gatk.filters.ReadFilter; import org.broadinstitute.sting.gatk.walkers.Attribution; -import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.text.TextFormattingUtils; -import org.broadinstitute.sting.utils.help.ApplicationDetails; -import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.walkers.Walker; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.help.ApplicationDetails; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; +import org.broadinstitute.sting.utils.text.TextFormattingUtils; import java.util.*; /** - * @author aaron - * @version 1.0 - * @date May 8, 2009 - *

- * Class CommandLineGATK - *

+ * The GATK engine itself. Manages map/reduce data access and runs walkers. + * * We run command line GATK programs using this class. It gets the command line args, parses them, and hands the * gatk all the parsed out information. Pretty much anything dealing with the underlying system should go here, * the gatk engine should deal with any data related information. */ +@DocumentedGATKFeature( + groupName = "GATK Engine", + summary = "Features and arguments for the GATK engine itself, available to all walkers.", + extraDocs = { ReadFilter.class, UserException.class }) public class CommandLineGATK extends CommandLineExecutable { @Argument(fullName = "analysis_type", shortName = "T", doc = "Type of analysis to run") private String analysisName = null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index f8527c33b..918bc1251 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -28,24 +28,14 @@ import net.sf.picard.reference.IndexedFastaSequenceFile; import net.sf.picard.reference.ReferenceSequenceFile; import net.sf.samtools.*; import org.apache.log4j.Logger; -import org.broadinstitute.sting.commandline.ArgumentException; -import org.broadinstitute.sting.commandline.ArgumentSource; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.commandline.CommandLineUtils; -import org.broadinstitute.sting.commandline.ParsingEngine; -import org.broadinstitute.sting.commandline.Tags; +import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection; import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; -import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID; -import org.broadinstitute.sting.gatk.datasources.reads.Shard; +import org.broadinstitute.sting.gatk.datasources.reads.*; import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.datasources.sample.Sample; import org.broadinstitute.sting.gatk.datasources.sample.SampleDataSource; -import org.broadinstitute.sting.gatk.datasources.reads.MonolithicShardStrategy; -import org.broadinstitute.sting.gatk.datasources.reads.ShardStrategy; -import org.broadinstitute.sting.gatk.datasources.reads.ShardStrategyFactory; -import org.broadinstitute.sting.gatk.datasources.reads.SAMDataSource; import org.broadinstitute.sting.gatk.executive.MicroScheduler; import org.broadinstitute.sting.gatk.filters.FilterManager; import org.broadinstitute.sting.gatk.filters.ReadFilter; @@ -65,6 +55,7 @@ import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.interval.IntervalUtils; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.io.File; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/ReadMetrics.java b/public/java/src/org/broadinstitute/sting/gatk/ReadMetrics.java index 27a86ab3f..7cb615f7f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/ReadMetrics.java +++ b/public/java/src/org/broadinstitute/sting/gatk/ReadMetrics.java @@ -25,13 +25,12 @@ package org.broadinstitute.sting.gatk; import net.sf.picard.filter.SamRecordFilter; - -import java.util.Map; -import java.util.HashMap; -import java.util.Collections; - import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; + /** * Holds a bunch of basic information about the traversal. */ diff --git a/public/java/src/org/broadinstitute/sting/gatk/ReadProperties.java b/public/java/src/org/broadinstitute/sting/gatk/ReadProperties.java index 1a361029a..93fa2d146 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/ReadProperties.java +++ b/public/java/src/org/broadinstitute/sting/gatk/ReadProperties.java @@ -1,6 +1,5 @@ package org.broadinstitute.sting.gatk; -import net.sf.picard.filter.SamRecordFilter; import net.sf.picard.reference.IndexedFastaSequenceFile; import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMFileReader; diff --git a/public/java/src/org/broadinstitute/sting/gatk/WalkerManager.java b/public/java/src/org/broadinstitute/sting/gatk/WalkerManager.java index 9553f651e..6aeb42faa 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/WalkerManager.java +++ b/public/java/src/org/broadinstitute/sting/gatk/WalkerManager.java @@ -25,19 +25,16 @@ package org.broadinstitute.sting.gatk; -import net.sf.picard.filter.SamRecordFilter; import org.broadinstitute.sting.commandline.Hidden; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.filters.FilterManager; import org.broadinstitute.sting.gatk.filters.ReadFilter; import org.broadinstitute.sting.gatk.walkers.*; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.classloader.PluginManager; -import org.broadinstitute.sting.utils.text.TextFormattingUtils; -import org.broadinstitute.sting.utils.help.DescriptionTaglet; -import org.broadinstitute.sting.utils.help.DisplayNameTaglet; -import org.broadinstitute.sting.utils.help.SummaryTaglet; import org.broadinstitute.sting.utils.baq.BAQ; +import org.broadinstitute.sting.utils.classloader.PluginManager; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.help.ResourceBundleExtractorDoclet; +import org.broadinstitute.sting.utils.text.TextFormattingUtils; import java.util.*; @@ -83,19 +80,10 @@ public class WalkerManager extends PluginManager { * @return A suitable display name for the package. */ public String getPackageDisplayName(String packageName) { - // Try to find an override for the display name of this package. - String displayNameKey = String.format("%s.%s",packageName,DisplayNameTaglet.NAME); - String displayName; - if(helpText.containsKey(displayNameKey)) { - displayName = helpText.getString(displayNameKey); - } - else { - // If no override exists... - // ...try to compute the override from the text of the package name, while accounting for - // unpackaged walkers. - displayName = packageName.substring(packageName.lastIndexOf('.')+1); - if(displayName.trim().equals("")) displayName = ""; - } + // ...try to compute the override from the text of the package name, while accounting for + // unpackaged walkers. + String displayName = packageName.substring(packageName.lastIndexOf('.')+1); + if (displayName.trim().equals("")) displayName = ""; return displayName; } @@ -105,7 +93,7 @@ public class WalkerManager extends PluginManager { * @return Package help text, or "" if none exists. */ public String getPackageSummaryText(String packageName) { - String key = String.format("%s.%s",packageName,SummaryTaglet.NAME); + String key = String.format("%s.%s",packageName, ResourceBundleExtractorDoclet.SUMMARY_TAGLET_NAME); if(!helpText.containsKey(key)) return ""; return helpText.getString(key); @@ -117,7 +105,7 @@ public class WalkerManager extends PluginManager { * @return Walker summary description, or "" if none exists. */ public String getWalkerSummaryText(Class walkerType) { - String walkerSummary = String.format("%s.%s",walkerType.getName(), SummaryTaglet.NAME); + String walkerSummary = String.format("%s.%s",walkerType.getName(), ResourceBundleExtractorDoclet.SUMMARY_TAGLET_NAME); if(!helpText.containsKey(walkerSummary)) return ""; return helpText.getString(walkerSummary); @@ -138,7 +126,7 @@ public class WalkerManager extends PluginManager { * @return Walker full description, or "" if none exists. */ public String getWalkerDescriptionText(Class walkerType) { - String walkerDescription = String.format("%s.%s",walkerType.getName(), DescriptionTaglet.NAME); + String walkerDescription = String.format("%s.%s",walkerType.getName(), ResourceBundleExtractorDoclet.DESCRIPTION_TAGLET_NAME); if(!helpText.containsKey(walkerDescription)) return ""; return helpText.getString(walkerDescription); diff --git a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java index 93638f21d..ee2e85025 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java @@ -26,17 +26,16 @@ package org.broadinstitute.sting.gatk.arguments; import net.sf.samtools.SAMFileReader; -import org.broadinstitute.sting.commandline.Hidden; -import org.broadinstitute.sting.gatk.phonehome.GATKRunReport; -import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.interval.IntervalMergingRule; import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Hidden; import org.broadinstitute.sting.commandline.Input; import org.broadinstitute.sting.gatk.DownsampleType; import org.broadinstitute.sting.gatk.DownsamplingMethod; -import org.broadinstitute.sting.utils.interval.IntervalSetRule; +import org.broadinstitute.sting.gatk.phonehome.GATKRunReport; import org.broadinstitute.sting.utils.baq.BAQ; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.interval.IntervalMergingRule; +import org.broadinstitute.sting.utils.interval.IntervalSetRule; import org.simpleframework.xml.*; import org.simpleframework.xml.core.Persister; import org.simpleframework.xml.stream.Format; diff --git a/public/java/src/org/broadinstitute/sting/gatk/contexts/AlignmentContext.java b/public/java/src/org/broadinstitute/sting/gatk/contexts/AlignmentContext.java index 337c2664c..17e4a0743 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/contexts/AlignmentContext.java +++ b/public/java/src/org/broadinstitute/sting/gatk/contexts/AlignmentContext.java @@ -26,13 +26,13 @@ package org.broadinstitute.sting.gatk.contexts; import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.HasGenomeLocation; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; -import java.util.*; +import java.util.List; /** * Useful class for forwarding on locusContext data from this iterator diff --git a/public/java/src/org/broadinstitute/sting/gatk/contexts/AlignmentContextUtils.java b/public/java/src/org/broadinstitute/sting/gatk/contexts/AlignmentContextUtils.java index 6a0d30837..1f9a7d705 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/contexts/AlignmentContextUtils.java +++ b/public/java/src/org/broadinstitute/sting/gatk/contexts/AlignmentContextUtils.java @@ -27,8 +27,8 @@ package org.broadinstitute.sting.gatk.contexts; import net.sf.samtools.SAMReadGroupRecord; import org.broadinstitute.sting.gatk.datasources.sample.Sample; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.pileup.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/contexts/ReferenceContext.java b/public/java/src/org/broadinstitute/sting/gatk/contexts/ReferenceContext.java index 760b3a7bc..376064cdb 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/contexts/ReferenceContext.java +++ b/public/java/src/org/broadinstitute/sting/gatk/contexts/ReferenceContext.java @@ -25,11 +25,12 @@ package org.broadinstitute.sting.gatk.contexts; -import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.BaseUtils; +import com.google.java.contract.Ensures; +import com.google.java.contract.Requires; import net.sf.samtools.util.StringUtil; -import com.google.java.contract.*; +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; /** * The section of the reference that overlaps with the given diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/AllLocusView.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/AllLocusView.java index b36c59a2c..e92599494 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/AllLocusView.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/AllLocusView.java @@ -1,16 +1,14 @@ package org.broadinstitute.sting.gatk.datasources.providers; +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.iterators.GenomeLocusIterator; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl; + +import java.util.Collections; import java.util.List; import java.util.NoSuchElementException; -import java.util.ArrayList; -import java.util.Collections; - -import org.broadinstitute.sting.gatk.iterators.GenomeLocusIterator; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.utils.GenomeLoc; -import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl; /** * User: hanna * Date: May 13, 2009 diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusReferenceView.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusReferenceView.java index 330a9e4f7..ff312bcac 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusReferenceView.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusReferenceView.java @@ -1,13 +1,12 @@ package org.broadinstitute.sting.gatk.datasources.providers; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; +import net.sf.picard.reference.ReferenceSequence; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.walkers.Reference; import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.gatk.walkers.Window; -import org.broadinstitute.sting.gatk.walkers.Reference; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import net.sf.picard.reference.ReferenceSequence; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; /* * Copyright (c) 2009 The Broad Institute * diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusShardDataProvider.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusShardDataProvider.java index 72b962522..55304da34 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusShardDataProvider.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusShardDataProvider.java @@ -1,16 +1,15 @@ package org.broadinstitute.sting.gatk.datasources.providers; +import net.sf.picard.reference.IndexedFastaSequenceFile; +import org.broadinstitute.sting.gatk.ReadProperties; import org.broadinstitute.sting.gatk.datasources.reads.Shard; -import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.iterators.LocusIterator; -import org.broadinstitute.sting.gatk.ReadProperties; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; import java.util.Collection; -import net.sf.picard.reference.IndexedFastaSequenceFile; -import org.broadinstitute.sting.utils.GenomeLocParser; - /** * Presents data sharded by locus to the traversal engine. * diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusView.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusView.java index b467a2ab5..f9ed0cb74 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusView.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusView.java @@ -1,7 +1,7 @@ package org.broadinstitute.sting.gatk.datasources.providers; -import org.broadinstitute.sting.gatk.ReadProperties; import org.broadinstitute.sting.gatk.DownsampleType; +import org.broadinstitute.sting.gatk.ReadProperties; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.iterators.LocusIterator; import org.broadinstitute.sting.utils.GenomeLoc; diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ManagingReferenceOrderedView.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ManagingReferenceOrderedView.java index a91e169c1..223659a46 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ManagingReferenceOrderedView.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ManagingReferenceOrderedView.java @@ -5,10 +5,10 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator; import org.broadinstitute.sting.utils.GenomeLoc; -import java.util.List; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; +import java.util.List; /** * User: hanna * Date: May 21, 2009 diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadReferenceView.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadReferenceView.java index d2c097f5d..3d62faf49 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadReferenceView.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadReferenceView.java @@ -1,9 +1,8 @@ package org.broadinstitute.sting.gatk.datasources.providers; import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.utils.GenomeLoc; /* * Copyright (c) 2009 The Broad Institute * diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadShardDataProvider.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadShardDataProvider.java index 5a672b09f..7843e7518 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadShardDataProvider.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadShardDataProvider.java @@ -1,14 +1,13 @@ package org.broadinstitute.sting.gatk.datasources.providers; +import net.sf.picard.reference.IndexedFastaSequenceFile; import org.broadinstitute.sting.gatk.datasources.reads.Shard; -import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; +import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; +import org.broadinstitute.sting.utils.GenomeLocParser; import java.util.Collection; -import net.sf.picard.reference.IndexedFastaSequenceFile; -import org.broadinstitute.sting.utils.GenomeLocParser; - /** * Present data sharded by read to a traversal engine. * diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadView.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadView.java index 0c4b78a7c..e809092d4 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadView.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadView.java @@ -1,11 +1,10 @@ package org.broadinstitute.sting.gatk.datasources.providers; +import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; -import java.util.Collection; import java.util.Arrays; - -import net.sf.samtools.SAMRecord; +import java.util.Collection; /** * User: hanna * Date: May 22, 2009 diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReferenceView.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReferenceView.java index d6c938f36..efb92235f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReferenceView.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReferenceView.java @@ -1,16 +1,16 @@ package org.broadinstitute.sting.gatk.datasources.providers; -import org.broadinstitute.sting.utils.*; - -import java.util.Collections; -import java.util.Collection; -import java.util.Arrays; - -import net.sf.samtools.SAMSequenceRecord; -import net.sf.samtools.SAMRecord; -import net.sf.picard.reference.ReferenceSequence; import net.sf.picard.reference.IndexedFastaSequenceFile; +import net.sf.picard.reference.ReferenceSequence; +import net.sf.samtools.SAMRecord; +import net.sf.samtools.SAMSequenceRecord; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; /** * User: hanna * Date: May 22, 2009 diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/RodLocusView.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/RodLocusView.java index feed2ab85..39c632539 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/RodLocusView.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/RodLocusView.java @@ -25,9 +25,9 @@ package org.broadinstitute.sting.gatk.datasources.providers; -import org.broadinstitute.sting.gatk.refdata.*; -import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator; import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; import org.broadinstitute.sting.utils.GenomeLoc; diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ShardDataProvider.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ShardDataProvider.java index a0ea32f9b..803bd885b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ShardDataProvider.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ShardDataProvider.java @@ -1,15 +1,14 @@ package org.broadinstitute.sting.gatk.datasources.providers; +import net.sf.picard.reference.IndexedFastaSequenceFile; import org.broadinstitute.sting.gatk.datasources.reads.Shard; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.util.ArrayList; -import java.util.List; import java.util.Collection; - -import net.sf.picard.reference.IndexedFastaSequenceFile; +import java.util.List; /** * User: hanna * Date: May 8, 2009 diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BAMBlockStartIterator.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BAMBlockStartIterator.java index a9e04e357..de938e845 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BAMBlockStartIterator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BAMBlockStartIterator.java @@ -24,9 +24,6 @@ package org.broadinstitute.sting.gatk.datasources.reads; -import net.sf.samtools.SAMFileReader; -import net.sf.samtools.SAMRecord; -import org.apache.commons.lang.ArrayUtils; import org.broadinstitute.sting.utils.exceptions.StingException; import java.io.File; @@ -35,9 +32,7 @@ import java.io.IOException; import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.nio.channels.FileChannel; -import java.util.ArrayList; import java.util.Iterator; -import java.util.List; /** * Created by IntelliJ IDEA. diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BAMSchedule.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BAMSchedule.java index 34693d501..521bcd5a3 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BAMSchedule.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BAMSchedule.java @@ -39,12 +39,7 @@ import java.io.RandomAccessFile; import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.nio.channels.FileChannel; -import java.util.ArrayList; -import java.util.BitSet; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import java.util.Map; +import java.util.*; /** * Writes schedules for a single BAM file to a target output file. diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BAMScheduler.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BAMScheduler.java index 266232c0f..467aebac5 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BAMScheduler.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BAMScheduler.java @@ -26,20 +26,10 @@ package org.broadinstitute.sting.gatk.datasources.reads; import net.sf.picard.util.PeekableIterator; import net.sf.samtools.GATKBAMFileSpan; -import org.apache.log4j.Logger; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocSortedSet; -import java.io.File; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Comparator; -import java.util.HashMap; -import java.util.Iterator; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; -import java.util.NoSuchElementException; +import java.util.*; /** * Assign intervals to the most appropriate blocks, keeping as little as possible in memory at once. diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/FilePointer.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/FilePointer.java index c014c1995..e4141f61c 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/FilePointer.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/FilePointer.java @@ -29,19 +29,11 @@ import net.sf.samtools.GATKBAMFileSpan; import net.sf.samtools.SAMFileSpan; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.GenomeLocSortedSet; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.interval.IntervalMergingRule; import org.broadinstitute.sting.utils.interval.IntervalUtils; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.SortedMap; -import java.util.TreeMap; +import java.util.*; /** * Represents a small section of a BAM file, and every associated interval. diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/GATKBAMIndex.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/GATKBAMIndex.java index 8ebb8b1a8..5d0c38b78 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/GATKBAMIndex.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/GATKBAMIndex.java @@ -23,24 +23,18 @@ */ package org.broadinstitute.sting.gatk.datasources.reads; -import net.sf.samtools.Bin; - -import net.sf.samtools.GATKBAMFileSpan; -import net.sf.samtools.GATKBin; -import net.sf.samtools.GATKChunk; -import net.sf.samtools.LinearIndex; -import net.sf.samtools.SAMException; -import net.sf.samtools.util.RuntimeIOException; +import net.sf.samtools.*; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.io.File; import java.io.FileInputStream; import java.io.IOException; -import java.lang.ref.SoftReference; import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.nio.channels.FileChannel; -import java.util.*; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; /** * A basic interface for querying BAM indices. diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/GATKBAMIndexData.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/GATKBAMIndexData.java index f9b998a60..daf1b77e3 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/GATKBAMIndexData.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/GATKBAMIndexData.java @@ -24,11 +24,7 @@ package org.broadinstitute.sting.gatk.datasources.reads; -import net.sf.samtools.Bin; -import net.sf.samtools.GATKBAMFileSpan; -import net.sf.samtools.GATKBin; -import net.sf.samtools.GATKChunk; -import net.sf.samtools.LinearIndex; +import net.sf.samtools.*; import java.util.ArrayList; import java.util.Collections; diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/IntervalSharder.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/IntervalSharder.java index fc3f76ab7..4ddf28dce 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/IntervalSharder.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/IntervalSharder.java @@ -35,16 +35,7 @@ import org.broadinstitute.sting.utils.GenomeLocSortedSet; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.Iterator; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; -import java.util.NoSuchElementException; -import java.util.PriorityQueue; -import java.util.Queue; +import java.util.*; /** * Shard intervals based on position within the BAM file. diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/LocusShard.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/LocusShard.java index 26af890b4..19d33aa6b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/LocusShard.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/LocusShard.java @@ -1,5 +1,6 @@ package org.broadinstitute.sting.gatk.datasources.reads; +import net.sf.samtools.SAMFileSpan; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.Utils; @@ -7,8 +8,6 @@ import org.broadinstitute.sting.utils.Utils; import java.util.List; import java.util.Map; -import net.sf.samtools.SAMFileSpan; - /** * Handles locus shards of BAM information. * @author aaron diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/LocusShardStrategy.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/LocusShardStrategy.java index 950d67428..a5ca07853 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/LocusShardStrategy.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/LocusShardStrategy.java @@ -24,17 +24,18 @@ package org.broadinstitute.sting.gatk.datasources.reads; -import net.sf.samtools.GATKBAMFileSpan; -import org.broadinstitute.sting.utils.GenomeLocSortedSet; +import net.sf.picard.reference.IndexedFastaSequenceFile; +import net.sf.samtools.SAMFileHeader; +import net.sf.samtools.SAMFileSpan; +import net.sf.samtools.SAMSequenceRecord; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.GenomeLocSortedSet; -import java.util.*; - -import net.sf.samtools.SAMFileHeader; -import net.sf.samtools.SAMSequenceRecord; -import net.sf.samtools.SAMFileSpan; -import net.sf.picard.reference.IndexedFastaSequenceFile; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.Map; /** * A sharding strategy for loci based on reading of the index. diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/MonolithicShard.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/MonolithicShard.java index 7579c22f6..278eeb898 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/MonolithicShard.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/MonolithicShard.java @@ -1,8 +1,8 @@ package org.broadinstitute.sting.gatk.datasources.reads; +import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.GenomeLoc; import java.util.List; diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/MonolithicShardStrategy.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/MonolithicShardStrategy.java index 10228ecd7..28b737f28 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/MonolithicShardStrategy.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/MonolithicShardStrategy.java @@ -4,8 +4,8 @@ import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import java.util.Iterator; -import java.util.NoSuchElementException; import java.util.List; +import java.util.NoSuchElementException; /** * Create a giant shard representing all the data in the input BAM(s). diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/ReadShard.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/ReadShard.java index 9aecd7779..4d9c9092d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/ReadShard.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/ReadShard.java @@ -1,15 +1,17 @@ package org.broadinstitute.sting.gatk.datasources.reads; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; -import org.broadinstitute.sting.gatk.iterators.StingSAMIteratorAdapter; - -import java.util.*; - import net.sf.samtools.SAMFileSpan; import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; +import org.broadinstitute.sting.gatk.iterators.StingSAMIteratorAdapter; +import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Map; + /** * * User: aaron diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/ReadShardStrategy.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/ReadShardStrategy.java index da70a615b..c2235ec73 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/ReadShardStrategy.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/ReadShardStrategy.java @@ -25,12 +25,11 @@ package org.broadinstitute.sting.gatk.datasources.reads; import net.sf.samtools.SAMFileSpan; - -import java.util.*; - import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocSortedSet; +import java.util.*; + /** * The sharding strategy for reads using a simple counting mechanism. Each read shard * has a specific number of reads (default to 10K) which is configured in the constructor. diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java index c2aa5f18e..572970349 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java @@ -24,21 +24,19 @@ package org.broadinstitute.sting.gatk.datasources.reads; +import net.sf.picard.reference.IndexedFastaSequenceFile; +import net.sf.picard.sam.MergingSamRecordIterator; +import net.sf.picard.sam.SamFileHeaderMerger; import net.sf.samtools.*; import net.sf.samtools.util.CloseableIterator; -import net.sf.picard.filter.SamRecordFilter; -import net.sf.picard.sam.SamFileHeaderMerger; -import net.sf.picard.sam.MergingSamRecordIterator; -import net.sf.picard.reference.IndexedFastaSequenceFile; - import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.DownsamplingMethod; -import org.broadinstitute.sting.gatk.filters.ReadFilter; -import org.broadinstitute.sting.gatk.iterators.*; -import org.broadinstitute.sting.gatk.ReadProperties; import org.broadinstitute.sting.gatk.ReadMetrics; +import org.broadinstitute.sting.gatk.ReadProperties; import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; import org.broadinstitute.sting.gatk.filters.CountingFilteringIterator; +import org.broadinstitute.sting.gatk.filters.ReadFilter; +import org.broadinstitute.sting.gatk.iterators.*; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.baq.BAQ; @@ -895,6 +893,7 @@ public class SAMDataSource { * Custom representation of interval bounds. * Makes it simpler to track current position. */ + private int[] intervalContigIndices; private int[] intervalStarts; private int[] intervalEnds; @@ -919,12 +918,14 @@ public class SAMDataSource { if(foundMappedIntervals) { if(keepOnlyUnmappedReads) throw new ReviewedStingException("Tried to apply IntervalOverlapFilteringIterator to a mixed of mapped and unmapped intervals. Please apply this filter to only mapped or only unmapped reads"); + this.intervalContigIndices = new int[intervals.size()]; this.intervalStarts = new int[intervals.size()]; this.intervalEnds = new int[intervals.size()]; int i = 0; for(GenomeLoc interval: intervals) { - intervalStarts[i] = (int)interval.getStart(); - intervalEnds[i] = (int)interval.getStop(); + intervalContigIndices[i] = interval.getContigIndex(); + intervalStarts[i] = interval.getStart(); + intervalEnds[i] = interval.getStop(); i++; } } @@ -963,11 +964,10 @@ public class SAMDataSource { while(nextRead == null && (keepOnlyUnmappedReads || currentBound < intervalStarts.length)) { if(!keepOnlyUnmappedReads) { // Mapped read filter; check against GenomeLoc-derived bounds. - if(candidateRead.getAlignmentEnd() >= intervalStarts[currentBound] || - (candidateRead.getReadUnmappedFlag() && candidateRead.getAlignmentStart() >= intervalStarts[currentBound])) { - // This read ends after the current interval begins (or, if unmapped, starts within the bounds of the interval. + if(readEndsOnOrAfterStartingBound(candidateRead)) { + // This read ends after the current interval begins. // Promising, but this read must be checked against the ending bound. - if(candidateRead.getAlignmentStart() <= intervalEnds[currentBound]) { + if(readStartsOnOrBeforeEndingBound(candidateRead)) { // Yes, this read is within both bounds. This must be our next read. nextRead = candidateRead; break; @@ -995,6 +995,37 @@ public class SAMDataSource { candidateRead = iterator.next(); } } + + /** + * Check whether the read lies after the start of the current bound. If the read is unmapped but placed, its + * end will be distorted, so rely only on the alignment start. + * @param read The read to position-check. + * @return True if the read starts after the current bounds. False otherwise. + */ + private boolean readEndsOnOrAfterStartingBound(final SAMRecord read) { + return + // Read ends on a later contig, or... + read.getReferenceIndex() > intervalContigIndices[currentBound] || + // Read ends of this contig... + (read.getReferenceIndex() == intervalContigIndices[currentBound] && + // either after this location, or... + (read.getAlignmentEnd() >= intervalStarts[currentBound] || + // read is unmapped but positioned and alignment start is on or after this start point. + (read.getReadUnmappedFlag() && read.getAlignmentStart() >= intervalStarts[currentBound]))); + } + + /** + * Check whether the read lies before the end of the current bound. + * @param read The read to position-check. + * @return True if the read starts after the current bounds. False otherwise. + */ + private boolean readStartsOnOrBeforeEndingBound(final SAMRecord read) { + return + // Read starts on a prior contig, or... + read.getReferenceIndex() < intervalContigIndices[currentBound] || + // Read starts on this contig and the alignment start is registered before this end point. + (read.getReferenceIndex() == intervalContigIndices[currentBound] && read.getAlignmentStart() <= intervalEnds[currentBound]); + } } /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/Shard.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/Shard.java index 418f5d3ee..f8d941784 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/Shard.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/Shard.java @@ -2,17 +2,13 @@ package org.broadinstitute.sting.gatk.datasources.reads; import net.sf.samtools.SAMFileSpan; import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.gatk.datasources.reads.SAMDataSource; -import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID; -import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; -import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.gatk.ReadMetrics; import org.broadinstitute.sting.gatk.ReadProperties; +import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; +import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.HasGenomeLocation; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import java.io.Serializable; import java.util.Collections; import java.util.List; import java.util.Map; diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/ShardStrategyFactory.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/ShardStrategyFactory.java index fa733ce12..780b41ef7 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/ShardStrategyFactory.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/ShardStrategyFactory.java @@ -1,10 +1,10 @@ package org.broadinstitute.sting.gatk.datasources.reads; -import net.sf.samtools.SAMSequenceDictionary; import net.sf.picard.reference.IndexedFastaSequenceFile; +import net.sf.samtools.SAMSequenceDictionary; import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.GenomeLocSortedSet; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; /** * diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/utilities/BAMFileStat.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/utilities/BAMFileStat.java index ae0f1cf43..07c13a76e 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/utilities/BAMFileStat.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/utilities/BAMFileStat.java @@ -25,17 +25,17 @@ package org.broadinstitute.sting.gatk.datasources.reads.utilities; -import org.broadinstitute.sting.commandline.CommandLineProgram; +import net.sf.samtools.BAMIndex; +import net.sf.samtools.SAMFileReader; import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.CommandLineProgram; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.instrumentation.Sizeof; import java.io.File; import java.lang.reflect.Field; -import java.util.Map; import java.util.List; - -import net.sf.samtools.*; +import java.util.Map; /** * diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/utilities/BAMTagRenamer.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/utilities/BAMTagRenamer.java index f03e2a44f..54de04379 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/utilities/BAMTagRenamer.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/utilities/BAMTagRenamer.java @@ -24,12 +24,12 @@ package org.broadinstitute.sting.gatk.datasources.reads.utilities; -import org.broadinstitute.sting.commandline.CommandLineProgram; -import org.broadinstitute.sting.commandline.Argument; -import net.sf.samtools.SAMFileWriter; import net.sf.samtools.SAMFileReader; -import net.sf.samtools.SAMRecord; +import net.sf.samtools.SAMFileWriter; import net.sf.samtools.SAMFileWriterFactory; +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.CommandLineProgram; import java.io.File; diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/utilities/PrintBGZFBounds.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/utilities/PrintBGZFBounds.java index df7dccaa9..773541d11 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/utilities/PrintBGZFBounds.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/utilities/PrintBGZFBounds.java @@ -24,7 +24,6 @@ package org.broadinstitute.sting.gatk.datasources.reads.utilities; -import net.sf.samtools.SAMFileReader; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.CommandLineProgram; diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reference/ReferenceDataSource.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reference/ReferenceDataSource.java index c2d64ddd8..ef69a8e5f 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reference/ReferenceDataSource.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reference/ReferenceDataSource.java @@ -25,16 +25,15 @@ package org.broadinstitute.sting.gatk.datasources.reference; -import net.sf.picard.reference.ReferenceSequenceFileFactory; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import net.sf.picard.reference.FastaSequenceIndexBuilder; -import net.sf.picard.sam.CreateSequenceDictionary; -import net.sf.picard.reference.IndexedFastaSequenceFile; import net.sf.picard.reference.FastaSequenceIndex; +import net.sf.picard.reference.FastaSequenceIndexBuilder; +import net.sf.picard.reference.IndexedFastaSequenceFile; +import net.sf.picard.sam.CreateSequenceDictionary; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; import org.broadinstitute.sting.utils.file.FSLockWithShared; import org.broadinstitute.sting.utils.file.FileSystemInabilityToLockException; -import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; import java.io.File; diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/rmd/ResourcePool.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/rmd/ResourcePool.java index 934f4f997..21f58d480 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/rmd/ResourcePool.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/rmd/ResourcePool.java @@ -28,11 +28,7 @@ import net.sf.samtools.SAMSequenceDictionary; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import java.util.Map; +import java.util.*; /** * A pool of open resources, all of which can create a closeable iterator. diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/sample/SampleDataSource.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/sample/SampleDataSource.java index 0a5981a1b..067bf3f72 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/sample/SampleDataSource.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/sample/SampleDataSource.java @@ -3,11 +3,10 @@ package org.broadinstitute.sting.gatk.datasources.sample; import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMReadGroupRecord; import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.exceptions.StingException; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.yaml.snakeyaml.TypeDescription; import org.yaml.snakeyaml.Yaml; import org.yaml.snakeyaml.constructor.Constructor; diff --git a/public/java/src/org/broadinstitute/sting/gatk/examples/CoverageBySample.java b/public/java/src/org/broadinstitute/sting/gatk/examples/CoverageBySample.java index a1419ba70..5dbd90405 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/examples/CoverageBySample.java +++ b/public/java/src/org/broadinstitute/sting/gatk/examples/CoverageBySample.java @@ -1,15 +1,20 @@ package org.broadinstitute.sting.gatk.examples; -import net.sf.samtools.*; -import org.broadinstitute.sting.gatk.refdata.*; -import org.broadinstitute.sting.gatk.walkers.LocusWalker; -import org.broadinstitute.sting.gatk.contexts.*; -import org.broadinstitute.sting.utils.pileup.*; +import net.sf.samtools.SAMReadGroupRecord; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.LocusWalker; +import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; -import java.util.*; import java.io.PrintStream; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; /** * Computes the coverage per sample. diff --git a/public/java/src/org/broadinstitute/sting/gatk/examples/GATKPaperGenotyper.java b/public/java/src/org/broadinstitute/sting/gatk/examples/GATKPaperGenotyper.java index b96d0ffbf..26205a203 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/examples/GATKPaperGenotyper.java +++ b/public/java/src/org/broadinstitute/sting/gatk/examples/GATKPaperGenotyper.java @@ -25,6 +25,8 @@ package org.broadinstitute.sting.gatk.examples; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -32,8 +34,6 @@ import org.broadinstitute.sting.gatk.walkers.LocusWalker; import org.broadinstitute.sting.gatk.walkers.TreeReducible; import org.broadinstitute.sting.gatk.walkers.genotyper.DiploidSNPGenotypePriors; import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import java.io.PrintStream; diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/Accumulator.java b/public/java/src/org/broadinstitute/sting/gatk/executive/Accumulator.java index 3e335733d..b23782563 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/executive/Accumulator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/executive/Accumulator.java @@ -25,18 +25,18 @@ package org.broadinstitute.sting.gatk.executive; -import org.broadinstitute.sting.gatk.walkers.Walker; -import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider; -import org.broadinstitute.sting.gatk.datasources.providers.LocusShardDataProvider; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.gatk.datasources.providers.LocusShardDataProvider; +import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider; +import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.GenomeLocSortedSet; +import org.broadinstitute.sting.utils.collections.Pair; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.util.ArrayList; -import java.util.List; import java.util.Iterator; +import java.util.List; /** * Manages the */ diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java b/public/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java index 8a27e008c..59fb4aa9e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java +++ b/public/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java @@ -1,27 +1,27 @@ package org.broadinstitute.sting.gatk.executive; +import net.sf.picard.reference.IndexedFastaSequenceFile; +import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.datasources.reads.SAMDataSource; import org.broadinstitute.sting.gatk.datasources.reads.Shard; -import org.broadinstitute.sting.gatk.walkers.Walker; -import org.broadinstitute.sting.gatk.walkers.TreeReducible; import org.broadinstitute.sting.gatk.datasources.reads.ShardStrategy; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; -import org.broadinstitute.sting.gatk.io.*; -import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; +import org.broadinstitute.sting.gatk.io.OutputTracker; +import org.broadinstitute.sting.gatk.io.ThreadLocalOutputTracker; +import org.broadinstitute.sting.gatk.walkers.TreeReducible; +import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.threading.ThreadPoolMonitor; -import java.util.Queue; -import java.util.LinkedList; import java.util.Collection; -import java.util.concurrent.Executors; +import java.util.LinkedList; +import java.util.Queue; import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; import java.util.concurrent.Future; import java.util.concurrent.FutureTask; -import net.sf.picard.reference.IndexedFastaSequenceFile; - /** * A microscheduler that schedules shards according to a tree-like structure. * Requires a special walker tagged with a 'TreeReducible' interface. diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java b/public/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java index 4cb571c45..48fd73e0b 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java +++ b/public/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java @@ -1,22 +1,21 @@ package org.broadinstitute.sting.gatk.executive; -import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider; +import net.sf.picard.reference.IndexedFastaSequenceFile; +import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.datasources.providers.LocusShardDataProvider; import org.broadinstitute.sting.gatk.datasources.providers.ReadShardDataProvider; +import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider; import org.broadinstitute.sting.gatk.datasources.reads.SAMDataSource; import org.broadinstitute.sting.gatk.datasources.reads.Shard; import org.broadinstitute.sting.gatk.datasources.reads.ShardStrategy; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; -import org.broadinstitute.sting.gatk.walkers.Walker; -import org.broadinstitute.sting.gatk.walkers.LocusWalker; import org.broadinstitute.sting.gatk.io.DirectOutputTracker; import org.broadinstitute.sting.gatk.io.OutputTracker; -import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; +import org.broadinstitute.sting.gatk.walkers.LocusWalker; +import org.broadinstitute.sting.gatk.walkers.Walker; import java.util.Collection; -import net.sf.picard.reference.IndexedFastaSequenceFile; - /** A micro-scheduling manager for single-threaded execution of a traversal. */ public class LinearMicroScheduler extends MicroScheduler { @@ -50,7 +49,7 @@ public class LinearMicroScheduler extends MicroScheduler { Accumulator accumulator = Accumulator.create(engine,walker); int counter = 0; - for (Shard shard : processingTracker.onlyOwned(shardStrategy, engine.getName())) { + for (Shard shard : shardStrategy ) { if ( shard == null ) // we ran out of shards that aren't owned break; diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java b/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java index cfe0f8187..e731b9864 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java +++ b/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java @@ -25,33 +25,26 @@ package org.broadinstitute.sting.gatk.executive; +import net.sf.picard.reference.IndexedFastaSequenceFile; import org.apache.log4j.Logger; +import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.datasources.reads.SAMDataSource; -import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.datasources.reads.Shard; import org.broadinstitute.sting.gatk.datasources.reads.ShardStrategy; +import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; +import org.broadinstitute.sting.gatk.io.OutputTracker; +import org.broadinstitute.sting.gatk.iterators.NullSAMIterator; +import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; import org.broadinstitute.sting.gatk.traversals.*; import org.broadinstitute.sting.gatk.walkers.*; -import org.broadinstitute.sting.gatk.io.OutputTracker; -import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; -import org.broadinstitute.sting.gatk.iterators.NullSAMIterator; -import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; -import org.broadinstitute.sting.gatk.ReadMetrics; - -import java.io.FileNotFoundException; -import java.io.FileOutputStream; -import java.io.PrintStream; -import java.lang.management.ManagementFactory; -import java.util.*; - -import net.sf.picard.reference.IndexedFastaSequenceFile; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.threading.*; import javax.management.JMException; import javax.management.MBeanServer; import javax.management.ObjectName; +import java.lang.management.ManagementFactory; +import java.util.Collection; /** @@ -86,8 +79,6 @@ public abstract class MicroScheduler implements MicroSchedulerMBean { private final MBeanServer mBeanServer; private final ObjectName mBeanName; - protected GenomeLocProcessingTracker processingTracker; - /** * MicroScheduler factory function. Create a microscheduler appropriate for reducing the * selected walker. @@ -101,11 +92,6 @@ public abstract class MicroScheduler implements MicroSchedulerMBean { * @return The best-fit microscheduler. */ public static MicroScheduler create(GenomeAnalysisEngine engine, Walker walker, SAMDataSource reads, IndexedFastaSequenceFile reference, Collection rods, int nThreadsToUse) { - if (engine.getArguments().processingTrackerFile != null) { - if ( walker instanceof ReadWalker ) - throw new UserException.BadArgumentValue("C", String.format("Distributed GATK processing not enabled for read walkers")); - } - if (walker instanceof TreeReducible && nThreadsToUse > 1) { if(walker.isReduceByInterval()) throw new UserException.BadArgumentValue("nt", String.format("The analysis %s aggregates results by interval. Due to a current limitation of the GATK, analyses of this type do not currently support parallel execution. Please run your analysis without the -nt option.", engine.getWalkerName(walker.getClass()))); @@ -160,33 +146,6 @@ public abstract class MicroScheduler implements MicroSchedulerMBean { catch (JMException ex) { throw new ReviewedStingException("Unable to register microscheduler with JMX", ex); } - - // - // create the processing tracker - // - if ( engine.getArguments().processingTrackerFile != null ) { - logger.warn("Distributed GATK is an experimental engine feature, and is likely to not work correctly or reliably."); - if ( engine.getArguments().restartProcessingTracker && engine.getArguments().processingTrackerFile.exists() ) { - engine.getArguments().processingTrackerFile.delete(); - logger.info("Deleting ProcessingTracker file " + engine.getArguments().processingTrackerFile); - } - - PrintStream statusStream = null; - if ( engine.getArguments().processingTrackerStatusFile != null ) { - try { - statusStream = new PrintStream(new FileOutputStream(engine.getArguments().processingTrackerStatusFile)); - } catch ( FileNotFoundException e) { - throw new UserException.CouldNotCreateOutputFile(engine.getArguments().processingTrackerStatusFile, e); - } - } - - ClosableReentrantLock lock = new SharedFileThreadSafeLock(engine.getArguments().processingTrackerFile, engine.getArguments().processTrackerID); - processingTracker = new FileBackedGenomeLocProcessingTracker(engine.getArguments().processingTrackerFile, engine.getGenomeLocParser(), lock, statusStream) ; - logger.info("Creating ProcessingTracker using shared file " + engine.getArguments().processingTrackerFile + " process.id = " + engine.getName() + " CID = " + engine.getArguments().processTrackerID); - } else { - // create a NoOp version that doesn't do anything but say "yes" - processingTracker = new NoOpGenomeLocProcessingTracker(); - } } /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/OutputMergeTask.java b/public/java/src/org/broadinstitute/sting/gatk/executive/OutputMergeTask.java index 76e0c1c8a..7be37a616 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/executive/OutputMergeTask.java +++ b/public/java/src/org/broadinstitute/sting/gatk/executive/OutputMergeTask.java @@ -1,10 +1,9 @@ package org.broadinstitute.sting.gatk.executive; import org.broadinstitute.sting.gatk.io.storage.Storage; -import org.broadinstitute.sting.gatk.io.OutputTracker; -import java.util.Collection; import java.util.ArrayList; +import java.util.Collection; /** * User: hanna diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/ReduceTree.java b/public/java/src/org/broadinstitute/sting/gatk/executive/ReduceTree.java index 151a1ba26..7aac70b47 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/executive/ReduceTree.java +++ b/public/java/src/org/broadinstitute/sting/gatk/executive/ReduceTree.java @@ -1,9 +1,9 @@ package org.broadinstitute.sting.gatk.executive; -import java.util.Queue; -import java.util.List; import java.util.ArrayList; import java.util.LinkedList; +import java.util.List; +import java.util.Queue; import java.util.concurrent.Future; /** * User: hanna diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/ShardTraverser.java b/public/java/src/org/broadinstitute/sting/gatk/executive/ShardTraverser.java index b78a4edc9..6136bd68d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/executive/ShardTraverser.java +++ b/public/java/src/org/broadinstitute/sting/gatk/executive/ShardTraverser.java @@ -1,13 +1,13 @@ package org.broadinstitute.sting.gatk.executive; import org.apache.log4j.Logger; -import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider; import org.broadinstitute.sting.gatk.datasources.providers.LocusShardDataProvider; +import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider; import org.broadinstitute.sting.gatk.datasources.reads.Shard; -import org.broadinstitute.sting.gatk.traversals.TraversalEngine; import org.broadinstitute.sting.gatk.io.ThreadLocalOutputTracker; -import org.broadinstitute.sting.gatk.walkers.Walker; +import org.broadinstitute.sting.gatk.traversals.TraversalEngine; import org.broadinstitute.sting.gatk.walkers.LocusWalker; +import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.util.concurrent.Callable; diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/TreeReducer.java b/public/java/src/org/broadinstitute/sting/gatk/executive/TreeReducer.java index 8fb4adb5d..d36a3b576 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/executive/TreeReducer.java +++ b/public/java/src/org/broadinstitute/sting/gatk/executive/TreeReducer.java @@ -4,8 +4,8 @@ import org.broadinstitute.sting.gatk.walkers.TreeReducible; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.util.concurrent.Callable; -import java.util.concurrent.Future; import java.util.concurrent.ExecutionException; +import java.util.concurrent.Future; /** * User: hanna * Date: Apr 29, 2009 diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/WindowMaker.java b/public/java/src/org/broadinstitute/sting/gatk/executive/WindowMaker.java index 5c341bb02..cfbce58ee 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/executive/WindowMaker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/executive/WindowMaker.java @@ -1,17 +1,20 @@ package org.broadinstitute.sting.gatk.executive; -import org.broadinstitute.sting.gatk.datasources.reads.Shard; -import org.broadinstitute.sting.gatk.datasources.sample.SampleDataSource; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.gatk.iterators.*; +import net.sf.picard.util.PeekableIterator; import org.broadinstitute.sting.gatk.ReadProperties; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; - -import java.util.*; - -import net.sf.picard.util.PeekableIterator; +import org.broadinstitute.sting.gatk.datasources.reads.Shard; +import org.broadinstitute.sting.gatk.datasources.sample.SampleDataSource; +import org.broadinstitute.sting.gatk.iterators.LocusIterator; +import org.broadinstitute.sting.gatk.iterators.LocusIteratorByState; +import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; +import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; +import java.util.Iterator; +import java.util.List; +import java.util.NoSuchElementException; + /** * Buffer shards of data which may or may not contain multiple loci into * iterators of all data which cover an interval. Its existence is an homage diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/BadCigarFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/BadCigarFilter.java index b8a3ee977..0987c5d74 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/BadCigarFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/BadCigarFilter.java @@ -24,8 +24,10 @@ package org.broadinstitute.sting.gatk.filters; -import net.sf.picard.filter.SamRecordFilter; -import net.sf.samtools.*; +import net.sf.samtools.Cigar; +import net.sf.samtools.CigarElement; +import net.sf.samtools.CigarOperator; +import net.sf.samtools.SAMRecord; /** * Filter out reads with wonky cigar strings. diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/BadMateFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/BadMateFilter.java index 3b988c8fb..8596e18eb 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/BadMateFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/BadMateFilter.java @@ -24,7 +24,6 @@ package org.broadinstitute.sting.gatk.filters; -import net.sf.picard.filter.SamRecordFilter; import net.sf.samtools.SAMRecord; /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/CountingFilteringIterator.java b/public/java/src/org/broadinstitute/sting/gatk/filters/CountingFilteringIterator.java index 84390c173..03fc2063b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/CountingFilteringIterator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/CountingFilteringIterator.java @@ -23,17 +23,16 @@ */ package org.broadinstitute.sting.gatk.filters; -import net.sf.samtools.util.CloserUtil; - -import java.util.Iterator; -import java.util.NoSuchElementException; -import java.util.Collection; - +import net.sf.picard.filter.SamRecordFilter; import net.sf.samtools.SAMRecord; import net.sf.samtools.util.CloseableIterator; -import net.sf.picard.filter.SamRecordFilter; +import net.sf.samtools.util.CloserUtil; import org.broadinstitute.sting.gatk.ReadMetrics; +import java.util.Collection; +import java.util.Iterator; +import java.util.NoSuchElementException; + /** * Filtering Iterator which takes a filter and an iterator and iterates * through only those records which are not rejected by the filter. diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/DuplicateReadFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/DuplicateReadFilter.java index fb3c38582..589910fc7 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/DuplicateReadFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/DuplicateReadFilter.java @@ -1,6 +1,5 @@ package org.broadinstitute.sting.gatk.filters; -import net.sf.picard.filter.SamRecordFilter; import net.sf.samtools.SAMRecord; /* diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/FailsVendorQualityCheckReadFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/FailsVendorQualityCheckReadFilter.java index b7806524a..cd77a9e7e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/FailsVendorQualityCheckReadFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/FailsVendorQualityCheckReadFilter.java @@ -24,7 +24,6 @@ package org.broadinstitute.sting.gatk.filters; -import net.sf.picard.filter.SamRecordFilter; import net.sf.samtools.SAMRecord; /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/FilterManager.java b/public/java/src/org/broadinstitute/sting/gatk/filters/FilterManager.java index cda08fb66..67f82235d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/FilterManager.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/FilterManager.java @@ -25,11 +25,8 @@ package org.broadinstitute.sting.gatk.filters; -import org.apache.log4j.Logger; import org.broadinstitute.sting.utils.classloader.PluginManager; -import net.sf.picard.filter.SamRecordFilter; - import java.util.Collection; /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/MalformedReadFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/MalformedReadFilter.java index 4deeb09ee..74deace9a 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/MalformedReadFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/MalformedReadFilter.java @@ -24,8 +24,8 @@ package org.broadinstitute.sting.gatk.filters; -import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMFileHeader; +import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMSequenceRecord; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityReadFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityReadFilter.java index bcd473b15..75369b306 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityReadFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityReadFilter.java @@ -25,7 +25,6 @@ package org.broadinstitute.sting.gatk.filters; -import net.sf.picard.filter.SamRecordFilter; import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.commandline.Argument; diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityUnavailableReadFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityUnavailableReadFilter.java index cecbedda8..1afec36d1 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityUnavailableReadFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityUnavailableReadFilter.java @@ -24,7 +24,6 @@ package org.broadinstitute.sting.gatk.filters; -import net.sf.picard.util.QualityUtil; import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.utils.QualityUtils; diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/MaxInsertSizeFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/MaxInsertSizeFilter.java index 584783d34..7bcee033f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/MaxInsertSizeFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/MaxInsertSizeFilter.java @@ -1,6 +1,5 @@ package org.broadinstitute.sting.gatk.filters; -import net.sf.picard.filter.SamRecordFilter; import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.commandline.Argument; diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/MaxReadLengthFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/MaxReadLengthFilter.java index 8a2fd5af3..cd31da61a 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/MaxReadLengthFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/MaxReadLengthFilter.java @@ -25,7 +25,6 @@ package org.broadinstitute.sting.gatk.filters; -import net.sf.picard.filter.SamRecordFilter; import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.commandline.Argument; diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/MissingReadGroupFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/MissingReadGroupFilter.java index d5d40ec38..490a55040 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/MissingReadGroupFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/MissingReadGroupFilter.java @@ -24,7 +24,6 @@ package org.broadinstitute.sting.gatk.filters; -import net.sf.picard.filter.SamRecordFilter; import net.sf.samtools.SAMRecord; /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/NoOriginalQualityScoresFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/NoOriginalQualityScoresFilter.java index fc21538e8..29738e499 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/NoOriginalQualityScoresFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/NoOriginalQualityScoresFilter.java @@ -1,6 +1,5 @@ package org.broadinstitute.sting.gatk.filters; -import net.sf.picard.filter.SamRecordFilter; import net.sf.samtools.SAMRecord; /* diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/NotPrimaryAlignmentReadFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/NotPrimaryAlignmentReadFilter.java index cfbba0383..31c2144ce 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/NotPrimaryAlignmentReadFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/NotPrimaryAlignmentReadFilter.java @@ -24,7 +24,6 @@ package org.broadinstitute.sting.gatk.filters; -import net.sf.picard.filter.SamRecordFilter; import net.sf.samtools.SAMRecord; /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/Platform454Filter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/Platform454Filter.java index dcddebd55..8ad91ac1c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/Platform454Filter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/Platform454Filter.java @@ -25,9 +25,7 @@ package org.broadinstitute.sting.gatk.filters; -import net.sf.picard.filter.SamRecordFilter; import net.sf.samtools.SAMRecord; - import org.broadinstitute.sting.utils.sam.ReadUtils; /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/PlatformFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/PlatformFilter.java index 691fd95f0..30b2f828d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/PlatformFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/PlatformFilter.java @@ -25,11 +25,9 @@ package org.broadinstitute.sting.gatk.filters; -import net.sf.picard.filter.SamRecordFilter; import net.sf.samtools.SAMRecord; - -import org.broadinstitute.sting.utils.sam.ReadUtils; import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.utils.sam.ReadUtils; /** * Filter out PL matching reads. diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/PlatformUnitFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/PlatformUnitFilter.java index 4ef88402d..81044b888 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/PlatformUnitFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/PlatformUnitFilter.java @@ -1,14 +1,12 @@ package org.broadinstitute.sting.gatk.filters; -import net.sf.picard.filter.SamRecordFilter; -import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMReadGroupRecord; - -import java.util.Set; -import java.util.HashSet; - +import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.utils.exceptions.UserException; +import java.util.HashSet; +import java.util.Set; + /** * Created by IntelliJ IDEA. * User: asivache diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/ReadFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/ReadFilter.java index 1d74ee20e..bf3ce352a 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/ReadFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/ReadFilter.java @@ -1,12 +1,15 @@ package org.broadinstitute.sting.gatk.filters; import net.sf.picard.filter.SamRecordFilter; -import net.sf.samtools.SAMFileHeader; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; /** * A SamRecordFilter that also depends on the header. */ +@DocumentedGATKFeature( + groupName = "Read filters", + summary = "GATK Engine arguments that filter or transfer incoming SAM/BAM data files" ) public abstract class ReadFilter implements SamRecordFilter { /** * Sets the header for use by this filter. diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/ReadGroupBlackListFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/ReadGroupBlackListFilter.java index 69ebc8b04..0e5e8800c 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/ReadGroupBlackListFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/ReadGroupBlackListFilter.java @@ -25,17 +25,16 @@ package org.broadinstitute.sting.gatk.filters; -import java.util.*; -import java.util.Map.Entry; -import java.io.File; -import java.io.FileNotFoundException; - -import net.sf.picard.filter.SamRecordFilter; -import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMReadGroupRecord; +import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.text.XReadLines; +import java.io.File; +import java.io.FileNotFoundException; +import java.util.*; +import java.util.Map.Entry; + /** * Removes records matching the read group tag and exact match string. * For example, this filter value: diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/ReadStrandFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/ReadStrandFilter.java index 1b2a77f45..16eeed3cc 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/ReadStrandFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/ReadStrandFilter.java @@ -25,7 +25,6 @@ package org.broadinstitute.sting.gatk.filters; -import net.sf.picard.filter.SamRecordFilter; import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.commandline.Argument; diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/SampleFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/SampleFilter.java index 682b22b1f..99d6bc154 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/SampleFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/SampleFilter.java @@ -25,9 +25,8 @@ package org.broadinstitute.sting.gatk.filters; -import net.sf.picard.filter.SamRecordFilter; -import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMReadGroupRecord; +import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.commandline.Argument; import java.util.Set; diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/SingleReadGroupFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/SingleReadGroupFilter.java index 05b472036..2f93cbcae 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/SingleReadGroupFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/SingleReadGroupFilter.java @@ -25,9 +25,8 @@ package org.broadinstitute.sting.gatk.filters; -import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMReadGroupRecord; -import net.sf.picard.filter.SamRecordFilter; +import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.commandline.Argument; /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/UnmappedReadFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/UnmappedReadFilter.java index ac4f4853a..e7ee345d2 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/UnmappedReadFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/UnmappedReadFilter.java @@ -24,7 +24,6 @@ package org.broadinstitute.sting.gatk.filters; -import net.sf.picard.filter.SamRecordFilter; import net.sf.samtools.SAMRecord; /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/DirectOutputTracker.java b/public/java/src/org/broadinstitute/sting/gatk/io/DirectOutputTracker.java index 865528688..658a28fbd 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/DirectOutputTracker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/DirectOutputTracker.java @@ -25,9 +25,9 @@ package org.broadinstitute.sting.gatk.io; -import org.broadinstitute.sting.gatk.io.stubs.Stub; -import org.broadinstitute.sting.gatk.io.storage.StorageFactory; import org.broadinstitute.sting.gatk.io.storage.Storage; +import org.broadinstitute.sting.gatk.io.storage.StorageFactory; +import org.broadinstitute.sting.gatk.io.stubs.Stub; /** * Maps creation of storage directly to output streams in parent. diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/OutputTracker.java b/public/java/src/org/broadinstitute/sting/gatk/io/OutputTracker.java index b68013aa4..f39ba2d8d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/io/OutputTracker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/OutputTracker.java @@ -26,20 +26,20 @@ package org.broadinstitute.sting.gatk.io; import net.sf.samtools.SAMFileReader; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.classloader.JVMUtils; import org.broadinstitute.sting.commandline.ArgumentSource; -import org.broadinstitute.sting.utils.sam.SAMFileReaderBuilder; -import org.broadinstitute.sting.gatk.walkers.Walker; +import org.broadinstitute.sting.gatk.io.storage.Storage; +import org.broadinstitute.sting.gatk.io.storage.StorageFactory; import org.broadinstitute.sting.gatk.io.stubs.OutputStreamStub; import org.broadinstitute.sting.gatk.io.stubs.Stub; -import org.broadinstitute.sting.gatk.io.storage.StorageFactory; -import org.broadinstitute.sting.gatk.io.storage.Storage; +import org.broadinstitute.sting.gatk.walkers.Walker; +import org.broadinstitute.sting.utils.classloader.JVMUtils; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.sam.SAMFileReaderBuilder; -import java.io.*; +import java.io.OutputStream; import java.lang.reflect.Field; -import java.util.Map; import java.util.HashMap; +import java.util.Map; /** * Manages the output and err streams that are created specifically for walker diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/StingSAMFileWriter.java b/public/java/src/org/broadinstitute/sting/gatk/io/StingSAMFileWriter.java index 8701ecf3c..a9a74925d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/StingSAMFileWriter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/StingSAMFileWriter.java @@ -1,7 +1,7 @@ package org.broadinstitute.sting.gatk.io; -import net.sf.samtools.SAMFileWriter; import net.sf.samtools.SAMFileHeader; +import net.sf.samtools.SAMFileWriter; /** * A writer that will allow unsorted BAM files to be written diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/ThreadLocalOutputTracker.java b/public/java/src/org/broadinstitute/sting/gatk/io/ThreadLocalOutputTracker.java index 36960246a..999deddd1 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/ThreadLocalOutputTracker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/ThreadLocalOutputTracker.java @@ -25,15 +25,16 @@ package org.broadinstitute.sting.gatk.io; -import org.broadinstitute.sting.gatk.io.stubs.Stub; -import org.broadinstitute.sting.gatk.io.storage.StorageFactory; -import org.broadinstitute.sting.gatk.io.storage.Storage; import org.broadinstitute.sting.gatk.executive.OutputMergeTask; +import org.broadinstitute.sting.gatk.io.storage.Storage; +import org.broadinstitute.sting.gatk.io.storage.StorageFactory; +import org.broadinstitute.sting.gatk.io.stubs.Stub; import org.broadinstitute.sting.utils.exceptions.UserException; -import java.util.*; import java.io.File; import java.io.IOException; +import java.util.HashMap; +import java.util.Map; /** * An output tracker that can either track its output per-thread or directly, diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/storage/OutputStreamStorage.java b/public/java/src/org/broadinstitute/sting/gatk/io/storage/OutputStreamStorage.java index 4dc976289..56c9c0465 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/storage/OutputStreamStorage.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/storage/OutputStreamStorage.java @@ -25,14 +25,14 @@ package org.broadinstitute.sting.gatk.io.storage; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.gatk.io.stubs.OutputStreamStub; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import java.io.*; +import java.nio.channels.Channels; import java.nio.channels.FileChannel; import java.nio.channels.WritableByteChannel; -import java.nio.channels.Channels; public class OutputStreamStorage extends OutputStream implements Storage { /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/storage/SAMFileWriterStorage.java b/public/java/src/org/broadinstitute/sting/gatk/io/storage/SAMFileWriterStorage.java index 610db1d76..cb8786be1 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/storage/SAMFileWriterStorage.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/storage/SAMFileWriterStorage.java @@ -27,17 +27,16 @@ package org.broadinstitute.sting.gatk.io.storage; import net.sf.samtools.*; import net.sf.samtools.util.CloseableIterator; - -import java.io.*; -import java.lang.reflect.InvocationTargetException; -import java.lang.reflect.Method; - import net.sf.samtools.util.RuntimeIOException; import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.io.stubs.SAMFileWriterStub; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.sam.SimplifyingSAMFileWriter; +import java.io.File; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; + /** * Provides temporary storage for SAMFileWriters. * diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/storage/StorageFactory.java b/public/java/src/org/broadinstitute/sting/gatk/io/storage/StorageFactory.java index ee5c56524..66907dd6b 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/storage/StorageFactory.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/storage/StorageFactory.java @@ -25,9 +25,9 @@ package org.broadinstitute.sting.gatk.io.storage; -import org.broadinstitute.sting.gatk.io.stubs.Stub; import org.broadinstitute.sting.gatk.io.stubs.OutputStreamStub; import org.broadinstitute.sting.gatk.io.stubs.SAMFileWriterStub; +import org.broadinstitute.sting.gatk.io.stubs.Stub; import org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/storage/VCFWriterStorage.java b/public/java/src/org/broadinstitute/sting/gatk/io/storage/VCFWriterStorage.java index 74176ec35..1da03e9c2 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/storage/VCFWriterStorage.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/storage/VCFWriterStorage.java @@ -1,16 +1,21 @@ package org.broadinstitute.sting.gatk.io.storage; +import net.sf.samtools.util.BlockCompressedOutputStream; import org.apache.log4j.Logger; import org.broad.tribble.source.BasicFeatureSource; -import org.broadinstitute.sting.utils.codecs.vcf.*; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub; - -import java.io.*; - -import net.sf.samtools.util.BlockCompressedOutputStream; +import org.broadinstitute.sting.utils.codecs.vcf.StandardVCFWriter; +import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; +import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; + +import java.io.File; +import java.io.IOException; +import java.io.OutputStream; +import java.io.PrintStream; /** * Provides temporary and permanent storage for genotypes in VCF format. diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/OutputStreamArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/OutputStreamArgumentTypeDescriptor.java index 00e78f391..8bc97c886 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/OutputStreamArgumentTypeDescriptor.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/OutputStreamArgumentTypeDescriptor.java @@ -30,8 +30,8 @@ import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import java.io.OutputStream; import java.io.File; +import java.io.OutputStream; import java.lang.reflect.Constructor; /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/OutputStreamStub.java b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/OutputStreamStub.java index 5cf84c5a2..27bcb8a1c 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/OutputStreamStub.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/OutputStreamStub.java @@ -27,9 +27,9 @@ package org.broadinstitute.sting.gatk.io.stubs; import org.broadinstitute.sting.gatk.io.OutputTracker; -import java.io.OutputStream; -import java.io.IOException; import java.io.File; +import java.io.IOException; +import java.io.OutputStream; /** * A stub for routing and management of anything backed by an OutputStream. diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileReaderArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileReaderArgumentTypeDescriptor.java index d847015ed..f124c2302 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileReaderArgumentTypeDescriptor.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileReaderArgumentTypeDescriptor.java @@ -24,14 +24,14 @@ package org.broadinstitute.sting.gatk.io.stubs; -import org.broadinstitute.sting.commandline.ArgumentTypeDescriptor; -import org.broadinstitute.sting.commandline.ArgumentSource; +import net.sf.samtools.SAMFileReader; import org.broadinstitute.sting.commandline.ArgumentMatches; +import org.broadinstitute.sting.commandline.ArgumentSource; +import org.broadinstitute.sting.commandline.ArgumentTypeDescriptor; import org.broadinstitute.sting.commandline.ParsingEngine; +import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.sam.SAMFileReaderBuilder; -import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; -import net.sf.samtools.SAMFileReader; import java.io.File; diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterArgumentTypeDescriptor.java index a9a272220..38640eda0 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterArgumentTypeDescriptor.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterArgumentTypeDescriptor.java @@ -25,18 +25,17 @@ package org.broadinstitute.sting.gatk.io.stubs; +import net.sf.samtools.SAMFileWriter; import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.io.StingSAMFileWriter; -import net.sf.samtools.SAMFileWriter; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.exceptions.UserException; -import java.lang.annotation.Annotation; -import java.util.List; -import java.util.Arrays; import java.io.File; import java.io.OutputStream; +import java.lang.annotation.Annotation; +import java.util.Arrays; +import java.util.List; /** * Insert a SAMFileWriterStub instead of a full-fledged concrete OutputStream implementations. diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterStub.java b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterStub.java index f5c1e0efc..d8e59a3dd 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterStub.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterStub.java @@ -25,20 +25,19 @@ package org.broadinstitute.sting.gatk.io.stubs; +import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMFileWriter; import net.sf.samtools.SAMRecord; -import net.sf.samtools.SAMFileHeader; +import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; +import org.broadinstitute.sting.gatk.io.OutputTracker; +import org.broadinstitute.sting.gatk.io.StingSAMFileWriter; +import org.broadinstitute.sting.utils.baq.BAQ; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.exceptions.UserException; import java.io.File; import java.io.OutputStream; -import org.broadinstitute.sting.gatk.io.OutputTracker; -import org.broadinstitute.sting.gatk.io.StingSAMFileWriter; -import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.baq.BAQ; - /** * A stub for routing and management of SAM file reading and writing. * diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterArgumentTypeDescriptor.java index 7521e754d..615841f02 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterArgumentTypeDescriptor.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterArgumentTypeDescriptor.java @@ -25,14 +25,17 @@ package org.broadinstitute.sting.gatk.io.stubs; -import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; +import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.io.File; import java.io.OutputStream; -import java.util.*; +import java.util.Arrays; +import java.util.Collection; +import java.util.HashSet; +import java.util.List; /** * Injects new command-line arguments into the system providing support for the genotype writer. diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterStub.java b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterStub.java index 1a79d2785..bb84f9457 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterStub.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterStub.java @@ -25,19 +25,19 @@ package org.broadinstitute.sting.gatk.io.stubs; -import java.io.File; -import java.io.PrintStream; -import java.io.OutputStream; -import java.util.Collection; - -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; -import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; import org.broadinstitute.sting.gatk.CommandLineExecutable; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.io.OutputTracker; import org.broadinstitute.sting.utils.classloader.JVMUtils; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; +import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; + +import java.io.File; +import java.io.OutputStream; +import java.io.PrintStream; +import java.util.Collection; /** * A stub for routing and management of genotype reading and writing. diff --git a/public/java/src/org/broadinstitute/sting/gatk/iterators/BoundedReadIterator.java b/public/java/src/org/broadinstitute/sting/gatk/iterators/BoundedReadIterator.java index b5643f834..478675f9d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/iterators/BoundedReadIterator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/iterators/BoundedReadIterator.java @@ -1,13 +1,11 @@ package org.broadinstitute.sting.gatk.iterators; +import net.sf.picard.sam.MergingSamRecordIterator; import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMRecord; -import net.sf.picard.sam.MergingSamRecordIterator; import java.util.Iterator; -import org.broadinstitute.sting.gatk.ReadProperties; - /* * Copyright (c) 2009 The Broad Institute * diff --git a/public/java/src/org/broadinstitute/sting/gatk/iterators/BufferingReadIterator.java b/public/java/src/org/broadinstitute/sting/gatk/iterators/BufferingReadIterator.java index f3a060be1..7eaf4be41 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/iterators/BufferingReadIterator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/iterators/BufferingReadIterator.java @@ -26,12 +26,11 @@ package org.broadinstitute.sting.gatk.iterators; import net.sf.samtools.SAMRecord; import net.sf.samtools.util.CloseableIterator; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import java.util.Queue; import java.util.LinkedList; import java.util.NoSuchElementException; - -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import java.util.Queue; /** * Buffers access to a large stream of reads, replenishing the buffer only when the reads diff --git a/public/java/src/org/broadinstitute/sting/gatk/iterators/DownsampleIterator.java b/public/java/src/org/broadinstitute/sting/gatk/iterators/DownsampleIterator.java index 1342f11fd..835748ff0 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/iterators/DownsampleIterator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/iterators/DownsampleIterator.java @@ -1,11 +1,10 @@ package org.broadinstitute.sting.gatk.iterators; import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import java.util.Iterator; -import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; - public class DownsampleIterator implements StingSAMIterator { diff --git a/public/java/src/org/broadinstitute/sting/gatk/iterators/GenomeLocusIterator.java b/public/java/src/org/broadinstitute/sting/gatk/iterators/GenomeLocusIterator.java index aa376a12a..240564d34 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/iterators/GenomeLocusIterator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/iterators/GenomeLocusIterator.java @@ -3,8 +3,8 @@ package org.broadinstitute.sting.gatk.iterators; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; -import java.util.NoSuchElementException; import java.util.Iterator; +import java.util.NoSuchElementException; /** * User: hanna * Date: May 12, 2009 diff --git a/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIterator.java b/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIterator.java index 30c1cf512..e177984ca 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIterator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIterator.java @@ -1,11 +1,10 @@ package org.broadinstitute.sting.gatk.iterators; import net.sf.samtools.util.CloseableIterator; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import java.util.Iterator; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; - /** * Iterator that traverses a SAM File, accumulating information on a per-locus basis */ diff --git a/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java b/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java index 4e58813f5..e13c5a764 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java +++ b/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java @@ -25,19 +25,27 @@ package org.broadinstitute.sting.gatk.iterators; -import net.sf.samtools.*; import net.sf.picard.util.PeekableIterator; +import net.sf.samtools.Cigar; +import net.sf.samtools.CigarElement; +import net.sf.samtools.CigarOperator; +import net.sf.samtools.SAMRecord; import org.apache.log4j.Logger; -import org.broadinstitute.sting.gatk.ReadProperties; -import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; -import org.broadinstitute.sting.gatk.DownsamplingMethod; import org.broadinstitute.sting.gatk.DownsampleType; +import org.broadinstitute.sting.gatk.DownsamplingMethod; +import org.broadinstitute.sting.gatk.ReadProperties; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.datasources.sample.Sample; import org.broadinstitute.sting.gatk.datasources.sample.SampleDataSource; -import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.ReservoirDownsampler; import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.pileup.*; +import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement; +import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileupImpl; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl; import org.broadinstitute.sting.utils.sam.ReadUtils; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/iterators/NullSAMIterator.java b/public/java/src/org/broadinstitute/sting/gatk/iterators/NullSAMIterator.java index ff458467f..21b71c9e6 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/iterators/NullSAMIterator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/iterators/NullSAMIterator.java @@ -1,6 +1,5 @@ package org.broadinstitute.sting.gatk.iterators; -import org.broadinstitute.sting.gatk.ReadProperties; import net.sf.samtools.SAMRecord; import java.util.Iterator; diff --git a/public/java/src/org/broadinstitute/sting/gatk/iterators/PositionTrackingIterator.java b/public/java/src/org/broadinstitute/sting/gatk/iterators/PositionTrackingIterator.java index c2d3976ea..cc499b247 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/iterators/PositionTrackingIterator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/iterators/PositionTrackingIterator.java @@ -25,7 +25,6 @@ package org.broadinstitute.sting.gatk.iterators; -import org.broadinstitute.sting.gatk.ReadProperties; import net.sf.samtools.SAMRecord; import net.sf.samtools.util.CloseableIterator; diff --git a/public/java/src/org/broadinstitute/sting/gatk/iterators/ReadFormattingIterator.java b/public/java/src/org/broadinstitute/sting/gatk/iterators/ReadFormattingIterator.java index 239392eec..2f30d12a8 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/iterators/ReadFormattingIterator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/iterators/ReadFormattingIterator.java @@ -1,13 +1,8 @@ package org.broadinstitute.sting.gatk.iterators; import net.sf.samtools.SAMRecord; -import net.sf.samtools.SAMTag; -import net.sf.samtools.SAMReadGroupRecord; -import org.broadinstitute.sting.gatk.ReadProperties; -import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.apache.log4j.Logger; - -import java.util.List; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; /** * An iterator which does post-processing of a read, including potentially wrapping diff --git a/public/java/src/org/broadinstitute/sting/gatk/iterators/StingSAMIterator.java b/public/java/src/org/broadinstitute/sting/gatk/iterators/StingSAMIterator.java index d294993d4..1b248d097 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/iterators/StingSAMIterator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/iterators/StingSAMIterator.java @@ -2,7 +2,6 @@ package org.broadinstitute.sting.gatk.iterators; import net.sf.samtools.SAMRecord; import net.sf.samtools.util.CloseableIterator; -import org.broadinstitute.sting.gatk.ReadProperties; /** * * User: aaron diff --git a/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java b/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java index a51ca9292..acee1a6a3 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java +++ b/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java @@ -26,19 +26,16 @@ package org.broadinstitute.sting.gatk.phonehome; import org.apache.log4j.Level; import org.apache.log4j.Logger; -import org.broadinstitute.sting.commandline.CommandLineUtils; import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection; import org.broadinstitute.sting.gatk.walkers.Walker; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.exceptions.StingException; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.jets3t.service.S3Service; import org.jets3t.service.S3ServiceException; import org.jets3t.service.impl.rest.httpclient.RestS3Service; -import org.jets3t.service.model.S3Bucket; import org.jets3t.service.model.S3Object; import org.jets3t.service.security.AWSCredentials; import org.simpleframework.xml.Element; @@ -157,9 +154,13 @@ public class GATKRunReport { private long nReads; public enum PhoneHomeOption { + /** Disable phone home */ NO_ET, + /** Standard option. Writes to local repository if it can be found, or S3 otherwise */ STANDARD, + /** Force output to STDOUT. For debugging only */ STDOUT, + /** Force output to S3. For debugging only */ AWS_S3 // todo -- remove me -- really just for testing purposes } diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/RODRecordIterator.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/RODRecordIterator.java index fa0c323b5..ce924fd87 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/RODRecordIterator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/RODRecordIterator.java @@ -25,17 +25,17 @@ package org.broadinstitute.sting.gatk.refdata; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.gatk.iterators.PushbackIterator; import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.text.XReadLines; -import org.broadinstitute.sting.gatk.iterators.PushbackIterator; +import java.io.File; +import java.io.FileNotFoundException; +import java.lang.reflect.Constructor; import java.util.Iterator; import java.util.regex.Pattern; -import java.io.FileNotFoundException; -import java.io.File; -import java.lang.reflect.Constructor; /** * This is a low-level iterator designed to provide system-wide generic support for reading record-oriented data diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/RODRecordListImpl.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/RODRecordListImpl.java index cff97e4ee..59b273d38 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/RODRecordListImpl.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/RODRecordListImpl.java @@ -2,9 +2,9 @@ package org.broadinstitute.sting.gatk.refdata; import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; +import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.HasGenomeLocation; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.GenomeLoc; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java index 43bf6f8e0..d03b122e2 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java @@ -1,13 +1,13 @@ package org.broadinstitute.sting.gatk.refdata; import org.apache.log4j.Logger; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/SeekableRODIterator.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/SeekableRODIterator.java index b3cb22a03..b7437e6e8 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/SeekableRODIterator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/SeekableRODIterator.java @@ -6,9 +6,9 @@ import org.broadinstitute.sting.gatk.iterators.PushbackIterator; import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator; import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import java.util.Iterator; diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java index c7c0468e7..ba9a10d8b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java @@ -3,17 +3,13 @@ package org.broadinstitute.sting.gatk.refdata; import org.broad.tribble.Feature; import org.broad.tribble.dbsnp.DbSNPFeature; import org.broad.tribble.gelitext.GeliTextFeature; -import org.broadinstitute.sting.utils.codecs.hapmap.HapMapFeature; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.MutableGenotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; import org.broadinstitute.sting.utils.classloader.PluginManager; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.codecs.hapmap.HapMapFeature; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; +import org.broadinstitute.sting.utils.variantcontext.*; import java.util.*; @@ -116,24 +112,29 @@ public class VariantContextAdaptors { alleles.add(refAllele); // add all of the alt alleles + boolean sawNullAllele = false; for ( String alt : DbSNPHelper.getAlternateAlleleList(dbsnp) ) { if ( ! Allele.acceptableAlleleBases(alt) ) { //System.out.printf("Excluding dbsnp record %s%n", dbsnp); return null; } - alleles.add(Allele.create(alt, false)); + Allele altAllele = Allele.create(alt, false); + alleles.add(altAllele); + if ( altAllele.isNull() ) + sawNullAllele = true; } Map attributes = new HashMap(); attributes.put(VariantContext.ID_KEY, dbsnp.getRsID()); - if ( DbSNPHelper.isDeletion(dbsnp) ) { + + if ( sawNullAllele ) { int index = dbsnp.getStart() - ref.getWindow().getStart() - 1; if ( index < 0 ) return null; // we weren't given enough reference context to create the VariantContext attributes.put(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY, new Byte(ref.getBases()[index])); } Collection genotypes = null; - VariantContext vc = new VariantContext(name, dbsnp.getChr(),dbsnp.getStart() - (DbSNPHelper.isDeletion(dbsnp) ? 1 : 0),dbsnp.getEnd(), alleles, genotypes, VariantContext.NO_NEG_LOG_10PERROR, null, attributes); + VariantContext vc = new VariantContext(name, dbsnp.getChr(), dbsnp.getStart() - (sawNullAllele ? 1 : 0),dbsnp.getEnd(), alleles, genotypes, VariantContext.NO_NEG_LOG_10PERROR, null, attributes); return vc; } else return null; // can't handle anything else diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/annotator/AnnotatorInputTableCodec.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/annotator/AnnotatorInputTableCodec.java index 59cd14a22..6bba754be 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/annotator/AnnotatorInputTableCodec.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/annotator/AnnotatorInputTableCodec.java @@ -25,12 +25,6 @@ package org.broadinstitute.sting.gatk.refdata.features.annotator; -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.util.ArrayList; -import java.util.StringTokenizer; - import org.apache.log4j.Logger; import org.broad.tribble.Feature; import org.broad.tribble.exception.CodecLineParsingException; @@ -41,6 +35,12 @@ import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.Utils; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.util.ArrayList; +import java.util.StringTokenizer; + public class AnnotatorInputTableCodec implements ReferenceDependentFeatureCodec { private static Logger logger = Logger.getLogger(AnnotatorInputTableCodec.class); diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/annotator/AnnotatorInputTableFeature.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/annotator/AnnotatorInputTableFeature.java index d97e378fb..d12badd28 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/annotator/AnnotatorInputTableFeature.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/annotator/AnnotatorInputTableFeature.java @@ -25,13 +25,13 @@ package org.broadinstitute.sting.gatk.refdata.features.annotator; +import org.broad.tribble.Feature; + import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.Map; -import org.broad.tribble.Feature; - /** * This class represents a single record in an AnnotatorInputTable. */ diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/beagle/BeagleCodec.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/beagle/BeagleCodec.java index 7f97451cf..5e536d4c1 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/beagle/BeagleCodec.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/beagle/BeagleCodec.java @@ -26,19 +26,19 @@ package org.broadinstitute.sting.gatk.refdata.features.beagle; import org.broad.tribble.Feature; +import org.broad.tribble.exception.CodecLineParsingException; import org.broad.tribble.readers.AsciiLineReader; import org.broad.tribble.readers.LineReader; +import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; + import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; -import java.util.regex.Matcher; import java.util.regex.Pattern; -import org.broad.tribble.exception.CodecLineParsingException; -import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; public class BeagleCodec implements ReferenceDependentFeatureCodec { private String[] header; diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/beagle/BeagleFeature.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/beagle/BeagleFeature.java index c7bf86569..e6832754d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/beagle/BeagleFeature.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/beagle/BeagleFeature.java @@ -25,14 +25,11 @@ package org.broadinstitute.sting.gatk.refdata.features.beagle; import org.broad.tribble.Feature; - +import org.broadinstitute.sting.utils.variantcontext.Allele; import java.util.ArrayList; import java.util.Map; -import net.sf.samtools.util.StringUtil; -import org.broadinstitute.sting.utils.variantcontext.Allele; - public class BeagleFeature implements Feature { private String chr; diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/refseq/RefSeqFeature.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/refseq/RefSeqFeature.java index 4648efd1e..d12114f9a 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/refseq/RefSeqFeature.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/refseq/RefSeqFeature.java @@ -4,9 +4,8 @@ import org.broad.tribble.Feature; import org.broadinstitute.sting.gatk.refdata.Transcript; import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.util.ArrayList; import java.util.List; diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/sampileup/SAMPileupCodec.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/sampileup/SAMPileupCodec.java index 00b7c45d9..43e2c3ff5 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/sampileup/SAMPileupCodec.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/sampileup/SAMPileupCodec.java @@ -25,15 +25,15 @@ package org.broadinstitute.sting.gatk.refdata.features.sampileup; -import org.broad.tribble.FeatureCodec; import org.broad.tribble.Feature; +import org.broad.tribble.FeatureCodec; import org.broad.tribble.exception.CodecLineParsingException; import org.broad.tribble.readers.LineReader; import org.broad.tribble.util.ParsingUtils; import java.util.ArrayList; -import java.util.regex.Pattern; import java.util.regex.Matcher; +import java.util.regex.Pattern; import static org.broadinstitute.sting.gatk.refdata.features.sampileup.SAMPileupFeature.VariantType; diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/sampileup/SAMPileupFeature.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/sampileup/SAMPileupFeature.java index a794c2704..378f26934 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/sampileup/SAMPileupFeature.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/sampileup/SAMPileupFeature.java @@ -25,12 +25,11 @@ package org.broadinstitute.sting.gatk.refdata.features.sampileup; +import net.sf.samtools.util.StringUtil; import org.broad.tribble.Feature; import java.util.List; -import net.sf.samtools.util.StringUtil; - /** * A tribble feature representing a SAM pileup. * diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/samread/SAMReadCodec.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/samread/SAMReadCodec.java index 15f559d46..039b8adde 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/samread/SAMReadCodec.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/samread/SAMReadCodec.java @@ -24,14 +24,14 @@ package org.broadinstitute.sting.gatk.refdata.features.samread; +import net.sf.samtools.Cigar; +import net.sf.samtools.TextCigarCodec; +import net.sf.samtools.util.StringUtil; import org.broad.tribble.Feature; import org.broad.tribble.FeatureCodec; import org.broad.tribble.exception.CodecLineParsingException; import org.broad.tribble.readers.LineReader; import org.broad.tribble.util.ParsingUtils; -import net.sf.samtools.util.StringUtil; -import net.sf.samtools.TextCigarCodec; -import net.sf.samtools.Cigar; /** * Decodes a simple SAM text string. diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/table/BedTableCodec.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/table/BedTableCodec.java index b831606a3..745ccdd9f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/table/BedTableCodec.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/table/BedTableCodec.java @@ -1,13 +1,8 @@ package org.broadinstitute.sting.gatk.refdata.features.table; import org.broad.tribble.Feature; -import org.broad.tribble.readers.LineReader; import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec; -import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.exceptions.UserException; -import java.io.IOException; -import java.util.ArrayList; import java.util.Arrays; /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/table/TableCodec.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/table/TableCodec.java index 6f0a712bf..ab1ac59d8 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/table/TableCodec.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/table/TableCodec.java @@ -1,16 +1,14 @@ package org.broadinstitute.sting.gatk.refdata.features.table; import org.broad.tribble.Feature; -import org.broad.tribble.FeatureCodec; import org.broad.tribble.readers.LineReader; import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec; -import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.interval.IntervalUtils; import java.io.IOException; -import java.util.*; +import java.util.ArrayList; +import java.util.Arrays; /** * implementation of a simple table (tab or comma delimited format) input files diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/table/TableFeature.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/table/TableFeature.java index 4b4ebe450..ca73ee960 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/table/TableFeature.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/table/TableFeature.java @@ -3,7 +3,7 @@ package org.broadinstitute.sting.gatk.refdata.features.table; import org.broad.tribble.Feature; import org.broadinstitute.sting.utils.GenomeLoc; -import java.util.*; +import java.util.List; /** * A feature representing a single row out of a text table diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/indexer/RMDIndexer.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/indexer/RMDIndexer.java index 5bb65f9a2..085d6b5b3 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/indexer/RMDIndexer.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/indexer/RMDIndexer.java @@ -10,7 +10,6 @@ import org.broad.tribble.util.LittleEndianOutputStream; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.CommandLineProgram; import org.broadinstitute.sting.commandline.Input; -import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec; import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder; diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/QueryableTrack.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/QueryableTrack.java index 19050ae11..731df997d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/QueryableTrack.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/QueryableTrack.java @@ -28,7 +28,6 @@ import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; import org.broadinstitute.sting.utils.GenomeLoc; import java.io.IOException; -import java.util.Iterator; /** * @author aaron diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrack.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrack.java index 3b9f8243f..ba1ca674e 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrack.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrack.java @@ -35,8 +35,6 @@ import org.broadinstitute.sting.utils.exceptions.UserException; import java.io.File; import java.io.IOException; -import java.lang.reflect.Type; -import java.util.Iterator; /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/RMDTrackBuilder.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/RMDTrackBuilder.java index c2057ad5e..19c91be1b 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/RMDTrackBuilder.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/RMDTrackBuilder.java @@ -42,15 +42,17 @@ import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet; import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet.RMDStorageType; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.SequenceDictionaryUtils; -import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.classloader.PluginManager; -import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.file.FSLockWithShared; import org.broadinstitute.sting.utils.file.FileSystemInabilityToLockException; import org.broadinstitute.sting.utils.instrumentation.Sizeof; -import java.io.*; +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/FeatureToGATKFeatureIterator.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/FeatureToGATKFeatureIterator.java index 462bf98df..104ba87b5 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/FeatureToGATKFeatureIterator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/FeatureToGATKFeatureIterator.java @@ -28,8 +28,6 @@ import org.broad.tribble.Feature; import org.broad.tribble.iterators.CloseableTribbleIterator; import org.broadinstitute.sting.utils.GenomeLocParser; -import java.util.Iterator; - /** * diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/GATKFeature.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/GATKFeature.java index 1553402a5..59e8471a3 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/GATKFeature.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/GATKFeature.java @@ -28,8 +28,6 @@ import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.HasGenomeLocation; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.exceptions.UserException; /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/LocationAwareSeekableRODIterator.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/LocationAwareSeekableRODIterator.java index 83aa5f056..96086598a 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/LocationAwareSeekableRODIterator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/LocationAwareSeekableRODIterator.java @@ -2,12 +2,8 @@ package org.broadinstitute.sting.gatk.refdata.utils; import net.sf.samtools.SAMSequenceDictionary; import net.sf.samtools.util.CloseableIterator; -import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import org.broadinstitute.sting.utils.GenomeLoc; -import java.util.Iterator; -import java.util.List; - /** * @author aaron *

diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/StringToGenomeLocIteratorAdapter.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/StringToGenomeLocIteratorAdapter.java index 101784d97..fc7f7c58f 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/StringToGenomeLocIteratorAdapter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/StringToGenomeLocIteratorAdapter.java @@ -25,10 +25,10 @@ package org.broadinstitute.sting.gatk.refdata.utils; +import org.broadinstitute.sting.gatk.iterators.PushbackIterator; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.bed.BedParser; -import org.broadinstitute.sting.gatk.iterators.PushbackIterator; import java.util.Iterator; diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/helpers/DbSNPHelper.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/helpers/DbSNPHelper.java index 75e7c1a32..35b0f73c6 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/helpers/DbSNPHelper.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/helpers/DbSNPHelper.java @@ -3,8 +3,8 @@ package org.broadinstitute.sting.gatk.refdata.utils.helpers; import net.sf.samtools.util.SequenceUtil; import org.broad.tribble.annotation.Strand; import org.broad.tribble.dbsnp.DbSNPFeature; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.ArrayList; import java.util.Arrays; @@ -117,7 +117,11 @@ public class DbSNPHelper { } public static boolean isIndel(DbSNPFeature feature) { - return DbSNPHelper.isInsertion(feature) || DbSNPHelper.isDeletion(feature) || feature.getVariantType().contains("in-del"); + return DbSNPHelper.isInsertion(feature) || DbSNPHelper.isDeletion(feature) || DbSNPHelper.isComplexIndel(feature); + } + + public static boolean isComplexIndel(DbSNPFeature feature) { + return feature.getVariantType().contains("in-del"); } public static boolean isHapmap(DbSNPFeature feature) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java index f4c565318..59d496828 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java @@ -100,7 +100,11 @@ public class GATKReport { * @param tableDescription the description of the table */ public void addTable(String tableName, String tableDescription) { - GATKReportTable table = new GATKReportTable(tableName, tableDescription); + addTable(tableName, tableDescription, true); + } + + public void addTable(String tableName, String tableDescription, boolean sortByPrimaryKey) { + GATKReportTable table = new GATKReportTable(tableName, tableDescription, sortByPrimaryKey); tables.put(tableName, table); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java index 0e503f92a..f7ea25696 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java @@ -3,9 +3,7 @@ package org.broadinstitute.sting.gatk.report; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.io.PrintStream; -import java.util.HashMap; -import java.util.LinkedHashMap; -import java.util.TreeSet; +import java.util.*; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -96,8 +94,9 @@ public class GATKReportTable { private String tableDescription; private String primaryKeyName; - private TreeSet primaryKeyColumn; + private Collection primaryKeyColumn; private boolean primaryKeyDisplay; + boolean sortByPrimaryKey = true; private LinkedHashMap columns; @@ -121,12 +120,17 @@ public class GATKReportTable { * @param tableDescription the description of the table */ public GATKReportTable(String tableName, String tableDescription) { - if (!isValidName(tableName)) { + this(tableName, tableDescription, true); + } + + public GATKReportTable(String tableName, String tableDescription, boolean sortByPrimaryKey) { + if (!isValidName(tableName)) { throw new ReviewedStingException("Attempted to set a GATKReportTable name of '" + tableName + "'. GATKReportTable names must be purely alphanumeric - no spaces or special characters are allowed."); } this.tableName = tableName; this.tableDescription = tableDescription; + this.sortByPrimaryKey = sortByPrimaryKey; columns = new LinkedHashMap(); } @@ -137,20 +141,14 @@ public class GATKReportTable { * @param primaryKeyName the name of the primary key column */ public void addPrimaryKey(String primaryKeyName) { - if (!isValidName(primaryKeyName)) { - throw new ReviewedStingException("Attempted to set a GATKReportTable primary key name of '" + primaryKeyName + "'. GATKReportTable primary key names must be purely alphanumeric - no spaces or special characters are allowed."); - } - - this.primaryKeyName = primaryKeyName; - - primaryKeyColumn = new TreeSet(); - primaryKeyDisplay = true; + addPrimaryKey(primaryKeyName, true); } /** * Add an optionally visible primary key column. This becomes the unique identifier for every column in the table, and will always be printed as the first column. * * @param primaryKeyName the name of the primary key column + * @param display should this primary key be displayed? */ public void addPrimaryKey(String primaryKeyName, boolean display) { if (!isValidName(primaryKeyName)) { @@ -159,7 +157,7 @@ public class GATKReportTable { this.primaryKeyName = primaryKeyName; - primaryKeyColumn = new TreeSet(); + primaryKeyColumn = sortByPrimaryKey ? new TreeSet() : new LinkedList(); primaryKeyDisplay = display; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java index e1085c6b2..89a179d0e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java @@ -25,11 +25,11 @@ package org.broadinstitute.sting.gatk.traversals; import org.apache.log4j.Logger; +import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; +import org.broadinstitute.sting.gatk.ReadMetrics; import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider; import org.broadinstitute.sting.gatk.datasources.reads.Shard; import org.broadinstitute.sting.gatk.walkers.Walker; -import org.broadinstitute.sting.gatk.ReadMetrics; -import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; @@ -38,7 +38,10 @@ import java.io.File; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.PrintStream; -import java.util.*; +import java.util.Arrays; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; public abstract class TraversalEngine,ProviderType extends ShardDataProvider> { // Time in milliseconds since we initialized this engine diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseDuplicates.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseDuplicates.java index 89ff688a7..1ba48ca5f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseDuplicates.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseDuplicates.java @@ -28,12 +28,11 @@ package org.broadinstitute.sting.gatk.traversals; import net.sf.samtools.SAMRecord; import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.datasources.providers.ReadView; import org.broadinstitute.sting.gatk.datasources.providers.ReadShardDataProvider; +import org.broadinstitute.sting.gatk.datasources.providers.ReadView; import org.broadinstitute.sting.gatk.iterators.PushbackIterator; import org.broadinstitute.sting.gatk.walkers.DuplicateWalker; import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLoci.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLoci.java index 240176f2f..232989fb0 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLoci.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLoci.java @@ -10,7 +10,6 @@ import org.broadinstitute.sting.gatk.walkers.DataSource; import org.broadinstitute.sting.gatk.walkers.LocusWalker; import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl; /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadPairs.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadPairs.java index f15a20cd3..196d54036 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadPairs.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadPairs.java @@ -1,16 +1,18 @@ package org.broadinstitute.sting.gatk.traversals; -import org.broadinstitute.sting.gatk.walkers.Requires; -import org.broadinstitute.sting.gatk.walkers.DataSource; -import org.broadinstitute.sting.gatk.walkers.ReadPairWalker; +import net.sf.samtools.SAMRecord; +import net.sf.samtools.SAMRecordCoordinateComparator; +import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.datasources.providers.ReadShardDataProvider; import org.broadinstitute.sting.gatk.datasources.providers.ReadView; import org.broadinstitute.sting.gatk.datasources.reads.Shard; -import org.apache.log4j.Logger; -import net.sf.samtools.SAMRecord; -import net.sf.samtools.SAMRecordCoordinateComparator; +import org.broadinstitute.sting.gatk.walkers.DataSource; +import org.broadinstitute.sting.gatk.walkers.ReadPairWalker; +import org.broadinstitute.sting.gatk.walkers.Requires; -import java.util.*; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; /** * Traverse over a collection of read pairs, assuming that a given shard will contain all pairs. diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java index 670676b48..06e4539c4 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java @@ -2,14 +2,16 @@ package org.broadinstitute.sting.gatk.traversals; import net.sf.samtools.SAMRecord; import org.apache.log4j.Logger; -import org.broadinstitute.sting.gatk.WalkerManager; import org.broadinstitute.sting.gatk.ReadMetrics; +import org.broadinstitute.sting.gatk.WalkerManager; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.datasources.providers.*; +import org.broadinstitute.sting.gatk.datasources.providers.ReadBasedReferenceOrderedView; +import org.broadinstitute.sting.gatk.datasources.providers.ReadReferenceView; +import org.broadinstitute.sting.gatk.datasources.providers.ReadShardDataProvider; +import org.broadinstitute.sting.gatk.datasources.providers.ReadView; import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.DataSource; import org.broadinstitute.sting.gatk.walkers.ReadWalker; -import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLoc; /* diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/Allows.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/Allows.java index a29e51189..2541921e9 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/Allows.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/Allows.java @@ -1,11 +1,6 @@ package org.broadinstitute.sting.gatk.walkers; -import java.lang.annotation.Documented; -import java.lang.annotation.Inherited; -import java.lang.annotation.Retention; -import java.lang.annotation.RetentionPolicy; -import java.lang.annotation.Target; -import java.lang.annotation.ElementType; +import java.lang.annotation.*; /** * User: hanna * Date: May 19, 2009 diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/BAQMode.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/BAQMode.java index 99dd46cbe..03097887d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/BAQMode.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/BAQMode.java @@ -1,11 +1,6 @@ package org.broadinstitute.sting.gatk.walkers; -import java.lang.annotation.Documented; -import java.lang.annotation.Inherited; -import java.lang.annotation.Retention; -import java.lang.annotation.RetentionPolicy; -import java.lang.annotation.Target; -import java.lang.annotation.ElementType; +import java.lang.annotation.*; /** * User: hanna diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/By.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/By.java index 25455b587..8fa6a4c1b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/By.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/By.java @@ -1,11 +1,6 @@ package org.broadinstitute.sting.gatk.walkers; -import java.lang.annotation.Documented; -import java.lang.annotation.Inherited; -import java.lang.annotation.Retention; -import java.lang.annotation.RetentionPolicy; -import java.lang.annotation.Target; -import java.lang.annotation.ElementType; +import java.lang.annotation.*; /** * User: hanna * Date: May 14, 2009 diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/ClipReadsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/ClipReadsWalker.java index 1a3f87a7a..ca4e3f5e3 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/ClipReadsWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/ClipReadsWalker.java @@ -25,29 +25,29 @@ package org.broadinstitute.sting.gatk.walkers; -import net.sf.samtools.*; -import net.sf.picard.reference.ReferenceSequenceFileFactory; -import net.sf.picard.reference.ReferenceSequenceFile; import net.sf.picard.reference.ReferenceSequence; -import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; +import net.sf.picard.reference.ReferenceSequenceFile; +import net.sf.picard.reference.ReferenceSequenceFileFactory; +import net.sf.samtools.SAMRecord; +import net.sf.samtools.util.StringUtil; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; -import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.io.StingSAMFileWriter; +import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.clipreads.ClippingOp; import org.broadinstitute.sting.utils.clipreads.ClippingRepresentation; import org.broadinstitute.sting.utils.clipreads.ReadClipper; import org.broadinstitute.sting.utils.collections.Pair; -import org.broadinstitute.sting.gatk.io.StingSAMFileWriter; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.utils.sam.ReadUtils; -import java.util.*; -import java.util.regex.Pattern; -import java.util.regex.Matcher; import java.io.File; import java.io.PrintStream; - -import net.sf.samtools.util.StringUtil; -import org.broadinstitute.sting.utils.sam.ReadUtils; +import java.util.*; +import java.util.regex.Matcher; +import java.util.regex.Pattern; /** * This ReadWalker provides simple, yet powerful read clipping capabilities. It allows the user to clip bases in reads diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/DuplicateWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/DuplicateWalker.java index 1fc606f07..4bfedb672 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/DuplicateWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/DuplicateWalker.java @@ -1,16 +1,13 @@ package org.broadinstitute.sting.gatk.walkers; +import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.filters.UnmappedReadFilter; import org.broadinstitute.sting.gatk.filters.NotPrimaryAlignmentReadFilter; +import org.broadinstitute.sting.gatk.filters.UnmappedReadFilter; import org.broadinstitute.sting.utils.GenomeLoc; import java.util.List; import java.util.Set; -import java.util.Arrays; - -import net.sf.samtools.SAMRecord; -import net.sf.picard.filter.SamRecordFilter; /** * Created by IntelliJ IDEA. diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/FindReadsWithNamesWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/FindReadsWithNamesWalker.java index a272150c7..56287df31 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/FindReadsWithNamesWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/FindReadsWithNamesWalker.java @@ -26,7 +26,6 @@ package org.broadinstitute.sting.gatk.walkers; import net.sf.samtools.SAMFileWriter; -import net.sf.samtools.SAMReadGroupRecord; import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/FlagStatWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/FlagStatWalker.java index 13a55eaac..fcfcb81b5 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/FlagStatWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/FlagStatWalker.java @@ -1,13 +1,13 @@ package org.broadinstitute.sting.gatk.walkers; import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; +import java.io.PrintStream; import java.text.DecimalFormat; import java.text.NumberFormat; -import java.io.PrintStream; /* diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/GCContentByIntervalWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/GCContentByIntervalWalker.java index c0f469973..68bea4dba 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/GCContentByIntervalWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/GCContentByIntervalWalker.java @@ -28,13 +28,12 @@ import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.collections.Pair; import java.io.PrintStream; -import java.util.*; +import java.util.List; /** * Walks along reference and calculates the GC content for each interval. diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/LocusWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/LocusWalker.java index fc4b403c5..b0b2687f4 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/LocusWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/LocusWalker.java @@ -1,9 +1,12 @@ package org.broadinstitute.sting.gatk.walkers; -import org.broadinstitute.sting.gatk.filters.*; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.filters.DuplicateReadFilter; +import org.broadinstitute.sting.gatk.filters.FailsVendorQualityCheckReadFilter; +import org.broadinstitute.sting.gatk.filters.NotPrimaryAlignmentReadFilter; +import org.broadinstitute.sting.gatk.filters.UnmappedReadFilter; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; /** * Created by IntelliJ IDEA. diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/PileupWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/PileupWalker.java index 84d868c1a..508d1f6ee 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/PileupWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/PileupWalker.java @@ -26,22 +26,22 @@ package org.broadinstitute.sting.gatk.walkers; import org.broad.tribble.dbsnp.DbSNPFeature; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; -import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import java.io.PrintStream; import java.util.ArrayList; import java.util.List; -import java.io.PrintStream; /** * Prints the alignment in the pileup format. In the pileup format, each line represents a genomic position, diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintRODsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintRODsWalker.java index 9ac3fc0e6..158992a22 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintRODsWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintRODsWalker.java @@ -25,15 +25,15 @@ package org.broadinstitute.sting.gatk.walkers; +import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors; import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; -import org.broadinstitute.sting.commandline.Output; -import java.util.Iterator; import java.io.PrintStream; +import java.util.Iterator; /** * Prints out all of the RODs in the input data set. Data is rendered using the toString() method diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java index 07938d322..7e1dcd707 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java @@ -28,14 +28,18 @@ package org.broadinstitute.sting.gatk.walkers; import net.sf.samtools.SAMFileWriter; import net.sf.samtools.SAMReadGroupRecord; import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.baq.BAQ; -import java.io.PrintStream; +import java.io.File; +import java.util.Collection; +import java.util.Set; +import java.util.TreeSet; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; /** * Renders, in SAM/BAM format, all reads from the input data set in the order in which they appear * in the input file. It can dynamically merge the contents of multiple input BAM files, resulting @@ -54,6 +58,13 @@ public class PrintReadsWalker extends ReadWalker { String platform = null; // E.g. ILLUMINA, 454 @Argument(fullName = "number", shortName = "n", doc="Print the first n reads from the file, discarding the rest", required = false) int nReadsToPrint = -1; + @Argument(fullName="sample_file", shortName="sf", doc="File containing a list of samples (one per line). Can be specified multiple times", required=false) + public Set sampleFile = new TreeSet(); + @Argument(fullName="sample_name", shortName="sn", doc="Sample name to be included in the analysis. Can be specified multiple times.", required=false) + public Set sampleNames = new TreeSet(); + + private TreeSet samplesToChoose = new TreeSet(); + private boolean SAMPLES_SPECIFIED = false; /** * The initialize function. @@ -61,6 +72,20 @@ public class PrintReadsWalker extends ReadWalker { public void initialize() { if ( platform != null ) platform = platform.toUpperCase(); + + Collection samplesFromFile; + if (!sampleFile.isEmpty()) { + samplesFromFile = SampleUtils.getSamplesFromFiles(sampleFile); + samplesToChoose.addAll(samplesFromFile); + } + + if (!sampleNames.isEmpty()) + samplesToChoose.addAll(sampleNames); + + if(!samplesToChoose.isEmpty()) { + SAMPLES_SPECIFIED = true; + } + } /** @@ -87,6 +112,14 @@ public class PrintReadsWalker extends ReadWalker { if ( readPlatformAttr == null || !readPlatformAttr.toString().toUpperCase().contains(platform)) return false; } + if (SAMPLES_SPECIFIED ) { + // user specified samples to select + // todo - should be case-agnostic but for simplicity and speed this is ignored. + // todo - can check at initialization intersection of requested samples and samples in BAM header to further speedup. + if (!samplesToChoose.contains(read.getReadGroup().getSample())) + return false; + } + // check if we've reached the output limit if ( nReadsToPrint == 0 ) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/ReadFilters.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/ReadFilters.java index ff3b6d82f..5f11686a1 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/ReadFilters.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/ReadFilters.java @@ -25,6 +25,7 @@ package org.broadinstitute.sting.gatk.walkers; import net.sf.picard.filter.SamRecordFilter; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import java.lang.annotation.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/ReadWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/ReadWalker.java index a5486fd9a..db2038aa3 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/ReadWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/ReadWalker.java @@ -1,8 +1,8 @@ package org.broadinstitute.sting.gatk.walkers; import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; /** * Created by IntelliJ IDEA. diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/Requires.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/Requires.java index 6c1e64c4e..e9a381a85 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/Requires.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/Requires.java @@ -1,11 +1,6 @@ package org.broadinstitute.sting.gatk.walkers; -import java.lang.annotation.Documented; -import java.lang.annotation.Inherited; -import java.lang.annotation.Retention; -import java.lang.annotation.RetentionPolicy; -import java.lang.annotation.Target; -import java.lang.annotation.ElementType; +import java.lang.annotation.*; /** * User: hanna * Date: May 19, 2009 diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/SplitSamFileWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/SplitSamFileWalker.java index 014acff9c..486d233b7 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/SplitSamFileWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/SplitSamFileWalker.java @@ -25,14 +25,20 @@ package org.broadinstitute.sting.gatk.walkers; -import net.sf.samtools.*; -import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.utils.sam.ReadUtils; +import net.sf.samtools.SAMFileHeader; +import net.sf.samtools.SAMFileWriter; +import net.sf.samtools.SAMReadGroupRecord; +import net.sf.samtools.SAMRecord; import org.apache.log4j.Logger; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; +import org.broadinstitute.sting.utils.sam.ReadUtils; -import java.util.*; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; /** * Divides the input data set into separate BAM files, one for each sample in the input data set. The split diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java index f0ba8bb46..9e261a0b1 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java @@ -25,14 +25,17 @@ package org.broadinstitute.sting.gatk.walkers; -import java.util.List; - +import org.apache.log4j.Logger; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.filters.MalformedReadFilter; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.collections.Pair; -import org.apache.log4j.Logger; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; +import org.broadinstitute.sting.utils.help.GenericDocumentationHandler; + +import java.util.List; /** * Created by IntelliJ IDEA. @@ -44,6 +47,10 @@ import org.apache.log4j.Logger; @ReadFilters(MalformedReadFilter.class) @PartitionBy(PartitionType.NONE) @BAQMode(QualityMode = BAQ.QualityMode.OVERWRITE_QUALS, ApplicationTime = BAQ.ApplicationTime.ON_INPUT) +@DocumentedGATKFeature( + groupName = "GATK walkers", + summary = "General tools available for running on the command line as part of the GATK package", + extraDocs = {CommandLineGATK.class}) public abstract class Walker { final protected static Logger logger = Logger.getLogger(Walker.class); private GenomeAnalysisEngine toolkit; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/WalkerName.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/WalkerName.java index 0e4d40675..4d46607e5 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/WalkerName.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/WalkerName.java @@ -1,10 +1,6 @@ package org.broadinstitute.sting.gatk.walkers; -import java.lang.annotation.Documented; -import java.lang.annotation.ElementType; -import java.lang.annotation.Retention; -import java.lang.annotation.RetentionPolicy; -import java.lang.annotation.Target; +import java.lang.annotation.*; /** * Created by IntelliJ IDEA. diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/Window.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/Window.java index 0b718071d..9827fdf09 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/Window.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/Window.java @@ -25,7 +25,10 @@ package org.broadinstitute.sting.gatk.walkers; -import java.lang.annotation.*; +import java.lang.annotation.Documented; +import java.lang.annotation.Inherited; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; /** * Describes the size of the window into the genome. Has differing semantics based on diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java index b02dcd8e2..784927ab4 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java @@ -25,24 +25,24 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*; -import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; +import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.Map; +import java.util.Arrays; import java.util.HashMap; import java.util.List; -import java.util.Arrays; +import java.util.Map; -public class AlleleBalance implements InfoFieldAnnotation { +public class AlleleBalance extends InfoFieldAnnotation { public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if ( stratifiedContexts.size() == 0 ) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalanceBySample.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalanceBySample.java index 51d290763..f70a87dc5 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalanceBySample.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalanceBySample.java @@ -1,17 +1,21 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation; +import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.gatk.contexts.*; -import org.broadinstitute.sting.utils.variantcontext.*; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*; -import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.*; -public class AlleleBalanceBySample implements GenotypeAnnotation, ExperimentalAnnotation { +public class AlleleBalanceBySample extends GenotypeAnnotation implements ExperimentalAnnotation { public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g) { Double ratio = annotateSNP(stratifiedContext, vc, g); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AnnotationByDepth.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AnnotationByDepth.java index 51b5381dc..dc41dbc81 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AnnotationByDepth.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AnnotationByDepth.java @@ -1,13 +1,14 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import org.broadinstitute.sting.utils.variantcontext.Genotype; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; +import org.broadinstitute.sting.utils.variantcontext.Genotype; + import java.util.Map; -public abstract class AnnotationByDepth implements InfoFieldAnnotation { +public abstract class AnnotationByDepth extends InfoFieldAnnotation { protected int annotationByVariantDepth(final Map genotypes, Map stratifiedContexts) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseCounts.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseCounts.java index 244627154..7cd159c5d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseCounts.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseCounts.java @@ -31,22 +31,22 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.Map; +import java.util.Arrays; import java.util.HashMap; import java.util.List; -import java.util.Arrays; +import java.util.Map; -public class BaseCounts implements InfoFieldAnnotation { +public class BaseCounts extends InfoFieldAnnotation { public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if ( stratifiedContexts.size() == 0 ) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseQualityRankSumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseQualityRankSumTest.java index ff916bedd..2a5c996f7 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseQualityRankSumTest.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseQualityRankSumTest.java @@ -1,16 +1,16 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; -import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import java.util.Arrays; import java.util.HashMap; import java.util.LinkedHashMap; import java.util.List; -import java.util.Arrays; public class BaseQualityRankSumTest extends RankSumTest { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java index f3ec2b1df..9b30079d0 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java @@ -25,21 +25,25 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; +import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; -import java.util.*; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; -public class ChromosomeCounts implements InfoFieldAnnotation, StandardAnnotation { +public class ChromosomeCounts extends InfoFieldAnnotation implements StandardAnnotation { private String[] keyNames = { VCFConstants.ALLELE_NUMBER_KEY, VCFConstants.ALLELE_COUNT_KEY, VCFConstants.ALLELE_FREQUENCY_KEY }; private VCFInfoHeaderLine[] descriptions = { new VCFInfoHeaderLine(VCFConstants.ALLELE_FREQUENCY_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Allele Frequency, for each ALT allele, in the same order as listed"), diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java index e56825dbe..d8907c57f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java @@ -1,21 +1,22 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; +import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.Map; +import java.util.Arrays; import java.util.HashMap; import java.util.List; -import java.util.Arrays; +import java.util.Map; -public class DepthOfCoverage implements InfoFieldAnnotation, StandardAnnotation { +public class DepthOfCoverage extends InfoFieldAnnotation implements StandardAnnotation { public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if ( stratifiedContexts.size() == 0 ) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java index ee66b50ee..20513421d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java @@ -1,21 +1,20 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFCompoundHeaderLine; -import org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; -import org.broadinstitute.sting.utils.pileup.PileupElement; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; -import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup; +import org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement; +import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.Arrays; import java.util.HashMap; @@ -23,7 +22,7 @@ import java.util.List; import java.util.Map; -public class DepthPerAlleleBySample implements GenotypeAnnotation, StandardAnnotation { +public class DepthPerAlleleBySample extends GenotypeAnnotation implements StandardAnnotation { private static String REF_ALLELE = "REF"; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java index 2115526a6..e71febece 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java @@ -24,25 +24,25 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import cern.jet.math.Arithmetic; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel; -import org.broadinstitute.sting.utils.*; -import org.broadinstitute.sting.utils.pileup.PileupElement; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; +import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import cern.jet.math.Arithmetic; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; +import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.*; -public class FisherStrand implements InfoFieldAnnotation, StandardAnnotation { +public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotation { private static final String FS = "FS"; private static final double MIN_PVALUE = 1E-320; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GCContent.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GCContent.java index 5eaa30bf3..588d3e98a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GCContent.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GCContent.java @@ -1,21 +1,22 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.Map; +import java.util.Arrays; import java.util.HashMap; import java.util.List; -import java.util.Arrays; +import java.util.Map; -public class GCContent implements InfoFieldAnnotation, ExperimentalAnnotation { +public class GCContent extends InfoFieldAnnotation implements ExperimentalAnnotation { public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { double content = computeGCContent(ref); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GLstats.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GLstats.java index 3a5db2884..862e12f7d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GLstats.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GLstats.java @@ -1,19 +1,20 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.Map; +import java.util.Arrays; import java.util.HashMap; import java.util.List; -import java.util.Arrays; +import java.util.Map; /** * Created by IntelliJ IDEA. @@ -22,7 +23,7 @@ import java.util.Arrays; */ // A set of annotations calculated directly from the GLs -public class GLstats implements InfoFieldAnnotation, StandardAnnotation { +public class GLstats extends InfoFieldAnnotation implements StandardAnnotation { private static final int MIN_SAMPLES = 10; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java index bd8c51a41..2196de389 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java @@ -24,27 +24,31 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; +import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel; +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.QualityUtils; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.genotype.Haplotype; +import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.sam.AlignmentUtils; import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.Genotype; import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*; -import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel; -import org.broadinstitute.sting.utils.*; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.genotype.Haplotype; -import org.broadinstitute.sting.utils.pileup.*; import java.util.*; -import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.utils.sam.AlignmentUtils; -public class HaplotypeScore implements InfoFieldAnnotation, StandardAnnotation { +public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnotation { private final static boolean DEBUG = false; private final static int MIN_CONTEXT_WING_SIZE = 10; private final static int MAX_CONSENSUS_HAPLOTYPES_TO_CONSIDER = 50; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HardyWeinberg.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HardyWeinberg.java index ecebfbbd2..2d9424e98 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HardyWeinberg.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HardyWeinberg.java @@ -1,23 +1,24 @@ package org.broadinstitute.sting.gatk.walkers.annotator; import org.broad.tribble.util.popgen.HardyWeinbergCalculation; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.WorkInProgressAnnotation; import org.broadinstitute.sting.utils.QualityUtils; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.Map; +import java.util.Arrays; import java.util.HashMap; import java.util.List; -import java.util.Arrays; +import java.util.Map; -public class HardyWeinberg implements InfoFieldAnnotation, WorkInProgressAnnotation { +public class HardyWeinberg extends InfoFieldAnnotation implements WorkInProgressAnnotation { private static final int MIN_SAMPLES = 10; private static final int MIN_GENOTYPE_QUALITY = 10; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java index 099780fa7..870e9992b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java @@ -1,22 +1,22 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.Map; +import java.util.Arrays; import java.util.HashMap; import java.util.List; -import java.util.Arrays; +import java.util.Map; -public class HomopolymerRun implements InfoFieldAnnotation, StandardAnnotation { +public class HomopolymerRun extends InfoFieldAnnotation implements StandardAnnotation { private boolean ANNOTATE_INDELS = true; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/IndelType.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/IndelType.java index ee8b01d7d..b1c16ba0d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/IndelType.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/IndelType.java @@ -1,14 +1,14 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.utils.IndelUtils; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.*; @@ -19,16 +19,32 @@ import java.util.*; * Time: 11:47:33 AM * To change this template use File | Settings | File Templates. */ -public class IndelType implements InfoFieldAnnotation, ExperimentalAnnotation { +public class IndelType extends InfoFieldAnnotation implements ExperimentalAnnotation { public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { int run; - if ( vc.isIndel() && vc.isBiallelic() ) { + if (vc.isMixed()) { + Map map = new HashMap(); + map.put(getKeyNames().get(0), String.format("%s", "MIXED")); + return map; + + } + else if ( vc.isIndel() ) { String type=""; - ArrayList inds = IndelUtils.findEventClassificationIndex(vc, ref); - for (int k : inds) { - type = type+ IndelUtils.getIndelClassificationName(k)+"."; + if (!vc.isBiallelic()) + type = "MULTIALLELIC_INDEL"; + else { + if (vc.isInsertion()) + type = "INS."; + else if (vc.isDeletion()) + type = "DEL."; + else + type = "OTHER."; + ArrayList inds = IndelUtils.findEventClassificationIndex(vc, ref); + for (int k : inds) { + type = type+ IndelUtils.getIndelClassificationName(k)+"."; + } } Map map = new HashMap(); map.put(getKeyNames().get(0), String.format("%s", type)); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/LowMQ.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/LowMQ.java index f23433bb5..5de9aaa3b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/LowMQ.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/LowMQ.java @@ -1,22 +1,22 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.Map; +import java.util.Arrays; import java.util.HashMap; import java.util.List; -import java.util.Arrays; +import java.util.Map; -public class LowMQ implements InfoFieldAnnotation { +public class LowMQ extends InfoFieldAnnotation { public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if ( stratifiedContexts.size() == 0 ) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java index 8260a5a81..cc62580a9 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java @@ -1,17 +1,17 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel; import org.broadinstitute.sting.utils.QualityUtils; -import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; -import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import java.util.Arrays; import java.util.HashMap; import java.util.LinkedHashMap; import java.util.List; -import java.util.Arrays; public class MappingQualityRankSumTest extends RankSumTest { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZero.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZero.java index 25a7b286d..60bfe945f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZero.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZero.java @@ -1,16 +1,16 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; +import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.Arrays; import java.util.HashMap; @@ -18,7 +18,7 @@ import java.util.List; import java.util.Map; -public class MappingQualityZero implements InfoFieldAnnotation, StandardAnnotation { +public class MappingQualityZero extends InfoFieldAnnotation implements StandardAnnotation { public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if ( stratifiedContexts.size() == 0 ) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroBySample.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroBySample.java index 00cc30309..f2b7b72b9 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroBySample.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroBySample.java @@ -25,22 +25,22 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; -import org.broadinstitute.sting.utils.pileup.PileupElement; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation; import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; import org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.Map; +import java.util.Arrays; import java.util.HashMap; import java.util.List; -import java.util.Arrays; +import java.util.Map; /** * Created by IntelliJ IDEA. @@ -49,7 +49,7 @@ import java.util.Arrays; * Time: 6:46:25 PM * To change this template use File | Settings | File Templates. */ -public class MappingQualityZeroBySample implements GenotypeAnnotation { +public class MappingQualityZeroBySample extends GenotypeAnnotation { public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context, VariantContext vc, Genotype g) { if ( g == null || !g.isCalled() ) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroFraction.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroFraction.java index dc4934ade..3a6c9dce9 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroFraction.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroFraction.java @@ -1,15 +1,15 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.Arrays; import java.util.HashMap; @@ -18,7 +18,7 @@ import java.util.Map; -public class MappingQualityZeroFraction implements InfoFieldAnnotation, ExperimentalAnnotation { +public class MappingQualityZeroFraction extends InfoFieldAnnotation implements ExperimentalAnnotation { public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if ( stratifiedContexts.size() == 0 ) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/NBaseCount.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/NBaseCount.java index 3b64abfff..9f67acf65 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/NBaseCount.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/NBaseCount.java @@ -1,14 +1,14 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.Arrays; import java.util.HashMap; @@ -21,7 +21,7 @@ import java.util.Map; * Date: 5/16/11 */ -public class NBaseCount implements InfoFieldAnnotation { +public class NBaseCount extends InfoFieldAnnotation { public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if( stratifiedContexts.size() == 0 ) return null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java index 720984835..20bee9008 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java @@ -1,22 +1,22 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.Map; +import java.util.Arrays; import java.util.HashMap; import java.util.List; -import java.util.Arrays; +import java.util.Map; -public class QualByDepth extends AnnotationByDepth implements InfoFieldAnnotation, StandardAnnotation { +public class QualByDepth extends AnnotationByDepth implements StandardAnnotation { public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if ( stratifiedContexts.size() == 0 ) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java index 1ef7ccd0b..d1d9871e7 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java @@ -1,23 +1,26 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import org.broadinstitute.sting.utils.QualityUtils; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.QualityUtils; +import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.*; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; -public class RMSMappingQuality implements InfoFieldAnnotation, StandardAnnotation { +public class RMSMappingQuality extends InfoFieldAnnotation implements StandardAnnotation { public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if ( stratifiedContexts.size() == 0 ) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java index f00abd6a1..643056c1d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java @@ -1,26 +1,27 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel; -import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.MannWhitneyU; +import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.List; import java.util.ArrayList; -import java.util.Map; import java.util.HashMap; +import java.util.List; +import java.util.Map; -public abstract class RankSumTest implements InfoFieldAnnotation, StandardAnnotation { +public abstract class RankSumTest extends InfoFieldAnnotation implements StandardAnnotation { static final double INDEL_LIKELIHOOD_THRESH = 0.1; static final boolean DEBUG = false; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadDepthAndAllelicFractionBySample.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadDepthAndAllelicFractionBySample.java index a670532af..f3e99235a 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadDepthAndAllelicFractionBySample.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadDepthAndAllelicFractionBySample.java @@ -26,25 +26,24 @@ package org.broadinstitute.sting.gatk.walkers.annotator; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation; +import org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup; -import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine; -import org.broadinstitute.sting.utils.codecs.vcf.VCFCompoundHeaderLine; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.Map; +import java.util.Arrays; import java.util.HashMap; import java.util.List; -import java.util.Arrays; +import java.util.Map; /** * Created by IntelliJ IDEA. @@ -53,7 +52,7 @@ import java.util.Arrays; * Time: 3:59:27 PM * To change this template use File | Settings | File Templates. */ -public class ReadDepthAndAllelicFractionBySample implements GenotypeAnnotation { +public class ReadDepthAndAllelicFractionBySample extends GenotypeAnnotation { private static String REF_ALLELE = "REF"; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java index 727904a3b..aabfb2970 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java @@ -1,14 +1,17 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; +import net.sf.samtools.Cigar; +import net.sf.samtools.CigarElement; +import net.sf.samtools.CigarOperator; +import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel; import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.sam.AlignmentUtils; -import net.sf.samtools.*; +import org.broadinstitute.sting.utils.variantcontext.Allele; import java.util.Arrays; import java.util.HashMap; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SBByDepth.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SBByDepth.java index fc769ac54..a5ebd8db2 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SBByDepth.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SBByDepth.java @@ -1,13 +1,13 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.Arrays; import java.util.HashMap; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SampleList.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SampleList.java index e2fd2a3d4..3712ca8ae 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SampleList.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SampleList.java @@ -25,15 +25,15 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.Arrays; import java.util.HashMap; @@ -41,7 +41,7 @@ import java.util.List; import java.util.Map; -public class SampleList implements InfoFieldAnnotation { +public class SampleList extends InfoFieldAnnotation { public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if ( vc.isMonomorphic() || !vc.hasGenotypes() ) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SpanningDeletions.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SpanningDeletions.java index 0b6cbcc2e..332b0226b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SpanningDeletions.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SpanningDeletions.java @@ -1,14 +1,14 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.Arrays; import java.util.HashMap; @@ -16,7 +16,7 @@ import java.util.List; import java.util.Map; -public class SpanningDeletions implements InfoFieldAnnotation, StandardAnnotation { +public class SpanningDeletions extends InfoFieldAnnotation implements StandardAnnotation { public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if ( stratifiedContexts.size() == 0 ) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TechnologyComposition.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TechnologyComposition.java index 351117809..626142cd2 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TechnologyComposition.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TechnologyComposition.java @@ -5,7 +5,6 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; -import org.broadinstitute.sting.utils.IndelUtils; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.pileup.PileupElement; @@ -13,7 +12,10 @@ import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.sam.ReadUtils; import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.*; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; /** * Created by IntelliJ IDEA. @@ -22,7 +24,7 @@ import java.util.*; * Time: 3:14 PM * To change this template use File | Settings | File Templates. */ -public class TechnologyComposition implements ExperimentalAnnotation,InfoFieldAnnotation { +public class TechnologyComposition extends InfoFieldAnnotation implements ExperimentalAnnotation { private String nSLX = "NumSLX"; private String n454 ="Num454"; private String nSolid = "NumSOLiD"; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java index cd5b6694b..acbeee3b2 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java @@ -25,24 +25,23 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.*; +import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Hidden; +import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotationType; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.utils.BaseUtils; -import org.broadinstitute.sting.utils.classloader.PluginManager; import org.broadinstitute.sting.utils.SampleUtils; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Output; -import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; +import org.broadinstitute.sting.utils.classloader.PluginManager; +import org.broadinstitute.sting.utils.codecs.vcf.*; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java index 54c8be73a..fdf498a3d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java @@ -25,30 +25,27 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import java.util.ArrayList; -import java.util.Collection; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.LinkedHashMap; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.Map.Entry; - -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*; -import org.broadinstitute.sting.gatk.walkers.annotator.genomicannotator.*; +import org.broadinstitute.sting.gatk.walkers.annotator.genomicannotator.GenomicAnnotation; +import org.broadinstitute.sting.gatk.walkers.annotator.genomicannotator.JoinTable; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotationInterfaceManager; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; +import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; + +import java.util.*; +import java.util.Map.Entry; public class VariantAnnotatorEngine { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotation.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotation.java index e02c62baf..0e8360484 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotation.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotation.java @@ -25,13 +25,6 @@ package org.broadinstitute.sting.gatk.walkers.annotator.genomicannotator; -import java.util.*; -import java.util.Map.Entry; - -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -39,7 +32,14 @@ import org.broadinstitute.sting.gatk.refdata.features.annotator.AnnotatorInputTa import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; + +import java.util.*; +import java.util.Map.Entry; /** * This plugin for {@link VariantAnnotatorEngine} serves as the core @@ -48,7 +48,7 @@ import org.broadinstitute.sting.utils.exceptions.UserException; * * For details, see: http://www.broadinstitute.org/gsa/wiki/index.php/GenomicAnnotator */ -public class GenomicAnnotation implements InfoFieldAnnotation { +public class GenomicAnnotation extends InfoFieldAnnotation { public static final String CHR_COLUMN = "chr"; public static final String START_COLUMN = "start"; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotator.java index 69a35a584..b42310780 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotator.java @@ -26,28 +26,28 @@ package org.broadinstitute.sting.gatk.walkers.annotator.genomicannotator; -import java.io.File; -import java.io.IOException; -import java.util.*; -import java.util.Map.Entry; - -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; -import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.features.annotator.AnnotatorInputTableCodec; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine; import org.broadinstitute.sting.utils.SampleUtils; -import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; +import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; + +import java.io.File; +import java.io.IOException; +import java.util.*; +import java.util.Map.Entry; /** * Annotates variant calls with information from user-specified tabular files. diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/JoinTable.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/JoinTable.java index c57aacb5b..714f374cf 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/JoinTable.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/JoinTable.java @@ -25,6 +25,9 @@ package org.broadinstitute.sting.gatk.walkers.annotator.genomicannotator; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.exceptions.UserException; + import java.io.BufferedReader; import java.io.File; import java.io.FileReader; @@ -33,9 +36,6 @@ import java.util.ArrayList; import java.util.HashMap; import java.util.List; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.exceptions.UserException; - /** * This is a container that holds all data corresponding to a single join table as specified by one -J arg (ex: -J bindingName1,/path/to/file,bindingName1.columnName=bindingName2.columnName2). * Some terminology: diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/JoinTableParser.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/JoinTableParser.java index d3fcfd42a..3b6c87f90 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/JoinTableParser.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/JoinTableParser.java @@ -25,16 +25,16 @@ package org.broadinstitute.sting.gatk.walkers.annotator.genomicannotator; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.exceptions.UserException; + import java.io.BufferedReader; import java.io.IOException; import java.util.ArrayList; import java.util.Collections; import java.util.List; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.exceptions.UserException; - /** * Used to parse files passed to the GenomicAnnotator via the -J arg. * The files must be tab-delimited, and the first non-empty/non-commented line diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/TranscriptToGenomicInfo.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/TranscriptToGenomicInfo.java index 0ed61fc48..0bbfa51b4 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/TranscriptToGenomicInfo.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/TranscriptToGenomicInfo.java @@ -24,9 +24,6 @@ package org.broadinstitute.sting.gatk.walkers.annotator.genomicannotator; -import java.io.*; -import java.util.*; - import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; @@ -36,17 +33,15 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.features.annotator.AnnotatorInputTableCodec; import org.broadinstitute.sting.gatk.refdata.features.annotator.AnnotatorInputTableFeature; import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; -import org.broadinstitute.sting.gatk.walkers.By; -import org.broadinstitute.sting.gatk.walkers.DataSource; -import org.broadinstitute.sting.gatk.walkers.RMD; -import org.broadinstitute.sting.gatk.walkers.Reference; -import org.broadinstitute.sting.gatk.walkers.Requires; -import org.broadinstitute.sting.gatk.walkers.RodWalker; -import org.broadinstitute.sting.gatk.walkers.Window; +import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; +import java.io.IOException; +import java.io.PrintStream; +import java.util.*; + /** * Takes a table of transcripts (eg. UCSC refGene, knownGene, and CCDS tables) and generates the big table which contains * annotations for each possible variant at each transcript position (eg. 4 variants at each genomic position). diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/GenotypeAnnotation.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/GenotypeAnnotation.java index 29b256479..e982582ee 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/GenotypeAnnotation.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/GenotypeAnnotation.java @@ -1,24 +1,21 @@ package org.broadinstitute.sting.gatk.walkers.annotator.interfaces; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine; import org.broadinstitute.sting.utils.variantcontext.Genotype; import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import java.util.Map; import java.util.List; +import java.util.Map; -public interface GenotypeAnnotation { +public abstract class GenotypeAnnotation extends VariantAnnotatorAnnotation { // return annotations for the given contexts/genotype split by sample - public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g); - - // return the FORMAT keys - public List getKeyNames(); + public abstract Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g); // return the descriptions used for the VCF FORMAT meta field - public List getDescriptions(); - + public abstract List getDescriptions(); + } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/InfoFieldAnnotation.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/InfoFieldAnnotation.java index 5b33395b5..84438ccd8 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/InfoFieldAnnotation.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/InfoFieldAnnotation.java @@ -1,23 +1,20 @@ package org.broadinstitute.sting.gatk.walkers.annotator.interfaces; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotator; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.Map; import java.util.List; +import java.util.Map; -public interface InfoFieldAnnotation { - +public abstract class InfoFieldAnnotation extends VariantAnnotatorAnnotation { // return annotations for the given contexts split by sample - public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc); - - // return the INFO keys - public List getKeyNames(); + public abstract Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc); // return the descriptions used for the VCF INFO meta field - public List getDescriptions(); - + public abstract List getDescriptions(); } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/VariantAnnotatorAnnotation.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/VariantAnnotatorAnnotation.java new file mode 100644 index 000000000..f33d61df9 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/VariantAnnotatorAnnotation.java @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.walkers.annotator.interfaces; + +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; + +import java.util.List; +import java.util.Map; + +@DocumentedGATKFeature(enable = true, groupName = "VariantAnnotator annotations", summary = "VariantAnnotator annotations") +public abstract class VariantAnnotatorAnnotation { + // return the INFO keys + public abstract List getKeyNames(); +} \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java index aa23abc67..21c8ec430 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java @@ -25,26 +25,26 @@ package org.broadinstitute.sting.gatk.walkers.beagle; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; -import org.broadinstitute.sting.gatk.refdata.features.beagle.BeagleFeature; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.RodWalker; +import org.broadinstitute.sting.gatk.refdata.features.beagle.BeagleFeature; import org.broadinstitute.sting.gatk.walkers.RMD; import org.broadinstitute.sting.gatk.walkers.Requires; +import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.SampleUtils; -import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; import org.broadinstitute.sting.utils.codecs.vcf.*; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import java.util.*; + import static java.lang.Math.log10; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java index 93ee0b085..3eed12992 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java @@ -25,10 +25,6 @@ package org.broadinstitute.sting.gatk.walkers.beagle; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Hidden; import org.broadinstitute.sting.commandline.Input; @@ -36,17 +32,20 @@ import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.gatk.walkers.RMD; import org.broadinstitute.sting.gatk.walkers.Requires; +import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.gatk.walkers.variantrecalibration.VQSRCalibrationCurve; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.SampleUtils; +import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.exceptions.StingException; -import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import java.io.File; import java.io.PrintStream; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/VariantsToBeagleUnphasedWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/VariantsToBeagleUnphasedWalker.java index 18aa3e257..f6cd1d636 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/VariantsToBeagleUnphasedWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/VariantsToBeagleUnphasedWalker.java @@ -25,28 +25,29 @@ package org.broadinstitute.sting.gatk.walkers.beagle; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; -import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.RMD; import org.broadinstitute.sting.gatk.walkers.Requires; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.SampleUtils; -import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; +import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import java.io.PrintStream; -import java.util.*; +import java.util.Arrays; +import java.util.Set; /** * Produces an input file to Beagle imputation engine, listing unphased, hard-called genotypes for a single sample diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CallableLociWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CallableLociWalker.java index 2c67265d6..90e6fcd77 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CallableLociWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CallableLociWalker.java @@ -22,6 +22,8 @@ package org.broadinstitute.sting.gatk.walkers.coverage; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -31,12 +33,10 @@ import org.broadinstitute.sting.gatk.walkers.LocusWalker; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.pileup.PileupElement; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Output; import java.io.File; -import java.io.PrintStream; import java.io.FileNotFoundException; +import java.io.PrintStream; /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CoarseCoverageWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CoarseCoverageWalker.java index ae947eac1..405a44c29 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CoarseCoverageWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CoarseCoverageWalker.java @@ -26,12 +26,11 @@ package org.broadinstitute.sting.gatk.walkers.coverage; import net.sf.samtools.SAMRecord; - -import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.ReadWalker; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.ReadWalker; import java.io.PrintStream; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CompareCallableLociWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CompareCallableLociWalker.java index 5a9c62b7f..6b91b0198 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CompareCallableLociWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CompareCallableLociWalker.java @@ -22,19 +22,19 @@ package org.broadinstitute.sting.gatk.walkers.coverage; +import org.broad.tribble.bed.FullBEDFeature; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.RodWalker; -import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Output; -import org.broad.tribble.bed.FullBEDFeature; import org.broadinstitute.sting.utils.exceptions.UserException; -import java.util.*; import java.io.PrintStream; +import java.util.Arrays; +import java.util.List; /** * Test routine for new VariantContext object diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CoverageUtils.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CoverageUtils.java index 298aa90b9..a41e55166 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CoverageUtils.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CoverageUtils.java @@ -8,7 +8,9 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.pileup.PileupElement; -import java.util.*; +import java.util.Collection; +import java.util.HashMap; +import java.util.Map; /** * IF THERE IS NO JAVADOC RIGHT HERE, YELL AT chartl diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java index 91ae81cd5..c1956f1d7 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java @@ -26,6 +26,8 @@ package org.broadinstitute.sting.gatk.walkers.coverage; import net.sf.samtools.SAMReadGroupRecord; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -34,12 +36,13 @@ import org.broadinstitute.sting.gatk.refdata.features.refseq.RefSeqCodec; import org.broadinstitute.sting.gatk.refdata.features.refseq.RefSeqFeature; import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack; import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder; -import org.broadinstitute.sting.gatk.refdata.utils.*; +import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; +import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator; +import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; import org.broadinstitute.sting.gatk.walkers.*; -import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.collections.Pair; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/BAMDiffableReader.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/BAMDiffableReader.java index a5ebf27bb..a1c043365 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/BAMDiffableReader.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/BAMDiffableReader.java @@ -24,22 +24,13 @@ package org.broadinstitute.sting.gatk.walkers.diffengine; -import net.sf.samtools.*; +import net.sf.samtools.SAMFileReader; +import net.sf.samtools.SAMRecord; +import net.sf.samtools.SAMRecordIterator; import net.sf.samtools.util.BlockCompressedInputStream; -import org.broad.tribble.readers.AsciiLineReader; -import org.broad.tribble.readers.LineReader; -import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.io.DataInputStream; -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; +import java.io.*; import java.util.Arrays; -import java.util.Map; -import java.util.zip.GZIPInputStream; /** @@ -109,8 +100,10 @@ public class BAMDiffableReader implements DiffableReader { final byte[] BAM_MAGIC = "BAM\1".getBytes(); final byte[] buffer = new byte[BAM_MAGIC.length]; try { - FileInputStream fstream = new FileInputStream(file); - new BlockCompressedInputStream(fstream).read(buffer,0,BAM_MAGIC.length); + InputStream fstream = new BufferedInputStream(new FileInputStream(file)); + if ( !BlockCompressedInputStream.isValidFile(fstream) ) + return false; + new BlockCompressedInputStream(fstream).read(buffer, 0, BAM_MAGIC.length); return Arrays.equals(buffer, BAM_MAGIC); } catch ( IOException e ) { return false; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffElement.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffElement.java index 4c3f7bd95..eb8a71c2c 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffElement.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffElement.java @@ -24,7 +24,9 @@ package org.broadinstitute.sting.gatk.walkers.diffengine; -import com.google.java.contract.*; +import com.google.java.contract.Ensures; +import com.google.java.contract.Invariant; +import com.google.java.contract.Requires; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java index 6d85df71d..4a4f6f6af 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java @@ -42,6 +42,7 @@ import java.util.*; * Date: 7/4/11 * Time: 12:51 PM * A generic engine for comparing tree-structured objects + * */ public class DiffEngine { final protected static Logger logger = Logger.getLogger(DiffEngine.class); @@ -58,7 +59,7 @@ public class DiffEngine { // // -------------------------------------------------------------------------------- - public List diff(DiffElement master, DiffElement test) { + public List diff(DiffElement master, DiffElement test) { DiffValue masterValue = master.getValue(); DiffValue testValue = test.getValue(); @@ -68,14 +69,14 @@ public class DiffEngine { return diff(masterValue, testValue); } else { // structural difference in types. one is node, other is leaf - return Arrays.asList(new SpecificDifference(master, test)); + return Arrays.asList(new Difference(master, test)); } } - public List diff(DiffNode master, DiffNode test) { + public List diff(DiffNode master, DiffNode test) { Set allNames = new HashSet(master.getElementNames()); allNames.addAll(test.getElementNames()); - List diffs = new ArrayList(); + List diffs = new ArrayList(); for ( String name : allNames ) { DiffElement masterElt = master.getElement(name); @@ -84,7 +85,7 @@ public class DiffEngine { throw new ReviewedStingException("BUG: unexceptedly got two null elements for field: " + name); } else if ( masterElt == null || testElt == null ) { // if either is null, we are missing a value // todo -- should one of these be a special MISSING item? - diffs.add(new SpecificDifference(masterElt, testElt)); + diffs.add(new Difference(masterElt, testElt)); } else { diffs.addAll(diff(masterElt, testElt)); } @@ -93,11 +94,11 @@ public class DiffEngine { return diffs; } - public List diff(DiffValue master, DiffValue test) { + public List diff(DiffValue master, DiffValue test) { if ( master.getValue().equals(test.getValue()) ) { return Collections.emptyList(); } else { - return Arrays.asList(new SpecificDifference(master.getBinding(), test.getBinding())); + return Arrays.asList(new Difference(master.getBinding(), test.getBinding())); } } @@ -143,13 +144,13 @@ public class DiffEngine { * Not that only pairs of the same length are considered as potentially equivalent * * @param params determines how we display the items - * @param diffs + * @param diffs the list of differences to summarize */ - public void reportSummarizedDifferences(List diffs, SummaryReportParams params ) { + public void reportSummarizedDifferences(List diffs, SummaryReportParams params ) { printSummaryReport(summarizeDifferences(diffs), params ); } - public List summarizeDifferences(List diffs) { + public List summarizeDifferences(List diffs) { return summarizedDifferencesOfPaths(diffs); } @@ -177,8 +178,12 @@ public class DiffEngine { Difference diffPath2 = singletonDiffs.get(j); if ( diffPath1.length() == diffPath2.length() ) { int lcp = longestCommonPostfix(diffPath1.getParts(), diffPath2.getParts()); - String path = lcp > 0 ? summarizedPath(diffPath2.getParts(), lcp) : diffPath2.getPath(); - addSummary(summaries, path, true); + String path = diffPath2.getPath(); + if ( lcp != 0 && lcp != diffPath1.length() ) + path = summarizedPath(diffPath2.getParts(), lcp); + Difference sumDiff = new Difference(path, diffPath2.getMaster(), diffPath2.getTest()); + sumDiff.setCount(0); + addSummaryIfMissing(summaries, sumDiff); } } } @@ -187,7 +192,7 @@ public class DiffEngine { for ( Difference diffPath : singletonDiffs ) { for ( Difference sumDiff : summaries.values() ) { if ( sumDiff.matches(diffPath.getParts()) ) - addSummary(summaries, sumDiff.getPath(), false); + sumDiff.incCount(); } } @@ -196,24 +201,14 @@ public class DiffEngine { return sortedSummaries; } - private static void addSummary(Map summaries, String path, boolean onlyCatalog) { - if ( summaries.containsKey(path) ) { - if ( ! onlyCatalog ) - summaries.get(path).incCount(); - } else { - Difference sumDiff = new Difference(path); - summaries.put(sumDiff.getPath(), sumDiff); + protected void addSummaryIfMissing(Map summaries, Difference diff) { + if ( ! summaries.containsKey(diff.getPath()) ) { + summaries.put(diff.getPath(), diff); } } protected void printSummaryReport(List sortedSummaries, SummaryReportParams params ) { - GATKReport report = new GATKReport(); - final String tableName = "diffences"; - report.addTable(tableName, "Summarized differences between the master and test files.\nSee http://www.broadinstitute.org/gsa/wiki/index.php/DiffObjectsWalker_and_SummarizedDifferences for more information"); - GATKReportTable table = report.getTable(tableName); - table.addPrimaryKey("Difference", true); - table.addColumn("NumberOfOccurrences", 0); - + List toShow = new ArrayList(); int count = 0, count1 = 0; for ( Difference diff : sortedSummaries ) { if ( diff.getCount() < params.minSumDiffToShow ) @@ -229,9 +224,26 @@ public class DiffEngine { break; } - table.set(diff.getPath(), "NumberOfOccurrences", diff.getCount()); + toShow.add(diff); } + // if we want it in descending order, reverse the list + if ( ! params.descending ) { + Collections.reverse(toShow); + } + + // now that we have a specific list of values we want to show, display them + GATKReport report = new GATKReport(); + final String tableName = "diffences"; + report.addTable(tableName, "Summarized differences between the master and test files.\nSee http://www.broadinstitute.org/gsa/wiki/index.php/DiffEngine for more information", false); + GATKReportTable table = report.getTable(tableName); + table.addPrimaryKey("Difference", true); + table.addColumn("NumberOfOccurrences", 0); + table.addColumn("ExampleDifference", 0); + for ( Difference diff : toShow ) { + table.set(diff.getPath(), "NumberOfOccurrences", diff.getCount()); + table.set(diff.getPath(), "ExampleDifference", diff.valueDiffString()); + } table.write(params.out); } @@ -250,7 +262,7 @@ public class DiffEngine { * commonPostfixLength: how many parts are shared at the end, suppose its 2 * We want to create a string *.*.C.D * - * @param parts + * @param parts the separated path values [above without .] * @param commonPostfixLength * @return */ @@ -330,13 +342,13 @@ public class DiffEngine { return reader.readFromFile(file, maxElementsToRead); } - public static boolean simpleDiffFiles(File masterFile, File testFile, DiffEngine.SummaryReportParams params) { + public static boolean simpleDiffFiles(File masterFile, File testFile, int maxElementsToRead, DiffEngine.SummaryReportParams params) { DiffEngine diffEngine = new DiffEngine(); if ( diffEngine.canRead(masterFile) && diffEngine.canRead(testFile) ) { - DiffElement master = diffEngine.createDiffableFromFile(masterFile); - DiffElement test = diffEngine.createDiffableFromFile(testFile); - List diffs = diffEngine.diff(master, test); + DiffElement master = diffEngine.createDiffableFromFile(masterFile, maxElementsToRead); + DiffElement test = diffEngine.createDiffableFromFile(testFile, maxElementsToRead); + List diffs = diffEngine.diff(master, test); diffEngine.reportSummarizedDifferences(diffs, params); return true; } else { @@ -349,6 +361,7 @@ public class DiffEngine { int maxItemsToDisplay = 0; int maxCountOneItems = 0; int minSumDiffToShow = 0; + boolean descending = true; public SummaryReportParams(PrintStream out, int maxItemsToDisplay, int maxCountOneItems, int minSumDiffToShow) { this.out = out; @@ -356,5 +369,9 @@ public class DiffEngine { this.maxCountOneItems = maxCountOneItems; this.minSumDiffToShow = minSumDiffToShow; } + + public void setDescending(boolean descending) { + this.descending = descending; + } } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsWalker.java index ecb836af9..f43d1342d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsWalker.java @@ -37,42 +37,171 @@ import java.io.PrintStream; import java.util.List; /** - * Compares two record-oriented files, itemizing specific difference between equivalent - * records in the two files. Reports both itemized and summarized differences. + * A generic engine for comparing tree-structured objects + * + *

+ * Compares two record-oriented files, itemizing specific difference between equivalent + * records in the two files. Reports both itemized and summarized differences. + *

+ * + *

What are the summarized differences and the DiffObjectsWalker?

+ * + *

+ * The GATK contains a summarizing difference engine that compares hierarchical data structures to emit: + *

    + *
  • A list of specific differences between the two data structures. This is similar to saying the value in field A in record 1 in file F differences from the value in field A in record 1 in file G. + *
  • A summarized list of differences ordered by frequency of the difference. This output is similar to saying field A in 50 records in files F and G differed. + *
+ *

+ * + *

+ * The GATK contains a private walker DiffObjects that allows you access to the DiffEngine capabilities on the command line. Simply provide the walker with the master and test files and it will emit summarized differences for you. + *

+ * + *

Why?

+ * + *

+ * The reason for this system is that it allows you to compare two structured files -- such as BAMs and VCFs -- for common differences among them. This is primarily useful in regression testing or optimization, where you want to ensure that the differences are those that you expect and not any others. + *

+ * + *

Input

+ *

+ * The DiffObjectsWalker works with BAM or VCF files. + *

+ * + *

Output

+ *

+ * The DiffEngine system compares to two hierarchical data structures for specific differences in the values of named + * nodes. Suppose I have two trees: + *

+ *     Tree1=(A=1 B=(C=2 D=3))
+ *     Tree2=(A=1 B=(C=3 D=3 E=4))
+ *     Tree3=(A=1 B=(C=4 D=3 E=4))
+ * 
+ *

+ * where every node in the tree is named, or is a raw value (here all leaf values are integers). The DiffEngine + * traverses these data structures by name, identifies equivalent nodes by fully qualified names + * (Tree1.A is distinct from Tree2.A, and determines where their values are equal (Tree1.A=1, Tree2.A=1, so they are). + * These itemized differences are listed as: + *

+ *     Tree1.B.C=2 != Tree2.B.C=3
+ *     Tree1.B.C=2 != Tree3.B.C=4
+ *     Tree2.B.C=3 != Tree3.B.C=4
+ *     Tree1.B.E=MISSING != Tree2.B.E=4
+ * 
+ * + *

+ * This conceptually very similar to the output of the unix command line tool diff. What's nice about DiffEngine though + * is that it computes similarity among the itemized differences and displays the count of differences names + * in the system. In the above example, the field C is not equal three times, while the missing E in Tree1 occurs + * only once. So the summary is: + * + *

+ *     *.B.C : 3
+ *     *.B.E : 1
+ * 
+ * + *

+ * where the * operator indicates that any named field matches. This output is sorted by counts, and provides an + * immediate picture of the commonly occurring differences among the files. + *

+ * Below is a detailed example of two VCF fields that differ because of a bug in the AC, AF, and AN counting routines, + * detected by the integrationtest integration (more below). You can see that in the although there are many specific + * instances of these differences between the two files, the summarized differences provide an immediate picture that + * the AC, AF, and AN fields are the major causes of the differences. + *

+ * + *

+   [testng] path                                                             count
+   [testng] *.*.*.AC                                                         6
+   [testng] *.*.*.AF                                                         6
+   [testng] *.*.*.AN                                                         6
+   [testng] 64b991fd3850f83614518f7d71f0532f.integrationtest.20:10000000.AC  1
+   [testng] 64b991fd3850f83614518f7d71f0532f.integrationtest.20:10000000.AF  1
+   [testng] 64b991fd3850f83614518f7d71f0532f.integrationtest.20:10000000.AN  1
+   [testng] 64b991fd3850f83614518f7d71f0532f.integrationtest.20:10000117.AC  1
+   [testng] 64b991fd3850f83614518f7d71f0532f.integrationtest.20:10000117.AF  1
+   [testng] 64b991fd3850f83614518f7d71f0532f.integrationtest.20:10000117.AN  1
+   [testng] 64b991fd3850f83614518f7d71f0532f.integrationtest.20:10000211.AC  1
+   [testng] 64b991fd3850f83614518f7d71f0532f.integrationtest.20:10000211.AF  1
+   [testng] 64b991fd3850f83614518f7d71f0532f.integrationtest.20:10000211.AN  1
+   [testng] 64b991fd3850f83614518f7d71f0532f.integrationtest.20:10000598.AC  1
+
+ * * @author Mark DePristo - * @version 0.1 + * @since 7/4/11 */ @Requires(value={}) public class DiffObjectsWalker extends RodWalker { + /** + * Writes out a file of the DiffEngine format: + * + * http://www.broadinstitute.org/gsa/wiki/index.php/DiffEngine + */ @Output(doc="File to which results should be written",required=true) protected PrintStream out; - @Argument(fullName="maxObjectsToRead", shortName="motr", doc="Max. number of objects to read from the files. -1 [default] means unlimited", required=false) - int MAX_OBJECTS_TO_READ = -1; - - @Argument(fullName="maxDiffs", shortName="M", doc="Max. number of diffs to process", required=false) - int MAX_DIFFS = 0; - - @Argument(fullName="maxCount1Diffs", shortName="M1", doc="Max. number of diffs occuring exactly once in the file to process", required=false) - int MAX_COUNT1_DIFFS = 0; - - @Argument(fullName="minCountForDiff", shortName="MCFD", doc="Min number of observations for a records to display", required=false) - int minCountForDiff = 1; - - @Argument(fullName="showItemizedDifferences", shortName="SID", doc="Should we enumerate all differences between the files?", required=false) - boolean showItemizedDifferences = false; - + /** + * The master file against which we will compare test. This is one of the two required + * files to do the comparison. Conceptually master is the original file contained the expected + * results, but this doesn't currently have an impact on the calculations, but might in the future. + */ @Argument(fullName="master", shortName="m", doc="Master file: expected results", required=true) File masterFile; + /** + * The test file against which we will compare to the master. This is one of the two required + * files to do the comparison. Conceptually test is the derived file from master, but this + * doesn't currently have an impact on the calculations, but might in the future. + */ @Argument(fullName="test", shortName="t", doc="Test file: new results to compare to the master file", required=true) File testFile; - final DiffEngine diffEngine = new DiffEngine(); + /** + * The engine will read at most this number of objects from each of master and test files. This reduces + * the memory requirements for DiffObjects but does limit you to comparing at most this number of objects + */ + @Argument(fullName="maxObjectsToRead", shortName="motr", doc="Max. number of objects to read from the files. -1 [default] means unlimited", required=false) + int MAX_OBJECTS_TO_READ = -1; + + /** + * The max number of differences to display when summarizing. For example, if there are 10M differences, but + * maxDiffs is 10, then the comparison aborts after first ten summarized differences are shown. Note that + * the system shows differences sorted by frequency, so these 10 would be the most common between the two files. + * A value of 0 means show all possible differences. + */ + @Argument(fullName="maxDiffs", shortName="M", doc="Max. number of diffs to process", required=false) + int MAX_DIFFS = 0; + + /** + * The maximum number of singleton (occurs exactly once between the two files) to display when writing out + * the summary. Only applies if maxDiffs hasn't been exceeded. For example, if maxDiffs is 10 and maxCount1Diffs + * is 2 and there are 20 diffs with count > 1, then only 10 are shown, all of which have count above 1. + */ + @Argument(fullName="maxCount1Diffs", shortName="M1", doc="Max. number of diffs occuring exactly once in the file to process", required=false) + int MAX_COUNT1_DIFFS = 0; + + /** + * Only differences that occur more than minCountForDiff are displayed. For example, if minCountForDiff is 10, then + * a difference must occur at least 10 times between the two files to be shown. + */ + @Argument(fullName="minCountForDiff", shortName="MCFD", doc="Min number of observations for a records to display", required=false) + int minCountForDiff = 1; + + /** + * If provided, the system will write out the summarized, individual differences. May lead to enormous outputs, + * depending on how many differences are found. Note these are not sorted in any way, so if you have 10M + * common differences in the files, you will see 10M records, whereas the final summarize will just list the + * difference and its count of 10M. + */ + @Argument(fullName="showItemizedDifferences", shortName="SID", doc="Should we enumerate all differences between the files?", required=false) + boolean showItemizedDifferences = false; + + DiffEngine diffEngine; @Override public void initialize() { - + this.diffEngine = new DiffEngine(); } @Override @@ -104,14 +233,15 @@ public class DiffObjectsWalker extends RodWalker { // out.printf("Test diff objects%n"); // out.println(test.toString()); - List diffs = diffEngine.diff(master, test); + List diffs = diffEngine.diff(master, test); if ( showItemizedDifferences ) { out.printf("Itemized results%n"); - for ( SpecificDifference diff : diffs ) + for ( Difference diff : diffs ) out.printf("DIFF: %s%n", diff.toString()); } DiffEngine.SummaryReportParams params = new DiffEngine.SummaryReportParams(out, MAX_DIFFS, MAX_COUNT1_DIFFS, minCountForDiff); + params.setDescending(false); diffEngine.reportSummarizedDifferences(diffs, params); } } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffValue.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffValue.java index 3750496a1..963191446 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffValue.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffValue.java @@ -24,8 +24,6 @@ package org.broadinstitute.sting.gatk.walkers.diffengine; -import org.broadinstitute.sting.utils.Utils; - /** * Created by IntelliJ IDEA. * User: depristo diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffableReader.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffableReader.java index af5771c55..a117206f1 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffableReader.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffableReader.java @@ -39,12 +39,27 @@ import java.io.File; */ public interface DiffableReader { @Ensures("result != null") + /** + * Return the name of this DiffableReader type. For example, the VCF reader returns 'VCF' and the + * bam reader 'BAM' + */ public String getName(); @Ensures("result != null") @Requires("file != null") + /** + * Read up to maxElementsToRead DiffElements from file, and return them. + */ public DiffElement readFromFile(File file, int maxElementsToRead); + /** + * Return true if the file can be read into DiffElement objects with this reader. This should + * be uniquely true/false for all readers, as the system will use the first reader that can read the + * file. This routine should never throw an exception. The VCF reader, for example, looks at the + * first line of the file for the ##format=VCF4.1 header, and the BAM reader for the BAM_MAGIC value + * @param file + * @return + */ @Requires("file != null") public boolean canRead(File file); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/Difference.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/Difference.java index efc6ef160..81b6f7e0e 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/Difference.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/Difference.java @@ -27,13 +27,24 @@ package org.broadinstitute.sting.gatk.walkers.diffengine; public class Difference implements Comparable { final String path; // X.Y.Z final String[] parts; - int count = 0; + int count = 1; + DiffElement master = null , test = null; public Difference(String path) { this.path = path; this.parts = DiffEngine.diffNameToPath(path); } + public Difference(DiffElement master, DiffElement test) { + this(createPath(master, test), master, test); + } + + public Difference(String path, DiffElement master, DiffElement test) { + this(path); + this.master = master; + this.test = test; + } + public String[] getParts() { return parts; } @@ -44,6 +55,10 @@ public class Difference implements Comparable { return count; } + public void setCount(int count) { + this.count = count; + } + /** * The fully qualified path object A.B.C etc * @return @@ -81,7 +96,7 @@ public class Difference implements Comparable { @Override public String toString() { - return String.format("%s:%d", getPath(), getCount()); + return String.format("%s:%d:%s", getPath(), getCount(), valueDiffString()); } @Override @@ -91,5 +106,31 @@ public class Difference implements Comparable { return countCmp != 0 ? -1 * countCmp : path.compareTo(other.path); } + public String valueDiffString() { + if ( hasSpecificDifference() ) { + return String.format("%s!=%s", getOneLineString(master), getOneLineString(test)); + } else { + return "N/A"; + } + } + private static String createPath(DiffElement master, DiffElement test) { + return (master == null ? test : master).fullyQualifiedName(); + } + + private static String getOneLineString(DiffElement elt) { + return elt == null ? "MISSING" : elt.getValue().toOneLineString(); + } + + public boolean hasSpecificDifference() { + return master != null || test != null; + } + + public DiffElement getMaster() { + return master; + } + + public DiffElement getTest() { + return test; + } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java index a812babaf..77a992ce0 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java @@ -30,7 +30,10 @@ import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.variantcontext.Genotype; import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.io.*; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileReader; +import java.io.IOException; import java.util.Map; @@ -50,7 +53,13 @@ public class VCFDiffableReader implements DiffableReader { public DiffElement readFromFile(File file, int maxElementsToRead) { DiffNode root = DiffNode.rooted(file.getName()); try { + // read the version line from the file LineReader lineReader = new AsciiLineReader(new FileInputStream(file)); + final String version = lineReader.readLine(); + root.add("VERSION", version); + lineReader.close(); + + lineReader = new AsciiLineReader(new FileInputStream(file)); VCFCodec vcfCodec = new VCFCodec(); // must be read as state is stored in reader itself @@ -63,13 +72,19 @@ public class VCFDiffableReader implements DiffableReader { } String line = lineReader.readLine(); - int count = 0; + int count = 0, nRecordsAtPos = 1; + String prevName = ""; while ( line != null ) { if ( count++ > maxElementsToRead && maxElementsToRead != -1) break; VariantContext vc = (VariantContext)vcfCodec.decode(line); String name = vc.getChr() + ":" + vc.getStart(); + if ( name.equals(prevName) ) { + name += "_" + ++nRecordsAtPos; + } else { + prevName = name; + } DiffNode vcRoot = DiffNode.empty(name, root); // add fields diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceWalker.java index 828d39717..efc101618 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceWalker.java @@ -25,13 +25,13 @@ package org.broadinstitute.sting.gatk.walkers.fasta; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.collections.Pair; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.Collection; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaReferenceWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaReferenceWalker.java index 6be2336c0..2dbfc76ff 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaReferenceWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaReferenceWalker.java @@ -25,16 +25,15 @@ package org.broadinstitute.sting.gatk.walkers.fasta; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.RefWalker; import org.broadinstitute.sting.gatk.walkers.WalkerName; import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.collections.Pair; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Output; import java.io.PrintStream; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/ClusteredSnps.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/ClusteredSnps.java index b31526987..2c009f7f2 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/ClusteredSnps.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/ClusteredSnps.java @@ -1,9 +1,9 @@ package org.broadinstitute.sting.gatk.walkers.filters; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; public class ClusteredSnps { private GenomeLocParser genomeLocParser; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/FiltrationContext.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/FiltrationContext.java index c3849e240..ede19746a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/FiltrationContext.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/FiltrationContext.java @@ -25,8 +25,8 @@ package org.broadinstitute.sting.gatk.walkers.filters; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; public class FiltrationContext { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/FiltrationContextWindow.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/FiltrationContextWindow.java index 225cdecc3..d7c0dd4d5 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/FiltrationContextWindow.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/FiltrationContextWindow.java @@ -27,7 +27,9 @@ package org.broadinstitute.sting.gatk.walkers.filters; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import java.util.*; +import java.util.LinkedList; +import java.util.List; +import java.util.ListIterator; /** * A window of variants surrounding the current variant being investigated diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java index 884d0ac24..6c023573a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java @@ -25,21 +25,20 @@ package org.broadinstitute.sting.gatk.walkers.filters; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.*; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.*; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.SampleUtils; +import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java index 47be7e6fe..83a8ce7d7 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java @@ -26,14 +26,15 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import org.apache.log4j.Logger; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.io.PrintStream; -import java.util.*; +import java.util.Map; +import java.util.Set; /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidIndelGenotypePriors.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidIndelGenotypePriors.java index 22c9dcf91..696a74de8 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidIndelGenotypePriors.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidIndelGenotypePriors.java @@ -4,8 +4,6 @@ import org.broadinstitute.sting.gatk.walkers.indels.HaplotypeIndelErrorModel; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.genotype.DiploidGenotype; -import java.util.Arrays; - /** * Created by IntelliJ IDEA. * User: delangel diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidSNPGenotypeLikelihoods.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidSNPGenotypeLikelihoods.java index ab075eaf2..2014801e4 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidSNPGenotypeLikelihoods.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidSNPGenotypeLikelihoods.java @@ -26,13 +26,14 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import net.sf.samtools.SAMUtils; -import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.pileup.FragmentPileup; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; -import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.genotype.DiploidGenotype; -import org.broadinstitute.sting.utils.sam.ReadUtils; +import org.broadinstitute.sting.utils.pileup.FragmentPileup; +import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import static java.lang.Math.log10; import static java.lang.Math.pow; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java index 89504b371..5c27bc943 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java @@ -26,17 +26,22 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import org.apache.log4j.Logger; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.SimpleTimer; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.Genotype; import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.*; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.utils.exceptions.UserException; import sun.reflect.generics.reflectiveObjects.NotImplementedException; -import java.util.*; import java.io.PrintStream; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Map; +import java.util.Set; public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { // diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java index 3902a0b7f..8261cd588 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java @@ -30,11 +30,11 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.variantcontext.Allele; import java.util.Map; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GridSearchAFEstimation.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GridSearchAFEstimation.java index c4e315f68..10b646d63 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GridSearchAFEstimation.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GridSearchAFEstimation.java @@ -26,18 +26,18 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import org.apache.log4j.Logger; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.*; import java.io.PrintStream; +import java.util.*; public class GridSearchAFEstimation extends AlleleFrequencyCalculationModel { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java index 1f430548b..60ea601d5 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java @@ -25,16 +25,14 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; -import net.sf.samtools.SAMRecord; import org.apache.log4j.Logger; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.indels.HaplotypeIndelErrorModel; import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel; -import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.StingException; import org.broadinstitute.sting.utils.genotype.Haplotype; @@ -42,11 +40,10 @@ import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.ReadUtils; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.*; @@ -394,7 +391,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood if (DEBUG) System.out.format("hsize: %d eventLength: %d refSize: %d, locStart: %d numpr: %d\n",hsize,eventLength, (int)ref.getWindow().size(), loc.getStart(), numPrefBases); - + //System.out.println(eventLength); haplotypeMap = Haplotype.makeHaplotypeListFromAlleles( alleleList, loc.getStart(), ref, hsize, numPrefBases); @@ -421,8 +418,8 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood - // which genotype likelihoods correspond to two most likely alleles? By convention, likelihood vector is lexically ordered, for example - // for 3 alleles it's 00 01 02 11 12 22 + // which genotype likelihoods correspond to two most likely alleles? By convention, likelihood vector is ordered as for example + // for 3 alleles it's 00 01 11 02 12 22 GLs.put(sample.getKey(), new MultiallelicGenotypeLikelihoods(sample.getKey(), alleleList, genotypeLikelihoods, diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/MultiallelicGenotypeLikelihoods.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/MultiallelicGenotypeLikelihoods.java index 2cf149fd0..3652763de 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/MultiallelicGenotypeLikelihoods.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/MultiallelicGenotypeLikelihoods.java @@ -1,7 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; -import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.exceptions.StingException; +import org.broadinstitute.sting.utils.variantcontext.Allele; import java.util.ArrayList; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java index 9f4d4182f..9d917078d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java @@ -25,23 +25,25 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils; -import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.StingException; import org.broadinstitute.sting.utils.genotype.DiploidGenotype; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.PileupElement; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.apache.log4j.Logger; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.*; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsCalculationModel { @@ -66,7 +68,7 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC if ( vc == null ) { vc = vc_input; } else { - logger.warn("Multiple valid VCF records detected at site " + ref.getLocus() + ", only considering alleles from first record only"); + logger.warn("Multiple valid VCF records detected at site " + ref.getLocus() + ", only considering alleles from first record"); } } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCalcLikelihoods.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCalcLikelihoods.java index cf1c57a05..22c3081a3 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCalcLikelihoods.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCalcLikelihoods.java @@ -25,8 +25,6 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.commandline.ArgumentCollection; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.DownsampleType; @@ -36,8 +34,12 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.baq.BAQ; +import org.broadinstitute.sting.utils.codecs.vcf.*; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.*; +import java.util.HashSet; +import java.util.Set; +import java.util.TreeSet; /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCallVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCallVariants.java index 1533e8777..68d8f9b54 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCallVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCallVariants.java @@ -24,19 +24,19 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.commandline.ArgumentCollection; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.utils.SampleUtils; +import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java index 055eb0b97..2b25df4aa 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java @@ -27,7 +27,6 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Hidden; -import org.broadinstitute.sting.commandline.Input; import java.io.File; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java index fc8a5819a..2a0338bca 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java @@ -25,22 +25,25 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; -import org.broadinstitute.sting.gatk.filters.MappingQualityUnavailableReadFilter; -import org.broadinstitute.sting.utils.codecs.vcf.*; -import org.broadinstitute.sting.gatk.contexts.*; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.ArgumentCollection; +import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.DownsampleType; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.filters.BadMateFilter; +import org.broadinstitute.sting.gatk.filters.MappingQualityUnavailableReadFilter; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine; -import org.broadinstitute.sting.gatk.DownsampleType; -import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; -import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.baq.BAQ; -import org.broadinstitute.sting.commandline.*; +import org.broadinstitute.sting.utils.codecs.vcf.*; -import java.util.*; import java.io.PrintStream; +import java.util.*; /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index 6fc972b5d..a10897172 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -25,22 +25,24 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; +import com.google.java.contract.Requires; import org.apache.log4j.Logger; -import org.broadinstitute.sting.utils.variantcontext.GenotypeLikelihoods; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; -import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.baq.BAQ; -import org.broadinstitute.sting.utils.pileup.*; import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; -import com.google.java.contract.*; +import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.GenotypeLikelihoods; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.io.PrintStream; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/ConstrainedMateFixingManager.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/ConstrainedMateFixingManager.java index df1f4f908..adb7c4c38 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/ConstrainedMateFixingManager.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/ConstrainedMateFixingManager.java @@ -1,11 +1,13 @@ package org.broadinstitute.sting.gatk.walkers.indels; import net.sf.picard.sam.SamPairUtil; -import net.sf.samtools.*; +import net.sf.samtools.SAMFileWriter; +import net.sf.samtools.SAMRecord; +import net.sf.samtools.SAMRecordComparator; +import net.sf.samtools.SAMRecordCoordinateComparator; import org.apache.log4j.Logger; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/HaplotypeIndelErrorModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/HaplotypeIndelErrorModel.java index 7617aa9de..e68aa31e0 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/HaplotypeIndelErrorModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/HaplotypeIndelErrorModel.java @@ -25,21 +25,16 @@ package org.broadinstitute.sting.gatk.walkers.indels; -import net.sf.samtools.AlignmentBlock; import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.gatk.walkers.genotyper.ExactAFCalculationModel; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.QualityUtils; -import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.genotype.Haplotype; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.sam.ReadUtils; +import org.broadinstitute.sting.utils.variantcontext.Allele; import java.util.Arrays; import java.util.HashMap; -import java.util.List; public class HaplotypeIndelErrorModel { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java index a53665d64..61f21c488 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java @@ -25,39 +25,41 @@ package org.broadinstitute.sting.gatk.walkers.indels; +import net.sf.picard.reference.IndexedFastaSequenceFile; import net.sf.samtools.*; import net.sf.samtools.util.RuntimeIOException; -import net.sf.samtools.util.StringUtil; import net.sf.samtools.util.SequenceUtil; -import net.sf.picard.reference.IndexedFastaSequenceFile; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.commandline.*; +import net.sf.samtools.util.StringUtil; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Hidden; +import org.broadinstitute.sting.commandline.Input; +import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; -import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID; -import org.broadinstitute.sting.gatk.io.stubs.SAMFileWriterStub; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.exceptions.StingException; -import org.broadinstitute.sting.utils.interval.IntervalMergingRule; -import org.broadinstitute.sting.utils.interval.IntervalUtils; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.*; -import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; -import org.broadinstitute.sting.gatk.walkers.ReadWalker; -import org.broadinstitute.sting.gatk.walkers.BAQMode; import org.broadinstitute.sting.gatk.io.StingSAMFileWriter; +import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; +import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors; +import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; +import org.broadinstitute.sting.gatk.walkers.BAQMode; +import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.baq.BAQ; +import org.broadinstitute.sting.utils.collections.Pair; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.exceptions.StingException; +import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; import org.broadinstitute.sting.utils.interval.IntervalFileMergingIterator; +import org.broadinstitute.sting.utils.interval.IntervalMergingRule; +import org.broadinstitute.sting.utils.interval.IntervalUtils; import org.broadinstitute.sting.utils.interval.NwayIntervalMergingIterator; +import org.broadinstitute.sting.utils.sam.AlignmentUtils; import org.broadinstitute.sting.utils.sam.NWaySAMFileWriter; +import org.broadinstitute.sting.utils.sam.ReadUtils; import org.broadinstitute.sting.utils.text.TextFormattingUtils; import org.broadinstitute.sting.utils.text.XReadLines; -import org.broadinstitute.sting.utils.sam.AlignmentUtils; -import org.broadinstitute.sting.utils.sam.ReadUtils; -import org.broadinstitute.sting.utils.collections.Pair; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.io.File; import java.io.FileNotFoundException; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/LeftAlignIndels.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/LeftAlignIndels.java index 010e0cf6f..af8051334 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/LeftAlignIndels.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/LeftAlignIndels.java @@ -25,8 +25,10 @@ package org.broadinstitute.sting.gatk.walkers.indels; -import net.sf.samtools.*; -import org.broadinstitute.sting.commandline.*; +import net.sf.samtools.Cigar; +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.io.StingSAMFileWriter; import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java index ab7ae4184..55450486b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java @@ -28,30 +28,25 @@ package org.broadinstitute.sting.gatk.walkers.indels; import net.sf.samtools.Cigar; import net.sf.samtools.CigarElement; import net.sf.samtools.CigarOperator; -import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -/*import org.broadinstitute.sting.oneoffprojects.walkers.IndelCountCovariates.Covariate; -import org.broadinstitute.sting.oneoffprojects.walkers.IndelCountCovariates.RecalDataManager; -import org.broadinstitute.sting.oneoffprojects.walkers.IndelCountCovariates.RecalDatum; -import org.broadinstitute.sting.oneoffprojects.walkers.IndelCountCovariates.RecalibrationArgumentCollection; -*/import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.utils.QualityUtils; -import org.broadinstitute.sting.utils.classloader.PluginManager; -import org.broadinstitute.sting.utils.collections.NestedHashMap; -import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException; -import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.genotype.Haplotype; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.ReadUtils; -import org.broadinstitute.sting.utils.text.XReadLines; +import org.broadinstitute.sting.utils.variantcontext.Allele; import java.io.File; -import java.io.FileNotFoundException; -import java.util.*; -import java.util.regex.Pattern; +import java.util.Arrays; +import java.util.HashMap; +import java.util.LinkedHashMap; + +/*import org.broadinstitute.sting.oneoffprojects.walkers.IndelCountCovariates.Covariate; +import org.broadinstitute.sting.oneoffprojects.walkers.IndelCountCovariates.RecalDataManager; +import org.broadinstitute.sting.oneoffprojects.walkers.IndelCountCovariates.RecalDatum; +import org.broadinstitute.sting.oneoffprojects.walkers.IndelCountCovariates.RecalibrationArgumentCollection; +*/ public class PairHMMIndelErrorModel { @@ -1047,8 +1042,8 @@ public class PairHMMIndelErrorModel { int k=0; double maxElement = Double.NEGATIVE_INFINITY; - for (int i=0; i < hSize; i++) { - for (int j=i; j < hSize; j++){ + for (int j=0; j < hSize; j++) { + for (int i=0; i <= j; i++){ genotypeLikelihoods[k++] = haplotypeLikehoodMatrix[i][j]; if (haplotypeLikehoodMatrix[i][j] > maxElement) maxElement = haplotypeLikehoodMatrix[i][j]; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignedReadCounter.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignedReadCounter.java index fc196e712..2c89b907b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignedReadCounter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignedReadCounter.java @@ -25,19 +25,23 @@ package org.broadinstitute.sting.gatk.walkers.indels; -import net.sf.samtools.*; -import org.broadinstitute.sting.utils.interval.IntervalMergingRule; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.*; -import org.broadinstitute.sting.gatk.walkers.*; -import org.broadinstitute.sting.gatk.filters.BadMateFilter; -import org.broadinstitute.sting.utils.*; -import org.broadinstitute.sting.utils.interval.IntervalFileMergingIterator; -import org.broadinstitute.sting.utils.sam.ReadUtils; +import net.sf.samtools.CigarElement; +import net.sf.samtools.CigarOperator; +import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.filters.BadMateFilter; +import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.By; +import org.broadinstitute.sting.gatk.walkers.DataSource; +import org.broadinstitute.sting.gatk.walkers.ReadWalker; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.interval.IntervalFileMergingIterator; +import org.broadinstitute.sting.utils.interval.IntervalMergingRule; +import org.broadinstitute.sting.utils.sam.ReadUtils; import java.io.File; -import java.util.*; +import java.util.Iterator; @By(DataSource.READS) // walker to count realigned reads diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java index 3b94989aa..488e37f26 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java @@ -25,27 +25,27 @@ package org.broadinstitute.sting.gatk.walkers.indels; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.filters.BadCigarFilter; -import org.broadinstitute.sting.gatk.filters.Platform454Filter; -import org.broadinstitute.sting.gatk.filters.MappingQualityZeroReadFilter; import org.broadinstitute.sting.gatk.filters.BadMateFilter; +import org.broadinstitute.sting.gatk.filters.MappingQualityZeroReadFilter; +import org.broadinstitute.sting.gatk.filters.Platform454Filter; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.baq.BAQ; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.ArrayList; import java.io.PrintStream; +import java.util.ArrayList; /** * Emits intervals for the Local Indel Realigner to target for cleaning. Ignores 454 reads, MQ0 reads, and reads with consecutive indel operators in the CIGAR string. diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SAMRecordCoordinateComparatorWithUnmappedReads.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SAMRecordCoordinateComparatorWithUnmappedReads.java index 1fe3fdd04..3854a4a8c 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SAMRecordCoordinateComparatorWithUnmappedReads.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SAMRecordCoordinateComparatorWithUnmappedReads.java @@ -23,7 +23,8 @@ */ package org.broadinstitute.sting.gatk.walkers.indels; -import net.sf.samtools.*; +import net.sf.samtools.SAMRecord; +import net.sf.samtools.SAMRecordCoordinateComparator; /** * Extends Picard's Comparator for sorting SAMRecords by coordinate. This one actually deals with unmapped reads diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java index 1f05ddaf0..443e6e9f2 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java @@ -25,15 +25,21 @@ package org.broadinstitute.sting.gatk.walkers.indels; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.codecs.vcf.*; +import net.sf.samtools.*; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Hidden; +import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.commandline.Tags; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID; import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource; -import org.broadinstitute.sting.gatk.filters.*; -import org.broadinstitute.sting.gatk.refdata.*; +import org.broadinstitute.sting.gatk.filters.MappingQualityZeroReadFilter; +import org.broadinstitute.sting.gatk.filters.Platform454Filter; +import org.broadinstitute.sting.gatk.filters.PlatformUnitFilter; +import org.broadinstitute.sting.gatk.filters.PlatformUnitFilterHelper; +import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; +import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator; +import org.broadinstitute.sting.gatk.refdata.Transcript; import org.broadinstitute.sting.gatk.refdata.features.refseq.RefSeqCodec; import org.broadinstitute.sting.gatk.refdata.features.refseq.RefSeqFeature; import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack; @@ -42,22 +48,22 @@ import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODItera import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; import org.broadinstitute.sting.gatk.walkers.ReadFilters; import org.broadinstitute.sting.gatk.walkers.ReadWalker; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.*; -import org.broadinstitute.sting.utils.interval.IntervalUtils; -import org.broadinstitute.sting.utils.interval.IntervalFileMergingIterator; -import org.broadinstitute.sting.utils.interval.IntervalMergingRule; -import org.broadinstitute.sting.utils.interval.OverlappingIntervalIterator; -import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.exceptions.StingException; -import org.broadinstitute.sting.utils.sam.AlignmentUtils; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocSortedSet; +import org.broadinstitute.sting.utils.SampleUtils; +import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.collections.CircularArray; import org.broadinstitute.sting.utils.collections.PrimitivePair; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Output; -import org.broadinstitute.sting.commandline.Hidden; - -import net.sf.samtools.*; +import org.broadinstitute.sting.utils.exceptions.StingException; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.interval.IntervalFileMergingIterator; +import org.broadinstitute.sting.utils.interval.IntervalMergingRule; +import org.broadinstitute.sting.utils.interval.IntervalUtils; +import org.broadinstitute.sting.utils.interval.OverlappingIntervalIterator; +import org.broadinstitute.sting.utils.sam.AlignmentUtils; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.io.*; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/AllelePair.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/AllelePair.java index 869edf784..cb123c868 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/AllelePair.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/AllelePair.java @@ -23,9 +23,9 @@ */ package org.broadinstitute.sting.gatk.walkers.phasing; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.util.ArrayList; import java.util.List; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/AnnotateMNPsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/AnnotateMNPsWalker.java index 81d9b4ddb..9aa370d3f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/AnnotateMNPsWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/AnnotateMNPsWalker.java @@ -24,14 +24,10 @@ package org.broadinstitute.sting.gatk.walkers.phasing; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import org.broadinstitute.sting.gatk.refdata.features.annotator.AnnotatorInputTableFeature; @@ -41,9 +37,12 @@ import org.broadinstitute.sting.gatk.walkers.annotator.genomicannotator.AminoAci import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeAndMatchHaplotypes.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeAndMatchHaplotypes.java new file mode 100644 index 000000000..298d8d6c8 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeAndMatchHaplotypes.java @@ -0,0 +1,110 @@ +package org.broadinstitute.sting.gatk.walkers.phasing; + +import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.RodWalker; +import org.broadinstitute.sting.utils.SampleUtils; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; +import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; +import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; + +import java.util.*; + +/** + * Merges read-back-phased and phase-by-transmission files. + */ +public class MergeAndMatchHaplotypes extends RodWalker { + @Output + protected VCFWriter vcfWriter = null; + + private Map pbtCache = new HashMap(); + private Map rbpCache = new HashMap(); + + private final String SOURCE_NAME = "MergeReadBackedAndTransmissionPhasedVariants"; + + public void initialize() { + ArrayList rodNames = new ArrayList(); + rodNames.add("pbt"); + + Map vcfRods = VCFUtils.getVCFHeadersFromRods(getToolkit(), rodNames); + Set vcfSamples = SampleUtils.getSampleList(vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE); + Set headerLines = new HashSet(); + headerLines.addAll(VCFUtils.getHeaderFields(this.getToolkit())); + + vcfWriter.writeHeader(new VCFHeader(headerLines, vcfSamples)); + } + + @Override + public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + if (tracker != null) { + Collection pbts = tracker.getVariantContexts(ref, "pbt", null, ref.getLocus(), true, true); + Collection rbps = tracker.getVariantContexts(ref, "rbp", null, ref.getLocus(), true, true); + + VariantContext pbt = pbts.iterator().hasNext() ? pbts.iterator().next() : null; + VariantContext rbp = rbps.iterator().hasNext() ? rbps.iterator().next() : null; + + if (pbt != null && rbp != null) { + Map genotypes = pbt.getGenotypes(); + + if (!rbp.isFiltered()) { + for (String sample : rbp.getSampleNames()) { + Genotype rbpg = rbp.getGenotype(sample); + Genotype pbtg = pbt.getGenotype(sample); + + // Propagate read-backed phasing information to genotypes unphased by transmission + //if (!pbtg.isPhased() && rbpCache.containsKey(sample)) { + if (!pbtg.isPhased() && rbpg.isPhased() && rbpCache.containsKey(sample)) { + boolean orientationMatches = rbpCache.get(sample).sameGenotype(pbtCache.get(sample), false); + + if (orientationMatches) { + pbtg = rbpg; + } else { + List fwdAlleles = rbpg.getAlleles(); + List revAlleles = new ArrayList(); + + for (int i = fwdAlleles.size() - 1; i >= 0; i--) { + revAlleles.add(fwdAlleles.get(i)); + } + + pbtg = new Genotype(sample, revAlleles, rbpg.getNegLog10PError(), rbpg.getFilters(), rbpg.getAttributes(), rbpg.isPhased()); + } + } + + genotypes.put(sample, pbtg); + + // Update the cache + if (/*rbpg.isPhased() &&*/ rbpg.isHet()) { + rbpCache.put(sample, rbpg); + pbtCache.put(sample, pbtg); + } else if (!rbpg.isPhased()) { + rbpCache.remove(sample); + pbtCache.remove(sample); + } + } + } + + VariantContext newvc = new VariantContext(SOURCE_NAME, pbt.getChr(), pbt.getStart(), pbt.getStart(), pbt.getAlleles(), genotypes, pbt.getNegLog10PError(), pbt.getFilters(), pbt.getAttributes()); + vcfWriter.add(newvc, ref.getBase()); + } + } + + return null; + } + + @Override + public Integer reduceInit() { + return null; + } + + @Override + public Integer reduce(Integer value, Integer sum) { + return null; + } +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeMNPsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeMNPsWalker.java index 709bc44ce..5bd438605 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeMNPsWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeMNPsWalker.java @@ -24,10 +24,6 @@ package org.broadinstitute.sting.gatk.walkers.phasing; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; -import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; @@ -35,7 +31,11 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import org.broadinstitute.sting.gatk.walkers.*; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; +import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesVCFWriter.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesVCFWriter.java index abced442e..b0491a281 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesVCFWriter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesVCFWriter.java @@ -24,20 +24,19 @@ package org.broadinstitute.sting.gatk.walkers.phasing; -import net.sf.picard.reference.IndexedFastaSequenceFile; import net.sf.picard.reference.ReferenceSequenceFile; import org.apache.log4j.Logger; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; +import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.Genotype; import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; -import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; -import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.io.File; import java.io.FileNotFoundException; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesWalker.java index ec6f5c648..be15d4541 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesWalker.java @@ -24,20 +24,22 @@ package org.broadinstitute.sting.gatk.walkers.phasing; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Hidden; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; +import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java new file mode 100755 index 000000000..cf4afbb6d --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java @@ -0,0 +1,343 @@ +package org.broadinstitute.sting.gatk.walkers.phasing; + +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.RodWalker; +import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.SampleUtils; +import org.broadinstitute.sting.utils.codecs.vcf.*; +import org.broadinstitute.sting.utils.text.XReadLines; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; + +import java.io.File; +import java.io.FileNotFoundException; +import java.util.*; + +/** + * Phases a trio VCF (child phased by transmission, implied phase carried over to parents). Given genotypes for a trio, + * this walker modifies the genotypes (if necessary) to reflect the most likely configuration given the genotype + * likelihoods and inheritance constraints, phases child by transmission and carries over implied phase to the parents + * (their alleles in their genotypes are ordered as transmitted|untransmitted). Computes probability that the + * determined phase is correct given that the genotype configuration is correct (useful if you want to use this to + * compare phasing accuracy, but want to break that comparison down by phasing confidence in the truth set). Optionally + * filters out sites where the phasing is indeterminate (site has no-calls), ambiguous (everyone is heterozygous), or + * the genotypes exhibit a Mendelian violation. This walker assumes there are only three samples in the VCF file to + * begin. + */ +public class PhaseByTransmission extends RodWalker { + @Argument(shortName="f", fullName="familySpec", required=true, doc="Patterns for the family structure (usage: mom+dad=child). Specify several trios by supplying this argument many times and/or a file containing many patterns.") + public ArrayList familySpecs = null; + + @Output + protected VCFWriter vcfWriter = null; + + private final String ROD_NAME = "variant"; + private final String TRANSMISSION_PROBABILITY_TAG_NAME = "TP"; + private final String SOURCE_NAME = "PhaseByTransmission"; + + private final Double MENDELIAN_VIOLATION_PRIOR = 1e-8; + + private class Trio { + private String mother; + private String father; + private String child; + + public Trio(String mother, String father, String child) { + this.mother = mother; + this.father = father; + this.child = child; + } + + public Trio(String familySpec) { + String[] pieces = familySpec.split("[\\+\\=]"); + + this.mother = pieces[0]; + this.father = pieces[1]; + this.child = pieces[2]; + } + + public String getMother() { return mother; } + public String getFather() { return father; } + public String getChild() { return child; } + } + + private ArrayList trios = new ArrayList(); + + public ArrayList getFamilySpecsFromCommandLineInput(ArrayList familySpecs) { + if (familySpecs != null) { + // Let's first go through the list and see if we were given any files. We'll add every entry in the file to our + // spec list set, and treat the entries as if they had been specified on the command line. + ArrayList specs = new ArrayList(); + for (String familySpec : familySpecs) { + File specFile = new File(familySpec); + + try { + XReadLines reader = new XReadLines(specFile); + + List lines = reader.readLines(); + for (String line : lines) { + specs.add(new Trio(line)); + } + } catch (FileNotFoundException e) { + specs.add(new Trio(familySpec)); // not a file, so must be a family spec + } + } + + return specs; + } + + return new ArrayList(); + } + + /** + * Parse the familial relationship specification, and initialize VCF writer + */ + public void initialize() { + trios = getFamilySpecsFromCommandLineInput(familySpecs); + + ArrayList rodNames = new ArrayList(); + rodNames.add(ROD_NAME); + + Map vcfRods = VCFUtils.getVCFHeadersFromRods(getToolkit(), rodNames); + Set vcfSamples = SampleUtils.getSampleList(vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE); + + Set headerLines = new HashSet(); + headerLines.addAll(VCFUtils.getHeaderFields(this.getToolkit())); + headerLines.add(new VCFFormatHeaderLine(TRANSMISSION_PROBABILITY_TAG_NAME, 1, VCFHeaderLineType.Float, "Probability that the phase is correct given that the genotypes are correct")); + headerLines.add(new VCFHeaderLine("source", SOURCE_NAME)); + vcfWriter.writeHeader(new VCFHeader(headerLines, vcfSamples)); + } + + private double computeTransmissionLikelihoodOfGenotypeConfiguration(Genotype mom, Genotype dad, Genotype child) { + double[] momLikelihoods = MathUtils.normalizeFromLog10(mom.getLikelihoods().getAsVector()); + double[] dadLikelihoods = MathUtils.normalizeFromLog10(dad.getLikelihoods().getAsVector()); + double[] childLikelihoods = MathUtils.normalizeFromLog10(child.getLikelihoods().getAsVector()); + + int momIndex = mom.getType().ordinal() - 1; + int dadIndex = dad.getType().ordinal() - 1; + int childIndex = child.getType().ordinal() - 1; + + return momLikelihoods[momIndex]*dadLikelihoods[dadIndex]*childLikelihoods[childIndex]; + } + + private ArrayList createAllThreeGenotypes(Allele refAllele, Allele altAllele, Genotype g) { + List homRefAlleles = new ArrayList(); + homRefAlleles.add(refAllele); + homRefAlleles.add(refAllele); + Genotype homRef = new Genotype(g.getSampleName(), homRefAlleles, g.getNegLog10PError(), null, g.getAttributes(), false); + + List hetAlleles = new ArrayList(); + hetAlleles.add(refAllele); + hetAlleles.add(altAllele); + Genotype het = new Genotype(g.getSampleName(), hetAlleles, g.getNegLog10PError(), null, g.getAttributes(), false); + + List homVarAlleles = new ArrayList(); + homVarAlleles.add(altAllele); + homVarAlleles.add(altAllele); + Genotype homVar = new Genotype(g.getSampleName(), homVarAlleles, g.getNegLog10PError(), null, g.getAttributes(), false); + + ArrayList genotypes = new ArrayList(); + genotypes.add(homRef); + genotypes.add(het); + genotypes.add(homVar); + + return genotypes; + } + + private int getNumberOfMatchingAlleles(Allele alleleToMatch, Genotype g) { + List alleles = g.getAlleles(); + int matchingAlleles = 0; + + for (Allele a : alleles) { + if (!alleleToMatch.equals(a)) { + matchingAlleles++; + } + } + + return matchingAlleles; + } + + private boolean isMendelianViolation(Allele refAllele, Allele altAllele, Genotype mom, Genotype dad, Genotype child) { + int numMomRefAlleles = getNumberOfMatchingAlleles(refAllele, mom) > 0 ? 1 : 0; + int numMomAltAlleles = getNumberOfMatchingAlleles(altAllele, mom) > 0 ? 1 : 0; + + int numDadRefAlleles = getNumberOfMatchingAlleles(refAllele, dad) > 0 ? 1 : 0; + int numDadAltAlleles = getNumberOfMatchingAlleles(altAllele, dad) > 0 ? 1 : 0; + + int numChildRefAlleles = getNumberOfMatchingAlleles(refAllele, child); + int numChildAltAlleles = getNumberOfMatchingAlleles(altAllele, child); + + return (numMomRefAlleles + numDadRefAlleles < numChildRefAlleles || numMomAltAlleles + numDadAltAlleles < numChildAltAlleles); + } + + private ArrayList getPhasedGenotypes(Genotype mom, Genotype dad, Genotype child) { + Set possiblePhasedChildGenotypes = new HashSet(); + + for (Allele momAllele : mom.getAlleles()) { + for (Allele dadAllele : dad.getAlleles()) { + ArrayList possiblePhasedChildAlleles = new ArrayList(); + possiblePhasedChildAlleles.add(momAllele); + possiblePhasedChildAlleles.add(dadAllele); + + Genotype possiblePhasedChildGenotype = new Genotype(child.getSampleName(), possiblePhasedChildAlleles, child.getNegLog10PError(), child.getFilters(), child.getAttributes(), true); + + possiblePhasedChildGenotypes.add(possiblePhasedChildGenotype); + } + } + + ArrayList finalGenotypes = new ArrayList(); + + for (Genotype phasedChildGenotype : possiblePhasedChildGenotypes) { + if (child.sameGenotype(phasedChildGenotype, true)) { + Allele momTransmittedAllele = phasedChildGenotype.getAllele(0); + Allele momUntransmittedAllele = mom.getAllele(0) != momTransmittedAllele ? mom.getAllele(0) : mom.getAllele(1); + + ArrayList phasedMomAlleles = new ArrayList(); + phasedMomAlleles.add(momTransmittedAllele); + phasedMomAlleles.add(momUntransmittedAllele); + + Genotype phasedMomGenotype = new Genotype(mom.getSampleName(), phasedMomAlleles, mom.getNegLog10PError(), mom.getFilters(), mom.getAttributes(), true); + + Allele dadTransmittedAllele = phasedChildGenotype.getAllele(1); + Allele dadUntransmittedAllele = dad.getAllele(0) != dadTransmittedAllele ? dad.getAllele(0) : dad.getAllele(1); + + ArrayList phasedDadAlleles = new ArrayList(); + phasedDadAlleles.add(dadTransmittedAllele); + phasedDadAlleles.add(dadUntransmittedAllele); + + Genotype phasedDadGenotype = new Genotype(dad.getSampleName(), phasedDadAlleles, dad.getNegLog10PError(), dad.getFilters(), dad.getAttributes(), true); + + finalGenotypes.add(phasedMomGenotype); + finalGenotypes.add(phasedDadGenotype); + finalGenotypes.add(phasedChildGenotype); + + return finalGenotypes; + } + } + + finalGenotypes.add(mom); + finalGenotypes.add(dad); + finalGenotypes.add(child); + + return finalGenotypes; + } + + private ArrayList phaseTrioGenotypes(Allele ref, Allele alt, Genotype mother, Genotype father, Genotype child) { + ArrayList finalGenotypes = new ArrayList(); + finalGenotypes.add(mother); + finalGenotypes.add(father); + finalGenotypes.add(child); + + if (mother.isCalled() && father.isCalled() && child.isCalled()) { + ArrayList possibleMotherGenotypes = createAllThreeGenotypes(ref, alt, mother); + ArrayList possibleFatherGenotypes = createAllThreeGenotypes(ref, alt, father); + ArrayList possibleChildGenotypes = createAllThreeGenotypes(ref, alt, child); + + double bestConfigurationLikelihood = 0.0; + double bestPrior = 0.0; + Genotype bestMotherGenotype = mother; + Genotype bestFatherGenotype = father; + Genotype bestChildGenotype = child; + + double norm = 0.0; + + for (Genotype motherGenotype : possibleMotherGenotypes) { + for (Genotype fatherGenotype : possibleFatherGenotypes) { + for (Genotype childGenotype : possibleChildGenotypes) { + double prior = isMendelianViolation(ref, alt, motherGenotype, fatherGenotype, childGenotype) ? MENDELIAN_VIOLATION_PRIOR : 1.0 - 12*MENDELIAN_VIOLATION_PRIOR; + double configurationLikelihood = computeTransmissionLikelihoodOfGenotypeConfiguration(motherGenotype, fatherGenotype, childGenotype); + norm += prior*configurationLikelihood; + + if (prior*configurationLikelihood > bestPrior*bestConfigurationLikelihood) { + bestConfigurationLikelihood = configurationLikelihood; + bestPrior = prior; + bestMotherGenotype = motherGenotype; + bestFatherGenotype = fatherGenotype; + bestChildGenotype = childGenotype; + } + } + } + } + + if (!(bestMotherGenotype.isHet() && bestFatherGenotype.isHet() && bestChildGenotype.isHet())) { + Map attributes = new HashMap(); + attributes.putAll(bestChildGenotype.getAttributes()); + attributes.put(TRANSMISSION_PROBABILITY_TAG_NAME, bestPrior*bestConfigurationLikelihood / norm); + bestChildGenotype = Genotype.modifyAttributes(bestChildGenotype, attributes); + + finalGenotypes = getPhasedGenotypes(bestMotherGenotype, bestFatherGenotype, bestChildGenotype); + } + } + + return finalGenotypes; + } + + /** + * For each variant in the file, determine the phasing for the child and replace the child's genotype with the trio's genotype + * + * @param tracker the reference meta-data tracker + * @param ref the reference context + * @param context the alignment context + * @return null + */ + @Override + public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + if (tracker != null) { + Collection vcs = tracker.getVariantContexts(ref, ROD_NAME, null, context.getLocation(), true, true); + + for (VariantContext vc : vcs) { + Map genotypeMap = vc.getGenotypes(); + + for (Trio trio : trios) { + Genotype mother = vc.getGenotype(trio.getMother()); + Genotype father = vc.getGenotype(trio.getFather()); + Genotype child = vc.getGenotype(trio.getChild()); + + ArrayList trioGenotypes = phaseTrioGenotypes(vc.getReference(), vc.getAltAlleleWithHighestAlleleCount(), mother, father, child); + + Genotype phasedMother = trioGenotypes.get(0); + Genotype phasedFather = trioGenotypes.get(1); + Genotype phasedChild = trioGenotypes.get(2); + + genotypeMap.put(phasedMother.getSampleName(), phasedMother); + genotypeMap.put(phasedFather.getSampleName(), phasedFather); + genotypeMap.put(phasedChild.getSampleName(), phasedChild); + } + + VariantContext newvc = VariantContext.modifyGenotypes(vc, genotypeMap); + + vcfWriter.add(newvc, ref.getBase()); + } + } + + return null; + } + + /** + * Provide an initial value for reduce computations. + * + * @return Initial value of reduce. + */ + @Override + public Integer reduceInit() { + return null; + } + + /** + * Reduces a single map with the accumulator provided as the ReduceType. + * + * @param value result of the map. + * @param sum accumulator for the reduce. + * @return accumulator with result of the map taken into account. + */ + @Override + public Integer reduce(Integer value, Integer sum) { + return null; + } +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java index fbe6e5b5a..9702fd18c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java @@ -23,27 +23,28 @@ */ package org.broadinstitute.sting.gatk.walkers.phasing; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.*; +import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Hidden; +import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.datasources.sample.Sample; import org.broadinstitute.sting.gatk.filters.MappingQualityZeroReadFilter; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import org.broadinstitute.sting.gatk.walkers.*; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Output; -import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.DisjointSet; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.HasGenomeLocation; +import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.codecs.vcf.SortingVCFWriter; -import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import java.io.*; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/SNPallelePair.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/SNPallelePair.java index db1f888a1..153c4a23f 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/SNPallelePair.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/SNPallelePair.java @@ -23,10 +23,10 @@ */ package org.broadinstitute.sting.gatk.walkers.phasing; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.Genotype; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.Genotype; public class SNPallelePair extends AllelePair { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/WriteVCF.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/WriteVCF.java index f82e48abd..2851ace0d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/WriteVCF.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/WriteVCF.java @@ -24,9 +24,9 @@ package org.broadinstitute.sting.gatk.walkers.phasing; import org.apache.log4j.Logger; +import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; public class WriteVCF { public static void writeVCF(VariantContext vc, VCFWriter writer, Logger logger) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountIntervals.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountIntervals.java index feb5f62af..2bdd4558f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountIntervals.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountIntervals.java @@ -6,14 +6,12 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; -import org.broadinstitute.sting.gatk.walkers.LocusWalker; import org.broadinstitute.sting.gatk.walkers.RefWalker; -import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.collections.Pair; -import java.util.List; import java.io.PrintStream; +import java.util.List; /** * Counts the number of contiguous regions the walker traverses over. Slower than it needs to be, but diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountLociWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountLociWalker.java index ef6ff04f2..0d68c8493 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountLociWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountLociWalker.java @@ -1,6 +1,5 @@ package org.broadinstitute.sting.gatk.walkers.qc; -import net.sf.samtools.SAMFileWriter; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountPairsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountPairsWalker.java index cece04fcf..26fa9a258 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountPairsWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountPairsWalker.java @@ -25,14 +25,14 @@ package org.broadinstitute.sting.gatk.walkers.qc; +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.walkers.ReadPairWalker; import org.broadinstitute.sting.utils.collections.ExpandingArrayList; -import org.broadinstitute.sting.commandline.Output; -import net.sf.samtools.SAMRecord; +import java.io.PrintStream; import java.util.Collection; import java.util.List; -import java.io.PrintStream; /** * Counts the number of read pairs encountered in a file sorted in @@ -40,7 +40,6 @@ import java.io.PrintStream; * of paired reads. * * @author mhanna - * @version 0.1 */ public class CountPairsWalker extends ReadPairWalker { @Output diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountReadsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountReadsWalker.java index 74f63aa2f..87c0409b9 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountReadsWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountReadsWalker.java @@ -1,11 +1,11 @@ package org.broadinstitute.sting.gatk.walkers.qc; import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.Requires; import org.broadinstitute.sting.gatk.walkers.DataSource; import org.broadinstitute.sting.gatk.walkers.ReadWalker; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.walkers.Requires; /** * Walks over the input data set, calculating the number of reads seen for diagnostic purposes. diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRodByRefWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRodByRefWalker.java index a4e80138f..d1545f159 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRodByRefWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRodByRefWalker.java @@ -25,13 +25,13 @@ package org.broadinstitute.sting.gatk.walkers.qc; +import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.RefWalker; import org.broadinstitute.sting.utils.collections.ExpandingArrayList; import org.broadinstitute.sting.utils.collections.Pair; -import org.broadinstitute.sting.commandline.Argument; /** * Prints out counts of the number of reference ordered data objects are diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRodWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRodWalker.java index 92867e1a9..8a03dea44 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRodWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRodWalker.java @@ -27,23 +27,24 @@ package org.broadinstitute.sting.gatk.walkers.qc; import net.sf.samtools.SAMSequenceDictionary; import net.sf.samtools.SAMSequenceRecord; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.gatk.walkers.TreeReducible; -import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.collections.ExpandingArrayList; import org.broadinstitute.sting.utils.collections.Pair; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Output; +import java.io.PrintStream; import java.util.ArrayList; -import java.util.List; import java.util.Collection; import java.util.LinkedList; -import java.io.PrintStream; +import java.util.List; /** * Prints out counts of the number of reference ordered data objects are diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CycleQualityWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CycleQualityWalker.java index 5ae35416a..b5f5442cd 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CycleQualityWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CycleQualityWalker.java @@ -25,22 +25,22 @@ package org.broadinstitute.sting.gatk.walkers.qc; -import org.broadinstitute.sting.gatk.walkers.Requires; -import org.broadinstitute.sting.gatk.walkers.DataSource; -import org.broadinstitute.sting.gatk.walkers.ReadWalker; -import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.collections.PrimitivePair; -import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.sam.AlignmentUtils; +import net.sf.samtools.SAMReadGroupRecord; +import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; -import net.sf.samtools.SAMRecord; -import net.sf.samtools.SAMReadGroupRecord; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.DataSource; +import org.broadinstitute.sting.gatk.walkers.ReadWalker; +import org.broadinstitute.sting.gatk.walkers.Requires; +import org.broadinstitute.sting.utils.collections.PrimitivePair; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.sam.AlignmentUtils; -import java.util.*; import java.io.*; +import java.util.*; /** * Created by IntelliJ IDEA. diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/PrintLocusContextWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/PrintLocusContextWalker.java index 39b69ba29..d3b992cb5 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/PrintLocusContextWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/PrintLocusContextWalker.java @@ -1,16 +1,16 @@ package org.broadinstitute.sting.gatk.walkers.qc; +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.*; -import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.walkers.LocusWalker; +import org.broadinstitute.sting.gatk.walkers.TreeReducible; -import java.util.List; -import java.util.Arrays; import java.io.PrintStream; - -import net.sf.samtools.SAMRecord; +import java.util.Arrays; +import java.util.List; /** * At each locus in the input data set, prints the reference base, genomic location, and diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ReadClippingStatsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ReadClippingStatsWalker.java index 2f1773d01..908e389a8 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ReadClippingStatsWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ReadClippingStatsWalker.java @@ -22,21 +22,24 @@ package org.broadinstitute.sting.gatk.walkers.qc; -import org.broadinstitute.sting.gatk.walkers.Requires; -import org.broadinstitute.sting.gatk.walkers.DataSource; -import org.broadinstitute.sting.gatk.walkers.ReadWalker; -import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.sam.AlignmentUtils; +import net.sf.samtools.CigarElement; +import net.sf.samtools.CigarOperator; +import net.sf.samtools.SAMReadGroupRecord; +import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; -import net.sf.samtools.*; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.DataSource; +import org.broadinstitute.sting.gatk.walkers.ReadWalker; +import org.broadinstitute.sting.gatk.walkers.Requires; +import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.sam.AlignmentUtils; -import java.util.*; -import java.io.*; +import java.io.PrintStream; +import java.util.Arrays; /** * User: depristo diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ReadValidationWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ReadValidationWalker.java index 6ad0340a4..fa1bb4d55 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ReadValidationWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ReadValidationWalker.java @@ -1,16 +1,16 @@ package org.broadinstitute.sting.gatk.walkers.qc; +import net.sf.samtools.SAMFileWriter; +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.ReadWalker; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import net.sf.samtools.SAMRecord; -import net.sf.samtools.SAMFileWriter; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; -import java.util.List; import java.util.ArrayList; +import java.util.List; /* diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/RodSystemValidationWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/RodSystemValidationWalker.java index 9cb715507..170630b77 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/RodSystemValidationWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/RodSystemValidationWalker.java @@ -1,6 +1,5 @@ package org.broadinstitute.sting.gatk.walkers.qc; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; @@ -8,8 +7,11 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; -import org.broadinstitute.sting.gatk.walkers.*; +import org.broadinstitute.sting.gatk.walkers.Reference; +import org.broadinstitute.sting.gatk.walkers.RodWalker; +import org.broadinstitute.sting.gatk.walkers.Window; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.io.*; import java.math.BigInteger; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ValidatingPileupWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ValidatingPileupWalker.java index bc68be592..e1e6c4b69 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ValidatingPileupWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ValidatingPileupWalker.java @@ -25,20 +25,20 @@ package org.broadinstitute.sting.gatk.walkers.qc; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.features.sampileup.SAMPileupFeature; import org.broadinstitute.sting.gatk.walkers.*; -import org.broadinstitute.sting.utils.*; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; -import java.util.Arrays; import java.io.PrintStream; +import java.util.Arrays; /** * At every locus in the input set, compares the pileup data (reference base, aligned base from diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesGatherer.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesGatherer.java index 568e1b638..fc6b3daee 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesGatherer.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesGatherer.java @@ -7,7 +7,6 @@ import org.broadinstitute.sting.utils.text.XReadLines; import java.io.File; import java.io.FileNotFoundException; import java.io.PrintStream; -import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.regex.Pattern; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java index c21f548b3..775cde1f4 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java @@ -27,24 +27,25 @@ package org.broadinstitute.sting.gatk.walkers.recalibration; import org.broad.tribble.bed.BEDCodec; import org.broad.tribble.dbsnp.DbSNPCodec; -import org.broadinstitute.sting.gatk.filters.MappingQualityUnavailableReadFilter; -import org.broadinstitute.sting.utils.codecs.vcf.VCF3Codec; -import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.ArgumentCollection; import org.broadinstitute.sting.commandline.Gather; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; +import org.broadinstitute.sting.gatk.filters.MappingQualityUnavailableReadFilter; import org.broadinstitute.sting.gatk.filters.MappingQualityZeroReadFilter; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; import org.broadinstitute.sting.gatk.walkers.*; -import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.classloader.PluginManager; +import org.broadinstitute.sting.utils.codecs.vcf.VCF3Codec; +import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec; import org.broadinstitute.sting.utils.collections.NestedHashMap; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.ArgumentCollection; import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.pileup.PileupElement; @@ -57,6 +58,8 @@ import java.util.List; import java.util.Map; /** + * First pass of the recalibration. Generates recalibration table based on various user-specified covariates (such as reported quality score, cycle, and dinucleotide). + * * This walker is designed to work as the first pass in a two-pass processing step. * It does a by-locus traversal operating only at sites that are not in dbSNP. * We assume that all reference mismatches we see are therefore errors and indicative of poor base quality. @@ -71,7 +74,6 @@ import java.util.Map; * * @author rpoplin * @since Nov 3, 2009 - * @help.summary First pass of the recalibration. Generates recalibration table based on various user-specified covariates (such as reported quality score, cycle, and dinucleotide). */ @BAQMode(ApplicationTime = BAQ.ApplicationTime.FORBIDDEN) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java index 64e0864c0..945d02837 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java @@ -1,7 +1,6 @@ package org.broadinstitute.sting.gatk.walkers.recalibration; import net.sf.samtools.SAMRecord; - import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.exceptions.UserException; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/DinucCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/DinucCovariate.java index 0de6897d0..a7717161a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/DinucCovariate.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/DinucCovariate.java @@ -1,11 +1,10 @@ package org.broadinstitute.sting.gatk.walkers.recalibration; -import java.util.HashMap; - import net.sf.samtools.SAMRecord; - import org.broadinstitute.sting.utils.BaseUtils; +import java.util.HashMap; + /* * Copyright (c) 2009 The Broad Institute * diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java index fdbeb6a31..e6d0b306c 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java @@ -25,19 +25,21 @@ package org.broadinstitute.sting.gatk.walkers.recalibration; +import net.sf.samtools.SAMReadGroupRecord; +import net.sf.samtools.SAMRecord; +import net.sf.samtools.SAMUtils; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.collections.NestedHashMap; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.AlignmentUtils; -import org.broadinstitute.sting.utils.*; -import org.broadinstitute.sting.utils.collections.NestedHashMap; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; -import java.util.*; - -import net.sf.samtools.SAMRecord; -import net.sf.samtools.SAMReadGroupRecord; -import net.sf.samtools.SAMUtils; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; /** * Created by IntelliJ IDEA. diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDatumOptimized.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDatumOptimized.java index 7ba441ccc..f04989fa5 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDatumOptimized.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDatumOptimized.java @@ -3,7 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.recalibration; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.QualityUtils; -import java.util.*; +import java.util.List; /* * Copyright (c) 2010 The Broad Institute diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java index 0eaa1245e..fec7ee4e6 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java @@ -25,35 +25,39 @@ package org.broadinstitute.sting.gatk.walkers.recalibration; -import java.io.File; -import java.io.FileNotFoundException; -import java.util.*; -import java.util.regex.Pattern; - import net.sf.samtools.*; import net.sf.samtools.util.SequenceUtil; - +import org.broadinstitute.sting.commandline.*; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.io.StingSAMFileWriter; import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; import org.broadinstitute.sting.gatk.walkers.*; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.classloader.PluginManager; -import org.broadinstitute.sting.utils.collections.NestedHashMap; import org.broadinstitute.sting.utils.QualityUtils; -import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException; -import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.text.TextFormattingUtils; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.baq.BAQ; -import org.broadinstitute.sting.utils.text.XReadLines; -import org.broadinstitute.sting.commandline.*; +import org.broadinstitute.sting.utils.classloader.PluginManager; +import org.broadinstitute.sting.utils.collections.NestedHashMap; +import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException; +import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; +import org.broadinstitute.sting.utils.text.TextFormattingUtils; +import org.broadinstitute.sting.utils.text.XReadLines; + +import java.io.File; +import java.io.FileNotFoundException; +import java.util.ArrayList; +import java.util.List; +import java.util.MissingResourceException; +import java.util.ResourceBundle; +import java.util.regex.Pattern; /** + * Second pass of the recalibration. Uses the table generated by CountCovariates to update the base quality scores of the input bam file using a sequential table calculation making the base quality scores more accurately reflect the actual quality of the bases as measured by reference mismatch rate. + * * This walker is designed to work as the second pass in a two-pass processing step, doing a by-read traversal. - + * * For each base in each read this walker calculates various user-specified covariates (such as read group, reported quality score, cycle, and dinuc) * Using these values as a key in a large hashmap the walker calculates an empirical base quality score and overwrites the quality score currently in the read. * This walker then outputs a new bam file with these updated (recalibrated) reads. @@ -63,7 +67,6 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord; * * @author rpoplin * @since Nov 3, 2009 - * @help.summary Second pass of the recalibration. Uses the table generated by CountCovariates to update the base quality scores of the input bam file using a sequential table calculation making the base quality scores more accurately reflect the actual quality of the bases as measured by reference mismatch rate. */ @BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = BAQ.ApplicationTime.ON_OUTPUT) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/sequenom/CreateSequenomMask.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/sequenom/CreateSequenomMask.java deleted file mode 100755 index c1c17bda5..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/sequenom/CreateSequenomMask.java +++ /dev/null @@ -1,50 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.sequenom; - -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.RodWalker; -import org.broadinstitute.sting.commandline.Output; -import org.broadinstitute.sting.utils.GenomeLoc; - -import java.io.PrintStream; - -/** - * Create a mask for use with the PickSequenomProbes walker. - */ -public class CreateSequenomMask extends RodWalker { - @Output - PrintStream out; - - public void initialize() {} - - public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - if ( tracker == null ) - return 0; - - int result = 0; - for ( VariantContext vc : tracker.getAllVariantContexts(ref) ) { - if ( vc.isSNP() ) { - GenomeLoc loc = context.getLocation(); - out.println(loc.getContig() + "\t" + (loc.getStart()-1) + "\t" + loc.getStop()); - result = 1; - break; - } - } - - return result; - } - - public Integer reduceInit() { - return 0; - } - - public Integer reduce(Integer value, Integer sum) { - return value + sum; - } - - public void onTraversalDone(Integer sum) { - logger.info("Found " + sum + " masking sites."); - } -} \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/sequenom/PickSequenomProbes.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/sequenom/PickSequenomProbes.java deleted file mode 100755 index fde233b5d..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/sequenom/PickSequenomProbes.java +++ /dev/null @@ -1,334 +0,0 @@ -/* - * Copyright (c) 2010 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.walkers.sequenom; - -import net.sf.samtools.util.CloseableIterator; -import org.broad.tribble.bed.BEDCodec; -import org.broad.tribble.dbsnp.DbSNPCodec; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.*; -import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack; -import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder; -import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; -import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator; -import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; -import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; -import org.broadinstitute.sting.gatk.walkers.*; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Output; - -import java.io.File; -import java.util.*; -import java.io.PrintStream; - - -/** - * Generates Sequenom probe information given a single variant track. Emitted is the variant - * along with the 200 reference bases on each side of the variant. - */ -@WalkerName("PickSequenomProbes") -@Requires(value={DataSource.REFERENCE}) -@Reference(window=@Window(start=-200,stop=200)) -public class PickSequenomProbes extends RodWalker { - @Output - PrintStream out; - - @Argument(required=false, shortName="snp_mask", doc="positions to be masked with N's") - protected String SNP_MASK = null; - @Argument(required=false, shortName="project_id",doc="If specified, all probenames will be prepended with 'project_id|'") - String project_id = null; - @Argument(required = false, shortName="omitWindow", doc = "If specified, the window appender will be omitted from the design files (e.g. \"_chr:start-stop\")") - boolean omitWindow = false; - @Argument(required = false, fullName="usePlinkRODNamingConvention", shortName="nameConvention",doc="Use the naming convention defined in PLINKROD") - boolean useNamingConvention = false; - @Argument(required = false, fullName="noMaskWindow",shortName="nmw",doc="Do not mask bases within X bases of an event when designing probes") - int noMaskWindow = 0; - @Argument(required = false, shortName="counter", doc = "If specified, unique count id (ordinal number) is added to the end of each assay name") - boolean addCounter = false; - - private byte [] maskFlags = new byte[401]; - - private LocationAwareSeekableRODIterator snpMaskIterator=null; - - private GenomeLoc positionOfLastVariant = null; - - private int cnt = 0; - private int discarded = 0; - - VariantCollection VCs ; // will keep a set of distinct variants at a given site - private List processedVariantsInScope = new LinkedList(); - - public void initialize() { - if ( SNP_MASK != null ) { - logger.info("Loading SNP mask... "); - ReferenceOrderedData snp_mask; - //if ( SNP_MASK.contains(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME)) { - RMDTrackBuilder builder = new RMDTrackBuilder(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(),getToolkit().getGenomeLocParser(),getToolkit().getArguments().unsafe); - RMDTrack track = builder.createInstanceOfTrack(BEDCodec.class, new File(SNP_MASK)); - snpMaskIterator = new SeekableRODIterator(track.getHeader(), - track.getSequenceDictionary(), - getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(), - getToolkit().getGenomeLocParser(), - track.getIterator()); - //} else { - // // TODO: fix me when Plink is back - // throw new IllegalArgumentException("We currently do not support other snp_mask tracks (like Plink)"); - //} - - } - VCs = new VariantCollection(); - } - - - public String map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - if ( tracker == null ) - return ""; - - logger.debug("Probing " + ref.getLocus() + " " + ref.getWindow()); - - VCs.clear(); - VCs.addAll( tracker.getAllVariantContexts(ref), ref.getLocus() ); - - discarded += VCs.discarded(); - - if ( VCs.size() == 0 ) { - logger.debug(" Context empty"); - return ""; - } - - if ( VCs.size() > 1 ) { - logger.debug(" "+VCs.size()+ " variants at the locus"); - } - -// System.out.print("At locus "+ref.getLocus()+": "); -// for ( VariantContext vc : VCs ) { -// System.out.println(vc.toString()); -// } - - // little optimization: since we may have few events at the current site on the reference, - // we are going to make sure we compute the mask and ref bases only once for each location and only if we need to - boolean haveMaskForWindow = false; - boolean haveBasesForWindow = false; - String leading_bases = null; - String trailing_bases = null; - - StringBuilder assaysForLocus = new StringBuilder(""); // all assays for current locus will be collected here (will be multi-line if multiple events are assayed) - - // get all variant contexts!!!! - for ( VariantContext vc : VCs ) { - - // we can only deal with biallelic sites for now - if ( !vc.isBiallelic() ) { - logger.debug(" Not biallelic; skipped"); - continue; - } - - // we don't want to see the same multi-base event (deletion, DNP etc) multiple times. - // All the vcs we are currently seeing are clearly on the same contig as the current reference - // poisiton (or we would not see them at all!). All we need to check is if the vc starts at the - // current reference position (i.e. it is the first time we see it) or not (i.e. we saw it already). - if ( ref.getLocus().getStart() != vc.getStart() ) - continue; - - if ( ! haveMaskForWindow ) { - String contig = context.getLocation().getContig(); - int offset = context.getLocation().getStart(); - int true_offset = offset - 200; - - // we have variant; let's load all the snps falling into the current window and prepare the mask array. - // we need to do it only once per window, regardless of how many vcs we may have at this location! - if ( snpMaskIterator != null ) { - // clear the mask - for ( int i = 0 ; i < 401; i++ ) - maskFlags[i] = 0; - - RODRecordList snpList = snpMaskIterator.seekForward(getToolkit().getGenomeLocParser().createGenomeLoc(contig,offset-200,offset+200)); - if ( snpList != null && snpList.size() != 0 ) { - Iterator snpsInWindow = snpList.iterator(); - int i = 0; - while ( snpsInWindow.hasNext() ) { - GenomeLoc snp = snpsInWindow.next().getLocation(); - // we don't really want to mask out multi-base indels - if ( snp.size() > 1 ) - continue; - logger.debug(" SNP at "+snp.getStart()); - int offsetInWindow = (int)(snp.getStart() - true_offset); - maskFlags[offsetInWindow] = 1; - } - } - } - haveMaskForWindow = true; // if we use masking, we will probably need to recompute the window... - } - - if ( ! haveBasesForWindow ) { - byte[] context_bases = ref.getBases(); - for (int i = 0; i < 401; i++) { - if ( maskFlags[i] == 1 && ( i < 200 - noMaskWindow || i > 200 + getNoMaskWindowRightEnd(vc,noMaskWindow) ) ) { - context_bases[i] = 'N'; - } - } - leading_bases = new String(Arrays.copyOfRange(context_bases, 0, 200)); - trailing_bases = new String(Arrays.copyOfRange(context_bases, 201, 401)); - // masked bases are not gonna change for the current window, unless we use windowed masking; - // in the latter case the bases (N's) will depend on the event we are currently looking at, - // so we better recompute.. - if ( noMaskWindow == 0 ) haveBasesForWindow = true; - } - - - // below, build single assay line for the current VC: - - String assay_sequence; - if ( vc.isSNP() ) - assay_sequence = leading_bases + "[" + (char)ref.getBase() + "/" + vc.getAlternateAllele(0).toString() + "]" + trailing_bases; - else if ( vc.isMNP() ) - assay_sequence = leading_bases + "[" + new String(vc.getReference().getBases()) + "/" + new String(vc.getAlternateAllele(0).getBases())+"]"+trailing_bases.substring(vc.getReference().length()-1); - else if ( vc.isInsertion() ) - assay_sequence = leading_bases + (char)ref.getBase() + "[-/" + vc.getAlternateAllele(0).toString() + "]" + trailing_bases; - else if ( vc.isDeletion() ) - assay_sequence = leading_bases + (char)ref.getBase() + "[" + new String(vc.getReference().getBases()) + "/-]" + trailing_bases.substring(vc.getReference().length()); - else - continue; - - StringBuilder assay_id = new StringBuilder(); - if ( project_id != null ) { - assay_id.append(project_id); - assay_id.append('|'); - } - if ( useNamingConvention ) { - assay_id.append('c'); - assay_id.append(context.getLocation().toString().replace(":","_p")); - } else { - assay_id.append(context.getLocation().toString().replace(':','_')); - } - if ( vc.isInsertion() ) assay_id.append("_gI"); - else if ( vc.isDeletion()) assay_id.append("_gD"); - - if ( ! omitWindow ) { - assay_id.append("_"); - assay_id.append(ref.getWindow().toString().replace(':', '_')); - } - ++cnt; - if ( addCounter ) assay_id.append("_"+cnt); - - assaysForLocus.append(assay_id); - assaysForLocus.append('\t'); - assaysForLocus.append(assay_sequence); - assaysForLocus.append('\n'); - } - return assaysForLocus.toString(); - } - - public String reduceInit() { - return ""; - } - - public String reduce(String data, String sum) { - out.print(data); - return ""; - } - - private int getNoMaskWindowRightEnd(VariantContext vc, int window) { - if ( window == 0 ) { - return 0; - } - - if ( vc.isInsertion() ) { - return window-1; - } - - int max = 0; - for (Allele a : vc.getAlleles() ) { - if ( vc.isInsertion() ) { - logger.debug("Getting length of allele "+a.toString()+" it is "+a.getBases().length+" (ref allele is "+vc.getReference().toString()+")"); - } - if ( a.getBases().length > max ) { - max = a.getBases().length; - } - } - return max+window-1; - } - - public void onTraversalDone(String sum) { - logger.info(cnt+" assay seqences generated"); - logger.info(discarded+" events were found to be duplicates and discarded (no redundant assays generated)"); - } - - static class EventComparator implements Comparator { - - public int compare(VariantContext o1, VariantContext o2) { - // if variants start at different positions, they are different. All we actually - // care about is detecting the variants that are strictly the same; the actual ordering of distinct variants - // (which one we deem less and which one greater) is utterly unimportant. We just need to be consistent. - if ( o1.getStart() < o2.getStart() ) return -1; - if ( o1.getStart() > o2.getStart() ) return 1; - - if ( o1.getType() != o2.getType() ) return o1.getType().compareTo(o2.getType()); - - int refComp = o1.getReference().compareTo(o2.getReference()); - if ( refComp != 0 ) return refComp; - - return o1.getAlternateAllele(0).compareTo(o2.getAlternateAllele(0)); - - } - } - - static class VariantCollection implements Iterable { - TreeSet variants = new TreeSet(new EventComparator()); - int discarded = 0; - - public void add(VariantContext vc, GenomeLoc current) { - if ( vc.getStart() != current.getStart() ) return; // we add only variants that start at current locus - // note that we do not check chr here, since the way this class is used, the mathod is always called with - // VCs coming from the same metadata tracker, so they simply can not be on different chrs! - if ( !vc.isBiallelic() ) { - logger.info(" Non-biallelic variant encountered; skipped"); - return; - } - if ( variants.add(vc) == false ) discarded++; - } - - public void addAll(Collection c, GenomeLoc current) { - for ( VariantContext vc : c ) add(vc,current); - } - - public void clear() { - variants.clear(); - discarded = 0; - } - - public int discarded() { return discarded; } - - public int size() { return variants.size(); } - - public Iterator iterator() { return variants.iterator(); } - } -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java new file mode 100755 index 000000000..cb03d4c61 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java @@ -0,0 +1,414 @@ +package org.broadinstitute.sting.gatk.walkers.validation; + +import net.sf.picard.reference.ReferenceSequenceFileFactory; +import net.sf.samtools.SAMFileHeader; +import net.sf.samtools.SAMSequenceDictionary; +import org.broadinstitute.sting.alignment.Alignment; +import org.broadinstitute.sting.alignment.bwa.BWAConfiguration; +import org.broadinstitute.sting.alignment.bwa.BWTFiles; +import org.broadinstitute.sting.alignment.bwa.c.BWACAligner; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.refdata.features.table.TableFeature; +import org.broadinstitute.sting.gatk.walkers.DataSource; +import org.broadinstitute.sting.gatk.walkers.RMD; +import org.broadinstitute.sting.gatk.walkers.Requires; +import org.broadinstitute.sting.gatk.walkers.RodWalker; +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; + +import java.io.File; +import java.io.PrintStream; +import java.util.ArrayList; +import java.util.LinkedList; +import java.util.List; + +/** + * Created by IntelliJ IDEA. + * User: chartl + * Date: 6/13/11 + * Time: 2:12 PM + * To change this template use File | Settings | File Templates. + */ +@Requires(value={DataSource.REFERENCE}, referenceMetaData={@RMD(name="ProbeIntervals",type=TableFeature.class), +@RMD(name="ValidateAlleles",type=VariantContext.class),@RMD(name="MaskAlleles",type=VariantContext.class)}) +public class ValidationAmplicons extends RodWalker { + + @Argument(doc="Lower case SNPs rather than replacing with 'N'",fullName="lowerCaseSNPs",required=false) + boolean lowerCaseSNPs = false; + + @Argument(doc="Size of the virtual primer to use for lower-casing regions with low specificity",fullName="virtualPrimerSize",required=false) + int virtualPrimerSize = 20; + + @Argument(doc="Monomorphic sites in the mask file will be treated as filtered",fullName="filterMonomorphic",required=false) + boolean filterMonomorphic = false; + + @Argument(doc="Do not use BWA, lower-case repeats only",fullName="doNotUseBWA",required=false) + boolean doNotUseBWA = false; + + GenomeLoc prevInterval; + GenomeLoc allelePos; + String probeName; + StringBuilder sequence; + StringBuilder rawSequence; + boolean sequenceInvalid; + List invReason; + int indelCounter; + + @Argument(fullName="target_reference",shortName="target_ref",doc="The reference to which reads in the source file should be aligned. Alongside this reference should sit index files " + + "generated by bwa index -d bwtsw. If unspecified, will default " + + "to the reference specified via the -R argument.",required=false) + private File targetReferenceFile = null; + + @Output + PrintStream out; + + BWACAligner aligner = null; + + private SAMFileHeader header = null; + + public void initialize() { + if ( ! doNotUseBWA ) { + if(targetReferenceFile == null) + targetReferenceFile = getToolkit().getArguments().referenceFile; + BWTFiles bwtFiles = new BWTFiles(targetReferenceFile.getAbsolutePath()); + BWAConfiguration configuration = new BWAConfiguration(); + aligner = new BWACAligner(bwtFiles,configuration); + header = new SAMFileHeader(); + SAMSequenceDictionary referenceDictionary = + ReferenceSequenceFileFactory.getReferenceSequenceFile(targetReferenceFile).getSequenceDictionary(); + header.setSequenceDictionary(referenceDictionary); + header.setSortOrder(SAMFileHeader.SortOrder.unsorted); + } + } + + public Integer reduceInit() { + prevInterval = null; + sequence = null; + rawSequence = null; + sequenceInvalid = false; + probeName = null; + invReason = null; + indelCounter = 0; + return 0; + } + + public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + if ( tracker == null || ! tracker.hasROD("ProbeIntervals")) { return null; } + + GenomeLoc interval = ((TableFeature) tracker.getReferenceMetaData("ProbeIntervals",true).get(0)).getLocation(); + //logger.debug(interval); + if ( prevInterval == null || ! interval.equals(prevInterval) ) { + // we're in a new interval, we should: + // 1) print out previous data + // 2) reset internal data + // 3) instantiate traversal of this interval + + // step 1: + if ( prevInterval != null ) { + // there was a previous interval + validateSequence(); // ensure the sequence in the region is valid + // next line removed in favor of the one after + if ( doNotUseBWA ) { + lowerRepeats(); // change repeats in sequence to lower case + } else { + lowerNonUniqueSegments(); + } + print(); // print out the fasta sequence + } + + // step 2: + prevInterval = interval; + allelePos = null; + sequence = new StringBuilder(); + rawSequence = new StringBuilder(); + sequenceInvalid = false; + invReason = new LinkedList(); + logger.debug(Utils.join("\t",((TableFeature) tracker.getReferenceMetaData("ProbeIntervals",true).get(0)).getAllValues())); + probeName = ((TableFeature) tracker.getReferenceMetaData("ProbeIntervals",true).get(0)).getValue(1); + indelCounter = 0; + } + + // step 3 (or 1 if not new): + // build up the sequence + + VariantContext mask = tracker.getVariantContext(ref,"MaskAlleles",ref.getLocus()); + VariantContext validate = tracker.getVariantContext(ref,"ValidateAlleles",ref.getLocus()); + + if ( mask == null && validate == null ) { + if ( indelCounter > 0 ) { + sequence.append('N'); + indelCounter--; + } else { + sequence.append(Character.toUpperCase((char) ref.getBase())); + } + rawSequence.append(Character.toUpperCase((char) ref.getBase())); + } else if ( validate != null ) { + // doesn't matter if there's a mask here too -- this is what we want to validate + if ( validate.isFiltered() ) { + logger.warn("You are attempting to validate a filtered site. Why are you attempting to validate a filtered site? You should not be attempting to validate a filtered site."); + sequenceInvalid = true; + invReason.add("SITE_IS_FILTERED"); + } + if ( validate.isIndel() ) { + sequence.append(Character.toUpperCase((char)ref.getBase())); + rawSequence.append(Character.toUpperCase((char)ref.getBase())); + } + sequence.append('['); + sequence.append(validate.getAlternateAllele(0).toString()); + sequence.append('/'); + sequence.append(validate.getReference().toString()); + sequence.append(']'); + // do this to the raw sequence to -- the indeces will line up that way + rawSequence.append('['); + rawSequence.append(validate.getAlternateAllele(0).getBaseString()); + rawSequence.append('/'); + rawSequence.append(validate.getReference().getBaseString()); + rawSequence.append(']'); + allelePos = ref.getLocus(); + if ( indelCounter > 0 ) { + logger.warn("An indel event overlaps the event to be validated. This completely invalidates the probe."); + sequenceInvalid = true; + invReason.add("INDEL_OVERLAPS_VALIDATION_SITE"); + if ( validate.isSNP() ) { + indelCounter--; + } else { + indelCounter -= validate.getEnd()-validate.getStart(); + } + } + } else /* (mask != null && validate == null ) */ { + if ( ! mask.isSNP() && ! mask.isFiltered() && ( ! filterMonomorphic || ! mask.isMonomorphic() )) { + logger.warn("Mask Variant Context on the following warning line is not a SNP. Currently we can only mask out SNPs. This probe will not be designed."); + logger.warn(String.format("%s:%d-%d\t%s\t%s",mask.getChr(),mask.getStart(),mask.getEnd(),mask.isInsertion() ? "INS" : "DEL", Utils.join(",",mask.getAlleles()))); + sequenceInvalid = true; + invReason.add(mask.isInsertion() ? "INSERTION" : "DELETION"); + // note: indelCounter could be > 0 (could have small deletion within larger one). This always selects + // the larger event. + int indelCounterNew = mask.isInsertion() ? 2 : mask.getEnd()-mask.getStart(); + if ( indelCounterNew > indelCounter ) { + indelCounter = indelCounterNew; + } + //sequence.append((char) ref.getBase()); + //sequence.append(mask.isInsertion() ? 'I' : 'D'); + sequence.append("N"); + indelCounter--; + rawSequence.append(Character.toUpperCase((char) ref.getBase())); + } else if ( indelCounter > 0 ) { + // previous section resets the indel counter. Doesn't matter if there's a SNP underlying this, we just want to append an 'N' and move on. + sequence.append('N'); + indelCounter--; + rawSequence.append(Character.toUpperCase((char)ref.getBase())); + } else if ( ! mask.isFiltered() && ( ! filterMonomorphic || ! mask.isMonomorphic() )){ + logger.debug("SNP in mask found at " + ref.getLocus().toString()); + + if ( lowerCaseSNPs ) { + sequence.append(Character.toLowerCase((char) ref.getBase())); + } else { + sequence.append((char) BaseUtils.N); + } + + rawSequence.append(Character.toUpperCase((char) ref.getBase())); + } else if ( mask.isSNP() ) { + logger.debug("SNP in mask found at "+ref.getLocus().toString()+" but was either filtered or monomorphic"); + sequence.append((Character.toUpperCase((char) ref.getBase()))); + rawSequence.append(Character.toUpperCase((char) ref.getBase())); + } + } + + return 1; + } + + public Integer reduce(Integer i, Integer j) { + return 0; + } + + public void onTraversalDone(Integer fin ) { + validateSequence(); + if ( doNotUseBWA ) { + lowerRepeats(); + } else { + lowerNonUniqueSegments(); + aligner.close(); + } + print(); + } + + public void validateSequence() { + // code for ensuring primer sequence is valid goes here + + // validate that there are no masked sites near to the variant site + String seq = sequence.toString(); + int start = seq.indexOf('[') - 4; + int end = seq.indexOf(']') + 5; + + if ( start < 50 ) { + logger.warn("There is not enough sequence before the start position of the probed allele for adequate probe design. This site will not be designed."); + sequenceInvalid = true; + invReason.add("START_TOO_CLOSE"); + } else if ( end > seq.length() - 50 ) { + logger.warn("There is not enough sequence after the end position of the probed allele fore adequate probe design. This site will not be desinged. "); + sequenceInvalid = true; + invReason.add("END_TOO_CLOSE"); + } else { + boolean maskNearVariantSite = false; + for ( int i = start; i < end; i++ ) { + maskNearVariantSite |= (seq.charAt(i) == 'N' || Character.isLowerCase(seq.charAt(i))); + } + + if ( maskNearVariantSite ) { + logger.warn("There is one (or more) mask variants within 4 basepair of the variant given for validation. This site will not be designed."); + sequenceInvalid = true; + invReason.add("VARIANT_TOO_NEAR_PROBE"); + } + } + + if ( seq.indexOf("[") != seq.lastIndexOf("[") ) { + logger.warn("Multiple probe variants were found within this interval. Please fix the definitions of the intervals so they do not overlap."); + sequenceInvalid = true; + invReason.add("MULTIPLE_PROBES"); + } + + if ( seq.indexOf("[") < 0 ) { + logger.warn("No variants in region were found. This site will not be designed."); + sequenceInvalid = true; + invReason.add("NO_VARIANTS_FOUND"); + } + } + + public void lowerNonUniqueSegments() { + if ( ! invReason.contains("MULTIPLE_PROBES") && !invReason.contains("NO_VARIANTS_FOUND") ) { + String leftFlank = rawSequence.toString().split("\\[")[0]; + String rightFlank = rawSequence.toString().split("\\]")[1]; + List badLeft = getBadIndeces(leftFlank); + List badRight = getBadIndeces(rightFlank); + // propagate lowercases into the printed sequence + for ( int idx = 0; idx < leftFlank.length(); idx++ ) { + while ( badLeft.size() > 0 && idx > badLeft.get(0) + virtualPrimerSize ) { + badLeft.remove(0); + } + + if ( badLeft.size() > 0 && badLeft.get(0) <= idx && idx <= badLeft.get(0) + virtualPrimerSize ) { + sequence.setCharAt(idx,Character.toLowerCase(sequence.charAt(idx))); + } + } + + int offset = 1 + rawSequence.indexOf("]"); + for ( int i= 0; i < rightFlank.length(); i++ ) { + int idx = i + offset; + while ( badRight.size() > 0 && i > badRight.get(0) + virtualPrimerSize ) { + //logger.debug("Removing "+badRight.get(0)+" because "+(badRight.get(0)+virtualPrimerSize)+" < "+i); + badRight.remove(0); + } + + if ( badRight.size() > 0 && badRight.get(0) <= i && i <= badRight.get(0) + virtualPrimerSize ) { + //logger.debug("Resetting character on right flank: "+idx+" "+i+" offset="+offset); + //logger.debug(sequence); + sequence.setCharAt(idx,Character.toLowerCase(sequence.charAt(idx))); + //logger.debug(sequence); + } + } + } + } + + private List getBadIndeces(String sequence) { + + List badLeftIndeces = new ArrayList(sequence.length()-virtualPrimerSize); + for ( int i = 0; i < sequence.length()-virtualPrimerSize ; i++ ) { + String toAlign = sequence.substring(i,i+virtualPrimerSize); + Iterable allAlignments = aligner.getAllAlignments(toAlign.getBytes()); + for ( Alignment[] alignments : allAlignments ) { + if ( alignments.length > 1 ) { + if ( alignments[0].getMappingQuality() == 0 ) { + // this region is bad -- multiple MQ alignments + badLeftIndeces.add(i); + } + } + } + } + + return badLeftIndeces; + } + + + /** + * Note- this is an old function - a proxy for identifying regions with low specificity to genome. Saved in case the alignment-based version + * turns out to be worse than just doing a simple repeat-lowering method. + */ + public void lowerRepeats() { + // convert to lower case low-complexity repeats, e.g. tandem k-mers + final int K_LIM = 8; + String seq = sequence.toString(); + StringBuilder newSequence = new StringBuilder(); + int start_pos = 0; + while( start_pos < seq.length() ) { + boolean broke = false; + for ( int length = K_LIM; length > 1; length -- ) { + //logger.debug(String.format("start1: %d end1: %d start2: %d end2: %d str: %d",start_pos,start_pos+length,start_pos+length,start_pos+2*length,seq.length())); + if ( start_pos + 2*length> seq.length() ) { + continue; + } + if ( equalsIgnoreNs(seq.substring(start_pos,start_pos+length),seq.substring(start_pos+length,start_pos+2*length)) ) { + newSequence.append(seq.substring(start_pos,start_pos+length).toLowerCase()); + newSequence.append(seq.substring(start_pos+length,start_pos+2*length).toLowerCase()); + start_pos += 2*length; + broke = true; + break; + } + } + + if ( ! broke ) { + newSequence.append(seq.substring(start_pos,start_pos+1)); + start_pos++; + } + + } + + if ( seq.indexOf("[") != seq.lastIndexOf("[") ) { + return; + } + + sequence = newSequence; + } + + public boolean equalsIgnoreNs(String one, String two) { + if ( one.length() != two.length() ) { return false; } + for ( int idx = 0; idx < one.length(); idx++ ) { + if ( Character.toUpperCase(one.charAt(idx)) != Character.toUpperCase(two.charAt(idx)) ) { + if ( Character.toUpperCase(one.charAt(idx)) != 'N' && Character.toUpperCase(two.charAt(idx)) != 'N' ) { + return false; + } + } + } + + //logger.debug(String.format("one: %s two: %s",one,two)); + + return true; + } + + public void print() { + String valid; + if ( sequenceInvalid ) { + valid = ""; + while ( invReason.size() > 0 ) { + String reason = invReason.get(0); + invReason.remove(reason); + int num = 1; + while ( invReason.contains(reason) ) { + num++; + invReason.remove(reason); + } + valid += String.format("%s=%d,",reason,num); + } + } else { + valid = "Valid"; + } + + String seqIdentity = sequence.toString().replace('n', 'N').replace('i', 'I').replace('d', 'D'); + out.printf(">%s %s %s%n%s%n", allelePos != null ? allelePos.toString() : "multiple", valid, probeName, seqIdentity); + } +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java index 15d808ebe..3867aa958 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java @@ -1,14 +1,12 @@ package org.broadinstitute.sting.gatk.walkers.varianteval; +import net.sf.picard.reference.IndexedFastaSequenceFile; import net.sf.samtools.SAMSequenceRecord; import org.apache.log4j.Logger; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; @@ -20,23 +18,21 @@ import org.broadinstitute.sting.gatk.walkers.TreeReducible; import org.broadinstitute.sting.gatk.walkers.Window; import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.VariantEvaluator; import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.VariantStratifier; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint; import org.broadinstitute.sting.gatk.walkers.varianteval.util.*; import org.broadinstitute.sting.gatk.walkers.variantrecalibration.Tranche; import org.broadinstitute.sting.gatk.walkers.variantrecalibration.VariantRecalibrator; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; +import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.StingException; import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.gatk.walkers.varianteval.util.TableType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; -import net.sf.picard.reference.FastaSequenceFile; -import net.sf.picard.reference.IndexedFastaSequenceFile; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import net.sf.picard.reference.ReferenceSequence; -import java.io.FileNotFoundException; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import java.io.File; +import java.io.FileNotFoundException; import java.io.PrintStream; import java.lang.reflect.Field; import java.util.*; @@ -71,7 +67,7 @@ public class VariantEvalWalker extends RodWalker implements Tr @Argument(fullName="stratificationModule", shortName="ST", doc="One or more specific stratification modules to apply to the eval track(s) (in addition to the standard stratifications, unless -noS is specified)", required=false) protected String[] STRATIFICATIONS_TO_USE = {}; - @Argument(fullName="doNotUseAllStandardStratifications", shortName="noST", doc="Do not use the standard stratification modules by default (instead, only those that are specified with the -S option)") + @Argument(fullName="doNotUseAllStandardStratifications", shortName="noST", doc="Do not use the standard stratification modules by default (instead, only those that are specified with the -S option)", required=false) protected Boolean NO_STANDARD_STRATIFICATIONS = false; @Argument(fullName="onlyVariantsOfType", shortName="VT", doc="If provided, only variants of these types will be considered during the evaluation, in ", required=false) @@ -81,7 +77,7 @@ public class VariantEvalWalker extends RodWalker implements Tr @Argument(fullName="evalModule", shortName="EV", doc="One or more specific eval modules to apply to the eval track(s) (in addition to the standard modules, unless -noE is specified)", required=false) protected String[] MODULES_TO_USE = {}; - @Argument(fullName="doNotUseAllStandardModules", shortName="noEV", doc="Do not use the standard modules by default (instead, only those that are specified with the -E option)") + @Argument(fullName="doNotUseAllStandardModules", shortName="noEV", doc="Do not use the standard modules by default (instead, only those that are specified with the -E option)", required=false) protected Boolean NO_STANDARD_MODULES = false; // Other arguments diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CompEvalGenotypes.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CompEvalGenotypes.java index 787dbe9af..925bff9c0 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CompEvalGenotypes.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CompEvalGenotypes.java @@ -1,7 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators; -import org.broadinstitute.sting.utils.variantcontext.Genotype; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.variantcontext.Genotype; class NewCompEvalGenotypes { private GenomeLoc loc; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CompOverlap.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CompOverlap.java index 76db330ed..255a54737 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CompOverlap.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CompOverlap.java @@ -1,12 +1,12 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; /** * The Broad Institute diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CountVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CountVariants.java index c4277adc9..8c281b2f8 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CountVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CountVariants.java @@ -1,13 +1,13 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; @Analysis(description = "Counts different classes of variants in the sample") public class CountVariants extends VariantEvaluator implements StandardEval { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypeConcordance.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypeConcordance.java index 4b56cf130..bbd3f5f54 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypeConcordance.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypeConcordance.java @@ -1,18 +1,18 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators; import org.apache.log4j.Logger; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.TableType; +import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.StingException; import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.gatk.walkers.varianteval.util.TableType; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypePhasingEvaluator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypePhasingEvaluator.java index 3d14dd0e5..a476a2680 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypePhasingEvaluator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypePhasingEvaluator.java @@ -1,20 +1,20 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators; import org.apache.log4j.Logger; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.phasing.AllelePair; import org.broadinstitute.sting.gatk.walkers.phasing.ReadBackedPhasingWalker; import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; import org.broadinstitute.sting.gatk.walkers.varianteval.util.NewEvaluationContext; import org.broadinstitute.sting.gatk.walkers.varianteval.util.TableType; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.HashMap; import java.util.HashSet; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelLengthHistogram.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelLengthHistogram.java index 5daf33a9f..77def0f30 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelLengthHistogram.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelLengthHistogram.java @@ -1,13 +1,13 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; import org.broadinstitute.sting.gatk.walkers.varianteval.util.TableType; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; /** * IF THERE IS NO JAVADOC RIGHT HERE, YELL AT chartl diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelMetricsByAC.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelMetricsByAC.java index eca6c5193..6e1b76acd 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelMetricsByAC.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelMetricsByAC.java @@ -1,14 +1,14 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; import org.broadinstitute.sting.gatk.walkers.varianteval.util.TableType; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.ArrayList; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelStatistics.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelStatistics.java index 48b06d532..d99196ecf 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelStatistics.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelStatistics.java @@ -1,15 +1,15 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; import org.broadinstitute.sting.gatk.walkers.varianteval.util.TableType; import org.broadinstitute.sting.utils.IndelUtils; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.ArrayList; import java.util.HashMap; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MendelianViolationEvaluator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MendelianViolationEvaluator.java index 85e0b5889..a0cc393d9 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MendelianViolationEvaluator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MendelianViolationEvaluator.java @@ -1,21 +1,15 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; import org.broadinstitute.sting.utils.MendelianViolation; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; - -import java.util.Arrays; -import java.util.List; -import java.util.regex.Matcher; -import java.util.regex.Pattern; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; /** * Mendelian violation detection and counting diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/PrintMissingComp.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/PrintMissingComp.java index 7d54d0df8..b209ee13d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/PrintMissingComp.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/PrintMissingComp.java @@ -24,12 +24,12 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; @Analysis(name = "PrintMissingComp", description = "the overlap between eval and comp sites") public class PrintMissingComp extends VariantEvaluator { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/SamplePreviousGenotypes.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/SamplePreviousGenotypes.java index 5f3e6b0fa..751f61a97 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/SamplePreviousGenotypes.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/SamplePreviousGenotypes.java @@ -1,7 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators; -import org.broadinstitute.sting.utils.variantcontext.Genotype; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.variantcontext.Genotype; import java.util.HashMap; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/SimpleMetricsByAC.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/SimpleMetricsByAC.java index deed05508..d466645ea 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/SimpleMetricsByAC.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/SimpleMetricsByAC.java @@ -1,18 +1,18 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker; import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.Degeneracy; import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.Sample; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; import org.broadinstitute.sting.gatk.walkers.varianteval.util.StateKey; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.gatk.walkers.varianteval.util.TableType; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import java.util.ArrayList; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ThetaVariantEvaluator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ThetaVariantEvaluator.java index 89c67cfe9..ec43cbd55 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ThetaVariantEvaluator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ThetaVariantEvaluator.java @@ -1,13 +1,13 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/TiTvVariantEvaluator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/TiTvVariantEvaluator.java index 8811dc001..be957abd7 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/TiTvVariantEvaluator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/TiTvVariantEvaluator.java @@ -1,13 +1,13 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; @Analysis(description = "Ti/Tv Variant Evaluator") public class TiTvVariantEvaluator extends VariantEvaluator implements StandardEval { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ValidationReport.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ValidationReport.java index 405f35635..9c331b577 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ValidationReport.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ValidationReport.java @@ -1,17 +1,16 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; +import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.*; +import java.util.Set; /** * The Broad Institute diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantEvaluator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantEvaluator.java index 6017ecca3..e29e7ed50 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantEvaluator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantEvaluator.java @@ -1,12 +1,12 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker; import org.broadinstitute.sting.gatk.walkers.varianteval.util.NewEvaluationContext; import org.broadinstitute.sting.gatk.walkers.varianteval.util.StateKey; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; public abstract class VariantEvaluator { public void initialize(VariantEvalWalker walker) {} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantQualityScore.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantQualityScore.java index 4af14810b..b6ad55b18 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantQualityScore.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantQualityScore.java @@ -25,16 +25,16 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; import org.broadinstitute.sting.gatk.walkers.varianteval.util.TableType; import org.broadinstitute.sting.utils.collections.Pair; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import java.util.ArrayList; import java.util.HashMap; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java index 2cbc66e31..411493d4f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java @@ -1,12 +1,11 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp; -import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.ArrayList; import java.util.Set; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleFrequency.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleFrequency.java index 48b4ffa91..2ffc7716c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleFrequency.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleFrequency.java @@ -1,10 +1,10 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp; import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.ArrayList; import java.util.Set; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CompRod.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CompRod.java index 9942ba8d6..c6975808f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CompRod.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CompRod.java @@ -1,9 +1,9 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.ArrayList; import java.util.Set; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Contig.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Contig.java index 1a9d31085..c14355035 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Contig.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Contig.java @@ -1,9 +1,9 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.ArrayList; import java.util.Set; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CpG.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CpG.java index 15b1d41c1..e1f2ae983 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CpG.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CpG.java @@ -1,13 +1,25 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.ArrayList; import java.util.Set; +/** + * CpG is a stratification module for VariantEval that divides the input data by within/not within a CpG site + * + *

+ * It is a three-state stratification: + *

    + *
  • The locus is a CpG site ("CpG") + *
  • The locus is not a CpG site ("non_CpG") + *
  • The locus is either a CpG or not a CpG site ("all") + *
+ * A CpG site is defined as a site where the reference base at a locus is a C and the adjacent reference base in the 3' direction is a G. + */ public class CpG extends VariantStratifier { private ArrayList states; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Degeneracy.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Degeneracy.java index 65af6090c..155a66186 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Degeneracy.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Degeneracy.java @@ -1,14 +1,14 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.ArrayList; import java.util.HashMap; -import java.util.Set; import java.util.HashSet; +import java.util.Set; public class Degeneracy extends VariantStratifier { private ArrayList states; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/EvalRod.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/EvalRod.java index 7bd15a974..40f952fd2 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/EvalRod.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/EvalRod.java @@ -1,9 +1,9 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.ArrayList; import java.util.Set; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Filter.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Filter.java index 8d8782ab7..3b7a419f2 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Filter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Filter.java @@ -1,9 +1,9 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.ArrayList; import java.util.Set; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/FunctionalClass.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/FunctionalClass.java index fd724d6d1..c6c094f8e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/FunctionalClass.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/FunctionalClass.java @@ -1,9 +1,9 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.ArrayList; import java.util.Set; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/JexlExpression.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/JexlExpression.java index a7ccd3182..76efedbf4 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/JexlExpression.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/JexlExpression.java @@ -1,10 +1,10 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import java.util.ArrayList; import java.util.Set; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java index 39d2e6b4d..a0973a088 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java @@ -1,9 +1,9 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.ArrayList; import java.util.Collection; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Sample.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Sample.java index f909de4f3..a2a3eb3fb 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Sample.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Sample.java @@ -1,9 +1,9 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.ArrayList; import java.util.Set; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/VariantStratifier.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/VariantStratifier.java index 218cb23ca..2c4b8bc46 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/VariantStratifier.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/VariantStratifier.java @@ -1,11 +1,10 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker; import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.ArrayList; import java.util.Set; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/tags/Analysis.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/Analysis.java similarity index 80% rename from public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/tags/Analysis.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/Analysis.java index 129d5a95d..2b37ce210 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/tags/Analysis.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/Analysis.java @@ -1,4 +1,4 @@ -package org.broadinstitute.sting.gatk.walkers.varianteval.tags; +package org.broadinstitute.sting.gatk.walkers.varianteval.util; import java.lang.annotation.Retention; import java.lang.annotation.RetentionPolicy; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/AnalysisModuleScanner.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/AnalysisModuleScanner.java index c8d917040..db44e9e28 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/AnalysisModuleScanner.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/AnalysisModuleScanner.java @@ -23,8 +23,6 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.util; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.lang.annotation.Annotation; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/tags/DataPoint.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/DataPoint.java similarity index 77% rename from public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/tags/DataPoint.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/DataPoint.java index 3ba448049..396843252 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/tags/DataPoint.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/DataPoint.java @@ -1,4 +1,4 @@ -package org.broadinstitute.sting.gatk.walkers.varianteval.tags; +package org.broadinstitute.sting.gatk.walkers.varianteval.util; import java.lang.annotation.Retention; import java.lang.annotation.RetentionPolicy; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/NewEvaluationContext.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/NewEvaluationContext.java index 3208c26bb..8112ae97f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/NewEvaluationContext.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/NewEvaluationContext.java @@ -1,6 +1,5 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.util; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -9,6 +8,7 @@ import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.VariantEvalu import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.VariantStratifier; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.StingException; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.HashMap; import java.util.Set; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/SortableJexlVCMatchExp.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/SortableJexlVCMatchExp.java index 0281653af..38f7a7f40 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/SortableJexlVCMatchExp.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/SortableJexlVCMatchExp.java @@ -1,6 +1,6 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.util; -import org.apache.commons.jexl2.*; +import org.apache.commons.jexl2.Expression; import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; public class SortableJexlVCMatchExp extends VariantContextUtils.JexlVCMatchExp implements Comparable { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java index b8e45e462..0a915db37 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java @@ -1,24 +1,21 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.util; import org.apache.log4j.Logger; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.report.GATKReport; import org.broadinstitute.sting.gatk.report.GATKReportTable; -import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker; import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.StandardEval; import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.VariantEvaluator; import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.RequiredStratification; import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.StandardStratification; import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.VariantStratifier; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint; import org.broadinstitute.sting.utils.classloader.PluginManager; import org.broadinstitute.sting.utils.exceptions.StingException; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import java.lang.reflect.Field; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java index d4e07dccf..b195fd35f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java @@ -25,23 +25,23 @@ package org.broadinstitute.sting.gatk.walkers.variantrecalibration; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.*; -import org.broadinstitute.sting.commandline.*; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Input; +import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.RodWalker; -import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.SampleUtils; +import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.collections.NestedHashMap; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.text.XReadLines; -import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.io.File; import java.io.FileNotFoundException; -import java.lang.Double; import java.util.*; /** @@ -49,8 +49,6 @@ import java.util.*; * * @author rpoplin * @since Mar 14, 2011 - * - * @help.summary Applies cuts to the input vcf file (by adding filter lines) to achieve the desired novel FDR levels which were specified during VariantRecalibration */ public class ApplyRecalibration extends RodWalker { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/Tranche.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/Tranche.java index 64fe36637..15424f0f7 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/Tranche.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/Tranche.java @@ -25,11 +25,14 @@ package org.broadinstitute.sting.gatk.walkers.variantrecalibration; -import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.text.XReadLines; -import java.io.*; +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.FileNotFoundException; +import java.io.PrintStream; import java.util.*; /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VQSRCalibrationCurve.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VQSRCalibrationCurve.java index 5deb5d8c2..bc7252ec2 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VQSRCalibrationCurve.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VQSRCalibrationCurve.java @@ -26,10 +26,10 @@ package org.broadinstitute.sting.gatk.walkers.variantrecalibration; import org.apache.log4j.Logger; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.text.XReadLines; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.io.File; import java.io.FileNotFoundException; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java index ddeda1699..67d54a408 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java @@ -26,7 +26,6 @@ package org.broadinstitute.sting.gatk.walkers.variantrecalibration; import org.apache.log4j.Logger; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; @@ -34,9 +33,12 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.collections.ExpandingArrayList; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.io.PrintStream; -import java.util.*; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; /** * Created by IntelliJ IDEA. diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java index 2d0355d7d..76c888640 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java @@ -25,14 +25,12 @@ package org.broadinstitute.sting.gatk.walkers.variantrecalibration; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.ArgumentCollection; import org.broadinstitute.sting.commandline.Hidden; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.RodWalker; @@ -42,6 +40,8 @@ import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.collections.ExpandingArrayList; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import java.io.File; import java.io.FileNotFoundException; @@ -53,8 +53,6 @@ import java.util.*; * * User: rpoplin * Date: 3/12/11 - * - * @help.summary Takes variant calls as .vcf files, learns a Gaussian mixture model over the variant annotations and evaluates the variant -- assigning an informative lod score */ public class VariantRecalibrator extends RodWalker, ExpandingArrayList> implements TreeReducible> { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java index 597a54dc1..9c2a520ef 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java @@ -25,12 +25,13 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.*; +import org.apache.poi.hpsf.Variant; +import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Hidden; +import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; +import org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.Reference; import org.broadinstitute.sting.gatk.walkers.Requires; @@ -38,11 +39,10 @@ import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.gatk.walkers.Window; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; -import org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import java.util.*; @@ -150,7 +150,7 @@ public class CombineVariants extends RodWalker { // get all of the vcf rods at this locus // Need to provide reference bases to simpleMerge starting at current locus - Collection vcs = tracker.getAllVariantContexts(ref, null,context.getLocation(), true, false); + Collection vcs = tracker.getAllVariantContexts(ref, null, context.getLocation(), true, false); if ( sitesOnlyVCF ) { vcs = VariantContextUtils.sitesOnlyVariantContexts(vcs); @@ -173,17 +173,25 @@ public class CombineVariants extends RodWalker { if (minimumN > 1 && (vcs.size() - numFilteredRecords < minimumN)) return 0; - VariantContext mergedVC; + List mergedVCs = new ArrayList(); if ( master ) { - mergedVC = VariantContextUtils.masterMerge(vcs, "master"); + mergedVCs.add(VariantContextUtils.masterMerge(vcs, "master")); } else { - mergedVC = VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(),vcs, priority, filteredRecordsMergeType, - genotypeMergeOption, true, printComplexMerges, ref.getBase(), SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC); + Map> VCsByType = VariantContextUtils.separateVariantContextsByType(vcs); + // iterate over the types so that it's deterministic + for ( VariantContext.Type type : VariantContext.Type.values() ) { + if ( VCsByType.containsKey(type) ) + mergedVCs.add(VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(), VCsByType.get(type), + priority, filteredRecordsMergeType, genotypeMergeOption, true, printComplexMerges, + ref.getBase(), SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC)); + } } - //out.printf(" merged => %s%nannotated => %s%n", mergedVC, annotatedMergedVC); + for ( VariantContext mergedVC : mergedVCs ) { + // only operate at the start of events + if ( mergedVC == null ) + continue; - if ( mergedVC != null ) { // only operate at the start of events HashMap attributes = new HashMap(mergedVC.getAttributes()); // re-compute chromosome counts VariantContextUtils.calculateChromosomeCounts(mergedVC, attributes, false); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java index ba6f5e513..b45ee1b67 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java @@ -24,18 +24,21 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; -import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; -import org.broadinstitute.sting.utils.SampleUtils; +import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.*; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; +import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; +import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.*; +import java.util.Arrays; +import java.util.Collection; +import java.util.Map; +import java.util.Set; /** * Filters a lifted-over VCF file for ref bases that have been changed. diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java index d9dd85f0c..2ebd183f4 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java @@ -28,21 +28,17 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; import net.sf.samtools.Cigar; import net.sf.samtools.CigarElement; import net.sf.samtools.CigarOperator; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.SortingVCFWriter; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; -import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.SampleUtils; +import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.sam.AlignmentUtils; -import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java index 7eda54387..4f05c8aac 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java @@ -24,27 +24,27 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.*; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Output; -import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; -import org.broadinstitute.sting.utils.SampleUtils; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.*; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; - -import java.io.File; -import java.util.*; - import net.sf.picard.PicardException; import net.sf.picard.liftover.LiftOver; import net.sf.picard.util.Interval; import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMFileReader; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.RMD; +import org.broadinstitute.sting.gatk.walkers.Requires; +import org.broadinstitute.sting.gatk.walkers.RodWalker; +import org.broadinstitute.sting.utils.SampleUtils; +import org.broadinstitute.sting.utils.codecs.vcf.*; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; + +import java.io.File; +import java.util.*; /** * Lifts a VCF file over from one build to another. Note that the resulting VCF could be mis-sorted. diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/RandomlySplitVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/RandomlySplitVariants.java index 0c41a9728..f0756d884 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/RandomlySplitVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/RandomlySplitVariants.java @@ -24,11 +24,6 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.StandardVCFWriter; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; -import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; @@ -39,11 +34,15 @@ import org.broadinstitute.sting.gatk.walkers.RMD; import org.broadinstitute.sting.gatk.walkers.Requires; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.utils.SampleUtils; +import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.io.File; -import java.util.*; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashSet; +import java.util.Set; /** * Takes a VCF file, randomly splits variants into two different sets, and outputs 2 new VCFs with the results. diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java index cbac54326..e1a3659b8 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java @@ -25,25 +25,37 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; import org.broadinstitute.sting.commandline.Hidden; +import org.broadinstitute.sting.commandline.Input; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.codecs.vcf.*; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.text.XReadLines; import org.broadinstitute.sting.utils.variantcontext.*; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.utils.MendelianViolation; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Hidden; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.RMD; import org.broadinstitute.sting.gatk.walkers.Requires; import org.broadinstitute.sting.gatk.walkers.RodWalker; +import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.MendelianViolation; import org.broadinstitute.sting.utils.SampleUtils; -import org.apache.log4j.Logger; +import org.broadinstitute.sting.utils.codecs.vcf.*; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import java.io.File; +import java.io.FileNotFoundException; +import java.io.PrintStream; import java.lang.annotation.AnnotationFormatError; import java.util.*; @@ -92,6 +104,13 @@ public class SelectVariants extends RodWalker { @Argument(fullName="keepAFSpectrum", shortName="keepAF", doc="Don't include loci found to be non-variant after the subsetting procedure.", required=false) private boolean KEEP_AF_SPECTRUM = false; + @Hidden + @Argument(fullName="afFile", shortName="afFile", doc="The output recal file used by ApplyRecalibration", required=false) + private File AF_FILE = new File(""); + + @Hidden + @Argument(fullName="family_structure_file", shortName="familyFile", doc="USE YAML FILE INSTEAD (-SM) !!! string formatted as dad+mom=child where these parameters determine which sample names are examined", required=false) + private File FAMILY_STRUCTURE_FILE = null; @Argument(fullName="family_structure", shortName="family", doc="USE YAML FILE INSTEAD (-SM) !!! string formatted as dad+mom=child where these parameters determine which sample names are examined", required=false) private String FAMILY_STRUCTURE = ""; @@ -114,6 +133,9 @@ public class SelectVariants extends RodWalker { @Argument(fullName="selectIndels", shortName="indels", doc="Select only Indels.", required=false) private boolean SELECT_INDELS = false; + @Hidden + @Argument(fullName="outMVFile", shortName="outMVFile", doc="USE YAML FILE INSTEAD (-SM) !!! string formatted as dad+mom=child where these parameters determine which sample names are examined", required=false) + private String outMVFile = null; /* Private class used to store the intermediate variants in the integer random selection process */ private class RandomVariantStructure { @@ -141,7 +163,7 @@ public class SelectVariants extends RodWalker { private boolean DISCORDANCE_ONLY = false; private boolean CONCORDANCE_ONLY = false; - private MendelianViolation mv; + private Set mvSet = new HashSet(); /* default name for the variant dataset (VCF) */ private final String variantRodName = "variant"; @@ -156,8 +178,14 @@ public class SelectVariants extends RodWalker { private RandomVariantStructure [] variantArray; + /* Variables used for random selection with AF boosting */ + private ArrayList afBreakpoints = null; + private ArrayList afBoosts = null; + double bkDelta = 0.0; + private PrintStream outMVFileStream = null; + /** * Set up the VCF writer, the sample expressions and regexs, and the JEXL matcher @@ -213,10 +241,29 @@ public class SelectVariants extends RodWalker { CONCORDANCE_ONLY = concordanceRodName.length() > 0; if (CONCORDANCE_ONLY) logger.info("Selecting only variants concordant with the track: " + concordanceRodName); - if (MENDELIAN_VIOLATIONS) - mv = new MendelianViolation(getToolkit(), MENDELIAN_VIOLATION_QUAL_THRESHOLD); + if (MENDELIAN_VIOLATIONS) { + if ( FAMILY_STRUCTURE_FILE != null) { + try { + for ( final String line : new XReadLines( FAMILY_STRUCTURE_FILE ) ) { + MendelianViolation mv = new MendelianViolation(line, MENDELIAN_VIOLATION_QUAL_THRESHOLD); + if (samples.contains(mv.getSampleChild()) && samples.contains(mv.getSampleDad()) && samples.contains(mv.getSampleMom())) + mvSet.add(mv); + } + } catch ( FileNotFoundException e ) { + throw new UserException.CouldNotReadInputFile(AF_FILE, e); + } + if (outMVFile != null) + try { + outMVFileStream = new PrintStream(outMVFile); + } + catch (FileNotFoundException e) { + throw new UserException.CouldNotCreateOutputFile(outMVFile, "Can't open output file", e); } + } + else + mvSet.add(new MendelianViolation(getToolkit(), MENDELIAN_VIOLATION_QUAL_THRESHOLD)); + } else if (!FAMILY_STRUCTURE.isEmpty()) { - mv = new MendelianViolation(FAMILY_STRUCTURE, MENDELIAN_VIOLATION_QUAL_THRESHOLD); + mvSet.add(new MendelianViolation(FAMILY_STRUCTURE, MENDELIAN_VIOLATION_QUAL_THRESHOLD)); MENDELIAN_VIOLATIONS = true; } @@ -227,7 +274,34 @@ public class SelectVariants extends RodWalker { } SELECT_RANDOM_FRACTION = fractionRandom > 0; - if (SELECT_RANDOM_FRACTION) logger.info("Selecting approximately " + fractionRandom + "% of the variants at random from the variant track"); + if (SELECT_RANDOM_FRACTION) logger.info("Selecting approximately " + 100.0*fractionRandom + "% of the variants at random from the variant track"); + + + if (KEEP_AF_SPECTRUM) { + try { + afBreakpoints = new ArrayList(); + afBoosts = new ArrayList(); + logger.info("Reading in AF boost table..."); + boolean firstLine = false; + for ( final String line : new XReadLines( AF_FILE ) ) { + if (!firstLine) { + firstLine = true; + continue; + } + final String[] vals = line.split(" "); + + double bkp = Double.valueOf(vals[0]); + double afb = Double.valueOf(vals[1]); + afBreakpoints.add(bkp); + afBoosts.add(afb); + + } + bkDelta = afBreakpoints.get(0); + } catch ( FileNotFoundException e ) { + throw new UserException.CouldNotReadInputFile(AF_FILE, e); + } + + } } /** @@ -251,9 +325,24 @@ public class SelectVariants extends RodWalker { for (VariantContext vc : vcs) { if (MENDELIAN_VIOLATIONS) { - if (!mv.isViolation(vc)) { - break; + boolean foundMV = false; + for (MendelianViolation mv : mvSet) { + if (mv.isViolation(vc)) { + foundMV = true; + //System.out.println(vc.toString()); + if (outMVFile != null) + outMVFileStream.format("MV@%s:%d. REF=%s, ALT=%s, AC=%d, momID=%s, dadID=%s, childID=%s, momG=%s, momGL=%s, dadG=%s, dadGL=%s, " + + "childG=%s childGL=%s\n",vc.getChr(), vc.getStart(), + vc.getReference().getDisplayString(), vc.getAlternateAllele(0).getDisplayString(), vc.getChromosomeCount(vc.getAlternateAllele(0)), + mv.getSampleMom(), mv.getSampleDad(), mv.getSampleChild(), + vc.getGenotype(mv.getSampleMom()).toBriefString(), vc.getGenotype(mv.getSampleMom()).getLikelihoods().getAsString(), + vc.getGenotype(mv.getSampleDad()).toBriefString(), vc.getGenotype(mv.getSampleMom()).getLikelihoods().getAsString(), + vc.getGenotype(mv.getSampleChild()).toBriefString(),vc.getGenotype(mv.getSampleChild()).getLikelihoods().getAsString() ); + } } + + if (!foundMV) + break; } if (DISCORDANCE_ONLY) { Collection compVCs = tracker.getVariantContexts(ref, discordanceRodName, null, context.getLocation(), true, false); @@ -284,46 +373,59 @@ public class SelectVariants extends RodWalker { if (SELECT_RANDOM_NUMBER) { randomlyAddVariant(++variantNumber, sub, ref.getBase()); } - else if (!SELECT_RANDOM_FRACTION || GenomeAnalysisEngine.getRandomGenerator().nextDouble() < fractionRandom) { + else if (!SELECT_RANDOM_FRACTION || (!KEEP_AF_SPECTRUM && GenomeAnalysisEngine.getRandomGenerator().nextDouble() < fractionRandom)) { vcfWriter.add(sub, ref.getBase()); } else { if (SELECT_RANDOM_FRACTION && KEEP_AF_SPECTRUM ) { - Collection compVCs = tracker.getVariantContexts(ref, inputAFRodName, null, context.getLocation(), true, false); - if (compVCs.isEmpty()) - return 0; - // ok we have a comp VC and we need to match the AF spectrum of inputAFRodName. // We then pick a variant with probablity AF*desiredFraction - for (VariantContext compVC : compVCs) { - if ( compVC.hasAttribute(VCFConstants.ALLELE_FREQUENCY_KEY) ) { - String afo = compVC.getAttributeAsString(VCFConstants.ALLELE_FREQUENCY_KEY); + if ( sub.hasAttribute(VCFConstants.ALLELE_FREQUENCY_KEY) ) { + String afo = sub.getAttributeAsString(VCFConstants.ALLELE_FREQUENCY_KEY); - double af; - if (afo.contains(",")) { - String[] afs = afo.split(","); - afs[0] = afs[0].substring(1,afs[0].length()); - afs[afs.length-1] = afs[afs.length-1].substring(0,afs[afs.length-1].length()-1); + double af; + double afBoost = 1.0; + if (afo.contains(",")) { + String[] afs = afo.split(","); + afs[0] = afs[0].substring(1,afs[0].length()); + afs[afs.length-1] = afs[afs.length-1].substring(0,afs[afs.length-1].length()-1); - double[] afd = new double[afs.length]; + double[] afd = new double[afs.length]; - for (int k=0; k < afd.length; k++) - afd[k] = Double.valueOf(afs[k]); + for (int k=0; k < afd.length; k++) + afd[k] = Double.valueOf(afs[k]); - af = MathUtils.arrayMax(afd); - //af = Double.valueOf(afs[0]); + af = MathUtils.arrayMax(afd); + //af = Double.valueOf(afs[0]); - } - else - af = Double.valueOf(afo); - - //System.out.format("%s .. %4.4f\n",afo.toString(), af); - if (GenomeAnalysisEngine.getRandomGenerator().nextDouble() < fractionRandom * af) - vcfWriter.add(sub, ref.getBase()); } - break; // do only one vc + else + af = Double.valueOf(afo); + + // now boost af by table read from file if desired + //double bkpt = 0.0; + int bkidx = 0; + if (!afBreakpoints.isEmpty()) { + for ( Double bkpt : afBreakpoints) { + if (af < bkpt + bkDelta) + break; + else bkidx++; + } + if (bkidx >=afBoosts.size()) + bkidx = afBoosts.size()-1; + afBoost = afBoosts.get(bkidx); + //System.out.formatPrin("af:%f bkidx:%d afboost:%f\n",af,bkidx,afBoost); + + + + } + + //System.out.format("%s .. %4.4f\n",afo.toString(), af); + if (GenomeAnalysisEngine.getRandomGenerator().nextDouble() < fractionRandom * afBoost * afBoost) + vcfWriter.add(sub, ref.getBase()); } + } } } @@ -407,8 +509,8 @@ public class SelectVariants extends RodWalker { private boolean haveSameGenotypes(Genotype g1, Genotype g2) { if ((g1.isCalled() && g2.isFiltered()) || - (g2.isCalled() && g1.isFiltered()) || - (g1.isFiltered() && g2.isFiltered() && EXCLUDE_FILTERED)) + (g2.isCalled() && g1.isFiltered()) || + (g1.isFiltered() && g2.isFiltered() && EXCLUDE_FILTERED)) return false; List a1s = g1.getAlleles(); @@ -441,7 +543,7 @@ public class SelectVariants extends RodWalker { * @param vc the VariantContext record to subset * @param samples the samples to extract * @return the subsetted VariantContext - */ + */ private VariantContext subsetRecord(VariantContext vc, Set samples) { if ( samples == null || samples.isEmpty() ) return vc; @@ -451,7 +553,7 @@ public class SelectVariants extends RodWalker { if ( samples.contains(genotypePair.getKey()) ) genotypes.add(genotypePair.getValue()); } - + VariantContext sub = vc.subContextFromGenotypes(genotypes, vc.getAlleles()); HashMap attributes = new HashMap(sub.getAttributes()); @@ -461,7 +563,7 @@ public class SelectVariants extends RodWalker { Genotype g = sub.getGenotype(sample); if (g.isNotFiltered() && g.isCalled()) { - + String dp = (String) g.getAttribute("DP"); if (dp != null && ! dp.equals(VCFConstants.MISSING_DEPTH_v3) && ! dp.equals(VCFConstants.MISSING_VALUE_v4) ) { depth += Integer.valueOf(dp); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java index 1bd73414c..0644c669b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java @@ -25,22 +25,25 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broad.tribble.dbsnp.DbSNPFeature; import org.broad.tribble.TribbleException; +import org.broad.tribble.dbsnp.DbSNPFeature; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Hidden; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; import org.broadinstitute.sting.gatk.walkers.*; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Hidden; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.*; import java.io.File; +import java.util.Collection; +import java.util.HashSet; +import java.util.List; +import java.util.Set; /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java index 482679593..86bb3b0e8 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java @@ -25,20 +25,19 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.*; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.SampleUtils; -import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; +import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.collections.Pair; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java index 71dd5df3f..39358dad5 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java @@ -24,18 +24,18 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; +import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.Requires; import org.broadinstitute.sting.gatk.walkers.RodWalker; -import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import java.io.PrintStream; import java.util.*; @@ -75,17 +75,29 @@ public class VariantsToTable extends RodWalker { // #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT getters.put("CHROM", new Getter() { public String get(VariantContext vc) { return vc.getChr(); } }); getters.put("POS", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getStart()); } }); - getters.put("REF", new Getter() { public String get(VariantContext vc) { return vc.getReference().toString(); } }); + getters.put("REF", new Getter() { + public String get(VariantContext vc) { + String x = ""; + if (vc.hasAttribute(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY)) { + Byte refByte = (Byte)(vc.getAttribute(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY)); + x=x+new String(new byte[]{refByte}); + } + return x+vc.getReference().getDisplayString(); + } + }); getters.put("ALT", new Getter() { public String get(VariantContext vc) { StringBuilder x = new StringBuilder(); int n = vc.getAlternateAlleles().size(); - if ( n == 0 ) return "."; + if (vc.hasAttribute(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY)) { + Byte refByte = (Byte)(vc.getAttribute(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY)); + x.append(new String(new byte[]{refByte})); + } for ( int i = 0; i < n; i++ ) { if ( i != 0 ) x.append(","); - x.append(vc.getAlternateAllele(i).toString()); + x.append(vc.getAlternateAllele(i).getDisplayString()); } return x.toString(); } @@ -169,6 +181,31 @@ public class VariantsToTable extends RodWalker { throw new UserException(String.format("Missing field %s in vc %s at %s", field, vc.getSource(), vc)); } + if (field.equals("AF") || field.equals("AC")) { + String afo = val; + + double af=0; + if (afo.contains(",")) { + String[] afs = afo.split(","); + afs[0] = afs[0].substring(1,afs[0].length()); + afs[afs.length-1] = afs[afs.length-1].substring(0,afs[afs.length-1].length()-1); + + double[] afd = new double[afs.length]; + + for (int k=0; k < afd.length; k++) + afd[k] = Double.valueOf(afs[k]); + + af = MathUtils.arrayMax(afd); + //af = Double.valueOf(afs[0]); + + } + else + if (!afo.equals("NA")) + af = Double.valueOf(afo); + + val = Double.toString(af); + + } vals.add(val); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java index 79134b553..aa0e5987f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java @@ -28,26 +28,26 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; import net.sf.samtools.util.CloseableIterator; import org.broad.tribble.dbsnp.DbSNPCodec; import org.broad.tribble.dbsnp.DbSNPFeature; -import org.broadinstitute.sting.utils.codecs.hapmap.HapMapFeature; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.*; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; -import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; -import org.broadinstitute.sting.gatk.refdata.*; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors; import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder; import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; import org.broadinstitute.sting.gatk.walkers.*; -import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.SampleUtils; +import org.broadinstitute.sting.utils.codecs.hapmap.HapMapFeature; +import org.broadinstitute.sting.utils.codecs.vcf.*; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/jna/clibrary/LibC.java b/public/java/src/org/broadinstitute/sting/jna/clibrary/LibC.java index b01533ee6..b5efcc153 100644 --- a/public/java/src/org/broadinstitute/sting/jna/clibrary/LibC.java +++ b/public/java/src/org/broadinstitute/sting/jna/clibrary/LibC.java @@ -24,7 +24,10 @@ package org.broadinstitute.sting.jna.clibrary; -import com.sun.jna.*; +import com.sun.jna.LastErrorException; +import com.sun.jna.Native; +import com.sun.jna.NativeLong; +import com.sun.jna.Structure; import com.sun.jna.ptr.NativeLongByReference; /** diff --git a/public/java/src/org/broadinstitute/sting/jna/lsf/v7_0_6/LibLsf.java b/public/java/src/org/broadinstitute/sting/jna/lsf/v7_0_6/LibLsf.java index 0c0579d6f..c7b3de6cf 100644 --- a/public/java/src/org/broadinstitute/sting/jna/lsf/v7_0_6/LibLsf.java +++ b/public/java/src/org/broadinstitute/sting/jna/lsf/v7_0_6/LibLsf.java @@ -29,7 +29,7 @@ import com.sun.jna.ptr.FloatByReference; import com.sun.jna.ptr.IntByReference; import com.sun.jna.ptr.PointerByReference; import org.broadinstitute.sting.jna.clibrary.JNAUtils; -import org.broadinstitute.sting.jna.clibrary.LibC.*; +import org.broadinstitute.sting.jna.clibrary.LibC.timeval; /* NOTE: This library uses Pointer for some Struct.ByReference members going diff --git a/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentDefinitionField.java b/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentDefinitionField.java index fe6758e76..c09c4037e 100644 --- a/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentDefinitionField.java +++ b/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentDefinitionField.java @@ -27,13 +27,16 @@ package org.broadinstitute.sting.queue.extensions.gatk; import net.sf.samtools.BAMIndex; import net.sf.samtools.SAMFileWriter; import org.broad.tribble.Tribble; -import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.io.stubs.SAMFileWriterArgumentTypeDescriptor; +import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; import java.io.File; import java.lang.annotation.Annotation; -import java.util.*; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; public abstract class ArgumentDefinitionField extends ArgumentField { diff --git a/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentField.java b/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentField.java index 2da427d44..e90933504 100644 --- a/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentField.java +++ b/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentField.java @@ -35,7 +35,10 @@ import java.io.File; import java.io.InputStream; import java.io.OutputStream; import java.lang.annotation.Annotation; -import java.util.*; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; public abstract class ArgumentField { diff --git a/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/GATKExtensionsGenerator.java b/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/GATKExtensionsGenerator.java index 21fb44733..5095bd6e5 100644 --- a/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/GATKExtensionsGenerator.java +++ b/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/GATKExtensionsGenerator.java @@ -27,17 +27,20 @@ package org.broadinstitute.sting.queue.extensions.gatk; import org.apache.commons.io.FileUtils; import org.apache.commons.lang.StringUtils; import org.apache.log4j.Logger; -import org.broadinstitute.sting.commandline.*; +import org.broadinstitute.sting.commandline.ArgumentTypeDescriptor; +import org.broadinstitute.sting.commandline.CommandLineProgram; +import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.commandline.ParsingEngine; import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.WalkerManager; import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; import org.broadinstitute.sting.gatk.filters.FilterManager; import org.broadinstitute.sting.gatk.filters.ReadFilter; -import org.broadinstitute.sting.gatk.io.stubs.VCFWriterArgumentTypeDescriptor; import org.broadinstitute.sting.gatk.io.stubs.OutputStreamArgumentTypeDescriptor; import org.broadinstitute.sting.gatk.io.stubs.SAMFileReaderArgumentTypeDescriptor; import org.broadinstitute.sting.gatk.io.stubs.SAMFileWriterArgumentTypeDescriptor; +import org.broadinstitute.sting.gatk.io.stubs.VCFWriterArgumentTypeDescriptor; import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder; import org.broadinstitute.sting.gatk.walkers.PartitionBy; import org.broadinstitute.sting.gatk.walkers.PartitionType; diff --git a/public/java/src/org/broadinstitute/sting/utils/ContigComparator.java b/public/java/src/org/broadinstitute/sting/utils/ContigComparator.java new file mode 100644 index 000000000..619beddb8 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/ContigComparator.java @@ -0,0 +1,71 @@ +package org.broadinstitute.sting.utils; + +import java.util.Comparator; +import java.util.Set; +import java.util.TreeSet; + +/** + * Created by IntelliJ IDEA. + * User: carneiro + * Date: 7/23/11 + * Time: 6:07 PM + * + * Contig comparator -- sorting contigs like Picard + * + * This is very useful if you want to output your text files or manipulate data in the usual chromosome ordering : + * 1 + * 2 + * 3 + * ... + * 21 + * 22 + * X + * Y + * GL*** + * ... + * Just use this comparator in any SortedSet class constructor and your data will be sorted like in the BAM file. + */ +public class ContigComparator implements Comparator { + private Set specialChrs; + + public ContigComparator() { + specialChrs = new TreeSet(); + specialChrs.add("X"); + specialChrs.add("Y"); + } + + public int compare(String chr1, String chr2) { + if (chr1.equals(chr2)) + return 0; + + Integer x = convertStringWithoutException(chr1); + Integer y = convertStringWithoutException(chr2); + // both contigs are numbered + if (x != null && y != null) + return (x < y) ? -1:1; + + // both contigs are named + if (x == null && y == null) { + // both contigs are special contigs or neither contig is a special contigs + if (specialChrs.contains(chr1) && specialChrs.contains(chr2) || (!specialChrs.contains(chr1) && !specialChrs.contains(chr2))) + return chr1.compareTo(chr2); + // one contig is a special and the other is not special + if (specialChrs.contains(chr1)) + return -1; + return 1; + } + + // one contig is named the other is numbered + if (x != null) + return -1; + return 1; + } + + private Integer convertStringWithoutException(String contig) { + Integer x = null; + try { + x = Integer.decode(contig); + } catch (NumberFormatException n){} + return x; + } +} diff --git a/public/java/src/org/broadinstitute/sting/utils/DisjointSet.java b/public/java/src/org/broadinstitute/sting/utils/DisjointSet.java index 5f68d3414..52c18e6d6 100644 --- a/public/java/src/org/broadinstitute/sting/utils/DisjointSet.java +++ b/public/java/src/org/broadinstitute/sting/utils/DisjointSet.java @@ -23,7 +23,9 @@ */ package org.broadinstitute.sting.utils; -import java.util.*; +import java.util.Collection; +import java.util.Set; +import java.util.TreeSet; public class DisjointSet { private ItemNode[] nodes; diff --git a/public/java/src/org/broadinstitute/sting/utils/GenomeLoc.java b/public/java/src/org/broadinstitute/sting/utils/GenomeLoc.java index 1f8800542..b96923589 100644 --- a/public/java/src/org/broadinstitute/sting/utils/GenomeLoc.java +++ b/public/java/src/org/broadinstitute/sting/utils/GenomeLoc.java @@ -2,7 +2,6 @@ package org.broadinstitute.sting.utils; import com.google.java.contract.Ensures; import com.google.java.contract.Requires; -import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.io.Serializable; diff --git a/public/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java b/public/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java index 18a1e7ffd..a5c6e0537 100644 --- a/public/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java +++ b/public/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java @@ -25,12 +25,14 @@ package org.broadinstitute.sting.utils; -import com.google.java.contract.*; +import com.google.java.contract.Ensures; +import com.google.java.contract.Invariant; +import com.google.java.contract.Requires; +import com.google.java.contract.ThrowEnsures; import net.sf.picard.reference.ReferenceSequenceFile; import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMSequenceDictionary; import net.sf.samtools.SAMSequenceRecord; - import org.apache.log4j.Logger; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; diff --git a/public/java/src/org/broadinstitute/sting/utils/IndelUtils.java b/public/java/src/org/broadinstitute/sting/utils/IndelUtils.java index 30e1a3f5b..af69ebca6 100755 --- a/public/java/src/org/broadinstitute/sting/utils/IndelUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/IndelUtils.java @@ -1,8 +1,8 @@ package org.broadinstitute.sting.utils; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.ArrayList; import java.util.Arrays; diff --git a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java index 50aa0f707..36ed506aa 100755 --- a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java @@ -25,17 +25,14 @@ package org.broadinstitute.sting.utils; -import cern.jet.math.Arithmetic; - -import java.math.BigDecimal; -import java.util.*; - import com.google.java.contract.Requires; import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; -import org.broadinstitute.sting.utils.collections.PrimitivePair; import org.broadinstitute.sting.utils.exceptions.UserException; +import java.math.BigDecimal; +import java.util.*; + /** * MathUtils is a static class (no instantiation allowed!) with some useful math methods. * diff --git a/public/java/src/org/broadinstitute/sting/utils/MendelianViolation.java b/public/java/src/org/broadinstitute/sting/utils/MendelianViolation.java index a8089ffe8..c6a07b5ce 100755 --- a/public/java/src/org/broadinstitute/sting/utils/MendelianViolation.java +++ b/public/java/src/org/broadinstitute/sting/utils/MendelianViolation.java @@ -1,12 +1,13 @@ package org.broadinstitute.sting.utils; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.datasources.sample.Sample; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.*; +import java.util.Collection; +import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; diff --git a/public/java/src/org/broadinstitute/sting/utils/PathUtils.java b/public/java/src/org/broadinstitute/sting/utils/PathUtils.java index 47466be20..822d04dfd 100755 --- a/public/java/src/org/broadinstitute/sting/utils/PathUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/PathUtils.java @@ -2,10 +2,10 @@ package org.broadinstitute.sting.utils; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import java.util.List; -import java.util.ArrayList; import java.io.File; import java.io.FilenameFilter; +import java.util.ArrayList; +import java.util.List; /** * Created by IntelliJ IDEA. diff --git a/public/java/src/org/broadinstitute/sting/utils/ReservoirDownsampler.java b/public/java/src/org/broadinstitute/sting/utils/ReservoirDownsampler.java index 30257d967..a758df431 100644 --- a/public/java/src/org/broadinstitute/sting/utils/ReservoirDownsampler.java +++ b/public/java/src/org/broadinstitute/sting/utils/ReservoirDownsampler.java @@ -3,7 +3,9 @@ package org.broadinstitute.sting.utils; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import java.util.*; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Iterator; /** * Randomly downsample from a stream of elements. This algorithm is a direct, diff --git a/public/java/src/org/broadinstitute/sting/utils/SWPairwiseAlignment.java b/public/java/src/org/broadinstitute/sting/utils/SWPairwiseAlignment.java index 4b7fa3e41..92d73a5ce 100755 --- a/public/java/src/org/broadinstitute/sting/utils/SWPairwiseAlignment.java +++ b/public/java/src/org/broadinstitute/sting/utils/SWPairwiseAlignment.java @@ -24,15 +24,14 @@ package org.broadinstitute.sting.utils; +import net.sf.samtools.Cigar; import net.sf.samtools.CigarElement; import net.sf.samtools.CigarOperator; -import net.sf.samtools.Cigar; - -import java.util.*; - import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.StingException; +import java.util.*; + /** * Created by IntelliJ IDEA. * User: asivache diff --git a/public/java/src/org/broadinstitute/sting/utils/SampleUtils.java b/public/java/src/org/broadinstitute/sting/utils/SampleUtils.java index c0370064d..f9997bfd8 100755 --- a/public/java/src/org/broadinstitute/sting/utils/SampleUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/SampleUtils.java @@ -27,12 +27,12 @@ package org.broadinstitute.sting.utils; import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMReadGroupRecord; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; +import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.text.XReadLines; -import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import java.io.File; import java.io.FileNotFoundException; diff --git a/public/java/src/org/broadinstitute/sting/utils/SimpleTimer.java b/public/java/src/org/broadinstitute/sting/utils/SimpleTimer.java index a59c5134a..342087b41 100644 --- a/public/java/src/org/broadinstitute/sting/utils/SimpleTimer.java +++ b/public/java/src/org/broadinstitute/sting/utils/SimpleTimer.java @@ -1,7 +1,10 @@ package org.broadinstitute.sting.utils; +import com.google.java.contract.Ensures; +import com.google.java.contract.Invariant; +import com.google.java.contract.Requires; + import java.io.PrintStream; -import com.google.java.contract.*; /** * A useful simple system for timing code. This code is not thread safe! diff --git a/public/java/src/org/broadinstitute/sting/utils/Utils.java b/public/java/src/org/broadinstitute/sting/utils/Utils.java index 4c13d8b18..6a50badce 100755 --- a/public/java/src/org/broadinstitute/sting/utils/Utils.java +++ b/public/java/src/org/broadinstitute/sting/utils/Utils.java @@ -25,13 +25,12 @@ package org.broadinstitute.sting.utils; -import java.util.*; - import net.sf.samtools.util.StringUtil; - import org.apache.log4j.Logger; import org.broadinstitute.sting.utils.collections.Pair; +import java.util.*; + /** * Created by IntelliJ IDEA. * User: depristo diff --git a/public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java b/public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java index 456f0fa0f..ef7cf751e 100644 --- a/public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java +++ b/public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java @@ -1,10 +1,10 @@ package org.broadinstitute.sting.utils.baq; -import net.sf.samtools.SAMRecord; -import net.sf.samtools.CigarElement; -import net.sf.samtools.CigarOperator; import net.sf.picard.reference.IndexedFastaSequenceFile; import net.sf.picard.reference.ReferenceSequence; +import net.sf.samtools.CigarElement; +import net.sf.samtools.CigarOperator; +import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; diff --git a/public/java/src/org/broadinstitute/sting/utils/baq/BAQSamIterator.java b/public/java/src/org/broadinstitute/sting/utils/baq/BAQSamIterator.java index 24d4152a5..26356a4a4 100644 --- a/public/java/src/org/broadinstitute/sting/utils/baq/BAQSamIterator.java +++ b/public/java/src/org/broadinstitute/sting/utils/baq/BAQSamIterator.java @@ -2,11 +2,10 @@ package org.broadinstitute.sting.utils.baq; import com.google.java.contract.Ensures; import com.google.java.contract.Requires; -import net.sf.samtools.SAMRecord; import net.sf.picard.reference.IndexedFastaSequenceFile; -import org.broadinstitute.sting.utils.baq.BAQ; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.util.Iterator; diff --git a/public/java/src/org/broadinstitute/sting/utils/bed/BedParser.java b/public/java/src/org/broadinstitute/sting/utils/bed/BedParser.java index abcae066f..b95165841 100644 --- a/public/java/src/org/broadinstitute/sting/utils/bed/BedParser.java +++ b/public/java/src/org/broadinstitute/sting/utils/bed/BedParser.java @@ -1,11 +1,12 @@ package org.broadinstitute.sting.utils.bed; -import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.exceptions.UserException; import java.io.*; -import java.util.*; +import java.util.ArrayList; +import java.util.List; /** * Created by IntelliJ IDEA. diff --git a/public/java/src/org/broadinstitute/sting/utils/classloader/JVMUtils.java b/public/java/src/org/broadinstitute/sting/utils/classloader/JVMUtils.java index 404bd80b6..e65b8f921 100755 --- a/public/java/src/org/broadinstitute/sting/utils/classloader/JVMUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/classloader/JVMUtils.java @@ -29,9 +29,9 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.StingException; import org.reflections.util.ClasspathHelper; -import java.lang.reflect.*; import java.io.File; import java.io.IOException; +import java.lang.reflect.*; import java.net.URL; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java b/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java index 95f3e160c..5449906b2 100644 --- a/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java @@ -4,9 +4,6 @@ import net.sf.samtools.Cigar; import net.sf.samtools.CigarElement; import net.sf.samtools.CigarOperator; import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.gatk.walkers.ClipReadsWalker; -import org.broadinstitute.sting.gatk.walkers.Walker; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.sam.ReadUtils; import java.util.Vector; diff --git a/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java b/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java index 031467ed9..988d297f6 100644 --- a/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java @@ -1,7 +1,6 @@ package org.broadinstitute.sting.utils.clipreads; import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.gatk.walkers.ClipReadsWalker; import java.util.ArrayList; import java.util.List; diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/completegenomics/CGVarCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/completegenomics/CGVarCodec.java index a286ce789..fef6c4ea0 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/completegenomics/CGVarCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/completegenomics/CGVarCodec.java @@ -30,7 +30,6 @@ import org.broad.tribble.readers.LineReader; import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.io.IOException; import java.util.HashMap; import java.util.HashSet; import java.util.regex.Matcher; diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/soapsnp/SoapSNPCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/soapsnp/SoapSNPCodec.java index 618d9ce79..e169dbdfc 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/soapsnp/SoapSNPCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/soapsnp/SoapSNPCodec.java @@ -4,11 +4,11 @@ import org.broad.tribble.Feature; import org.broad.tribble.FeatureCodec; import org.broad.tribble.NameAwareCodec; import org.broad.tribble.TribbleException; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.Genotype; import org.broad.tribble.exception.CodecLineParsingException; import org.broad.tribble.readers.LineReader; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/StandardVCFWriter.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/StandardVCFWriter.java index e7ddac185..b7f4be39a 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/StandardVCFWriter.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/StandardVCFWriter.java @@ -33,13 +33,13 @@ import org.broad.tribble.util.LittleEndianOutputStream; import org.broad.tribble.util.ParsingUtils; import org.broad.tribble.util.PositionalStream; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.io.*; -import java.util.*; import java.lang.reflect.Array; +import java.util.*; /** * this class writes VCF files diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFParser.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFParser.java index dac996494..1dba351e2 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFParser.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFParser.java @@ -2,7 +2,9 @@ package org.broadinstitute.sting.utils.codecs.vcf; import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.Genotype; -import java.util.*; + +import java.util.List; +import java.util.Map; /** diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFUtils.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFUtils.java index 4037f75b9..f43891e77 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFUtils.java @@ -25,10 +25,10 @@ package org.broadinstitute.sting.utils.codecs.vcf; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; -import org.apache.log4j.Logger; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/utils/collections/NestedHashMap.java b/public/java/src/org/broadinstitute/sting/utils/collections/NestedHashMap.java index a36ed9ac6..d280ac804 100755 --- a/public/java/src/org/broadinstitute/sting/utils/collections/NestedHashMap.java +++ b/public/java/src/org/broadinstitute/sting/utils/collections/NestedHashMap.java @@ -25,7 +25,8 @@ package org.broadinstitute.sting.utils.collections; -import java.util.*; +import java.util.HashMap; +import java.util.Map; /** * Created by IntelliJ IDEA. diff --git a/public/java/src/org/broadinstitute/sting/utils/collections/RODMergingIterator.java b/public/java/src/org/broadinstitute/sting/utils/collections/RODMergingIterator.java index 3a731c2fd..6d6cb8272 100644 --- a/public/java/src/org/broadinstitute/sting/utils/collections/RODMergingIterator.java +++ b/public/java/src/org/broadinstitute/sting/utils/collections/RODMergingIterator.java @@ -27,10 +27,13 @@ package org.broadinstitute.sting.utils.collections; import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator; import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import java.util.*; +import java.util.Collection; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.PriorityQueue; public class RODMergingIterator implements Iterator, Iterable { PriorityQueue queue = new PriorityQueue(); diff --git a/public/java/src/org/broadinstitute/sting/utils/duplicates/DupUtils.java b/public/java/src/org/broadinstitute/sting/utils/duplicates/DupUtils.java index 41de5ef96..bba47c76c 100644 --- a/public/java/src/org/broadinstitute/sting/utils/duplicates/DupUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/duplicates/DupUtils.java @@ -26,15 +26,18 @@ package org.broadinstitute.sting.utils.duplicates; import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl; -import java.util.List; import java.util.Arrays; +import java.util.List; public class DupUtils { private static SAMRecord tmpCopyRead(SAMRecord read) { diff --git a/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java b/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java index 17c4a7df4..3c3299ff5 100755 --- a/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java +++ b/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java @@ -28,9 +28,9 @@ import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMSequenceDictionary; import net.sf.samtools.SAMSequenceRecord; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.io.File; import java.util.Arrays; @@ -44,6 +44,9 @@ import java.util.Arrays; * Date: Sep 3, 2010 * Time: 2:24:09 PM */ +@DocumentedGATKFeature( + groupName = "User exceptions", + summary = "Exceptions caused by incorrect user behavior, such as bad files, bad arguments, etc." ) public class UserException extends ReviewedStingException { public UserException(String msg) { super(msg); } public UserException(String msg, Throwable e) { super(msg, e); } diff --git a/public/java/src/org/broadinstitute/sting/utils/fasta/CachingIndexedFastaSequenceFile.java b/public/java/src/org/broadinstitute/sting/utils/fasta/CachingIndexedFastaSequenceFile.java index 0c5085cc7..43ef4aa74 100644 --- a/public/java/src/org/broadinstitute/sting/utils/fasta/CachingIndexedFastaSequenceFile.java +++ b/public/java/src/org/broadinstitute/sting/utils/fasta/CachingIndexedFastaSequenceFile.java @@ -25,15 +25,16 @@ package org.broadinstitute.sting.utils.fasta; import net.sf.picard.PicardException; -import net.sf.picard.reference.*; +import net.sf.picard.reference.FastaSequenceIndex; +import net.sf.picard.reference.IndexedFastaSequenceFile; +import net.sf.picard.reference.ReferenceSequence; import net.sf.samtools.SAMSequenceRecord; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.io.File; import java.io.FileNotFoundException; import java.util.Arrays; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; - /** * A caching version of the IndexedFastaSequenceFile that avoids going to disk as often as the raw indexer. * diff --git a/public/java/src/org/broadinstitute/sting/utils/genotype/DiploidGenotype.java b/public/java/src/org/broadinstitute/sting/utils/genotype/DiploidGenotype.java index cbfba848c..1c2cfe2e1 100755 --- a/public/java/src/org/broadinstitute/sting/utils/genotype/DiploidGenotype.java +++ b/public/java/src/org/broadinstitute/sting/utils/genotype/DiploidGenotype.java @@ -26,7 +26,6 @@ package org.broadinstitute.sting.utils.genotype; import org.broadinstitute.sting.utils.BaseUtils; -import org.broadinstitute.sting.utils.MathUtils; /** * Created by IntelliJ IDEA. diff --git a/public/java/src/org/broadinstitute/sting/utils/genotype/Haplotype.java b/public/java/src/org/broadinstitute/sting/utils/genotype/Haplotype.java index 31791e805..a17e81461 100755 --- a/public/java/src/org/broadinstitute/sting/utils/genotype/Haplotype.java +++ b/public/java/src/org/broadinstitute/sting/utils/genotype/Haplotype.java @@ -24,14 +24,14 @@ package org.broadinstitute.sting.utils.genotype; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.variantcontext.Allele; -import java.util.*; +import java.util.Arrays; +import java.util.LinkedHashMap; +import java.util.List; public class Haplotype { protected byte[] bases = null; diff --git a/public/java/src/org/broadinstitute/sting/utils/help/ApplicationDetails.java b/public/java/src/org/broadinstitute/sting/utils/help/ApplicationDetails.java index d9e74640f..483c874dc 100644 --- a/public/java/src/org/broadinstitute/sting/utils/help/ApplicationDetails.java +++ b/public/java/src/org/broadinstitute/sting/utils/help/ApplicationDetails.java @@ -25,13 +25,13 @@ package org.broadinstitute.sting.utils.help; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.classloader.JVMUtils; import org.broadinstitute.sting.commandline.CommandLineProgram; +import org.broadinstitute.sting.utils.classloader.JVMUtils; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import java.util.List; -import java.util.Collections; import java.io.IOException; +import java.util.Collections; +import java.util.List; /** * Contains details additional details that the program can diff --git a/public/java/src/org/broadinstitute/sting/utils/help/DescriptionTaglet.java b/public/java/src/org/broadinstitute/sting/utils/help/DescriptionTaglet.java deleted file mode 100644 index 68633a2b3..000000000 --- a/public/java/src/org/broadinstitute/sting/utils/help/DescriptionTaglet.java +++ /dev/null @@ -1,60 +0,0 @@ -package org.broadinstitute.sting.utils.help; - -import com.sun.tools.doclets.Taglet; -import com.sun.javadoc.Tag; - -import java.util.Map; - -/** - * Provide an alternate description for the given help system. - * - * @author mhanna - * @version 0.1 - */ -public class DescriptionTaglet extends HelpTaglet { - /** - * The key tag for this taglet. - */ - public static final String NAME = "help.description"; - - /** - * Return the name of this custom tag. - */ - @Override - public String getName() { - return NAME; - } - - /** - * Will return false since overviews are always named - * by the @WalkerName tag. - * @return false always - */ - @Override - public boolean inOverview() { - return true; - } - - /** - * Will return true to indicate that packages can be given useful - * description. - * @return true always - */ - @Override - public boolean inPackage() { - return true; - } - - /** - * Register this Taglet. - * @param tagletMap the map to register this tag to. - */ - public static void register(Map tagletMap) { - DescriptionTaglet tag = new DescriptionTaglet(); - Taglet t = (Taglet)tagletMap.get(tag.getName()); - if (t != null) { - tagletMap.remove(tag.getName()); - } - tagletMap.put(tag.getName(), tag); - } -} \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/utils/help/DisplayNameTaglet.java b/public/java/src/org/broadinstitute/sting/utils/help/DisplayNameTaglet.java deleted file mode 100644 index be6f7f3eb..000000000 --- a/public/java/src/org/broadinstitute/sting/utils/help/DisplayNameTaglet.java +++ /dev/null @@ -1,50 +0,0 @@ -package org.broadinstitute.sting.utils.help; - -import com.sun.tools.doclets.Taglet; -import com.sun.javadoc.Tag; - -import java.util.Map; - -/** - * Provide a display name in the help for packages - * - * @author mhanna - * @version 0.1 - */ -public class DisplayNameTaglet extends HelpTaglet { - /** - * The display name for this taglet. - */ - public static final String NAME = "help.display.name"; - - /** - * Return the name of this custom tag. - */ - @Override - public String getName() { - return NAME; - } - - /** - * Will return true to indicate that packages can be given useful - * display text. - * @return true always - */ - @Override - public boolean inPackage() { - return true; - } - - /** - * Register this Taglet. - * @param tagletMap the map to register this tag to. - */ - public static void register(Map tagletMap) { - DisplayNameTaglet tag = new DisplayNameTaglet(); - Taglet t = (Taglet)tagletMap.get(tag.getName()); - if (t != null) { - tagletMap.remove(tag.getName()); - } - tagletMap.put(tag.getName(), tag); - } -} diff --git a/public/java/src/org/broadinstitute/sting/utils/help/DocumentedGATKFeature.java b/public/java/src/org/broadinstitute/sting/utils/help/DocumentedGATKFeature.java new file mode 100644 index 000000000..710503ca8 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/help/DocumentedGATKFeature.java @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.help; + +import java.lang.annotation.*; + +/** + * An annotation to identify a class as a GATK capability for documentation + * + * @author depristo + */ +@Documented +@Inherited +@Retention(RetentionPolicy.RUNTIME) +@Target(ElementType.TYPE) +public @interface DocumentedGATKFeature { + public boolean enable() default true; + public String groupName(); + public String summary() default ""; + public Class handler() default GenericDocumentationHandler.class; + public Class[] extraDocs() default {}; +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/SpecificDifference.java b/public/java/src/org/broadinstitute/sting/utils/help/DocumentedGATKFeatureHandler.java similarity index 52% rename from public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/SpecificDifference.java rename to public/java/src/org/broadinstitute/sting/utils/help/DocumentedGATKFeatureHandler.java index 2fe9b47f8..366df0c3a 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/SpecificDifference.java +++ b/public/java/src/org/broadinstitute/sting/utils/help/DocumentedGATKFeatureHandler.java @@ -22,38 +22,38 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -package org.broadinstitute.sting.gatk.walkers.diffengine; +package org.broadinstitute.sting.utils.help; + +import com.sun.javadoc.ClassDoc; +import com.sun.javadoc.RootDoc; + +import java.io.*; +import java.util.Set; /** - * Created by IntelliJ IDEA. - * User: depristo - * Date: 7/4/11 - * Time: 12:53 PM * - * Represents a specific difference between two specific DiffElements */ -public class SpecificDifference extends Difference { - DiffElement master, test; +public abstract class DocumentedGATKFeatureHandler { + private GATKDoclet doclet; - public SpecificDifference(DiffElement master, DiffElement test) { - super(createName(master, test)); - if ( master == null && test == null ) throw new IllegalArgumentException("Master and test both cannot be null"); - this.master = master; - this.test = test; + protected RootDoc getRootDoc() { + return this.doclet.rootDoc; } - public String toString() { - return String.format("%s:%s!=%s", - getPath(), - getOneLineString(master), - getOneLineString(test)); + public void setDoclet(GATKDoclet doclet) { + this.doclet = doclet; } - private static String createName(DiffElement master, DiffElement test) { - return (master == null ? test : master).fullyQualifiedName(); + public GATKDoclet getDoclet() { + return doclet; } - private static String getOneLineString(DiffElement elt) { - return elt == null ? "MISSING" : elt.getValue().toOneLineString(); + public boolean shouldBeProcessed(ClassDoc doc) { return true; } + + public String getDestinationFilename(ClassDoc doc) { + return HelpUtils.getClassName(doc).replace(".", "_") + ".html"; } + + public abstract String getTemplateName(ClassDoc doc) throws IOException; + public abstract void processOne(RootDoc rootDoc, GATKDocWorkUnit toProcess, Set all); } diff --git a/public/java/src/org/broadinstitute/sting/utils/help/GATKDocWorkUnit.java b/public/java/src/org/broadinstitute/sting/utils/help/GATKDocWorkUnit.java new file mode 100644 index 000000000..65c6624d5 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/help/GATKDocWorkUnit.java @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.help; + +import com.sun.javadoc.ClassDoc; + +import java.util.HashMap; +import java.util.Map; + +/** +* Created by IntelliJ IDEA. +* User: depristo +* Date: 7/24/11 +* Time: 7:59 PM +* To change this template use File | Settings | File Templates. +*/ +public class GATKDocWorkUnit implements Comparable { + // known at the start + final String name, filename, group; + final DocumentedGATKFeatureHandler handler; + final ClassDoc classDoc; + final Class clazz; + final DocumentedGATKFeature annotation; + final String buildTimestamp, absoluteVersion; + + // set by the handler + String summary; + Map forTemplate; + + public GATKDocWorkUnit(String name, String filename, String group, + DocumentedGATKFeature annotation, DocumentedGATKFeatureHandler handler, + ClassDoc classDoc, Class clazz, + String buildTimestamp, String absoluteVersion) { + this.annotation = annotation; + this.name = name; + this.filename = filename; + this.group = group; + this.handler = handler; + this.classDoc = classDoc; + this.clazz = clazz; + this.buildTimestamp = buildTimestamp; + this.absoluteVersion = absoluteVersion; + } + + public void setHandlerContent(String summary, Map forTemplate) { + this.summary = summary; + this.forTemplate = forTemplate; + } + + public Map toMap() { + Map data = new HashMap(); + data.put("name", name); + data.put("summary", summary); + data.put("filename", filename); + data.put("group", group); + return data; + } + + public int compareTo(GATKDocWorkUnit other) { + return this.name.compareTo(other.name); + } +} diff --git a/public/java/src/org/broadinstitute/sting/utils/help/GATKDoclet.java b/public/java/src/org/broadinstitute/sting/utils/help/GATKDoclet.java new file mode 100644 index 000000000..49214237a --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/help/GATKDoclet.java @@ -0,0 +1,273 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.help; + +import com.sun.javadoc.ClassDoc; +import com.sun.javadoc.RootDoc; +import freemarker.template.Configuration; +import freemarker.template.DefaultObjectWrapper; +import freemarker.template.Template; +import freemarker.template.TemplateException; +import org.apache.commons.io.FileUtils; +import org.apache.log4j.Level; +import org.apache.log4j.Logger; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + +import java.io.*; +import java.util.*; + +/** + * + */ +public class GATKDoclet { + final protected static File SETTINGS_DIR = new File("settings/helpTemplates"); + final protected static File DESTINATION_DIR = new File("gatkdocs"); + final protected static Logger logger = Logger.getLogger(GATKDoclet.class); + protected static String buildTimestamp = null, absoluteVersion = null; + protected static boolean showHiddenFeatures = false; + + RootDoc rootDoc; + + /** + * Extracts the contents of certain types of javadoc and adds them to an XML file. + * @param rootDoc The documentation root. + * @return Whether the JavaDoc run succeeded. + * @throws java.io.IOException if output can't be written. + */ + public static boolean start(RootDoc rootDoc) throws IOException { + logger.setLevel(Level.DEBUG); + // load arguments + for(String[] options: rootDoc.options()) { + if(options[0].equals("-build-timestamp")) + buildTimestamp = options[1]; + if (options[0].equals("-absolute-version")) + absoluteVersion = options[1]; + if (options[0].equals("-include-hidden")) + showHiddenFeatures = true; + } + + GATKDoclet doclet = new GATKDoclet(); + doclet.processDocs(rootDoc); + return true; + } + + /** + * Validate the given options against options supported by this doclet. + * @param option Option to validate. + * @return Number of potential parameters; 0 if not supported. + */ + public static int optionLength(String option) { + if(option.equals("-build-timestamp") || option.equals("-absolute-version") || option.equals("-include-hidden")) { + return 2; + } + return 0; + } + + public boolean showHiddenFeatures() { + return showHiddenFeatures; + } + + public Set workUnits() { + TreeSet m = new TreeSet(); + + for ( ClassDoc doc : rootDoc.classes() ) { + logger.debug("Considering " + doc); + Class clazz = getClassForClassDoc(doc); + + if ( clazz != null && clazz.getName().equals("org.broadinstitute.sting.gatk.walkers.annotator.AlleleBalance")) + logger.debug("foo"); + + DocumentedGATKFeature feature = getFeatureForClassDoc(doc); + DocumentedGATKFeatureHandler handler = createHandler(doc, feature); + if ( handler != null && handler.shouldBeProcessed(doc) ) { + logger.info("Going to generate documentation for class " + doc); + String filename = handler.getDestinationFilename(doc); + GATKDocWorkUnit unit = new GATKDocWorkUnit(doc.name(), + filename, feature.groupName(), + feature, handler, doc, clazz, + buildTimestamp, absoluteVersion); + m.add(unit); + } + } + + return m; + } + + protected void processDocs(RootDoc rootDoc) { + // setup the global access to the root + this.rootDoc = rootDoc; + + try { + // basic setup + DESTINATION_DIR.mkdirs(); + FileUtils.copyFile(new File(SETTINGS_DIR + "/style.css"), new File(DESTINATION_DIR + "/style.css")); + + /* ------------------------------------------------------------------- */ + /* You should do this ONLY ONCE in the whole application life-cycle: */ + + Configuration cfg = new Configuration(); + // Specify the data source where the template files come from. + cfg.setDirectoryForTemplateLoading(SETTINGS_DIR); + // Specify how templates will see the data-model. This is an advanced topic... + cfg.setObjectWrapper(new DefaultObjectWrapper()); + + Set myWorkUnits = workUnits(); + for ( GATKDocWorkUnit workUnit : myWorkUnits ) { + processDocWorkUnit(cfg, workUnit, myWorkUnits); + } + + processIndex(cfg, new ArrayList(myWorkUnits)); + } catch ( FileNotFoundException e ) { + throw new RuntimeException(e); + } catch ( IOException e ) { + throw new RuntimeException(e); + } + } + + private DocumentedGATKFeatureHandler createHandler(ClassDoc doc, DocumentedGATKFeature feature) { + try { + if ( feature != null ) { + if ( feature.enable() ) { + DocumentedGATKFeatureHandler handler = feature.handler().newInstance(); + handler.setDoclet(this); + return handler; + } else { + logger.info("Skipping disabled Documentation for " + doc); + } + } + } catch ( IllegalAccessException e) { + throw new RuntimeException(e); // the constructor is now private -- this is an error + } catch ( InstantiationException e) { + throw new RuntimeException(e); // the constructor is now private -- this is an error + } + + return null; + } + + private DocumentedGATKFeature getFeatureForClassDoc(ClassDoc doc) { + // todo -- what do I need the ? extends Object to pass the compiler? + Class docClass = getClassForClassDoc(doc); + if ( docClass != null && docClass.isAnnotationPresent(DocumentedGATKFeature.class) ) { + return docClass.getAnnotation(DocumentedGATKFeature.class); + } else { + return null; // not annotated so it shouldn't be documented + } + } + + private Class getClassForClassDoc(ClassDoc doc) { + try { + // todo -- what do I need the ? extends Object to pass the compiler? + return (Class)HelpUtils.getClassForDoc(doc); + } catch ( ClassNotFoundException e) { + //logger.warn("Couldn't find class for ClassDoc " + doc); + // we got a classdoc for a class we can't find. Maybe in a library or something + return null; + } catch ( NoClassDefFoundError e ) { + return null; + } catch ( UnsatisfiedLinkError e) { + return null; // naughty BWA bindings + } + } + + public static ClassDoc getClassDocForClass(RootDoc rootDoc, Class clazz) { + return rootDoc.classNamed(clazz.getName()); + } + + private void processIndex(Configuration cfg, List indexData) throws IOException { + /* Get or create a template */ + Template temp = cfg.getTemplate("generic.index.template.html"); + + /* Merge data-model with template */ + Writer out = new OutputStreamWriter(new FileOutputStream(new File(DESTINATION_DIR + "/index.html"))); + try { + temp.process(groupIndexData(indexData), out); + out.flush(); + } catch ( TemplateException e ) { + throw new ReviewedStingException("Failed to create GATK documentation", e); + } + } + + private Map groupIndexData(List indexData) { + // + // root -> data -> { summary -> y, filename -> z }, etc + // -> groups -> group1, group2, etc. + Map root = new HashMap(); + + Collections.sort(indexData); + + Set docFeatures = new HashSet(); + List> data = new ArrayList>(); + for ( GATKDocWorkUnit workUnit : indexData ) { + data.add(workUnit.toMap()); + docFeatures.add(workUnit.annotation); + } + + List> groups = new ArrayList>(); + for ( DocumentedGATKFeature feature : docFeatures ) { + groups.add(toMap(feature)); + } + + root.put("data", data); + root.put("groups", groups); + root.put("timestamp", buildTimestamp); + root.put("version", absoluteVersion); + + return root; + } + + private static final Map toMap(DocumentedGATKFeature annotation) { + Map root = new HashMap(); + root.put("name", annotation.groupName()); + root.put("summary", annotation.summary()); + return root; + } + + public final static GATKDocWorkUnit findWorkUnitForClass(Class c, Set all) { + for ( final GATKDocWorkUnit unit : all ) + if ( unit.clazz.equals(c) ) + return unit; + return null; + } + + private void processDocWorkUnit(Configuration cfg, GATKDocWorkUnit unit, Set all) + throws IOException { + //System.out.printf("Processing documentation for class %s%n", unit.classDoc); + + unit.handler.processOne(rootDoc, unit, all); + + // Get or create a template + Template temp = cfg.getTemplate(unit.handler.getTemplateName(unit.classDoc)); + + // Merge data-model with template + File outputPath = new File(DESTINATION_DIR + "/" + unit.filename); + try { + Writer out = new OutputStreamWriter(new FileOutputStream(outputPath)); + temp.process(unit.forTemplate, out); + out.flush(); + } catch ( TemplateException e ) { + throw new ReviewedStingException("Failed to create GATK documentation", e); + } + } +} diff --git a/public/java/src/org/broadinstitute/sting/utils/help/GenericDocumentationHandler.java b/public/java/src/org/broadinstitute/sting/utils/help/GenericDocumentationHandler.java new file mode 100644 index 000000000..c69345816 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/help/GenericDocumentationHandler.java @@ -0,0 +1,372 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.help; + +import com.google.java.contract.Requires; +import com.sun.javadoc.ClassDoc; +import com.sun.javadoc.FieldDoc; +import com.sun.javadoc.RootDoc; +import com.sun.javadoc.Tag; +import org.apache.log4j.Logger; +import org.broadinstitute.sting.commandline.*; +import org.broadinstitute.sting.gatk.CommandLineGATK; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.classloader.JVMUtils; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + +import java.io.*; +import java.lang.reflect.Field; +import java.util.*; + +/** + * + */ +public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler { + private static Logger logger = Logger.getLogger(GenericDocumentationHandler.class); + GATKDocWorkUnit toProcess; + ClassDoc classdoc; + Set all; + RootDoc rootDoc; + + @Override + public boolean shouldBeProcessed(ClassDoc doc) { + return true; +// try { +// Class type = HelpUtils.getClassForDoc(doc); +// return JVMUtils.isConcrete(type); +// } catch ( ClassNotFoundException e ) { +// return false; +// } + } + + + @Override + public String getTemplateName(ClassDoc doc) throws IOException { + return "generic.template.html"; + } + + @Override + public void processOne(RootDoc rootDoc, GATKDocWorkUnit toProcessArg, Set allArg) { + this.rootDoc = rootDoc; + this.toProcess = toProcessArg; + this.all = allArg; + this.classdoc = toProcess.classDoc; + + //System.out.printf("%s class %s%n", toProcess.group, toProcess.classDoc); + Map root = new HashMap(); + + addHighLevelBindings(root); + addArgumentBindings(root); + addRelatedBindings(root); + + toProcess.setHandlerContent((String)root.get("summary"), root); + } + + protected void addHighLevelBindings(Map root) { + root.put("name", classdoc.name()); + + // Extract overrides from the doc tags. + StringBuilder summaryBuilder = new StringBuilder(); + for(Tag tag: classdoc.firstSentenceTags()) + summaryBuilder.append(tag.text()); + root.put("summary", summaryBuilder.toString()); + root.put("description", classdoc.commentText().substring(summaryBuilder.toString().length())); + root.put("timestamp", toProcess.buildTimestamp); + root.put("version", toProcess.absoluteVersion); + + for(Tag tag: classdoc.tags()) { + root.put(tag.name(), tag.text()); + } + } + + protected void addArgumentBindings(Map root) { + ParsingEngine parsingEngine = createStandardGATKParsingEngine(); + + // attempt to instantiate the class + Object instance = makeInstanceIfPossible(toProcess.clazz); + + Map> args = new HashMap>(); + root.put("arguments", args); + args.put("all", new ArrayList()); + args.put("required", new ArrayList()); + args.put("optional", new ArrayList()); + args.put("hidden", new ArrayList()); + args.put("depreciated", new ArrayList()); + try { + for ( ArgumentSource argumentSource : parsingEngine.extractArgumentSources(HelpUtils.getClassForDoc(classdoc)) ) { + ArgumentDefinition argDef = argumentSource.createArgumentDefinitions().get(0); + FieldDoc fieldDoc = getFieldDoc(classdoc, argumentSource.field.getName()); + Map argBindings = docForArgument(fieldDoc, argumentSource, argDef); // todo -- why can you have multiple ones? + if ( ! argumentSource.isHidden() || getDoclet().showHiddenFeatures() ) { + logger.debug(String.format("Processing %s", argumentSource)); + String kind = "optional"; + if ( argumentSource.isRequired() ) kind = "required"; + else if ( argumentSource.isHidden() ) kind = "hidden"; + else if ( argumentSource.isDeprecated() ) kind = "depreciated"; + + // get the value of the field + if ( instance != null ) { + Object value = getFieldValue(toProcess.clazz, instance, fieldDoc.name()); + if ( value != null ) + argBindings.put("defaultValue", prettyPrintValueString(value)); + } + + args.get(kind).add(argBindings); + args.get("all").add(argBindings); + } else { + logger.debug(String.format("Skipping hidden feature %s", argumentSource)); + } + } + } catch ( ClassNotFoundException e ) { + throw new RuntimeException(e); + } + } + + private Object getFieldValue(Class c, Object instance, String fieldName) { + Field field = JVMUtils.findField(c, fieldName); + if ( field != null ) { + Object value = JVMUtils.getFieldValue(field, instance); + //System.out.printf("Fetched value of field %s in class %s: %s%n", fieldName, c, value); + return value; + } else { + return findFieldValueInArgumentCollections(c, instance, fieldName); + } + } + + private Object findFieldValueInArgumentCollections(Class c, Object instance, String fieldName) { + for ( Field field : JVMUtils.getAllFields(c) ) { + if ( field.isAnnotationPresent(ArgumentCollection.class) ) { + //System.out.printf("Searching for %s in argument collection field %s%n", fieldName, field); + Object fieldValue = JVMUtils.getFieldValue(field, instance); + Object value = getFieldValue(fieldValue.getClass(), fieldValue, fieldName); + if ( value != null ) + return value; + } + } + + return null; + } + + /** + * Assumes value != null + * @param value + * @return + */ + private Object prettyPrintValueString(Object value) { + if ( value.getClass().isArray() ) { + Class type = value.getClass().getComponentType(); + if ( boolean.class.isAssignableFrom(type) ) + return Arrays.toString((boolean[])value); + if ( byte.class.isAssignableFrom(type) ) + return Arrays.toString((byte[])value); + if ( char.class.isAssignableFrom(type) ) + return Arrays.toString((char[])value); + if ( double.class.isAssignableFrom(type) ) + return Arrays.toString((double[])value); + if ( float.class.isAssignableFrom(type) ) + return Arrays.toString((float[])value); + if ( int.class.isAssignableFrom(type) ) + return Arrays.toString((int[])value); + if ( long.class.isAssignableFrom(type) ) + return Arrays.toString((long[])value); + if ( short.class.isAssignableFrom(type) ) + return Arrays.toString((short[])value); + if ( Object.class.isAssignableFrom(type) ) + return Arrays.toString((Object[])value); + else + throw new RuntimeException("Unexpected array type in prettyPrintValue. Value was " + value + " type is " + type); + } else + return value.toString(); + } + + private Object makeInstanceIfPossible(Class c) { + Object instance = null; + try { + // don't try to make something where we will obviously fail + if (! c.isEnum() && ! c.isAnnotation() && ! c.isAnonymousClass() && + ! c.isArray() && ! c.isPrimitive() & JVMUtils.isConcrete(c) ) { + instance = c.newInstance(); + //System.out.printf("Created object of class %s => %s%n", c, instance); + return instance; + } else + return null; + } + catch (IllegalAccessException e ) { } + catch (InstantiationException e ) { } + catch (ExceptionInInitializerError e ) { } + catch (SecurityException e ) { } + // this last one is super dangerous, but some of these methods catch ClassNotFoundExceptions + // and rethrow then as RuntimeExceptions + catch (RuntimeException e) {} + finally { + if ( instance == null ) + logger.warn(String.format("Unable to create instance of class %s => %s", c, instance)); + } + + return instance; + } + + protected void addRelatedBindings(Map root) { + List> extraDocsData = new ArrayList>(); + + // add in all of the explicitly related items + for ( final Class extraDocClass : toProcess.annotation.extraDocs() ) { + final GATKDocWorkUnit otherUnit = GATKDoclet.findWorkUnitForClass(extraDocClass, all); + if ( otherUnit == null ) + throw new ReviewedStingException("Requested extraDocs for class without any documentation: " + extraDocClass); + extraDocsData.add( + new HashMap(){{ + put("filename", otherUnit.filename); + put("name", otherUnit.name);}}); + + } + + List> hierarchyDocs = new ArrayList>(); + for (final GATKDocWorkUnit other : all ) { + final String relation = classRelationship(toProcess.clazz, other.clazz); + if ( relation != null ) + hierarchyDocs.add( + new HashMap(){{ + put("filename", other.filename); + put("relation", relation); + put("name", other.name);}}); + + } + + root.put("relatedDocs", hierarchyDocs); + root.put("extradocs", extraDocsData); + } + + private static final String classRelationship(Class me, Class other) { + if ( other.equals(me) ) + // no circular references + return null; + else if ( other.isAssignableFrom(me) ) + // toProcess is a superclass of other.clazz + return "superclass"; + else if ( me.isAssignableFrom(other) ) + // toProcess inherits from other.clazz + return "subclass"; + else + return null; + + } + + protected ParsingEngine createStandardGATKParsingEngine() { + CommandLineProgram clp = new CommandLineGATK(); + try { + CommandLineProgram.start(clp, new String[]{}, true); + return clp.parser; + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + private FieldDoc getFieldDoc(ClassDoc classDoc, String name) { + return getFieldDoc(classDoc, name, true); + } + + private FieldDoc getFieldDoc(ClassDoc classDoc, String name, boolean primary) { + //System.out.printf("Looking for %s in %s%n", name, classDoc.name()); + for ( FieldDoc fieldDoc : classDoc.fields(false) ) { + //System.out.printf("fieldDoc " + fieldDoc + " name " + fieldDoc.name()); + if ( fieldDoc.name().equals(name) ) + return fieldDoc; + + Field field = HelpUtils.getFieldForFieldDoc(fieldDoc); + if ( field.isAnnotationPresent(ArgumentCollection.class) ) { + ClassDoc typeDoc = getRootDoc().classNamed(fieldDoc.type().qualifiedTypeName()); + if ( typeDoc == null ) + throw new ReviewedStingException("Tried to get javadocs for ArgumentCollection field " + fieldDoc + " but could't find the class in the RootDoc"); + else { + FieldDoc result = getFieldDoc(typeDoc, name, false); + if ( result != null ) + return result; + // else keep searching + } + } + } + + // if we didn't find it here, wander up to the superclass to find the field + if ( classDoc.superclass() != null ) { + return getFieldDoc(classDoc.superclass(), name, false); + } + + if ( primary ) + throw new RuntimeException("No field found for expected field " + name); + else + return null; + } + + protected Map docForArgument(FieldDoc fieldDoc, ArgumentSource source, ArgumentDefinition def) { + Map root = new HashMap(); + root.put("name", def.shortName != null ? "-" + def.shortName : "--" + def.fullName ); + + if ( def.shortName != null && def.fullName != null ) + root.put("synonyms", "--" + def.fullName); + + root.put("required", def.required ? "yes" : "no"); + root.put("type", def.argumentType.getSimpleName()); + + // summary and fulltext + root.put("summary", def.doc != null ? def.doc : ""); + root.put("fulltext", fieldDoc.commentText()); + + List attributes = new ArrayList(); + // this one below is just too much. + //attributes.add(def.ioType.annotationClass.getSimpleName()); + if ( def.required ) attributes.add("required"); + // flag is just boolean, not interesting + //if ( def.isFlag ) attributes.add("flag"); + if ( def.isHidden ) attributes.add("hidden"); + if ( source.isDeprecated() ) attributes.add("depreciated"); + if ( attributes.size() > 0 ) + root.put("attributes", Utils.join(", ", attributes)); + + if ( def.validOptions != null ) { + root.put("options", docForEnumArgument(source.field.getType())); + } + + return root; + } + + @Requires("enumClass.isEnum()") + private List> docForEnumArgument(Class enumClass) { + ClassDoc doc = GATKDoclet.getClassDocForClass(rootDoc, enumClass); + if ( doc == null ) // || ! doc.isEnum() ) + throw new RuntimeException("Tried to get docs for enum " + enumClass + " but got instead: " + doc); + + List> bindings = new ArrayList>(); + for (final FieldDoc field : doc.fields(false) ) { + bindings.add( + new HashMap(){{ + put("name", field.name()); + put("summary", field.commentText());}}); + } + + return bindings; + } + +} diff --git a/public/java/src/org/broadinstitute/sting/utils/help/HelpFormatter.java b/public/java/src/org/broadinstitute/sting/utils/help/HelpFormatter.java index 493f26e76..a9d71ef98 100755 --- a/public/java/src/org/broadinstitute/sting/utils/help/HelpFormatter.java +++ b/public/java/src/org/broadinstitute/sting/utils/help/HelpFormatter.java @@ -25,22 +25,16 @@ package org.broadinstitute.sting.utils.help; +import org.apache.log4j.Logger; import org.broadinstitute.sting.commandline.ArgumentDefinition; import org.broadinstitute.sting.commandline.ArgumentDefinitionGroup; import org.broadinstitute.sting.commandline.ArgumentDefinitions; -import org.broadinstitute.sting.utils.text.TextFormattingUtils; import org.broadinstitute.sting.utils.Utils; -import org.apache.log4j.Logger; +import org.broadinstitute.sting.utils.text.TextFormattingUtils; -import java.util.Formatter; -import java.util.List; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.Comparator; -import java.util.Collection; -import java.util.Collections; import java.text.DateFormat; import java.text.SimpleDateFormat; +import java.util.*; /** * Print out help for Sting command-line applications. */ diff --git a/public/java/src/org/broadinstitute/sting/utils/help/HelpTaglet.java b/public/java/src/org/broadinstitute/sting/utils/help/HelpTaglet.java deleted file mode 100644 index b962664eb..000000000 --- a/public/java/src/org/broadinstitute/sting/utils/help/HelpTaglet.java +++ /dev/null @@ -1,93 +0,0 @@ -package org.broadinstitute.sting.utils.help; - -import com.sun.tools.doclets.Taglet; -import com.sun.javadoc.Tag; - -import java.util.Map; - -/** - * Basic functionality for the help taglet. - * - * @author mhanna - * @version 0.1 - */ -public abstract class HelpTaglet implements Taglet { - /** - * Return the name of this custom tag. - */ - public abstract String getName(); - - /** - * Will return false since this tag cannot be applied - * to a field. - * @return false since this tag cannot be applied to a field. - */ - public boolean inField() { - return false; - } - - /** - * Will return false since by default, help tags cannot be applied to a constructor. - * @return false since by default, help tags cannot be applied to a constructor. - */ - public boolean inConstructor() { - return false; - } - - /** - * Will return false since by default, help tags cannot be applied to a method. - * @return false since by default, this tag cannot be applied to a method. - */ - public boolean inMethod() { - return false; - } - - /** - * Will return false since by default, help tags cannot be applied to an overview. - * @return false since by default, help tags cannot be applied to an overview. - */ - public boolean inOverview() { - return false; - } - - /** - * Will return false since by default, help tags cannot be applied to a package. - * description. - * @return false since by default, help tags cannot be applied to a package. - */ - public boolean inPackage() { - return false; - } - - /** - * Will return false since help tags are by default not inline. - * @return false since help tags are by default not inline. - */ - public boolean inType() { - return false; - } - - /** - * Will return false since help tags are by default not inline. - * @return false since help tags are by default not inline. - */ - public boolean isInlineTag() { - return false; - } - - /** - * Create a string representation of this tag. Since this tag is only - * used by the help system, don't output any HTML. - */ - public String toString(Tag tag) { - return null; - } - - /** - * Create a string representation of this tag. Since this tag is only - * used by the help system, don't output any HTML. - */ - public String toString(Tag[] tags) { - return null; - } -} diff --git a/public/java/src/org/broadinstitute/sting/utils/help/HelpUtils.java b/public/java/src/org/broadinstitute/sting/utils/help/HelpUtils.java new file mode 100644 index 000000000..4527c6afe --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/help/HelpUtils.java @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.help; + +import com.sun.javadoc.FieldDoc; +import com.sun.javadoc.PackageDoc; +import com.sun.javadoc.ProgramElementDoc; +import org.broadinstitute.sting.utils.classloader.JVMUtils; + +import java.lang.reflect.Field; + +public class HelpUtils { + protected static boolean implementsInterface(ProgramElementDoc classDoc, Class... interfaceClasses) { + for (Class interfaceClass : interfaceClasses) + if (assignableToClass(classDoc, interfaceClass, false)) + return true; + return false; + } + + protected static boolean assignableToClass(ProgramElementDoc classDoc, Class lhsClass, boolean requireConcrete) { + try { + Class type = getClassForDoc(classDoc); + return lhsClass.isAssignableFrom(type) && (!requireConcrete || JVMUtils.isConcrete(type)); + } catch (Throwable t) { + // Ignore errors. + return false; + } + } + + protected static Class getClassForDoc(ProgramElementDoc doc) throws ClassNotFoundException { + return Class.forName(getClassName(doc)); + } + + protected static Field getFieldForFieldDoc(FieldDoc fieldDoc) { + try { + Class clazz = getClassForDoc(fieldDoc.containingClass()); + return JVMUtils.findField(clazz, fieldDoc.name()); + } catch (ClassNotFoundException e) { + throw new RuntimeException(e); + } + } + + /** + * Reconstitute the class name from the given class JavaDoc object. + * + * @param doc the Javadoc model for the given class. + * @return The (string) class name of the given class. + */ + protected static String getClassName(ProgramElementDoc doc) { + PackageDoc containingPackage = doc.containingPackage(); + return containingPackage.name().length() > 0 ? + String.format("%s.%s", containingPackage.name(), doc.name()) : + String.format("%s", doc.name()); + } +} \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/utils/help/ResourceBundleExtractorDoclet.java b/public/java/src/org/broadinstitute/sting/utils/help/ResourceBundleExtractorDoclet.java index 4afac69c3..a28a7bcee 100644 --- a/public/java/src/org/broadinstitute/sting/utils/help/ResourceBundleExtractorDoclet.java +++ b/public/java/src/org/broadinstitute/sting/utils/help/ResourceBundleExtractorDoclet.java @@ -26,15 +26,12 @@ package org.broadinstitute.sting.utils.help; import com.sun.javadoc.*; +import org.broadinstitute.sting.gatk.walkers.Walker; +import org.broadinstitute.sting.utils.Utils; import java.io.*; import java.util.*; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.classloader.JVMUtils; -import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.gatk.walkers.Walker; - /** * Extracts certain types of javadoc (specifically package and class descriptions) and makes them available * to applications at runtime. @@ -46,17 +43,21 @@ public class ResourceBundleExtractorDoclet { /** * Taglet for the particular version number. */ - private static final String VERSION_TAGLET_NAME = "version"; + public static final String VERSION_TAGLET_NAME = "version"; + public static final String SUMMARY_TAGLET_NAME = "help.summary"; + public static final String DESCRIPTION_TAGLET_NAME = "help.description"; /** * Maintains a collection of resources in memory as they're accumulated. */ - private static final Properties resourceText = new Properties(); + protected final Properties resourceText = new Properties(); /** * Maintains a collection of classes that should really be documented. */ - private static final Set undocumentedWalkers = new HashSet(); + protected final Set undocumentedWalkers = new HashSet(); + + protected String buildTimestamp = null, absoluteVersion = null; /** * Extracts the contents of certain types of javadoc and adds them to an XML file. @@ -65,26 +66,38 @@ public class ResourceBundleExtractorDoclet { * @throws IOException if output can't be written. */ public static boolean start(RootDoc rootDoc) throws IOException { + ResourceBundleExtractorDoclet doclet = new ResourceBundleExtractorDoclet(); + PrintStream out = doclet.loadData(rootDoc, true); + doclet.processDocs(rootDoc, out); + return true; + } + + protected PrintStream loadData(RootDoc rootDoc, boolean overwriteResourcesFile) { PrintStream out = System.out; - String buildTimestamp = null, versionPrefix = null, versionSuffix = null, absoluteVersion = null; for(String[] options: rootDoc.options()) { if(options[0].equals("-out")) { - loadExistingResourceFile(options[1], rootDoc); - out = new PrintStream(options[1]); + try { + loadExistingResourceFile(options[1], rootDoc); + if ( overwriteResourcesFile ) + out = new PrintStream(options[1]); + } catch ( FileNotFoundException e ) { + throw new RuntimeException(e); + } catch ( IOException e ) { + throw new RuntimeException(e); + } } if(options[0].equals("-build-timestamp")) buildTimestamp = options[1]; - if(options[0].equals("-version-prefix")) - versionPrefix = options[1]; - if(options[0].equals("-version-suffix")) - versionSuffix = options[1]; if (options[0].equals("-absolute-version")) absoluteVersion = options[1]; } resourceText.setProperty("build.timestamp",buildTimestamp); + return out; + } + protected void processDocs(RootDoc rootDoc, PrintStream out) { // Cache packages as we see them, since there's no direct way to iterate over packages. Set packages = new HashSet(); @@ -95,13 +108,19 @@ public class ResourceBundleExtractorDoclet { if(isRequiredJavadocMissing(currentClass) && isWalker(currentClass)) undocumentedWalkers.add(currentClass.name()); - renderHelpText(getClassName(currentClass),currentClass,versionPrefix,versionSuffix,absoluteVersion); + renderHelpText(HelpUtils.getClassName(currentClass),currentClass); } for(PackageDoc currentPackage: packages) - renderHelpText(currentPackage.name(),currentPackage,versionPrefix,versionSuffix,absoluteVersion); + renderHelpText(currentPackage.name(),currentPackage); - resourceText.store(out,"Strings displayed by the Sting help system"); + try { + resourceText.store(out,"Strings displayed by the Sting help system"); + } catch ( FileNotFoundException e ) { + throw new RuntimeException(e); + } catch ( IOException e ) { + throw new RuntimeException(e); + } // ASCII codes for making text blink final String blink = "\u001B\u005B\u0035\u006D"; @@ -109,8 +128,6 @@ public class ResourceBundleExtractorDoclet { if(undocumentedWalkers.size() > 0) Utils.warnUser(String.format("The following walkers are currently undocumented: %s%s%s", blink, Utils.join(" ",undocumentedWalkers), reset)); - - return true; } /** @@ -119,7 +136,7 @@ public class ResourceBundleExtractorDoclet { * @return Number of potential parameters; 0 if not supported. */ public static int optionLength(String option) { - if(option.equals("-build-timestamp") || option.equals("-version-prefix") || option.equals("-version-suffix") || option.equals("-out") || option.equals("-absolute-version") ) { + if(option.equals("-build-timestamp") || option.equals("-out") || option.equals("-absolute-version") ) { return 2; } return 0; @@ -135,7 +152,7 @@ public class ResourceBundleExtractorDoclet { * @throws IOException if there is an I/O-related error other than FileNotFoundException * while attempting to read the resource file. */ - private static void loadExistingResourceFile( String resourceFileName, RootDoc rootDoc ) throws IOException { + private void loadExistingResourceFile( String resourceFileName, RootDoc rootDoc ) throws IOException { try { BufferedReader resourceFile = new BufferedReader(new FileReader(resourceFileName)); try { @@ -155,27 +172,8 @@ public class ResourceBundleExtractorDoclet { * @param classDoc the type of the given class. * @return True if the class of the given name is a walker. False otherwise. */ - private static boolean isWalker(ClassDoc classDoc) { - try { - Class type = Class.forName(getClassName(classDoc)); - return Walker.class.isAssignableFrom(type) && JVMUtils.isConcrete(type); - } - catch(Throwable t) { - // Ignore errors. - return false; - } - } - - /** - * Reconstitute the class name from the given class JavaDoc object. - * @param classDoc the Javadoc model for the given class. - * @return The (string) class name of the given class. - */ - private static String getClassName(ClassDoc classDoc) { - PackageDoc containingPackage = classDoc.containingPackage(); - return containingPackage.name().length() > 0 ? - String.format("%s.%s",containingPackage.name(),classDoc.name()) : - String.format("%s",classDoc.name()); + protected static boolean isWalker(ClassDoc classDoc) { + return HelpUtils.assignableToClass(classDoc, Walker.class, true); } /** @@ -184,8 +182,6 @@ public class ResourceBundleExtractorDoclet { * @return True if the JavaDoc is missing. False otherwise. */ private static boolean isRequiredJavadocMissing(ClassDoc classDoc) { - if(classDoc.containingPackage().name().contains("oneoffprojects")) - return false; return classDoc.commentText().length() == 0 || classDoc.commentText().contains("Created by IntelliJ"); } @@ -193,53 +189,23 @@ public class ResourceBundleExtractorDoclet { * Renders all the help text required for a given name. * @param elementName element name to use as the key * @param element Doc element to process. - * @param versionPrefix Text to add to the start of the version string. - * @param versionSuffix Text to add to the end of the version string. */ - private static void renderHelpText(String elementName, Doc element, String versionPrefix, String versionSuffix, String absoluteVersion) { - // Extract overrides from the doc tags. - String name = null; - String version = null; + private void renderHelpText(String elementName, Doc element) { StringBuilder summaryBuilder = new StringBuilder(); for(Tag tag: element.firstSentenceTags()) summaryBuilder.append(tag.text()); String summary = summaryBuilder.toString(); String description = element.commentText(); - for(Tag tag: element.tags()) { - if(tag.name().equals("@"+DisplayNameTaglet.NAME)) { - if(name != null) - throw new ReviewedStingException("Only one display name tag can be used per package / walker."); - name = tag.text(); - } - else if(tag.name().equals("@"+VERSION_TAGLET_NAME)) { - if ( absoluteVersion != null ) { - version = absoluteVersion; - } - else { - version = String.format("%s%s%s", (versionPrefix != null) ? versionPrefix : "", - tag.text(), - (versionSuffix != null) ? versionSuffix : ""); - } - } - else if(tag.name().equals("@"+SummaryTaglet.NAME)) - summary = tag.text(); - else if(tag.name().equals("@"+DescriptionTaglet.NAME)) - description = tag.text(); - } - - // Write out an alternate element name, if exists. - if(name != null) - resourceText.setProperty(String.format("%s.%s",elementName,DisplayNameTaglet.NAME),name); - - if(version != null) - resourceText.setProperty(String.format("%s.%s",elementName,VERSION_TAGLET_NAME),version); + // this might seem unnecessary, but the GATK command line program uses this tag to determine the version when running + if(absoluteVersion != null) + resourceText.setProperty(String.format("%s.%s",elementName,VERSION_TAGLET_NAME),absoluteVersion); // Write out an alternate element summary, if exists. - resourceText.setProperty(String.format("%s.%s",elementName,SummaryTaglet.NAME),formatText(summary)); + resourceText.setProperty(String.format("%s.%s",elementName,SUMMARY_TAGLET_NAME),formatText(summary)); // Write out an alternate description, if present. - resourceText.setProperty(String.format("%s.%s",elementName,DescriptionTaglet.NAME),formatText(description)); + resourceText.setProperty(String.format("%s.%s",elementName,DESCRIPTION_TAGLET_NAME),formatText(description)); } /** diff --git a/public/java/src/org/broadinstitute/sting/utils/help/SummaryTaglet.java b/public/java/src/org/broadinstitute/sting/utils/help/SummaryTaglet.java deleted file mode 100644 index 036bbec4f..000000000 --- a/public/java/src/org/broadinstitute/sting/utils/help/SummaryTaglet.java +++ /dev/null @@ -1,59 +0,0 @@ -package org.broadinstitute.sting.utils.help; - -import com.sun.tools.doclets.Taglet; -import com.sun.javadoc.Tag; - -import java.util.Map; - -/** - * Provide an alternate brief summary for this walker / package. - * Acts as an alternative to the first sentence employed by default. - * @author mhanna - * @version 0.1 - */ -public class SummaryTaglet extends HelpTaglet { - /** - * The key tag for this taglet. - */ - public static final String NAME = "help.summary"; - - /** - * Return the name of this custom tag. - */ - @Override - public String getName() { - return NAME; - } - - /** - * Will return false since overviews are always named - * by the @WalkerName tag. - * @return false always - */ - @Override - public boolean inOverview() { - return true; - } - - /** - * Will return true to indicate that packages can be given useful summary. - * @return true always - */ - @Override - public boolean inPackage() { - return true; - } - - /** - * Register this Taglet. - * @param tagletMap the map to register this tag to. - */ - public static void register(Map tagletMap) { - SummaryTaglet tag = new SummaryTaglet(); - Taglet t = (Taglet)tagletMap.get(tag.getName()); - if (t != null) { - tagletMap.remove(tag.getName()); - } - tagletMap.put(tag.getName(), tag); - } -} \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/utils/instrumentation/Sizeof.java b/public/java/src/org/broadinstitute/sting/utils/instrumentation/Sizeof.java index 3420c9876..73a29ba4f 100644 --- a/public/java/src/org/broadinstitute/sting/utils/instrumentation/Sizeof.java +++ b/public/java/src/org/broadinstitute/sting/utils/instrumentation/Sizeof.java @@ -27,9 +27,9 @@ package org.broadinstitute.sting.utils.instrumentation; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.lang.instrument.Instrumentation; +import java.lang.reflect.Array; import java.lang.reflect.Field; import java.lang.reflect.Modifier; -import java.lang.reflect.Array; import java.util.IdentityHashMap; /** diff --git a/public/java/src/org/broadinstitute/sting/utils/interval/IntervalFileMergingIterator.java b/public/java/src/org/broadinstitute/sting/utils/interval/IntervalFileMergingIterator.java index e722ac196..988240ef9 100644 --- a/public/java/src/org/broadinstitute/sting/utils/interval/IntervalFileMergingIterator.java +++ b/public/java/src/org/broadinstitute/sting/utils/interval/IntervalFileMergingIterator.java @@ -25,16 +25,16 @@ package org.broadinstitute.sting.utils.interval; +import org.broadinstitute.sting.gatk.iterators.PushbackIterator; +import org.broadinstitute.sting.gatk.refdata.utils.StringToGenomeLocIteratorAdapter; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.text.XReadLines; -import org.broadinstitute.sting.gatk.iterators.PushbackIterator; -import org.broadinstitute.sting.gatk.refdata.utils.StringToGenomeLocIteratorAdapter; -import java.util.Iterator; import java.io.File; import java.io.FileNotFoundException; +import java.util.Iterator; /** * Created by IntelliJ IDEA. diff --git a/public/java/src/org/broadinstitute/sting/utils/interval/IntervalUtils.java b/public/java/src/org/broadinstitute/sting/utils/interval/IntervalUtils.java index 80dc35455..f551e1368 100644 --- a/public/java/src/org/broadinstitute/sting/utils/interval/IntervalUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/interval/IntervalUtils.java @@ -5,17 +5,17 @@ import net.sf.picard.util.IntervalList; import net.sf.samtools.SAMFileHeader; import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource; -import org.broadinstitute.sting.utils.GenomeLocSortedSet; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.GenomeLocSortedSet; import org.broadinstitute.sting.utils.bed.BedParser; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.text.XReadLines; +import java.io.File; import java.io.IOException; import java.util.*; -import java.io.File; /** * Parse text representations of interval strings that diff --git a/public/java/src/org/broadinstitute/sting/utils/interval/NwayIntervalMergingIterator.java b/public/java/src/org/broadinstitute/sting/utils/interval/NwayIntervalMergingIterator.java index 31a2f41fb..7e87ce8b5 100644 --- a/public/java/src/org/broadinstitute/sting/utils/interval/NwayIntervalMergingIterator.java +++ b/public/java/src/org/broadinstitute/sting/utils/interval/NwayIntervalMergingIterator.java @@ -26,11 +26,10 @@ package org.broadinstitute.sting.utils.interval; import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.exceptions.StingException; import org.broadinstitute.sting.utils.exceptions.UserException; -import java.util.PriorityQueue; import java.util.Iterator; +import java.util.PriorityQueue; /** * Created by IntelliJ IDEA. diff --git a/public/java/src/org/broadinstitute/sting/utils/interval/OverlappingIntervalIterator.java b/public/java/src/org/broadinstitute/sting/utils/interval/OverlappingIntervalIterator.java index 0b63d582e..29ffb13e4 100755 --- a/public/java/src/org/broadinstitute/sting/utils/interval/OverlappingIntervalIterator.java +++ b/public/java/src/org/broadinstitute/sting/utils/interval/OverlappingIntervalIterator.java @@ -25,9 +25,8 @@ package org.broadinstitute.sting.utils.interval; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocSortedSet; import org.broadinstitute.sting.gatk.iterators.PushbackIterator; +import org.broadinstitute.sting.utils.GenomeLoc; import java.util.Iterator; diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java b/public/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java index 543302446..3821c9c8a 100644 --- a/public/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java +++ b/public/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java @@ -24,19 +24,16 @@ package org.broadinstitute.sting.utils.pileup; +import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.datasources.sample.Sample; -import org.broadinstitute.sting.utils.HasGenomeLocation; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.BaseUtils; -import org.broadinstitute.sting.gatk.iterators.IterableIterator; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.exceptions.StingException; import java.util.*; -import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.utils.exceptions.StingException; - /** * A generic implementation of read-backed pileups. * diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/ExtendedEventPileupElement.java b/public/java/src/org/broadinstitute/sting/utils/pileup/ExtendedEventPileupElement.java index b6f3e9f09..26e66014c 100644 --- a/public/java/src/org/broadinstitute/sting/utils/pileup/ExtendedEventPileupElement.java +++ b/public/java/src/org/broadinstitute/sting/utils/pileup/ExtendedEventPileupElement.java @@ -1,8 +1,5 @@ package org.broadinstitute.sting.utils.pileup; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.BaseUtils; import net.sf.samtools.SAMRecord; import java.util.Arrays; diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/FragmentPileup.java b/public/java/src/org/broadinstitute/sting/utils/pileup/FragmentPileup.java index 6c855c1c7..f7d237401 100644 --- a/public/java/src/org/broadinstitute/sting/utils/pileup/FragmentPileup.java +++ b/public/java/src/org/broadinstitute/sting/utils/pileup/FragmentPileup.java @@ -1,6 +1,9 @@ package org.broadinstitute.sting.utils.pileup; -import java.util.*; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.Map; /** * An easy to access fragment-based pileup, which contains two separate pileups. The first diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/MergingPileupElementIterator.java b/public/java/src/org/broadinstitute/sting/utils/pileup/MergingPileupElementIterator.java index d8af2ea8f..7005cf869 100644 --- a/public/java/src/org/broadinstitute/sting/utils/pileup/MergingPileupElementIterator.java +++ b/public/java/src/org/broadinstitute/sting/utils/pileup/MergingPileupElementIterator.java @@ -27,9 +27,9 @@ package org.broadinstitute.sting.utils.pileup; import net.sf.picard.util.PeekableIterator; import org.broadinstitute.sting.gatk.datasources.sample.Sample; -import java.util.PriorityQueue; import java.util.Comparator; import java.util.Iterator; +import java.util.PriorityQueue; /** * Merges multiple pileups broken down by sample. diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java b/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java index 51e02bf74..66e1afecb 100755 --- a/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java +++ b/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java @@ -1,9 +1,10 @@ package org.broadinstitute.sting.utils.pileup; -import org.broadinstitute.sting.utils.*; +import com.google.java.contract.Ensures; +import com.google.java.contract.Requires; import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.sam.ReadUtils; -import com.google.java.contract.*; /** * Created by IntelliJ IDEA. diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedExtendedEventPileup.java b/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedExtendedEventPileup.java index a32aa5645..8d43a368a 100644 --- a/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedExtendedEventPileup.java +++ b/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedExtendedEventPileup.java @@ -24,15 +24,13 @@ package org.broadinstitute.sting.utils.pileup; +import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.gatk.datasources.sample.Sample; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.collections.Pair; -import java.util.Iterator; -import java.util.List; import java.util.Collection; - -import net.sf.samtools.SAMRecord; +import java.util.List; /** * A clean interface for working with extended event pileups. diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedExtendedEventPileupImpl.java b/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedExtendedEventPileupImpl.java index a1a08c95f..31d29430a 100644 --- a/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedExtendedEventPileupImpl.java +++ b/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedExtendedEventPileupImpl.java @@ -23,15 +23,14 @@ */ package org.broadinstitute.sting.utils.pileup; +import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.gatk.datasources.sample.Sample; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.collections.Pair; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.util.*; -import net.sf.samtools.SAMRecord; - public class ReadBackedExtendedEventPileupImpl extends AbstractReadBackedPileup implements ReadBackedExtendedEventPileup { private int nInsertions; private int maxDeletionLength; // cached value of the length of the longest deletion observed at the site diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileup.java b/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileup.java index c52cc0b52..36b8a8c65 100644 --- a/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileup.java +++ b/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileup.java @@ -24,14 +24,13 @@ package org.broadinstitute.sting.utils.pileup; +import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.gatk.datasources.sample.Sample; import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.gatk.iterators.IterableIterator; -import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.utils.HasGenomeLocation; -import java.util.List; import java.util.Collection; +import java.util.List; /** * A data retrieval interface for accessing parts of the pileup. diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileupImpl.java b/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileupImpl.java index 70eba577c..e5b054961 100644 --- a/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileupImpl.java +++ b/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileupImpl.java @@ -23,9 +23,9 @@ */ package org.broadinstitute.sting.utils.pileup; +import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.gatk.datasources.sample.Sample; import org.broadinstitute.sting.utils.GenomeLoc; -import net.sf.samtools.SAMRecord; import java.util.List; import java.util.Map; diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java index 4e4294b20..344eccb83 100644 --- a/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java @@ -25,15 +25,16 @@ package org.broadinstitute.sting.utils.sam; -import net.sf.samtools.CigarOperator; -import net.sf.samtools.SAMRecord; import net.sf.samtools.Cigar; import net.sf.samtools.CigarElement; +import net.sf.samtools.CigarOperator; +import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.utils.BaseUtils; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.pileup.*; import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import java.util.ArrayList; import java.util.Arrays; diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialReadsTraversal.java b/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialReadsTraversal.java index 5812c9aec..5f7db458a 100644 --- a/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialReadsTraversal.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialReadsTraversal.java @@ -1,15 +1,13 @@ package org.broadinstitute.sting.utils.sam; -import org.broadinstitute.sting.gatk.traversals.TraversalEngine; -import org.broadinstitute.sting.gatk.walkers.Walker; -import org.broadinstitute.sting.gatk.walkers.ReadWalker; -import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.apache.log4j.Logger; - -import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMFileHeader; +import net.sf.samtools.SAMRecord; +import org.apache.log4j.Logger; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider; +import org.broadinstitute.sting.gatk.traversals.TraversalEngine; +import org.broadinstitute.sting.gatk.walkers.ReadWalker; +import org.broadinstitute.sting.gatk.walkers.Walker; /* diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMFileReader.java b/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMFileReader.java index ce6ca570c..adf60b16b 100644 --- a/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMFileReader.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMFileReader.java @@ -1,15 +1,17 @@ package org.broadinstitute.sting.utils.sam; import net.sf.samtools.*; - -import java.io.InputStream; -import java.io.ByteArrayInputStream; -import java.io.UnsupportedEncodingException; -import java.util.*; - -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + +import java.io.ByteArrayInputStream; +import java.io.InputStream; +import java.io.UnsupportedEncodingException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Iterator; +import java.util.List; /** * User: hanna * Date: Jun 11, 2009 diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMIterator.java b/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMIterator.java index b683f5247..62e371bc0 100644 --- a/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMIterator.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMIterator.java @@ -1,9 +1,8 @@ package org.broadinstitute.sting.utils.sam; -import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; -import org.broadinstitute.sting.gatk.ReadProperties; -import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMFileHeader; +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; import java.util.Iterator; diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMQueryIterator.java b/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMQueryIterator.java index 4f5dcca61..2b359fe6a 100644 --- a/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMQueryIterator.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMQueryIterator.java @@ -1,13 +1,12 @@ package org.broadinstitute.sting.utils.sam; import net.sf.samtools.SAMFileHeader; -import net.sf.samtools.SAMSequenceRecord; import net.sf.samtools.SAMRecord; +import net.sf.samtools.SAMSequenceRecord; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.util.List; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; - /* * Copyright (c) 2009 The Broad Institute diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ComparableSAMRecord.java b/public/java/src/org/broadinstitute/sting/utils/sam/ComparableSAMRecord.java index 01f1dfe96..31deb7535 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/ComparableSAMRecord.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/ComparableSAMRecord.java @@ -26,8 +26,6 @@ package org.broadinstitute.sting.utils.sam; import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; public class ComparableSAMRecord implements Comparable { diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMReadGroupRecord.java b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMReadGroupRecord.java index 2b8ac387c..c7ffcab0c 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMReadGroupRecord.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMReadGroupRecord.java @@ -1,6 +1,6 @@ package org.broadinstitute.sting.utils.sam; -import net.sf.samtools.*; +import net.sf.samtools.SAMReadGroupRecord; /** * @author ebanks diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java index f3d9edab9..517f9f75d 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java @@ -1,12 +1,14 @@ package org.broadinstitute.sting.utils.sam; -import java.lang.reflect.Method; -import java.util.*; - import net.sf.samtools.*; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; +import java.lang.reflect.Method; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + /** * @author ebanks * GATKSAMRecord diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java index 080762039..6c15910b1 100644 --- a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java @@ -25,13 +25,14 @@ package org.broadinstitute.sting.utils.sam; -import com.google.java.contract.*; +import com.google.java.contract.Ensures; +import com.google.java.contract.Requires; import net.sf.samtools.*; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import java.util.*; import java.io.File; +import java.util.*; /** * A miscellaneous collection of utilities for working with SAM files, headers, etc. diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/SAMFileReaderBuilder.java b/public/java/src/org/broadinstitute/sting/utils/sam/SAMFileReaderBuilder.java index f8a0ca6a7..bb9db5d98 100644 --- a/public/java/src/org/broadinstitute/sting/utils/sam/SAMFileReaderBuilder.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/SAMFileReaderBuilder.java @@ -26,11 +26,10 @@ package org.broadinstitute.sting.utils.sam; import net.sf.samtools.SAMFileReader; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.io.File; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; - /** * Allows the user to steadily accumulate information about what * components go into a SAM file writer, ultimately using this diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/SimplifyingSAMFileWriter.java b/public/java/src/org/broadinstitute/sting/utils/sam/SimplifyingSAMFileWriter.java index df2010e8b..60e1d9948 100644 --- a/public/java/src/org/broadinstitute/sting/utils/sam/SimplifyingSAMFileWriter.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/SimplifyingSAMFileWriter.java @@ -3,13 +3,6 @@ package org.broadinstitute.sting.utils.sam; import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMFileWriter; import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.exceptions.UserException; - -import java.util.Iterator; -import java.util.NoSuchElementException; /** * XXX diff --git a/public/java/src/org/broadinstitute/sting/utils/text/ListFileUtils.java b/public/java/src/org/broadinstitute/sting/utils/text/ListFileUtils.java index 9beb7895b..f6aa882ad 100644 --- a/public/java/src/org/broadinstitute/sting/utils/text/ListFileUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/text/ListFileUtils.java @@ -26,7 +26,6 @@ package org.broadinstitute.sting.utils.text; import org.broadinstitute.sting.commandline.ParsingEngine; import org.broadinstitute.sting.commandline.Tags; -import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection; import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID; import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet; import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; diff --git a/public/java/src/org/broadinstitute/sting/utils/text/TextFormattingUtils.java b/public/java/src/org/broadinstitute/sting/utils/text/TextFormattingUtils.java index 803d6ac0f..1d4251542 100644 --- a/public/java/src/org/broadinstitute/sting/utils/text/TextFormattingUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/text/TextFormattingUtils.java @@ -28,11 +28,11 @@ package org.broadinstitute.sting.utils.text; import org.apache.log4j.Logger; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import java.util.*; -import java.util.regex.Pattern; -import java.util.regex.Matcher; -import java.io.StringReader; import java.io.IOException; +import java.io.StringReader; +import java.util.*; +import java.util.regex.Matcher; +import java.util.regex.Pattern; /** * Common utilities for dealing with text formatting. diff --git a/public/java/src/org/broadinstitute/sting/utils/text/XReadLines.java b/public/java/src/org/broadinstitute/sting/utils/text/XReadLines.java index 064256388..52b6f3b01 100644 --- a/public/java/src/org/broadinstitute/sting/utils/text/XReadLines.java +++ b/public/java/src/org/broadinstitute/sting/utils/text/XReadLines.java @@ -25,10 +25,10 @@ package org.broadinstitute.sting.utils.text; -import java.util.Iterator; -import java.util.List; -import java.util.LinkedList; import java.io.*; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; /** * Support for Python-like xreadlines() function as a class. This is an iterator and iterable over diff --git a/public/java/src/org/broadinstitute/sting/utils/threading/ClosableReentrantLock.java b/public/java/src/org/broadinstitute/sting/utils/threading/ClosableReentrantLock.java deleted file mode 100644 index d16c19130..000000000 --- a/public/java/src/org/broadinstitute/sting/utils/threading/ClosableReentrantLock.java +++ /dev/null @@ -1,16 +0,0 @@ -package org.broadinstitute.sting.utils.threading; - -import java.util.concurrent.locks.ReentrantLock; - -/** - * Created by IntelliJ IDEA. - * User: depristo - * Date: 1/19/11 - * Time: 9:50 AM - * - * Simple extension of a ReentrantLock that supports a close method. - */ -public class ClosableReentrantLock extends ReentrantLock { - public boolean ownsLock() { return super.isHeldByCurrentThread(); } - public void close() {} -} diff --git a/public/java/src/org/broadinstitute/sting/utils/threading/FileBackedGenomeLocProcessingTracker.java b/public/java/src/org/broadinstitute/sting/utils/threading/FileBackedGenomeLocProcessingTracker.java deleted file mode 100644 index cae099eeb..000000000 --- a/public/java/src/org/broadinstitute/sting/utils/threading/FileBackedGenomeLocProcessingTracker.java +++ /dev/null @@ -1,118 +0,0 @@ -package org.broadinstitute.sting.utils.threading; - -import org.apache.log4j.Logger; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.exceptions.UserException; - -import java.io.*; -import java.nio.ByteBuffer; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.List; -import java.util.concurrent.locks.ReentrantLock; - -/** - * Keeps a copy of the processing locks in a file - */ -public class FileBackedGenomeLocProcessingTracker extends GenomeLocProcessingTracker { - private static final Logger logger = Logger.getLogger(FileBackedGenomeLocProcessingTracker.class); - private static final boolean DEBUG = false; - private static final String READ_MODE = "r"; - private static final String WRITE_MODE = "rws"; - - private final File sharedFile; - private final GenomeLocParser parser; - private long lastReadPosition = 0; - - public FileBackedGenomeLocProcessingTracker(File sharedFile, GenomeLocParser parser, ClosableReentrantLock lock, PrintStream status) { - super(lock, status); - - this.sharedFile = sharedFile; - this.parser = parser; - } - - private RandomAccessFile openFile(String mode) { - try { - return new RandomAccessFile(sharedFile, mode); - } catch (FileNotFoundException e) { - throw new UserException.CouldNotCreateOutputFile(sharedFile, e); - } - } - - private void closeFile(RandomAccessFile raFile) { - try { - if ( raFile != null ) raFile.close(); - } catch (IOException e) { - throw new UserException.CouldNotCreateOutputFile(sharedFile, e); - } - } - - @Override - protected List readNewLocs() { - List newPLocs = new ArrayList(); // todo -- gratitous object creation - - if ( sharedFile.exists() ) { - RandomAccessFile raFile = null; - try { - raFile = openFile(READ_MODE); - //logger.warn(String.format("Reading new locs at: file.length=%d last=%d", raFile.length(), lastReadPosition)); - if ( raFile.length() > lastReadPosition ) { - raFile.seek(lastReadPosition); - - int counter = 0; - String line = raFile.readLine(); // Read another line - while ( line != null ) { - String[] parts = line.split(" "); - if ( parts.length != 2 ) throw new ReviewedStingException("BUG: bad sharedFile line '" + line + "' at " + raFile.getFilePointer()); - ProcessingLoc ploc = new ProcessingLoc(parser.parseGenomeLoc(parts[0]), parts[1]); - //logger.warn(" Read " + ploc); - newPLocs.add(ploc); - line = raFile.readLine(); - counter++; - } - lastReadPosition = raFile.getFilePointer(); - if ( DEBUG ) logger.warn(String.format("Read %s locs from file, current pos is %d, # read new locs is %d", - counter, lastReadPosition, newPLocs.size())); - } - } catch (FileNotFoundException e) { - throw new UserException.CouldNotReadInputFile(sharedFile, e); - } catch (IOException e) { - throw new ReviewedStingException("Couldn't read sharedFile " + sharedFile, e); - } finally { - closeFile(raFile); - } - } - - return newPLocs; - } - - @Override - protected void registerNewLocs(Collection plocs) { - RandomAccessFile raFile = null; - - try { - raFile = openFile(WRITE_MODE); - long startPos = raFile.getFilePointer(); - raFile.seek(raFile.length()); - StringBuffer bytes = new StringBuffer(); - for ( ProcessingLoc ploc : plocs ) { - String packet = String.format("%s %s%n", ploc.getLocation(), ploc.getOwner()); - bytes.append(packet); - if ( DEBUG ) logger.warn(String.format("Wrote loc %s to file: %d + %d bytes ending at %d", ploc, startPos, packet.length(), raFile.getFilePointer())); - } - raFile.write(bytes.toString().getBytes()); - //raFile.getChannel().force(true); - } catch (FileNotFoundException e) { - throw new UserException.CouldNotCreateOutputFile(sharedFile, e); - } catch (IOException e) { - throw new UserException.CouldNotCreateOutputFile(sharedFile, e); - } finally { - closeFile(raFile); - } - } -} - - diff --git a/public/java/src/org/broadinstitute/sting/utils/threading/GenomeLocProcessingTracker.java b/public/java/src/org/broadinstitute/sting/utils/threading/GenomeLocProcessingTracker.java deleted file mode 100644 index e97a73fb8..000000000 --- a/public/java/src/org/broadinstitute/sting/utils/threading/GenomeLocProcessingTracker.java +++ /dev/null @@ -1,486 +0,0 @@ -package org.broadinstitute.sting.utils.threading; - -import net.sf.picard.reference.IndexedFastaSequenceFile; -import org.apache.log4j.Logger; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.HasGenomeLocation; -import org.broadinstitute.sting.utils.SimpleTimer; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.exceptions.UserException; - -import java.io.File; -import java.io.FileNotFoundException; -import java.io.PrintStream; -import java.text.SimpleDateFormat; -import java.util.*; - -/** - * Abstract base class to coordinating data processing by a collecting for processes / threads. - * - * Conceptually, the genome is viewed as a collection of non-overlapping genome location: - * - * chr1:1-10 - * chr1:11-20 - * chr1:21-30 - * etc. - * - * This class, and it's concrete derived classes, provide the ability to claim individual locations - * as "mine", and exclude other processes / threads from processing them. At the lowest-level this - * is implemented by the claimOwnership(loc, name) function, that returns true if loc free (unclaimed) - * and makes name the owner of loc. High-level, and more efficient operations provide claiming - * iterators over streams of objects implementing the HasGenomeLocation interface, so that you can - * write code that looks like: - * - * for ( GenomeLoc ownedLoc : onlyOwned(allLocsToProcess.iterator) ) { - * doSomeWork(ownedLoc) - * - * Much of the code in this class is actually surrounding debugging and performance metrics code. - * The actual synchronization code is separated out into the ClosableReentrantLock() system - * and the two abstract functions: - * - * protected abstract void registerNewLocs(Collection plocs); - * protected abstract Collection readNewLocs(); - * - * That maintain the state of the tracker. - * - * That is, the ProcessingTracker is made of two components: a thread / process locking system and - * a subclass that implements the methods to record new claimed state changes and to read out updates - * that may have occurred by another thread or process. - * - * NOTE: this class assumes that all threads / processes are working with the same set of potential - * GenomeLocs to own. Claiming chr1:1-10 and then chr1:5-6 is allowed by the system. Basically, - * you only can stake claim to GenomeLocs that are .equal(). - */ -public abstract class GenomeLocProcessingTracker { - private final static Logger logger = Logger.getLogger(FileBackedGenomeLocProcessingTracker.class); - private final static SimpleDateFormat STATUS_FORMAT = new SimpleDateFormat("HH:mm:ss,SSS"); - private final static int DEFAULT_OWNERSHIP_ITERATOR_SIZE = 1; - - /** - * Useful state strings for printing status - */ - private final static String GOING_FOR_LOCK = "going_for_lock"; - private final static String RELEASING_LOCK = "releasing_lock"; - private final static String HAVE_LOCK = "have_lock"; - private final static String RUNNING = "running"; - - /** - * A map, for efficiency, that allows quick lookup of the processing loc for a - * given GenomeLoc. The map points from loc -> loc / owner as a ProcessingLoc - */ - private final Map processingLocs; - - /** - * The locking object used to protect data from simulatanous access by multiple - * threads or processes. - */ - private final ClosableReentrantLock lock; - - /** A stream for writing status messages. Can be null if we aren't writing status */ - private final PrintStream status; - - // - // Timers for recording performance information - // Note -- these cannot be used because this class isn't thread safe, and neither are the - // timers, so they result in invalid operations w.r.t. the SimpleTimer contract - // -// protected final SimpleTimer writeTimer = new SimpleTimer("writeTimer"); -// protected final SimpleTimer readTimer = new SimpleTimer("readTimer"); -// protected final SimpleTimer lockWaitTimer = new SimpleTimer("lockWaitTimer"); - protected final SimpleTimer timer = new SimpleTimer(); - protected long nLocks = 0, nWrites = 0, nReads = 0; - - // -------------------------------------------------------------------------------- - // - // Creating ProcessingTrackers - // - // -------------------------------------------------------------------------------- - public GenomeLocProcessingTracker(ClosableReentrantLock lock, PrintStream status) { - this.processingLocs = new HashMap(); - this.status = status; - this.lock = lock; - printStatusHeader(); - } - - // -------------------------------------------------------------------------------- - // - // Code to override to change the dynamics of the the GenomeLocProcessingTracker - // - // -------------------------------------------------------------------------------- - - protected void close() { - lock.close(); - if ( status != null ) status.close(); - } - - /** - * Takes a collection of newly claimed (i.e., previous unclaimed) genome locs - * and the name of their owner and "registers" this data in some persistent way that's - * visible to all threads / processes communicating via this GenomeLocProcessingTracker. - * - * Could be a in-memory data structure (a list) if we are restricting ourselves to intra-memory - * parallelism, a locked file on a shared file system, or a server we communicate with. - * - * @param plocs - */ - protected abstract void registerNewLocs(Collection plocs); - - /** - * The inverse of the registerNewLocs() function. Looks at the persistent data store - * shared by all threads / processes and returns the ones that have appeared since the last - * call to readNewLocs(). Note that we expect the pair of registerNewLocs and readNewLocs to - * include everything, even locs registered by this thread / process. For example: - * - * readNewLocs() => List() - * registerNewLocs(List(x, y,)) => void - * readNewLocs() => List(x,y)) - * - * even for this thread or process. - * @return - */ - protected abstract Collection readNewLocs(); - - - // -------------------------------------------------------------------------------- - // - // Code to claim intervals for processing and query for their ownership - // - // -------------------------------------------------------------------------------- - - /** - * Queries the current database if a location is owned. Does not guarantee that the - * loc can be owned in a future call, though. - * - * @param loc - * @return - */ - public final boolean locIsOwned(GenomeLoc loc, String id) { - return findOwner(loc, id) != null; - } - - /** - * The workhorse routine. Attempt to claim processing ownership of loc, with my name. - * This is an atomic operation -- other threads / processes will wait until this function - * returns. The return result is the ProcessingLoc object describing who owns this - * location. If the location isn't already claimed and we now own the location, the pl owner - * will be myName. Otherwise, the name of the owner can found in the pl. - * - * @param loc - * @param myName - * @return - */ - public final ProcessingLoc claimOwnership(final GenomeLoc loc, final String myName) { - // processingLocs is a shared memory synchronized object, and this - // method is synchronized, so we can just do our processing - return new WithLock(myName) { - public ProcessingLoc doBody() { - ProcessingLoc owner = findOwner(loc, myName); - if ( owner == null ) { // we are unowned - owner = new ProcessingLoc(loc, myName); - registerNewLocsWithTimers(Arrays.asList(owner), myName); - } - return owner; - } - }.run(); - } - - - // -------------------------------------------------------------------------------- - // - // High-level iterator-style interface to claiming ownership - // - // -------------------------------------------------------------------------------- - - /** - * A higher-level, and more efficient, interface to obtain the next location we own. Takes an - * iterator producing objects that support the getLocation() interface, and returns the next - * object in that stream that we can claim ownership of. Returns null if we run out of elements - * during the iteration. - * - * Can be more efficiently implemented in subclasses to avoid multiple unlocking - * - * @param iterator - * @param myName - * @return - */ - public final T claimOwnershipOfNextAvailable(Iterator iterator, String myName) { - OwnershipIterator myIt = new OwnershipIterator(iterator, myName, 1); - return myIt.next(); - } - - public final Iterable onlyOwned(Iterator iterator, String myName) { - return new OwnershipIterator(iterator, myName); - } - - private final class OwnershipIterator implements Iterator, Iterable { - private final Iterator subit; - private final String myName; - private final Queue cache; - private final int cacheSize; - - public OwnershipIterator(Iterator subit, String myName) { - this(subit, myName, DEFAULT_OWNERSHIP_ITERATOR_SIZE); - } - - public OwnershipIterator(Iterator subit, String myName, int cacheSize) { - this.subit = subit; - this.myName = myName; - cache = new LinkedList(); - this.cacheSize = cacheSize; - } - - /** - * Will return true for all elements of subit, even if we can't get ownership of some of the future - * elements and so will return null there - * @return - */ - public final boolean hasNext() { - return cache.peek() != null || subit.hasNext(); - } - - /** - * High performance iterator that only locks and unlocks once per claimed object found. Avoids - * locking / unlocking for each query - * - * @return an object of type T owned by this thread, or null if none of the remaining object could be claimed - */ - public final T next() { - if ( cache.peek() != null) - return cache.poll(); - else { - // cache is empty, we need to fill up the cache and return the first element of the queue - return new WithLock(myName) { - public T doBody() { - // read once the database of owners at the start - updateAndGetProcessingLocs(myName); - - boolean done = false; - Queue pwns = new LinkedList(); // ;-) - while ( !done && cache.size() < cacheSize && subit.hasNext() ) { - final T elt = subit.next(); - GenomeLoc loc = elt.getLocation(); - - ProcessingLoc owner = processingLocs.get(loc); - - if ( owner == null ) { // we are unowned - owner = new ProcessingLoc(loc, myName); - pwns.offer(owner); - if ( ! cache.offer(elt) ) throw new ReviewedStingException("Cache offer unexpectedly failed"); - if ( GenomeLoc.isUnmapped(loc) ) done = true; - } - // if not, we continue our search - } - - registerNewLocsWithTimers(pwns, myName); - - // we've either filled up the cache or run out of elements. Either way we return - // the first element of the cache. If the cache is empty, we return null here. - return cache.poll(); - } - }.run(); - } - } - - public final void remove() { - throw new UnsupportedOperationException(); - } - - public final Iterator iterator() { - return this; - } - } - - // -------------------------------------------------------------------------------- - // - // private / protected low-level accessors / manipulators and utility functions - // - // -------------------------------------------------------------------------------- - - /** - * Useful debugging function that returns the ProcessingLoc who owns loc. ID - * is provided for debugging purposes - * @param loc - * @param id - * @return - */ - protected final ProcessingLoc findOwner(GenomeLoc loc, String id) { - // fast path to check if we already have the existing genome loc in memory for ownership claims - // getProcessingLocs() may be expensive [reading from disk, for example] so we shouldn't call it - // unless necessary - ProcessingLoc x = processingLocs.get(loc); - return x == null ? updateAndGetProcessingLocs(id).get(loc) : x; - } - - /** - * Returns the list of currently owned locations, updating the database as necessary. - * DO NOT MODIFY THIS MAP! As with all parallelizing data structures, the list may be - * out of date immediately after the call returns, or may be updating on the fly. - * @return - */ - protected final Map updateAndGetProcessingLocs(String myName) { - return new WithLock>(myName) { - public Map doBody() { -// readTimer.restart(); - for ( ProcessingLoc p : readNewLocs() ) - processingLocs.put(p.getLocation(), p); -// readTimer.stop(); - nReads++; - return processingLocs; - } - }.run(); - } - - /** - * Wrapper around registerNewLocs that also times the operation - * - * @param plocs - * @param myName - */ - protected final void registerNewLocsWithTimers(Collection plocs, String myName) { -// writeTimer.restart(); - registerNewLocs(plocs); - nWrites++; -// writeTimer.stop(); - } - - private final void printStatusHeader() { - if ( status != null ) status.printf("process.id\thr.time\ttime\tstate%n"); - } - - private final void printStatus(String id, long machineTime, String state) { - // prints a line like processID human-readable-time machine-time state - if ( status != null ) { - status.printf("%s\t%s\t%d\t%s%n", id, STATUS_FORMAT.format(machineTime), machineTime, state); - status.flush(); - } - } - - - /** - * Lock the data structure, preventing other threads / processes from reading and writing to the - * common store - * @param id the name of the process doing the locking - */ - private final void lock(String id) { - //lockWaitTimer.restart(); - boolean hadLock = lock.ownsLock(); - if ( ! hadLock ) { - nLocks++; - //printStatus(id, lockWaitTimer.currentTime(), GOING_FOR_LOCK); - } - lock.lock(); - //lockWaitTimer.stop(); - //if ( ! hadLock ) printStatus(id, lockWaitTimer.currentTime(), HAVE_LOCK); - } - - /** - * Unlock the data structure, allowing other threads / processes to read and write to the common store - * @param id the name of the process doing the unlocking - */ - private final void unlock(String id) { - if ( lock.getHoldCount() == 1 ) printStatus(id, timer.currentTime(), RELEASING_LOCK); - lock.unlock(); - if ( ! lock.ownsLock() ) printStatus(id, timer.currentTime(), RUNNING); - } - - // useful code for getting - public final long getNLocks() { return nLocks; } - public final long getNReads() { return nReads; } - public final long getNWrites() { return nWrites; } -// public final double getTimePerLock() { return lockWaitTimer.getElapsedTime() / Math.max(nLocks, 1); } -// public final double getTimePerRead() { return readTimer.getElapsedTime() / Math.max(nReads,1); } -// public final double getTimePerWrite() { return writeTimer.getElapsedTime() / Math.max(nWrites,1); } - - // -------------------------------------------------------------------------------- - // - // Java-style functional form for with lock do { x }; - // - // -------------------------------------------------------------------------------- - - /** - * Private utility class that executes doBody() method with the lock() acquired and - * handles property unlock()ing the system, even if an error occurs. Allows one to write - * clean code like: - * - * new WithLock(name) { - * public Integer doBody() { doSomething(); return 1; } - * }.run() - * - * @param the return type of the doBody() method - */ - private abstract class WithLock { - private final String myName; - - public WithLock(String myName) { - this.myName = myName; - } - - protected abstract T doBody(); - - public T run() { - boolean locked = false; - try { - lock(myName); - locked = true; - return doBody(); - } finally { - if (locked) unlock(myName); - } - } - } - - // -------------------------------------------------------------------------------- - // - // main function for testing performance - // - // -------------------------------------------------------------------------------- - public static void main(String[] args) { - //BasicConfigurator.configure(); - - final String ref = args[0]; - final File file = new File(args[1]); - final int cycles = Integer.valueOf(args[2]); - - File referenceFile = new File(ref); - try { - final IndexedFastaSequenceFile fasta = new IndexedFastaSequenceFile(referenceFile); - final String chr1 = fasta.getSequenceDictionary().getSequence(1).getSequenceName(); - final GenomeLocParser genomeLocParser = new GenomeLocParser(fasta); - - final class MyTest { - String name; - GenomeLocProcessingTracker tracker; - - MyTest(String name, GenomeLocProcessingTracker tracker) { - this.name = name; - this.tracker = tracker; - } - - public void execute(int cycles) { - SimpleTimer delta = new SimpleTimer("delta"); - SimpleTimer timer = new SimpleTimer("none"); - - if ( file.exists() ) file.delete(); - timer.start(); - delta.start(); - for ( int i = 1; i < cycles; i++ ) { - tracker.claimOwnership(genomeLocParser.createGenomeLoc(chr1, i, i+1), "ABCDEFGHIJKL"); - if ( i % 1000 == 0 ) { - System.out.printf("%s\t%d\t%d\t%.4f\t%.4f%n", name, i, timer.currentTime(), timer.getElapsedTime(), delta.getElapsedTime() ); - delta.restart(); - } - } - } - } - - System.out.printf("name\tcycle\tcurrent.time\telapsed.time\tdelta%n"); - new MyTest("in-memory", new SharedMemoryGenomeLocProcessingTracker(new ClosableReentrantLock())).execute(cycles); - new MyTest("nio", new FileBackedGenomeLocProcessingTracker(file, genomeLocParser, new ClosableReentrantLock(), null)).execute(cycles); - new MyTest("nio-file-lock", new FileBackedGenomeLocProcessingTracker(file, genomeLocParser, new SharedFileThreadSafeLock(file,1), null)).execute(cycles); - } - catch(FileNotFoundException ex) { - throw new UserException.CouldNotReadInputFile(referenceFile,ex); - } - } -} diff --git a/public/java/src/org/broadinstitute/sting/utils/threading/NoOpGenomeLocProcessingTracker.java b/public/java/src/org/broadinstitute/sting/utils/threading/NoOpGenomeLocProcessingTracker.java deleted file mode 100644 index 4e61ef9e1..000000000 --- a/public/java/src/org/broadinstitute/sting/utils/threading/NoOpGenomeLocProcessingTracker.java +++ /dev/null @@ -1,30 +0,0 @@ -package org.broadinstitute.sting.utils.threading; - -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; - -import java.util.ArrayList; -import java.util.Collection; -import java.util.Collections; -import java.util.List; - -/** - * Base class, and null tracker. Always says that a GenomeLoc is ready for processing. It is - * critical that this class already return that a loc is owned, no matter if it's been seen before, - * etc. ReadShards can differ in their contents but have the same "unmapped" genome loc - */ -public class NoOpGenomeLocProcessingTracker extends GenomeLocProcessingTracker { - public NoOpGenomeLocProcessingTracker() { - super(new ClosableReentrantLock(), null); - } - - @Override - protected void registerNewLocs(Collection loc) { - ; - } - - @Override - protected List readNewLocs() { - return Collections.emptyList(); - } -} diff --git a/public/java/src/org/broadinstitute/sting/utils/threading/ProcessingLoc.java b/public/java/src/org/broadinstitute/sting/utils/threading/ProcessingLoc.java deleted file mode 100644 index ee2283dcf..000000000 --- a/public/java/src/org/broadinstitute/sting/utils/threading/ProcessingLoc.java +++ /dev/null @@ -1,71 +0,0 @@ -package org.broadinstitute.sting.utils.threading; - -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.HasGenomeLocation; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; - -/** - * Created by IntelliJ IDEA. - * User: depristo - * Date: 1/19/11 - * Time: 8:06 AM - * - * Information about processing locations and their owners. Contains two basic data, associated - * together. The first is a genome loc, and the second is the name of the owner, as a string. - * - * chr1:1-10 Mark - * chr2:11-20 DePristo - * - * would be two ProcessingLocs that first indicate that the first 10 bp of chr1 are owned by Mark, - * and the second is owned by DePristo. - */ -public class ProcessingLoc implements HasGenomeLocation { - private final GenomeLoc loc; - private final String owner; - - /** - * Create a loc that's already owned - * @param loc - * @param owner - */ - public ProcessingLoc(GenomeLoc loc, String owner) { - if ( loc == null || owner == null ) { - throw new ReviewedStingException("BUG: invalid ProcessingLoc detected: " + loc + " owner " + owner); - } - - this.loc = loc; - this.owner = owner.intern(); // reduce memory consumption by interning the string - } - - public GenomeLoc getLocation() { - return loc; - } - - public String getOwner() { - return owner; - } - - /** - * Returns true iff the owner of this processing loc is name. Can be used to determine - * the owner of this processing location. - * - * @param name - * @return - */ - public boolean isOwnedBy(String name) { - return getOwner().equals(name); - } - - public String toString() { return String.format("ProcessingLoc(%s,%s)", loc, owner); } - - public boolean equals(Object other) { - if (other instanceof ProcessingLoc ) - return this.loc.equals(((ProcessingLoc)other).loc) && this.owner.equals(((ProcessingLoc)other).owner); - else - return false; - } - - public int compareTo(ProcessingLoc other) { - return this.getLocation().compareTo(other.getLocation()); - } -} diff --git a/public/java/src/org/broadinstitute/sting/utils/threading/SharedFileLock.java b/public/java/src/org/broadinstitute/sting/utils/threading/SharedFileLock.java deleted file mode 100644 index 3eb2be96b..000000000 --- a/public/java/src/org/broadinstitute/sting/utils/threading/SharedFileLock.java +++ /dev/null @@ -1,174 +0,0 @@ -package org.broadinstitute.sting.utils.threading; - -import org.apache.log4j.Logger; -import org.apache.lucene.store.*; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.exceptions.UserException; - -import java.io.File; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.io.RandomAccessFile; -import java.nio.channels.*; - -/** - * User: depristo - * Date: 1/19/11 - * Time: 8:24 AM - * - * A reentrant lock for a shared file common file in the file system. Relies on a a Lucene SimpleFSLock - * to manage on disk file locking. - */ -public class SharedFileLock extends ClosableReentrantLock { // todo -- kinda gross inheritance. The super lock is never used - private static Logger logger = Logger.getLogger(SharedFileLock.class); - - private static final String VERIFY_HOST = System.getProperty("verify.host", "gsa1"); - private static final boolean VERIFY = false; - private static final int VERIFY_PORT = 5050; - - // 5 minutes => 360 seconds of trying -> failure - protected static final int DEFAULT_N_TRIES = 1000; - protected static final long DEFAULT_MILLISECONDS_PER_TRY = 360; - - /** The file we are locking */ - private final File file; - - private final LockFactory lockFactory; - private Lock fileLock = null; - - /** - * A counter that indicates the number of 'locks' on this file. - * If locks == 2, then two unlocks are required - * before any resources are freed. - */ - int fileLockReentrantCounter = 0; - - // type of locking - private final int nRetries; - private final long milliSecPerTry; - - /** - * Create a SharedFileThreadSafeLock object locking the file - * @param file - */ - public SharedFileLock(File file, int nRetries, long milliSecPerTry, int ID) { - super(); - this.file = file; - this.nRetries = nRetries; - this.milliSecPerTry = milliSecPerTry; - - File lockDir = new File(file.getParent() == null ? "./" : file.getParent()); - try { - LockFactory factory = new SimpleFSLockFactory(lockDir); - if ( VERIFY ) { // don't forget to start up the VerifyLockServer - this.lockFactory = new VerifyingLockFactory((byte)ID, factory, VERIFY_HOST, VERIFY_PORT); - } else { - this.lockFactory = factory; - } - } catch (IOException e) { - throw new UserException.CouldNotCreateOutputFile(lockDir, "Could not create coordination file locking directory " + lockDir, e); - } - } - - public SharedFileLock(File file, int ID) { - this(file, DEFAULT_N_TRIES, DEFAULT_MILLISECONDS_PER_TRY, ID); - } - - @Override - public void close() { - if ( ownsLock() ) throw new ReviewedStingException("closing SharedFileLock while still owned: ownership count " + fileLockReentrantCounter); - } - - @Override - public int getHoldCount() { - return fileLockReentrantCounter; - } - - @Override - public boolean ownsLock() { - return fileLockReentrantCounter > 0; - } - - // ------------------------------------------------------------------------------------------ - // - // workhorse routines -- acquiring file locks - // - // ------------------------------------------------------------------------------------------ - - private boolean obtainFileLock() throws IOException { - // annoying bug work around for verifylockserver - if ( VERIFY ) - try { - return fileLock.obtain(1); - } catch ( LockObtainFailedException e ) { - return false; - } - else - return fileLock.obtain(); - } - - /** - * Two stage [threading then file] locking mechanism. Reenterant in that multiple lock calls will be - * unwound appropriately. Uses file channel lock *after* thread locking. - */ - @Override - public void lock() { - if ( SharedFileThreadSafeLock.DEBUG ) logger.warn(" lock() " + Thread.currentThread().getName() + ", fileLockReentrantCounter = " + fileLockReentrantCounter); - if ( fileLockReentrantCounter++ == 0 ) { - // Precondition -- lock is always null while we don't have a lock - if ( fileLock != null ) - throw new ReviewedStingException("BUG: lock() function called when a lock already is owned!"); - - int i = 1; - fileLock = lockFactory.makeLock(file.getName() + ".lock"); - try { - boolean obtained = obtainFileLock(); // todo -- maybe use intrinsic lock features - for ( ; ! obtained && i < nRetries; i++ ) { - try { - //logger.warn("tryLock failed on try " + i + ", waiting " + milliSecPerTry + " millseconds for retry"); - Thread.sleep(milliSecPerTry); - } catch ( InterruptedException e ) { - throw new UserException("SharedFileThreadSafeLock interrupted during wait for file lock", e); - } - obtained = obtainFileLock(); // gross workaround for error in verify server - } - - if ( i > 1 ) logger.warn("tryLock required " + i + " tries before completing, waited " + i * milliSecPerTry + " millseconds"); - - if ( ! obtained ) { - fileLock = null; - // filelock == null -> we never managed to acquire the lock! - throw new UserException("SharedFileThreadSafeLock failed to obtain the lock after " + nRetries + " attempts"); - } - - if ( SharedFileThreadSafeLock.DEBUG ) logger.warn(" lock() " + Thread.currentThread().getName() + ", obtained = " + obtained + ", tries = " + i); - } catch (IOException e) { - fileLock = null; - throw new ReviewedStingException("Coordination file could not be created because a lock could not be obtained.", e); - } - } - } - - @Override - public void unlock() { - // update for reentrant unlocking - if ( fileLock == null ) throw new ReviewedStingException("BUG: file lock is null -- file lock was not obtained"); - if ( fileLockReentrantCounter <= 0 ) throw new ReviewedStingException("BUG: file lock counter < 0"); - - // this unlock counts as 1 unlock. If this is our last unlock, actually do something - if ( SharedFileThreadSafeLock.DEBUG ) logger.warn(" unlock() " + Thread.currentThread().getName() + ", count = " + fileLockReentrantCounter); - if ( --fileLockReentrantCounter == 0 ) { - try { - if ( ! fileLock.isLocked() ) throw new ReviewedStingException("BUG: call to unlock() when we don't have a valid lock!"); - fileLock.release(); - if ( SharedFileThreadSafeLock.DEBUG ) logger.warn(" unlock() " + Thread.currentThread().getName() + ", actually releasing"); - } catch ( IOException e ) { - throw new ReviewedStingException("Could not free file lock on file " + file, e); - } finally { // make sure we null out the filelock, regardless of our state - fileLock = null; - } - } else { - if ( SharedFileThreadSafeLock.DEBUG ) logger.warn(" unlock() " + Thread.currentThread().getName() + ", skipping, count = " + fileLockReentrantCounter); - } - } -} diff --git a/public/java/src/org/broadinstitute/sting/utils/threading/SharedFileThreadSafeLock.java b/public/java/src/org/broadinstitute/sting/utils/threading/SharedFileThreadSafeLock.java deleted file mode 100644 index dec69f7c2..000000000 --- a/public/java/src/org/broadinstitute/sting/utils/threading/SharedFileThreadSafeLock.java +++ /dev/null @@ -1,80 +0,0 @@ -package org.broadinstitute.sting.utils.threading; - -import org.apache.log4j.Logger; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.exceptions.UserException; - -import java.io.File; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.io.RandomAccessFile; -import java.nio.channels.*; - -/** - * User: depristo - * Date: 1/19/11 - * Time: 8:24 AM - * - * A reentrant lock that supports multi-threaded locking as well as a shared file lock on a common - * file in the file system. It itself a shared memory reenterant lock to managed thread safety and - * contains a SharedFileLock to handle the file integrity. - */ -public class SharedFileThreadSafeLock extends ClosableReentrantLock { - private static Logger logger = Logger.getLogger(SharedFileThreadSafeLock.class); - protected static final boolean DEBUG = false; - - private final SharedFileLock fileLock; - - /** - * Create a SharedFileThreadSafeLock object locking the file - * @param file - */ - public SharedFileThreadSafeLock(File file, int nRetries, long milliSecPerTry, int ID) { - super(); - this.fileLock = new SharedFileLock(file, nRetries, milliSecPerTry, ID); - } - - public SharedFileThreadSafeLock(File file, int ID) { - this(file, SharedFileLock.DEFAULT_N_TRIES, SharedFileLock.DEFAULT_MILLISECONDS_PER_TRY, ID); - } - - @Override - public void close() { - super.close(); - fileLock.close(); - } - - @Override - public int getHoldCount() { - if ( super.getHoldCount() != fileLock.getHoldCount() ) - throw new ReviewedStingException("BUG: unequal hold counts. threadlock = " + super.getHoldCount() + ", filelock = " + fileLock.getHoldCount()); - return super.getHoldCount(); - } - - @Override - public boolean ownsLock() { - return super.isHeldByCurrentThread() && fileLock.ownsLock(); - } - - /** - * Two stage [threading then file] locking mechanism. Reenterant in that multiple lock calls will be - * unwound appropriately. Uses file channel lock *after* thread locking. - */ - @Override - public void lock() { - if ( DEBUG ) logger.warn("Attempting SharedFileThreadSafe lock: " + Thread.currentThread().getName()); - if ( DEBUG ) logger.warn(" going for thread lock: " + Thread.currentThread().getName()); - super.lock(); - if ( DEBUG ) logger.warn(" going for file lock: " + Thread.currentThread().getName()); - fileLock.lock(); // todo -- should this be in a try? - } - - @Override - public void unlock() { - if ( DEBUG ) logger.warn(" releasing filelock: " + Thread.currentThread().getName()); - fileLock.unlock(); - if ( DEBUG ) logger.warn(" releasing threadlock: " + Thread.currentThread().getName()); - super.unlock(); - if ( DEBUG ) logger.warn(" unlock() complete: " + Thread.currentThread().getName()); - } -} diff --git a/public/java/src/org/broadinstitute/sting/utils/threading/SharedMemoryGenomeLocProcessingTracker.java b/public/java/src/org/broadinstitute/sting/utils/threading/SharedMemoryGenomeLocProcessingTracker.java deleted file mode 100644 index 9bf8b58b1..000000000 --- a/public/java/src/org/broadinstitute/sting/utils/threading/SharedMemoryGenomeLocProcessingTracker.java +++ /dev/null @@ -1,34 +0,0 @@ -package org.broadinstitute.sting.utils.threading; - -import java.io.PrintStream; -import java.util.ArrayList; -import java.util.Collection; -import java.util.List; - -/** - * Thread-safe shared memory only implementation. Uses a simple list to manage the newly - * added processing locations. - */ -public class SharedMemoryGenomeLocProcessingTracker extends GenomeLocProcessingTracker { - private List newPLocs = new ArrayList(); - - protected SharedMemoryGenomeLocProcessingTracker(ClosableReentrantLock lock) { - super(lock, null); - } - - protected SharedMemoryGenomeLocProcessingTracker(ClosableReentrantLock lock, PrintStream status) { - super(lock, status); - } - - @Override - protected void registerNewLocs(Collection plocs) { - newPLocs.addAll(plocs); - } - - @Override - protected List readNewLocs() { - List r = newPLocs; - newPLocs = new ArrayList(); - return r; - } -} diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/Allele.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/Allele.java index 901de6fae..c3f437f11 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/Allele.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/Allele.java @@ -1,9 +1,9 @@ package org.broadinstitute.sting.utils.variantcontext; -import java.util.Arrays; -import java.util.List; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collection; +import java.util.List; /** * Immutable representation of an allele diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeLikelihoods.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeLikelihoods.java index 3feaf5e1c..dba16cf86 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeLikelihoods.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeLikelihoods.java @@ -24,8 +24,8 @@ package org.broadinstitute.sting.utils.variantcontext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broad.tribble.TribbleException; +import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; public class GenotypeLikelihoods { public static final boolean CAP_PLS = false; diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/MutableVariantContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/MutableVariantContext.java index 4efba8825..a191670a4 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/MutableVariantContext.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/MutableVariantContext.java @@ -1,7 +1,10 @@ package org.broadinstitute.sting.utils.variantcontext; -import java.util.*; +import java.util.Collection; +import java.util.Map; +import java.util.Set; +import java.util.TreeMap; /** * Mutable version of VariantContext diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java index 92c5d648b..eab392c4d 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java @@ -2,8 +2,8 @@ package org.broadinstitute.sting.utils.variantcontext; import org.broad.tribble.Feature; import org.broad.tribble.TribbleException; -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broad.tribble.util.ParsingUtils; +import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.utils.codecs.vcf.VCFParser; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java index 5d58954aa..212600360 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java @@ -23,21 +23,26 @@ package org.broadinstitute.sting.utils.variantcontext; -import java.io.Serializable; -import java.util.*; - -import com.google.java.contract.*; +import com.google.java.contract.Ensures; +import com.google.java.contract.Requires; import net.sf.picard.reference.ReferenceSequenceFile; import net.sf.samtools.util.StringUtil; -import org.apache.commons.jexl2.*; +import org.apache.commons.jexl2.Expression; +import org.apache.commons.jexl2.JexlEngine; import org.broad.tribble.util.popgen.HardyWeinbergCalculation; -import org.broadinstitute.sting.utils.codecs.vcf.AbstractVCFCodec; import org.broadinstitute.sting.gatk.walkers.phasing.ReadBackedPhasingWalker; -import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.codecs.vcf.AbstractVCFCodec; import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; +import java.io.Serializable; +import java.util.*; + public class VariantContextUtils { final public static JexlEngine engine = new JexlEngine(); static { @@ -284,8 +289,8 @@ public class VariantContextUtils { /** * Returns a newly allocated VC that is the same as VC, but without genotypes - * @param vc - * @return + * @param vc variant context + * @return new VC without genotypes */ @Requires("vc != null") @Ensures("result != null") @@ -298,8 +303,8 @@ public class VariantContextUtils { /** * Returns a newly allocated list of VC, where each VC is the same as the input VCs, but without genotypes - * @param vcs - * @return + * @param vcs collection of VCs + * @return new VCs without genotypes */ @Requires("vcs != null") @Ensures("result != null") @@ -357,9 +362,9 @@ public class VariantContextUtils { * information per genotype. The master merge will add the PQ information from each genotype record, where * appropriate, to the master VC. * - * @param unsortedVCs - * @param masterName - * @return + * @param unsortedVCs collection of VCs + * @param masterName name of master VC + * @return master-merged VC */ public static VariantContext masterMerge(Collection unsortedVCs, String masterName) { VariantContext master = findMaster(unsortedVCs, masterName); @@ -430,11 +435,15 @@ public class VariantContextUtils { * If uniqifySamples is true, the priority order is ignored and names are created by concatenating the VC name with * the sample name * - * @param unsortedVCs - * @param priorityListOfVCs - * @param filteredRecordMergeType - * @param genotypeMergeOptions - * @return + * @param genomeLocParser loc parser + * @param unsortedVCs collection of unsorted VCs + * @param priorityListOfVCs priority list detailing the order in which we should grab the VCs + * @param filteredRecordMergeType merge type for filtered records + * @param genotypeMergeOptions merge option for genotypes + * @param annotateOrigin should we annotate the set it came from? + * @param printMessages should we print messages? + * @param inputRefBase the ref base + * @return new VariantContext */ public static VariantContext simpleMerge(GenomeLocParser genomeLocParser, Collection unsortedVCs, List priorityListOfVCs, FilteredRecordMergeType filteredRecordMergeType, GenotypeMergeType genotypeMergeOptions, @@ -443,6 +452,24 @@ public class VariantContextUtils { return simpleMerge(genomeLocParser, unsortedVCs, priorityListOfVCs, filteredRecordMergeType, genotypeMergeOptions, annotateOrigin, printMessages, inputRefBase, "set", false, false); } + /** + * Merges VariantContexts into a single hybrid. Takes genotypes for common samples in priority order, if provided. + * If uniqifySamples is true, the priority order is ignored and names are created by concatenating the VC name with + * the sample name + * + * @param genomeLocParser loc parser + * @param unsortedVCs collection of unsorted VCs + * @param priorityListOfVCs priority list detailing the order in which we should grab the VCs + * @param filteredRecordMergeType merge type for filtered records + * @param genotypeMergeOptions merge option for genotypes + * @param annotateOrigin should we annotate the set it came from? + * @param printMessages should we print messages? + * @param inputRefBase the ref base + * @param setKey the key name of the set + * @param filteredAreUncalled are filtered records uncalled? + * @param mergeInfoWithMaxAC should we merge in info from the VC with maximum allele count? + * @return new VariantContext + */ public static VariantContext simpleMerge(GenomeLocParser genomeLocParser, Collection unsortedVCs, List priorityListOfVCs, FilteredRecordMergeType filteredRecordMergeType, GenotypeMergeType genotypeMergeOptions, boolean annotateOrigin, boolean printMessages, byte inputRefBase, String setKey, @@ -465,7 +492,7 @@ public class VariantContextUtils { if ( ! filteredAreUncalled || vc.isNotFiltered() ) VCs.add(VariantContext.createVariantContextWithPaddedAlleles(vc,inputRefBase,false)); } - if ( VCs.size() == 0 ) // everything is filtered out and we're filteredareUncalled + if ( VCs.size() == 0 ) // everything is filtered out and we're filteredAreUncalled return null; // establish the baseline info from the first VC @@ -610,6 +637,17 @@ public class VariantContextUtils { return merged; } + public static Map> separateVariantContextsByType(Collection VCs) { + HashMap> mappedVCs = new HashMap>(); + for ( VariantContext vc : VCs ) { + if ( !mappedVCs.containsKey(vc.getType()) ) + mappedVCs.put(vc.getType(), new ArrayList()); + mappedVCs.get(vc.getType()).add(vc); + } + + return mappedVCs; + } + private static class AlleleMapper { private VariantContext vc = null; private Map map = null; @@ -829,6 +867,7 @@ public class VariantContextUtils { /** * create a genome location, given a variant context + * @param genomeLocParser parser * @param vc the variant context * @return the genomeLoc */ diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantJEXLContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantJEXLContext.java index c5a3b6f2a..a59ed7abe 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantJEXLContext.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantJEXLContext.java @@ -29,7 +29,10 @@ import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.utils.exceptions.UserException; -import java.util.*; +import java.util.Collection; +import java.util.HashMap; +import java.util.Map; +import java.util.Set; /** * diff --git a/public/java/src/org/broadinstitute/sting/utils/wiggle/WiggleWriter.java b/public/java/src/org/broadinstitute/sting/utils/wiggle/WiggleWriter.java index ab91d0e2e..5782c2704 100755 --- a/public/java/src/org/broadinstitute/sting/utils/wiggle/WiggleWriter.java +++ b/public/java/src/org/broadinstitute/sting/utils/wiggle/WiggleWriter.java @@ -1,7 +1,7 @@ package org.broadinstitute.sting.utils.wiggle; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import java.io.*; diff --git a/public/java/test/org/broadinstitute/sting/BaseTest.java b/public/java/test/org/broadinstitute/sting/BaseTest.java index b3e422ba9..ef46d4bff 100755 --- a/public/java/test/org/broadinstitute/sting/BaseTest.java +++ b/public/java/test/org/broadinstitute/sting/BaseTest.java @@ -13,10 +13,7 @@ import java.io.*; import java.math.BigInteger; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; +import java.util.*; /** * @@ -83,11 +80,6 @@ public abstract class BaseTest { public static final String networkTempDir = "/broad/shptmp/"; public static final File networkTempDirFile = new File(networkTempDir); - /** - * Subdirectory under the ant build directory where we store integration test md5 results - */ - public static final String MD5_FILE_DB_SUBDIR = "integrationtests"; - public static final String testDir = "public/testdata/"; /** before the class starts up */ @@ -129,7 +121,7 @@ public abstract class BaseTest { * 2: Create instances of your subclass. Return from it the call to getTests, providing * the class type of your test * - * @DataProvider(name = "summaries") + * @DataProvider(name = "summaries" * public Object[][] createSummaries() { * new SummarizeDifferenceTest().addDiff("A", "A").addSummary("A:2"); * new SummarizeDifferenceTest().addDiff("A", "B").addSummary("A:1", "B:1"); @@ -206,200 +198,6 @@ public abstract class BaseTest { } } - /** - * a little utility function for all tests to md5sum a file - * Shameless taken from: - * - * http://www.javalobby.org/java/forums/t84420.html - * - * @param file the file - * @return a string - */ - public static String md5SumFile(File file) { - MessageDigest digest; - try { - digest = MessageDigest.getInstance("MD5"); - } catch (NoSuchAlgorithmException e) { - throw new ReviewedStingException("Unable to find MD5 digest"); - } - InputStream is; - try { - is = new FileInputStream(file); - } catch (FileNotFoundException e) { - throw new ReviewedStingException("Unable to open file " + file); - } - byte[] buffer = new byte[8192]; - int read; - try { - while ((read = is.read(buffer)) > 0) { - digest.update(buffer, 0, read); - } - byte[] md5sum = digest.digest(); - BigInteger bigInt = new BigInteger(1, md5sum); - return bigInt.toString(16); - - } - catch (IOException e) { - throw new ReviewedStingException("Unable to process file for MD5", e); - } - finally { - try { - is.close(); - } - catch (IOException e) { - throw new ReviewedStingException("Unable to close input stream for MD5 calculation", e); - } - } - } - - protected static void ensureMd5DbDirectory() { - // todo -- make path - File dir = new File(MD5_FILE_DB_SUBDIR); - if ( ! dir.exists() ) { - System.out.printf("##### Creating MD5 db %s%n", MD5_FILE_DB_SUBDIR); - if ( ! dir.mkdir() ) { - throw new ReviewedStingException("Infrastructure failure: failed to create md5 directory " + MD5_FILE_DB_SUBDIR); - } - } - } - - protected static File getFileForMD5(final String md5) { - final String basename = String.format("%s.integrationtest", md5); - return new File(MD5_FILE_DB_SUBDIR + "/" + basename); - } - - private static void updateMD5Db(final String md5, final File resultsFile) { - // todo -- copy results file to DB dir if needed under filename for md5 - final File dbFile = getFileForMD5(md5); - if ( ! dbFile.exists() ) { - // the file isn't already in the db, copy it over - System.out.printf("##### Updating MD5 file: %s%n", dbFile.getPath()); - try { - FileUtils.copyFile(resultsFile, dbFile); - } catch ( IOException e ) { - throw new ReviewedStingException(e.getMessage()); - } - } else { - System.out.printf("##### MD5 file is up to date: %s%n", dbFile.getPath()); - - } - } - - private static String getMD5Path(final String md5, final String valueIfNotFound) { - // todo -- look up the result in the directory and return the path if it exists - final File dbFile = getFileForMD5(md5); - return dbFile.exists() ? dbFile.getPath() : valueIfNotFound; - } - - public static byte[] getBytesFromFile(File file) throws IOException { - InputStream is = new FileInputStream(file); - - // Get the size of the file - long length = file.length(); - - if (length > Integer.MAX_VALUE) { - // File is too large - } - - // Create the byte array to hold the data - byte[] bytes = new byte[(int) length]; - - // Read in the bytes - int offset = 0; - int numRead = 0; - while (offset < bytes.length - && (numRead = is.read(bytes, offset, bytes.length - offset)) >= 0) { - offset += numRead; - } - - // Ensure all the bytes have been read in - if (offset < bytes.length) { - throw new IOException("Could not completely read file " + file.getName()); - } - - // Close the input stream and return bytes - is.close(); - return bytes; - } - - /** - * Tests a file MD5 against an expected value, returning the MD5. NOTE: This function WILL throw an exception if the MD5s are different. - * @param name Name of the test. - * @param resultsFile File to MD5. - * @param expectedMD5 Expected MD5 value. - * @param parameterize If true or if expectedMD5 is an empty string, will print out the calculated MD5 instead of error text. - * @return The calculated MD5. - */ - public static String assertMatchingMD5(final String name, final File resultsFile, final String expectedMD5, final boolean parameterize) { - String filemd5sum = testFileMD5(name, resultsFile, expectedMD5, parameterize); - - if (parameterize || expectedMD5.equals("")) { - // Don't assert - } else if ( filemd5sum.equals(expectedMD5) ) { - System.out.println(String.format(" => %s PASSED", name)); - } else { - Assert.fail(String.format("%s has mismatching MD5s: expected=%s observed=%s", name, expectedMD5, filemd5sum)); - } - - - - return filemd5sum; - } - - - /** - * Tests a file MD5 against an expected value, returning the MD5. NOTE: This function WILL NOT throw an exception if the MD5s are different. - * @param name Name of the test. - * @param resultsFile File to MD5. - * @param expectedMD5 Expected MD5 value. - * @param parameterize If true or if expectedMD5 is an empty string, will print out the calculated MD5 instead of error text. - * @return The calculated MD5. - */ - public static String testFileMD5(final String name, final File resultsFile, final String expectedMD5, final boolean parameterize) { - try { - byte[] bytesOfMessage = getBytesFromFile(resultsFile); - byte[] thedigest = MessageDigest.getInstance("MD5").digest(bytesOfMessage); - BigInteger bigInt = new BigInteger(1, thedigest); - String filemd5sum = bigInt.toString(16); - while (filemd5sum.length() < 32) filemd5sum = "0" + filemd5sum; // pad to length 32 - - // - // copy md5 to integrationtests - // - updateMD5Db(filemd5sum, resultsFile); - - if (parameterize || expectedMD5.equals("")) { - System.out.println(String.format("PARAMETERIZATION[%s]: file %s has md5 = %s, stated expectation is %s, equal? = %b", - name, resultsFile, filemd5sum, expectedMD5, filemd5sum.equals(expectedMD5))); - } else { - System.out.println(String.format("Checking MD5 for %s [calculated=%s, expected=%s]", resultsFile, filemd5sum, expectedMD5)); - System.out.flush(); - - if ( ! expectedMD5.equals(filemd5sum) ) { - // we are going to fail for real in assertEquals (so we are counted by the testing framework). - // prepare ourselves for the comparison - System.out.printf("##### Test %s is going fail #####%n", name); - String pathToExpectedMD5File = getMD5Path(expectedMD5, "[No DB file found]"); - String pathToFileMD5File = getMD5Path(filemd5sum, "[No DB file found]"); - System.out.printf("##### Path to expected file (MD5=%s): %s%n", expectedMD5, pathToExpectedMD5File); - System.out.printf("##### Path to calculated file (MD5=%s): %s%n", filemd5sum, pathToFileMD5File); - System.out.printf("##### Diff command: diff %s %s%n", pathToExpectedMD5File, pathToFileMD5File); - - // inline differences - DiffEngine.SummaryReportParams params = new DiffEngine.SummaryReportParams(System.out, 20, 10, 0); - boolean success = DiffEngine.simpleDiffFiles(new File(pathToExpectedMD5File), new File(pathToFileMD5File), params); - if ( success ) - System.out.printf("Note that the above list is not comprehensive. At most 20 lines of output, and 10 specific differences will be listed. Please use -T DiffObjects -R public/testdata/exampleFASTA.fasta -m %s -t %s to explore the differences more freely%n", - pathToExpectedMD5File, pathToFileMD5File); - } - } - - return filemd5sum; - } catch (Exception e) { - throw new RuntimeException("Failed to read bytes from calls file: " + resultsFile, e); - } - } - /** * Creates a temp file that will be deleted on exit after tests are complete. * @param name Prefix of the file. diff --git a/public/java/test/org/broadinstitute/sting/MD5DB.java b/public/java/test/org/broadinstitute/sting/MD5DB.java new file mode 100644 index 000000000..0194e114a --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/MD5DB.java @@ -0,0 +1,248 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting; + +import org.apache.commons.io.FileUtils; +import org.broadinstitute.sting.gatk.walkers.diffengine.DiffEngine; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.testng.Assert; + +import java.io.*; +import java.math.BigInteger; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; +import java.util.Arrays; + +/** + * Created by IntelliJ IDEA. + * User: depristo + * Date: 7/18/11 + * Time: 9:10 AM + * + * Utilities for manipulating the MD5 database of previous results + */ +public class MD5DB { + /** + * Subdirectory under the ant build directory where we store integration test md5 results + */ + private static final int MAX_RECORDS_TO_READ = 10000; + public static final String LOCAL_MD5_DB_DIR = "integrationtests"; + public static final String GLOBAL_MD5_DB_DIR = "/humgen/gsa-hpprojects/GATK/data/integrationtests"; + + // ---------------------------------------------------------------------- + // + // MD5 DB stuff + // + // ---------------------------------------------------------------------- + + /** + * Create the MD5 file directories if necessary + */ + protected static void ensureMd5DbDirectory() { + File dir = new File(LOCAL_MD5_DB_DIR); + if ( ! dir.exists() ) { + System.out.printf("##### Creating MD5 db %s%n", LOCAL_MD5_DB_DIR); + if ( ! dir.mkdir() ) { + throw new ReviewedStingException("Infrastructure failure: failed to create md5 directory " + LOCAL_MD5_DB_DIR); + } + } + } + + /** + * Returns the path to an already existing file with the md5 contents, or valueIfNotFound + * if no such file exists in the db. + * + * @param md5 + * @param valueIfNotFound + * @return + */ + public static String getMD5FilePath(final String md5, final String valueIfNotFound) { + // we prefer the global db to the local DB, so match it first + for ( String dir : Arrays.asList(GLOBAL_MD5_DB_DIR, LOCAL_MD5_DB_DIR)) { + File f = getFileForMD5(md5, dir); + if ( f.exists() && f.canRead() ) + return f.getPath(); + } + + return valueIfNotFound; + } + + /** + * Utility function that given a file's md5 value and the path to the md5 db, + * returns the canonical name of the file. For example, if md5 is XXX and db is YYY, + * this will return YYY/XXX.integrationtest + * + * @param md5 + * @param dbPath + * @return + */ + private static File getFileForMD5(final String md5, final String dbPath) { + final String basename = String.format("%s.integrationtest", md5); + return new File(dbPath + "/" + basename); + } + + /** + * Copies the results file with md5 value to its canonical file name and db places + * + * @param md5 + * @param resultsFile + */ + private static void updateMD5Db(final String md5, final File resultsFile) { + copyFileToDB(getFileForMD5(md5, LOCAL_MD5_DB_DIR), resultsFile); + copyFileToDB(getFileForMD5(md5, GLOBAL_MD5_DB_DIR), resultsFile); + } + + /** + * Low-level utility routine that copies resultsFile to dbFile + * @param dbFile + * @param resultsFile + */ + private static void copyFileToDB(File dbFile, final File resultsFile) { + if ( ! dbFile.exists() ) { + // the file isn't already in the db, copy it over + System.out.printf("##### Updating MD5 file: %s%n", dbFile.getPath()); + try { + FileUtils.copyFile(resultsFile, dbFile); + } catch ( IOException e ) { + System.out.printf("##### Skipping update, cannot write file %s%n", dbFile); + } + } else { + System.out.printf("##### MD5 file is up to date: %s%n", dbFile.getPath()); + } + } + + /** + * Returns the byte[] of the entire contents of file, for md5 calculations + * @param file + * @return + * @throws IOException + */ + private static byte[] getBytesFromFile(File file) throws IOException { + InputStream is = new FileInputStream(file); + + // Get the size of the file + long length = file.length(); + + if (length > Integer.MAX_VALUE) { + // File is too large + } + + // Create the byte array to hold the data + byte[] bytes = new byte[(int) length]; + + // Read in the bytes + int offset = 0; + int numRead = 0; + while (offset < bytes.length + && (numRead = is.read(bytes, offset, bytes.length - offset)) >= 0) { + offset += numRead; + } + + // Ensure all the bytes have been read in + if (offset < bytes.length) { + throw new IOException("Could not completely read file " + file.getName()); + } + + // Close the input stream and return bytes + is.close(); + return bytes; + } + + /** + * Tests a file MD5 against an expected value, returning the MD5. NOTE: This function WILL throw an exception if the MD5s are different. + * @param name Name of the test. + * @param resultsFile File to MD5. + * @param expectedMD5 Expected MD5 value. + * @param parameterize If true or if expectedMD5 is an empty string, will print out the calculated MD5 instead of error text. + * @return The calculated MD5. + */ + public static String assertMatchingMD5(final String name, final File resultsFile, final String expectedMD5, final boolean parameterize) { + String filemd5sum = testFileMD5(name, resultsFile, expectedMD5, parameterize); + + if (parameterize || expectedMD5.equals("")) { + // Don't assert + } else if ( filemd5sum.equals(expectedMD5) ) { + System.out.println(String.format(" => %s PASSED", name)); + } else { + Assert.fail(String.format("%s has mismatching MD5s: expected=%s observed=%s", name, expectedMD5, filemd5sum)); + } + + return filemd5sum; + } + + + /** + * Tests a file MD5 against an expected value, returning the MD5. NOTE: This function WILL NOT throw an exception if the MD5s are different. + * @param name Name of the test. + * @param resultsFile File to MD5. + * @param expectedMD5 Expected MD5 value. + * @param parameterize If true or if expectedMD5 is an empty string, will print out the calculated MD5 instead of error text. + * @return The calculated MD5. + */ + public static String testFileMD5(final String name, final File resultsFile, final String expectedMD5, final boolean parameterize) { + try { + byte[] bytesOfMessage = getBytesFromFile(resultsFile); + byte[] thedigest = MessageDigest.getInstance("MD5").digest(bytesOfMessage); + BigInteger bigInt = new BigInteger(1, thedigest); + String filemd5sum = bigInt.toString(16); + while (filemd5sum.length() < 32) filemd5sum = "0" + filemd5sum; // pad to length 32 + + // + // copy md5 to integrationtests + // + updateMD5Db(filemd5sum, resultsFile); + + if (parameterize || expectedMD5.equals("")) { + System.out.println(String.format("PARAMETERIZATION[%s]: file %s has md5 = %s, stated expectation is %s, equal? = %b", + name, resultsFile, filemd5sum, expectedMD5, filemd5sum.equals(expectedMD5))); + } else { + System.out.println(String.format("Checking MD5 for %s [calculated=%s, expected=%s]", resultsFile, filemd5sum, expectedMD5)); + System.out.flush(); + + if ( ! expectedMD5.equals(filemd5sum) ) { + // we are going to fail for real in assertEquals (so we are counted by the testing framework). + // prepare ourselves for the comparison + System.out.printf("##### Test %s is going fail #####%n", name); + String pathToExpectedMD5File = getMD5FilePath(expectedMD5, "[No DB file found]"); + String pathToFileMD5File = getMD5FilePath(filemd5sum, "[No DB file found]"); + System.out.printf("##### Path to expected file (MD5=%s): %s%n", expectedMD5, pathToExpectedMD5File); + System.out.printf("##### Path to calculated file (MD5=%s): %s%n", filemd5sum, pathToFileMD5File); + System.out.printf("##### Diff command: diff %s %s%n", pathToExpectedMD5File, pathToFileMD5File); + + // inline differences + DiffEngine.SummaryReportParams params = new DiffEngine.SummaryReportParams(System.out, 20, 10, 0); + boolean success = DiffEngine.simpleDiffFiles(new File(pathToExpectedMD5File), new File(pathToFileMD5File), MAX_RECORDS_TO_READ, params); + if ( success ) + System.out.printf("Note that the above list is not comprehensive. At most 20 lines of output, and 10 specific differences will be listed. Please use -T DiffObjects -R public/testdata/exampleFASTA.fasta -m %s -t %s to explore the differences more freely%n", + pathToExpectedMD5File, pathToFileMD5File); + } + } + + return filemd5sum; + } catch (Exception e) { + throw new RuntimeException("Failed to read bytes from calls file: " + resultsFile, e); + } + } +} diff --git a/public/java/test/org/broadinstitute/sting/WalkerTest.java b/public/java/test/org/broadinstitute/sting/WalkerTest.java index d65f4ec34..386c17659 100755 --- a/public/java/test/org/broadinstitute/sting/WalkerTest.java +++ b/public/java/test/org/broadinstitute/sting/WalkerTest.java @@ -53,7 +53,7 @@ public class WalkerTest extends BaseTest { } public String assertMatchingMD5(final String name, final File resultsFile, final String expectedMD5) { - return assertMatchingMD5(name, resultsFile, expectedMD5, parameterize()); + return MD5DB.assertMatchingMD5(name, resultsFile, expectedMD5, parameterize()); } public void maybeValidateSupplementaryFile(final String name, final File resultFile) { @@ -65,19 +65,22 @@ public class WalkerTest extends BaseTest { throw new StingException("Found an index created for file " + resultFile + " but we can only validate VCF files. Extend this code!"); } - System.out.println("Verifying on-the-fly index " + indexFile + " for test " + name + " using file " + resultFile); - Index indexFromOutputFile = IndexFactory.createIndex(resultFile, new VCFCodec()); - Index dynamicIndex = IndexFactory.loadIndex(indexFile.getAbsolutePath()); - - if ( ! indexFromOutputFile.equals(dynamicIndex) ) { - Assert.fail(String.format("Index on disk from indexing on the fly not equal to the index created after the run completed. FileIndex %s vs. on-the-fly %s%n", - indexFromOutputFile.getProperties(), - dynamicIndex.getProperties())); - } + assertOnDiskIndexEqualToNewlyCreatedIndex(indexFile, name, resultFile); } } + public static void assertOnDiskIndexEqualToNewlyCreatedIndex(final File indexFile, final String name, final File resultFile) { + System.out.println("Verifying on-the-fly index " + indexFile + " for test " + name + " using file " + resultFile); + Index indexFromOutputFile = IndexFactory.createIndex(resultFile, new VCFCodec()); + Index dynamicIndex = IndexFactory.loadIndex(indexFile.getAbsolutePath()); + + if ( ! indexFromOutputFile.equalsIgnoreTimestamp(dynamicIndex) ) { + Assert.fail(String.format("Index on disk from indexing on the fly not equal to the index created after the run completed. FileIndex %s vs. on-the-fly %s%n", + indexFromOutputFile.getProperties(), + dynamicIndex.getProperties())); + } + } public List assertMatchingMD5s(final String name, List resultFiles, List expectedMD5s) { List md5s = new ArrayList(); @@ -188,7 +191,7 @@ public class WalkerTest extends BaseTest { } protected Pair, List> executeTest(final String name, WalkerTestSpec spec) { - ensureMd5DbDirectory(); // ensure the md5 directory exists + MD5DB.ensureMd5DbDirectory(); // ensure the md5 directory exists List tmpFiles = new ArrayList(); for (int i = 0; i < spec.nOutputFiles; i++) { diff --git a/public/java/test/org/broadinstitute/sting/alignment/AlignerIntegrationTest.java b/public/java/test/org/broadinstitute/sting/alignment/AlignerIntegrationTest.java new file mode 100644 index 000000000..a6af034cb --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/alignment/AlignerIntegrationTest.java @@ -0,0 +1,27 @@ +package org.broadinstitute.sting.alignment; + +import org.testng.annotations.Test; +import org.broadinstitute.sting.WalkerTest; + +import java.util.Arrays; + +/** + * Integration tests for the aligner. + * + * @author mhanna + * @version 0.1 + */ +public class AlignerIntegrationTest extends WalkerTest { + @Test + public void testBasicAlignment() { + String md5 = "a2bdf907b18114a86ca47f9fc23791bf"; + WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( + "-R " + GATKDataLocation + "bwa/human_b36_both.fasta" + + " -T Align" + + " -I " + validationDataLocation + "NA12878_Pilot1_20.trimmed.unmapped.bam" + + " -o %s", + 1, // just one output file + Arrays.asList(md5)); + executeTest("testBasicAlignment", spec); + } +} diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngineUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngineUnitTest.java index 96dfec6e8..2ae19264e 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngineUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngineUnitTest.java @@ -99,7 +99,7 @@ public class DiffEngineUnitTest extends BaseTest { logger.warn("Test tree1: " + test.tree1.toOneLineString()); logger.warn("Test tree2: " + test.tree2.toOneLineString()); - List diffs = engine.diff(test.tree1, test.tree2); + List diffs = engine.diff(test.tree1, test.tree2); logger.warn("Test expected diff : " + test.differences); logger.warn("Observed diffs : " + diffs); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsIntegrationTest.java new file mode 100644 index 000000000..77159d9c2 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsIntegrationTest.java @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.walkers.diffengine; + +import org.broadinstitute.sting.WalkerTest; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.io.File; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +public class DiffObjectsIntegrationTest extends WalkerTest { + private class TestParams extends TestDataProvider { + public File master, test; + public String MD5; + + private TestParams(String master, String test, String MD5) { + super(TestParams.class); + this.master = new File(master); + this.test = new File(test); + this.MD5 = MD5; + } + + public String toString() { + return String.format("master=%s,test=%s,md5=%s", master, test, MD5); + } + } + + @DataProvider(name = "data") + public Object[][] createData() { + new TestParams(testDir + "diffTestMaster.vcf", testDir + "diffTestTest.vcf", "4d9f4636de05b93c354d05011264546e"); + new TestParams(testDir + "exampleBAM.bam", testDir + "exampleBAM.simple.bam", "37e6efd833b5cd6d860a9df3df9713fc"); + return TestParams.getTests(TestParams.class); + } + + @Test(enabled = true, dataProvider = "data") + public void testDiffs(TestParams params) { + WalkerTestSpec spec = new WalkerTestSpec( + "-T DiffObjects -R public/testdata/exampleFASTA.fasta " + + " -m " + params.master + + " -t " + params.test + + " -o %s", + Arrays.asList(params.MD5)); + executeTest("testDiffObjects:"+params, spec).getFirst(); + } +} + diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffableReaderUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffableReaderUnitTest.java index a0cb47770..dee7bbd88 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffableReaderUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffableReaderUnitTest.java @@ -87,7 +87,7 @@ public class DiffableReaderUnitTest extends BaseTest { Assert.assertSame(diff.getParent(), DiffElement.ROOT); DiffNode node = diff.getValueAsNode(); - Assert.assertEquals(node.getElements().size(), 10); + Assert.assertEquals(node.getElements().size(), 11); // chr1 2646 rs62635284 G A 0.15 PASS AC=2;AF=1.00;AN=2 GT:AD:DP:GL:GQ 1/1:53,75:3:-12.40,-0.90,-0.00:9.03 DiffNode rec1 = node.getElement("chr1:2646").getValueAsNode(); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DifferenceUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DifferenceUnitTest.java index 64579a01b..4e4080bc7 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DifferenceUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DifferenceUnitTest.java @@ -75,10 +75,10 @@ public class DifferenceUnitTest extends BaseTest { @DataProvider(name = "data") public Object[][] createTrees() { - new DifferenceTest("A=X", "A=Y", "A:X!=Y"); - new DifferenceTest("A=Y", "A=X", "A:Y!=X"); - new DifferenceTest(DiffNode.fromString("A=X"), null, "A:X!=MISSING"); - new DifferenceTest(null, DiffNode.fromString("A=X"), "A:MISSING!=X"); + new DifferenceTest("A=X", "A=Y", "A:1:X!=Y"); + new DifferenceTest("A=Y", "A=X", "A:1:Y!=X"); + new DifferenceTest(DiffNode.fromString("A=X"), null, "A:1:X!=MISSING"); + new DifferenceTest(null, DiffNode.fromString("A=X"), "A:1:MISSING!=X"); return DifferenceTest.getTests(DifferenceTest.class); } @@ -87,7 +87,7 @@ public class DifferenceUnitTest extends BaseTest { logger.warn("Test tree1: " + (test.tree1 == null ? "null" : test.tree1.toOneLineString())); logger.warn("Test tree2: " + (test.tree2 == null ? "null" : test.tree2.toOneLineString())); logger.warn("Test expected diff : " + test.difference); - SpecificDifference diff = new SpecificDifference(test.tree1, test.tree2); + Difference diff = new Difference(test.tree1, test.tree2); logger.warn("Observed diffs : " + diff); Assert.assertEquals(diff.toString(), test.difference, "Observed diff string " + diff + " not equal to expected difference string " + test.difference ); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/MergeAndMatchHaplotypesIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/MergeAndMatchHaplotypesIntegrationTest.java new file mode 100644 index 000000000..21435dd7d --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/MergeAndMatchHaplotypesIntegrationTest.java @@ -0,0 +1,28 @@ +package org.broadinstitute.sting.gatk.walkers.phasing; + +import org.broadinstitute.sting.WalkerTest; +import org.testng.annotations.Test; + +import java.util.Arrays; + +public class MergeAndMatchHaplotypesIntegrationTest extends WalkerTest { + private static String mergeAndMatchHaplotypesTestDataRoot = validationDataLocation + "/MergeAndMatchHaplotypes"; + private static String fundamentalTestPBTVCF = mergeAndMatchHaplotypesTestDataRoot + "/" + "FundamentalsTest.pbt.vcf"; + private static String fundamentalTestRBPVCF = mergeAndMatchHaplotypesTestDataRoot + "/" + "FundamentalsTest.pbt.rbp.vcf"; + + @Test + public void testBasicFunctionality() { + WalkerTestSpec spec = new WalkerTestSpec( + buildCommandLine( + "-T MergeAndMatchHaplotypes", + "-R " + b37KGReference, + "-B:pbt,VCF " + fundamentalTestPBTVCF, + "-B:rbp,VCF " + fundamentalTestRBPVCF, + "-o %s" + ), + 1, + Arrays.asList("") + ); + executeTest("testBasicMergeAndMatchHaplotypesFunctionality", spec); + } +} diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmissionIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmissionIntegrationTest.java new file mode 100644 index 000000000..f62f12082 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmissionIntegrationTest.java @@ -0,0 +1,27 @@ +package org.broadinstitute.sting.gatk.walkers.phasing; + +import org.broadinstitute.sting.WalkerTest; +import org.testng.annotations.Test; + +import java.util.Arrays; + +public class PhaseByTransmissionIntegrationTest extends WalkerTest { + private static String phaseByTransmissionTestDataRoot = validationDataLocation + "/PhaseByTransmission"; + private static String fundamentalTestVCF = phaseByTransmissionTestDataRoot + "/" + "FundamentalsTest.unfiltered.vcf"; + + @Test + public void testBasicFunctionality() { + WalkerTestSpec spec = new WalkerTestSpec( + buildCommandLine( + "-T PhaseByTransmission", + "-R " + b37KGReference, + "-B:variant,VCF " + fundamentalTestVCF, + "-f NA12892+NA12891=NA12878", + "-o %s" + ), + 1, + Arrays.asList("45fef0e23113e2fcd9570379e2fc1b75") + ); + executeTest("testBasicFunctionality", spec); + } +} diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/sequenom/PickSequenomProbesIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/sequenom/PickSequenomProbesIntegrationTest.java deleted file mode 100755 index 850a3113e..000000000 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/sequenom/PickSequenomProbesIntegrationTest.java +++ /dev/null @@ -1,34 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.sequenom; - -import org.broadinstitute.sting.WalkerTest; -import org.testng.annotations.Test; - -import java.util.Arrays; - -public class PickSequenomProbesIntegrationTest extends WalkerTest { - @Test - public void testProbes() { - String testVCF = validationDataLocation + "complexExample.vcf4"; - String testArgs = "-R " + b36KGReference + " -T PickSequenomProbes -L 1:10,000,000-11,000,000 -B:input,VCF "+testVCF+" -o %s"; - WalkerTestSpec spec = new WalkerTestSpec(testArgs, 1, - Arrays.asList("6b5409cc78960f1be855536ed89ea9dd")); - executeTest("Test probes", spec); - } - - @Test - public void testProbesUsingDbSNPMask() { - - String md5 = "46d53491af1d3aa0ee1f1e13d68b732d"; - String testVCF = validationDataLocation + "pickSeqIntegrationTest.vcf"; - - String testArgs = "-snp_mask " + validationDataLocation + "pickSeqIntegrationTest.bed -R " - + b36KGReference + " -omitWindow -nameConvention " - + "-project_id 1kgp3_s4_lf -T PickSequenomProbes -B:input,VCF "+testVCF+" -o %s"; - WalkerTestSpec spec1 = new WalkerTestSpec(testArgs, 1, Arrays.asList(md5)); - executeTest("Test probes", spec1); - - testArgs += " -nmw 1"; - WalkerTestSpec spec2 = new WalkerTestSpec(testArgs, 1, Arrays.asList(md5)); - executeTest("Test probes", spec2); - } -} diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmpliconsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmpliconsIntegrationTest.java new file mode 100755 index 000000000..95f4ac0ae --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmpliconsIntegrationTest.java @@ -0,0 +1,56 @@ +package org.broadinstitute.sting.gatk.walkers.validation; + +import org.broadinstitute.sting.WalkerTest; +import org.testng.annotations.Test; + +import java.util.Arrays; + +/** + * Created by IntelliJ IDEA. + * User: Ghost + * Date: 7/19/11 + * Time: 7:39 PM + * To change this template use File | Settings | File Templates. + */ +public class ValidationAmpliconsIntegrationTest extends WalkerTest { + + @Test(enabled=true) + public void testWikiExample() { + String siteVCF = validationDataLocation + "sites_to_validate.vcf"; + String maskVCF = validationDataLocation + "amplicon_mask_sites.vcf"; + String intervalTable = validationDataLocation + "amplicon_interval_table1.table"; + String testArgs = "-R " + b37KGReference + " -T ValidationAmplicons -B:ValidateAlleles,VCF "+siteVCF+" -o %s"; + testArgs += " -B:ProbeIntervals,table "+intervalTable+" -BTI ProbeIntervals -B:MaskAlleles,VCF "+maskVCF; + testArgs += " --virtualPrimerSize 30"; + WalkerTestSpec spec = new WalkerTestSpec(testArgs, 1, + Arrays.asList("27f9450afa132888a8994167f0035fd7")); + executeTest("Test probes", spec); + } + + @Test(enabled=true) + public void testWikiExampleNoBWA() { + String siteVCF = validationDataLocation + "sites_to_validate.vcf"; + String maskVCF = validationDataLocation + "amplicon_mask_sites.vcf"; + String intervalTable = validationDataLocation + "amplicon_interval_table1.table"; + String testArgs = "-R " + b37KGReference + " -T ValidationAmplicons -B:ValidateAlleles,VCF "+siteVCF+" -o %s"; + testArgs += " -B:ProbeIntervals,table "+intervalTable+" -BTI ProbeIntervals -B:MaskAlleles,VCF "+maskVCF; + testArgs += " --virtualPrimerSize 30 --doNotUseBWA"; + WalkerTestSpec spec = new WalkerTestSpec(testArgs, 1, + Arrays.asList("f2611ff1d9cd5bedaad003251fed8bc1")); + executeTest("Test probes", spec); + } + + @Test(enabled=true) + public void testWikiExampleMonoFilter() { + String siteVCF = validationDataLocation + "sites_to_validate.vcf"; + String maskVCF = validationDataLocation + "amplicon_mask_sites.vcf"; + String intervalTable = validationDataLocation + "amplicon_interval_table1.table"; + String testArgs = "-R " + b37KGReference + " -T ValidationAmplicons -B:ValidateAlleles,VCF "+siteVCF+" -o %s"; + testArgs += " -B:ProbeIntervals,table "+intervalTable+" -BTI ProbeIntervals -B:MaskAlleles,VCF "+maskVCF; + testArgs += " --virtualPrimerSize 30 --filterMonomorphic"; + WalkerTestSpec spec = new WalkerTestSpec(testArgs, 1, + Arrays.asList("77b3f30e38fedad812125bdf6cf3255f")); + executeTest("Test probes", spec); + } + +} diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java index 2fec2e70f..057053a1c 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java @@ -1,5 +1,6 @@ package org.broadinstitute.sting.gatk.walkers.variantrecalibration; +import org.broadinstitute.sting.MD5DB; import org.broadinstitute.sting.WalkerTest; import org.testng.annotations.Test; import org.testng.annotations.DataProvider; @@ -65,8 +66,8 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest { " -NO_HEADER" + " -B:input,VCF " + params.inVCF + " -o %s" + - " -tranchesFile " + getFileForMD5(params.tranchesMD5) + - " -recalFile " + getFileForMD5(params.recalMD5), + " -tranchesFile " + MD5DB.getMD5FilePath(params.tranchesMD5, null) + + " -recalFile " + MD5DB.getMD5FilePath(params.recalMD5, null), Arrays.asList(params.cutVCFMD5)); executeTest("testApplyRecalibration-"+params.inVCF, spec); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java index daaab9425..904a5b29b 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java @@ -70,7 +70,6 @@ public class CombineVariantsIntegrationTest extends WalkerTest { executeTest("combineSites 1:" + new File(file1).getName() + " 2:" + new File(file2).getName() + " args = " + args, spec); } - @Test public void test1SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "c608b9fc1e36dba6cebb4f259883f9f0", true); } @Test public void test2SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "20caad94411d6ab48153b214de916df8", " -setKey foo", true); } @Test public void test3SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "004f3065cb1bc2ce2f9afd695caf0b48", " -setKey null", true); } @@ -81,9 +80,9 @@ public class CombineVariantsIntegrationTest extends WalkerTest { @Test public void combineTrioCalls() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", "", "1d5a021387a8a86554db45a29f66140f", false); } // official project VCF files in tabix format @Test public void combineTrioCallsMin() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", " -minimalVCF", "20163d60f18a46496f6da744ab5cc0f9", false); } // official project VCF files in tabix format - @Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "5b82f37df1f5ba40f0474d71c94142ec", false); } + @Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "f1cf095c2fe9641b7ca1f8ee2c46fd4a", false); } - @Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "c58dca482bf97069eac6d9f1a07a2cba", false); } + @Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "e144b6283765494bfe8189ac59965083", false); } @Test public void uniqueSNPs() { combine2("pilot2.snps.vcf4.genotypes.vcf", "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "", "89f55abea8f59e39d1effb908440548c", true); } @@ -101,7 +100,7 @@ public class CombineVariantsIntegrationTest extends WalkerTest { " -priority NA19240_BGI,NA19240_ILLUMINA,NA19240_WUGSC,denovoInfo" + " -genotypeMergeOptions UNIQUIFY -L 1"), 1, - Arrays.asList("8b78339ccf7a5a5a837f79e88a3a38e5")); + Arrays.asList("1de95f91ca15d2a8856de35dee0ce33e")); executeTest("threeWayWithRefs", spec); } @@ -119,7 +118,7 @@ public class CombineVariantsIntegrationTest extends WalkerTest { executeTest("combineComplexSites 1:" + new File(file1).getName() + " 2:" + new File(file2).getName() + " args = " + args, spec); } -// @Test public void complexTestFull() { combineComplexSites("", "64b991fd3850f83614518f7d71f0532f"); } + @Test public void complexTestFull() { combineComplexSites("", "b5a53ee92bdaacd2bb3327e9004ae058"); } @Test public void complexTestMinimal() { combineComplexSites(" -minimalVCF", "df96cb3beb2dbb5e02f80abec7d3571e"); } @Test public void complexTestSitesOnly() { combineComplexSites(" -sites_only", "f72a178137e25dbe0b931934cdc0079d"); } @Test public void complexTestSitesOnlyMinimal() { combineComplexSites(" -sites_only -minimalVCF", "f704caeaaaed6711943014b847fe381a"); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTableIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTableIntegrationTest.java old mode 100644 new mode 100755 index 72647c8e1..1db712353 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTableIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTableIntegrationTest.java @@ -44,7 +44,7 @@ public class VariantsToTableIntegrationTest extends WalkerTest { @Test(enabled = true) public void testComplexVariantsToTable() { WalkerTestSpec spec = new WalkerTestSpec(variantsToTableCmd(" -AMD"), - Arrays.asList("b2a3712c1bfad8f1383ffada8b5017ba")); + Arrays.asList("e8f771995127b727fb433da91dd4ee98")); executeTest("testComplexVariantsToTable", spec).getFirst(); } diff --git a/public/java/test/org/broadinstitute/sting/jna/lsf/v7_0_6/LibBatIntegrationTest.java b/public/java/test/org/broadinstitute/sting/jna/lsf/v7_0_6/LibBatIntegrationTest.java index aa6303a6f..77db34cbc 100644 --- a/public/java/test/org/broadinstitute/sting/jna/lsf/v7_0_6/LibBatIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/jna/lsf/v7_0_6/LibBatIntegrationTest.java @@ -34,7 +34,6 @@ import org.testng.annotations.Test; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.jna.lsf.v7_0_6.LibBat.*; -import javax.jws.soap.SOAPBinding; import java.io.File; /** @@ -55,25 +54,25 @@ public class LibBatIntegrationTest extends BaseTest { @Test public void testReadConfEnv() { - LibLsf.config_param[] unitsParam = (LibLsf.config_param[]) new LibLsf.config_param().toArray(4); + LibLsf.config_param[] configParams = (LibLsf.config_param[]) new LibLsf.config_param().toArray(4); - unitsParam[0].paramName = "LSF_UNIT_FOR_LIMITS"; - unitsParam[1].paramName = "LSF_CONFDIR"; - unitsParam[2].paramName = "MADE_UP_PARAMETER"; + configParams[0].paramName = "LSF_UNIT_FOR_LIMITS"; + configParams[1].paramName = "LSF_CONFDIR"; + configParams[2].paramName = "MADE_UP_PARAMETER"; - Structure.autoWrite(unitsParam); + Structure.autoWrite(configParams); - if (LibLsf.ls_readconfenv(unitsParam[0], null) != 0) { + if (LibLsf.ls_readconfenv(configParams[0], null) != 0) { Assert.fail(LibLsf.ls_sysmsg()); } - Structure.autoRead(unitsParam); + Structure.autoRead(configParams); - System.out.println("LSF_UNIT_FOR_LIMITS: " + unitsParam[0].paramValue); - Assert.assertNotNull(unitsParam[1].paramValue); - Assert.assertNull(unitsParam[2].paramValue); - Assert.assertNull(unitsParam[3].paramName); - Assert.assertNull(unitsParam[3].paramValue); + System.out.println("LSF_UNIT_FOR_LIMITS: " + configParams[0].paramValue); + Assert.assertNotNull(configParams[1].paramValue); + Assert.assertNull(configParams[2].paramValue); + Assert.assertNull(configParams[3].paramName); + Assert.assertNull(configParams[3].paramValue); } @Test diff --git a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/IndexFactoryUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/IndexFactoryUnitTest.java index 2f6b589f4..68a2ecf8d 100755 --- a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/IndexFactoryUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/IndexFactoryUnitTest.java @@ -4,6 +4,7 @@ import org.broad.tribble.Tribble; import org.broad.tribble.index.*; import org.broad.tribble.iterators.CloseableTribbleIterator; import org.broad.tribble.source.BasicFeatureSource; +import org.broadinstitute.sting.WalkerTest; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.testng.Assert; import org.testng.annotations.Test; @@ -75,7 +76,7 @@ public class IndexFactoryUnitTest { // test that the input index is the same as the one created from the identical input file // test that the dynamic index is the same as the output index, which is equal to the input index - Assert.assertTrue(IndexFactory.onDiskIndexEqualToNewlyCreatedIndex(outputFile, outputFileIndex, new VCFCodec())); + WalkerTest.assertOnDiskIndexEqualToNewlyCreatedIndex(outputFileIndex, "unittest", outputFile); } } } diff --git a/public/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFHeaderUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFHeaderUnitTest.java index c4ca6a551..14e63191d 100644 --- a/public/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFHeaderUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFHeaderUnitTest.java @@ -2,15 +2,16 @@ package org.broadinstitute.sting.utils.genotype.vcf; import org.broad.tribble.readers.AsciiLineReader; import org.broadinstitute.sting.utils.codecs.vcf.*; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.testng.Assert; import org.broadinstitute.sting.BaseTest; import org.testng.annotations.Test; -import java.io.File; -import java.io.IOException; -import java.io.PrintWriter; -import java.io.StringBufferInputStream; +import java.io.*; +import java.math.BigInteger; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; /** * Created by IntelliJ IDEA. @@ -40,6 +41,52 @@ public class VCFHeaderUnitTest extends BaseTest { checkMD5ofHeaderFile(header, "ad8c4cf85e868b0261ab49ee2c613088"); } + /** + * a little utility function for all tests to md5sum a file + * Shameless taken from: + * + * http://www.javalobby.org/java/forums/t84420.html + * + * @param file the file + * @return a string + */ + private static String md5SumFile(File file) { + MessageDigest digest; + try { + digest = MessageDigest.getInstance("MD5"); + } catch (NoSuchAlgorithmException e) { + throw new ReviewedStingException("Unable to find MD5 digest"); + } + InputStream is; + try { + is = new FileInputStream(file); + } catch (FileNotFoundException e) { + throw new ReviewedStingException("Unable to open file " + file); + } + byte[] buffer = new byte[8192]; + int read; + try { + while ((read = is.read(buffer)) > 0) { + digest.update(buffer, 0, read); + } + byte[] md5sum = digest.digest(); + BigInteger bigInt = new BigInteger(1, md5sum); + return bigInt.toString(16); + + } + catch (IOException e) { + throw new ReviewedStingException("Unable to process file for MD5", e); + } + finally { + try { + is.close(); + } + catch (IOException e) { + throw new ReviewedStingException("Unable to close input stream for MD5 calculation", e); + } + } + } + private void checkMD5ofHeaderFile(VCFHeader header, String md5sum) { File myTempFile = null; PrintWriter pw = null; diff --git a/public/java/test/org/broadinstitute/sting/utils/threading/GenomeLocProcessingTrackerUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/threading/GenomeLocProcessingTrackerUnitTest.java deleted file mode 100644 index 78ab916db..000000000 --- a/public/java/test/org/broadinstitute/sting/utils/threading/GenomeLocProcessingTrackerUnitTest.java +++ /dev/null @@ -1,402 +0,0 @@ -// our package -package org.broadinstitute.sting.utils.threading; - - -// the imports for unit testing. - - -import net.sf.picard.reference.IndexedFastaSequenceFile; -import org.broadinstitute.sting.BaseTest; -import org.broadinstitute.sting.gatk.iterators.GenomeLocusIterator; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.exceptions.UserException; -import org.testng.Assert; -import org.testng.annotations.*; - -import java.io.File; -import java.io.FileNotFoundException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Iterator; -import java.util.List; -import java.util.concurrent.*; - -/** - * Basic unit test for GenomeLoc - */ -public class GenomeLocProcessingTrackerUnitTest extends BaseTest { - IndexedFastaSequenceFile fasta = null; - GenomeLocParser genomeLocParser = null; - String chr1 = null; - private final static String FILE_ROOT = "public/testdata/GLPTFile"; - - @BeforeTest - public void before() { - File referenceFile = new File(hg18Reference); - try { - fasta = new IndexedFastaSequenceFile(referenceFile); - chr1 = fasta.getSequenceDictionary().getSequence(1).getSequenceName(); - genomeLocParser = new GenomeLocParser(fasta); - - } - catch(FileNotFoundException ex) { - throw new UserException.CouldNotReadInputFile(referenceFile,ex); - } - } - - @BeforeMethod - public void beforeMethod(Object[] data) { - if ( data.length > 0 ) - ((TestTarget)data[0]).init(); - } - - @AfterMethod - public void afterMethod(Object[] data) { - if ( data.length > 0 ) { - ((TestTarget)data[0]).getTracker().close(); - ((TestTarget)data[0]).cleanup(); - } - } - - abstract private class TestTarget { - String name; - int nShards; - int shardSize; - File file; - - public void init() { cleanup(); } - - public void cleanup() { - if ( file != null && file.exists() ) - file.delete(); - } - - public boolean isThreadSafe() { return true; } - - protected TestTarget(String name, int nShards, int shardSize, File file) { - this.name = name; - this.nShards = nShards; - this.shardSize = shardSize; - this.file = file; - } - - public abstract GenomeLocProcessingTracker getTracker(); - - public List getShards() { - List shards = new ArrayList(); - for ( int i = 0; i < nShards; i++ ) { - int start = shardSize * i; - int stop = start + shardSize; - shards.add(genomeLocParser.createGenomeLoc(chr1, start, stop)); - } - return shards; - } - - public String toString() { - return String.format("TestTarget %s: nShards=%d shardSize=%d", name, nShards, shardSize); - } - } - - @DataProvider(name = "threadData") - public Object[][] createThreadData() { - // gotta keep the tests small... - return createData(Arrays.asList(10, 100), Arrays.asList(10)); - //return createData(Arrays.asList(10, 100, 1000, 10000), Arrays.asList(10)); - } - - public Object[][] createData(List nShards, List shardSizes) { - List params = new ArrayList(); - - int counter = 0; - String name = null; - for ( int nShard : nShards ) { - for ( int shardSize : shardSizes ) { - // shared mem -- canonical implementation - params.add(new TestTarget("ThreadSafeSharedMemory", nShard, shardSize, null) { - GenomeLocProcessingTracker tracker = new SharedMemoryGenomeLocProcessingTracker(new ClosableReentrantLock()); - public GenomeLocProcessingTracker getTracker() { return tracker; } - }); - - final File file1 = new File(String.format("%s_ThreadSafeFileBacked_%d_%d", FILE_ROOT, counter++, nShard, shardSize)); - params.add(new TestTarget("ThreadSafeFileBacked", nShard, shardSize, file1) { - GenomeLocProcessingTracker tracker = new FileBackedGenomeLocProcessingTracker(file1, genomeLocParser, new ClosableReentrantLock(), null); - public GenomeLocProcessingTracker getTracker() { return tracker; } - }); - - name = "FileBackedSharedFileThreadSafe"; - final File file2 = new File(String.format("%s_%s_%d_%d", FILE_ROOT, name, counter++, nShard, shardSize)); - params.add(new TestTarget(name, nShard, shardSize, file2) { - GenomeLocProcessingTracker tracker = new FileBackedGenomeLocProcessingTracker(file2, genomeLocParser, new SharedFileThreadSafeLock(file2, -1), null); - public GenomeLocProcessingTracker getTracker() { return tracker; } - }); - - name = "FileBackedSharedFile"; - final File file3 = new File(String.format("%s_%s_%d_%d", FILE_ROOT, name, counter++, nShard, shardSize)); - params.add(new TestTarget(name, nShard, shardSize, file3) { - GenomeLocProcessingTracker tracker = new FileBackedGenomeLocProcessingTracker(file3, genomeLocParser, new SharedFileLock(file3, -1), null); - public GenomeLocProcessingTracker getTracker() { return tracker; } - public boolean isThreadSafe() { return false; } - }); - } - } - - List params2 = new ArrayList(); - for ( TestTarget x : params ) params2.add(new Object[]{x}); - return params2.toArray(new Object[][]{}); - } - - @DataProvider(name = "simpleData") - public Object[][] createSimpleData() { - return createData(Arrays.asList(1000), Arrays.asList(100)); - } - - private static final String NAME_ONE = "name1"; - private static final String NAME_TWO = "name2"; - - @Test(enabled = true) - public void testNoop() { - GenomeLocProcessingTracker tracker = new NoOpGenomeLocProcessingTracker(); - for ( int start = 1; start < 100; start++ ) { - for ( int n = 0; n < 2; n++ ) { - GenomeLoc loc = genomeLocParser.createGenomeLoc(chr1, start, start +1); - ProcessingLoc ploc = tracker.claimOwnership(loc, NAME_ONE); - Assert.assertTrue(ploc.isOwnedBy(NAME_ONE)); - Assert.assertEquals(tracker.updateAndGetProcessingLocs(NAME_ONE).size(), 0); - } - } - } - - @Test(dataProvider = "simpleData", enabled = true) - public void testSingleProcessTracker(TestTarget test) { - GenomeLocProcessingTracker tracker = test.getTracker(); - List shards = test.getShards(); - logger.warn("testSingleProcessTracker " + test); - - int counter = 0; - for ( GenomeLoc shard : shards ) { - counter++; - - Assert.assertNull(tracker.findOwner(shard, NAME_ONE)); - Assert.assertFalse(tracker.locIsOwned(shard, NAME_ONE)); - - ProcessingLoc proc = tracker.claimOwnership(shard,NAME_ONE); - Assert.assertNotNull(proc); - Assert.assertNotNull(proc.getLocation()); - Assert.assertNotNull(proc.getOwner()); - Assert.assertEquals(proc.getLocation(), shard); - Assert.assertEquals(proc.getOwner(), NAME_ONE); - Assert.assertEquals(tracker.findOwner(shard, NAME_ONE), proc); - Assert.assertTrue(tracker.locIsOwned(shard, NAME_ONE)); - Assert.assertNotNull(tracker.updateAndGetProcessingLocs(NAME_ONE)); - Assert.assertEquals(tracker.updateAndGetProcessingLocs(NAME_ONE).size(), counter); - - ProcessingLoc badClaimAttempt = tracker.claimOwnership(shard,NAME_TWO); - Assert.assertFalse(badClaimAttempt.getOwner().equals(NAME_TWO)); - Assert.assertEquals(badClaimAttempt.getOwner(), NAME_ONE); - } - } - - @Test(dataProvider = "simpleData", enabled = true) - public void testIterator(TestTarget test) { - GenomeLocProcessingTracker tracker = test.getTracker(); - List shards = test.getShards(); - logger.warn("testIterator " + test); - - List markedShards = new ArrayList(); - List toFind = new ArrayList(); - - for ( int i = 0; i < shards.size(); i++ ) { - if ( ! (i % 10 == 0) ) { - markedShards.add(shards.get(i)); - tracker.claimOwnership(shards.get(i), NAME_TWO); - } else { - toFind.add(shards.get(i)); - } - } - - int nFound = 0; - Iterator it = shards.iterator(); - while ( it.hasNext() ) { - GenomeLoc shard = tracker.claimOwnershipOfNextAvailable(it, NAME_ONE); - - if ( shard == null ) { // everything to get is done - Assert.assertEquals(nFound, toFind.size(), "Didn't find all of the available shards"); - } else { - nFound++; - ProcessingLoc proc = tracker.findOwner(shard, NAME_ONE); - - Assert.assertTrue(proc.isOwnedBy(NAME_ONE)); - Assert.assertTrue(! markedShards.contains(shard), "Ran process was already marked!"); - Assert.assertTrue(toFind.contains(shard), "Claimed shard wasn't one of the unmarked!"); - } - } - } - - @Test(dataProvider = "simpleData", enabled = true) - public void testMarkedProcesses(TestTarget test) { - GenomeLocProcessingTracker tracker = test.getTracker(); - List shards = test.getShards(); - logger.warn("testMarkedProcesses " + test); - - List markedShards = new ArrayList(); - - for ( int i = 0; i < shards.size(); i++ ) { - if ( i % 2 == 0 ) { - markedShards.add(shards.get(i)); - tracker.claimOwnership(shards.get(i), NAME_TWO); - } - } - - for ( GenomeLoc shard : shards ) { - ProcessingLoc proc = tracker.claimOwnership(shard,NAME_ONE); - - Assert.assertTrue(proc.isOwnedBy(NAME_ONE) || proc.isOwnedBy(NAME_TWO)); - - if ( proc.isOwnedBy(NAME_ONE) ) - Assert.assertTrue(! markedShards.contains(shard), "Ran process was already marked!"); - else - Assert.assertTrue(markedShards.contains(shard), "Unran process wasn't marked"); - - if ( ! markedShards.contains(shard) ) { - Assert.assertEquals(tracker.findOwner(shard, NAME_ONE), proc); - } - } - } - - public class TestThread implements Callable { - public TestTarget test; - public String name; - public List ran, toRun; - boolean useIterator; - - public TestThread(TestTarget test, int count, List toRun, boolean useIterator) { - this.test = test; - this.toRun = toRun; - this.name = "thread" + count; - this.ran = new ArrayList(); - this.useIterator = useIterator; - } - - public Integer call() { - //logger.warn(String.format("Call() Thread %s", name)); - if ( useIterator ) { - for ( GenomeLoc shard : test.getTracker().onlyOwned(toRun.iterator(), name) ) { - if ( shard != null ) { // ignore the unclaimable end of the stream - ran.add(shard); - // do some work here - for ( int sum =0, i = 0; i < 100000; i++) sum += i; - } - } - - } else { - for ( GenomeLoc shard : toRun ) { - //System.out.printf("Claiming ownership in %s on %s%n", name, shard); - ProcessingLoc proc = test.getTracker().claimOwnership(shard,name); - //System.out.printf(" => ownership of %s is %s (I own? %b)%n", shard, proc.getOwner(), proc.isOwnedBy(name)); - if ( proc.isOwnedBy(name) ) { - ran.add(proc.getLocation()); - // do some work here - for ( int sum =0, i = 0; i < 100000; i++) sum += i; - } - //logger.warn(String.format("Thread %s on %s -> owned by %s", name, shard, proc.getOwner())); - } - } - - return 1; - } - } - - private static TestThread findOwner(String name, List threads) { - for ( TestThread thread : threads ) { - if ( thread.name.equals(name) ) - return thread; - } - return null; - } - - private static final void assertAllThreadsFinished(List> futures) { - try { - for ( Future f : futures ) { - Assert.assertTrue(f.isDone(), "Thread never finished running"); - Assert.assertTrue(f.get() != null, "Finished successfully"); - } - } catch (InterruptedException e) { - Assert.fail("Thread failed to run to completion", e); - } catch (ExecutionException e) { - Assert.fail("Thread generated an exception", e); - } - } - - private static final List subList(List l, int i) { - List r = new ArrayList(); - for ( int j = 0; j < l.size(); j++ ) { - if ( j % i == 0 ) - r.add(l.get(j)); - } - - return r; - } - - @Test(dataProvider = "threadData", enabled = true) - public void testThreadedProcessesLowLevelFunctions(TestTarget test) { - testThreading(test, false); - } - - @Test(dataProvider = "threadData", enabled = true) - public void testThreadedProcessesIterator(TestTarget test) { - testThreading(test, true); - } - - private void testThreading(TestTarget test, boolean useIterator) { - if ( ! test.isThreadSafe() ) - // skip tests that aren't thread safe - return; - - // start up 3 threads - logger.warn("ThreadedTesting " + test + " using iterator " + useIterator); - List threads = new ArrayList(); - for ( int i = 0; i < 4; i++) { - List toRun = subList(test.getShards(), i+1); - TestThread thread = new TestThread(test, i, toRun, useIterator); - threads.add(thread); - } - ExecutorService exec = java.util.concurrent.Executors.newFixedThreadPool(threads.size()); - - try { - List> results = exec.invokeAll(threads, 300, TimeUnit.SECONDS); - GenomeLocProcessingTracker tracker = test.getTracker(); - List shards = test.getShards(); - - for ( TestThread thread : threads ) - logger.warn(String.format("TestThread %s ran %d jobs of %d to run", thread.name, thread.ran.size(), thread.toRun.size())); - - assertAllThreadsFinished(results); - - // we ran everything - Assert.assertEquals(tracker.updateAndGetProcessingLocs(NAME_ONE).size(), shards.size(), "Not all shards were run"); - - for ( GenomeLoc shard : shards ) { - Assert.assertTrue(tracker.locIsOwned(shard, NAME_ONE), "Unowned shard"); - - ProcessingLoc proc = tracker.findOwner(shard, NAME_ONE); - Assert.assertNotNull(proc, "Proc was null"); - - Assert.assertNotNull(proc.getOwner(), "Owner was null"); - Assert.assertEquals(proc.getLocation(), shard, "Shard loc doesn't make ProcessingLoc"); - - TestThread owner = findOwner(proc.getOwner(), threads); - Assert.assertNotNull(owner, "Couldn't find owner"); - - Assert.assertTrue(owner.ran.contains(shard), "Owner doesn't contain ran shard"); - - for ( TestThread thread : threads ) - if ( ! proc.isOwnedBy(thread.name) && thread.ran.contains(shard) ) - Assert.fail("Shard appears in another run list: proc=" + proc + " shard=" + shard + " also in jobs of " + thread.name + " obj=" + thread.ran.get(thread.ran.indexOf(shard))); - - } - } catch (InterruptedException e) { - Assert.fail("Thread failure", e); - } - } -} \ No newline at end of file diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextIntegrationTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextIntegrationTest.java index a344817a0..6ed00f0ea 100755 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextIntegrationTest.java @@ -19,14 +19,14 @@ public class VariantContextIntegrationTest extends WalkerTest { static HashMap expectations = new HashMap(); static { - expectations.put("-L 1:1-10000 --printPerLocus", "e4ee2eaa3114888e918a1c82df7a027a"); - expectations.put("-L 1:1-10000 --printPerLocus --takeFirstOnly", "5b5635e4877d82e8a27d70dac24bda2f"); - expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsStartinAtCurrentPosition", "ceced3f270b4fe407ee83bc9028becde"); - expectations.put("-L 1:1-10000 --printPerLocus --takeFirstOnly --onlyContextsStartinAtCurrentPosition", "9a9b9e283553c28bf58de1cafa38fe92"); + expectations.put("-L 1:1-10000 --printPerLocus", "e9d96677a57bc3a10fb6d9ba942c19f0"); + expectations.put("-L 1:1-10000 --printPerLocus --takeFirstOnly", "8a1174d2b18b98e624abbe93e6af8fdd"); + expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsStartinAtCurrentPosition", "3933f1fae5453c54c3f791a23de07599"); + expectations.put("-L 1:1-10000 --printPerLocus --takeFirstOnly --onlyContextsStartinAtCurrentPosition", "c9cf2f01bf045a58dcc7649fd6ea2396"); expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType SNP", "2097e32988d603d3b353b50218c86d3b"); - expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType INDEL", "033bd952fca048fe1a4f6422b57ab2ed"); - expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType INDEL --onlyContextsStartinAtCurrentPosition", "5e40980c02797f90821317874426a87a"); - expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType MIXED", "e5a00766f8c1ff9cf92310bafdec3126"); + expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType INDEL", "a103d856e8bc558c949c6e3f184e8913"); + expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType INDEL --onlyContextsStartinAtCurrentPosition", "5f2265ac6c6d80d64dc6e69a05c1250b"); + expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType MIXED", "06a3ae4c0afa23b429a9491ab7707f3c"); expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType NO_VARIATION", "39335acdb34c8a2af433dc50d619bcbc"); } @@ -58,7 +58,7 @@ public class VariantContextIntegrationTest extends WalkerTest { // this really just tests that we are seeing the same number of objects over all of chr1 WalkerTestSpec spec = new WalkerTestSpec( root + " -L 1" + " -o %s", 1, // just one output file - Arrays.asList("529f936aa6c303658b23caf4e527782f")); + Arrays.asList("045a5b02c86aeb9301dc0b724da0c8f7")); executeTest("testLargeScaleConversion", spec); } } diff --git a/public/packages/PicardPrivate.xml b/public/packages/PicardPrivate.xml index 110b41d3f..581c47979 100644 --- a/public/packages/PicardPrivate.xml +++ b/public/packages/PicardPrivate.xml @@ -7,6 +7,8 @@ + + diff --git a/public/packages/Queue.xml b/public/packages/Queue.xml index 58da4398e..589cb45f5 100644 --- a/public/packages/Queue.xml +++ b/public/packages/Queue.xml @@ -41,6 +41,7 @@ + diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala index 6a47d4b97..959d073c7 100755 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala @@ -72,6 +72,9 @@ class DataProcessingPipeline extends QScript { @Input(doc="Number of threads BWA should use", fullName="bwa_threads", shortName="bt", required=false) var bwaThreads: Int = 1 + @Input(doc="Dont perform validation on the BAM files", fullName="no_validation", shortName="nv", required=false) + var noValidation: Boolean = false + /**************************************************************************** * Global Variables @@ -135,7 +138,7 @@ class DataProcessingPipeline extends QScript { } } - println("\n\n*** DEBUG ***\n") + println("\n\n*** INPUT FILES ***\n") // Creating one file for each sample in the dataset val sampleBamFiles = scala.collection.mutable.Map.empty[String, File] for ((sample, flist) <- sampleTable) { @@ -149,7 +152,7 @@ class DataProcessingPipeline extends QScript { sampleBamFiles(sample) = sampleFileName add(joinBams(flist, sampleFileName)) } - println("*** DEBUG ***\n\n") + println("*** INPUT FILES ***\n\n") return sampleBamFiles.toMap } @@ -246,7 +249,12 @@ class DataProcessingPipeline extends QScript { val preValidateLog = swapExt(bam, ".bam", ".pre.validation") val postValidateLog = swapExt(bam, ".bam", ".post.validation") - add(validate(bam, preValidateLog)) + // Validation is an optional step for the BAM file generated after + // alignment and the final bam file of the pipeline. + if (!noValidation) { + add(validate(bam, preValidateLog), + validate(recalBam, postValidateLog)) + } if (cleaningModel != ConsensusDeterminationModel.KNOWNS_ONLY) add(target(bam, targetIntervals)) @@ -257,8 +265,8 @@ class DataProcessingPipeline extends QScript { recal(dedupedBam, preRecalFile, recalBam), cov(recalBam, postRecalFile), analyzeCovariates(preRecalFile, preOutPath), - analyzeCovariates(postRecalFile, postOutPath), - validate(recalBam, postValidateLog)) + analyzeCovariates(postRecalFile, postOutPath)) + cohortList :+= recalBam } @@ -282,6 +290,13 @@ class DataProcessingPipeline extends QScript { this.isIntermediate = true } + // General arguments to non-GATK tools + trait ExternalCommonArgs extends CommandLineFunction { + this.memoryLimit = 4 + this.isIntermediate = true + } + + case class target (inBams: File, outIntervals: File) extends RealignerTargetCreator with CommandLineGATKArgs { if (cleaningModel != ConsensusDeterminationModel.KNOWNS_ONLY) this.input_file :+= inBams @@ -300,8 +315,8 @@ class DataProcessingPipeline extends QScript { this.targetIntervals = tIntervals this.out = outBam this.rodBind :+= RodBind("dbsnp", "VCF", dbSNP) - if (!indels.isEmpty) - this.rodBind :+= RodBind("indels", "VCF", indels) + if (!qscript.indels.isEmpty) + this.rodBind :+= RodBind("indels", "VCF", qscript.indels) this.consensusDeterminationModel = consensusDeterminationModel this.compress = 0 this.scatterCount = nContigs @@ -332,7 +347,6 @@ class DataProcessingPipeline extends QScript { this.isIntermediate = false this.analysisName = queueLogDir + outBam + ".recalibration" this.jobName = queueLogDir + outBam + ".recalibration" - } @@ -350,48 +364,45 @@ class DataProcessingPipeline extends QScript { this.jobName = queueLogDir + inRecalFile + ".analyze_covariates" } - case class dedup (inBam: File, outBam: File, metricsFile: File) extends MarkDuplicates { + case class dedup (inBam: File, outBam: File, metricsFile: File) extends MarkDuplicates with ExternalCommonArgs { + @Output(doc="output bai file") var bai = swapExt(outBam, ".bam", ".bai") this.input = List(inBam) this.output = outBam this.metrics = metricsFile - this.memoryLimit = 6 - this.isIntermediate = true this.analysisName = queueLogDir + outBam + ".dedup" this.jobName = queueLogDir + outBam + ".dedup" } - case class joinBams (inBams: List[File], outBam: File) extends MergeSamFiles { + case class joinBams (inBams: List[File], outBam: File) extends MergeSamFiles with ExternalCommonArgs { + @Output(doc="output bai file") var bai = swapExt(outBam, ".bam", ".bai") this.input = inBams this.output = outBam - this.memoryLimit = 4 - this.isIntermediate = true this.analysisName = queueLogDir + outBam + ".joinBams" this.jobName = queueLogDir + outBam + ".joinBams" } - case class sortSam (inSam: File, outBam: File, sortOrderP: SortOrder) extends SortSam { + case class sortSam (inSam: File, outBam: File, sortOrderP: SortOrder) extends SortSam with ExternalCommonArgs { + @Output(doc="output bai file") var bai = swapExt(outBam, ".bam", ".bai") this.input = List(inSam) this.output = outBam this.sortOrder = sortOrderP - this.memoryLimit = 4 - this.isIntermediate = true this.analysisName = queueLogDir + outBam + ".sortSam" this.jobName = queueLogDir + outBam + ".sortSam" } - case class validate (inBam: File, outLog: File) extends ValidateSamFile { + case class validate (inBam: File, outLog: File) extends ValidateSamFile with ExternalCommonArgs { this.input = List(inBam) this.output = outLog this.maxRecordsInRam = 100000 this.REFERENCE_SEQUENCE = qscript.reference - this.memoryLimit = 4 this.isIntermediate = false this.analysisName = queueLogDir + outLog + ".validate" this.jobName = queueLogDir + outLog + ".validate" } - case class addReadGroup (inBam: File, outBam: File, readGroup: ReadGroup) extends AddOrReplaceReadGroups { + case class addReadGroup (inBam: File, outBam: File, readGroup: ReadGroup) extends AddOrReplaceReadGroups with ExternalCommonArgs { + @Output(doc="output bai file") var bai = swapExt(outBam, ".bam", ".bai") this.input = List(inBam) this.output = outBam this.RGID = readGroup.id @@ -407,12 +418,7 @@ class DataProcessingPipeline extends QScript { this.jobName = queueLogDir + outBam + ".rg" } - trait BWACommonArgs extends CommandLineFunction { - this.memoryLimit = 4 - this.isIntermediate = true - } - - case class bwa_aln_se (inBam: File, outSai: File) extends CommandLineFunction with BWACommonArgs { + case class bwa_aln_se (inBam: File, outSai: File) extends CommandLineFunction with ExternalCommonArgs { @Input(doc="bam file to be aligned") var bam = inBam @Output(doc="output sai file") var sai = outSai def commandLine = bwaPath + " aln -t " + bwaThreads + " -q 5 " + reference + " -b " + bam + " > " + sai @@ -420,7 +426,7 @@ class DataProcessingPipeline extends QScript { this.jobName = queueLogDir + outSai + ".bwa_aln_se" } - case class bwa_aln_pe (inBam: File, outSai1: File, index: Int) extends CommandLineFunction with BWACommonArgs { + case class bwa_aln_pe (inBam: File, outSai1: File, index: Int) extends CommandLineFunction with ExternalCommonArgs { @Input(doc="bam file to be aligned") var bam = inBam @Output(doc="output sai file for 1st mating pair") var sai = outSai1 def commandLine = bwaPath + " aln -t " + bwaThreads + " -q 5 " + reference + " -b" + index + " " + bam + " > " + sai @@ -428,7 +434,7 @@ class DataProcessingPipeline extends QScript { this.jobName = queueLogDir + outSai1 + ".bwa_aln_pe1" } - case class bwa_sam_se (inBam: File, inSai: File, outBam: File) extends CommandLineFunction with BWACommonArgs { + case class bwa_sam_se (inBam: File, inSai: File, outBam: File) extends CommandLineFunction with ExternalCommonArgs { @Input(doc="bam file to be aligned") var bam = inBam @Input(doc="bwa alignment index file") var sai = inSai @Output(doc="output aligned bam file") var alignedBam = outBam @@ -437,7 +443,7 @@ class DataProcessingPipeline extends QScript { this.jobName = queueLogDir + outBam + ".bwa_sam_se" } - case class bwa_sam_pe (inBam: File, inSai1: File, inSai2:File, outBam: File) extends CommandLineFunction with BWACommonArgs { + case class bwa_sam_pe (inBam: File, inSai1: File, inSai2:File, outBam: File) extends CommandLineFunction with ExternalCommonArgs { @Input(doc="bam file to be aligned") var bam = inBam @Input(doc="bwa alignment index file for 1st mating pair") var sai1 = inSai1 @Input(doc="bwa alignment index file for 2nd mating pair") var sai2 = inSai2 diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala index 150d78019..934cf2a3c 100755 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala @@ -15,8 +15,8 @@ class GATKResourcesBundle extends QScript { @Argument(doc="liftOverPerl", required=false) var liftOverPerl: File = new File("./perl/liftOverVCF.pl") - @Argument(shortName = "svn", doc="The SVN version of this release", required=true) - var SVN_VERSION: String = _ + @Argument(shortName = "ver", doc="The SVN version of this release", required=true) + var VERSION: String = _ @Argument(shortName = "bundleDir", doc="Path to root where resource files will be placed", required=false) val BUNDLE_ROOT = new File("/humgen/gsa-hpprojects/GATK/bundle") @@ -32,8 +32,8 @@ class GATKResourcesBundle extends QScript { val SITES_EXT: String = "sites" - def BUNDLE_DIR: File = BUNDLE_ROOT + "/" + SVN_VERSION - def DOWNLOAD_DIR: File = DOWNLOAD_ROOT + "/" + SVN_VERSION + def BUNDLE_DIR: File = BUNDLE_ROOT + "/" + VERSION + def DOWNLOAD_DIR: File = DOWNLOAD_ROOT + "/" + VERSION // REFERENCES class Reference( val name: String, val file: File ) { } @@ -113,6 +113,12 @@ class GATKResourcesBundle extends QScript { addResource(new Resource(hg19.file, "", hg19, false)) addResource(new Resource(hg18.file, "", hg18, false)) + // + // The b37_decoy reference + // + addResource(new Resource("/humgen/1kg/reference/human_g1k_v37_decoy.fasta", + "IGNORE", b37, false, false)) + // // standard VCF files. Will be lifted to each reference // diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/RecalibrateBaseQualities.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/RecalibrateBaseQualities.scala index fca420816..cbe53db8d 100755 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/RecalibrateBaseQualities.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/RecalibrateBaseQualities.scala @@ -20,14 +20,14 @@ class RecalibrateBaseQualities extends QScript { @Input(doc="input BAM file - or list of BAM files", shortName="i", required=true) var input: File = _ - @Input(doc="path to R resources folder inside the Sting repository", fullName="path_to_r", shortName="r", required=false) - var R: String = new File("/humgen/gsa-scr1/carneiro/stable/R") + @Input(doc="path to R resources folder inside the Sting repository", fullName="path_to_r", shortName="r", required=true) + var R: String = _ - @Input(doc="Reference fasta file", shortName="R", required=false) - var reference: File = new File("/seq/references/Homo_sapiens_assembly19/v1/Homo_sapiens_assembly19.fasta") + @Input(doc="Reference fasta file", shortName="R", required=true) + var reference: File = _ // new File("/seq/references/Homo_sapiens_assembly19/v1/Homo_sapiens_assembly19.fasta") - @Input(doc="dbsnp ROD to use (VCF)", shortName="D", required=false) - var dbSNP: File = new File("/humgen/gsa-hpprojects/GATK/data/dbsnp_132_b37.leftAligned.vcf") + @Input(doc="dbsnp ROD to use (VCF)", shortName="D", required=true) + var dbSNP: File = _ // new File("/humgen/gsa-hpprojects/GATK/data/dbsnp_132_b37.leftAligned.vcf") val queueLogDir: String = ".qlog/" var nContigs: Int = 0 @@ -42,8 +42,8 @@ class RecalibrateBaseQualities extends QScript { val recalFile1: File = swapExt(bam, ".bam", ".recal1.csv") val recalFile2: File = swapExt(bam, ".bam", ".recal2.csv") val recalBam: File = swapExt(bam, ".bam", ".recal.bam") - val path1: String = bam + ".before" - val path2: String = bam + ".after" + val path1: String = recalBam + ".before" + val path2: String = recalBam + ".after" add(cov(bam, recalFile1), recal(bam, recalFile1, recalBam), diff --git a/public/scala/src/org/broadinstitute/sting/queue/QSettings.scala b/public/scala/src/org/broadinstitute/sting/queue/QSettings.scala index 71970a36b..05c1a1775 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/QSettings.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/QSettings.scala @@ -45,7 +45,7 @@ class QSettings { var jobPriority: Option[Int] = None @Argument(fullName="default_memory_limit", shortName="memLimit", doc="Default memory limit for jobs, in gigabytes.", required=false) - var memoryLimit: Option[Int] = None + var memoryLimit: Option[Double] = None @Argument(fullName="run_directory", shortName="runDir", doc="Root directory to run functions from.", required=false) var runDirectory = new File(".") diff --git a/public/scala/src/org/broadinstitute/sting/queue/engine/CommandLineJobRunner.scala b/public/scala/src/org/broadinstitute/sting/queue/engine/CommandLineJobRunner.scala index 2fbfab5ec..2e3108136 100755 --- a/public/scala/src/org/broadinstitute/sting/queue/engine/CommandLineJobRunner.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/engine/CommandLineJobRunner.scala @@ -33,12 +33,29 @@ import org.broadinstitute.sting.queue.util.{Logging, IOUtils} */ trait CommandLineJobRunner extends JobRunner[CommandLineFunction] with Logging { + /** The string representation of the identifier of the running job. */ + def jobIdString: String = null + /** A generated exec shell script. */ protected var jobScript: File = _ /** Which directory to use for the job status files. */ protected def jobStatusDir = function.jobTempDir + /** Amount of time a job can go without status before giving up. */ + private val unknownStatusMaxSeconds = 5 * 60 + + /** Last known status */ + protected var lastStatus: RunnerStatus.Value = _ + + /** The last time the status was updated */ + protected var lastStatusUpdate: Long = _ + + final override def status = this.lastStatus + + def residentRequestMB: Option[Double] = function.memoryLimit.map(_ * 1024) + def residentLimitMB: Option[Double] = residentRequestMB.map( _ * 1.2 ) + override def init() { super.init() var exec = new StringBuilder @@ -53,7 +70,21 @@ trait CommandLineJobRunner extends JobRunner[CommandLineFunction] with Logging { } exec.append(function.commandLine) - this.jobScript = IOUtils.writeTempFile(exec.toString, ".exec", "", jobStatusDir) + this.jobScript = IOUtils.writeTempFile(exec.toString(), ".exec", "", jobStatusDir) + } + + protected def updateStatus(updatedStatus: RunnerStatus.Value) { + this.lastStatus = updatedStatus + this.lastStatusUpdate = System.currentTimeMillis + } + + override def checkUnknownStatus() { + val unknownStatusMillis = (System.currentTimeMillis - lastStatusUpdate) + if (unknownStatusMillis > (unknownStatusMaxSeconds * 1000L)) { + // Unknown status has been returned for a while now. + updateStatus(RunnerStatus.FAILED) + logger.error("Unable to read status for %0.2f minutes: job id %d: %s".format(unknownStatusMillis/(60 * 1000D), jobIdString, function.description)) + } } override def cleanup() { diff --git a/public/scala/src/org/broadinstitute/sting/queue/engine/JobManager.scala b/public/scala/src/org/broadinstitute/sting/queue/engine/JobManager.scala index d2be4939a..30187f7e2 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/engine/JobManager.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/engine/JobManager.scala @@ -44,9 +44,9 @@ trait JobManager[TFunction <: QFunction, TRunner <: JobRunner[TFunction]] { /** * Updates the status on a list of functions. * @param runners Runners to update. + * @return runners which were updated. */ - def updateStatus(runners: Set[TRunner]) { - } + def updateStatus(runners: Set[TRunner]): Set[TRunner] = Set.empty /** * Stops a list of functions. diff --git a/public/scala/src/org/broadinstitute/sting/queue/engine/JobRunner.scala b/public/scala/src/org/broadinstitute/sting/queue/engine/JobRunner.scala index 4b4d44988..de5fbde05 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/engine/JobRunner.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/engine/JobRunner.scala @@ -52,6 +52,11 @@ trait JobRunner[TFunction <: QFunction] { */ def status: RunnerStatus.Value + /** + * Checks if the status has been unknown for an extended period of time. + */ + def checkUnknownStatus() {} + /** * Returns the function to be run. */ diff --git a/public/scala/src/org/broadinstitute/sting/queue/engine/QGraph.scala b/public/scala/src/org/broadinstitute/sting/queue/engine/QGraph.scala index 8ed3f84c1..a52e9c561 100755 --- a/public/scala/src/org/broadinstitute/sting/queue/engine/QGraph.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/engine/QGraph.scala @@ -1005,7 +1005,10 @@ class QGraph extends Logging { .asInstanceOf[Set[JobRunner[QFunction]]] if (managerRunners.size > 0) try { - manager.updateStatus(managerRunners) + val updatedRunners = manager.updateStatus(managerRunners) + for (runner <- managerRunners.diff(updatedRunners)) { + runner.checkUnknownStatus() + } } catch { case e => /* ignore */ } diff --git a/public/scala/src/org/broadinstitute/sting/queue/engine/gridengine/GridEngineJobRunner.scala b/public/scala/src/org/broadinstitute/sting/queue/engine/gridengine/GridEngineJobRunner.scala index 82edf6221..8c639b5bb 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/engine/gridengine/GridEngineJobRunner.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/engine/gridengine/GridEngineJobRunner.scala @@ -40,12 +40,7 @@ class GridEngineJobRunner(val function: CommandLineFunction) extends CommandLine /** Job Id of the currently executing job. */ private var jobId: String = _ - - /** Last known status */ - private var lastStatus: RunnerStatus.Value = _ - - /** The last time the status was updated */ - protected var lastStatusUpdate: Long = _ + override def jobIdString = jobId def start() { GridEngineJobRunner.gridEngineSession.synchronized { @@ -82,11 +77,14 @@ class GridEngineJobRunner(val function: CommandLineFunction) extends CommandLine nativeSpecString += " -q " + function.jobQueue } - // If the memory limit is set (GB) specify the memory limit - if (function.memoryLimit.isDefined) { - val memAvl: String = function.memoryLimit.get + "G" - val memMax: String = (function.memoryLimit.get * 1.2 * 1024).ceil.toInt + "M" - nativeSpecString += " -l mem_free=" + memAvl + ",h_rss=" + memMax + // If the resident set size is requested pass on the memory request + if (residentRequestMB.isDefined) { + nativeSpecString += " -l mem_free=%dM".format(residentRequestMB.get.ceil.toInt) + } + + // If the resident set size limit is defined specify the memory limit + if (residentLimitMB.isDefined) { + nativeSpecString += " -l h_rss=%dM".format(residentLimitMB.get.ceil.toInt) } // If the priority is set (user specified Int) specify the priority @@ -121,21 +119,11 @@ class GridEngineJobRunner(val function: CommandLineFunction) extends CommandLine logger.info("Submitted Grid Engine job id: " + jobId) } } - - def status = this.lastStatus - - private def updateStatus(updatedStatus: RunnerStatus.Value) { - this.lastStatus = updatedStatus - this.lastStatusUpdate = System.currentTimeMillis - } } object GridEngineJobRunner extends Logging { private val gridEngineSession = SessionFactory.getFactory.getSession - /** Amount of time a job can go without status before giving up. */ - private val unknownStatusMaxSeconds = 5 * 60 - initGridEngine() /** @@ -156,16 +144,14 @@ object GridEngineJobRunner extends Logging { /** * Updates the status of a list of jobs. * @param runners Runners to update. + * @return runners which were updated. */ - def updateStatus(runners: Set[GridEngineJobRunner]) { + def updateStatus(runners: Set[GridEngineJobRunner]) = { var updatedRunners = Set.empty[GridEngineJobRunner] gridEngineSession.synchronized { runners.foreach(runner => if (updateRunnerStatus(runner)) {updatedRunners += runner}) } - - for (runner <- runners.diff(updatedRunners)) { - checkUnknownStatus(runner) - } + updatedRunners } /** @@ -219,20 +205,11 @@ object GridEngineJobRunner extends Logging { logger.warn("Unable to determine status of Grid Engine job id " + runner.jobId, de) } - Option(returnStatus) match { - case Some(returnStatus) => - runner.updateStatus(returnStatus) - return true - case None => return false - } - } - - private def checkUnknownStatus(runner: GridEngineJobRunner) { - val unknownStatusSeconds = (System.currentTimeMillis - runner.lastStatusUpdate) - if (unknownStatusSeconds > (unknownStatusMaxSeconds * 1000L)) { - // Unknown status has been returned for a while now. - runner.updateStatus(RunnerStatus.FAILED) - logger.error("Unable to read Grid Engine status for %d minutes: job id %d: %s".format(unknownStatusSeconds/60, runner.jobId, runner.function.description)) + if (returnStatus != null) { + runner.updateStatus(returnStatus) + true + } else { + false } } diff --git a/public/scala/src/org/broadinstitute/sting/queue/engine/lsf/Lsf706JobManager.scala b/public/scala/src/org/broadinstitute/sting/queue/engine/lsf/Lsf706JobManager.scala index c0fff9125..23ddab619 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/engine/lsf/Lsf706JobManager.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/engine/lsf/Lsf706JobManager.scala @@ -34,6 +34,6 @@ class Lsf706JobManager extends CommandLineJobManager[Lsf706JobRunner] { def runnerType = classOf[Lsf706JobRunner] def create(function: CommandLineFunction) = new Lsf706JobRunner(function) - override def updateStatus(runners: Set[Lsf706JobRunner]) { Lsf706JobRunner.updateStatus(runners) } + override def updateStatus(runners: Set[Lsf706JobRunner]) = { Lsf706JobRunner.updateStatus(runners) } override def tryStop(runners: Set[Lsf706JobRunner]) { Lsf706JobRunner.tryStop(runners) } } diff --git a/public/scala/src/org/broadinstitute/sting/queue/engine/lsf/Lsf706JobRunner.scala b/public/scala/src/org/broadinstitute/sting/queue/engine/lsf/Lsf706JobRunner.scala index ac2f036b4..46dd08332 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/engine/lsf/Lsf706JobRunner.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/engine/lsf/Lsf706JobRunner.scala @@ -32,8 +32,8 @@ import org.broadinstitute.sting.utils.Utils import org.broadinstitute.sting.jna.clibrary.LibC import org.broadinstitute.sting.jna.lsf.v7_0_6.LibBat.{submitReply, submit} import com.sun.jna.ptr.IntByReference -import com.sun.jna.{StringArray, NativeLong} import org.broadinstitute.sting.queue.engine.{RunnerStatus, CommandLineJobRunner} +import com.sun.jna.{Structure, StringArray, NativeLong} /** * Runs jobs on an LSF compute cluster. @@ -45,12 +45,7 @@ class Lsf706JobRunner(val function: CommandLineFunction) extends CommandLineJobR /** Job Id of the currently executing job. */ private var jobId = -1L - - /** Last known status */ - private var lastStatus: RunnerStatus.Value = _ - - /** The last time the status was updated */ - protected var lastStatusUpdate: Long = _ + override def jobIdString = jobId.toString /** * Dispatches the function on the LSF cluster. @@ -85,12 +80,19 @@ class Lsf706JobRunner(val function: CommandLineFunction) extends CommandLineJobR request.options |= LibBat.SUB_QUEUE } - // If the memory limit is set (GB) specify the memory limit - if (function.memoryLimit.isDefined) { - request.resReq = "rusage[mem=" + function.memoryLimit.get + "]" + // If the resident set size is requested pass on the memory request + if (residentRequestMB.isDefined) { + val memInUnits = Lsf706JobRunner.convertUnits(residentRequestMB.get) + request.resReq = "select[mem>%1$d] rusage[mem=%1$d]".format(memInUnits) request.options |= LibBat.SUB_RES_REQ } + // If the resident set size limit is defined specify the memory limit + if (residentLimitMB.isDefined) { + val memInUnits = Lsf706JobRunner.convertUnits(residentLimitMB.get) + request.rLimits(LibLsf.LSF_RLIMIT_RSS) = memInUnits + } + // If the priority is set (user specified Int) specify the priority if (function.jobPriority.isDefined) { request.userPriority = function.jobPriority.get @@ -122,11 +124,13 @@ class Lsf706JobRunner(val function: CommandLineFunction) extends CommandLineJobR } } - def status = this.lastStatus - - private def updateStatus(updatedStatus: RunnerStatus.Value) { - this.lastStatus = updatedStatus - this.lastStatusUpdate = System.currentTimeMillis + override def checkUnknownStatus() { + // TODO: Need a second pass through either of the two archive logs using lsb_geteventrecbyline() for disappeared jobs. + // Can also tell if we wake up and the last time we saw status was greater than lsb_parameterinfo().cleanPeriod + // LSB_SHAREDIR/cluster_name/logdir/lsb.acct (man bacct) + // LSB_SHAREDIR/cluster_name/logdir/lsb.events (man bhist) + logger.debug("Job Id %s status / exitStatus / exitInfo: ??? / ??? / ???".format(jobId)) + super.checkUnknownStatus() } } @@ -137,17 +141,8 @@ object Lsf706JobRunner extends Logging { /** Number of seconds for a non-normal exit status before we give up on expecting LSF to retry the function. */ private val retryExpiredSeconds = 5 * 60 - /** Amount of time a job can go without status before giving up. */ - private val unknownStatusMaxSeconds = 5 * 60 - initLsf() - /** The name of the default queue. */ - private var defaultQueue: String = _ - - /** The run limits for each queue. */ - private var queueRlimitRun = Map.empty[String,Int] - /** * Initialize the Lsf library. */ @@ -161,8 +156,9 @@ object Lsf706JobRunner extends Logging { /** * Bulk updates job statuses. * @param runners Runners to update. + * @return runners which were updated. */ - def updateStatus(runners: Set[Lsf706JobRunner]) { + def updateStatus(runners: Set[Lsf706JobRunner]) = { var updatedRunners = Set.empty[Lsf706JobRunner] Lsf706JobRunner.lsfLibLock.synchronized { @@ -192,70 +188,7 @@ object Lsf706JobRunner extends Logging { } } - for (runner <- runners.diff(updatedRunners)) { - checkUnknownStatus(runner) - } - } - - /** - * Tries to stop any running jobs. - * @param runners Runners to stop. - */ - def tryStop(runners: Set[Lsf706JobRunner]) { - lsfLibLock.synchronized { - // lsb_killbulkjobs does not seem to forward SIGTERM, - // only SIGKILL, so send the Ctrl-C (SIGTERM) one by one. - for (runner <- runners.filterNot(_.jobId < 0)) { - try { - if (LibBat.lsb_signaljob(runner.jobId, SIGTERM) < 0) - logger.error(LibBat.lsb_sperror("Unable to kill job " + runner.jobId)) - } catch { - case e => - logger.error("Unable to kill job " + runner.jobId, e) - } - } - } - } - - - /** - * Returns the run limit in seconds for the queue. - * If the queue name is null returns the length of the default queue. - * @param queue Name of the queue or null for the default queue. - * @return the run limit in seconds for the queue. - */ - private def getRlimitRun(queue: String) = { - lsfLibLock.synchronized { - if (queue == null) { - if (defaultQueue != null) { - queueRlimitRun(defaultQueue) - } else { - // Get the info on the default queue. - val numQueues = new IntByReference(1) - val queueInfo = LibBat.lsb_queueinfo(null, numQueues, null, null, 0) - if (queueInfo == null) - throw new QException(LibBat.lsb_sperror("Unable to get LSF queue info for the default queue")) - defaultQueue = queueInfo.queue - val limit = queueInfo.rLimits(LibLsf.LSF_RLIMIT_RUN) - queueRlimitRun += defaultQueue -> limit - limit - } - } else { - queueRlimitRun.get(queue) match { - case Some(limit) => limit - case None => - // Cache miss. Go get the run limits from LSF. - val queues = new StringArray(Array[String](queue)) - val numQueues = new IntByReference(1) - val queueInfo = LibBat.lsb_queueinfo(queues, numQueues, null, null, 0) - if (queueInfo == null) - throw new QException(LibBat.lsb_sperror("Unable to get LSF queue info for queue: " + queue)) - val limit = queueInfo.rLimits(LibLsf.LSF_RLIMIT_RUN) - queueRlimitRun += queue -> limit - limit - } - } - } + updatedRunners } private def updateRunnerStatus(runner: Lsf706JobRunner, jobInfo: LibBat.jobInfoEnt) { @@ -280,20 +213,6 @@ object Lsf706JobRunner extends Logging { ) } - private def checkUnknownStatus(runner: Lsf706JobRunner) { - // TODO: Need a second pass through either of the two archive logs using lsb_geteventrecbyline() for disappeared jobs. - // Can also tell if we wake up and the last time we saw status was greater than lsb_parameterinfo().cleanPeriod - // LSB_SHAREDIR/cluster_name/logdir/lsb.acct (man bacct) - // LSB_SHAREDIR/cluster_name/logdir/lsb.events (man bhist) - logger.debug("Job Id %s status / exitStatus / exitInfo: ??? / ??? / ???".format(runner.jobId)) - val unknownStatusMillis = (System.currentTimeMillis - runner.lastStatusUpdate) - if (unknownStatusMillis > (unknownStatusMaxSeconds * 1000L)) { - // Unknown status has been returned for a while now. - runner.updateStatus(RunnerStatus.FAILED) - logger.error("Unable to read LSF status for %0.2f minutes: job id %d: %s".format(unknownStatusMillis/(60 * 1000D), runner.jobId, runner.function.description)) - } - } - /** * Returns true if LSF is expected to retry running the function. * @param exitInfo The reason the job exited. @@ -309,4 +228,86 @@ object Lsf706JobRunner extends Logging { } } } + + /** + * Tries to stop any running jobs. + * @param runners Runners to stop. + */ + def tryStop(runners: Set[Lsf706JobRunner]) { + lsfLibLock.synchronized { + // lsb_killbulkjobs does not seem to forward SIGTERM, + // only SIGKILL, so send the Ctrl-C (SIGTERM) one by one. + for (runner <- runners.filterNot(_.jobId < 0)) { + try { + if (LibBat.lsb_signaljob(runner.jobId, SIGTERM) < 0) + logger.error(LibBat.lsb_sperror("Unable to kill job " + runner.jobId)) + } catch { + case e => + logger.error("Unable to kill job " + runner.jobId, e) + } + } + } + } + + /** The name of the default queue. */ + private lazy val defaultQueue: String = { + lsfLibLock.synchronized { + val numQueues = new IntByReference(1) + val queueInfo = LibBat.lsb_queueinfo(null, numQueues, null, null, 0) + if (queueInfo == null) + throw new QException(LibBat.lsb_sperror("Unable to get LSF queue info for the default queue")) + queueInfo.queue + } + } + + /** The run limits for each queue. */ + private var queueRlimitRun = Map.empty[String,Int] + + /** + * Returns the run limit in seconds for the queue. + * If the queue name is null returns the length of the default queue. + * @param queue Name of the queue or null for the default queue. + * @return the run limit in seconds for the queue. + */ + private def getRlimitRun(queueName: String) = { + lsfLibLock.synchronized { + val queue = if (queueName == null) defaultQueue else queueName + queueRlimitRun.get(queue) match { + case Some(limit) => limit + case None => + // Cache miss. Go get the run limits from LSF. + val queues = new StringArray(Array(queue)) + val numQueues = new IntByReference(1) + val queueInfo = LibBat.lsb_queueinfo(queues, numQueues, null, null, 0) + if (queueInfo == null) + throw new QException(LibBat.lsb_sperror("Unable to get LSF queue info for queue: " + queue)) + val limit = queueInfo.rLimits(LibLsf.LSF_RLIMIT_RUN) + queueRlimitRun += queue -> limit + limit + } + } + } + + private lazy val unitDivisor: Double = { + lsfLibLock.synchronized { + val unitsParam: Array[LibLsf.config_param] = new LibLsf.config_param().toArray(2).asInstanceOf[Array[LibLsf.config_param]] + unitsParam(0).paramName = "LSF_UNIT_FOR_LIMITS" + + Structure.autoWrite(unitsParam.asInstanceOf[Array[Structure]]) + if (LibLsf.ls_readconfenv(unitsParam(0), null) != 0) + throw new QException(LibBat.lsb_sperror("ls_readconfenv() failed")) + Structure.autoRead(unitsParam.asInstanceOf[Array[Structure]]) + + unitsParam(0).paramValue match { + case "MB" => 1D + case "GB" => 1024D + case "TB" => 1024D * 1024 + case "PB" => 1024D * 1024 * 1024 + case "EB" => 1024D * 1024 * 1024 * 1024 + case null => 1D + } + } + } + + private def convertUnits(mb: Double) = (mb / unitDivisor).ceil.toInt } diff --git a/public/scala/src/org/broadinstitute/sting/queue/engine/shell/ShellJobRunner.scala b/public/scala/src/org/broadinstitute/sting/queue/engine/shell/ShellJobRunner.scala index 603511a30..128d8773c 100755 --- a/public/scala/src/org/broadinstitute/sting/queue/engine/shell/ShellJobRunner.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/engine/shell/ShellJobRunner.scala @@ -50,10 +50,10 @@ class ShellJobRunner(val function: CommandLineFunction) extends CommandLineJobRu // Allow advanced users to update the job. updateJobRun(job) - runStatus = RunnerStatus.RUNNING + updateStatus(RunnerStatus.RUNNING) job.run() - runStatus = RunnerStatus.DONE + updateStatus(RunnerStatus.FAILED) } - def status = runStatus + override def checkUnknownStatus() {} } diff --git a/public/scala/src/org/broadinstitute/sting/queue/function/CommandLineFunction.scala b/public/scala/src/org/broadinstitute/sting/queue/function/CommandLineFunction.scala index 2b1abb2d0..c62fdcd7c 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/function/CommandLineFunction.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/function/CommandLineFunction.scala @@ -9,7 +9,7 @@ trait CommandLineFunction extends QFunction with Logging { def commandLine: String /** Upper memory limit */ - var memoryLimit: Option[Int] = None + var memoryLimit: Option[Double] = None /** Job project to run the command */ var jobProject: String = _ @@ -56,7 +56,7 @@ trait CommandLineFunction extends QFunction with Logging { if (memoryLimit.isEmpty) memoryLimit = qSettings.memoryLimit - super.freezeFieldValues + super.freezeFieldValues() } /** diff --git a/public/scala/src/org/broadinstitute/sting/queue/function/JavaCommandLineFunction.scala b/public/scala/src/org/broadinstitute/sting/queue/function/JavaCommandLineFunction.scala index 72445442e..e8279f62b 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/function/JavaCommandLineFunction.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/function/JavaCommandLineFunction.scala @@ -47,7 +47,7 @@ trait JavaCommandLineFunction extends CommandLineFunction { /** * Memory limit for the java executable, or if None will use the default memoryLimit. */ - var javaMemoryLimit: Option[Int] = None + var javaMemoryLimit: Option[Double] = None /** * Returns the java executable to run. @@ -61,8 +61,8 @@ trait JavaCommandLineFunction extends CommandLineFunction { null } - override def freezeFieldValues = { - super.freezeFieldValues + override def freezeFieldValues() { + super.freezeFieldValues() if (javaMemoryLimit.isEmpty && memoryLimit.isDefined) javaMemoryLimit = memoryLimit @@ -72,7 +72,7 @@ trait JavaCommandLineFunction extends CommandLineFunction { } def javaOpts = "%s -Djava.io.tmpdir=%s" - .format(optional(" -Xmx", javaMemoryLimit, "g"), jobTempDir) + .format(optional(" -Xmx", javaMemoryLimit.map(gb => (gb * 1024).ceil.toInt), "m"), jobTempDir) def commandLine = "java%s %s" .format(javaOpts, javaExecutable) diff --git a/public/scala/src/org/broadinstitute/sting/queue/util/QScriptUtils.scala b/public/scala/src/org/broadinstitute/sting/queue/util/QScriptUtils.scala index 9fb4fa30d..99aaa9474 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/util/QScriptUtils.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/util/QScriptUtils.scala @@ -31,7 +31,7 @@ object QScriptUtils { for (bam <- fromFile(in).getLines) if (!bam.startsWith("#") && !bam.isEmpty ) list :+= new File(bam.trim()) - list + list.sortWith(_.compareTo(_) < 0) } /** diff --git a/public/scala/test/org/broadinstitute/sting/queue/pipeline/PipelineTest.scala b/public/scala/test/org/broadinstitute/sting/queue/pipeline/PipelineTest.scala index dc3cfd9d4..c2c956118 100644 --- a/public/scala/test/org/broadinstitute/sting/queue/pipeline/PipelineTest.scala +++ b/public/scala/test/org/broadinstitute/sting/queue/pipeline/PipelineTest.scala @@ -31,6 +31,7 @@ import org.broadinstitute.sting.commandline.CommandLineProgram import java.util.Date import java.text.SimpleDateFormat import org.broadinstitute.sting.BaseTest +import org.broadinstitute.sting.MD5DB import org.broadinstitute.sting.queue.QCommandLine import org.broadinstitute.sting.queue.util.{Logging, ProcessController} import java.io.{FileNotFoundException, File} @@ -105,7 +106,7 @@ object PipelineTest extends BaseTest with Logging { private def assertMatchingMD5s(name: String, fileMD5s: Traversable[(File, String)], parameterize: Boolean) { var failed = 0 for ((file, expectedMD5) <- fileMD5s) { - val calculatedMD5 = BaseTest.testFileMD5(name, file, expectedMD5, parameterize) + val calculatedMD5 = MD5DB.testFileMD5(name, file, expectedMD5, parameterize) if (!parameterize && expectedMD5 != "" && expectedMD5 != calculatedMD5) failed += 1 } diff --git a/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/HelloWorldPipelineTest.scala b/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/HelloWorldPipelineTest.scala index 0871e769b..7c76823da 100644 --- a/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/HelloWorldPipelineTest.scala +++ b/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/HelloWorldPipelineTest.scala @@ -29,7 +29,7 @@ import org.broadinstitute.sting.queue.pipeline.{PipelineTest, PipelineTestSpec} class HelloWorldPipelineTest { @Test - def testHelloWorld { + def testHelloWorld() { val spec = new PipelineTestSpec spec.name = "HelloWorld" spec.args = "-S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/HelloWorld.scala" @@ -37,15 +37,23 @@ class HelloWorldPipelineTest { } @Test - def testHelloWorldWithPrefix { + def testHelloWorldWithPrefix() { val spec = new PipelineTestSpec spec.name = "HelloWorldWithPrefix" spec.args = "-S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/HelloWorld.scala -jobPrefix HelloWorld" PipelineTest.executeTest(spec) } + @Test + def testHelloWorldWithMemoryLimit() { + val spec = new PipelineTestSpec + spec.name = "HelloWorldWithPrefix" + spec.args = "-S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/HelloWorld.scala -memLimit 1.25" + PipelineTest.executeTest(spec) + } + @Test(enabled=false) - def testHelloWorldWithPriority { + def testHelloWorldWithPriority() { val spec = new PipelineTestSpec spec.name = "HelloWorldWithPriority" spec.args = "-S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/HelloWorld.scala -jobPriority 100" diff --git a/public/testdata/exampleBAM.simple.bai b/public/testdata/exampleBAM.simple.bai new file mode 100644 index 000000000..2d8268b1d Binary files /dev/null and b/public/testdata/exampleBAM.simple.bai differ diff --git a/public/testdata/exampleBAM.simple.bam b/public/testdata/exampleBAM.simple.bam new file mode 100644 index 000000000..c3eb7ae7b Binary files /dev/null and b/public/testdata/exampleBAM.simple.bam differ diff --git a/settings/helpTemplates/common.html b/settings/helpTemplates/common.html new file mode 100644 index 000000000..1554a1d40 --- /dev/null +++ b/settings/helpTemplates/common.html @@ -0,0 +1,15 @@ +<#macro makeHeader title> + + ${title} + + + + +<#macro headerInfo> + + +<#macro footerInfo> +

See also Main index | GATK wiki | GATK support forum

+

GATK version ${version} built at ${timestamp}.

+ + diff --git a/settings/helpTemplates/generic.index.template.html b/settings/helpTemplates/generic.index.template.html new file mode 100644 index 000000000..6c9e9f4e8 --- /dev/null +++ b/settings/helpTemplates/generic.index.template.html @@ -0,0 +1,36 @@ +<#include "common.html"/> + +<#macro emitGroup group> + +

${group.name}

+

+ ${group.summary} +

+

+ + + + <#list data as datum> + <#if datum.group == group.name> + + + + + + +
NameSummary
${datum.name}${datum.summary}
+ + + +<@makeHeader title="GATK documentation index"/> + +

GATK documentation index

+ <@headerInfo /> + <#list groups?sort_by("name") as group> + <@emitGroup group=group/> + + + <@footerInfo /> + + + diff --git a/settings/helpTemplates/generic.template.html b/settings/helpTemplates/generic.template.html new file mode 100644 index 000000000..032407164 --- /dev/null +++ b/settings/helpTemplates/generic.template.html @@ -0,0 +1,120 @@ +<#include "common.html"/> + +<#macro argumentlist name myargs> + <#if myargs?size != 0> + ${name} + <#list myargs as arg> + + ${arg.name} + ${arg.type} + ${arg.defaultValue!"No default"} + ${arg.summary} + + <#-- + ${arg.required} + --> + + + + +<#macro argumentDetails arg> +

${arg.name}<#if arg.synonyms??> / ${arg.synonyms} + (<#if arg.attributes??>${arg.attributes} ${arg.type}<#if arg.defaultValue??> with default value ${arg.defaultValue})

+ ${arg.summary}. ${arg.fulltext}
+ <#if arg.options??> +

The ${arg.name} argument is an enumerated type (${arg.type}), which can have one of the following values:

+
+ <#list arg.options as option> +
${option.name} +
${option.summary} + +
+ + + +<#macro relatedByType name type> + <#list relatedDocs as relatedDoc> + <#if relatedDoc.relation == type> +

${name}

+
    + <#list relatedDocs as relatedDoc> + <#if relatedDoc.relation == type> +
  • ${relatedDoc.name} is a ${relatedDoc.relation}
  • + + +
+ <#break> + + + + + +<@makeHeader title="${name} documentation"/> + +

${name}

+ <@headerInfo /> +

${summary}

+ <#if author??> +

Author

+ ${author} + +

Introduction

+ ${description} + + <#-- Create the argument summary --> + <#if arguments.all?size != 0> +
+

${name} specific arguments

+ + + + + + + + + + + <@argumentlist name="Required" myargs=arguments.required/> + <@argumentlist name="Optional" myargs=arguments.optional/> + <@argumentlist name="Hidden" myargs=arguments.hidden/> + <@argumentlist name="Depreciated" myargs=arguments.depreciated/> + +
NameTypeDefault valueSummary
+ + + <#-- Create references to additional capabilities if appropriate --> + <#if extradocs?size != 0> +
+

Additional capabilities

+ The arguments described in the entries below can be supplied to this tool to modify + its behavior. For example, the -L argument directs the GATK engine restricts processing + to specific genomic intervals. This capability is available to all GATK walkers. + + + + <#-- This class is related to other documented classes via sub/super relationships --> + <#if relatedDocs?size != 0> +
+

Related capabilities

+ <@relatedByType name="Superclasses" type="superclass"/> + <@relatedByType name="Subclasses" type="subclass"/> + + + <#-- List all of the --> + <#if arguments.all?size != 0> +
+ <#-- Create the argument details --> +

Argument details

+ <#list arguments.all as arg> + <@argumentDetails arg=arg/> + + + + <@footerInfo /> + + diff --git a/settings/helpTemplates/style.css b/settings/helpTemplates/style.css new file mode 100644 index 000000000..1d7bcc576 --- /dev/null +++ b/settings/helpTemplates/style.css @@ -0,0 +1,134 @@ +body +{ + background-color: #ffffff; + color: #202020; +} + +body, p, ul, ol, dl +{ + font-family: Corbel, Verdana, "Lucida Grande", "Lucida Sans Unicode", Sans-Serif; +} + +p, ul, ol, dl, dt, dd, td +{ + font-size: 12pt; +} + +p +{ + margin-left: 1em; +} + +p.summary +{ + margin-left: 2em; + margin-top: -20pt; + font-style: italic; +} + +p.see-also +{ + font-size: 10pt; + margin-left: 0em; + margin-top: 3em; + text-align: center; +} + +p.version +{ + font-size: 8pt; + margin-left: 0em; + margin-top: -8pt; + text-align: center; +} + + +h1, h2, h3, h4 +{ + font-family: Corbel, Arial, Helvetica, Sans-Serif; + font-weight: bold; + text-align: left; +} + +h1 +{ + font-size: 32pt; + letter-spacing: -2px; + color: #669; +} + +h2 +{ + font-size: 16pt; + font-weight: bold; + margin-top: 2em; + color: #669; +} + +h3 +{ + font-size: 12pt; + margin-left: 1em; + color: #000; +} + +hr +{ + margin-top: 4em; +} + +/* + * enum DT layout +*/ + +dl { + border: 1px solid #ccc; +} + +dt { + font-weight: bold; + text-decoration: underline; +} + +dd { + margin: 0; + padding: 0 0 0.5em 0; +} + +/* + * clean table layouts +*/ +#hor-minimalist-b +{ + font-family: "Lucida Sans Unicode", "Lucida Grande", Sans-Serif; + font-size: 12px; + background: #fff; + margin: 5px; + width: 100%; + border-collapse: collapse; + text-align: left; +} +#hor-minimalist-b th +{ + font-size: 14px; + font-weight: normal; + color: #039; + padding: 10px 8px; + border-bottom: 2px solid #6678b1; +} +#hor-minimalist-b td +{ + border-bottom: 1px solid #ccc; + color: #669; + padding: 6px 8px; +} +#hor-minimalist-b tbody tr:hover td +{ + color: #009; +} + +th#row-divider +{ + font-weight: bolder; + font-size: larger; +} \ No newline at end of file diff --git a/settings/repository/edu.mit.broad/picard-private-parts-1941.jar b/settings/repository/edu.mit.broad/picard-private-parts-1941.jar deleted file mode 100644 index 760db5cb8..000000000 Binary files a/settings/repository/edu.mit.broad/picard-private-parts-1941.jar and /dev/null differ diff --git a/settings/repository/edu.mit.broad/picard-private-parts-1959.jar b/settings/repository/edu.mit.broad/picard-private-parts-1959.jar new file mode 100644 index 000000000..ae11e636b Binary files /dev/null and b/settings/repository/edu.mit.broad/picard-private-parts-1959.jar differ diff --git a/settings/repository/edu.mit.broad/picard-private-parts-1941.xml b/settings/repository/edu.mit.broad/picard-private-parts-1959.xml similarity index 58% rename from settings/repository/edu.mit.broad/picard-private-parts-1941.xml rename to settings/repository/edu.mit.broad/picard-private-parts-1959.xml index 07d51ae53..e7c7e3a21 100644 --- a/settings/repository/edu.mit.broad/picard-private-parts-1941.xml +++ b/settings/repository/edu.mit.broad/picard-private-parts-1959.xml @@ -1,3 +1,3 @@ - + diff --git a/settings/repository/net.sf/picard-1.47.869.xml b/settings/repository/net.sf/picard-1.47.869.xml deleted file mode 100644 index 86d07d4fa..000000000 --- a/settings/repository/net.sf/picard-1.47.869.xml +++ /dev/null @@ -1,3 +0,0 @@ - - - diff --git a/settings/repository/net.sf/picard-1.47.869.jar b/settings/repository/net.sf/picard-1.49.895.jar similarity index 87% rename from settings/repository/net.sf/picard-1.47.869.jar rename to settings/repository/net.sf/picard-1.49.895.jar index d277fd217..3ee1f2090 100644 Binary files a/settings/repository/net.sf/picard-1.47.869.jar and b/settings/repository/net.sf/picard-1.49.895.jar differ diff --git a/settings/repository/net.sf/picard-1.49.895.xml b/settings/repository/net.sf/picard-1.49.895.xml new file mode 100644 index 000000000..52d4900c5 --- /dev/null +++ b/settings/repository/net.sf/picard-1.49.895.xml @@ -0,0 +1,3 @@ + + + diff --git a/settings/repository/net.sf/sam-1.47.869.xml b/settings/repository/net.sf/sam-1.47.869.xml deleted file mode 100644 index 1b76fe2f9..000000000 --- a/settings/repository/net.sf/sam-1.47.869.xml +++ /dev/null @@ -1,3 +0,0 @@ - - - diff --git a/settings/repository/net.sf/sam-1.47.869.jar b/settings/repository/net.sf/sam-1.49.895.jar similarity index 91% rename from settings/repository/net.sf/sam-1.47.869.jar rename to settings/repository/net.sf/sam-1.49.895.jar index 933b9cfd6..c55ab0b72 100644 Binary files a/settings/repository/net.sf/sam-1.47.869.jar and b/settings/repository/net.sf/sam-1.49.895.jar differ diff --git a/settings/repository/net.sf/sam-1.49.895.xml b/settings/repository/net.sf/sam-1.49.895.xml new file mode 100644 index 000000000..0436ce881 --- /dev/null +++ b/settings/repository/net.sf/sam-1.49.895.xml @@ -0,0 +1,3 @@ + + + diff --git a/settings/repository/org.broad/tribble-3.jar b/settings/repository/org.broad/tribble-16.jar similarity index 67% rename from settings/repository/org.broad/tribble-3.jar rename to settings/repository/org.broad/tribble-16.jar index f0ab44a05..331f28ec3 100644 Binary files a/settings/repository/org.broad/tribble-3.jar and b/settings/repository/org.broad/tribble-16.jar differ diff --git a/settings/repository/org.broad/tribble-3.xml b/settings/repository/org.broad/tribble-16.xml similarity index 57% rename from settings/repository/org.broad/tribble-3.xml rename to settings/repository/org.broad/tribble-16.xml index c35358331..e23eec339 100644 --- a/settings/repository/org.broad/tribble-3.xml +++ b/settings/repository/org.broad/tribble-16.xml @@ -1,4 +1,4 @@ -