@@ -30,6 +33,9 @@
+
+
+
diff --git a/public/R/queueJobReport.R b/public/R/queueJobReport.R
new file mode 100644
index 000000000..31916361e
--- /dev/null
+++ b/public/R/queueJobReport.R
@@ -0,0 +1,169 @@
+library(gsalib)
+require("ggplot2")
+require("gplots")
+
+#
+# Standard command line switch. Can be loaded interactively for development
+# or executed with RScript
+#
+args = commandArgs(TRUE)
+onCMDLine = ! is.na(args[1])
+if ( onCMDLine ) {
+ inputFileName = args[1]
+ outputPDF = args[2]
+} else {
+ #inputFileName = "~/Desktop/broadLocal/GATK/unstable/report.txt"
+ inputFileName = "/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/Q-25718@node1149.jobreport.txt"
+ #inputFileName = "/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/rodPerformanceGoals/history/report.082711.txt"
+ outputPDF = NA
+}
+
+RUNTIME_UNITS = "(sec)"
+ORIGINAL_UNITS_TO_SECONDS = 1/1000
+
+#
+# Helper function to aggregate all of the jobs in the report across all tables
+#
+allJobsFromReport <- function(report) {
+ names <- c("jobName", "startTime", "analysisName", "doneTime", "exechosts")
+ sub <- lapply(report, function(table) table[,names])
+ do.call("rbind", sub)
+}
+
+#
+# Creates segmentation plots of time (x) vs. job (y) with segments for the duration of the job
+#
+plotJobsGantt <- function(gatkReport, sortOverall) {
+ allJobs = allJobsFromReport(gatkReport)
+ if ( sortOverall ) {
+ title = "All jobs, by analysis, by start time"
+ allJobs = allJobs[order(allJobs$analysisName, allJobs$startTime, decreasing=T), ]
+ } else {
+ title = "All jobs, sorted by start time"
+ allJobs = allJobs[order(allJobs$startTime, decreasing=T), ]
+ }
+ allJobs$index = 1:nrow(allJobs)
+ minTime = min(allJobs$startTime)
+ allJobs$relStartTime = allJobs$startTime - minTime
+ allJobs$relDoneTime = allJobs$doneTime - minTime
+ allJobs$ganttName = paste(allJobs$jobName, "@", allJobs$exechosts)
+ maxRelTime = max(allJobs$relDoneTime)
+ p <- ggplot(data=allJobs, aes(x=relStartTime, y=index, color=analysisName))
+ p <- p + geom_segment(aes(xend=relDoneTime, yend=index), size=2, arrow=arrow(length = unit(0.1, "cm")))
+ p <- p + geom_text(aes(x=relDoneTime, label=ganttName, hjust=-0.2), size=2)
+ p <- p + xlim(0, maxRelTime * 1.1)
+ p <- p + xlab(paste("Start time (relative to first job)", RUNTIME_UNITS))
+ p <- p + ylab("Job")
+ p <- p + opts(title=title)
+ print(p)
+}
+
+#
+# Plots scheduling efficiency at job events
+#
+plotProgressByTime <- function(gatkReport) {
+ allJobs = allJobsFromReport(gatkReport)
+ nJobs = dim(allJobs)[1]
+ allJobs = allJobs[order(allJobs$startTime, decreasing=F),]
+ allJobs$index = 1:nrow(allJobs)
+
+ minTime = min(allJobs$startTime)
+ allJobs$relStartTime = allJobs$startTime - minTime
+ allJobs$relDoneTime = allJobs$doneTime - minTime
+
+ times = sort(c(allJobs$relStartTime, allJobs$relDoneTime))
+
+ countJobs <- function(p) {
+ s = allJobs$relStartTime
+ e = allJobs$relDoneTime
+ x = c() # I wish I knew how to make this work with apply
+ for ( time in times )
+ x = c(x, sum(p(s, e, time)))
+ x
+ }
+
+ pending = countJobs(function(s, e, t) s > t)
+ done = countJobs(function(s, e, t) e < t)
+ running = nJobs - pending - done
+
+ d = data.frame(times=times, pending=pending, running=running, done=done)
+
+ p <- ggplot(data=melt(d, id.vars=c("times")), aes(x=times, y=value, color=variable))
+ p <- p + facet_grid(variable ~ ., scales="free")
+ p <- p + geom_line(size=2)
+ p <- p + xlab(paste("Time since start of first job", RUNTIME_UNITS))
+ p <- p + opts(title = "Job scheduling")
+ print(p)
+}
+
+#
+# Creates tables for each job in this group
+#
+standardColumns = c("jobName", "startTime", "formattedStartTime", "analysisName", "intermediate", "exechosts", "formattedDoneTime", "doneTime", "runtime")
+plotGroup <- function(groupTable) {
+ name = unique(groupTable$analysisName)[1]
+ groupAnnotations = setdiff(names(groupTable), standardColumns)
+ sub = groupTable[,c("jobName", groupAnnotations, "runtime")]
+ sub = sub[order(sub$iteration, sub$jobName, decreasing=F), ]
+
+ # create a table showing each job and all annotations
+ textplot(sub, show.rownames=F)
+ title(paste("Job summary for", name, "full itemization"), cex=3)
+
+ # create the table for each combination of values in the group, listing iterations in the columns
+ sum = cast(melt(sub, id.vars=groupAnnotations, measure.vars=c("runtime")), ... ~ iteration, fun.aggregate=mean)
+ textplot(as.data.frame(sum), show.rownames=F)
+ title(paste("Job summary for", name, "itemizing each iteration"), cex=3)
+
+ # histogram of job times by groupAnnotations
+ if ( length(groupAnnotations) == 1 && dim(sub)[1] > 1 ) {
+ # todo -- how do we group by annotations?
+ p <- ggplot(data=sub, aes(x=runtime)) + geom_histogram()
+ p <- p + xlab("runtime in seconds") + ylab("No. of jobs")
+ p <- p + opts(title=paste("Job runtime histogram for", name))
+ print(p)
+ }
+
+ # as above, but averaging over all iterations
+ groupAnnotationsNoIteration = setdiff(groupAnnotations, "iteration")
+ if ( dim(sub)[1] > 1 ) {
+ sum = cast(melt(sub, id.vars=groupAnnotationsNoIteration, measure.vars=c("runtime")), ... ~ ., fun.aggregate=c(mean, sd))
+ textplot(as.data.frame(sum), show.rownames=F)
+ title(paste("Job summary for", name, "averaging over all iterations"), cex=3)
+ }
+}
+
+# print out some useful basic information
+print("Report")
+print(paste("Project :", inputFileName))
+
+convertUnits <- function(gatkReportData) {
+ convertGroup <- function(g) {
+ g$runtime = g$runtime * ORIGINAL_UNITS_TO_SECONDS
+ g$startTime = g$startTime * ORIGINAL_UNITS_TO_SECONDS
+ g$doneTime = g$doneTime * ORIGINAL_UNITS_TO_SECONDS
+ g
+ }
+ lapply(gatkReportData, convertGroup)
+}
+
+
+# read the table
+gatkReportData <- gsa.read.gatkreport(inputFileName)
+gatkReportData <- convertUnits(gatkReportData)
+#print(summary(gatkReportData))
+
+if ( ! is.na(outputPDF) ) {
+ pdf(outputPDF, height=8.5, width=11)
+}
+
+plotJobsGantt(gatkReportData, T)
+plotJobsGantt(gatkReportData, F)
+plotProgressByTime(gatkReportData)
+for ( group in gatkReportData ) {
+ plotGroup(group)
+}
+
+if ( ! is.na(outputPDF) ) {
+ dev.off()
+}
diff --git a/public/R/src/gsalib/R/gsa.read.gatkreport.R b/public/R/src/gsalib/R/gsa.read.gatkreport.R
index 9b3ef1ad1..011b5240d 100644
--- a/public/R/src/gsalib/R/gsa.read.gatkreport.R
+++ b/public/R/src/gsalib/R/gsa.read.gatkreport.R
@@ -20,6 +20,20 @@
assign(tableName, d, envir=tableEnv);
}
+# Read a fixed width line of text into a list.
+.gsa.splitFixedWidth <- function(line, columnStarts) {
+ splitStartStop <- function(x) {
+ x = substring(x, starts, stops);
+ x = gsub("^[[:space:]]+|[[:space:]]+$", "", x);
+ x;
+ }
+
+ starts = c(1, columnStarts);
+ stops = c(columnStarts - 1, nchar(line));
+
+ sapply(line, splitStartStop)[,1];
+}
+
# Load all GATKReport tables from a file
gsa.read.gatkreport <- function(filename) {
con = file(filename, "r", blocking = TRUE);
@@ -31,9 +45,10 @@ gsa.read.gatkreport <- function(filename) {
tableName = NA;
tableHeader = c();
tableRows = c();
+ version = NA;
for (line in lines) {
- if (length(grep("^##:GATKReport.v0.1[[:space:]]+", line, ignore.case=TRUE)) > 0) {
+ if (length(grep("^##:GATKReport.v", line, ignore.case=TRUE)) > 0) {
headerFields = unlist(strsplit(line, "[[:space:]]+"));
if (!is.na(tableName)) {
@@ -43,13 +58,37 @@ gsa.read.gatkreport <- function(filename) {
tableName = headerFields[2];
tableHeader = c();
tableRows = c();
+
+ # For differences in versions see
+ # $STING_HOME/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportVersion.java
+ if (length(grep("^##:GATKReport.v0.1[[:space:]]+", line, ignore.case=TRUE)) > 0) {
+ version = "v0.1";
+
+ } else if (length(grep("^##:GATKReport.v0.2[[:space:]]+", line, ignore.case=TRUE)) > 0) {
+ version = "v0.2";
+ columnStarts = c();
+
+ }
+
} else if (length(grep("^[[:space:]]*$", line)) > 0 | length(grep("^[[:space:]]*#", line)) > 0) {
# do nothing
} else if (!is.na(tableName)) {
- row = unlist(strsplit(line, "[[:space:]]+"));
+
+ if (version == "v0.1") {
+ row = unlist(strsplit(line, "[[:space:]]+"));
+
+ } else if (version == "v0.2") {
+ if (length(tableHeader) == 0) {
+ headerChars = unlist(strsplit(line, ""));
+ # Find the first position of non space characters, excluding the first character
+ columnStarts = intersect(grep("[[:space:]]", headerChars, invert=TRUE), grep("[[:space:]]", headerChars) + 1);
+ }
+
+ row = .gsa.splitFixedWidth(line, columnStarts);
+ }
if (length(tableHeader) == 0) {
- tableHeader = row;
+ tableHeader = row;
} else {
tableRows = rbind(tableRows, row);
}
diff --git a/public/java/src/net/sf/picard/reference/FastaSequenceIndexBuilder.java b/public/java/src/net/sf/picard/reference/FastaSequenceIndexBuilder.java
index 8825c3767..6c8fe1834 100644
--- a/public/java/src/net/sf/picard/reference/FastaSequenceIndexBuilder.java
+++ b/public/java/src/net/sf/picard/reference/FastaSequenceIndexBuilder.java
@@ -25,7 +25,6 @@
package net.sf.picard.reference;
-import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSourceProgressListener;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import static net.sf.picard.reference.FastaSequenceIndexBuilder.Status.*;
@@ -39,8 +38,8 @@ import org.broadinstitute.sting.utils.exceptions.UserException;
* Produces fai file with same output as samtools faidx
*/
public class FastaSequenceIndexBuilder {
- public File fastaFile;
- ReferenceDataSourceProgressListener progress; // interface that provides a method for updating user on progress of reading file
+ final public File fastaFile;
+ final boolean printProgress;
// keep track of location in file
long bytesRead, endOfLastLine, lastTimestamp, fileLength; // initialized to -1 to keep 0-indexed position in file;
@@ -55,10 +54,10 @@ public class FastaSequenceIndexBuilder {
public enum Status { NONE, CONTIG, FIRST_SEQ_LINE, SEQ_LINE, COMMENT }
Status status = Status.NONE; // keeps state of what is currently being read. better to use int instead of enum?
- public FastaSequenceIndexBuilder(File fastaFile, ReferenceDataSourceProgressListener progress) {
- this.progress = progress;
+ public FastaSequenceIndexBuilder(File fastaFile, boolean printProgress) {
this.fastaFile = fastaFile;
fileLength = fastaFile.length();
+ this.printProgress = printProgress;
}
/**
@@ -252,8 +251,8 @@ public class FastaSequenceIndexBuilder {
if (System.currentTimeMillis() - lastTimestamp > 10000) {
int percentProgress = (int) (100*bytesRead/fileLength);
- if (progress != null)
- progress.percentProgress(percentProgress);
+ if (printProgress)
+ System.out.println(String.format("PROGRESS UPDATE: file is %d percent complete", percentProgress));
lastTimestamp = System.currentTimeMillis();
}
}
diff --git a/public/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java b/public/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java
index b9e380295..7ea515591 100755
--- a/public/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java
+++ b/public/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java
@@ -31,30 +31,85 @@ import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.gatk.walkers.recalibration.Covariate;
import org.broadinstitute.sting.gatk.walkers.recalibration.RecalDatum;
import org.broadinstitute.sting.gatk.walkers.recalibration.RecalibrationArgumentCollection;
+import org.broadinstitute.sting.utils.R.RScriptExecutor;
import org.broadinstitute.sting.utils.classloader.PluginManager;
import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException;
+import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.text.XReadLines;
import java.io.*;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.Collection;
import java.util.Map;
import java.util.regex.Pattern;
/**
- * Created by IntelliJ IDEA.
- * User: rpoplin
- * Date: Dec 1, 2009
+ * Call R scripts to plot residual error versus the various covariates.
+ *
+ *
+ * After counting covariates in either the initial BAM File or again in the recalibrated BAM File, an analysis tool is available which
+ * reads the .csv file and outputs several PDF (and .dat) files for each read group in the given BAM. These PDF files graphically
+ * show the various metrics and characteristics of the reported quality scores (often in relation to the empirical qualities).
+ * In order to show that any biases in the reported quality scores have been generally fixed through recalibration one should run
+ * CountCovariates again on a bam file produced by TableRecalibration. In this way users can compare the analysis plots generated
+ * by pre-recalibration and post-recalibration .csv files. Our usual chain of commands that we use to generate plots of residual
+ * error is: CountCovariates, TableRecalibrate, samtools index on the recalibrated bam file, CountCovariates again on the recalibrated
+ * bam file, and then AnalyzeCovariates on both the before and after recal_data.csv files to see the improvement in recalibration.
+ *
+ *
+ * The color coding along with the RMSE is included in the plots to give some indication of the number of observations that went into
+ * each of the quality score estimates. It is defined as follows for N, the number of observations:
+ *
+ *
+ * light blue means N < 1,000
+ * cornflower blue means 1,000 <= N < 10,000
+ * dark blue means N >= 10,000
+ * The pink dots indicate points whose quality scores are special codes used by the aligner and which are mathematically
+ * meaningless and so aren't included in any of the numerical calculations.
+ *
+ *
+ *
+ * NOTE: For those running this tool externally from the Broad, it is crucial to note that both the -Rscript and -resources options
+ * must be changed from the default. -Rscript needs to point to your installation of Rscript (this is the scripting version of R,
+ * not the interactive version) while -resources needs to point to the folder holding the R scripts that are used. For those using
+ * this tool as part of the Binary Distribution the -resources should point to the resources folder that is part of the tarball.
+ * For those using this tool by building from the git repository the -resources should point to the R/ subdirectory of the Sting checkout.
+ *
+ *
+ * See the GATK wiki for a tutorial and example recalibration accuracy plots.
+ * http://www.broadinstitute.org/gsa/wiki/index.php/Base_quality_score_recalibration
+ *
+ *
Input
+ *
+ * The recalibration table file in CSV format that was generated by the CountCovariates walker.
+ *
+ *
+ * Examples
+ *
+ * java -Xmx4g -jar AnalyzeCovariates.jar \
+ * -recalFile /path/to/recal.table.csv \
+ * -outputDir /path/to/output_dir/ \
+ * -resources resources/ \
+ * -ignoreQ 5
+ *
*
- * Create collapsed versions of the recal csv file and call R scripts to plot residual error versus the various covariates.
*/
+@DocumentedGATKFeature(
+ groupName = "AnalyzeCovariates",
+ summary = "Package to plot residual accuracy versus error covariates for the base quality score recalibrator")
public class AnalyzeCovariates extends CommandLineProgram {
/////////////////////////////
// Command Line Arguments
/////////////////////////////
-
+ /**
+ * After the header, data records occur one per line until the end of the file. The first several items on a line are the
+ * values of the individual covariates and will change depending on which covariates were specified at runtime. The last
+ * three items are the data -- that is, the number of observations for this combination of covariates, the number of reference mismatches,
+ * and the raw empirical quality score calculated by phred-scaling the mismatch rate.
+ */
@Input(fullName = "recal_file", shortName = "recalFile", doc = "The input recal csv file to analyze", required = false)
private String RECAL_FILE = "output.recal_data.csv";
@Argument(fullName = "output_dir", shortName = "outputDir", doc = "The directory in which to output all the plots and intermediate data files", required = false)
@@ -65,13 +120,22 @@ public class AnalyzeCovariates extends CommandLineProgram {
private String PATH_TO_RESOURCES = "public/R/";
@Argument(fullName = "ignoreQ", shortName = "ignoreQ", doc = "Ignore bases with reported quality less than this number.", required = false)
private int IGNORE_QSCORES_LESS_THAN = 5;
- @Argument(fullName = "numRG", shortName = "numRG", doc = "Only process N read groups. Default value: -1 (process all read groups)", required = false)
+ @Argument(fullName = "numRG", shortName = "numRG", doc = "Only process N read groups. Default value: -1 (process all read groups)", required = false)
private int NUM_READ_GROUPS_TO_PROCESS = -1; // -1 means process all read groups
+
+ /**
+ * Combinations of covariates in which there are zero mismatches technically have infinite quality. We get around this situation
+ * by capping at the specified value. We've found that Q40 is too low when using a more complete database of known variation like dbSNP build 132 or later.
+ */
@Argument(fullName="max_quality_score", shortName="maxQ", required = false, doc="The integer value at which to cap the quality scores, default is 50")
private int MAX_QUALITY_SCORE = 50;
+
+ /**
+ * This argument is useful when comparing before/after plots and you want the axes to match each other.
+ */
@Argument(fullName="max_histogram_value", shortName="maxHist", required = false, doc="If supplied, this value will be the max value of the histogram plots")
private int MAX_HISTOGRAM_VALUE = 0;
- @Argument(fullName="do_indel_quality", shortName="indels", required = false, doc="If supplied, this value will be the max value of the histogram plots")
+ @Argument(fullName="do_indel_quality", shortName="indels", required = false, doc="If supplied, do indel quality plotting")
private boolean DO_INDEL_QUALITY = false;
@@ -261,13 +325,14 @@ public class AnalyzeCovariates extends CommandLineProgram {
}
private void callRScripts() {
+ RScriptExecutor.RScriptArgumentCollection argumentCollection =
+ new RScriptExecutor.RScriptArgumentCollection(PATH_TO_RSCRIPT, Arrays.asList(PATH_TO_RESOURCES));
+ RScriptExecutor executor = new RScriptExecutor(argumentCollection, true);
int numReadGroups = 0;
-
+
// for each read group
for( Object readGroupKey : dataManager.getCollapsedTable(0).data.keySet() ) {
-
- Process p;
if(++numReadGroups <= NUM_READ_GROUPS_TO_PROCESS || NUM_READ_GROUPS_TO_PROCESS == -1) {
String readGroup = readGroupKey.toString();
@@ -276,35 +341,19 @@ public class AnalyzeCovariates extends CommandLineProgram {
// for each covariate
for( int iii = 1; iii < requestedCovariates.size(); iii++ ) {
Covariate cov = requestedCovariates.get(iii);
- try {
-
- if (DO_INDEL_QUALITY) {
- p = Runtime.getRuntime().exec(PATH_TO_RSCRIPT + " " + PATH_TO_RESOURCES + "plot_indelQuality.R" + " " +
- OUTPUT_DIR + readGroup + "." + cov.getClass().getSimpleName()+ ".dat" + " " +
- cov.getClass().getSimpleName().split("Covariate")[0]); // The third argument is the name of the covariate in order to make the plots look nice
- p.waitFor();
-
- } else {
+ final String outputFilename = OUTPUT_DIR + readGroup + "." + cov.getClass().getSimpleName()+ ".dat";
+ if (DO_INDEL_QUALITY) {
+ executor.callRScripts("plot_indelQuality.R", outputFilename,
+ cov.getClass().getSimpleName().split("Covariate")[0]); // The third argument is the name of the covariate in order to make the plots look nice
+ } else {
if( iii == 1 ) {
- // Analyze reported quality
- p = Runtime.getRuntime().exec(PATH_TO_RSCRIPT + " " + PATH_TO_RESOURCES + "plot_residualError_QualityScoreCovariate.R" + " " +
- OUTPUT_DIR + readGroup + "." + cov.getClass().getSimpleName()+ ".dat" + " " +
- IGNORE_QSCORES_LESS_THAN + " " + MAX_QUALITY_SCORE + " " + MAX_HISTOGRAM_VALUE); // The third argument is the Q scores that should be turned pink in the plot because they were ignored
- p.waitFor();
- } else { // Analyze all other covariates
- p = Runtime.getRuntime().exec(PATH_TO_RSCRIPT + " " + PATH_TO_RESOURCES + "plot_residualError_OtherCovariate.R" + " " +
- OUTPUT_DIR + readGroup + "." + cov.getClass().getSimpleName()+ ".dat" + " " +
- cov.getClass().getSimpleName().split("Covariate")[0]); // The third argument is the name of the covariate in order to make the plots look nice
- p.waitFor();
- }
+ // Analyze reported quality
+ executor.callRScripts("plot_residualError_QualityScoreCovariate.R", outputFilename,
+ IGNORE_QSCORES_LESS_THAN, MAX_QUALITY_SCORE, MAX_HISTOGRAM_VALUE); // The third argument is the Q scores that should be turned pink in the plot because they were ignored
+ } else { // Analyze all other covariates
+ executor.callRScripts("plot_residualError_OtherCovariate.R", outputFilename,
+ cov.getClass().getSimpleName().split("Covariate")[0]); // The third argument is the name of the covariate in order to make the plots look nice
}
- } catch (InterruptedException e) {
- e.printStackTrace();
- System.exit(-1);
- } catch (IOException e) {
- System.out.println("Fatal Exception: Perhaps RScript jobs are being spawned too quickly? One work around is to process fewer read groups using the -numRG option.");
- e.printStackTrace();
- System.exit(-1);
}
}
} else { // at the maximum number of read groups so break out
diff --git a/public/java/src/org/broadinstitute/sting/analyzecovariates/package-info.java b/public/java/src/org/broadinstitute/sting/analyzecovariates/package-info.java
new file mode 100644
index 000000000..9350e4a66
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/analyzecovariates/package-info.java
@@ -0,0 +1,4 @@
+/**
+ * Package to plot residual accuracy versus error covariates for the base quality score recalibrator.
+ */
+package org.broadinstitute.sting.analyzecovariates;
\ No newline at end of file
diff --git a/public/java/src/org/broadinstitute/sting/commandline/Advanced.java b/public/java/src/org/broadinstitute/sting/commandline/Advanced.java
new file mode 100644
index 000000000..7aeefe261
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/commandline/Advanced.java
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2011, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.commandline;
+
+import java.lang.annotation.*;
+
+/**
+ * Indicates that a walker argument should be considered an advanced option.
+ *
+ * @author Mark DePristo
+ * @version 0.1
+ */
+@Documented
+@Inherited
+@Retention(RetentionPolicy.RUNTIME)
+@Target({ElementType.TYPE,ElementType.FIELD})
+public @interface Advanced {
+}
diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentDefinitions.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentDefinitions.java
index 9f92df6e0..8e3f753a8 100755
--- a/public/java/src/org/broadinstitute/sting/commandline/ArgumentDefinitions.java
+++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentDefinitions.java
@@ -174,7 +174,8 @@ public class ArgumentDefinitions implements Iterable {
static DefinitionMatcher VerifiableDefinitionMatcher = new DefinitionMatcher() {
public boolean matches( ArgumentDefinition definition, Object key ) {
- return definition.validation != null;
+ // We can perform some sort of validation for anything that isn't a flag.
+ return !definition.isFlag;
}
};
}
diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatch.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatch.java
index 60ed8c899..351583c07 100755
--- a/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatch.java
+++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatch.java
@@ -44,7 +44,7 @@ public class ArgumentMatch implements Iterable {
public final String label;
/**
- * Maps indicies of command line arguments to values paired with that argument.
+ * Maps indices of command line arguments to values paired with that argument.
*/
public final SortedMap> indices = new TreeMap>();
diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentSource.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentSource.java
index f48ca864a..8ec0d650a 100644
--- a/public/java/src/org/broadinstitute/sting/commandline/ArgumentSource.java
+++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentSource.java
@@ -151,6 +151,14 @@ public class ArgumentSource {
return field.isAnnotationPresent(Hidden.class) || field.isAnnotationPresent(Deprecated.class);
}
+ /**
+ * Is the given argument considered an advanced option when displayed by the command-line argument system?
+ * @return True if so. False otherwise.
+ */
+ public boolean isAdvanced() {
+ return field.isAnnotationPresent(Advanced.class);
+ }
+
/**
* Is this command-line argument dependent on some primitive argument types?
* @return True if this command-line argument depends on other arguments; false otherwise.
@@ -175,13 +183,17 @@ public class ArgumentSource {
return typeDescriptor.createsTypeDefault(this);
}
+ public String typeDefaultDocString() {
+ return typeDescriptor.typeDefaultDocString(this);
+ }
+
/**
* Generates a default for the given type.
* @param parsingEngine the parsing engine used to validate this argument type descriptor.
* @return A default value for the given type.
*/
public Object createTypeDefault(ParsingEngine parsingEngine) {
- return typeDescriptor.createTypeDefault(parsingEngine,this,field.getType());
+ return typeDescriptor.createTypeDefault(parsingEngine,this,field.getGenericType());
}
/**
diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java
index 9c33e084d..5fff8f609 100644
--- a/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java
+++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java
@@ -26,6 +26,8 @@
package org.broadinstitute.sting.commandline;
import org.apache.log4j.Logger;
+import org.broad.tribble.Feature;
+import org.broadinstitute.sting.gatk.refdata.tracks.FeatureManager;
import org.broadinstitute.sting.gatk.walkers.Multiplex;
import org.broadinstitute.sting.gatk.walkers.Multiplexer;
import org.broadinstitute.sting.utils.classloader.JVMUtils;
@@ -33,6 +35,7 @@ import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
+import java.io.File;
import java.lang.annotation.Annotation;
import java.lang.reflect.*;
import java.util.*;
@@ -80,14 +83,26 @@ public abstract class ArgumentTypeDescriptor {
*/
public boolean createsTypeDefault(ArgumentSource source) { return false; }
+ /**
+ * Returns a documentation-friendly value for the default of a type descriptor.
+ * Must be overridden if createsTypeDefault returns true; cannot be called otherwise.
+ * @param source Source of the command-line argument.
+ * @return Friendly string of the default value, for documentation. If this descriptor doesn't create a default, throws
+ * an UnsupportedOperationException
+ */
+ public String typeDefaultDocString(ArgumentSource source) {
+ throw new UnsupportedOperationException();
+ }
+
/**
* Generates a default for the given type.
+ *
* @param parsingEngine the parsing engine used to validate this argument type descriptor.
* @param source Source of the command-line argument.
* @param type Type of value to create, in case the command-line argument system wants influence.
* @return A default value for the given type.
*/
- public Object createTypeDefault(ParsingEngine parsingEngine,ArgumentSource source,Class> type) { throw new UnsupportedOperationException("Unable to create default for type " + getClass()); }
+ public Object createTypeDefault(ParsingEngine parsingEngine,ArgumentSource source, Type type) { throw new UnsupportedOperationException("Unable to create default for type " + getClass()); }
/**
* Given the given argument source and attributes, synthesize argument definitions for command-line arguments.
@@ -109,7 +124,7 @@ public abstract class ArgumentTypeDescriptor {
* @return The parsed object.
*/
public Object parse(ParsingEngine parsingEngine, ArgumentSource source, ArgumentMatches matches) {
- return parse(parsingEngine, source, source.field.getType(), matches);
+ return parse(parsingEngine, source, source.field.getGenericType(), matches);
}
/**
@@ -131,18 +146,18 @@ public abstract class ArgumentTypeDescriptor {
protected ArgumentDefinition createDefaultArgumentDefinition( ArgumentSource source ) {
Annotation argumentAnnotation = getArgumentAnnotation(source);
return new ArgumentDefinition( ArgumentIOType.getIOType(argumentAnnotation),
- source.field.getType(),
- ArgumentDefinition.getFullName(argumentAnnotation, source.field.getName()),
- ArgumentDefinition.getShortName(argumentAnnotation),
- ArgumentDefinition.getDoc(argumentAnnotation),
- source.isRequired() && !createsTypeDefault(source) && !source.isFlag() && !source.isDeprecated(),
- source.isFlag(),
- source.isMultiValued(),
- source.isHidden(),
- getCollectionComponentType(source.field),
- ArgumentDefinition.getExclusiveOf(argumentAnnotation),
- ArgumentDefinition.getValidationRegex(argumentAnnotation),
- getValidOptions(source) );
+ source.field.getType(),
+ ArgumentDefinition.getFullName(argumentAnnotation, source.field.getName()),
+ ArgumentDefinition.getShortName(argumentAnnotation),
+ ArgumentDefinition.getDoc(argumentAnnotation),
+ source.isRequired() && !createsTypeDefault(source) && !source.isFlag() && !source.isDeprecated(),
+ source.isFlag(),
+ source.isMultiValued(),
+ source.isHidden(),
+ makeRawTypeIfNecessary(getCollectionComponentType(source.field)),
+ ArgumentDefinition.getExclusiveOf(argumentAnnotation),
+ ArgumentDefinition.getValidationRegex(argumentAnnotation),
+ getValidOptions(source) );
}
/**
@@ -151,7 +166,7 @@ public abstract class ArgumentTypeDescriptor {
* @return The parameterized component type, or String.class if the parameterized type could not be found.
* @throws IllegalArgumentException If more than one parameterized type is found on the field.
*/
- protected Class getCollectionComponentType( Field field ) {
+ protected Type getCollectionComponentType( Field field ) {
return null;
}
@@ -162,7 +177,7 @@ public abstract class ArgumentTypeDescriptor {
* @param matches The argument matches for the argument source, or the individual argument match for a scalar if this is being called to help parse a collection.
* @return The individual parsed object matching the argument match with Class type.
*/
- public abstract Object parse( ParsingEngine parsingEngine, ArgumentSource source, Class type, ArgumentMatches matches );
+ public abstract Object parse( ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches );
/**
* If the argument source only accepts a small set of options, populate the returned list with
@@ -273,6 +288,123 @@ public abstract class ArgumentTypeDescriptor {
public static boolean isArgumentHidden(Field field) {
return field.isAnnotationPresent(Hidden.class);
}
+
+    /** Reduces a reflective Type to its raw Class: null and Class pass through, a ParameterizedType yields its raw type. */
+    public Class makeRawTypeIfNecessary(Type t) {
+        if ( t == null || t instanceof Class )
+            return (Class)t;   // casting null is legal and yields null
+
+        if ( t instanceof ParameterizedType ) {
+            final ParameterizedType parameterized = (ParameterizedType)t;
+            return (Class)parameterized.getRawType();
+        }
+        throw new IllegalArgumentException("Unable to determine Class-derived component type of field: " + t);
+    }
+}
+
+/**
+ * Parser for RodBinding objects: builds bound bindings from command-line values and supplies the unbound default.
+ */
+class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
+    /** We only want RodBinding class objects (RodBinding itself or anything assignable to it). */
+    @Override
+    public boolean supports( Class type ) {
+        return isRodBinding(type);
+    }
+
+    /** @return true if the provided class is RodBinding.class or assignable to it */
+    public static boolean isRodBinding( Class type ) {
+        return RodBinding.class.isAssignableFrom(type);
+    }
+
+    /** Only arguments that are not required receive a type default. */
+    @Override
+    public boolean createsTypeDefault(ArgumentSource source) { return ! source.isRequired(); }
+
+    /** The default is an unbound RodBinding typed with the class JVMUtils extracts from the declared type. */
+    @Override
+    public Object createTypeDefault(ParsingEngine parsingEngine, ArgumentSource source, Type type) {
+        Class parameterType = JVMUtils.getParameterizedTypeClass(type);
+        return RodBinding.makeUnbound((Class<? extends Feature>)parameterType);
+    }
+
+    /** Constant text describing the default; the source argument is not consulted. */
+    @Override
+    public String typeDefaultDocString(ArgumentSource source) {
+        return "none";
+    }
+
+    /**
+     * Builds a bound RodBinding from the argument value and its positional tags (at most two): two tags mean
+     * name,type; one tag is a type if FeatureManager knows it by name, else a name; a missing type is detected
+     * from the file. Failures inside the try block are rethrown as UserException.CommandLineException.
+     */
+    @Override
+    public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches) {
+        ArgumentDefinition defaultDefinition = createDefaultArgumentDefinition(source);
+        String value = getArgumentValue( defaultDefinition, matches );
+        Class<? extends Feature> parameterType = JVMUtils.getParameterizedTypeClass(type);
+
+        try {
+            String name = defaultDefinition.fullName;
+            String tribbleType = null;
+            Tags tags = getArgumentTags(matches);
+            // at most two positional tags are meaningful here
+            if ( tags.getPositionalTags().size() > 2 ) {
+                throw new UserException.CommandLineException(
+                        String.format("Unexpected number of positional tags for argument %s : %s. " +
+                                "Rod bindings only suport -X:type and -X:name,type argument styles",
+                                value, source.field.getName()));
+            } else if ( tags.getPositionalTags().size() == 2 ) {
+                // -X:name,type style
+                name = tags.getPositionalTags().get(0);
+                tribbleType = tags.getPositionalTags().get(1);
+            } else {
+                // case with 0 or 1 positional tags
+                FeatureManager manager = new FeatureManager();
+                // a lone tag is a type when the FeatureManager recognizes it, otherwise it names the binding
+                String tag1 = tags.getPositionalTags().size() == 1 ? tags.getPositionalTags().get(0) : null;
+                if ( tag1 != null ) {
+                    if ( manager.getByName(tag1) != null ) // this is a type
+                        tribbleType = tag1;
+                    else
+                        name = tag1;
+                }
+
+                if ( tribbleType == null ) {
+                    // try to determine the file type dynamically
+                    File file = new File(value);
+                    if ( file.canRead() && file.isFile() ) {
+                        FeatureManager.FeatureDescriptor featureDescriptor = manager.getByFiletype(file);
+                        if ( featureDescriptor != null ) {
+                            tribbleType = featureDescriptor.getName();
+                            logger.info("Dynamically determined type of " + file + " to be " + tribbleType);
+                        }
+                    }
+
+                    if ( tribbleType == null ) {
+                        if ( ! file.canRead() || ! file.isFile() )
+                            throw new UserException.BadArgumentValue(name, "Couldn't read file to determine type: " + file);
+                        throw new UserException.CommandLineException(
+                                String.format("No tribble type was provided on the command line and the type of the file could not be determined dynamically. " +
+                                        "Please add an explicit type tag :NAME listing the correct type from among the supported types:%n%s",
+                                        manager.userFriendlyListOfAvailableFeatures(parameterType)));
+                    }
+                }
+            }
+
+            Constructor ctor = (makeRawTypeIfNecessary(type)).getConstructor(Class.class, String.class, String.class, String.class, Tags.class);
+            RodBinding result = (RodBinding)ctor.newInstance(parameterType, name, value, tribbleType, tags);
+            parsingEngine.addTags(result,tags);
+            parsingEngine.addRodBinding(result);
+            return result;
+        } catch (Exception e) {
+            // reflection wraps constructor failures in InvocationTargetException; report the real cause instead
+            final Throwable cause = (e instanceof InvocationTargetException && e.getCause() != null) ? e.getCause() : e;
+            throw new UserException.CommandLineException(String.format("Failed to parse value %s for argument %s. Message: %s",
+                    value, source.field.getName(), cause.getMessage()));
+        }
+    }
 }
/**
@@ -282,9 +414,10 @@ public abstract class ArgumentTypeDescriptor {
class SimpleArgumentTypeDescriptor extends ArgumentTypeDescriptor {
@Override
public boolean supports( Class type ) {
- if( type.isPrimitive() ) return true;
- if( type.isEnum() ) return true;
- if( primitiveToWrapperMap.containsValue(type) ) return true;
+ if ( RodBindingArgumentTypeDescriptor.isRodBinding(type) ) return false;
+ if ( type.isPrimitive() ) return true;
+ if ( type.isEnum() ) return true;
+ if ( primitiveToWrapperMap.containsValue(type) ) return true;
try {
type.getConstructor(String.class);
@@ -298,7 +431,8 @@ class SimpleArgumentTypeDescriptor extends ArgumentTypeDescriptor {
}
@Override
- public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Class type, ArgumentMatches matches) {
+ public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Type fulltype, ArgumentMatches matches) {
+ Class type = makeRawTypeIfNecessary(fulltype);
if (source.isFlag())
return true;
@@ -339,7 +473,7 @@ class SimpleArgumentTypeDescriptor extends ArgumentTypeDescriptor {
throw e;
} catch (InvocationTargetException e) {
throw new UserException.CommandLineException(String.format("Failed to parse value %s for argument %s. This is most commonly caused by providing an incorrect data type (e.g. a double when an int is required)",
- value, source.field.getName()));
+ value, source.field.getName()));
} catch (Exception e) {
throw new DynamicClassResolutionException(String.class, e);
}
@@ -351,7 +485,7 @@ class SimpleArgumentTypeDescriptor extends ArgumentTypeDescriptor {
return result;
}
-
+
/**
* A mapping of the primitive types to their associated wrapper classes. Is there really no way to infer
@@ -382,10 +516,10 @@ class CompoundArgumentTypeDescriptor extends ArgumentTypeDescriptor {
@Override
@SuppressWarnings("unchecked")
- public Object parse(ParsingEngine parsingEngine,ArgumentSource source, Class type, ArgumentMatches matches) {
- Class componentType;
+ public Object parse(ParsingEngine parsingEngine,ArgumentSource source, Type fulltype, ArgumentMatches matches) {
+ Class type = makeRawTypeIfNecessary(fulltype);
+ Type componentType;
Object result;
- Tags tags;
if( Collection.class.isAssignableFrom(type) ) {
@@ -399,7 +533,7 @@ class CompoundArgumentTypeDescriptor extends ArgumentTypeDescriptor {
}
componentType = getCollectionComponentType( source.field );
- ArgumentTypeDescriptor componentArgumentParser = parsingEngine.selectBestTypeDescriptor(componentType);
+ ArgumentTypeDescriptor componentArgumentParser = parsingEngine.selectBestTypeDescriptor(makeRawTypeIfNecessary(componentType));
Collection collection;
try {
@@ -428,7 +562,7 @@ class CompoundArgumentTypeDescriptor extends ArgumentTypeDescriptor {
}
else if( type.isArray() ) {
componentType = type.getComponentType();
- ArgumentTypeDescriptor componentArgumentParser = parsingEngine.selectBestTypeDescriptor(componentType);
+ ArgumentTypeDescriptor componentArgumentParser = parsingEngine.selectBestTypeDescriptor(makeRawTypeIfNecessary(componentType));
// Assemble a collection of individual values used in this computation.
Collection values = new ArrayList();
@@ -436,7 +570,7 @@ class CompoundArgumentTypeDescriptor extends ArgumentTypeDescriptor {
for( ArgumentMatch value: match )
values.add(value);
- result = Array.newInstance(componentType,values.size());
+ result = Array.newInstance(makeRawTypeIfNecessary(componentType),values.size());
int i = 0;
for( ArgumentMatch value: values ) {
@@ -459,16 +593,16 @@ class CompoundArgumentTypeDescriptor extends ArgumentTypeDescriptor {
* @throws IllegalArgumentException If more than one parameterized type is found on the field.
*/
@Override
- protected Class getCollectionComponentType( Field field ) {
- // If this is a parameterized collection, find the contained type. If blow up if more than one type exists.
- if( field.getGenericType() instanceof ParameterizedType) {
- ParameterizedType parameterizedType = (ParameterizedType)field.getGenericType();
- if( parameterizedType.getActualTypeArguments().length > 1 )
- throw new IllegalArgumentException("Unable to determine collection type of field: " + field.toString());
- return (Class)parameterizedType.getActualTypeArguments()[0];
- }
- else
- return String.class;
+ protected Type getCollectionComponentType( Field field ) {
+ // If this is a parameterized collection, find the contained type. Blow up if more than one type exists.
+ if( field.getGenericType() instanceof ParameterizedType) {
+ ParameterizedType parameterizedType = (ParameterizedType)field.getGenericType();
+ if( parameterizedType.getActualTypeArguments().length > 1 )
+ throw new IllegalArgumentException("Unable to determine collection type of field: " + field.toString());
+ return parameterizedType.getActualTypeArguments()[0];
+ }
+ else
+ return String.class;
}
}
@@ -510,12 +644,12 @@ class MultiplexArgumentTypeDescriptor extends ArgumentTypeDescriptor {
}
@Override
- public Object createTypeDefault(ParsingEngine parsingEngine,ArgumentSource source,Class> type) {
+ public Object createTypeDefault(ParsingEngine parsingEngine,ArgumentSource source, Type type) {
if(multiplexer == null || multiplexedIds == null)
throw new ReviewedStingException("No multiplexed ids available");
Map multiplexedMapping = new HashMap();
- Class componentType = getCollectionComponentType(source.field);
+ Class componentType = makeRawTypeIfNecessary(getCollectionComponentType(source.field));
ArgumentTypeDescriptor componentTypeDescriptor = parsingEngine.selectBestTypeDescriptor(componentType);
for(Object id: multiplexedIds) {
@@ -527,15 +661,19 @@ class MultiplexArgumentTypeDescriptor extends ArgumentTypeDescriptor {
return multiplexedMapping;
}
+ @Override
+ public String typeDefaultDocString(ArgumentSource source) {
+ return "None"; // constant placeholder text; the source argument is not consulted
+ }
@Override
- public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Class type, ArgumentMatches matches) {
+ public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches) {
if(multiplexedIds == null)
throw new ReviewedStingException("Cannot directly parse a MultiplexArgumentTypeDescriptor; must create a derivative type descriptor first.");
Map multiplexedMapping = new HashMap();
- Class componentType = getCollectionComponentType(source.field);
+ Class componentType = makeRawTypeIfNecessary(getCollectionComponentType(source.field));
for(Object id: multiplexedIds) {
@@ -606,7 +744,7 @@ class MultiplexArgumentTypeDescriptor extends ArgumentTypeDescriptor {
* @throws IllegalArgumentException If more than one parameterized type is found on the field.
*/
@Override
- protected Class getCollectionComponentType( Field field ) {
+ protected Type getCollectionComponentType( Field field ) {
// Multiplex arguments must resolve to maps from which the clp should extract the second type.
if( field.getGenericType() instanceof ParameterizedType) {
ParameterizedType parameterizedType = (ParameterizedType)field.getGenericType();
diff --git a/public/java/src/org/broadinstitute/sting/commandline/CommandLineProgram.java b/public/java/src/org/broadinstitute/sting/commandline/CommandLineProgram.java
index aba4fc109..d88e7030e 100644
--- a/public/java/src/org/broadinstitute/sting/commandline/CommandLineProgram.java
+++ b/public/java/src/org/broadinstitute/sting/commandline/CommandLineProgram.java
@@ -43,7 +43,7 @@ import java.util.Locale;
public abstract class CommandLineProgram {
/** The command-line program and the arguments it returned. */
- protected ParsingEngine parser = null;
+ public ParsingEngine parser = null;
/** the default log level */
@Argument(fullName = "logging_level",
@@ -144,6 +144,11 @@ public abstract class CommandLineProgram {
public static int result = -1;
+ @SuppressWarnings("unchecked")
+ public static void start(CommandLineProgram clp, String[] args) throws Exception {
+ start(clp, args, false); // dryRun=false: delegate to the three-argument overload, which validates and then executes
+ }
+
/**
* This function is called to start processing the command line, and kick
* off the execute message of the program.
@@ -153,7 +158,7 @@ public abstract class CommandLineProgram {
* @throws Exception when an exception occurs
*/
@SuppressWarnings("unchecked")
- public static void start(CommandLineProgram clp, String[] args) throws Exception {
+ public static void start(CommandLineProgram clp, String[] args, boolean dryRun) throws Exception {
try {
// setup our log layout
@@ -180,8 +185,9 @@ public abstract class CommandLineProgram {
// - InvalidArgument in case these arguments are specified by plugins.
// - MissingRequiredArgument in case the user requested help. Handle that later, once we've
// determined the full complement of arguments.
- parser.validate(EnumSet.of(ParsingEngine.ValidationType.MissingRequiredArgument,
- ParsingEngine.ValidationType.InvalidArgument));
+ if ( ! dryRun )
+ parser.validate(EnumSet.of(ParsingEngine.ValidationType.MissingRequiredArgument,
+ ParsingEngine.ValidationType.InvalidArgument));
parser.loadArgumentsIntoObject(clp);
// Initialize the logger using the loaded command line.
@@ -195,36 +201,40 @@ public abstract class CommandLineProgram {
if (isHelpPresent(parser))
printHelpAndExit(clp, parser);
- parser.validate();
+ if ( ! dryRun ) parser.validate();
} else {
parser.parse(args);
- if (isHelpPresent(parser))
- printHelpAndExit(clp, parser);
+ if ( ! dryRun ) {
+ if (isHelpPresent(parser))
+ printHelpAndExit(clp, parser);
- parser.validate();
+ parser.validate();
+ }
parser.loadArgumentsIntoObject(clp);
// Initialize the logger using the loaded command line.
clp.setupLoggerLevel(layout);
}
- // if they specify a log location, output our data there
- if (clp.toFile != null) {
- FileAppender appender;
- try {
- appender = new FileAppender(layout, clp.toFile, false);
- logger.addAppender(appender);
- } catch (IOException e) {
- throw new RuntimeException("Unable to re-route log output to " + clp.toFile + " make sure the destination exists");
+ if ( ! dryRun ) {
+ // if they specify a log location, output our data there
+ if (clp.toFile != null) {
+ FileAppender appender;
+ try {
+ appender = new FileAppender(layout, clp.toFile, false);
+ logger.addAppender(appender);
+ } catch (IOException e) {
+ throw new RuntimeException("Unable to re-route log output to " + clp.toFile + " make sure the destination exists");
+ }
}
+
+ // regardless of what happens next, generate the header information
+ HelpFormatter.generateHeaderInformation(clp.getApplicationDetails(), args);
+
+ // call the execute
+ CommandLineProgram.result = clp.execute();
}
-
- // regardless of what happens next, generate the header information
- HelpFormatter.generateHeaderInformation(clp.getApplicationDetails(), args);
-
- // call the execute
- CommandLineProgram.result = clp.execute();
}
catch (ArgumentException e) {
clp.parser.printHelp(clp.getApplicationDetails());
diff --git a/public/java/src/org/broadinstitute/sting/commandline/Output.java b/public/java/src/org/broadinstitute/sting/commandline/Output.java
index 22565dbf5..f8aef0355 100644
--- a/public/java/src/org/broadinstitute/sting/commandline/Output.java
+++ b/public/java/src/org/broadinstitute/sting/commandline/Output.java
@@ -55,7 +55,7 @@ public @interface Output {
* --help argument is specified.
* @return Doc string associated with this command-line argument.
*/
- String doc() default "An output file presented to the walker. Will overwrite contents if file exists.";
+ String doc() default "An output file created by the walker. Will overwrite contents if file exists";
/**
* Is this argument required. If true, the command-line argument system will
diff --git a/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java b/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java
index 8423bb2f2..fbf8c6516 100755
--- a/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java
+++ b/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java
@@ -25,6 +25,7 @@
package org.broadinstitute.sting.commandline;
+import com.google.java.contract.Requires;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.classloader.JVMUtils;
@@ -41,11 +42,16 @@ import java.util.*;
* A parser for Sting command-line arguments.
*/
public class ParsingEngine {
+ /**
+ * The loaded argument sources along with their back definitions.
+ */
+ private Map argumentSourcesByDefinition = new HashMap();
+
/**
* A list of defined arguments against which command lines are matched.
* Package protected for testing access.
*/
- ArgumentDefinitions argumentDefinitions = new ArgumentDefinitions();
+ public ArgumentDefinitions argumentDefinitions = new ArgumentDefinitions();
/**
* A list of matches from defined arguments to command-line text.
@@ -59,11 +65,17 @@ public class ParsingEngine {
*/
private List parsingMethods = new ArrayList();
+ /**
+ * All of the RodBinding objects we've seen while parsing
+ */
+ private List rodBindings = new ArrayList();
+
/**
* Class reference to the different types of descriptors that the create method can create.
* The type of set used must be ordered (but not necessarily sorted).
*/
private static final Set STANDARD_ARGUMENT_TYPE_DESCRIPTORS = new LinkedHashSet( Arrays.asList(new SimpleArgumentTypeDescriptor(),
+ new RodBindingArgumentTypeDescriptor(),
new CompoundArgumentTypeDescriptor(),
new MultiplexArgumentTypeDescriptor()) );
@@ -80,6 +92,7 @@ public class ParsingEngine {
protected static Logger logger = Logger.getLogger(ParsingEngine.class);
public ParsingEngine( CommandLineProgram clp ) {
+ RodBinding.resetNameCounter();
parsingMethods.add( ParsingMethod.FullNameParsingMethod );
parsingMethods.add( ParsingMethod.ShortNameParsingMethod );
@@ -107,8 +120,13 @@ public class ParsingEngine {
*/
public void addArgumentSource( String sourceName, Class sourceClass ) {
List argumentsFromSource = new ArrayList();
- for( ArgumentSource argumentSource: extractArgumentSources(sourceClass) )
- argumentsFromSource.addAll( argumentSource.createArgumentDefinitions() );
+ for( ArgumentSource argumentSource: extractArgumentSources(sourceClass) ) {
+ List argumentDefinitions = argumentSource.createArgumentDefinitions();
+ for(ArgumentDefinition argumentDefinition: argumentDefinitions) {
+ argumentSourcesByDefinition.put(argumentDefinition,argumentSource);
+ argumentsFromSource.add( argumentDefinition );
+ }
+ }
argumentDefinitions.add( new ArgumentDefinitionGroup(sourceName, argumentsFromSource) );
}
@@ -199,16 +217,25 @@ public class ParsingEngine {
throw new InvalidArgumentException( invalidArguments );
}
- // Find invalid argument values (arguments that fail the regexp test.
+ // Find invalid argument values -- invalid arguments are either completely missing or fail the specified 'validation' regular expression.
if( !skipValidationOf.contains(ValidationType.InvalidArgumentValue) ) {
Collection verifiableArguments =
argumentDefinitions.findArgumentDefinitions( null, ArgumentDefinitions.VerifiableDefinitionMatcher );
Collection> invalidValues = new ArrayList>();
for( ArgumentDefinition verifiableArgument: verifiableArguments ) {
ArgumentMatches verifiableMatches = argumentMatches.findMatches( verifiableArgument );
+ // Check to see whether an argument value was specified. Argument values must be provided
+ // when the argument name is specified and the argument is not a flag type.
+ for(ArgumentMatch verifiableMatch: verifiableMatches) {
+ ArgumentSource argumentSource = argumentSourcesByDefinition.get(verifiableArgument);
+ if(verifiableMatch.values().size() == 0 && !verifiableArgument.isFlag && argumentSource.createsTypeDefault())
+ invalidValues.add(new Pair(verifiableArgument,null));
+ }
+
+ // Ensure that the field contents meet the validation criteria specified by the regular expression.
for( ArgumentMatch verifiableMatch: verifiableMatches ) {
for( String value: verifiableMatch.values() ) {
- if( !value.matches(verifiableArgument.validation) )
+ if( verifiableArgument.validation != null && !value.matches(verifiableArgument.validation) )
invalidValues.add( new Pair(verifiableArgument, value) );
}
}
@@ -304,7 +331,17 @@ public class ParsingEngine {
if(!tags.containsKey(key))
return new Tags();
return tags.get(key);
- }
+ }
+
+ /**
+ * Add a RodBinding type argument to this parser. Called during parsing to allow
+ * us to track all of the RodBindings discovered in the command line.
+ * @param rodBinding the rodbinding to add. Callers must not add it twice; no duplicate check is done here
+ */
+ @Requires("rodBinding != null")
+ public void addRodBinding(final RodBinding rodBinding) {
+ rodBindings.add(rodBinding);
+ }
/**
* Notify the user that a deprecated command-line argument has been used.
@@ -327,7 +364,7 @@ public class ParsingEngine {
*/
private void loadValueIntoObject( ArgumentSource source, Object instance, ArgumentMatches argumentMatches ) {
// Nothing to load
- if( argumentMatches.size() == 0 && !(source.createsTypeDefault() && source.isRequired()))
+ if( argumentMatches.size() == 0 && ! source.createsTypeDefault() )
return;
// Target instance into which to inject the value.
@@ -344,6 +381,10 @@ public class ParsingEngine {
}
}
+ public Collection getRodBindings() {
+ return Collections.unmodifiableCollection(rodBindings); // read-only view: callers cannot add or remove bindings through it
+ }
+
/**
* Gets a collection of the container instances of the given type stored within the given target.
* @param source Argument source.
@@ -390,7 +431,6 @@ public class ParsingEngine {
return ArgumentTypeDescriptor.selectBest(argumentTypeDescriptors,type);
}
-
private List extractArgumentSources(Class sourceClass, Field[] parentFields) {
// now simply call into the truly general routine extract argument bindings but with a null
// object so bindings aren't computed
@@ -515,10 +555,14 @@ class InvalidArgumentValueException extends ArgumentException {
private static String formatArguments( Collection> invalidArgumentValues ) {
StringBuilder sb = new StringBuilder();
for( Pair invalidValue: invalidArgumentValues ) {
- sb.append( String.format("%nArgument '--%s' has value of incorrect format: %s (should match %s)",
- invalidValue.first.fullName,
- invalidValue.second,
- invalidValue.first.validation) );
+ if(invalidValue.getSecond() == null)
+ sb.append( String.format("%nArgument '--%s' requires a value but none was provided",
+ invalidValue.first.fullName) );
+ else
+ sb.append( String.format("%nArgument '--%s' has value of incorrect format: %s (should match %s)",
+ invalidValue.first.fullName,
+ invalidValue.second,
+ invalidValue.first.validation) );
}
return sb.toString();
}
diff --git a/public/java/src/org/broadinstitute/sting/commandline/RodBinding.java b/public/java/src/org/broadinstitute/sting/commandline/RodBinding.java
new file mode 100644
index 000000000..e0b1154c4
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/commandline/RodBinding.java
@@ -0,0 +1,187 @@
+/*
+ * Copyright (c) 2011, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.commandline;
+
+import com.google.java.contract.Ensures;
+import com.google.java.contract.Requires;
+import org.broad.tribble.Feature;
+
+import java.util.*;
+
+/**
+ * A RodBinding representing a walker argument that gets bound to a ROD track.
+ *
+ * The RodBinding is a formal GATK argument that bridges between a walker and
+ * the RefMetaDataTracker to obtain data about this rod track at runtime. The RodBinding
+ * is explicitly typed with type of the Tribble.Feature expected to be produced by this
+ * argument. The GATK Engine takes care of initializing the binding and connecting it
+ * to the RMD system.
+ *
+ * It is recommended that optional RodBindings be initialized to the value returned
+ * by the static method makeUnbound().
+ *
+ * Note that this class is immutable.
+ */
+public final class RodBinding<T extends Feature> {
+    protected final static String UNBOUND_VARIABLE_NAME = "";
+    protected final static String UNBOUND_SOURCE = "UNBOUND";
+    protected final static String UNBOUND_TRIBBLE_TYPE = "";
+
+    /**
+     * Create an unbound RodBinding of type.  This is the correct programming
+     * style for an optional RodBinding
+     *
+     *     At Input()
+     *     {@code RodBinding<T> x = RodBinding.makeUnbound(T.class)}
+     *
+     * The unbound binding is guaranteed to never match any binding.  It uniquely
+     * returns false to isBound().
+     *
+     * @param type the Class type produced by this unbound object
+     * @param <T> any class extending Tribble Feature
+     * @return the UNBOUND RodBinding producing objects of type T
+     */
+    @Requires("type != null")
+    protected final static <T extends Feature> RodBinding<T> makeUnbound(Class<T> type) {
+        return new RodBinding<T>(type);
+    }
+
+    /** The name of this binding.  Often the name of the field itself, but can be overridden on cmdline */
+    final private String name;
+    /** where the data for this ROD is coming from.  A file or special value if coming from stdin */
+    final private String source;
+    /** the string name of the tribble type, such as vcf, bed, etc. */
+    final private String tribbleType;
+    /** The command line tags associated with this RodBinding */
+    final private Tags tags;
+    /** The Java class expected for this RodBinding.  Must correspond to the type emitted by Tribble */
+    final private Class<T> type;
+    /** True for all RodBindings except the special UNBOUND binding, which is the default for optional arguments */
+    final private boolean bound;
+
+    /**
+     * The name counter.  This is how we create unique names for collections of RodBindings
+     * on the command line.  If you provide the GATK with -X file1 and -X file2 for a
+     * RodBinding argument declared as {@code List<RodBinding<T>>}, the bindings automatically
+     * receive the names X and X2.
+     */
+    final private static Map<String, Integer> nameCounter = new HashMap<String, Integer>();
+
+    /** Forgets all counted names.  Called by the ParsingEngine constructor; also useful in unit tests. */
+    final public static synchronized void resetNameCounter() {
+        nameCounter.clear();
+    }
+
+    /**
+     * Returns rawName on its first use and rawName + k on its k-th use (k > 1).
+     */
+    @Requires("rawName != null")
+    @Ensures("result != null")
+    final private static synchronized String countedVariableName(final String rawName) {
+        final Integer previous = nameCounter.get(rawName);
+        final int uses = previous == null ? 1 : previous + 1;
+        nameCounter.put(rawName, uses);
+        // the first binding keeps the bare name, later ones get the running count appended
+        return uses == 1 ? rawName : rawName + uses;
+    }
+
+    /** Creates a bound RodBinding; the stored name is rawName made unique through the name counter. */
+    @Requires({"type != null", "rawName != null", "source != null", "tribbleType != null", "tags != null"})
+    public RodBinding(Class<T> type, final String rawName, final String source, final String tribbleType, final Tags tags) {
+        this.type = type;
+        this.name = countedVariableName(rawName);
+        this.source = source;
+        this.tribbleType = tribbleType;
+        this.tags = tags;
+        this.bound = true;
+    }
+
+    /**
+     * Make an unbound RodBinding.  Private: within this class only makeUnbound() calls it
+     * @param type class this unbound RodBinding creates
+     */
+    @Requires({"type != null"})
+    private RodBinding(Class<T> type) {
+        this.type = type;
+        this.name = UNBOUND_VARIABLE_NAME;  // special value can never be found in RefMetaDataTracker
+        this.source = UNBOUND_SOURCE;
+        this.tribbleType = UNBOUND_TRIBBLE_TYPE;
+        this.tags = new Tags();
+        this.bound = false;
+    }
+
+    /**
+     * @return True for all RodBindings except the special UNBOUND binding, which is the default for optional arguments
+     */
+    final public boolean isBound() {
+        return bound;
+    }
+
+    /**
+     * @return The name of this binding.  Often the name of the field itself, but can be overridden on cmdline
+     */
+    @Ensures({"result != null"})
+    final public String getName() {
+        return name;
+    }
+
+    /**
+     * @return The Java class expected for this RodBinding.  Must correspond to the type emitted by Tribble
+     */
+    @Ensures({"result != null"})
+    final public Class<T> getType() {
+        return type;
+    }
+
+    /**
+     * @return where the data for this ROD is coming from.  A file or special value if coming from stdin
+     */
+    @Ensures({"result != null"})
+    final public String getSource() {
+        return source;
+    }
+
+    /**
+     * @return The command line tags associated with this RodBinding.  Will include the tags used to
+     * determine the name and type of this RodBinding
+     */
+    @Ensures({"result != null"})
+    final public Tags getTags() {
+        return tags;
+    }
+
+    /**
+     * @return the string name of the tribble type, such as vcf, bed, etc.
+     */
+    @Ensures({"result != null"})
+    final public String getTribbleType() {
+        return tribbleType;
+    }
+
+    @Override
+    public String toString() {
+        return String.format("(RodBinding name=%s source=%s)", getName(), getSource());
+    }
+}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java b/public/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java
index a080ab439..32002e093 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java
@@ -25,21 +25,20 @@
package org.broadinstitute.sting.gatk;
-import org.broadinstitute.sting.commandline.ArgumentTypeDescriptor;
-import org.broadinstitute.sting.commandline.CommandLineProgram;
+import org.apache.log4j.Logger;
+import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
import org.broadinstitute.sting.gatk.filters.ReadFilter;
import org.broadinstitute.sting.gatk.io.stubs.OutputStreamArgumentTypeDescriptor;
-import org.broadinstitute.sting.gatk.io.stubs.SAMFileReaderArgumentTypeDescriptor;
import org.broadinstitute.sting.gatk.io.stubs.SAMFileWriterArgumentTypeDescriptor;
import org.broadinstitute.sting.gatk.io.stubs.VCFWriterArgumentTypeDescriptor;
import org.broadinstitute.sting.gatk.phonehome.GATKRunReport;
+import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
import org.broadinstitute.sting.gatk.walkers.Walker;
+import org.broadinstitute.sting.utils.classloader.JVMUtils;
import org.broadinstitute.sting.utils.text.ListFileUtils;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collection;
+import java.util.*;
/**
* @author aaron
@@ -64,6 +63,8 @@ public abstract class CommandLineExecutable extends CommandLineProgram {
*/
private final Collection argumentSources = new ArrayList();
+ protected static Logger logger = Logger.getLogger(CommandLineExecutable.class);
+
/**
* this is the function that the inheriting class can expect to have called
* when the command line system has initialized.
@@ -81,7 +82,6 @@ public abstract class CommandLineExecutable extends CommandLineProgram {
// File lists can require a bit of additional expansion. Set these explicitly by the engine.
engine.setSAMFileIDs(ListFileUtils.unpackBAMFileList(getArgumentCollection().samFiles,parser));
- engine.setReferenceMetaDataFiles(ListFileUtils.unpackRODBindings(getArgumentCollection().RODBindings,getArgumentCollection().DBSNPFile,parser));
engine.setWalker(walker);
walker.setToolkit(engine);
@@ -96,6 +96,24 @@ public abstract class CommandLineExecutable extends CommandLineProgram {
loadArgumentsIntoObject(walker);
argumentSources.add(walker);
+ Collection rodBindings = ListFileUtils.unpackRODBindings(parser.getRodBindings(), parser);
+
+ // todo: remove me when the old style system is removed
+ if ( getArgumentCollection().RODBindings.size() > 0 ) {
+ logger.warn("################################################################################");
+ logger.warn("################################################################################");
+ logger.warn("Deprecated -B rod binding syntax detected. This syntax has been eliminated in GATK 1.2.");
+ logger.warn("Please use arguments defined by each specific walker instead.");
+ for ( String oldStyleRodBinding : getArgumentCollection().RODBindings ) {
+ logger.warn(" -B rod binding with value " + oldStyleRodBinding + " tags: " + parser.getTags(oldStyleRodBinding).getPositionalTags());
+ }
+ logger.warn("################################################################################");
+ logger.warn("################################################################################");
+ System.exit(1);
+ }
+
+ engine.setReferenceMetaDataFiles(rodBindings);
+
for (ReadFilter filter: filters) {
loadArgumentsIntoObject(filter);
argumentSources.add(filter);
@@ -112,6 +130,7 @@ public abstract class CommandLineExecutable extends CommandLineProgram {
return 0;
}
+
/**
* Generate the GATK run report for this walker using the current GATKEngine, if -et is enabled.
* This report will be written to either STDOUT or to the run repository, depending on the options
@@ -142,7 +161,6 @@ public abstract class CommandLineExecutable extends CommandLineProgram {
*/
protected Collection getArgumentTypeDescriptors() {
return Arrays.asList( new VCFWriterArgumentTypeDescriptor(engine,System.out,argumentSources),
- new SAMFileReaderArgumentTypeDescriptor(engine),
new SAMFileWriterArgumentTypeDescriptor(engine,System.out),
new OutputStreamArgumentTypeDescriptor(engine,System.out) );
}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java b/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java
index da2be74bf..b8488dc9a 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java
@@ -30,25 +30,27 @@ import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.ArgumentCollection;
import org.broadinstitute.sting.commandline.CommandLineProgram;
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
+import org.broadinstitute.sting.gatk.filters.ReadFilter;
+import org.broadinstitute.sting.gatk.refdata.tracks.FeatureManager;
import org.broadinstitute.sting.gatk.walkers.Attribution;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.utils.exceptions.UserException;
-import org.broadinstitute.sting.utils.help.ApplicationDetails;
+import org.broadinstitute.sting.utils.help.*;
import org.broadinstitute.sting.utils.text.TextFormattingUtils;
import java.util.*;
/**
- * @author aaron
- * @version 1.0
- * @date May 8, 2009
- *
- * Class CommandLineGATK
- *
+ * The GATK engine itself. Manages map/reduce data access and runs walkers.
+ *
* We run command line GATK programs using this class. It gets the command line args, parses them, and hands the
* gatk all the parsed out information. Pretty much anything dealing with the underlying system should go here,
* the gatk engine should deal with any data related information.
*/
+@DocumentedGATKFeature(
+ groupName = "GATK Engine",
+ summary = "Features and arguments for the GATK engine itself, available to all walkers.",
+ extraDocs = { UserException.class })
public class CommandLineGATK extends CommandLineExecutable {
@Argument(fullName = "analysis_type", shortName = "T", doc = "Type of analysis to run")
private String analysisName = null;
@@ -173,12 +175,12 @@ public class CommandLineGATK extends CommandLineExecutable {
StringBuilder additionalHelp = new StringBuilder();
Formatter formatter = new Formatter(additionalHelp);
- formatter.format("Description:%n");
+ formatter.format("Available Reference Ordered Data types:%n");
+ formatter.format(new FeatureManager().userFriendlyListOfAvailableFeatures());
+ formatter.format("%n");
- WalkerManager walkerManager = engine.getWalkerManager();
- String walkerHelpText = walkerManager.getWalkerDescriptionText(walkerType);
-
- printDescriptorLine(formatter,WALKER_INDENT,"",WALKER_INDENT,FIELD_SEPARATOR,walkerHelpText,TextFormattingUtils.DEFAULT_LINE_WIDTH);
+ formatter.format("For a full description of this walker, see its GATKdocs at:%n");
+ formatter.format("%s%n", GATKDocUtils.helpLinksToGATKDocs(walkerType));
return additionalHelp.toString();
}
@@ -192,8 +194,6 @@ public class CommandLineGATK extends CommandLineExecutable {
StringBuilder additionalHelp = new StringBuilder();
Formatter formatter = new Formatter(additionalHelp);
- formatter.format("Available analyses:%n");
-
// Get the list of walker names from the walker manager.
WalkerManager walkerManager = engine.getWalkerManager();
diff --git a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java
index 918bc1251..5b9ebd99b 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java
@@ -43,7 +43,7 @@ import org.broadinstitute.sting.gatk.filters.ReadGroupBlackListFilter;
import org.broadinstitute.sting.gatk.io.OutputTracker;
import org.broadinstitute.sting.gatk.io.stubs.Stub;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
-import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder;
+import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder;
import org.broadinstitute.sting.gatk.refdata.utils.RMDIntervalGenerator;
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
import org.broadinstitute.sting.gatk.walkers.*;
@@ -370,33 +370,6 @@ public class GenomeAnalysisEngine {
throw new ArgumentException("Walker does not allow a reference but one was provided.");
}
- /**
- * Verifies that all required reference-ordered data has been supplied, and any reference-ordered data that was not
- * 'allowed' is still present.
- *
- * @param rods Reference-ordered data to load.
- */
- protected void validateSuppliedReferenceOrderedData(List rods) {
- // Check to make sure that all required metadata is present.
- List allRequired = WalkerManager.getRequiredMetaData(walker);
- for (RMD required : allRequired) {
- boolean found = false;
- for (ReferenceOrderedDataSource rod : rods) {
- if (rod.matchesNameAndRecordType(required.name(), required.type()))
- found = true;
- }
- if (!found)
- throw new ArgumentException(String.format("Walker requires reference metadata to be supplied named '%s' of type '%s', but this metadata was not provided. " +
- "Please supply the specified metadata file.", required.name(), required.type().getSimpleName()));
- }
-
- // Check to see that no forbidden rods are present.
- for (ReferenceOrderedDataSource rod : rods) {
- if (!WalkerManager.isAllowed(walker, rod))
- throw new ArgumentException(String.format("Walker of type %s does not allow access to metadata: %s", walker.getClass(), rod.getName()));
- }
- }
-
protected void validateSuppliedIntervals() {
// Only read walkers support '-L unmapped' intervals. Trap and validate any other instances of -L unmapped.
if(!(walker instanceof ReadWalker)) {
@@ -716,8 +689,6 @@ public class GenomeAnalysisEngine {
validateSuppliedReads();
readsDataSource = createReadsDataSource(argCollection,genomeLocParser,referenceDataSource.getReference());
- sampleDataSource = new SampleDataSource(getSAMFileHeader(), argCollection.sampleFiles);
-
for (ReadFilter filter : filters)
filter.initialize(this);
@@ -926,9 +897,6 @@ public class GenomeAnalysisEngine {
GenomeLocParser genomeLocParser,
ValidationExclusion.TYPE validationExclusionType) {
RMDTrackBuilder builder = new RMDTrackBuilder(sequenceDictionary,genomeLocParser,validationExclusionType);
- // try and make the tracks given their requests
- // create of live instances of the tracks
- List tracks = new ArrayList();
List dataSources = new ArrayList();
for (RMDTriplet fileDescriptor : referenceMetaDataFiles)
@@ -939,7 +907,6 @@ public class GenomeAnalysisEngine {
flashbackData()));
// validation: check to make sure everything the walker needs is present, and that all sequence dictionaries match.
- validateSuppliedReferenceOrderedData(dataSources);
validateSourcesAgainstReference(readsDataSource, referenceDataSource.getReference(), dataSources, builder);
return dataSources;
@@ -994,7 +961,7 @@ public class GenomeAnalysisEngine {
/**
* Get the list of intervals passed to the engine.
- * @return List of intervals.
+ * @return List of intervals, or null if no intervals are in use
*/
public GenomeLocSortedSet getIntervals() {
return this.intervals;
diff --git a/public/java/src/org/broadinstitute/sting/gatk/WalkerManager.java b/public/java/src/org/broadinstitute/sting/gatk/WalkerManager.java
index cf190835e..f053c299c 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/WalkerManager.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/WalkerManager.java
@@ -33,9 +33,7 @@ import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.classloader.PluginManager;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
-import org.broadinstitute.sting.utils.help.DescriptionTaglet;
-import org.broadinstitute.sting.utils.help.DisplayNameTaglet;
-import org.broadinstitute.sting.utils.help.SummaryTaglet;
+import org.broadinstitute.sting.utils.help.ResourceBundleExtractorDoclet;
import org.broadinstitute.sting.utils.text.TextFormattingUtils;
import java.util.*;
@@ -82,19 +80,10 @@ public class WalkerManager extends PluginManager {
* @return A suitable display name for the package.
*/
public String getPackageDisplayName(String packageName) {
- // Try to find an override for the display name of this package.
- String displayNameKey = String.format("%s.%s",packageName,DisplayNameTaglet.NAME);
- String displayName;
- if(helpText.containsKey(displayNameKey)) {
- displayName = helpText.getString(displayNameKey);
- }
- else {
- // If no override exists...
- // ...try to compute the override from the text of the package name, while accounting for
- // unpackaged walkers.
- displayName = packageName.substring(packageName.lastIndexOf('.')+1);
- if(displayName.trim().equals("")) displayName = "";
- }
+ // Compute the display name from the last dot-separated component of the package name, while accounting for
+ // unpackaged walkers.
+ String displayName = packageName.substring(packageName.lastIndexOf('.')+1);
+ if (displayName.trim().equals("")) displayName = "";
return displayName;
}
@@ -104,7 +93,7 @@ public class WalkerManager extends PluginManager {
* @return Package help text, or "" if none exists.
*/
public String getPackageSummaryText(String packageName) {
- String key = String.format("%s.%s",packageName,SummaryTaglet.NAME);
+ String key = String.format("%s.%s",packageName, ResourceBundleExtractorDoclet.SUMMARY_TAGLET_NAME);
if(!helpText.containsKey(key))
return "";
return helpText.getString(key);
@@ -116,7 +105,7 @@ public class WalkerManager extends PluginManager {
* @return Walker summary description, or "" if none exists.
*/
public String getWalkerSummaryText(Class extends Walker> walkerType) {
- String walkerSummary = String.format("%s.%s",walkerType.getName(), SummaryTaglet.NAME);
+ String walkerSummary = String.format("%s.%s",walkerType.getName(), ResourceBundleExtractorDoclet.SUMMARY_TAGLET_NAME);
if(!helpText.containsKey(walkerSummary))
return "";
return helpText.getString(walkerSummary);
@@ -137,7 +126,7 @@ public class WalkerManager extends PluginManager {
* @return Walker full description, or "" if none exists.
*/
public String getWalkerDescriptionText(Class extends Walker> walkerType) {
- String walkerDescription = String.format("%s.%s",walkerType.getName(), DescriptionTaglet.NAME);
+ String walkerDescription = String.format("%s.%s",walkerType.getName(), ResourceBundleExtractorDoclet.DESCRIPTION_TAGLET_NAME);
if(!helpText.containsKey(walkerDescription))
return "";
return helpText.getString(walkerDescription);
@@ -188,19 +177,7 @@ public class WalkerManager extends PluginManager {
* @return The list of allowed reference meta data.
*/
public static List getAllowsMetaData(Class extends Walker> walkerClass) {
- Allows allowsDataSource = getWalkerAllowed(walkerClass);
- if (allowsDataSource == null)
- return Collections.emptyList();
- return Arrays.asList(allowsDataSource.referenceMetaData());
- }
-
- /**
- * Get a list of RODs allowed by the walker.
- * @param walker Walker to query.
- * @return The list of allowed reference meta data.
- */
- public static List getAllowsMetaData(Walker walker) {
- return getAllowsMetaData(walker.getClass());
+ return Collections.emptyList();
}
/**
@@ -237,24 +214,7 @@ public class WalkerManager extends PluginManager {
* @return True if the walker forbids this data type. False otherwise.
*/
public static boolean isAllowed(Class extends Walker> walkerClass, ReferenceOrderedDataSource rod) {
- Allows allowsDataSource = getWalkerAllowed(walkerClass);
-
- // Allows is less restrictive than requires. If an allows
- // clause is not specified, any kind of data is allowed.
- if( allowsDataSource == null )
- return true;
-
- // The difference between unspecified RMD and the empty set of metadata can't be detected.
- // Treat an empty 'allows' as 'allow everything'. Maybe we can have a special RMD flag to account for this
- // case in the future.
- if( allowsDataSource.referenceMetaData().length == 0 )
- return true;
-
- for( RMD allowed: allowsDataSource.referenceMetaData() ) {
- if( rod.matchesNameAndRecordType(allowed.name(),allowed.type()) )
- return true;
- }
- return false;
+ return true;
}
/**
@@ -294,8 +254,7 @@ public class WalkerManager extends PluginManager {
* @return The list of required reference meta data.
*/
public static List getRequiredMetaData(Class extends Walker> walkerClass) {
- Requires requiresDataSource = getWalkerRequirements(walkerClass);
- return Arrays.asList(requiresDataSource.referenceMetaData());
+ return Collections.emptyList();
}
/**
diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reference/ReferenceDataSourceProgressListener.java b/public/java/src/org/broadinstitute/sting/gatk/arguments/DbsnpArgumentCollection.java
similarity index 68%
rename from public/java/src/org/broadinstitute/sting/gatk/datasources/reference/ReferenceDataSourceProgressListener.java
rename to public/java/src/org/broadinstitute/sting/gatk/arguments/DbsnpArgumentCollection.java
index 8dace8fe4..2f4dd06e2 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reference/ReferenceDataSourceProgressListener.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/DbsnpArgumentCollection.java
@@ -23,8 +23,26 @@
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
-package org.broadinstitute.sting.gatk.datasources.reference;
+package org.broadinstitute.sting.gatk.arguments;
+
+
+import org.broadinstitute.sting.commandline.Input;
+import org.broadinstitute.sting.commandline.RodBinding;
+import org.broadinstitute.sting.utils.variantcontext.VariantContext;
+import org.simpleframework.xml.*;
+
+/**
+ * @author ebanks
+ * @version 1.0
+ */
+@Root
+public class DbsnpArgumentCollection {
+
+ /**
+ * A dbSNP VCF file.
+ */
+ @Input(fullName="dbsnp", shortName = "D", doc="dbSNP file", required=false)
+ public RodBinding dbsnp;
-public interface ReferenceDataSourceProgressListener {
- public void percentProgress(int percent);
}
+
diff --git a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java
index ee2e85025..fd39d46b0 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java
@@ -101,6 +101,8 @@ public class GATKArgumentCollection {
@Input(fullName = "reference_sequence", shortName = "R", doc = "Reference sequence file", required = false)
public File referenceFile = null;
+ @Deprecated
+ @Hidden
@ElementList(required = false)
@Input(fullName = "rodBind", shortName = "B", doc = "Bindings for reference-ordered data, in the form :, ", required = false)
public ArrayList RODBindings = new ArrayList();
@@ -117,11 +119,6 @@ public class GATKArgumentCollection {
@Argument(fullName = "nonDeterministicRandomSeed", shortName = "ndrs", doc = "Makes the GATK behave non deterministically, that is, the random numbers generated will be different in every run", required = false)
public boolean nonDeterministicRandomSeed = false;
-
- @Element(required = false)
- @Input(fullName = "DBSNP", shortName = "D", doc = "DBSNP file", required = false)
- public String DBSNPFile = null;
-
/**
* The override mechanism in the GATK, by default, populates the command-line arguments, then
* the defaults from the walker annotations. Unfortunately, walker annotations should be trumped
@@ -345,14 +342,6 @@ public class GATKArgumentCollection {
return false;
}
}
- if (other.RODBindings.size() != RODBindings.size()) {
- return false;
- }
- for (int x = 0; x < RODBindings.size(); x++) {
- if (!RODBindings.get(x).equals(other.RODBindings.get(x))) {
- return false;
- }
- }
if (!other.samFiles.equals(this.samFiles)) {
return false;
}
@@ -380,9 +369,6 @@ public class GATKArgumentCollection {
if (!other.excludeIntervals.equals(this.excludeIntervals)) {
return false;
}
- if (!other.DBSNPFile.equals(this.DBSNPFile)) {
- return false;
- }
if (!other.unsafe.equals(this.unsafe)) {
return false;
}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/arguments/StandardVariantContextInputArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/arguments/StandardVariantContextInputArgumentCollection.java
new file mode 100644
index 000000000..654770fe7
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/StandardVariantContextInputArgumentCollection.java
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2011, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.gatk.arguments;
+
+
+import org.broadinstitute.sting.commandline.Input;
+import org.broadinstitute.sting.commandline.RodBinding;
+import org.broadinstitute.sting.utils.variantcontext.VariantContext;
+import org.simpleframework.xml.Root;
+
+/**
+ * @author ebanks
+ * @version 1.0
+ */
+@Root
+public class StandardVariantContextInputArgumentCollection {
+
+ /**
+ * Variants from this VCF file are used by this tool as input.
+ * The file must at least contain the standard VCF header lines, but
+ * can be empty (i.e., no variants are contained in the file).
+ */
+ @Input(fullName="variant", shortName = "V", doc="Input VCF file", required=true)
+ public RodBinding variants;
+
+}
+
diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ManagingReferenceOrderedView.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ManagingReferenceOrderedView.java
index 223659a46..d065635c8 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ManagingReferenceOrderedView.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ManagingReferenceOrderedView.java
@@ -1,8 +1,10 @@
package org.broadinstitute.sting.gatk.datasources.providers;
+import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
+import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
import org.broadinstitute.sting.utils.GenomeLoc;
import java.util.ArrayList;
@@ -49,11 +51,14 @@ public class ManagingReferenceOrderedView implements ReferenceOrderedView {
* @param loc Locus at which to track.
* @return A tracker containing information about this locus.
*/
- public RefMetaDataTracker getReferenceOrderedDataAtLocus( GenomeLoc loc ) {
- RefMetaDataTracker tracks = new RefMetaDataTracker(states.size());
+ public RefMetaDataTracker getReferenceOrderedDataAtLocus( GenomeLoc loc, ReferenceContext referenceContext ) {
+ List bindings = states.isEmpty() ? Collections.emptyList() : new ArrayList(states.size());
+
for ( ReferenceOrderedDataState state: states )
- tracks.bind( state.dataSource.getName(), state.iterator.seekForward(loc) );
- return tracks;
+ // todo -- warning, I removed the reference to the name from states
+ bindings.add( state.iterator.seekForward(loc) );
+
+ return new RefMetaDataTracker(bindings, referenceContext);
}
/**
diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedView.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedView.java
index 2d46a85ac..939cbfe35 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedView.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedView.java
@@ -1,8 +1,9 @@
package org.broadinstitute.sting.gatk.datasources.providers;
+import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.GenomeLoc;
public interface ReferenceOrderedView extends View {
- RefMetaDataTracker getReferenceOrderedDataAtLocus( GenomeLoc loc );
+ RefMetaDataTracker getReferenceOrderedDataAtLocus( GenomeLoc loc, ReferenceContext refContext );
}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/RodLocusView.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/RodLocusView.java
index 39c632539..c38b09334 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/RodLocusView.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/RodLocusView.java
@@ -26,6 +26,7 @@
package org.broadinstitute.sting.gatk.datasources.providers;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
+import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
@@ -45,7 +46,8 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
*/
private RODMergingIterator rodQueue = null;
- RefMetaDataTracker tracker = null;
+ Collection allTracksHere;
+
GenomeLoc lastLoc = null;
RODRecordList interval = null;
@@ -94,12 +96,12 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
}
rodQueue = new RODMergingIterator(iterators);
-
- //throw new StingException("RodLocusView currently disabled");
}
- public RefMetaDataTracker getReferenceOrderedDataAtLocus( GenomeLoc loc ) {
- return tracker;
+ public RefMetaDataTracker getReferenceOrderedDataAtLocus( GenomeLoc loc, ReferenceContext referenceContext ) {
+ // special case the interval again -- add it into the ROD
+ if ( interval != null ) { allTracksHere.add(interval); }
+ return new RefMetaDataTracker(allTracksHere, referenceContext);
}
public boolean hasNext() {
@@ -122,10 +124,7 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
if ( DEBUG ) System.out.printf("In RodLocusView.next(): creating tracker...%n");
- // Update the tracker here for use
- Collection allTracksHere = getSpanningTracks(datum);
- tracker = createTracker(allTracksHere);
-
+ allTracksHere = getSpanningTracks(datum);
GenomeLoc rodSite = datum.getLocation();
GenomeLoc site = genomeLocParser.createGenomeLoc( rodSite.getContig(), rodSite.getStart(), rodSite.getStart());
@@ -137,19 +136,6 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
return new AlignmentContext(site, new ReadBackedPileupImpl(site), skippedBases);
}
- private RefMetaDataTracker createTracker( Collection allTracksHere ) {
- RefMetaDataTracker t = new RefMetaDataTracker(allTracksHere.size());
- for ( RODRecordList track : allTracksHere ) {
- if ( ! t.hasROD(track.getName()) )
- t.bind(track.getName(), track);
- }
-
- // special case the interval again -- add it into the ROD
- if ( interval != null ) { t.bind(interval.getName(), interval); }
-
- return t;
- }
-
private Collection getSpanningTracks(RODRecordList marker) {
return rodQueue.allElementsLTE(marker);
}
@@ -197,10 +183,6 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
return getSkippedBases(getLocOneBeyondShard());
}
- public RefMetaDataTracker getTracker() {
- return tracker;
- }
-
/**
* Closes the current view.
*/
@@ -209,6 +191,6 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
state.dataSource.close( state.iterator );
rodQueue = null;
- tracker = null;
+ allTracksHere = null;
}
}
\ No newline at end of file
diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/LowMemoryIntervalSharder.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/LowMemoryIntervalSharder.java
index 198f7d7d3..ba6321121 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/LowMemoryIntervalSharder.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/LowMemoryIntervalSharder.java
@@ -59,8 +59,8 @@ public class LowMemoryIntervalSharder implements Iterator {
*/
public FilePointer next() {
FilePointer current = wrappedIterator.next();
- //while(wrappedIterator.hasNext() && current.minus(wrappedIterator.peek()) == 0)
- // current = current.combine(parser,wrappedIterator.next());
+ while(wrappedIterator.hasNext() && current.minus(wrappedIterator.peek()) == 0)
+ current = current.combine(parser,wrappedIterator.next());
return current;
}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java
index 6064806f3..572970349 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java
@@ -893,6 +893,7 @@ public class SAMDataSource {
* Custom representation of interval bounds.
* Makes it simpler to track current position.
*/
+ private int[] intervalContigIndices;
private int[] intervalStarts;
private int[] intervalEnds;
@@ -917,12 +918,14 @@ public class SAMDataSource {
if(foundMappedIntervals) {
if(keepOnlyUnmappedReads)
throw new ReviewedStingException("Tried to apply IntervalOverlapFilteringIterator to a mixed of mapped and unmapped intervals. Please apply this filter to only mapped or only unmapped reads");
+ this.intervalContigIndices = new int[intervals.size()];
this.intervalStarts = new int[intervals.size()];
this.intervalEnds = new int[intervals.size()];
int i = 0;
for(GenomeLoc interval: intervals) {
- intervalStarts[i] = (int)interval.getStart();
- intervalEnds[i] = (int)interval.getStop();
+ intervalContigIndices[i] = interval.getContigIndex();
+ intervalStarts[i] = interval.getStart();
+ intervalEnds[i] = interval.getStop();
i++;
}
}
@@ -961,11 +964,10 @@ public class SAMDataSource {
while(nextRead == null && (keepOnlyUnmappedReads || currentBound < intervalStarts.length)) {
if(!keepOnlyUnmappedReads) {
// Mapped read filter; check against GenomeLoc-derived bounds.
- if(candidateRead.getAlignmentEnd() >= intervalStarts[currentBound] ||
- (candidateRead.getReadUnmappedFlag() && candidateRead.getAlignmentStart() >= intervalStarts[currentBound])) {
- // This read ends after the current interval begins (or, if unmapped, starts within the bounds of the interval.
+ if(readEndsOnOrAfterStartingBound(candidateRead)) {
+ // This read ends after the current interval begins.
// Promising, but this read must be checked against the ending bound.
- if(candidateRead.getAlignmentStart() <= intervalEnds[currentBound]) {
+ if(readStartsOnOrBeforeEndingBound(candidateRead)) {
// Yes, this read is within both bounds. This must be our next read.
nextRead = candidateRead;
break;
@@ -993,6 +995,37 @@ public class SAMDataSource {
candidateRead = iterator.next();
}
}
+
+ /**
+ * Check whether the read ends on or after the start of the current bound. If the read is unmapped but placed, its
+ * end will be distorted, so its alignment start is also compared against the start of the bound.
+ * @param read The read to position-check.
+ * @return True if the read ends (or, if unmapped but placed, starts) on or after the start of the current bound. False otherwise.
+ */
+ private boolean readEndsOnOrAfterStartingBound(final SAMRecord read) {
+ return
+ // Read ends on a later contig, or...
+ read.getReferenceIndex() > intervalContigIndices[currentBound] ||
+ // Read ends on this contig...
+ (read.getReferenceIndex() == intervalContigIndices[currentBound] &&
+ // either after this location, or...
+ (read.getAlignmentEnd() >= intervalStarts[currentBound] ||
+ // read is unmapped but positioned and alignment start is on or after this start point.
+ (read.getReadUnmappedFlag() && read.getAlignmentStart() >= intervalStarts[currentBound])));
+ }
+
+ /**
+ * Check whether the read starts on or before the end of the current bound.
+ * @param read The read to position-check.
+ * @return True if the read starts on or before the end of the current bound. False otherwise.
+ */
+ private boolean readStartsOnOrBeforeEndingBound(final SAMRecord read) {
+ return
+ // Read starts on a prior contig, or...
+ read.getReferenceIndex() < intervalContigIndices[currentBound] ||
+ // Read starts on this contig and the alignment start is registered before this end point.
+ (read.getReferenceIndex() == intervalContigIndices[currentBound] && read.getAlignmentStart() <= intervalEnds[currentBound]);
+ }
}
/**
diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reference/ReferenceDataSource.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reference/ReferenceDataSource.java
index ef69a8e5f..c8c79bb14 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reference/ReferenceDataSource.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reference/ReferenceDataSource.java
@@ -41,7 +41,7 @@ import java.io.File;
* Loads reference data from fasta file
* Looks for fai and dict files, and tries to create them if they don't exist
*/
-public class ReferenceDataSource implements ReferenceDataSourceProgressListener {
+public class ReferenceDataSource {
private IndexedFastaSequenceFile index;
/** our log, which we want to capture anything from this class */
@@ -75,7 +75,7 @@ public class ReferenceDataSource implements ReferenceDataSourceProgressListener
// get exclusive lock
if (!indexLock.exclusiveLock())
throw new UserException.CouldNotCreateReferenceIndexFileBecauseOfLock(dictFile);
- FastaSequenceIndexBuilder faiBuilder = new FastaSequenceIndexBuilder(fastaFile, this);
+ FastaSequenceIndexBuilder faiBuilder = new FastaSequenceIndexBuilder(fastaFile, true);
FastaSequenceIndex sequenceIndex = faiBuilder.createIndex();
FastaSequenceIndexBuilder.saveAsFaiFile(sequenceIndex, indexFile);
}
@@ -194,13 +194,4 @@ public class ReferenceDataSource implements ReferenceDataSourceProgressListener
public IndexedFastaSequenceFile getReference() {
return this.index;
}
-
- /**
- * Notify user of progress in creating fai file
- * @param percent Percent of fasta file read as a percent
- */
- public void percentProgress(int percent) {
- System.out.println(String.format("PROGRESS UPDATE: file is %d percent complete", percent));
- }
-
}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/rmd/ReferenceOrderedDataPool.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/rmd/ReferenceOrderedDataPool.java
index abd5929eb..9d5a54f58 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/datasources/rmd/ReferenceOrderedDataPool.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/rmd/ReferenceOrderedDataPool.java
@@ -27,7 +27,7 @@ package org.broadinstitute.sting.gatk.datasources.rmd;
import net.sf.samtools.SAMSequenceDictionary;
import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
-import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder;
+import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder;
import org.broadinstitute.sting.gatk.refdata.utils.FlashBackIterator;
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/rmd/ReferenceOrderedDataSource.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/rmd/ReferenceOrderedDataSource.java
index 60b68bda5..18679dd77 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/datasources/rmd/ReferenceOrderedDataSource.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/rmd/ReferenceOrderedDataSource.java
@@ -29,7 +29,7 @@ import org.broadinstitute.sting.commandline.Tags;
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
-import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder;
+import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder;
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
import org.broadinstitute.sting.utils.GenomeLoc;
@@ -110,11 +110,11 @@ public class ReferenceOrderedDataSource {
}
public Class getType() {
- return builder.getAvailableTrackNamesAndTypes().get(fileDescriptor.getType().toUpperCase());
+ return builder.getFeatureManager().getByTriplet(fileDescriptor).getCodecClass();
}
public Class getRecordType() {
- return builder.createCodec(getType(),getName()).getFeatureType();
+ return builder.getFeatureManager().getByTriplet(fileDescriptor).getFeatureClass();
}
public File getFile() {
diff --git a/public/java/src/org/broadinstitute/sting/gatk/examples/GATKDocsExample.java b/public/java/src/org/broadinstitute/sting/gatk/examples/GATKDocsExample.java
new file mode 100644
index 000000000..4541a0537
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/gatk/examples/GATKDocsExample.java
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2011, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.gatk.examples;
+
+import org.broadinstitute.sting.commandline.Argument;
+import org.broadinstitute.sting.commandline.ArgumentCollection;
+import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
+import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
+import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
+import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
+import org.broadinstitute.sting.gatk.walkers.RodWalker;
+
+/**
+ * [Short one sentence description of this walker]
+ *
+ *
+ * [Functionality of this walker]
+ *
+ *
+ * Input
+ *
+ * [Input description]
+ *
+ *
+ * Output
+ *
+ * [Output description]
+ *
+ *
+ * Examples
+ *
+ * java
+ * -jar GenomeAnalysisTK.jar
+ * -T $WalkerName
+ *
+ *
+ * @author Your Name
+ * @since Date created
+ */
+public class GATKDocsExample extends RodWalker {
+ /**
+ * Put detailed documentation about the argument here. No need to duplicate the summary information
+ * in doc annotation field, as that will be added before this text in the documentation page.
+ *
+ * Notes:
+ *
+ * This field can contain HTML as a normal javadoc
+ * Don't include information about the default value, as gatkdocs adds this automatically
+ * Try your best to describe in detail the behavior of the argument, as ultimately confusing
+ * docs here will just result in user posts on the forum
+ *
+ */
+ @Argument(fullName="full", shortName="short", doc="Brief summary of argument [~ 80 characters of text]", required=false)
+ private boolean myWalkerArgument = false;
+
+ public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { return 0; } // placeholder: ignores its inputs and always yields 0
+ public Integer reduceInit() { return 0; } // initial value handed to reduce() as the running sum
+ public Integer reduce(Integer value, Integer sum) { return value + sum; } // adds the mapped value to the running sum
+ public void onTraversalDone(Integer result) { } // intentionally empty: the example does nothing with the final result
+}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java b/public/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java
index 59fb4aa9e..3b9e35311 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java
@@ -97,7 +97,6 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
if (!( walker instanceof TreeReducible ))
throw new IllegalArgumentException("The GATK can currently run in parallel only with TreeReducible walkers");
- traversalEngine.startTimers();
ReduceTree reduceTree = new ReduceTree(this);
initializeWalker(walker);
diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java b/public/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java
index 9466fdf75..09ab4bd44 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java
@@ -44,15 +44,16 @@ public class LinearMicroScheduler extends MicroScheduler {
* @param shardStrategy A strategy for sharding the data.
*/
public Object execute(Walker walker, ShardStrategy shardStrategy) {
- traversalEngine.startTimers();
walker.initialize();
Accumulator accumulator = Accumulator.create(engine,walker);
+ boolean done = walker.isDone();
int counter = 0;
- for (Shard shard : processingTracker.onlyOwned(shardStrategy, engine.getName())) {
- if ( shard == null ) // we ran out of shards that aren't owned
+ for (Shard shard : shardStrategy ) {
+ if ( done || shard == null ) // we ran out of shards that aren't owned
break;
+ traversalEngine.startTimersIfNecessary();
if(shard.getShardType() == Shard.ShardType.LOCUS) {
LocusWalker lWalker = (LocusWalker)walker;
WindowMaker windowMaker = new WindowMaker(shard, engine.getGenomeLocParser(), getReadIterator(shard), shard.getGenomeLocs(), engine.getSampleMetadata());
@@ -61,6 +62,7 @@ public class LinearMicroScheduler extends MicroScheduler {
Object result = traversalEngine.traverse(walker, dataProvider, accumulator.getReduceInit());
accumulator.accumulate(dataProvider,result);
dataProvider.close();
+ if ( walker.isDone() ) break;
}
windowMaker.close();
}
@@ -70,6 +72,8 @@ public class LinearMicroScheduler extends MicroScheduler {
accumulator.accumulate(dataProvider,result);
dataProvider.close();
}
+
+ done = walker.isDone();
}
Object result = accumulator.finishTraversal();
diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java b/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java
index 23e5769f1..e731b9864 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java
@@ -39,14 +39,10 @@ import org.broadinstitute.sting.gatk.traversals.*;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
-import org.broadinstitute.sting.utils.threading.*;
import javax.management.JMException;
import javax.management.MBeanServer;
import javax.management.ObjectName;
-import java.io.FileNotFoundException;
-import java.io.FileOutputStream;
-import java.io.PrintStream;
import java.lang.management.ManagementFactory;
import java.util.Collection;
@@ -83,8 +79,6 @@ public abstract class MicroScheduler implements MicroSchedulerMBean {
private final MBeanServer mBeanServer;
private final ObjectName mBeanName;
- protected GenomeLocProcessingTracker processingTracker;
-
/**
* MicroScheduler factory function. Create a microscheduler appropriate for reducing the
* selected walker.
@@ -98,11 +92,6 @@ public abstract class MicroScheduler implements MicroSchedulerMBean {
* @return The best-fit microscheduler.
*/
public static MicroScheduler create(GenomeAnalysisEngine engine, Walker walker, SAMDataSource reads, IndexedFastaSequenceFile reference, Collection rods, int nThreadsToUse) {
- if (engine.getArguments().processingTrackerFile != null) {
- if ( walker instanceof ReadWalker )
- throw new UserException.BadArgumentValue("C", String.format("Distributed GATK processing not enabled for read walkers"));
- }
-
if (walker instanceof TreeReducible && nThreadsToUse > 1) {
if(walker.isReduceByInterval())
throw new UserException.BadArgumentValue("nt", String.format("The analysis %s aggregates results by interval. Due to a current limitation of the GATK, analyses of this type do not currently support parallel execution. Please run your analysis without the -nt option.", engine.getWalkerName(walker.getClass())));
@@ -157,33 +146,6 @@ public abstract class MicroScheduler implements MicroSchedulerMBean {
catch (JMException ex) {
throw new ReviewedStingException("Unable to register microscheduler with JMX", ex);
}
-
- //
- // create the processing tracker
- //
- if ( engine.getArguments().processingTrackerFile != null ) {
- logger.warn("Distributed GATK is an experimental engine feature, and is likely to not work correctly or reliably.");
- if ( engine.getArguments().restartProcessingTracker && engine.getArguments().processingTrackerFile.exists() ) {
- engine.getArguments().processingTrackerFile.delete();
- logger.info("Deleting ProcessingTracker file " + engine.getArguments().processingTrackerFile);
- }
-
- PrintStream statusStream = null;
- if ( engine.getArguments().processingTrackerStatusFile != null ) {
- try {
- statusStream = new PrintStream(new FileOutputStream(engine.getArguments().processingTrackerStatusFile));
- } catch ( FileNotFoundException e) {
- throw new UserException.CouldNotCreateOutputFile(engine.getArguments().processingTrackerStatusFile, e);
- }
- }
-
- ClosableReentrantLock lock = new SharedFileThreadSafeLock(engine.getArguments().processingTrackerFile, engine.getArguments().processTrackerID);
- processingTracker = new FileBackedGenomeLocProcessingTracker(engine.getArguments().processingTrackerFile, engine.getGenomeLocParser(), lock, statusStream) ;
- logger.info("Creating ProcessingTracker using shared file " + engine.getArguments().processingTrackerFile + " process.id = " + engine.getName() + " CID = " + engine.getArguments().processTrackerID);
- } else {
- // create a NoOp version that doesn't do anything but say "yes"
- processingTracker = new NoOpGenomeLocProcessingTracker();
- }
}
/**
diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/ShardTraverser.java b/public/java/src/org/broadinstitute/sting/gatk/executive/ShardTraverser.java
index 6136bd68d..2b6488ada 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/executive/ShardTraverser.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/executive/ShardTraverser.java
@@ -57,6 +57,7 @@ public class ShardTraverser implements Callable {
public Object call() {
try {
+ traversalEngine.startTimersIfNecessary();
long startTime = System.currentTimeMillis();
Object accumulator = walker.reduceInit();
diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/FailsVendorQualityCheckReadFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/FailsVendorQualityCheckFilter.java
similarity index 95%
rename from public/java/src/org/broadinstitute/sting/gatk/filters/FailsVendorQualityCheckReadFilter.java
rename to public/java/src/org/broadinstitute/sting/gatk/filters/FailsVendorQualityCheckFilter.java
index cd77a9e7e..4ec451567 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/filters/FailsVendorQualityCheckReadFilter.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/filters/FailsVendorQualityCheckFilter.java
@@ -34,7 +34,7 @@ import net.sf.samtools.SAMRecord;
* Filter out FailsVendorQualityCheck reads.
*/
-public class FailsVendorQualityCheckReadFilter extends ReadFilter {
+public class FailsVendorQualityCheckFilter extends ReadFilter {
public boolean filterOut( final SAMRecord read ) {
return read.getReadFailsVendorQualityCheckFlag();
}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityReadFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityFilter.java
similarity index 96%
rename from public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityReadFilter.java
rename to public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityFilter.java
index 75369b306..ed9c37dca 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityReadFilter.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityFilter.java
@@ -35,7 +35,7 @@ import org.broadinstitute.sting.commandline.Argument;
* @version 0.1
*/
-public class MappingQualityReadFilter extends ReadFilter {
+public class MappingQualityFilter extends ReadFilter {
@Argument(fullName = "min_mapping_quality_score", shortName = "mmq", doc = "Minimum read mapping quality required to consider a read for calling", required = false)
public int MIN_MAPPING_QUALTY_SCORE = 10;
diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityUnavailableReadFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityUnavailableFilter.java
similarity index 95%
rename from public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityUnavailableReadFilter.java
rename to public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityUnavailableFilter.java
index 1afec36d1..ccdb40d31 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityUnavailableReadFilter.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityUnavailableFilter.java
@@ -34,7 +34,7 @@ import org.broadinstitute.sting.utils.QualityUtils;
* @version 0.1
*/
-public class MappingQualityUnavailableReadFilter extends ReadFilter {
+public class MappingQualityUnavailableFilter extends ReadFilter {
public boolean filterOut(SAMRecord rec) {
return (rec.getMappingQuality() == QualityUtils.MAPPING_QUALITY_UNAVAILABLE);
}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityZeroReadFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityZeroFilter.java
similarity index 95%
rename from public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityZeroReadFilter.java
rename to public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityZeroFilter.java
index e49d4117c..57db8419c 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityZeroReadFilter.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityZeroFilter.java
@@ -33,7 +33,7 @@ import net.sf.samtools.SAMRecord;
* @version 0.1
*/
-public class MappingQualityZeroReadFilter extends ReadFilter {
+public class MappingQualityZeroFilter extends ReadFilter {
public boolean filterOut(SAMRecord rec) {
return (rec.getMappingQuality() == 0);
}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/NotPrimaryAlignmentReadFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/NotPrimaryAlignmentFilter.java
similarity index 95%
rename from public/java/src/org/broadinstitute/sting/gatk/filters/NotPrimaryAlignmentReadFilter.java
rename to public/java/src/org/broadinstitute/sting/gatk/filters/NotPrimaryAlignmentFilter.java
index 31c2144ce..50cd30f71 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/filters/NotPrimaryAlignmentReadFilter.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/filters/NotPrimaryAlignmentFilter.java
@@ -34,7 +34,7 @@ import net.sf.samtools.SAMRecord;
* Filter out duplicate reads.
*/
-public class NotPrimaryAlignmentReadFilter extends ReadFilter {
+public class NotPrimaryAlignmentFilter extends ReadFilter {
public boolean filterOut( final SAMRecord read ) {
return read.getNotPrimaryAlignmentFlag();
}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/PlatformFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/PlatformFilter.java
index 30b2f828d..8e241bb2c 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/filters/PlatformFilter.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/filters/PlatformFilter.java
@@ -36,7 +36,7 @@ import org.broadinstitute.sting.utils.sam.ReadUtils;
* @version 0.1
*/
public class PlatformFilter extends ReadFilter {
- @Argument(fullName = "PLFilterName", shortName = "PLFilterName", doc="Discard reads with RG:PL attribute containing this strign", required=false)
+ @Argument(fullName = "PLFilterName", shortName = "PLFilterName", doc="Discard reads with RG:PL attribute containing this string", required=false)
protected String[] PLFilterNames;
public boolean filterOut(SAMRecord rec) {
diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/ReadFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/ReadFilter.java
index 227637761..bf3ce352a 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/filters/ReadFilter.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/filters/ReadFilter.java
@@ -2,10 +2,14 @@ package org.broadinstitute.sting.gatk.filters;
import net.sf.picard.filter.SamRecordFilter;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
+import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
/**
* A SamRecordFilter that also depends on the header.
*/
+@DocumentedGATKFeature(
+ groupName = "Read filters",
+ summary = "GATK Engine arguments that filter or transfer incoming SAM/BAM data files" )
public abstract class ReadFilter implements SamRecordFilter {
/**
* Sets the header for use by this filter.
diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/ReassignMappingQualityFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/ReassignMappingQualityFilter.java
new file mode 100644
index 000000000..50a1384fa
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/gatk/filters/ReassignMappingQualityFilter.java
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2009 The Broad Institute
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.gatk.filters;
+
+import net.sf.samtools.SAMRecord;
+import org.broadinstitute.sting.commandline.Argument;
+
+/**
+ * A read filter (transformer) that sets the mapping quality of every read to a given value.
+ *
+ *
+ * If a BAM file contains erroneous or missing mapping qualities, this 'filter' will set
+ * the mapping quality of every read to a given value. The default value is 60.
+ *
+ *
+ *
+ * Input
+ *
+ * BAM file(s)
+ *
+ *
+ *
+ * Output
+ *
+ * BAM file(s) with all reads mapping qualities reassigned
+ *
+ *
+ * Examples
+ *
+ * java
+ * -jar GenomeAnalysisTK.jar
+ * -rf ReassignMappingQuality
+ * -DMQ 35
+ *
+ *
+ * @author carneiro
+ * @since 8/8/11
+ */
+
+public class ReassignMappingQualityFilter extends ReadFilter {
+
+ @Argument(fullName = "default_mapping_quality", shortName = "DMQ", doc = "Default read mapping quality to assign to all reads", required = false)
+ public int defaultMappingQuality = 60;
+
+ public boolean filterOut(SAMRecord rec) {
+ rec.setMappingQuality(defaultMappingQuality); // side effect: overwrites the read's mapping quality in place
+ return false; // always false; going by the method name, no read is ever reported as filtered out
+ }
+}
+
diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/storage/VCFWriterStorage.java b/public/java/src/org/broadinstitute/sting/gatk/io/storage/VCFWriterStorage.java
index 1da03e9c2..ebb4cbe66 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/io/storage/VCFWriterStorage.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/io/storage/VCFWriterStorage.java
@@ -87,8 +87,8 @@ public class VCFWriterStorage implements Storage, VCFWriter {
writer.writeHeader(stub.getVCFHeader());
}
- public void add(VariantContext vc, byte ref) {
- writer.add(vc, ref);
+ public void add(VariantContext vc) {
+ writer.add(vc);
}
/**
@@ -117,7 +117,7 @@ public class VCFWriterStorage implements Storage, VCFWriter {
BasicFeatureSource source = BasicFeatureSource.getFeatureSource(file.getAbsolutePath(), new VCFCodec(), false);
for ( VariantContext vc : source.iterator() ) {
- target.writer.add(vc, vc.getReferenceBaseForIndel());
+ target.writer.add(vc);
}
source.close();
diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/OutputStreamArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/OutputStreamArgumentTypeDescriptor.java
index 8bc97c886..da4eb3955 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/OutputStreamArgumentTypeDescriptor.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/OutputStreamArgumentTypeDescriptor.java
@@ -33,6 +33,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.io.File;
import java.io.OutputStream;
import java.lang.reflect.Constructor;
+import java.lang.reflect.Type;
/**
* Insert an OutputStreamStub instead of a full-fledged concrete OutputStream implementations.
@@ -69,16 +70,21 @@ public class OutputStreamArgumentTypeDescriptor extends ArgumentTypeDescriptor {
}
@Override
- public Object createTypeDefault(ParsingEngine parsingEngine,ArgumentSource source,Class type) {
+ public String typeDefaultDocString(ArgumentSource source) {
+ return "stdout";
+ }
+
+ @Override
+ public Object createTypeDefault(ParsingEngine parsingEngine,ArgumentSource source, Type type) {
if(!source.isRequired())
throw new ReviewedStingException("BUG: tried to create type default for argument type descriptor that can't support a type default.");
OutputStreamStub stub = new OutputStreamStub(defaultOutputStream);
engine.addOutput(stub);
- return createInstanceOfClass(type,stub);
+ return createInstanceOfClass((Class)type,stub);
}
@Override
- public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Class type, ArgumentMatches matches ) {
+ public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches ) {
ArgumentDefinition definition = createDefaultArgumentDefinition(source);
String fileName = getArgumentValue( definition, matches );
@@ -91,7 +97,7 @@ public class OutputStreamArgumentTypeDescriptor extends ArgumentTypeDescriptor {
engine.addOutput(stub);
- Object result = createInstanceOfClass(type,stub);
+ Object result = createInstanceOfClass(makeRawTypeIfNecessary(type),stub);
// WARNING: Side effects required by engine!
parsingEngine.addTags(result,getArgumentTags(matches));
diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileReaderArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileReaderArgumentTypeDescriptor.java
index f124c2302..83d1b7eb2 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileReaderArgumentTypeDescriptor.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileReaderArgumentTypeDescriptor.java
@@ -34,6 +34,7 @@ import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.sam.SAMFileReaderBuilder;
import java.io.File;
+import java.lang.reflect.Type;
/**
* Describe how to parse SAMFileReaders.
@@ -52,14 +53,13 @@ public class SAMFileReaderArgumentTypeDescriptor extends ArgumentTypeDescriptor
this.engine = engine;
}
-
@Override
public boolean supports( Class type ) {
return SAMFileReader.class.isAssignableFrom(type);
}
@Override
- public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Class type, ArgumentMatches matches ) {
+ public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches ) {
SAMFileReaderBuilder builder = new SAMFileReaderBuilder();
String readerFileName = getArgumentValue( createDefaultArgumentDefinition(source), matches );
diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterArgumentTypeDescriptor.java
index 38640eda0..43ec934ed 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterArgumentTypeDescriptor.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterArgumentTypeDescriptor.java
@@ -34,6 +34,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.io.File;
import java.io.OutputStream;
import java.lang.annotation.Annotation;
+import java.lang.reflect.Type;
import java.util.Arrays;
import java.util.List;
@@ -93,7 +94,12 @@ public class SAMFileWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor
}
@Override
- public Object createTypeDefault(ParsingEngine parsingEngine,ArgumentSource source,Class> type) {
+ public String typeDefaultDocString(ArgumentSource source) {
+ return "stdout";
+ }
+
+ @Override
+ public Object createTypeDefault(ParsingEngine parsingEngine,ArgumentSource source, Type type) {
if(!source.isRequired())
throw new ReviewedStingException("BUG: tried to create type default for argument type descriptor that can't support a type default.");
SAMFileWriterStub stub = new SAMFileWriterStub(engine,defaultOutputStream);
@@ -102,7 +108,7 @@ public class SAMFileWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor
}
@Override
- public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Class type, ArgumentMatches matches ) {
+ public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches ) {
// Extract all possible parameters that could be passed to a BAM file writer?
ArgumentDefinition bamArgumentDefinition = createBAMArgumentDefinition(source);
String writerFileName = getArgumentValue( bamArgumentDefinition, matches );
diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterArgumentTypeDescriptor.java
index 615841f02..98026554b 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterArgumentTypeDescriptor.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterArgumentTypeDescriptor.java
@@ -32,6 +32,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.io.File;
import java.io.OutputStream;
+import java.lang.reflect.Type;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
@@ -108,7 +109,12 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
}
@Override
- public Object createTypeDefault(ParsingEngine parsingEngine,ArgumentSource source,Class> type) {
+ public String typeDefaultDocString(ArgumentSource source) {
+ return "stdout";
+ }
+
+ @Override
+ public Object createTypeDefault(ParsingEngine parsingEngine,ArgumentSource source, Type type) {
if(!source.isRequired())
throw new ReviewedStingException("BUG: tried to create type default for argument type descriptor that can't support a type default.");
VCFWriterStub stub = new VCFWriterStub(engine, defaultOutputStream, false, argumentSources, false, false);
@@ -124,7 +130,7 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
* @return Transform from the matches into the associated argument.
*/
@Override
- public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Class type, ArgumentMatches matches ) {
+ public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches ) {
ArgumentDefinition defaultArgumentDefinition = createDefaultArgumentDefinition(source);
// Get the filename for the genotype file, if it exists. If not, we'll need to send output to out.
String writerFileName = getArgumentValue(defaultArgumentDefinition,matches);
diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterStub.java b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterStub.java
index bb84f9457..936243f9d 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterStub.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterStub.java
@@ -25,6 +25,7 @@
package org.broadinstitute.sting.gatk.io.stubs;
+import net.sf.samtools.SAMSequenceRecord;
import org.broadinstitute.sting.gatk.CommandLineExecutable;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.io.OutputTracker;
@@ -177,14 +178,23 @@ public class VCFWriterStub implements Stub, VCFWriter {
vcfHeader = header;
// Check for the command-line argument header line. If not present, add it in.
- VCFHeaderLine commandLineArgHeaderLine = getCommandLineArgumentHeaderLine();
- boolean foundCommandLineHeaderLine = false;
- for(VCFHeaderLine line: vcfHeader.getMetaData()) {
- if(line.getKey().equals(commandLineArgHeaderLine.getKey()))
- foundCommandLineHeaderLine = true;
+ if ( !skipWritingHeader ) {
+ VCFHeaderLine commandLineArgHeaderLine = getCommandLineArgumentHeaderLine();
+ boolean foundCommandLineHeaderLine = false;
+ for (VCFHeaderLine line: vcfHeader.getMetaData()) {
+ if ( line.getKey().equals(commandLineArgHeaderLine.getKey()) )
+ foundCommandLineHeaderLine = true;
+ }
+ if ( !foundCommandLineHeaderLine )
+ vcfHeader.addMetaDataLine(commandLineArgHeaderLine);
+
+ // also put in the reference contig header lines
+ String assembly = getReferenceAssembly(engine.getArguments().referenceFile.getName());
+ for ( SAMSequenceRecord contig : engine.getReferenceDataSource().getReference().getSequenceDictionary().getSequences() )
+ vcfHeader.addMetaDataLine(getContigHeaderLine(contig, assembly));
+
+ vcfHeader.addMetaDataLine(new VCFHeaderLine("reference", "file://" + engine.getArguments().referenceFile.getAbsolutePath()));
}
- if(!foundCommandLineHeaderLine && !skipWritingHeader)
- vcfHeader.addMetaDataLine(commandLineArgHeaderLine);
outputTracker.getStorage(this).writeHeader(vcfHeader);
}
@@ -192,8 +202,8 @@ public class VCFWriterStub implements Stub, VCFWriter {
/**
* @{inheritDoc}
*/
- public void add(VariantContext vc, byte ref) {
- outputTracker.getStorage(this).add(vc,ref);
+ public void add(VariantContext vc) {
+ outputTracker.getStorage(this).add(vc);
}
/**
@@ -220,4 +230,27 @@ public class VCFWriterStub implements Stub, VCFWriter {
CommandLineExecutable executable = JVMUtils.getObjectOfType(argumentSources,CommandLineExecutable.class);
return new VCFHeaderLine(executable.getAnalysisName(), "\"" + engine.createApproximateCommandLineArgumentString(argumentSources.toArray()) + "\"");
}
+
+ private VCFHeaderLine getContigHeaderLine(SAMSequenceRecord contig, String assembly) {
+ String val;
+ if ( assembly != null )
+ val = String.format("<ID=%s,length=%d,assembly=%s>", contig.getSequenceName(), contig.getSequenceLength(), assembly);
+ else
+ val = String.format("<ID=%s,length=%d>", contig.getSequenceName(), contig.getSequenceLength());
+ return new VCFHeaderLine("contig", val);
+ }
+
+ private String getReferenceAssembly(String refPath) {
+ // This doesn't need to be perfect as it's not a required VCF header line, but we might as well give it a shot
+ String assembly = null;
+ if ( refPath.indexOf("b37") != -1 || refPath.indexOf("v37") != -1 )
+ assembly = "b37";
+ else if ( refPath.indexOf("b36") != -1 )
+ assembly = "b36";
+ else if ( refPath.indexOf("hg18") != -1 )
+ assembly = "hg18";
+ else if ( refPath.indexOf("hg19") != -1 )
+ assembly = "hg19";
+ return assembly;
+ }
}
\ No newline at end of file
diff --git a/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java b/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java
index 69c0b3e0a..4d94130a8 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java
@@ -46,7 +46,6 @@ import org.simpleframework.xml.stream.Format;
import org.simpleframework.xml.stream.HyphenStyle;
import java.io.*;
-import java.net.InetAddress;
import java.security.NoSuchAlgorithmException;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
@@ -154,9 +153,13 @@ public class GATKRunReport {
private long nReads;
public enum PhoneHomeOption {
+ /** Disable phone home */
NO_ET,
+ /** Standard option. Writes to local repository if it can be found, or S3 otherwise */
STANDARD,
+ /** Force output to STDOUT. For debugging only */
STDOUT,
+ /** Force output to S3. For debugging only */
AWS_S3 // todo -- remove me -- really just for testing purposes
}
@@ -226,22 +229,6 @@ public class GATKRunReport {
}
- /**
- * Helper utility that calls into the InetAddress system to resolve the hostname. If this fails,
- * unresolvable gets returned instead.
- *
- * @return
- */
- private String resolveHostname() {
- try {
- return InetAddress.getLocalHost().getCanonicalHostName();
- }
- catch (java.net.UnknownHostException uhe) { // [beware typo in code sample -dmw]
- return "unresolvable";
- // handle exception
- }
- }
-
public void postReport(PhoneHomeOption type) {
logger.debug("Posting report of type " + type);
switch (type) {
@@ -321,7 +308,7 @@ public class GATKRunReport {
private void postReportToAWSS3() {
// modifying example code from http://jets3t.s3.amazonaws.com/toolkit/code-samples.html
- this.hostName = resolveHostname(); // we want to fill in the host name
+ this.hostName = Utils.resolveHostname(); // we want to fill in the host name
File localFile = postReportToLocalDisk(new File("./"));
logger.debug("Generating GATK report to AWS S3 based on local file " + localFile);
if ( localFile != null ) { // we succeeded in creating the local file
diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/RODRecordIterator.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/RODRecordIterator.java
deleted file mode 100644
index ce924fd87..000000000
--- a/public/java/src/org/broadinstitute/sting/gatk/refdata/RODRecordIterator.java
+++ /dev/null
@@ -1,238 +0,0 @@
-/*
- * Copyright (c) 2010 The Broad Institute
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
- * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-package org.broadinstitute.sting.gatk.refdata;
-
-import org.broadinstitute.sting.gatk.iterators.PushbackIterator;
-import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException;
-import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
-import org.broadinstitute.sting.utils.exceptions.UserException;
-import org.broadinstitute.sting.utils.text.XReadLines;
-
-import java.io.File;
-import java.io.FileNotFoundException;
-import java.lang.reflect.Constructor;
-import java.util.Iterator;
-import java.util.regex.Pattern;
-
-/**
- * This is a low-level iterator designed to provide system-wide generic support for reading record-oriented data
- * files. The only assumption made is that every line in the file provides a complete and separate data record. The records
- * can be associated with coordinates or coordinate intervals, there can be one or more records associated with a given
- * position/interval, or intervals can overlap. The records must be comprised of delimited fields, but the format is
- * otherwise free. For any specific line-based data format, an appropriate implementation of ReferenceOrderedDatum must be
- * provided that is capable of parsing itself from a single line of data. This implementation will be used,
- * through reflection mechanism, as a callback to do all the work.
- *
- * The model is, hence, as follows:
- *
- * String dataRecord <---> RodImplementation ( ::parseLine(dataRecord.split(delimiter)) is aware of the format and fills
- * an instance of RodImplementation with data values from dataRecord line).
- *
- *
- * instantiation of RODRecordIterator(dataFile, trackName, RodImplementation.class) will immediately provide an iterator
- * that walks along the dataFile line by line, and on each call to next() returns a new RodImplementation object
- * representing a single line (record) of data. The returned object will be initialized with "track name" trackName -
- * track names (as returned by ROD.getName()) are often used in other parts of the code to distinguish between
- * multiple streams of (possibly heterogeneous) annotation data bound to an application.
- *
- * This generic iterator skips and ignores a) empty lines, b) lines starting with '#' (comments): they are never sent back
- * to the ROD implementation class for processing.
- *
- * This iterator does not actually check if the ROD records (lines) in the file are indeed ordedered by coordinate,
- * and it does not depend on such an order as it still implements a low-level line-based traversal of the data. Higher-level
- * iterators/wrappers will perform all the necessary checks.
- *
- * Note: some data formats/ROD implementations may require a header line in the file. In this case the current (ugly)
- * mechanism is as follows:
- * 1) rod implementation's ::initialize(file) method should be able to open the file, find and read the header line
- * and return the header object (to be kept by the iterator)
- * 2) rod implementation's ::parseLine(header,line) method should be capable of making use of that saved header object now served to it
- * and
- * 3) ::parseLine(header,line) should be able to recognize the original header line in the file and skip it (after ROD's initialize()
- * method is called, the iterator will re-open the file and start reading it from the very beginning; there is no
- * other way, except for "smart" ::parseLine(), to avoid reading in the header line as "data").
- *
- * Created by IntelliJ IDEA.
- * User: asivache
- * Date: Sep 10, 2009
- * Time: 1:22:23 PM
- * To change this template use File | Settings | File Templates.
- */
-public class RODRecordIterator implements Iterator {
-
- private PushbackIterator reader;
-
- // stores name of the track this iterator reads (will be also returned by getName() of ROD objects
- // generated by this iterator)
- private String name;
-
- // we keep the file object, only to use file name in error reports
- private File file;
-
- // rod type; this is what we will instantiate for RODs at runtime
- private Class type;
-
- private Object header = null; // Some RODs may use header
-
- // field delimiter in the file. Should it be the job of the iterator to split the lines though? RODs can do that!
- private String fieldDelimiter;
-
- // constructor for the ROD objects we are going to return. Constructor that takes the track name as its single arg is required.
- private Constructor named_constructor;
-
- // keep track of the lines we are reading. used for error messages only.
- private long linenum = 0;
-
- private boolean allow_empty = true;
- private boolean allow_comments = true;
- public static Pattern EMPTYLINE_PATTERN = Pattern.compile("^\\s*$");
-
- public RODRecordIterator(File file, String name, Class type) {
- try {
- reader = new PushbackIterator(new XReadLines(file));
- } catch (FileNotFoundException e) {
- throw new UserException.CouldNotReadInputFile(file, e);
- }
- this.file = file;
- this.name = name;
- this.type = type;
- try {
- named_constructor = type.getConstructor(String.class);
- }
- catch (java.lang.NoSuchMethodException e) {
- throw new ReviewedStingException("ROD class "+type.getName()+" does not have constructor that accepts a single String argument (track name)");
- }
- ROD rod = instantiateROD(name);
- fieldDelimiter = rod.delimiterRegex(); // get delimiter from the ROD itself
- try {
- header = rod.initialize(file);
- } catch (FileNotFoundException e) {
- throw new UserException.CouldNotReadInputFile(file, "ROD "+type.getName() + " failed to initialize properly from file "+file);
- }
-
- }
-
-
- /**
- * Returns true if the iteration has more elements. (In other
- * words, returns true if next would return an element
- * rather than throwing an exception.)
- *
- * @return true if the iterator has more elements.
- */
- public boolean hasNext() {
- if ( allow_empty || allow_comments ) {
- while ( reader.hasNext() ) {
- String line = reader.next();
- if ( allow_empty && EMPTYLINE_PATTERN.matcher(line).matches() ) continue; // skip empty line
- if ( allow_comments && line.charAt(0) == '#' ) continue; // skip comment lines
- // the line is not empty and not a comment line, so we have next after all
- reader.pushback(line);
- return true;
- }
- return false; // oops, we end up here if there's nothing left
- } else {
- return reader.hasNext();
- }
- }
-
- /**
- * Returns the next valid ROD record in the file, skipping empty and comment lines.
- *
- * @return the next element in the iteration.
- * @throws java.util.NoSuchElementException
- * iteration has no more elements.
- */
- public ROD next() {
- ROD n = null;
- boolean parsed_ok = false;
- String line ;
-
- while ( ! parsed_ok && reader.hasNext() ) {
- line = reader.next();
- linenum++;
- while ( allow_empty && EMPTYLINE_PATTERN.matcher(line).matches() ||
- allow_comments && line.charAt(0) == '#' ) {
- if ( reader.hasNext() ) {
- line = reader.next();
- linenum++;
- } else {
- line = null;
- break;
- }
- }
-
- if ( line == null ) break; // if we ran out of lines while skipping empty lines/comments, then we are done
-
- String parts[] = line.split(fieldDelimiter);
-
- try {
- n = instantiateROD(name);
- parsed_ok = n.parseLine(header,parts) ;
- }
- catch ( Exception e ) {
- throw new UserException.MalformedFile(file, "Failed to parse ROD data ("+type.getName()+") from file "+ file + " at line #"+linenum+
- "\nOffending line: "+line+
- "\nReason ("+e.getClass().getName()+")", e);
- }
- }
-
-
- return n;
- }
-
- /**
- * Removes from the underlying collection the last element returned by the
- * iterator (optional operation). This method can be called only once per
- * call to next . The behavior of an iterator is unspecified if
- * the underlying collection is modified while the iteration is in
- * progress in any way other than by calling this method.
- *
- * @throws UnsupportedOperationException if the remove
- * operation is not supported by this Iterator.
- * @throws IllegalStateException if the next method has not
- * yet been called, or the remove method has already
- * been called after the last call to the next
- * method.
- */
- public void remove() {
- throw new UnsupportedOperationException("remove() operation is not supported by RODRecordIterator");
- }
-
- /** Instantiates appropriate implementation of the ROD used by this iteratot. The 'name' argument is the name
- * of the ROD track.
- * @param name
- * @return
- */
- private ROD instantiateROD(final String name) {
- try {
- return (ROD) named_constructor.newInstance(name);
- } catch (Exception e) {
- throw new DynamicClassResolutionException(named_constructor.getDeclaringClass(), e);
- }
- }
-
-}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java
index d03b122e2..286e22369 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java
@@ -1,13 +1,15 @@
package org.broadinstitute.sting.gatk.refdata;
+import com.google.java.contract.Ensures;
+import com.google.java.contract.Requires;
import org.apache.log4j.Logger;
+import org.broad.tribble.Feature;
+import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
import org.broadinstitute.sting.utils.GenomeLoc;
-import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
-import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.*;
@@ -18,348 +20,402 @@ import java.util.*;
* The standard interaction model is:
*
* Traversal system arrives at a site, which has a bunch of RMDs covering it
-Genotype * Traversal calls tracker.bind(name, RMD) for each RMDs in RMDs
- * Traversal passes tracker to the walker
- * walker calls lookup(name, default) to obtain the RMDs values at this site, or default if none was
- * bound at this site.
+ * Traversal creates a tracker and passes it to the walker
+ * walker calls get(rodBinding) to obtain the RMDs values at this site for the track
+ * associated with rodBinding.
+ *
+ * Note that this is an immutable class. Once created the underlying data structures
+ * cannot be modified
*
* User: mdepristo
* Date: Apr 3, 2009
* Time: 3:05:23 PM
*/
public class RefMetaDataTracker {
+ // TODO: this should be a list, not a map, actually
+ private final static RODRecordList EMPTY_ROD_RECORD_LIST = new RODRecordListImpl("EMPTY");
+
final Map map;
- protected static Logger logger = Logger.getLogger(RefMetaDataTracker.class);
+ final ReferenceContext ref;
+ final protected static Logger logger = Logger.getLogger(RefMetaDataTracker.class);
- public RefMetaDataTracker(int nBindings) {
- if ( nBindings == 0 )
+ // ------------------------------------------------------------------------------------------
+ //
+ //
+ // Special ENGINE interaction functions
+ //
+ //
+ // ------------------------------------------------------------------------------------------
+
+ public RefMetaDataTracker(final Collection allBindings, final ReferenceContext ref) {
+ this.ref = ref;
+
+ // set up the map
+ if ( allBindings.isEmpty() )
map = Collections.emptyMap();
- else
- map = new HashMap(nBindings);
+ else {
+ Map tmap = new HashMap(allBindings.size());
+ for ( RODRecordList rod : allBindings ) {
+ if ( rod != null && ! rod.isEmpty() )
+ tmap.put(canonicalName(rod.getName()), rod);
+ }
+
+ // ensure that no one modifies the map itself
+ map = Collections.unmodifiableMap(tmap);
+ }
+ }
+
+ // ------------------------------------------------------------------------------------------
+ //
+ //
+ // Generic accessors
+ //
+ //
+ // ------------------------------------------------------------------------------------------
+
+ /**
+ * Gets all of the Tribble features spanning this locus, returning them as a list of specific
+ * type T extending Feature. This function looks across all tracks to find the Features, so
+ * if you have two tracks A and B each containing 1 Feature, then getValues will return
+ * a list containing both features.
+ *
+ * Note that this function assumes that all of the bound features are instances of or
+ * subclasses of T. A ClassCastException will occur if this isn't the case. If you want
+ * to get all Features without any danger of such an exception use the root Tribble
+ * interface Feature.
+ *
+ * @param type The type of the underlying objects bound here
+ * @param <T> as above
+ * @return A freshly allocated list of all of the bindings, or an empty list if none are bound.
+ */
+ @Requires({"type != null"})
+ @Ensures("result != null")
+ public List getValues(final Class type) {
+ return addValues(map.keySet(), type, new ArrayList(), null, false, false);
}
/**
- * get all the reference meta data associated with a track name.
- * @param name the name of the track we're looking for
- * @return a list of objects, representing the underlying objects that the tracks produce. I.e. for a
- * dbSNP RMD this will be a RodDbSNP, etc.
+ * Provides the same functionality as {@link #getValues(Class)} but will only include
+ * Features that start at the GenomeLoc provided as onlyAtThisLoc.
*
- * Important: The list returned by this function is guaranteed not to be null, but may be empty!
+ * @param type The type of the underlying objects bound here
+ * @param onlyAtThisLoc only Features starting at this site are considered
+ * @param <T> as above
+ * @return A freshly allocated list of all of the bindings, or an empty list if none are bound.
*/
- public List getReferenceMetaData(final String name) {
- RODRecordList list = getTrackDataByName(name, true);
- List objects = new ArrayList();
- if (list == null) return objects;
- for (GATKFeature feature : list)
- objects.add(feature.getUnderlyingObject());
- return objects;
+ @Requires({"type != null", "onlyAtThisLoc != null"})
+ @Ensures("result != null")
+ public List getValues(final Class type, final GenomeLoc onlyAtThisLoc) {
+ return addValues(map.keySet(), type, new ArrayList(), onlyAtThisLoc, true, false);
}
/**
- * get all the reference meta data associated with a track name.
- * @param name the name of the track we're looking for
- * @param requireExactMatch do we require an exact match for the name (true) or do we require only that the name starts with
- * the passed in parameter (false).
- * @return a list of objects, representing the underlying objects that the tracks produce. I.e. for a
- * dbSNP rod this will be a RodDbSNP, etc.
+ * Uses the same logic as {@link #getValues(Class)} but arbitrarily selects one of the resulting
+ * elements of the list to return. That is, if there would be two elements in the result of
+ * {@link #getValues(Class)}, one of these two is selected, and which one it will be isn't
+ * specified. Consequently, this method is only really safe if (1) you absolutely know
+ * that only one binding will meet the constraints of {@link #getValues(Class)} or (2)
+ * you truly don't care which of the multiple bindings available you are going to examine.
*
- * Important: The list returned by this function is guaranteed not to be null, but may be empty!
+ * If there are no bindings here, getFirstValue() returns null.
+ *
+ * @param type The type of the underlying objects bound here
+ * @param <T> as above
+ * @return A random single element of the RODs bound here, or null if none are bound.
*/
- public List getReferenceMetaData(final String name, boolean requireExactMatch) {
- RODRecordList list = getTrackDataByName(name, requireExactMatch);
- List objects = new ArrayList();
- if (list == null) return objects;
- for (GATKFeature feature : list)
- objects.add(feature.getUnderlyingObject());
- return objects;
+ @Requires({"type != null"})
+ public T getFirstValue(final Class type) {
+ return safeGetFirst(getValues(type));
}
/**
- * get all the GATK features associated with a specific track name
- * @param name the name of the track we're looking for
- * @param requireExactMatch do we require an exact match for the name (true) or do we require only that the name starts with
- * the passed in parameter (false).
- * @return a list of GATKFeatures for the target rmd
+ * Uses the same logic as {@link #getValues(Class,GenomeLoc)} to determine the list
+ * of eligible Features and {@link #getFirstValue(Class)} to select a single
+ * element from the resulting list.
*
- * Important: The list returned by this function is guaranteed not to be null, but may be empty!
+ * @param type The type of the underlying objects bound here
+ * @param <T> as above
+ * @param onlyAtThisLoc only Features starting at this site are considered
+ * @return A random single element of the RODs bound here starting at onlyAtThisLoc, or null if none are bound.
*/
- public List getGATKFeatureMetaData(final String name, boolean requireExactMatch) {
- List feat = getTrackDataByName(name,requireExactMatch);
- return (feat == null) ? new ArrayList() : feat; // to satisfy the above requirement that we don't return null
+ @Requires({"type != null", "onlyAtThisLoc != null"})
+ public T getFirstValue(final Class type, final GenomeLoc onlyAtThisLoc) {
+ return safeGetFirst(getValues(type, onlyAtThisLoc));
+
}
/**
- * get a singleton record, given the name and a type. This function will return the first record at the current position seen,
- * and emit a logger warning if there were more than one option.
+ * Gets all of the Tribble features bound to RodBinding spanning this locus, returning them as
+ * a list of specific type T extending Feature.
*
- * WARNING: this method is deprecated, since we now suppport more than one RMD at a single position for all tracks. If there are
- * are multiple RMD objects at this location, there is no contract for which object this method will pick, and which object gets
- * picked may change from time to time! BE WARNED!
- *
- * @param name the name of the track
- * @param clazz the underlying type to return
- * @param the type to parameterize on, matching the clazz argument
- * @return a record of type T, or null if no record is present.
+ * Note that this function assumes that all of the bound features are instances of or
+ * subclasses of T. A ClassCastException will occur if this isn't the case.
+ *
+ * @param rodBinding Only Features coming from the track associated with this rodBinding are fetched
+ * @param <T> The Tribble Feature type of the rodBinding, and consequently the type of the resulting list of Features
+ * @return A freshly allocated list of all of the bindings, or an empty list if none are bound.
*/
- @Deprecated
- public T lookup(final String name, Class clazz) {
- RODRecordList objects = getTrackDataByName(name, true);
+ @Requires({"rodBinding != null"})
+ @Ensures("result != null")
+ public List getValues(final RodBinding rodBinding) {
+ return addValues(rodBinding.getName(), rodBinding.getType(), new ArrayList(1), getTrackDataByName(rodBinding), null, false, false);
+ }
- // if emtpy or null return null;
- if (objects == null || objects.size() < 1) return null;
+ /**
+ * Gets all of the Tribble features bound to any RodBinding in rodBindings,
+ * spanning this locus, returning them as a list of specific type T extending Feature.
+ *
+ * Note that this function assumes that all of the bound features are instances of or
+ * subclasses of T. A ClassCastException will occur if this isn't the case.
+ *
+ * @param rodBindings Only Features coming from the tracks associated with one of rodBindings are fetched
+ * @param <T> The Tribble Feature type of the rodBinding, and consequently the type of the resulting list of Features
+ * @return A freshly allocated list of all of the bindings, or an empty list if none are bound.
+ */
+ @Requires({"rodBindings != null"})
+ @Ensures("result != null")
+ public List getValues(final Collection> rodBindings) {
+ List results = new ArrayList(1);
+ for ( RodBinding rodBinding : rodBindings )
+ results.addAll(getValues(rodBinding));
+ return results;
+ }
- if (objects.size() > 1)
- logger.info("lookup is choosing the first record from " + (objects.size() - 1) + " options");
+ /**
+ * The same logic as {@link #getValues(RodBinding)} but enforces that each Feature start at onlyAtThisLoc
+ *
+ * @param rodBinding Only Features coming from the track associated with this rodBinding are fetched
+ * @param <T> The Tribble Feature type of the rodBinding, and consequently the type of the resulting list of Features
+ * @param onlyAtThisLoc only Features starting at this site are considered
+ * @return A freshly allocated list of all of the bindings, or an empty list if none are bound.
+ */
+ @Requires({"rodBinding != null", "onlyAtThisLoc != null"})
+ @Ensures("result != null")
+ public List getValues(final RodBinding rodBinding, final GenomeLoc onlyAtThisLoc) {
+ return addValues(rodBinding.getName(), rodBinding.getType(), new ArrayList(1), getTrackDataByName(rodBinding), onlyAtThisLoc, true, false);
+ }
- Object obj = objects.get(0).getUnderlyingObject();
- if (!(clazz.isAssignableFrom(obj.getClass())))
- throw new UserException.CommandLineException("Unable to case track named " + name + " to type of " + clazz.toString()
- + " it's of type " + obj.getClass());
+ /**
+ * The same logic as {@link #getValues(Collection)} but enforces that each Feature start at onlyAtThisLoc
+ *
+ * @param rodBindings Only Features coming from the tracks associated with one of rodBindings are fetched
+ * @param <T> The Tribble Feature type of the rodBinding, and consequently the type of the resulting list of Features
+ * @param onlyAtThisLoc only Features starting at this site are considered
+ * @return A freshly allocated list of all of the bindings, or an empty list if none are bound.
+ */
+ @Requires({"rodBindings != null", "onlyAtThisLoc != null"})
+ @Ensures("result != null")
+ public List getValues(final Collection> rodBindings, final GenomeLoc onlyAtThisLoc) {
+ List results = new ArrayList(1);
+ for ( RodBinding rodBinding : rodBindings )
+ results.addAll(getValues(rodBinding, onlyAtThisLoc));
+ return results;
+ }
- return (T)obj;
+ /**
+ * Uses the same logic as {@link #getValues(RodBinding)} to determine the list
+ * of eligible Features and select a single element from the resulting set
+ * of eligible features.
+ *
+ * @param rodBinding Only Features coming from the track associated with this rodBinding are fetched
+ * @param <T> as above
+ * @return A random single element of the eligible Features found, or null if none are bound.
+ */
+ @Requires({"rodBinding != null"})
+ public T getFirstValue(final RodBinding rodBinding) {
+ return safeGetFirst(addValues(rodBinding.getName(), rodBinding.getType(), null, getTrackDataByName(rodBinding), null, false, true));
+ }
+
+ /**
+ * Uses the same logic as {@link #getValues(RodBinding,GenomeLoc)} to determine the list
+ * of eligible Features and select a single element from the resulting set
+ * of eligible features.
+ *
+ * @param rodBinding Only Features coming from the track associated with this rodBinding are fetched
+ * @param <T> as above
+ * @param onlyAtThisLoc only Features starting at this site are considered
+ * @return A random single element of the eligible Features found, or null if none are bound.
+ */
+ @Requires({"rodBinding != null", "onlyAtThisLoc != null"})
+ public T getFirstValue(final RodBinding rodBinding, final GenomeLoc onlyAtThisLoc) {
+ return safeGetFirst(addValues(rodBinding.getName(), rodBinding.getType(), null, getTrackDataByName(rodBinding), onlyAtThisLoc, true, true));
+ }
+
+ /**
+ * Uses the same logic as {@link #getValues(Collection)} to determine the list
+ * of eligible Features and select a single element from the resulting set
+ * of eligible features.
+ *
+ * @param rodBindings Only Features coming from the tracks associated with these rodBindings are fetched
+ * @param <T> as above
+ * @return A random single element of the eligible Features found, or null if none are bound.
+ */
+ @Requires({"rodBindings != null"})
+ public T getFirstValue(final Collection> rodBindings) {
+ for ( RodBinding rodBinding : rodBindings ) {
+ T val = getFirstValue(rodBinding);
+ if ( val != null )
+ return val;
+ }
+ return null;
+ }
+
+ /**
+ * Uses the same logic as {@link #getValues(Collection,GenomeLoc)} to determine the list
+ * of eligible Features and select a single element from the resulting set
+ * of eligible features.
+ *
+ * @param rodBindings Only Features coming from the tracks associated with these rodBindings are fetched
+ * @param <T> as above
+ * @param onlyAtThisLoc only Features starting at this site are considered
+ * @return A random single element of the eligible Features found, or null if none are bound.
+ */
+ @Requires({"rodBindings != null", "onlyAtThisLoc != null"})
+ public T getFirstValue(final Collection> rodBindings, final GenomeLoc onlyAtThisLoc) {
+ for ( RodBinding rodBinding : rodBindings ) {
+ T val = getFirstValue(rodBinding, onlyAtThisLoc);
+ if ( val != null )
+ return val;
+ }
+ return null;
}
/**
* Is there a binding at this site to a ROD/track with the specified name?
*
- * @param name the name of the rod
- * @return true if it has the rod
+ * @param rodBinding the rod binding we want to know about
+ * @return true if any Features are bound in this tracker to rodBinding
*/
- public boolean hasROD(final String name) {
- return map.containsKey(canonicalName(name));
- }
-
-
- /**
- * Get all of the RMDs at the current site. The collection is "flattened": for any track that has multiple records
- * at the current site, they all will be added to the list as separate elements.
- *
- * @return collection of all rods
- */
- public Collection getAllRods() {
- List l = new ArrayList();
- for ( RODRecordList rl : map.values() ) {
- if ( rl == null ) continue; // how do we get null value stored for a track? shouldn't the track be missing from the map alltogether?
- l.addAll(rl);
- }
- return l;
-
+ @Requires({"rodBinding != null"})
+ public boolean hasValues(final RodBinding rodBinding) {
+ return map.containsKey(canonicalName(rodBinding.getName()));
}
/**
* Get all of the RMD tracks at the current site. Each track is returned as a single compound
* object (RODRecordList) that may contain multiple RMD records associated with the current site.
*
- * @return collection of all tracks
+ * @return List of all tracks
*/
- public Collection getBoundRodTracks() {
- LinkedList bound = new LinkedList();
-
- for ( RODRecordList value : map.values() ) {
- if ( value != null && value.size() != 0 ) bound.add(value);
- }
-
- return bound;
+ public List getBoundRodTracks() {
+ return new ArrayList(map.values());
}
/**
- * @return the number of ROD bindings (name -> value) where value is not empty in this tracker
+ * The number of tracks with at least one value bound here
+ * @return the number of tracks with at least one bound Feature
*/
- public int getNBoundRodTracks() {
- return getNBoundRodTracks(null);
+ public int getNTracksWithBoundFeatures() {
+ return map.size();
}
- public int getNBoundRodTracks(final String excludeIn ) {
- final String exclude = excludeIn == null ? null : canonicalName(excludeIn);
+ // ------------------------------------------------------------------------------------------
+ //
+ //
+ // old style accessors
+ //
+ // TODO -- DELETE ME
+ //
+ //
+ // ------------------------------------------------------------------------------------------
- int n = 0;
- for ( RODRecordList value : map.values() ) {
- if ( value != null && ! value.isEmpty() ) {
- if ( exclude == null || ! value.getName().equals(exclude) )
- n++;
- }
- }
-
- return n;
+ @Deprecated
+ public boolean hasValues(final String name) {
+ return map.containsKey(canonicalName(name));
}
+ @Deprecated
+ public List getValues(final Class type, final String name) {
+ return addValues(name, type, new ArrayList(), getTrackDataByName(name), null, false, false);
+ }
+ @Deprecated
+ public List getValues(final Class type, final String name, final GenomeLoc onlyAtThisLoc) {
+ return addValues(name, type, new ArrayList(), getTrackDataByName(name), onlyAtThisLoc, true, false);
+ }
+ @Deprecated
+ public T getFirstValue(final Class type, final String name) {
+ return safeGetFirst(getValues(type, name));
+ }
+ @Deprecated
+ public T getFirstValue(final Class type, final String name, final GenomeLoc onlyAtThisLoc) {
+ return safeGetFirst(getValues(type, name, onlyAtThisLoc));
+ }
+
+ // ------------------------------------------------------------------------------------------
+ //
+ //
+ // Private utility functions
+ //
+ //
+ // ------------------------------------------------------------------------------------------
/**
- * Binds the list of reference ordered data records (RMDs) to track name at this site. Should be used only by the traversal
- * system to provide access to RMDs in a structured way to the walkers.
+ * Helper function for getFirst() operations that takes a list of T and
+ * returns the first element, or null if no such element exists.
*
- * @param name the name of the track
- * @param rod the collection of RMD data
- */
- public void bind(final String name, RODRecordList rod) {
- //logger.debug(String.format("Binding %s to %s", name, rod));
- map.put(canonicalName(name), rod);
- }
-
-
- /**
- * Converts all possible ROD tracks to VariantContexts objects, of all types, allowing any start and any number
- * of entries per ROD.
- * The name of each VariantContext corresponds to the ROD name.
- *
- * @param ref reference context
- * @return variant context
- */
- public Collection getAllVariantContexts(ReferenceContext ref) {
- return getAllVariantContexts(ref, null, null, false, false);
- }
-
- /**
- * Returns all of the variant contexts that start at the current location
- * @param ref
- * @param curLocation
+ * @param l the list to take the first element from; must not be null
+ * @param <T> the element type of the list
* @return
*/
- public Collection getAllVariantContexts(ReferenceContext ref, GenomeLoc curLocation) {
- return getAllVariantContexts(ref, null, curLocation, true, false);
+ @Requires({"l != null"})
+ final private T safeGetFirst(final List l) {
+ return l.isEmpty() ? null : l.get(0);
}
- /**
- * Converts all possible ROD tracks to VariantContexts objects. If allowedTypes != null, then only
- * VariantContexts in the allow set of types will be returned. If requireStartsHere is true, then curLocation
- * must not be null, and only records whose start position is == to curLocation.getStart() will be returned.
- * If takeFirstOnly is true, then only a single VariantContext will be converted from any individual ROD. Of course,
- * this single object must pass the allowed types and start here options if provided. Note that the result
- * may return multiple VariantContexts with the same name if that particular track contained multiple RODs spanning
- * the current location.
- *
- * The name of each VariantContext corresponds to the ROD name.
- *
- * @param ref reference context
- * @param allowedTypes allowed types
- * @param curLocation location
- * @param requireStartHere do we require the rod to start at this location?
- * @param takeFirstOnly do we take the first rod only?
- * @return variant context
- */
- public Collection getAllVariantContexts(ReferenceContext ref, EnumSet allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) {
- List contexts = new ArrayList();
-
- for ( RODRecordList rodList : getBoundRodTracks() ) {
- addVariantContexts(contexts, rodList, ref, allowedTypes, curLocation, requireStartHere, takeFirstOnly);
- }
-
- return contexts;
- }
-
- /**
- * Gets the variant contexts associated with track name name
- *
- * see getVariantContexts for more information.
- *
- * @param ref ReferenceContext to enable conversion to variant context
- * @param name name
- * @param curLocation location
- * @param allowedTypes allowed types
- * @param requireStartHere do we require the rod to start at this location?
- * @param takeFirstOnly do we take the first rod only?
- * @return variant context
- */
-// public Collection getVariantContexts(String name, EnumSet allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) {
-// return getVariantContexts(null, Arrays.asList(name), allowedTypes, curLocation, requireStartHere, takeFirstOnly);
-// }
-
- public Collection getVariantContexts(ReferenceContext ref, String name, EnumSet allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) {
- return getVariantContexts(ref, Arrays.asList(name), allowedTypes, curLocation, requireStartHere, takeFirstOnly);
- }
-
-// public Collection getVariantContexts(Collection names, EnumSet allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) {
-// return getVariantContexts(null, names, allowedTypes, curLocation, requireStartHere, takeFirstOnly);
-// }
-
- public Collection getVariantContexts(ReferenceContext ref, Collection names, EnumSet allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) {
- Collection contexts = new ArrayList();
-
+ private List addValues(final Collection names,
+ final Class type,
+ List values,
+ final GenomeLoc curLocation,
+ final boolean requireStartHere,
+ final boolean takeFirstOnly ) {
for ( String name : names ) {
- RODRecordList rodList = getTrackDataByName(name,true); // require that the name is an exact match
-
- if ( rodList != null )
- addVariantContexts(contexts, rodList, ref, allowedTypes, curLocation, requireStartHere, takeFirstOnly );
+ RODRecordList rodList = getTrackDataByName(name); // require that the name is an exact match
+ values = addValues(name, type, values, rodList, curLocation, requireStartHere, takeFirstOnly );
+ if ( takeFirstOnly && ! values.isEmpty() )
+ break;
}
- return contexts;
- }
-
- public Collection getVariantContextsByPrefix(ReferenceContext ref, Collection names, EnumSet allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) {
- Collection contexts = new ArrayList();
-
- for ( String name : names ) {
- RODRecordList rodList = getTrackDataByName(name,false); // require that the name is an exact match
-
- if ( rodList != null )
- addVariantContexts(contexts, rodList, ref, allowedTypes, curLocation, requireStartHere, takeFirstOnly );
- }
-
- return contexts;
- }
-
- /**
- * Gets the variant context associated with name, and assumes the system only has a single bound track at this location. Throws an exception if not.
- * see getVariantContexts for more information.
- *
- * @param name name
- * @param curLocation location
- * @param allowedTypes allowed types
- * @param requireStartHere do we require the rod to start at this location?
- * @return variant context
- */
- public VariantContext getVariantContext(ReferenceContext ref, String name, EnumSet allowedTypes, GenomeLoc curLocation, boolean requireStartHere ) {
- Collection contexts = getVariantContexts(ref, name, allowedTypes, curLocation, requireStartHere, false );
-
- if ( contexts.size() > 1 )
- throw new ReviewedStingException("Requested a single VariantContext object for track " + name + " but multiple variants were present at position " + curLocation);
- else if ( contexts.size() == 0 )
- return null;
- else
- return contexts.iterator().next();
- }
-
- /**
- * Very simple accessor that gets the first (and only!) VC associated with name at the current location, or
- * null if there's no binding here.
- *
- * @param ref
- * @param name
- * @param curLocation
- * @return
- */
- public VariantContext getVariantContext(ReferenceContext ref, String name, GenomeLoc curLocation) {
- return getVariantContext(ref, name, null, curLocation, true);
+ return values;
}
- private void addVariantContexts(Collection contexts, RODRecordList rodList, ReferenceContext ref, EnumSet allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) {
+
+ private List addValues(final String name,
+ final Class type,
+ List values,
+ final RODRecordList rodList,
+ final GenomeLoc curLocation,
+ final boolean requireStartHere,
+ final boolean takeFirstOnly ) {
for ( GATKFeature rec : rodList ) {
- if ( VariantContextAdaptors.canBeConvertedToVariantContext(rec.getUnderlyingObject()) ) {
- // ok, we might actually be able to turn this record in a variant context
- VariantContext vc = VariantContextAdaptors.toVariantContext(rodList.getName(), rec.getUnderlyingObject(), ref);
+ if ( ! requireStartHere || rec.getLocation().getStart() == curLocation.getStart() ) { // ok, we are going to keep this thing
+ Object obj = rec.getUnderlyingObject();
+ if (!(type.isAssignableFrom(obj.getClass())))
+ throw new UserException.CommandLineException("Unable to cast track named " + name + " to type of " + type.toString()
+ + " it's of type " + obj.getClass());
- if ( vc == null ) // sometimes the track has odd stuff in it that can't be converted
- continue;
+ T objT = (T)obj;
+ if ( takeFirstOnly ) {
+ if ( values == null )
+ values = Arrays.asList(objT);
+ else
+ values.add(objT);
- // now, let's decide if we want to keep it
- boolean goodType = allowedTypes == null || allowedTypes.contains(vc.getType());
- boolean goodPos = ! requireStartHere || rec.getLocation().getStart() == curLocation.getStart();
-
- if ( goodType && goodPos ) { // ok, we are going to keep this thing
- contexts.add(vc);
-
- if ( takeFirstOnly )
- // we only want the first passing instance, so break the loop over records in rodList
- break;
+ break;
+ } else {
+ if ( values == null )
+ values = new ArrayList();
+ values.add(objT);
}
}
}
+
+ return values == null ? Collections.emptyList() : values;
}
/**
* Finds the reference metadata track named 'name' and returns all ROD records from that track associated
- * with the current site as a RODRecordList collection object. If no data track with specified name is available,
+ * with the current site as a RODRecordList object. If no data track with specified name is available,
* returns defaultValue wrapped as RODRecordList object. NOTE: if defaultValue is null, it will be wrapped up
* with track name set to 'name' and location set to null; otherwise the wrapper object will have name and
* location set to defaultValue.getName() and defaultValue.getLocation(), respectively (use caution,
@@ -367,29 +423,16 @@ public class RefMetaDataTracker {
* for instance, on locus traversal, location is usually expected to be a single base we are currently looking at,
* regardless of the presence of "extended" RODs overlapping with that location).
* @param name track name
- * @param requireExactMatch do we require an exact match of the rod name?
* @return track data for the given rod
*/
- private RODRecordList getTrackDataByName(final String name, boolean requireExactMatch) {
- //logger.debug(String.format("Lookup %s%n", name));
-
+ private RODRecordList getTrackDataByName(final String name) {
final String luName = canonicalName(name);
- RODRecordList trackData = null;
+ RODRecordList l = map.get(luName);
+ return l == null ? EMPTY_ROD_RECORD_LIST : l;
+ }
- if ( requireExactMatch ) {
- if ( map.containsKey(luName) )
- trackData = map.get(luName);
- } else {
- for ( Map.Entry datum : map.entrySet() ) {
- final String rodName = datum.getKey();
- if ( datum.getValue() != null && rodName.startsWith(luName) ) {
- if ( trackData == null ) trackData = new RODRecordListImpl(name);
- //System.out.printf("Adding bindings from %s to %s at %s%n", rodName, name, datum.getValue().getLocation());
- ((RODRecordListImpl)trackData).add(datum.getValue(), true);
- }
- }
- }
- return trackData;
+ private RODRecordList getTrackDataByName(final RodBinding binding) {
+ return getTrackDataByName(binding.getName());
}
/**
@@ -398,6 +441,7 @@ public class RefMetaDataTracker {
* @return canonical name of the rod
*/
private final String canonicalName(final String name) {
+ // todo -- remove me after switch to RodBinding syntax
return name.toLowerCase();
}
}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/ReferenceOrderedData.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/ReferenceOrderedData.java
deleted file mode 100644
index 5cdb6e9f7..000000000
--- a/public/java/src/org/broadinstitute/sting/gatk/refdata/ReferenceOrderedData.java
+++ /dev/null
@@ -1,130 +0,0 @@
-package org.broadinstitute.sting.gatk.refdata;
-
-import org.apache.log4j.Logger;
-import org.broadinstitute.sting.utils.exceptions.UserException;
-
-import java.io.*;
-import java.lang.reflect.Method;
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.List;
-
-/**
- * Class for representing arbitrary reference ordered data sets
- *
- * User: mdepristo
- * Date: Feb 27, 2009
- * Time: 10:47:14 AM
- * To change this template use File | Settings | File Templates.
- */
-public class ReferenceOrderedData implements Iterable