diff --git a/build.xml b/build.xml index ef53f6aa4..275cb5555 100644 --- a/build.xml +++ b/build.xml @@ -48,8 +48,9 @@ - - + + + @@ -176,6 +177,10 @@ + + + + @@ -485,11 +490,16 @@ + + + + + + additionalparam="${gatkdocs.include.hidden.arg} -private -build-timestamp "${build.timestamp}" -absolute-version ${build.version} -quiet -J-Xdebug -J-Xrunjdwp:transport=dt_socket,server=y,suspend=n,address=5005"> @@ -744,8 +754,7 @@ - - + diff --git a/public/R/queueJobReport.R b/public/R/queueJobReport.R new file mode 100644 index 000000000..18d33054e --- /dev/null +++ b/public/R/queueJobReport.R @@ -0,0 +1,154 @@ +library(gsalib) +require("ggplot2") +require("gplots") + +# +# Standard command line switch. Can be loaded interactively for development +# or executed with RScript +# +args = commandArgs(TRUE) +onCMDLine = ! is.na(args[1]) +if ( onCMDLine ) { + inputFileName = args[1] + outputPDF = args[2] +} else { + inputFileName = "~/Desktop/broadLocal/GATK/unstable/report.txt" + outputPDF = NA +} + +RUNTIME_UNITS = "(sec)" +ORIGINAL_UNITS_TO_SECONDS = 1/1000 + +# +# Helper function to aggregate all of the jobs in the report across all tables +# +allJobsFromReport <- function(report) { + names <- c("jobName", "startTime", "analysisName", "doneTime", "exechosts") + sub <- lapply(report, function(table) table[,names]) + do.call("rbind", sub) +} + +# +# Creates segmentation plots of time (x) vs. 
job (y) with segments for the duration of the job +# +plotJobsGantt <- function(gatkReport, sortOverall) { + allJobs = allJobsFromReport(gatkReport) + if ( sortOverall ) { + title = "All jobs, by analysis, by start time" + allJobs = allJobs[order(allJobs$analysisName, allJobs$startTime, decreasing=T), ] + } else { + title = "All jobs, sorted by start time" + allJobs = allJobs[order(allJobs$startTime, decreasing=T), ] + } + allJobs$index = 1:nrow(allJobs) + minTime = min(allJobs$startTime) + allJobs$relStartTime = allJobs$startTime - minTime + allJobs$relDoneTime = allJobs$doneTime - minTime + allJobs$ganttName = paste(allJobs$jobName, "@", allJobs$exechosts) + maxRelTime = max(allJobs$relDoneTime) + p <- ggplot(data=allJobs, aes(x=relStartTime, y=index, color=analysisName)) + p <- p + geom_segment(aes(xend=relDoneTime, yend=index), size=2, arrow=arrow(length = unit(0.1, "cm"))) + p <- p + geom_text(aes(x=relDoneTime, label=ganttName, hjust=-0.2), size=2) + p <- p + xlim(0, maxRelTime * 1.1) + p <- p + xlab(paste("Start time (relative to first job)", RUNTIME_UNITS)) + p <- p + ylab("Job") + p <- p + opts(title=title) + print(p) +} + +# +# Plots scheduling efficiency at job events +# +plotProgressByTime <- function(gatkReport) { + allJobs = allJobsFromReport(gatkReport) + nJobs = dim(allJobs)[1] + allJobs = allJobs[order(allJobs$startTime, decreasing=F),] + allJobs$index = 1:nrow(allJobs) + + minTime = min(allJobs$startTime) + allJobs$relStartTime = allJobs$startTime - minTime + allJobs$relDoneTime = allJobs$doneTime - minTime + + times = sort(c(allJobs$relStartTime, allJobs$relDoneTime)) + + countJobs <- function(p) { + s = allJobs$relStartTime + e = allJobs$relDoneTime + x = c() # I wish I knew how to make this work with apply + for ( time in times ) + x = c(x, sum(p(s, e, time))) + x + } + + pending = countJobs(function(s, e, t) s > t) + done = countJobs(function(s, e, t) e < t) + running = nJobs - pending - done + + d = data.frame(times=times, pending=pending, 
running=running, done=done) + + p <- ggplot(data=melt(d, id.vars=c("times")), aes(x=times, y=value, color=variable)) + p <- p + facet_grid(variable ~ ., scales="free") + p <- p + geom_line(size=2) + p <- p + xlab(paste("Time since start of first job", RUNTIME_UNITS)) + p <- p + opts(title = "Job scheduling") + print(p) +} + +# +# Creates tables for each job in this group +# +standardColumns = c("jobName", "startTime", "formattedStartTime", "analysisName", "intermediate", "exechosts", "formattedDoneTime", "doneTime", "runtime") +plotGroup <- function(groupTable) { + name = unique(groupTable$analysisName)[1] + groupAnnotations = setdiff(names(groupTable), standardColumns) + sub = groupTable[,c("jobName", groupAnnotations, "runtime")] + sub = sub[order(sub$iteration, sub$jobName, decreasing=F), ] + + # create a table showing each job and all annotations + textplot(sub, show.rownames=F) + title(paste("Job summary for", name, "full itemization"), cex=3) + + # create the table for each combination of values in the group, listing iterations in the columns + sum = cast(melt(sub, id.vars=groupAnnotations, measure.vars=c("runtime")), ... ~ iteration, fun.aggregate=mean) + textplot(as.data.frame(sum), show.rownames=F) + title(paste("Job summary for", name, "itemizing each iteration"), cex=3) + + # as above, but averaging over all iterations + groupAnnotationsNoIteration = setdiff(groupAnnotations, "iteration") + sum = cast(melt(sub, id.vars=groupAnnotationsNoIteration, measure.vars=c("runtime")), ... 
~ ., fun.aggregate=c(mean, sd)) + textplot(as.data.frame(sum), show.rownames=F) + title(paste("Job summary for", name, "averaging over all iterations"), cex=3) +} + +# print out some useful basic information +print("Report") +print(paste("Project :", inputFileName)) + +convertUnits <- function(gatkReportData) { + convertGroup <- function(g) { + g$runtime = g$runtime * ORIGINAL_UNITS_TO_SECONDS + g + } + lapply(gatkReportData, convertGroup) +} + + +# read the table +gatkReportData <- gsa.read.gatkreport(inputFileName) +gatkReportData <- convertUnits(gatkReportData) +#print(summary(gatkReportData)) + +if ( ! is.na(outputPDF) ) { + pdf(outputPDF, height=8.5, width=11) +} + +plotJobsGantt(gatkReportData, T) +plotJobsGantt(gatkReportData, F) +plotProgressByTime(gatkReportData) +for ( group in gatkReportData ) { + plotGroup(group) +} + +if ( ! is.na(outputPDF) ) { + dev.off() +} diff --git a/public/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java b/public/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java index 2ff8aa979..7ea515591 100755 --- a/public/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java +++ b/public/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java @@ -31,6 +31,7 @@ import org.broadinstitute.sting.commandline.Input; import org.broadinstitute.sting.gatk.walkers.recalibration.Covariate; import org.broadinstitute.sting.gatk.walkers.recalibration.RecalDatum; import org.broadinstitute.sting.gatk.walkers.recalibration.RecalibrationArgumentCollection; +import org.broadinstitute.sting.utils.R.RScriptExecutor; import org.broadinstitute.sting.utils.classloader.PluginManager; import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; @@ -38,6 +39,7 @@ import org.broadinstitute.sting.utils.text.XReadLines; import java.io.*; import java.util.ArrayList; +import java.util.Arrays; import 
java.util.Collection; import java.util.Map; import java.util.regex.Pattern; @@ -91,12 +93,12 @@ import java.util.regex.Pattern; * -resources resources/ \ * -ignoreQ 5 * - * + * */ @DocumentedGATKFeature( - groupName = "AnalyzeCovariates", - summary = "Package to plot residual accuracy versus error covariates for the base quality score recalibrator") + groupName = "AnalyzeCovariates", + summary = "Package to plot residual accuracy versus error covariates for the base quality score recalibrator") public class AnalyzeCovariates extends CommandLineProgram { ///////////////////////////// @@ -118,7 +120,7 @@ public class AnalyzeCovariates extends CommandLineProgram { private String PATH_TO_RESOURCES = "public/R/"; @Argument(fullName = "ignoreQ", shortName = "ignoreQ", doc = "Ignore bases with reported quality less than this number.", required = false) private int IGNORE_QSCORES_LESS_THAN = 5; - @Argument(fullName = "numRG", shortName = "numRG", doc = "Only process N read groups. Default value: -1 (process all read groups)", required = false) + @Argument(fullName = "numRG", shortName = "numRG", doc = "Only process N read groups. 
Default value: -1 (process all read groups)", required = false) private int NUM_READ_GROUPS_TO_PROCESS = -1; // -1 means process all read groups /** @@ -323,13 +325,14 @@ public class AnalyzeCovariates extends CommandLineProgram { } private void callRScripts() { + RScriptExecutor.RScriptArgumentCollection argumentCollection = + new RScriptExecutor.RScriptArgumentCollection(PATH_TO_RSCRIPT, Arrays.asList(PATH_TO_RESOURCES)); + RScriptExecutor executor = new RScriptExecutor(argumentCollection, true); int numReadGroups = 0; - + // for each read group for( Object readGroupKey : dataManager.getCollapsedTable(0).data.keySet() ) { - - Process p; if(++numReadGroups <= NUM_READ_GROUPS_TO_PROCESS || NUM_READ_GROUPS_TO_PROCESS == -1) { String readGroup = readGroupKey.toString(); @@ -338,35 +341,19 @@ public class AnalyzeCovariates extends CommandLineProgram { // for each covariate for( int iii = 1; iii < requestedCovariates.size(); iii++ ) { Covariate cov = requestedCovariates.get(iii); - try { - - if (DO_INDEL_QUALITY) { - p = Runtime.getRuntime().exec(PATH_TO_RSCRIPT + " " + PATH_TO_RESOURCES + "plot_indelQuality.R" + " " + - OUTPUT_DIR + readGroup + "." + cov.getClass().getSimpleName()+ ".dat" + " " + - cov.getClass().getSimpleName().split("Covariate")[0]); // The third argument is the name of the covariate in order to make the plots look nice - p.waitFor(); - - } else { + final String outputFilename = OUTPUT_DIR + readGroup + "." + cov.getClass().getSimpleName()+ ".dat"; + if (DO_INDEL_QUALITY) { + executor.callRScripts("plot_indelQuality.R", outputFilename, + cov.getClass().getSimpleName().split("Covariate")[0]); // The third argument is the name of the covariate in order to make the plots look nice + } else { if( iii == 1 ) { - // Analyze reported quality - p = Runtime.getRuntime().exec(PATH_TO_RSCRIPT + " " + PATH_TO_RESOURCES + "plot_residualError_QualityScoreCovariate.R" + " " + - OUTPUT_DIR + readGroup + "." 
+ cov.getClass().getSimpleName()+ ".dat" + " " + - IGNORE_QSCORES_LESS_THAN + " " + MAX_QUALITY_SCORE + " " + MAX_HISTOGRAM_VALUE); // The third argument is the Q scores that should be turned pink in the plot because they were ignored - p.waitFor(); - } else { // Analyze all other covariates - p = Runtime.getRuntime().exec(PATH_TO_RSCRIPT + " " + PATH_TO_RESOURCES + "plot_residualError_OtherCovariate.R" + " " + - OUTPUT_DIR + readGroup + "." + cov.getClass().getSimpleName()+ ".dat" + " " + - cov.getClass().getSimpleName().split("Covariate")[0]); // The third argument is the name of the covariate in order to make the plots look nice - p.waitFor(); - } + // Analyze reported quality + executor.callRScripts("plot_residualError_QualityScoreCovariate.R", outputFilename, + IGNORE_QSCORES_LESS_THAN, MAX_QUALITY_SCORE, MAX_HISTOGRAM_VALUE); // The third argument is the Q scores that should be turned pink in the plot because they were ignored + } else { // Analyze all other covariates + executor.callRScripts("plot_residualError_OtherCovariate.R", outputFilename, + cov.getClass().getSimpleName().split("Covariate")[0]); // The third argument is the name of the covariate in order to make the plots look nice } - } catch (InterruptedException e) { - e.printStackTrace(); - System.exit(-1); - } catch (IOException e) { - System.out.println("Fatal Exception: Perhaps RScript jobs are being spawned too quickly? 
One work around is to process fewer read groups using the -numRG option."); - e.printStackTrace(); - System.exit(-1); } } } else { // at the maximum number of read groups so break out diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java index ff992d77d..16358d05f 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java @@ -379,10 +379,14 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor { } if ( tribbleType == null ) - throw new UserException.CommandLineException( - String.format("No tribble type was provided on the command line and the type of the file could not be determined dynamically. " + - "Please add an explicit type tag :NAME listing the correct type from among the supported types:%n%s", - manager.userFriendlyListOfAvailableFeatures(parameterType))); + if ( ! file.canRead() || ! file.isFile() ) { + throw new UserException.BadArgumentValue(name, "Couldn't read file to determine type: " + file); + } else { + throw new UserException.CommandLineException( + String.format("No tribble type was provided on the command line and the type of the file could not be determined dynamically. 
" + + "Please add an explicit type tag :NAME listing the correct type from among the supported types:%n%s", + manager.userFriendlyListOfAvailableFeatures(parameterType))); + } } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java b/public/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java index 48fd73e0b..65ff27497 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java +++ b/public/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java @@ -48,9 +48,10 @@ public class LinearMicroScheduler extends MicroScheduler { walker.initialize(); Accumulator accumulator = Accumulator.create(engine,walker); + boolean done = walker.isDone(); int counter = 0; for (Shard shard : shardStrategy ) { - if ( shard == null ) // we ran out of shards that aren't owned + if ( done || shard == null ) // we ran out of shards that aren't owned break; if(shard.getShardType() == Shard.ShardType.LOCUS) { @@ -61,6 +62,7 @@ public class LinearMicroScheduler extends MicroScheduler { Object result = traversalEngine.traverse(walker, dataProvider, accumulator.getReduceInit()); accumulator.accumulate(dataProvider,result); dataProvider.close(); + if ( walker.isDone() ) break; } windowMaker.close(); } @@ -70,6 +72,8 @@ public class LinearMicroScheduler extends MicroScheduler { accumulator.accumulate(dataProvider,result); dataProvider.close(); } + + done = walker.isDone(); } Object result = accumulator.finishTraversal(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java b/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java index acee1a6a3..4d94130a8 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java +++ b/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java @@ -46,7 +46,6 @@ import org.simpleframework.xml.stream.Format; import 
org.simpleframework.xml.stream.HyphenStyle; import java.io.*; -import java.net.InetAddress; import java.security.NoSuchAlgorithmException; import java.text.DateFormat; import java.text.SimpleDateFormat; @@ -230,22 +229,6 @@ public class GATKRunReport { } - /** - * Helper utility that calls into the InetAddress system to resolve the hostname. If this fails, - * unresolvable gets returned instead. - * - * @return - */ - private String resolveHostname() { - try { - return InetAddress.getLocalHost().getCanonicalHostName(); - } - catch (java.net.UnknownHostException uhe) { - return "unresolvable"; - // handle exception - } - } - public void postReport(PhoneHomeOption type) { logger.debug("Posting report of type " + type); switch (type) { @@ -325,7 +308,7 @@ public class GATKRunReport { private void postReportToAWSS3() { // modifying example code from http://jets3t.s3.amazonaws.com/toolkit/code-samples.html - this.hostName = resolveHostname(); // we want to fill in the host name + this.hostName = Utils.resolveHostname(); // we want to fill in the host name File localFile = postReportToLocalDisk(new File("./")); logger.debug("Generating GATK report to AWS S3 based on local file " + localFile); if ( localFile != null ) { // we succeeded in creating the local file diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrack.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrack.java index ba1ca674e..a97f3211c 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrack.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrack.java @@ -25,6 +25,7 @@ package org.broadinstitute.sting.gatk.refdata.tracks; import net.sf.samtools.SAMSequenceDictionary; import net.sf.samtools.util.CloseableIterator; +import org.apache.log4j.Logger; import org.broad.tribble.FeatureCodec; import org.broad.tribble.FeatureSource; import 
org.broadinstitute.sting.gatk.refdata.utils.FeatureToGATKFeatureIterator; @@ -45,10 +46,11 @@ import java.io.IOException; * the basics of what a reference metadata track must contain. */ public class RMDTrack { + private final static Logger logger = Logger.getLogger(RMDTrackBuilder.class); + private final static boolean DEBUG = false; // the basics of a track: private final Class type; // our type - private final Class recordType; // the underlying records that are produced by this track private final String name; // the name private final File file; // the associated file we create the reader from @@ -90,7 +92,6 @@ public class RMDTrack { */ public RMDTrack(Class type, String name, File file, FeatureSource reader, SAMSequenceDictionary dict, GenomeLocParser genomeLocParser, FeatureCodec codec) { this.type = type; - this.recordType = codec.getFeatureType(); this.name = name; this.file = file; this.reader = reader; @@ -112,19 +113,8 @@ public class RMDTrack { } public CloseableIterator query(GenomeLoc interval) throws IOException { - return new FeatureToGATKFeatureIterator(genomeLocParser,reader.query(interval.getContig(),interval.getStart(),interval.getStop()),this.getName()); - } - - public CloseableIterator query(GenomeLoc interval, boolean contained) throws IOException { - return new FeatureToGATKFeatureIterator(genomeLocParser,reader.query(interval.getContig(),interval.getStart(),interval.getStop()),this.getName()); - } - - public CloseableIterator query(String contig, int start, int stop) throws IOException { - return new FeatureToGATKFeatureIterator(genomeLocParser,reader.query(contig,start,stop),this.getName()); - } - - public CloseableIterator query(String contig, int start, int stop, boolean contained) throws IOException { - return new FeatureToGATKFeatureIterator(genomeLocParser,reader.query(contig,start,stop),this.getName()); + if ( DEBUG ) logger.debug("Issuing query for %s: " + interval); + return new FeatureToGATKFeatureIterator(genomeLocParser, 
reader.query(interval.getContig(),interval.getStart(),interval.getStop()), this.getName()); } public void close() { diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java index 152e1a57b..3e3aa29a7 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java @@ -92,6 +92,8 @@ import java.util.regex.Pattern; * @author Khalid Shakir */ public class GATKReportTable { + /** REGEX that matches any table with an invalid name */ + public final static String INVALID_TABLE_NAME_REGEX = "[^a-zA-Z0-9_\\-\\.]"; private static final GATKReportVersion LATEST_REPORT_VERSION = GATKReportVersion.V0_2; private String tableName; private String tableDescription; @@ -111,7 +113,7 @@ public class GATKReportTable { * @return true if the name is valid, false if otherwise */ private boolean isValidName(String name) { - Pattern p = Pattern.compile("[^a-zA-Z0-9_\\-\\.]"); + Pattern p = Pattern.compile(INVALID_TABLE_NAME_REGEX); Matcher m = p.matcher(name); return !m.find(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseDuplicates.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseDuplicates.java index 1ba48ca5f..046003154 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseDuplicates.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseDuplicates.java @@ -173,7 +173,9 @@ public class TraverseDuplicates extends TraversalEngine those with the same mate pair position, for paired reads * -> those flagged as unpaired and duplicated but having the same start and end */ + boolean done = walker.isDone(); for (SAMRecord read : iter) { + if ( done ) break; // get the genome loc from the read GenomeLoc site = engine.getGenomeLocParser().createGenomeLoc(read); @@ -194,6 +196,7 @@ public class 
TraverseDuplicates extends TraversalEngine extends TraversalEngine,Locu logger.debug(String.format("TraverseLoci.traverse: Shard is %s", dataProvider)); LocusView locusView = getLocusView( walker, dataProvider ); + boolean done = false; if ( locusView.hasNext() ) { // trivial optimization to avoid unnecessary processing when there's nothing here at all @@ -46,7 +47,7 @@ public class TraverseLoci extends TraversalEngine,Locu LocusReferenceView referenceView = new LocusReferenceView( walker, dataProvider ); // We keep processing while the next reference location is within the interval - while( locusView.hasNext() ) { + while( locusView.hasNext() && ! done ) { AlignmentContext locus = locusView.next(); GenomeLoc location = locus.getLocation(); @@ -76,15 +77,17 @@ public class TraverseLoci extends TraversalEngine,Locu if (keepMeP) { M x = walker.map(tracker, refContext, locus); sum = walker.reduce(x, sum); + done = walker.isDone(); } printProgress(dataProvider.getShard(),locus.getLocation()); } } - // We have a final map call to execute here to clean up the skipped based from the - // last position in the ROD to that in the interval - if ( WalkerManager.getWalkerDataSource(walker) == DataSource.REFERENCE_ORDERED_DATA ) { + // We have a final map call to execute here to clean up the skipped based from the + // last position in the ROD to that in the interval + if ( WalkerManager.getWalkerDataSource(walker) == DataSource.REFERENCE_ORDERED_DATA && ! walker.isDone() ) { + // only do this if the walker isn't done! 
RodLocusView rodLocusView = (RodLocusView)locusView; long nSkipped = rodLocusView.getLastSkippedBases(); if ( nSkipped > 0 ) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadPairs.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadPairs.java index 196d54036..dd4402d82 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadPairs.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadPairs.java @@ -50,7 +50,9 @@ public class TraverseReadPairs extends TraversalEngine pairs = new ArrayList(); + boolean done = walker.isDone(); for(SAMRecord read: reads) { + if ( done ) break; dataProvider.getShard().getReadMetrics().incrementNumReadsSeen(); if(pairs.size() == 0 || pairs.get(0).getReadName().equals(read.getReadName())) { @@ -65,6 +67,8 @@ public class TraverseReadPairs extends TraversalEngine extends TraversalEngine,Read // get the reference ordered data ReadBasedReferenceOrderedView rodView = new ReadBasedReferenceOrderedView(dataProvider); + boolean done = walker.isDone(); // while we still have more reads for (SAMRecord read : reads) { + if ( done ) break; // ReferenceContext -- the reference bases covered by the read ReferenceContext refContext = null; @@ -106,6 +108,7 @@ public class TraverseReads extends TraversalEngine,Read GenomeLoc locus = read.getReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX ? 
null : engine.getGenomeLocParser().createGenomeLoc(read.getReferenceName(),read.getAlignmentStart()); printProgress(dataProvider.getShard(),locus); + done = walker.isDone(); } return sum; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java index 9e261a0b1..c88c7c3c4 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java @@ -126,6 +126,17 @@ public abstract class Walker { public void initialize() { } + /** + * A function for overloading in subclasses providing a mechanism to abort early from a walker. + * + * If this ever returns true, then the Traversal engine will stop executing map calls + * and start the process of shutting down the walker in an orderly fashion. + * @return + */ + public boolean isDone() { + return false; + } + /** * Provide an initial value for reduce computations. * @return Initial value of reduce. diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java index 4e3342609..2159bc839 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java @@ -234,7 +234,7 @@ public class DiffEngine { // now that we have a specific list of values we want to show, display them GATKReport report = new GATKReport(); - final String tableName = "diffences"; + final String tableName = "differences"; report.addTable(tableName, "Summarized differences between the master and test files. 
See http://www.broadinstitute.org/gsa/wiki/index.php/DiffEngine for more information", false); GATKReportTable table = report.getTable(tableName); table.addPrimaryKey("Difference", true); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java index e7f89bf08..ae419a5c4 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java @@ -156,7 +156,7 @@ public class UnifiedArgumentCollection { public boolean OUTPUT_DEBUG_INDEL_INFO = false; @Hidden - @Argument(fullName = "dovit", shortName = "dovit", doc = "Output indel debug info", required = false) + @Argument(fullName = "dovit", shortName = "dovit", doc = "Perform full Viterbi calculation when evaluating the HMM", required = false) public boolean dovit = false; @Hidden diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java index 55450486b..2d7969230 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java @@ -274,7 +274,7 @@ public class PairHMMIndelErrorModel { this.doViterbi = dovit; } - public PairHMMIndelErrorModel(double indelGOP, double indelGCP, boolean deb, boolean doCDP) { + public PairHMMIndelErrorModel(double indelGOP, double indelGCP, boolean deb, boolean doCDP) { this.logGapOpenProbability = -indelGOP/10.0; // QUAL to log prob @@ -754,7 +754,7 @@ public class PairHMMIndelErrorModel { // check if we've already computed likelihoods for this pileup element (i.e. 
for this read at this location) if (indelLikelihoodMap.containsKey(p)) { - HashMap el = indelLikelihoodMap.get(p); + HashMap el = indelLikelihoodMap.get(p); int j=0; for (Allele a: haplotypeMap.keySet()) { readLikelihoods[readIdx][j++] = el.get(a); @@ -1055,7 +1055,6 @@ public class PairHMMIndelErrorModel { genotypeLikelihoods[i] -= maxElement; return genotypeLikelihoods; - } /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/GenotypeAndValidateWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/GenotypeAndValidateWalker.java index 2b38afaf6..b616a0ebe 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/GenotypeAndValidateWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/GenotypeAndValidateWalker.java @@ -303,7 +303,10 @@ public class GenotypeAndValidateWalker extends RodWalker= 0) uac.MIN_BASE_QUALTY_SCORE = mbq; - if (deletions >= 0) uac.MAX_DELETION_FRACTION = deletions; + if (deletions >= 0) + uac.MAX_DELETION_FRACTION = deletions; + else + uac.MAX_DELETION_FRACTION = 1.0; if (emitConf >= 0) uac.STANDARD_CONFIDENCE_FOR_EMITTING = emitConf; if (callConf >= 0) uac.STANDARD_CONFIDENCE_FOR_CALLING = callConf; @@ -347,12 +350,14 @@ public class GenotypeAndValidateWalker extends RodWalker * */ @@ -223,6 +241,18 @@ public class SelectVariants extends RodWalker { @Argument(fullName="excludeFiltered", shortName="ef", doc="Don't include filtered loci in the analysis", required=false) private boolean EXCLUDE_FILTERED = false; + + /** + * When this argument is used, we can choose to include only multiallelic or biallelic sites, depending on how many alleles are listed in the ALT column of a vcf. + * For example, a multiallelic record such as: + * 1 100 . A AAA,AAAAA + * will be excluded if "-restrictAllelesTo BIALLELIC" is included, because there are two alternate alleles, whereas a record such as: + * 1 100 . 
A T + * will be included in that case, but would be excluded if "-restrictAllelesTo MULTIALLELIC + */ + @Argument(fullName="restrictAllelesTo", shortName="restrictAllelesTo", doc="Select only variants of a particular allelicity. Valid options are ALL (default), MULTIALLELIC or BIALLELIC", required=false) + private NumberAlleleRestriction alleleRestriction = NumberAlleleRestriction.ALL; + @Argument(fullName="keepOriginalAC", shortName="keepOriginalAC", doc="Don't update the AC, AF, or AN values in the INFO field after selecting", required=false) private boolean KEEP_ORIGINAL_CHR_COUNTS = false; @@ -266,11 +296,14 @@ public class SelectVariants extends RodWalker { @Argument(fullName="select_random_fraction", shortName="fraction", doc="Selects a fraction (a number between 0 and 1) of the total variants at random from the variant track", required=false) private double fractionRandom = 0; - @Argument(fullName="selectSNPs", shortName="snps", doc="Select only SNPs from the input file", required=false) - private boolean SELECT_SNPS = false; + /** + * This argument select particular kinds of variants out of a list. If left empty, there is no type selection and all variant types are considered for other selection criteria. + * When specified one or more times, a particular type of variant is selected. + * + */ + @Argument(fullName="selectTypeToInclude", shortName="selectType", doc="Select only a certain type of variants from the input file. Valid types are INDEL, SNP, MIXED, MNP, SYMBOLIC, NO_VARIATION. Can be specified multiple times", required=false) + private List TYPES_TO_INCLUDE = new ArrayList(); - @Argument(fullName="selectIndels", shortName="indels", doc="Select only indels from the input file", required=false) - private boolean SELECT_INDELS = false; @Hidden @Argument(fullName="outMVFile", shortName="outMVFile", doc="USE YAML FILE INSTEAD (-SM) !!! 
string formatted as dad+mom=child where these parameters determine which sample names are examined", required=false) @@ -290,6 +323,13 @@ public class SelectVariants extends RodWalker { } + public enum NumberAlleleRestriction { + ALL, + BIALLELIC, + MULTIALLELIC + } + + private ArrayList selectedTypes = new ArrayList(); private ArrayList selectNames = new ArrayList(); private List jexls = null; @@ -354,6 +394,20 @@ public class SelectVariants extends RodWalker { for ( String sample : samples ) logger.info("Including sample '" + sample + "'"); + + + // if user specified types to include, add these, otherwise, add all possible variant context types to list of vc types to include + if (TYPES_TO_INCLUDE.isEmpty()) { + + for (VariantContext.Type t : VariantContext.Type.values()) + selectedTypes.add(t); + + } + else { + for (VariantContext.Type t : TYPES_TO_INCLUDE) + selectedTypes.add(t); + + } // Initialize VCF header Set headerLines = VCFUtils.smartMergeHeaders(vcfRods.values(), logger); headerLines.add(new VCFHeaderLine("source", "SelectVariants")); @@ -494,12 +548,13 @@ public class SelectVariants extends RodWalker { return 0; } - // TODO - add ability to also select MNPs - // TODO - move variant selection arguments to the engine so other walkers can also do this - if (SELECT_INDELS && !(vc.isIndel() || vc.isMixed())) + if (alleleRestriction.equals(NumberAlleleRestriction.BIALLELIC) && !vc.isBiallelic()) continue; - if (SELECT_SNPS && !vc.isSNP()) + if (alleleRestriction.equals(NumberAlleleRestriction.MULTIALLELIC) && vc.isBiallelic()) + continue; + + if (!selectedTypes.contains(vc.getType())) continue; VariantContext sub = subsetRecord(vc, samples); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java index 19db58e0c..2a877fb09 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java 
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java @@ -124,8 +124,12 @@ public class VariantsToTable extends RodWalker { * multi-allelic INFO field values can be lists of values. */ @Advanced - @Argument(fullName="keepMultiAllelic", shortName="KMA", doc="If provided, we will not require the site to be biallelic", required=false) - public boolean keepMultiAllelic = false; + @Argument(fullName="keepMultiAllelic", shortName="KMA", doc="If provided, we will not require the site to be biallelic", required=false) + public boolean keepMultiAllelic = false; + + @Hidden + @Argument(fullName="logACSum", shortName="logACSum", doc="Log sum of AC instead of max value in case of multiallelic variants", required=false) + public boolean logACSum = false; /** * By default, this tool throws a UserException when it encounters a field without a value in some record. This @@ -147,22 +151,21 @@ public class VariantsToTable extends RodWalker { if ( tracker == null ) // RodWalkers can make funky map calls return 0; - if ( ++nRecords < MAX_RECORDS || MAX_RECORDS == -1 ) { - for ( VariantContext vc : tracker.getValues(variantCollection.variants, context.getLocation())) { - if ( (keepMultiAllelic || vc.isBiallelic()) && ( showFiltered || vc.isNotFiltered() ) ) { - List vals = extractFields(vc, fieldsToTake, ALLOW_MISSING_DATA); - out.println(Utils.join("\t", vals)); - } + for ( VariantContext vc : tracker.getValues(variantCollection.variants, context.getLocation())) { + if ( (keepMultiAllelic || vc.isBiallelic()) && ( showFiltered || vc.isNotFiltered() ) ) { + List vals = extractFields(vc, fieldsToTake, ALLOW_MISSING_DATA, keepMultiAllelic, logACSum); + out.println(Utils.join("\t", vals)); } - - return 1; - } else { - if ( nRecords >= MAX_RECORDS ) { - logger.warn("Calling sys exit to leave after " + nRecords + " records"); - System.exit(0); // todo -- what's the recommend way to abort like this? 
- } - return 0; } + + return 1; + } + + @Override + public boolean isDone() { + boolean done = MAX_RECORDS != -1 && nRecords >= MAX_RECORDS; + if ( done) logger.warn("isDone() will return true to leave after " + nRecords + " records"); + return done ; } private static final boolean isWildCard(String s) { @@ -176,9 +179,11 @@ public class VariantsToTable extends RodWalker { * @param fields a non-null list of fields to capture from VC * @param allowMissingData if false, then throws a UserException if any field isn't found in vc. Otherwise * provides a value of NA + * @param kma if true, multiallelic variants are to be kept + * @param logsum if true, AF and AC are computed based on sum of allele counts. Otherwise, based on allele with highest count. * @return */ - public static List extractFields(VariantContext vc, List fields, boolean allowMissingData) { + private static List extractFields(VariantContext vc, List fields, boolean allowMissingData, boolean kma, boolean logsum) { List vals = new ArrayList(); for ( String field : fields ) { @@ -206,12 +211,31 @@ public class VariantsToTable extends RodWalker { } if (field.equals("AF") || field.equals("AC")) { - if (val.contains(",")) { - // strip [,] and spaces - val = val.replace("[",""); - val = val.replace("]",""); - val = val.replace(" ",""); - } + String afo = val; + + double af=0; + if (afo.contains(",")) { + String[] afs = afo.split(","); + afs[0] = afs[0].substring(1,afs[0].length()); + afs[afs.length-1] = afs[afs.length-1].substring(0,afs[afs.length-1].length()-1); + + double[] afd = new double[afs.length]; + + for (int k=0; k < afd.length; k++) + afd[k] = Double.valueOf(afs[k]); + + if (kma && logsum) + af = MathUtils.sum(afd); + else + af = MathUtils.arrayMax(afd); + //af = Double.valueOf(afs[0]); + + } + else + if (!afo.equals("NA")) + af = Double.valueOf(afo); + + val = Double.toString(af); } vals.add(val); @@ -220,6 +244,9 @@ public class VariantsToTable extends RodWalker { return vals; } + public static 
List extractFields(VariantContext vc, List fields, boolean allowMissingData) { + return extractFields(vc, fields, allowMissingData, false, false); + } // // default reduce -- doesn't do anything at all // @@ -243,7 +270,7 @@ public class VariantsToTable extends RodWalker { getters.put("REF", new Getter() { public String get(VariantContext vc) { String x = ""; - if ( vc.hasReferenceBaseForIndel() ) { + if ( vc.hasReferenceBaseForIndel() && !vc.isSNP() ) { Byte refByte = vc.getReferenceBaseForIndel(); x=x+new String(new byte[]{refByte}); } @@ -255,7 +282,7 @@ public class VariantsToTable extends RodWalker { StringBuilder x = new StringBuilder(); int n = vc.getAlternateAlleles().size(); if ( n == 0 ) return "."; - if ( vc.hasReferenceBaseForIndel() ) { + if ( vc.hasReferenceBaseForIndel() && !vc.isSNP() ) { Byte refByte = vc.getReferenceBaseForIndel(); x.append(new String(new byte[]{refByte})); } diff --git a/public/java/src/org/broadinstitute/sting/utils/R/RScriptExecutor.java b/public/java/src/org/broadinstitute/sting/utils/R/RScriptExecutor.java new file mode 100644 index 000000000..c0493fe22 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/R/RScriptExecutor.java @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.R; + +import org.apache.commons.io.FileUtils; +import org.apache.log4j.Logger; +import org.broadinstitute.sting.commandline.Advanced; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.ArgumentCollection; +import org.broadinstitute.sting.gatk.walkers.recalibration.Covariate; +import org.broadinstitute.sting.utils.PathUtils; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.exceptions.UserException; + +import java.io.File; +import java.io.IOException; +import java.util.Arrays; +import java.util.List; + +/** + * Generic service for executing RScripts in the GATK directory + * + * @author Your Name + * @since Date created + */ +public class RScriptExecutor { + /** + * our log + */ + protected static Logger logger = Logger.getLogger(RScriptExecutor.class); + + public static class RScriptArgumentCollection { + @Advanced + @Argument(fullName = "path_to_Rscript", shortName = "Rscript", doc = "The path to your implementation of Rscript. 
For Broad users this is maybe /broad/tools/apps/R-2.6.0/bin/Rscript", required = false) + public String PATH_TO_RSCRIPT = "Rscript"; + + @Advanced + @Argument(fullName = "path_to_Rresources", shortName = "Rresources", doc = "Path to resources folder holding the Sting R scripts.", required = false) + public List PATH_TO_RESOURCES = Arrays.asList("public/R/", "private/R/"); + + public RScriptArgumentCollection() {} + + /** For testing and convenience */ + public RScriptArgumentCollection(final String PATH_TO_RSCRIPT, final List PATH_TO_RESOURCES) { + this.PATH_TO_RSCRIPT = PATH_TO_RSCRIPT; + this.PATH_TO_RESOURCES = PATH_TO_RESOURCES; + } + } + + final RScriptArgumentCollection myArgs; + final boolean exceptOnError; + + public RScriptExecutor(final RScriptArgumentCollection myArgs, final boolean exceptOnError) { + this.myArgs = myArgs; + this.exceptOnError = exceptOnError; + } + + public void callRScripts(String scriptName, Object... scriptArgs) { + callRScripts(scriptName, Arrays.asList(scriptArgs)); + } + + public void callRScripts(String scriptName, List scriptArgs) { + try { + final File pathToScript = findScript(scriptName); + if ( pathToScript == null ) return; // we failed but shouldn't exception out + final String argString = Utils.join(" ", scriptArgs); + final String cmdLine = Utils.join(" ", Arrays.asList(myArgs.PATH_TO_RSCRIPT, pathToScript, argString)); + logger.info("Executing RScript: " + cmdLine); + Runtime.getRuntime().exec(cmdLine).waitFor(); + } catch (InterruptedException e) { + generateException(e); + } catch (IOException e) { + generateException("Fatal Exception: Perhaps RScript jobs are being spawned too quickly?", e); + } + } + + public File findScript(final String scriptName) { + for ( String pathToResource : myArgs.PATH_TO_RESOURCES ) { + final File f = new File(pathToResource + "/" + scriptName); + if ( f.exists() ) { + if ( f.canRead() ) + return f; + else + generateException("Script exists but couldn't be read: " + scriptName); + } + } + 
+ generateException("Couldn't find script: " + scriptName + " in " + myArgs.PATH_TO_RESOURCES); + return null; + } + + private void generateException(String msg) { + generateException(msg, null); + } + + private void generateException(Throwable e) { + generateException("", e); + } + + private void generateException(String msg, Throwable e) { + if ( exceptOnError ) + throw new UserException(msg, e); + else + logger.warn(msg + (e == null ? "" : ":" + e.getMessage())); + } +} diff --git a/public/java/src/org/broadinstitute/sting/utils/Utils.java b/public/java/src/org/broadinstitute/sting/utils/Utils.java index 015e5d6f6..f6edb319f 100755 --- a/public/java/src/org/broadinstitute/sting/utils/Utils.java +++ b/public/java/src/org/broadinstitute/sting/utils/Utils.java @@ -29,6 +29,7 @@ import net.sf.samtools.util.StringUtil; import org.apache.log4j.Logger; import org.broadinstitute.sting.utils.collections.Pair; +import java.net.InetAddress; import java.util.*; /** @@ -633,4 +634,20 @@ public class Utils { public static boolean isFlagSet(int value, int flag) { return ((value & flag) == flag); } + + /** + * Helper utility that calls into the InetAddress system to resolve the hostname. If this fails, + * unresolvable gets returned instead. 
+ * + * @return + */ + public static final String resolveHostname() { + try { + return InetAddress.getLocalHost().getCanonicalHostName(); + } + catch (java.net.UnknownHostException uhe) { // [beware typo in code sample -dmw] + return "unresolvable"; + // handle exception + } + } } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java index a3100030e..bb212e128 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java @@ -281,7 +281,7 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec, VariantContext vc = null; try { - vc = new VariantContext(name, contig, pos, loc, alleles, qual, filters, attributes); + vc = new VariantContext(name, contig, pos, loc, alleles, qual, filters, attributes, ref.getBytes()[0]); } catch (Exception e) { generateException(e.getMessage()); } @@ -290,8 +290,7 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec, if ( !header.samplesWereAlreadySorted() ) vc.getGenotypes(); - // Trim bases of all alleles if necessary - return createVariantContextWithTrimmedAlleles(vc); + return vc; } /** @@ -516,25 +515,44 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec, return true; } - private static int computeForwardClipping(List unclippedAlleles, String ref) { + public static int computeForwardClipping(List unclippedAlleles, String ref) { boolean clipping = true; - // Note that the computation of forward clipping here is meant only to see whether there is a common - // base to all alleles, and to correctly compute reverse clipping, - // but it is not used for actually changing alleles - this is done in function - // createVariantContextWithTrimmedAlleles() below. 
- for (Allele a : unclippedAlleles) { - if (a.isSymbolic()) { + for ( Allele a : unclippedAlleles ) { + if ( a.isSymbolic() ) continue; - } - if (a.length() < 1 || (a.getBases()[0] != ref.getBytes()[0])) { + + if ( a.length() < 1 || (a.getBases()[0] != ref.getBytes()[0]) ) { clipping = false; + break; } } - return (clipping) ? 1 : 0; + return (clipping) ? 1 : 0; } + protected static int computeReverseClipping(List unclippedAlleles, String ref, int forwardClipping, int lineNo) { + int clipping = 0; + boolean stillClipping = true; + + while ( stillClipping ) { + for ( Allele a : unclippedAlleles ) { + if ( a.isSymbolic() ) + continue; + + if ( a.length() - clipping <= forwardClipping || a.length() - forwardClipping == 0 ) + stillClipping = false; + else if ( ref.length() == clipping ) + generateException("bad alleles encountered", lineNo); + else if ( a.getBases()[a.length()-clipping-1] != ref.getBytes()[ref.length()-clipping-1] ) + stillClipping = false; + } + if ( stillClipping ) + clipping++; + } + + return clipping; + } /** * clip the alleles, based on the reference * @@ -542,122 +560,30 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec, * @param ref the reference string * @param unclippedAlleles the list of unclipped alleles * @param clippedAlleles output list of clipped alleles + * @param lineNo the current line number in the file * @return the new reference end position of this event */ protected static int clipAlleles(int position, String ref, List unclippedAlleles, List clippedAlleles, int lineNo) { - // Note that the computation of forward clipping here is meant only to see whether there is a common - // base to all alleles, and to correctly compute reverse clipping, - // but it is not used for actually changing alleles - this is done in function - // createVariantContextWithTrimmedAlleles() below. 
- int forwardClipping = computeForwardClipping(unclippedAlleles, ref); - - int reverseClipped = 0; - boolean clipping = true; - while (clipping) { - for (Allele a : unclippedAlleles) { - if (a.isSymbolic()) { - continue; - } - if (a.length() - reverseClipped <= forwardClipping || a.length() - forwardClipping == 0) - clipping = false; - else if (ref.length() == reverseClipped) - generateException("bad alleles encountered", lineNo); - else if (a.getBases()[a.length()-reverseClipped-1] != ref.getBytes()[ref.length()-reverseClipped-1]) - clipping = false; - } - if (clipping) reverseClipped++; - } + int reverseClipping = computeReverseClipping(unclippedAlleles, ref, forwardClipping, lineNo); if ( clippedAlleles != null ) { for ( Allele a : unclippedAlleles ) { if ( a.isSymbolic() ) { clippedAlleles.add(a); } else { - clippedAlleles.add(Allele.create(Arrays.copyOfRange(a.getBases(),0,a.getBases().length-reverseClipped),a.isReference())); + clippedAlleles.add(Allele.create(Arrays.copyOfRange(a.getBases(), forwardClipping, a.getBases().length-reverseClipping), a.isReference())); } } } // the new reference length - int refLength = ref.length() - reverseClipped; + int refLength = ref.length() - reverseClipping; return position+Math.max(refLength - 1,0); } - public static VariantContext createVariantContextWithTrimmedAlleles(VariantContext inputVC) { - // see if we need to trim common reference base from all alleles - boolean trimVC; - - // We need to trim common reference base from all alleles in all genotypes if a ref base is common to all alleles - Allele refAllele = inputVC.getReference(); - if (!inputVC.isVariant()) - trimVC = false; - else if (refAllele.isNull()) - trimVC = false; - else { - trimVC = (computeForwardClipping(new ArrayList(inputVC.getAlternateAlleles()), - inputVC.getReference().getDisplayString()) > 0); - } - - // nothing to do if we don't need to trim bases - if (trimVC) { - List alleles = new ArrayList(); - Map genotypes = new TreeMap(); - - // set the 
reference base for indels in the attributes - Map attributes = new TreeMap(inputVC.getAttributes()); - - Map originalToTrimmedAlleleMap = new HashMap(); - - for (Allele a : inputVC.getAlleles()) { - if (a.isSymbolic()) { - alleles.add(a); - originalToTrimmedAlleleMap.put(a, a); - } else { - // get bases for current allele and create a new one with trimmed bases - byte[] newBases = Arrays.copyOfRange(a.getBases(), 1, a.length()); - Allele trimmedAllele = Allele.create(newBases, a.isReference()); - alleles.add(trimmedAllele); - originalToTrimmedAlleleMap.put(a, trimmedAllele); - } - } - - // detect case where we're trimming bases but resulting vc doesn't have any null allele. In that case, we keep original representation - // example: mixed records such as {TA*,TGA,TG} - boolean hasNullAlleles = false; - - for (Allele a: originalToTrimmedAlleleMap.values()) { - if (a.isNull()) - hasNullAlleles = true; - if (a.isReference()) - refAllele = a; - } - - if (!hasNullAlleles) - return inputVC; - // now we can recreate new genotypes with trimmed alleles - for ( Map.Entry sample : inputVC.getGenotypes().entrySet() ) { - - List originalAlleles = sample.getValue().getAlleles(); - List trimmedAlleles = new ArrayList(); - for ( Allele a : originalAlleles ) { - if ( a.isCalled() ) - trimmedAlleles.add(originalToTrimmedAlleleMap.get(a)); - else - trimmedAlleles.add(Allele.NO_CALL); - } - genotypes.put(sample.getKey(), Genotype.modifyAlleles(sample.getValue(), trimmedAlleles)); - - } - return new VariantContext(inputVC.getSource(), inputVC.getChr(), inputVC.getStart(), inputVC.getEnd(), alleles, genotypes, inputVC.getNegLog10PError(), inputVC.filtersWereApplied() ? 
inputVC.getFilters() : null, attributes, new Byte(inputVC.getReference().getBases()[0])); - - } - - return inputVC; - } - public final static boolean canDecodeFile(final File potentialInput, final String MAGIC_HEADER_LINE) { try { return isVCFStream(new FileInputStream(potentialInput), MAGIC_HEADER_LINE) || diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFUtils.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFUtils.java index c0a04c81f..2d8421507 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFUtils.java @@ -116,10 +116,26 @@ public class VCFUtils { return fields; } - + /** Only displays a warning if a logger is provided and an identical warning hasn't been already issued */ + private static final class HeaderConflictWarner { + Logger logger; + Set alreadyIssued = new HashSet(); + + private HeaderConflictWarner(final Logger logger) { + this.logger = logger; + } + + public void warn(final VCFHeaderLine line, final String msg) { + if ( logger != null && ! alreadyIssued.contains(line.getKey()) ) { + alreadyIssued.add(line.getKey()); + logger.warn(msg); + } + } + } public static Set smartMergeHeaders(Collection headers, Logger logger) throws IllegalStateException { HashMap map = new HashMap(); // from KEY.NAME -> line + HeaderConflictWarner conflictWarner = new HeaderConflictWarner(logger); // todo -- needs to remove all version headers from sources and add its own VCF version line for ( VCFHeader source : headers ) { @@ -152,24 +168,24 @@ public class VCFUtils { // number, then this value should be 1. However, if the INFO field describes a pair // of numbers, then this value should be 2 and so on. If the number of possible // values varies, is unknown, or is unbounded, then this value should be '.'. - if ( logger != null ) logger.warn("Promoting header field Number to . 
due to number differences in header lines: " + line + " " + other); + conflictWarner.warn(line, "Promoting header field Number to . due to number differences in header lines: " + line + " " + other); compOther.setNumberToUnbounded(); } else if ( compLine.getType() == VCFHeaderLineType.Integer && compOther.getType() == VCFHeaderLineType.Float ) { // promote key to Float - if ( logger != null ) logger.warn("Promoting Integer to Float in header: " + compOther); + conflictWarner.warn(line, "Promoting Integer to Float in header: " + compOther); map.put(key, compOther); } else if ( compLine.getType() == VCFHeaderLineType.Float && compOther.getType() == VCFHeaderLineType.Integer ) { // promote key to Float - if ( logger != null ) logger.warn("Promoting Integer to Float in header: " + compOther); + conflictWarner.warn(line, "Promoting Integer to Float in header: " + compOther); } else { throw new IllegalStateException("Incompatible header types, collision between these two types: " + line + " " + other ); } } if ( ! 
compLine.getDescription().equals(compOther) ) - if ( logger != null ) logger.warn("Allowing unequal description fields through: keeping " + compOther + " excluding " + compLine); + conflictWarner.warn(line, "Allowing unequal description fields through: keeping " + compOther + " excluding " + compLine); } else { // we are not equal, but we're not anything special either - if ( logger != null ) logger.warn("Ignoring header line already in map: this header line = " + line + " already present header = " + other); + conflictWarner.warn(line, "Ignoring header line already in map: this header line = " + line + " already present header = " + other); } } else { map.put(key, line); diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java index 888dc1e98..673fe4529 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java @@ -209,6 +209,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati /** * the complete constructor. Makes a complete VariantContext from its arguments + * This is the only constructor that is able to create indels! DO NOT USE THE OTHER ONES. 
* * @param source source * @param contig the contig @@ -257,9 +258,10 @@ public class VariantContext implements Feature { // to enable tribble intergrati * @param negLog10PError qual * @param filters filters: use null for unfiltered and empty set for passes filters * @param attributes attributes + * @param referenceBaseForIndel padded reference base */ - public VariantContext(String source, String contig, long start, long stop, Collection alleles, double negLog10PError, Set filters, Map attributes) { - this(source, contig, start, stop, alleles, NO_GENOTYPES, negLog10PError, filters, attributes, null, true); + public VariantContext(String source, String contig, long start, long stop, Collection alleles, double negLog10PError, Set filters, Map attributes, Byte referenceBaseForIndel) { + this(source, contig, start, stop, alleles, NO_GENOTYPES, negLog10PError, filters, attributes, referenceBaseForIndel, true); } /** @@ -293,7 +295,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati } /** - * Create a new variant context without genotypes and no Perror, no filters, and no attributes + * Create a new variant context with genotypes but without Perror, filters, and attributes * * @param source source * @param contig the contig diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java index 834ad0917..986d6305c 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java @@ -657,12 +657,84 @@ public class VariantContextUtils { VariantContext merged = new VariantContext(name, loc.getContig(), loc.getStart(), loc.getStop(), alleles, genotypes, negLog10PError, filters, (mergeInfoWithMaxAC ? 
attributesWithMaxAC : attributes) ); // Trim the padded bases of all alleles if necessary - merged = AbstractVCFCodec.createVariantContextWithTrimmedAlleles(merged); + merged = createVariantContextWithTrimmedAlleles(merged); if ( printMessages && remapped ) System.out.printf("Remapped => %s%n", merged); return merged; } + public static VariantContext createVariantContextWithTrimmedAlleles(VariantContext inputVC) { + // see if we need to trim common reference base from all alleles + boolean trimVC; + + // We need to trim common reference base from all alleles in all genotypes if a ref base is common to all alleles + Allele refAllele = inputVC.getReference(); + if (!inputVC.isVariant()) + trimVC = false; + else if (refAllele.isNull()) + trimVC = false; + else { + trimVC = (AbstractVCFCodec.computeForwardClipping(new ArrayList(inputVC.getAlternateAlleles()), + inputVC.getReference().getDisplayString()) > 0); + } + + // nothing to do if we don't need to trim bases + if (trimVC) { + List alleles = new ArrayList(); + Map genotypes = new TreeMap(); + + // set the reference base for indels in the attributes + Map attributes = new TreeMap(inputVC.getAttributes()); + + Map originalToTrimmedAlleleMap = new HashMap(); + + for (Allele a : inputVC.getAlleles()) { + if (a.isSymbolic()) { + alleles.add(a); + originalToTrimmedAlleleMap.put(a, a); + } else { + // get bases for current allele and create a new one with trimmed bases + byte[] newBases = Arrays.copyOfRange(a.getBases(), 1, a.length()); + Allele trimmedAllele = Allele.create(newBases, a.isReference()); + alleles.add(trimmedAllele); + originalToTrimmedAlleleMap.put(a, trimmedAllele); + } + } + + // detect case where we're trimming bases but resulting vc doesn't have any null allele. 
In that case, we keep original representation + // example: mixed records such as {TA*,TGA,TG} + boolean hasNullAlleles = false; + + for (Allele a: originalToTrimmedAlleleMap.values()) { + if (a.isNull()) + hasNullAlleles = true; + if (a.isReference()) + refAllele = a; + } + + if (!hasNullAlleles) + return inputVC; + // now we can recreate new genotypes with trimmed alleles + for ( Map.Entry sample : inputVC.getGenotypes().entrySet() ) { + + List originalAlleles = sample.getValue().getAlleles(); + List trimmedAlleles = new ArrayList(); + for ( Allele a : originalAlleles ) { + if ( a.isCalled() ) + trimmedAlleles.add(originalToTrimmedAlleleMap.get(a)); + else + trimmedAlleles.add(Allele.NO_CALL); + } + genotypes.put(sample.getKey(), Genotype.modifyAlleles(sample.getValue(), trimmedAlleles)); + + } + return new VariantContext(inputVC.getSource(), inputVC.getChr(), inputVC.getStart(), inputVC.getEnd(), alleles, genotypes, inputVC.getNegLog10PError(), inputVC.filtersWereApplied() ? inputVC.getFilters() : null, attributes, new Byte(inputVC.getReference().getBases()[0])); + + } + + return inputVC; + } + public static Map stripPLs(Map genotypes) { Map newGs = new HashMap(genotypes.size()); diff --git a/public/java/test/org/broadinstitute/sting/commandline/ParsingEngineUnitTest.java b/public/java/test/org/broadinstitute/sting/commandline/ParsingEngineUnitTest.java index 013a37a88..f04731214 100755 --- a/public/java/test/org/broadinstitute/sting/commandline/ParsingEngineUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/commandline/ParsingEngineUnitTest.java @@ -40,6 +40,8 @@ import java.util.EnumSet; * Test suite for the parsing engine. 
*/ public class ParsingEngineUnitTest extends BaseTest { + /** we absolutely cannot have this file existing, or we'll fail the UnitTest */ + private final static String NON_EXISTANT_FILENAME_VCF = "this_file_should_not_exist_on_disk_123456789.vcf"; private ParsingEngine parsingEngine; @BeforeMethod @@ -636,7 +638,7 @@ public class ParsingEngineUnitTest extends BaseTest { @Test public void basicRodBindingArgumentTest() { - final String[] commandLine = new String[] {"-V:vcf","foo.vcf"}; + final String[] commandLine = new String[] {"-V:vcf",NON_EXISTANT_FILENAME_VCF}; parsingEngine.addArgumentSource( SingleRodBindingArgProvider.class ); parsingEngine.parse( commandLine ); @@ -646,7 +648,7 @@ public class ParsingEngineUnitTest extends BaseTest { parsingEngine.loadArgumentsIntoObject( argProvider ); Assert.assertEquals(argProvider.binding.getName(), "binding", "Name isn't set properly"); - Assert.assertEquals(argProvider.binding.getSource(), "foo.vcf", "Source isn't set to its expected value"); + Assert.assertEquals(argProvider.binding.getSource(), NON_EXISTANT_FILENAME_VCF, "Source isn't set to its expected value"); Assert.assertEquals(argProvider.binding.getType(), Feature.class, "Type isn't set to its expected value"); Assert.assertEquals(argProvider.binding.isBound(), true, "Bound() isn't returning its expected value"); Assert.assertEquals(argProvider.binding.getTags().getPositionalTags().size(), 1, "Tags aren't correctly set"); @@ -659,7 +661,7 @@ public class ParsingEngineUnitTest extends BaseTest { @Test public void shortNameOnlyRodBindingArgumentTest() { - final String[] commandLine = new String[] {"-short:vcf","foo.vcf"}; + final String[] commandLine = new String[] {"-short:vcf",NON_EXISTANT_FILENAME_VCF}; parsingEngine.addArgumentSource( ShortNameOnlyRodBindingArgProvider.class ); parsingEngine.parse( commandLine ); @@ -669,7 +671,7 @@ public class ParsingEngineUnitTest extends BaseTest { parsingEngine.loadArgumentsIntoObject( argProvider ); 
Assert.assertEquals(argProvider.binding.getName(), "binding", "Name isn't set properly"); - Assert.assertEquals(argProvider.binding.getSource(), "foo.vcf", "Source isn't set to its expected value"); + Assert.assertEquals(argProvider.binding.getSource(), NON_EXISTANT_FILENAME_VCF, "Source isn't set to its expected value"); Assert.assertEquals(argProvider.binding.getType(), Feature.class, "Type isn't set to its expected value"); Assert.assertEquals(argProvider.binding.isBound(), true, "Bound() isn't returning its expected value"); Assert.assertEquals(argProvider.binding.getTags().getPositionalTags().size(), 1, "Tags aren't correctly set"); @@ -711,7 +713,7 @@ public class ParsingEngineUnitTest extends BaseTest { @Test(expectedExceptions = UserException.class) public void rodBindingArgumentTestMissingType() { - final String[] commandLine = new String[] {"-V","foo.vcf"}; + final String[] commandLine = new String[] {"-V",NON_EXISTANT_FILENAME_VCF}; parsingEngine.addArgumentSource( SingleRodBindingArgProvider.class ); parsingEngine.parse( commandLine ); @@ -723,7 +725,7 @@ public class ParsingEngineUnitTest extends BaseTest { @Test(expectedExceptions = UserException.class) public void rodBindingArgumentTestTooManyTags() { - final String[] commandLine = new String[] {"-V:x,y,z","foo.vcf"}; + final String[] commandLine = new String[] {"-V:x,y,z",NON_EXISTANT_FILENAME_VCF}; parsingEngine.addArgumentSource( SingleRodBindingArgProvider.class ); parsingEngine.parse( commandLine ); @@ -740,7 +742,7 @@ public class ParsingEngineUnitTest extends BaseTest { @Test public void variantContextBindingArgumentTest() { - final String[] commandLine = new String[] {"-V:vcf","foo.vcf"}; + final String[] commandLine = new String[] {"-V:vcf",NON_EXISTANT_FILENAME_VCF}; parsingEngine.addArgumentSource( VariantContextRodBindingArgProvider.class ); parsingEngine.parse( commandLine ); @@ -750,14 +752,14 @@ public class ParsingEngineUnitTest extends BaseTest { 
parsingEngine.loadArgumentsIntoObject( argProvider ); Assert.assertEquals(argProvider.binding.getName(), "binding", "Name isn't set properly"); - Assert.assertEquals(argProvider.binding.getSource(), "foo.vcf", "Source isn't set to its expected value"); + Assert.assertEquals(argProvider.binding.getSource(), NON_EXISTANT_FILENAME_VCF, "Source isn't set to its expected value"); Assert.assertEquals(argProvider.binding.getType(), VariantContext.class, "Type isn't set to its expected value"); Assert.assertEquals(argProvider.binding.getTags().getPositionalTags().size(), 1, "Tags aren't correctly set"); } @Test public void variantContextBindingArgumentTestVCF3() { - final String[] commandLine = new String[] {"-V:vcf3","foo.vcf"}; + final String[] commandLine = new String[] {"-V:vcf3",NON_EXISTANT_FILENAME_VCF}; parsingEngine.addArgumentSource( VariantContextRodBindingArgProvider.class ); parsingEngine.parse( commandLine ); @@ -767,7 +769,7 @@ public class ParsingEngineUnitTest extends BaseTest { parsingEngine.loadArgumentsIntoObject( argProvider ); Assert.assertEquals(argProvider.binding.getName(), "binding", "Name isn't set properly"); - Assert.assertEquals(argProvider.binding.getSource(), "foo.vcf", "Source isn't set to its expected value"); + Assert.assertEquals(argProvider.binding.getSource(), NON_EXISTANT_FILENAME_VCF, "Source isn't set to its expected value"); Assert.assertEquals(argProvider.binding.getType(), VariantContext.class, "Type isn't set to its expected value"); Assert.assertEquals(argProvider.binding.getTags().getPositionalTags().size(), 1, "Tags aren't correctly set"); } @@ -779,7 +781,7 @@ public class ParsingEngineUnitTest extends BaseTest { @Test public void listRodBindingArgumentTest() { - final String[] commandLine = new String[] {"-V:vcf","foo.vcf"}; + final String[] commandLine = new String[] {"-V:vcf",NON_EXISTANT_FILENAME_VCF}; parsingEngine.addArgumentSource( ListRodBindingArgProvider.class ); parsingEngine.parse( commandLine ); @@ -791,14 
+793,14 @@ public class ParsingEngineUnitTest extends BaseTest { Assert.assertEquals(argProvider.bindings.size(), 1, "Unexpected number of bindings"); RodBinding binding = argProvider.bindings.get(0); Assert.assertEquals(binding.getName(), "binding", "Name isn't set properly"); - Assert.assertEquals(binding.getSource(), "foo.vcf", "Source isn't set to its expected value"); + Assert.assertEquals(binding.getSource(), NON_EXISTANT_FILENAME_VCF, "Source isn't set to its expected value"); Assert.assertEquals(binding.getType(), Feature.class, "Type isn't set to its expected value"); Assert.assertEquals(binding.getTags().getPositionalTags().size(), 1, "Tags aren't correctly set"); } @Test public void listRodBindingArgumentTest2Args() { - final String[] commandLine = new String[] {"-V:vcf","foo.vcf", "-V:vcf", "bar.vcf"}; + final String[] commandLine = new String[] {"-V:vcf",NON_EXISTANT_FILENAME_VCF, "-V:vcf", "bar.vcf"}; parsingEngine.addArgumentSource( ListRodBindingArgProvider.class ); parsingEngine.parse( commandLine ); @@ -811,7 +813,7 @@ public class ParsingEngineUnitTest extends BaseTest { RodBinding binding = argProvider.bindings.get(0); Assert.assertEquals(binding.getName(), "binding", "Name isn't set properly"); - Assert.assertEquals(binding.getSource(), "foo.vcf", "Source isn't set to its expected value"); + Assert.assertEquals(binding.getSource(), NON_EXISTANT_FILENAME_VCF, "Source isn't set to its expected value"); Assert.assertEquals(binding.getType(), Feature.class, "Type isn't set to its expected value"); Assert.assertEquals(binding.getTags().getPositionalTags().size(), 1, "Tags aren't correctly set"); @@ -838,7 +840,7 @@ public class ParsingEngineUnitTest extends BaseTest { @Test public void listRodBindingArgumentTestExplicitlyNamed() { - final String[] commandLine = new String[] {"-V:foo,vcf","foo.vcf", "-V:foo,vcf", "bar.vcf"}; + final String[] commandLine = new String[] {"-V:foo,vcf",NON_EXISTANT_FILENAME_VCF, "-V:foo,vcf", "bar.vcf"}; 
parsingEngine.addArgumentSource( ListRodBindingArgProvider.class ); parsingEngine.parse( commandLine ); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsIntegrationTest.java index f9aaaecc1..1f11b5886 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsIntegrationTest.java @@ -50,8 +50,8 @@ public class DiffObjectsIntegrationTest extends WalkerTest { @DataProvider(name = "data") public Object[][] createData() { - new TestParams(testDir + "diffTestMaster.vcf", testDir + "diffTestTest.vcf", "92311de76dda3f38aac289d807ef23d0"); - new TestParams(testDir + "exampleBAM.bam", testDir + "exampleBAM.simple.bam", "0c69412c385fda50210f2a612e1ffe4a"); + new TestParams(testDir + "diffTestMaster.vcf", testDir + "diffTestTest.vcf", "dc1ca75c6ecf32641967d61e167acfff"); + new TestParams(testDir + "exampleBAM.bam", testDir + "exampleBAM.simple.bam", "df0fcb568a3a49fc74830103b2e26f6c"); return TestParams.getTests(TestParams.class); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java index cec377f5f..20409d4ca 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java @@ -77,6 +77,19 @@ public class SelectVariantsIntegrationTest extends WalkerTest { executeTest("testConcordance--" + testFile, spec); } + @Test + public void testVariantTypeSelection() { + String testFile = validationDataLocation + "complexExample1.vcf"; + + WalkerTestSpec spec = new WalkerTestSpec( + "-T 
SelectVariants -R " + b36KGReference + " -restrictAllelesTo MULTIALLELIC -selectType MIXED --variant " + testFile + " -o %s -NO_HEADER", + 1, + Arrays.asList("e0b12c0b47a8a7a988b3587b47bfa8cf") + ); + + executeTest("testVariantTypeSelection--" + testFile, spec); + } + @Test(enabled=false) public void testRemovePLs() { String testFile = validationDataLocation + "combine.3.vcf"; diff --git a/public/java/test/org/broadinstitute/sting/jna/drmaa/v1_0/LibDrmaaIntegrationTest.java b/public/java/test/org/broadinstitute/sting/jna/drmaa/v1_0/LibDrmaaIntegrationTest.java index ac2064640..d98281ad3 100644 --- a/public/java/test/org/broadinstitute/sting/jna/drmaa/v1_0/LibDrmaaIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/jna/drmaa/v1_0/LibDrmaaIntegrationTest.java @@ -40,6 +40,7 @@ import java.util.Arrays; import java.util.List; public class LibDrmaaIntegrationTest extends BaseTest { + private String implementation = null; @Test public void testDrmaa() throws Exception { @@ -79,10 +80,24 @@ public class LibDrmaaIntegrationTest extends BaseTest { Assert.fail(String.format("Could not get DRMAA implementation from the DRMAA library: %s", error.getString(0))); System.out.println(String.format("DRMAA implementation(s): %s", drmaaImplementation.getString(0))); + + this.implementation = drmaaImplementation.getString(0); } - @Test + @Test(dependsOnMethods = { "testDrmaa" }) public void testSubmitEcho() throws Exception { + if (implementation.indexOf("LSF") >= 0) { + System.err.println(" *********************************************************"); + System.err.println(" ***********************************************************"); + System.err.println(" **** ****"); + System.err.println(" **** Skipping LibDrmaaIntegrationTest.testSubmitEcho() ****"); + System.err.println(" **** Are you using the dotkit .combined_LSF_SGE? 
****"); + System.err.println(" **** ****"); + System.err.println(" ***********************************************************"); + System.err.println(" *********************************************************"); + return; + } + Memory error = new Memory(LibDrmaa.DRMAA_ERROR_STRING_BUFFER); int errnum; @@ -129,7 +144,7 @@ public class LibDrmaaIntegrationTest extends BaseTest { errnum = LibDrmaa.drmaa_set_vector_attribute(jt, LibDrmaa.DRMAA_V_ARGV, args, error, LibDrmaa.DRMAA_ERROR_STRING_BUFFER_LEN); if (errnum != LibDrmaa.DRMAA_ERRNO.DRMAA_ERRNO_SUCCESS) - Assert.fail(String.format("Could not set attribute \"%s\": %s", LibDrmaa.DRMAA_REMOTE_COMMAND, error.getString(0))); + Assert.fail(String.format("Could not set attribute \"%s\": %s", LibDrmaa.DRMAA_V_ARGV, error.getString(0))); errnum = LibDrmaa.drmaa_run_job(jobIdMem, LibDrmaa.DRMAA_JOBNAME_BUFFER_LEN, jt, error, LibDrmaa.DRMAA_ERROR_STRING_BUFFER_LEN); diff --git a/public/java/test/org/broadinstitute/sting/utils/R/RScriptExecutorUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/R/RScriptExecutorUnitTest.java new file mode 100644 index 000000000..1bbf74db9 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/utils/R/RScriptExecutorUnitTest.java @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.R; + +import org.apache.commons.io.FileUtils; +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.gatk.walkers.diffengine.DiffElement; +import org.broadinstitute.sting.gatk.walkers.diffengine.DiffEngine; +import org.broadinstitute.sting.gatk.walkers.diffengine.DiffNode; +import org.broadinstitute.sting.gatk.walkers.diffengine.Difference; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.testng.Assert; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +/** + * Basic unit test for RScriptExecutor in reduced reads + */ +public class RScriptExecutorUnitTest extends BaseTest { + final static String testrscript = "print(\"hello, world\")\n"; + final static String publicRScript = "plot_Tranches.R"; + final static String privateRScript = "variantCallQC.R"; + + // -------------------------------------------------------------------------------- + // + // Difference testing routines + // + // -------------------------------------------------------------------------------- + + private void testOne(String script, String pathToRscript, String anotherSearchPath, boolean exceptOnError) { + RScriptExecutor.RScriptArgumentCollection collection = + new 
RScriptExecutor.RScriptArgumentCollection(); + if ( pathToRscript != null ) + collection.PATH_TO_RSCRIPT = pathToRscript; + if ( anotherSearchPath != null ) { + List x = new ArrayList(collection.PATH_TO_RESOURCES); + x.add(anotherSearchPath); + collection.PATH_TO_RESOURCES = x; + } + RScriptExecutor executor = new RScriptExecutor(collection, exceptOnError); + executor.callRScripts(script); + } + + @Test + public void testPublic() { testOne(publicRScript, null, null, true); } + + @Test + public void testPrivate() { testOne(privateRScript, null, null, true); } + + // make sure we don't break finding something in private by adding another directory + @Test + public void testPrivateWithAdditionalPath1() { testOne(privateRScript, null, "dist", true); } + + // make sure we don't break finding something in private by adding another directory + @Test + public void testPrivateWithAdditionalPath2() { testOne(privateRScript, null, "doesNotExist", true); } + + @Test(expectedExceptions = UserException.class) + public void testNonExistantScriptException() { testOne("does_not_exist.R", null, null, true); } + + @Test() + public void testNonExistantScriptNoException() { testOne("does_not_exist.R", null, null, false); } + + @Test(expectedExceptions = UserException.class) + public void testNonExistantRScriptException() { testOne(publicRScript, "badRScriptValue", null, true); } + + @Test() + public void testNonExistantRScriptNoException() { testOne(publicRScript, "badRScriptValue", null, false); } + + @Test() + public void testScriptInNewPath() throws IOException { + File t = createTempFile("myTestScript", ".R"); + FileUtils.writeStringToFile(t, testrscript); + testOne(t.getName(), null, t.getParent(), true); + } +} \ No newline at end of file diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala index 724518142..2417e5620 100755 --- 
a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala @@ -106,7 +106,7 @@ class DataProcessingPipeline extends QScript { // Because the realignment only happens after these scripts are executed, in case you are using // bwa realignment, this function will operate over the original bam files and output over the // (to be realigned) bam files. - def createSampleFiles(bamFiles: List[File], realignedBamFiles: List[File]): Map[String, File] = { + def createSampleFiles(bamFiles: List[File], realignedBamFiles: List[File]): Map[String, List[File]] = { // Creating a table with SAMPLE information from each input BAM file val sampleTable = scala.collection.mutable.Map.empty[String, List[File]] @@ -131,24 +131,25 @@ class DataProcessingPipeline extends QScript { sampleTable(sample) :+= rBam } } + return sampleTable.toMap - println("\n\n*** INPUT FILES ***\n") - // Creating one file for each sample in the dataset - val sampleBamFiles = scala.collection.mutable.Map.empty[String, File] - for ((sample, flist) <- sampleTable) { - - println(sample + ":") - for (f <- flist) - println (f) - println() - - val sampleFileName = new File(qscript.outputDir + qscript.projectName + "." + sample + ".list") - sampleBamFiles(sample) = sampleFileName - add(writeList(flist, sampleFileName)) - } - println("*** INPUT FILES ***\n\n") - - return sampleBamFiles.toMap +// println("\n\n*** INPUT FILES ***\n") +// // Creating one file for each sample in the dataset +// val sampleBamFiles = scala.collection.mutable.Map.empty[String, File] +// for ((sample, flist) <- sampleTable) { +// +// println(sample + ":") +// for (f <- flist) +// println (f) +// println() +// +// val sampleFileName = new File(qscript.outputDir + qscript.projectName + "." 
+ sample + ".list") +// sampleBamFiles(sample) = sampleFileName +// //add(writeList(flist, sampleFileName)) +// } +// println("*** INPUT FILES ***\n\n") +// +// return sampleBamFiles.toMap } // Rebuilds the Read Group string to give BWA @@ -224,7 +225,10 @@ class DataProcessingPipeline extends QScript { def script = { + // final output list of processed bam files + var cohortList: List[File] = List() + // sets the model for the Indel Realigner cleanModelEnum = getIndelCleaningModel() // keep a record of the number of contigs in the first bam file in the list @@ -233,28 +237,19 @@ class DataProcessingPipeline extends QScript { val realignedBAMs = if (useBWApe || useBWAse) {performAlignment(bams)} else {revertBams(bams)} - // Generate a BAM file per sample joining all per lane files if necessary - val sampleBAMFiles: Map[String, File] = createSampleFiles(bams, realignedBAMs) + // generate a BAM file per sample joining all per lane files if necessary + val sampleBAMFiles: Map[String, List[File]] = createSampleFiles(bams, realignedBAMs) - // Final output list of processed bam files - var cohortList: List[File] = List() - - // Simple progress report - println("\nFound the following samples: ") - for ((sample, file) <- sampleBAMFiles) - println("\t" + sample + " -> " + file) - println("\n") - - // If this is a 'knowns only' indel realignment run, do it only once for all samples. + // if this is a 'knowns only' indel realignment run, do it only once for all samples. 
val globalIntervals = new File(outputDir + projectName + ".intervals") if (cleaningModel == ConsensusDeterminationModel.KNOWNS_ONLY) add(target(null, globalIntervals)) - // Put each sample through the pipeline - for ((sample, sampleFile) <- sampleBAMFiles) { - val bam = if (sampleFile.endsWith(".list")) {swapExt(sampleFile, ".list", ".bam")} else {sampleFile} + // put each sample through the pipeline + for ((sample, bamList) <- sampleBAMFiles) { // BAM files generated by the pipeline + val bam = new File(qscript.projectName + "." + sample + ".bam") val cleanedBam = swapExt(bam, ".bam", ".clean.bam") val dedupedBam = swapExt(bam, ".bam", ".clean.dedup.bam") val recalBam = swapExt(bam, ".bam", ".clean.dedup.recal.bam") @@ -272,15 +267,16 @@ class DataProcessingPipeline extends QScript { // Validation is an optional step for the BAM file generated after // alignment and the final bam file of the pipeline. - if (!noValidation && sampleFile.endsWith(".bam")) { // todo -- implement validation for .list BAM files + if (!noValidation) { + for (sampleFile <- bamList) add(validate(sampleFile, preValidateLog), validate(recalBam, postValidateLog)) } if (cleaningModel != ConsensusDeterminationModel.KNOWNS_ONLY) - add(target(sampleFile, targetIntervals)) + add(target(bamList, targetIntervals)) - add(clean(sampleFile, targetIntervals, cleanedBam), + add(clean(bamList, targetIntervals, cleanedBam), dedup(cleanedBam, dedupedBam, metricsFile), cov(dedupedBam, preRecalFile), recal(dedupedBam, preRecalFile, recalBam), @@ -320,9 +316,9 @@ class DataProcessingPipeline extends QScript { this.maxRecordsInRam = 100000 } - case class target (inBams: File, outIntervals: File) extends RealignerTargetCreator with CommandLineGATKArgs { + case class target (inBams: List[File], outIntervals: File) extends RealignerTargetCreator with CommandLineGATKArgs { if (cleanModelEnum != ConsensusDeterminationModel.KNOWNS_ONLY) - this.input_file :+= inBams + this.input_file = inBams this.out = outIntervals 
this.mismatchFraction = 0.0 this.known :+= qscript.dbSNP @@ -333,8 +329,8 @@ class DataProcessingPipeline extends QScript { this.jobName = queueLogDir + outIntervals + ".target" } - case class clean (inBams: File, tIntervals: File, outBam: File) extends IndelRealigner with CommandLineGATKArgs { - this.input_file :+= inBams + case class clean (inBams: List[File], tIntervals: File, outBam: File) extends IndelRealigner with CommandLineGATKArgs { + this.input_file = inBams this.targetIntervals = tIntervals this.out = outBam this.known :+= qscript.dbSNP diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/PacbioProcessingPipeline.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/PacbioProcessingPipeline.scala index 9f3dd9a2c..6947d4398 100755 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/PacbioProcessingPipeline.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/PacbioProcessingPipeline.scala @@ -4,9 +4,9 @@ import org.broadinstitute.sting.queue.QScript import org.broadinstitute.sting.queue.extensions.gatk._ import org.broadinstitute.sting.queue.util.QScriptUtils import net.sf.samtools.SAMFileHeader.SortOrder -import org.broadinstitute.sting.queue.extensions.picard.{SortSam, AddOrReplaceReadGroups} import org.broadinstitute.sting.utils.exceptions.UserException import org.broadinstitute.sting.commandline.Hidden +import org.broadinstitute.sting.queue.extensions.picard.{ReorderSam, SortSam, AddOrReplaceReadGroups} /** * Created by IntelliJ IDEA. @@ -16,7 +16,7 @@ import org.broadinstitute.sting.commandline.Hidden */ -class RecalibrateBaseQualities extends QScript { +class PacbioProcessingPipeline extends QScript { @Input(doc="input FASTA/FASTQ/BAM file - or list of FASTA/FASTQ/BAM files. 
", shortName="i", required=true) var input: File = _ @@ -39,13 +39,16 @@ class RecalibrateBaseQualities extends QScript { @Input(doc="The path to the binary of bwa to align fasta/fastq files", fullName="path_to_bwa", shortName="bwa", required=false) var bwaPath: File = _ + @Input(doc="Input is a BLASR generated BAM file", shortName = "blasr", fullName="blasr_bam", required=false) + var BLASR_BAM: Boolean = false + @Hidden @Input(doc="The default base qualities to use before recalibration. Default is Q20 (should be good for every dataset)." , shortName = "dbq", required=false) var dbq: Int = 20 - - - + @Hidden + @Input(shortName="bwastring", required=false) + var bwastring: String = "" val queueLogDir: String = ".qlog/" @@ -57,8 +60,6 @@ class RecalibrateBaseQualities extends QScript { var USE_BWA: Boolean = false - println("DEBUG: processing " + file + "\nDEBUG: name -- " + file.getName) - if (file.endsWith(".fasta") || file.endsWith(".fq")) { if (bwaPath == null) { throw new UserException("You provided a fasta/fastq file but didn't provide the path for BWA"); @@ -69,28 +70,34 @@ class RecalibrateBaseQualities extends QScript { // FASTA -> BAM steps val alignedSam: File = file.getName + ".aligned.sam" val sortedBam: File = swapExt(alignedSam, ".sam", ".bam") - val qualBam: File = swapExt(sortedBam, ".bam", ".q.bam") val rgBam: File = swapExt(file, ".bam", ".rg.bam") val bamBase = if (USE_BWA) {rgBam} else {file} // BAM Steps + val mqBAM: File = swapExt(bamBase, ".bam", ".mq.bam") val recalFile1: File = swapExt(bamBase, ".bam", ".recal1.csv") val recalFile2: File = swapExt(bamBase, ".bam", ".recal2.csv") val recalBam: File = swapExt(bamBase, ".bam", ".recal.bam") val path1: String = recalBam + ".before" val path2: String = recalBam + ".after" - if (USE_BWA) { add(align(file, alignedSam), sortSam(alignedSam, sortedBam), - addQuals(sortedBam, qualBam, dbq), - addReadGroup(qualBam, rgBam, sample)) + addReadGroup(sortedBam, rgBam, sample)) } - add(cov(bamBase, 
recalFile1), - recal(bamBase, recalFile1, recalBam), + else if (BLASR_BAM) { + val reorderedBAM = swapExt(bamBase, ".bam", ".reordered.bam") + add(reorder(bamBase, reorderedBAM), + changeMQ(reorderedBAM, mqBAM)) + } + + val bam = if (BLASR_BAM) {mqBAM} else {bamBase} + + add(cov(bam, recalFile1), + recal(bam, recalFile1, recalBam), cov(recalBam, recalFile2), analyzeCovariates(recalFile1, path1), analyzeCovariates(recalFile2, path2)) @@ -110,13 +117,13 @@ class RecalibrateBaseQualities extends QScript { case class align(@Input inFastq: File, @Output outSam: File) extends ExternalCommonArgs { - def commandLine = bwaPath + " bwasw -b5 -q2 -r1 -z10 -t8 " + reference + " " + inFastq + " > " + outSam + def commandLine = bwaPath + " bwasw -b5 -q2 -r1 -z20 -t16 " + reference + " " + inFastq + " > " + outSam + this.memoryLimit = 8 this.analysisName = queueLogDir + outSam + ".bwa_sam_se" this.jobName = queueLogDir + outSam + ".bwa_sam_se" } - case class sortSam (@Input inSam: File, @Output outBam: File) extends SortSam with ExternalCommonArgs { - @Output(doc="output bai file") var bai = swapExt(outBam, ".bam", ".bai") + case class sortSam (inSam: File, outBam: File) extends SortSam with ExternalCommonArgs { this.input = List(inSam) this.output = outBam this.sortOrder = SortOrder.coordinate @@ -124,10 +131,16 @@ class RecalibrateBaseQualities extends QScript { this.jobName = queueLogDir + outBam + ".sortSam" } - case class addQuals(inBam: File, outBam: File, qual: Int) extends PrintReads with CommandLineGATKArgs { + case class reorder (inSam: File, outSam: File) extends ReorderSam with ExternalCommonArgs { + this.input = List(inSam) + this.output = outSam + this.sortReference = reference + } + + case class changeMQ(inBam: File, outBam: File) extends PrintReads with CommandLineGATKArgs { this.input_file :+= inBam this.out = outBam - this.DBQ = qual + this.read_filter :+= "ReassignMappingQuality" } case class addReadGroup (inBam: File, outBam: File, sample: String) extends 
AddOrReplaceReadGroups with ExternalCommonArgs { @@ -145,7 +158,8 @@ class RecalibrateBaseQualities extends QScript { } case class cov (inBam: File, outRecalFile: File) extends CountCovariates with CommandLineGATKArgs { - this.rodBind :+= RodBind("dbsnp", "VCF", dbSNP) + this.DBQ = dbq + this.knownSites :+= dbSNP this.covariate ++= List("ReadGroupCovariate", "QualityScoreCovariate", "CycleCovariate", "DinucCovariate") this.input_file :+= inBam this.recal_file = outRecalFile @@ -155,6 +169,7 @@ class RecalibrateBaseQualities extends QScript { } case class recal (inBam: File, inRecalFile: File, outBam: File) extends TableRecalibration with CommandLineGATKArgs { + this.DBQ = dbq this.input_file :+= inBam this.recal_file = inRecalFile this.out = outBam diff --git a/public/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala b/public/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala index f19d60930..297da8cc9 100755 --- a/public/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala @@ -95,7 +95,8 @@ class QCommandLine extends CommandLineProgram with Logging { def execute = { qGraph.settings = settings - for (script <- pluginManager.createAllTypes()) { + val allQScripts = pluginManager.createAllTypes(); + for (script <- allQScripts) { logger.info("Scripting " + pluginManager.getName(script.getClass.asSubclass(classOf[QScript]))) loadArgumentsIntoObject(script) try { @@ -108,14 +109,26 @@ class QCommandLine extends CommandLineProgram with Logging { logger.info("Added " + script.functions.size + " functions") } + // Execute the job graph qGraph.run() + // walk over each script, calling onExecutionDone + for (script <- allQScripts) { + script.onExecutionDone(qGraph.getFunctionsAndStatus(script.functions), qGraph.success) + if ( ! 
settings.disableJobReport ) { + val jobStringName = (QScriptUtils.?(settings.jobReportFile)).getOrElse(settings.qSettings.jobNamePrefix + ".jobreport.txt") + val jobReportFile = new File(jobStringName) + logger.info("Writing JobLogging GATKReport to file " + jobReportFile) + QJobReport.printReport(qGraph.getFunctionsAndStatus(script.functions), jobReportFile) + QJobReport.plotReport(settings.rScriptArgs, jobReportFile) + } + } + if (!qGraph.success) { logger.info("Done with errors") qGraph.logFailed() 1 } else { - logger.info("Done") 0 } } diff --git a/public/scala/src/org/broadinstitute/sting/queue/QScript.scala b/public/scala/src/org/broadinstitute/sting/queue/QScript.scala index 5cb8d1d29..fce65c997 100755 --- a/public/scala/src/org/broadinstitute/sting/queue/QScript.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/QScript.scala @@ -24,6 +24,7 @@ package org.broadinstitute.sting.queue +import engine.JobRunInfo import org.broadinstitute.sting.queue.function.QFunction import annotation.target.field import io.Source @@ -57,6 +58,16 @@ trait QScript extends Logging with PrimitiveOptionConversions with StringFileCon */ def script() + /** + * A default handler for the onExecutionDone() function. By default this doesn't do anything + * except print out a fine status message. + */ + def onExecutionDone(jobs: Map[QFunction, JobRunInfo], success: Boolean) { + logger.info("Script %s with %d total jobs".format(if (success) "completed successfully" else "failed", jobs.size)) + // this is too much output + // for ( (f, info) <- jobs ) logger.info(" %s %s".format(f.jobName, info)) + } + /** * The command line functions that will be executed for this QScript. 
*/ diff --git a/public/scala/src/org/broadinstitute/sting/queue/engine/FunctionEdge.scala b/public/scala/src/org/broadinstitute/sting/queue/engine/FunctionEdge.scala index 68bc7ae61..ef7f2afb0 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/engine/FunctionEdge.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/engine/FunctionEdge.scala @@ -23,6 +23,8 @@ class FunctionEdge(val function: QFunction, val inputs: QNode, val outputs: QNod */ var depth = -1 + val myRunInfo: JobRunInfo = JobRunInfo.default // purely for dryRun testing + /** * Initializes with the current status of the function. */ @@ -179,4 +181,8 @@ class FunctionEdge(val function: QFunction, val inputs: QNode, val outputs: QNod printWriter.close IOUtils.writeContents(functionErrorFile, stackTrace.toString) } + + def getRunInfo = { + if ( runner == null ) myRunInfo else runner.getRunInfo + } } diff --git a/public/scala/src/org/broadinstitute/sting/queue/engine/InProcessRunner.scala b/public/scala/src/org/broadinstitute/sting/queue/engine/InProcessRunner.scala index d583a55ef..a580be473 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/engine/InProcessRunner.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/engine/InProcessRunner.scala @@ -1,7 +1,9 @@ package org.broadinstitute.sting.queue.engine import org.broadinstitute.sting.queue.function.InProcessFunction -import org.broadinstitute.sting.queue.util.IOUtils +import java.util.Date +import org.broadinstitute.sting.queue.util.{Logging, IOUtils} +import org.broadinstitute.sting.utils.Utils /** * Runs a function that executes in process and does not fork out an external process. 
@@ -10,8 +12,13 @@ class InProcessRunner(val function: InProcessFunction) extends JobRunner[InProce private var runStatus: RunnerStatus.Value = _ def start() = { + getRunInfo.startTime = new Date() + getRunInfo.exechosts = Utils.resolveHostname() runStatus = RunnerStatus.RUNNING + function.run() + + getRunInfo.doneTime = new Date() val content = "%s%nDone.".format(function.description) IOUtils.writeContents(function.jobOutputFile, content) runStatus = RunnerStatus.DONE diff --git a/public/scala/src/org/broadinstitute/sting/queue/engine/JobRunInfo.scala b/public/scala/src/org/broadinstitute/sting/queue/engine/JobRunInfo.scala new file mode 100644 index 000000000..2caa4d2aa --- /dev/null +++ b/public/scala/src/org/broadinstitute/sting/queue/engine/JobRunInfo.scala @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +package org.broadinstitute.sting.queue.engine + +import java.util.Date +import java.text.SimpleDateFormat + +/** + * Class containing tracked information about a job run. + */ + // todo -- it might be nice to have the hostname +class JobRunInfo { + /** constant date format */ + val formatter = new SimpleDateFormat("yy-MM-dd H:mm:ss:SSS"); + + /** The start time with millisecond resolution of this job */ + var startTime: Date = _ + /** The done time with millisecond resolution of this job */ + var doneTime: Date = _ + var exechosts: String = "localhost" + + def getStartTime = startTime + def getDoneTime = doneTime + def getFormattedStartTime = formatTime(getStartTime) + def getFormattedDoneTime = formatTime(getDoneTime) + + /** Helper function that pretty prints the date */ + private def formatTime(d: Date) = if ( d != null ) formatter.format(d) else "null" + + def getExecHosts = exechosts + + /** + * Was any information set for this jobInfo? JobInfo can be unset because + * the job never ran or because it already completed. + */ + def isFilledIn = startTime != null + + /** + * How long did the job run (in wall time)? 
Returns -1 if this jobInfo isn't filled in + */ + def getRuntimeInMs: Long = { + if ( isFilledIn ) + getDoneTime.getTime - getStartTime.getTime + else + -1 + } + + override def toString: String = + "started %s ended %s runtime %s".format(getFormattedStartTime, getFormattedDoneTime, getRuntimeInMs) +} + +object JobRunInfo { + def default: JobRunInfo = new JobRunInfo() +} \ No newline at end of file diff --git a/public/scala/src/org/broadinstitute/sting/queue/engine/JobRunner.scala b/public/scala/src/org/broadinstitute/sting/queue/engine/JobRunner.scala index de5fbde05..6dca5d89f 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/engine/JobRunner.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/engine/JobRunner.scala @@ -69,6 +69,12 @@ trait JobRunner[TFunction <: QFunction] { def cleanup() { } + /** + * Must be overloaded + */ + val runInfo = JobRunInfo.default + def getRunInfo = runInfo + /** * Calls back to a hook that an expert user can setup to modify a job. * @param value Value to modify. diff --git a/public/scala/src/org/broadinstitute/sting/queue/engine/QGraph.scala b/public/scala/src/org/broadinstitute/sting/queue/engine/QGraph.scala index 766d9db94..4469874e2 100755 --- a/public/scala/src/org/broadinstitute/sting/queue/engine/QGraph.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/engine/QGraph.scala @@ -38,6 +38,8 @@ import org.apache.commons.lang.StringUtils import org.broadinstitute.sting.queue.util._ import collection.immutable.{TreeSet, TreeMap} import org.broadinstitute.sting.queue.function.scattergather.{ScatterFunction, CloneFunction, GatherFunction, ScatterGatherableFunction} +import java.util.Date +import org.broadinstitute.sting.utils.Utils /** * The internal dependency tracker between sets of function input and output files. 
@@ -319,7 +321,10 @@ class QGraph extends Logging { logger.debug("+++++++") foreachFunction(readyJobs.toList, edge => { if (running) { + edge.myRunInfo.startTime = new Date() + edge.getRunInfo.exechosts = Utils.resolveHostname() logEdge(edge) + edge.myRunInfo.doneTime = new Date() edge.markAsDone } }) @@ -939,6 +944,14 @@ class QGraph extends Logging { edges.sorted(functionOrdering).foreach(edge => if (running) f(edge)) } + /** + * Utility function for running a method over all function edges. + * @param edgeFunction Function to run for each FunctionEdge. + */ + private def getFunctionEdges: List[FunctionEdge] = { + jobGraph.edgeSet.toList.filter(_.isInstanceOf[FunctionEdge]).asInstanceOf[List[FunctionEdge]] + } + /** * Utility function for running a method over all functions, but traversing the nodes in order of dependency. * @param edgeFunction Function to run for each FunctionEdge. @@ -1028,6 +1041,10 @@ class QGraph extends Logging { */ def isShutdown = !running + def getFunctionsAndStatus(functions: List[QFunction]): Map[QFunction, JobRunInfo] = { + getFunctionEdges.map(edge => (edge.function, edge.getRunInfo)).toMap + } + /** * Kills any forked jobs still running. 
*/ diff --git a/public/scala/src/org/broadinstitute/sting/queue/engine/QGraphSettings.scala b/public/scala/src/org/broadinstitute/sting/queue/engine/QGraphSettings.scala index 6ece600dd..ee498c8a0 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/engine/QGraphSettings.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/engine/QGraphSettings.scala @@ -26,8 +26,9 @@ package org.broadinstitute.sting.queue.engine import java.io.File import org.broadinstitute.sting.queue.QSettings -import org.broadinstitute.sting.commandline.{ArgumentCollection, Argument} import org.broadinstitute.sting.queue.util.SystemUtils +import org.broadinstitute.sting.commandline.{Advanced, ArgumentCollection, Argument} +import org.broadinstitute.sting.utils.R.RScriptExecutor /** * Command line options for a QGraph. @@ -69,6 +70,16 @@ class QGraphSettings { @Argument(fullName="expanded_dot_graph", shortName="expandedDot", doc="Outputs the queue graph of scatter gather to a .dot file. Otherwise overwrites the dot_graph", required=false) var expandedDotFile: File = _ + @Argument(fullName="jobReport", shortName="jobReport", doc="File where we will write the Queue job report", required=false) + var jobReportFile: String = _ + + @Advanced + @Argument(fullName="disableJobReport", shortName="disableJobReport", doc="If provided, we will not create a job report", required=false) + var disableJobReport: Boolean = false + + @ArgumentCollection + var rScriptArgs = new RScriptExecutor.RScriptArgumentCollection + @ArgumentCollection val qSettings = new QSettings } diff --git a/public/scala/src/org/broadinstitute/sting/queue/engine/lsf/Lsf706JobRunner.scala b/public/scala/src/org/broadinstitute/sting/queue/engine/lsf/Lsf706JobRunner.scala index bb711344c..323cc63ff 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/engine/lsf/Lsf706JobRunner.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/engine/lsf/Lsf706JobRunner.scala @@ -31,11 +31,12 @@ import
org.broadinstitute.sting.jna.lsf.v7_0_6.{LibLsf, LibBat} import org.broadinstitute.sting.utils.Utils import org.broadinstitute.sting.jna.clibrary.LibC import org.broadinstitute.sting.jna.lsf.v7_0_6.LibBat.{submitReply, submit} -import com.sun.jna.ptr.IntByReference import org.broadinstitute.sting.queue.engine.{RunnerStatus, CommandLineJobRunner} -import com.sun.jna.{Structure, StringArray, NativeLong} import java.util.regex.Pattern import java.lang.StringBuffer +import java.util.Date +import com.sun.jna.{Pointer, Structure, StringArray, NativeLong} +import com.sun.jna.ptr.{PointerByReference, IntByReference} /** * Runs jobs on an LSF compute cluster. @@ -271,12 +272,27 @@ object Lsf706JobRunner extends Logging { logger.debug("Job Id %s status / exitStatus / exitInfo: 0x%02x / 0x%02x / 0x%02x".format(runner.jobId, jobStatus, exitStatus, exitInfo)) + def updateRunInfo() { + // the platform LSF startTimes are in seconds, not milliseconds, so convert to the java convention + runner.getRunInfo.startTime = new Date(jobInfo.startTime.longValue * 1000) + runner.getRunInfo.doneTime = new Date(jobInfo.endTime.longValue * 1000) + val exHostsRaw = jobInfo.exHosts.getStringArray(0) + //logger.warn("exHostsRaw = " + exHostsRaw) + val exHostsList = exHostsRaw.toList + //logger.warn("exHostsList = " + exHostsList) + val exHosts = exHostsList.reduceLeft(_ + "," + _) + //logger.warn("exHosts = " + exHosts) + runner.getRunInfo.exechosts = exHosts + } + runner.updateStatus( if (Utils.isFlagSet(jobStatus, LibBat.JOB_STAT_DONE)) { // Done successfully. + updateRunInfo() RunnerStatus.DONE } else if (Utils.isFlagSet(jobStatus, LibBat.JOB_STAT_EXIT) && !willRetry(exitInfo, endTime)) { // Exited function that (probably) won't be retried. + updateRunInfo() RunnerStatus.FAILED } else { // Note that we still saw the job in the system. 
diff --git a/public/scala/src/org/broadinstitute/sting/queue/engine/shell/ShellJobRunner.scala b/public/scala/src/org/broadinstitute/sting/queue/engine/shell/ShellJobRunner.scala index 03f9d3315..ae899868a 100755 --- a/public/scala/src/org/broadinstitute/sting/queue/engine/shell/ShellJobRunner.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/engine/shell/ShellJobRunner.scala @@ -27,6 +27,9 @@ package org.broadinstitute.sting.queue.engine.shell import org.broadinstitute.sting.queue.function.CommandLineFunction import org.broadinstitute.sting.queue.util.ShellJob import org.broadinstitute.sting.queue.engine.{RunnerStatus, CommandLineJobRunner} +import java.util.Date +import org.broadinstitute.sting.gatk.phonehome.GATKRunReport +import org.broadinstitute.sting.utils.Utils /** * Runs jobs one at a time locally @@ -50,8 +53,11 @@ class ShellJobRunner(val function: CommandLineFunction) extends CommandLineJobRu // Allow advanced users to update the job. updateJobRun(job) + getRunInfo.startTime = new Date() + getRunInfo.exechosts = Utils.resolveHostname() updateStatus(RunnerStatus.RUNNING) job.run() + getRunInfo.doneTime = new Date() updateStatus(RunnerStatus.DONE) } diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/ReorderSam.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/ReorderSam.scala new file mode 100644 index 000000000..72489dc87 --- /dev/null +++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/ReorderSam.scala @@ -0,0 +1,48 @@ +package org.broadinstitute.sting.queue.extensions.picard + +import org.broadinstitute.sting.commandline._ + +import java.io.File +/* + * Created by IntelliJ IDEA. 
+ * User: carneiro + * Date: 6/22/11 + * Time: 10:35 AM + */ +class ReorderSam extends org.broadinstitute.sting.queue.function.JavaCommandLineFunction with PicardBamFunction { + analysisName = "ReorderSam" + javaMainClass = "net.sf.picard.sam.ReorderSam" + + @Input(doc="Input file (bam or sam) to extract reads from.", shortName = "input", fullName = "input_bam_files", required = true) + var input: List[File] = Nil + + @Output(doc="Output file (bam or sam) to write extracted reads to.", shortName = "output", fullName = "output_bam_file", required = true) + var output: File = _ + + @Output(doc="The output bam index", shortName = "out_index", fullName = "output_bam_index_file", required = false) + var outputIndex: File = _ + + @Argument(doc="Reference sequence to reorder reads to match.", shortName = "ref", fullName = "sort_reference", required = true) + var sortReference: File = _ + + @Argument(doc="If true, then allows only a partial overlap of the BAM contigs with the new reference sequence contigs. By default, this tool requires a corresponding contig in the new reference for each read contig.", shortName = "aic", fullName = "allow_incomplete_concordance", required = false) + var ALLOW_INCOMPLETE_DICT_CONCORDANCE: Boolean = _ + + @Argument(doc="If true, then permits mapping from a read contig to a new reference contig with the same name but a different length. 
Highly dangerous, only use if you know what you are doing.", shortName = "acld", fullName = "allow_contig_length_discordance", required = false) + var ALLOW_CONTIG_LENGTH_DISCORDANCE: Boolean = _ + + override def freezeFieldValues() { + super.freezeFieldValues() + if (outputIndex == null && output != null) + outputIndex = new File(output.getName.stripSuffix(".bam") + ".bai") + } + + override def inputBams = input + override def outputBam = output + this.createIndex = Some(true) + this.sortOrder = null + override def commandLine = super.commandLine + + " REFERENCE=" + sortReference + + optional(" ALLOW_INCOMPLETE_DICT_CONCORDANCE=", ALLOW_INCOMPLETE_DICT_CONCORDANCE) + optional(" ALLOW_CONTIG_LENGTH_DISCORDANCE=", ALLOW_CONTIG_LENGTH_DISCORDANCE) +} \ No newline at end of file diff --git a/public/scala/src/org/broadinstitute/sting/queue/function/QFunction.scala b/public/scala/src/org/broadinstitute/sting/queue/function/QFunction.scala index 7048b6413..c905581fa 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/function/QFunction.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/function/QFunction.scala @@ -30,14 +30,14 @@ import org.broadinstitute.sting.commandline._ import org.broadinstitute.sting.queue.{QException, QSettings} import collection.JavaConversions._ import org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction -import org.broadinstitute.sting.queue.util.{Logging, CollectionUtils, IOUtils, ReflectionUtils} +import org.broadinstitute.sting.queue.util._ /** * The base interface for all functions in Queue. * Inputs and outputs are specified as Sets of values. 
* Inputs are matched to other outputs by using .equals() */ -trait QFunction extends Logging { +trait QFunction extends Logging with QJobReport { /** A short description of this step in the graph */ var analysisName: String = "" @@ -83,11 +83,17 @@ trait QFunction extends Logging { */ var deleteIntermediateOutputs = true + // ------------------------------------------------------- + // + // job run information + // + // ------------------------------------------------------- + /** * Copies settings from this function to another function. * @param function QFunction to copy values to. */ - def copySettingsTo(function: QFunction) { + override def copySettingsTo(function: QFunction) { function.analysisName = this.analysisName function.jobName = this.jobName function.qSettings = this.qSettings @@ -99,6 +105,8 @@ trait QFunction extends Logging { function.updateJobRun = this.updateJobRun function.isIntermediate = this.isIntermediate function.deleteIntermediateOutputs = this.deleteIntermediateOutputs + function.reportGroup = this.reportGroup + function.reportFeatures = this.reportFeatures } /** File to redirect any output. 
Defaults to .out */ diff --git a/public/scala/src/org/broadinstitute/sting/queue/util/QJobReport.scala b/public/scala/src/org/broadinstitute/sting/queue/util/QJobReport.scala new file mode 100644 index 000000000..85896da66 --- /dev/null +++ b/public/scala/src/org/broadinstitute/sting/queue/util/QJobReport.scala @@ -0,0 +1,164 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +package org.broadinstitute.sting.queue.util +import org.broadinstitute.sting.queue.function.QFunction +import org.broadinstitute.sting.gatk.report.{GATKReportTable, GATKReport} +import org.broadinstitute.sting.utils.exceptions.UserException +import org.broadinstitute.sting.queue.engine.JobRunInfo +import java.io.{FileOutputStream, PrintStream, File} +import org.broadinstitute.sting.queue.function.scattergather.{GathererFunction, ScatterFunction} +import org.broadinstitute.sting.utils.R.RScriptExecutor.RScriptArgumentCollection +import org.broadinstitute.sting.utils.R.RScriptExecutor +import org.broadinstitute.sting.queue.QScript + +/** + * A mixin to add Job info to the class + */ +trait QJobReport extends Logging { + self: QFunction => + + protected var reportGroup: String = null + protected var reportFeatures: Map[String, String] = Map() + protected var reportEnabled: Boolean = true + + def includeInReport = reportEnabled + def enableReport() { reportEnabled = true } + def disableReport() { reportEnabled = false } + + def setRunInfo(info: JobRunInfo) { + //logger.info("info " + info) + reportFeatures = Map( + "iteration" -> 1, + "analysisName" -> getReportGroup, + "jobName" -> QJobReport.workAroundSameJobNames(this), + "intermediate" -> self.isIntermediate, + "exechosts" -> info.getExecHosts, + "startTime" -> info.getStartTime.getTime, + "doneTime" -> info.getDoneTime.getTime, + "formattedStartTime" -> info.getFormattedStartTime, + "formattedDoneTime" -> info.getFormattedDoneTime, + "runtime" -> info.getRuntimeInMs).mapValues((x:Any) => if (x != null) x.toString else "null") ++ reportFeatures + // note -- by adding reportFeatures second we override iteration + // (or any other binding) with the user provided value + } + + /** The report Group is the analysis name transform to only contain valid GATKReportTable characters */ + def getReportGroup = self.analysisName.replaceAll(GATKReportTable.INVALID_TABLE_NAME_REGEX, "_") + def getReportFeatures = 
reportFeatures + + def getReportFeatureNames: List[String] = getReportFeatures.keys.toList + def getReportFeature(key: String): String = { + getReportFeatures.get(key) match { + case Some(x) => x + case None => throw new RuntimeException("Get called with key %s but no value was found".format(key)) + } + } + + def getReportName: String = getReportFeature("jobName") + + def configureJobReport(features: Map[String, Any]) { + this.reportFeatures = features.mapValues(_.toString) + } + + // copy the QJobReport information -- todo : what's the best way to do this? + override def copySettingsTo(function: QFunction) { + self.copySettingsTo(function) + function.reportFeatures = this.reportFeatures + } +} + +object QJobReport { + val JOB_REPORT_QUEUE_SCRIPT = "queueJobReport.R" + + // todo -- fixme to have a unique name for Scatter/gather jobs as well + var seenCounter = 1 + var seenNames = Set[String]() + + def printReport(jobsRaw: Map[QFunction, JobRunInfo], dest: File) { + val jobs = jobsRaw.filter(_._2.isFilledIn).filter(_._1.includeInReport) + jobs foreach {case (qf, info) => qf.setRunInfo(info)} + val stream = new PrintStream(new FileOutputStream(dest)) + printJobLogging(jobs.keys.toList, stream) + stream.close() + } + + def plotReport(args: RScriptArgumentCollection, jobReportFile: File) { + val executor = new RScriptExecutor(args, false) // don't except on error + val pdf = jobReportFile.getAbsolutePath + ".pdf" + executor.callRScripts(JOB_REPORT_QUEUE_SCRIPT, jobReportFile.getAbsolutePath, pdf) + } + + def workAroundSameJobNames(func: QFunction):String = { + if ( seenNames.apply(func.jobName) ) { + seenCounter += 1 + "%s_%d".format(func.jobName, seenCounter) + } else { + seenNames += func.jobName + func.jobName + } + } + + /** + * Prints the JobLogging logs to a GATKReport. 
First splits up the + * logs by group, and for each group generates a GATKReportTable + */ + private def printJobLogging(logs: List[QFunction], stream: PrintStream) { + // create the report + val report: GATKReport = new GATKReport + + // create a table for each group of logs + for ( (group, groupLogs) <- groupLogs(logs) ) { + report.addTable(group, "Job logs for " + group) + val table: GATKReportTable = report.getTable(group) + table.addPrimaryKey("jobName", false) + val keys = logKeys(groupLogs) + + // add the columns + keys.foreach(table.addColumn(_, 0)) + for (log <- groupLogs) { + for ( key <- keys ) + table.set(log.getReportName, key, log.getReportFeature(key)) + } + } + + report.print(stream) + } + + private def groupLogs(logs: List[QFunction]): Map[String, List[QFunction]] = { + logs.groupBy(_.getReportGroup) + } + + private def logKeys(logs: List[QFunction]): Set[String] = { + // the keys should be the same for each log, but we will check that + val keys = Set[String](logs(0).getReportFeatureNames : _*) + + for ( log <- logs ) + if ( keys.sameElements(Set(log.getReportFeatureNames)) ) + throw new UserException(("All JobLogging jobs in the same group must have the same set of features. 
" + + "We found one with %s and another with %s").format(keys, log.getReportFeatureNames)) + + keys + } +} diff --git a/public/scala/src/org/broadinstitute/sting/queue/util/QScriptUtils.scala b/public/scala/src/org/broadinstitute/sting/queue/util/QScriptUtils.scala index 12bd880d8..3b1b2ece1 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/util/QScriptUtils.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/util/QScriptUtils.scala @@ -57,4 +57,6 @@ object QScriptUtils { } + def ?[A <: AnyRef](ref: A): Option[A] = + if (ref eq null) None else Some(ref) } \ No newline at end of file diff --git a/settings/repository/edu.mit.broad/picard-private-parts-1959.jar b/settings/repository/edu.mit.broad/picard-private-parts-2034.jar similarity index 62% rename from settings/repository/edu.mit.broad/picard-private-parts-1959.jar rename to settings/repository/edu.mit.broad/picard-private-parts-2034.jar index ae11e636b..11f59420c 100644 Binary files a/settings/repository/edu.mit.broad/picard-private-parts-1959.jar and b/settings/repository/edu.mit.broad/picard-private-parts-2034.jar differ diff --git a/settings/repository/edu.mit.broad/picard-private-parts-1959.xml b/settings/repository/edu.mit.broad/picard-private-parts-2034.xml similarity index 64% rename from settings/repository/edu.mit.broad/picard-private-parts-1959.xml rename to settings/repository/edu.mit.broad/picard-private-parts-2034.xml index e7c7e3a21..1a60a2015 100644 --- a/settings/repository/edu.mit.broad/picard-private-parts-1959.xml +++ b/settings/repository/edu.mit.broad/picard-private-parts-2034.xml @@ -1,3 +1,3 @@ - + diff --git a/settings/repository/net.sf/picard-1.49.895.xml b/settings/repository/net.sf/picard-1.49.895.xml deleted file mode 100644 index 52d4900c5..000000000 --- a/settings/repository/net.sf/picard-1.49.895.xml +++ /dev/null @@ -1,3 +0,0 @@ - - - diff --git a/settings/repository/net.sf/picard-1.49.895.jar b/settings/repository/net.sf/picard-1.52.944.jar similarity index 
82% rename from settings/repository/net.sf/picard-1.49.895.jar rename to settings/repository/net.sf/picard-1.52.944.jar index 3ee1f2090..e147ebbba 100644 Binary files a/settings/repository/net.sf/picard-1.49.895.jar and b/settings/repository/net.sf/picard-1.52.944.jar differ diff --git a/settings/repository/net.sf/picard-1.52.944.xml b/settings/repository/net.sf/picard-1.52.944.xml new file mode 100644 index 000000000..61fac6644 --- /dev/null +++ b/settings/repository/net.sf/picard-1.52.944.xml @@ -0,0 +1,3 @@ + + + diff --git a/settings/repository/net.sf/sam-1.49.895.xml b/settings/repository/net.sf/sam-1.49.895.xml deleted file mode 100644 index 0436ce881..000000000 --- a/settings/repository/net.sf/sam-1.49.895.xml +++ /dev/null @@ -1,3 +0,0 @@ - - - diff --git a/settings/repository/net.sf/sam-1.49.895.jar b/settings/repository/net.sf/sam-1.52.944.jar similarity index 76% rename from settings/repository/net.sf/sam-1.49.895.jar rename to settings/repository/net.sf/sam-1.52.944.jar index c55ab0b72..7037c7a96 100644 Binary files a/settings/repository/net.sf/sam-1.49.895.jar and b/settings/repository/net.sf/sam-1.52.944.jar differ diff --git a/settings/repository/net.sf/sam-1.52.944.xml b/settings/repository/net.sf/sam-1.52.944.xml new file mode 100644 index 000000000..2229395b2 --- /dev/null +++ b/settings/repository/net.sf/sam-1.52.944.xml @@ -0,0 +1,3 @@ + + + diff --git a/settings/repository/org.broad/tribble-18.jar b/settings/repository/org.broad/tribble-21.jar similarity index 88% rename from settings/repository/org.broad/tribble-18.jar rename to settings/repository/org.broad/tribble-21.jar index 1ea101dd0..2cb96f93d 100644 Binary files a/settings/repository/org.broad/tribble-18.jar and b/settings/repository/org.broad/tribble-21.jar differ diff --git a/settings/repository/org.broad/tribble-18.xml b/settings/repository/org.broad/tribble-21.xml similarity index 51% rename from settings/repository/org.broad/tribble-18.xml rename to 
settings/repository/org.broad/tribble-21.xml index d2648ddad..cd93af1b0 100644 --- a/settings/repository/org.broad/tribble-18.xml +++ b/settings/repository/org.broad/tribble-21.xml @@ -1,3 +1,3 @@ - +