diff --git a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index 67e5ad95b..b7000e0ee 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -64,6 +64,7 @@ import org.broadinstitute.sting.utils.threading.ThreadEfficiencyMonitor; import java.io.File; import java.util.*; +import java.util.concurrent.TimeUnit; /** * A GenomeAnalysisEngine that runs a specified walker. @@ -73,6 +74,7 @@ public class GenomeAnalysisEngine { * our log, which we want to capture anything from this class */ private static Logger logger = Logger.getLogger(GenomeAnalysisEngine.class); + public static final long NO_RUNTIME_LIMIT = -1; /** * The GATK command-line argument parsing code. @@ -1090,6 +1092,33 @@ public class GenomeAnalysisEngine { public String createApproximateCommandLineArgumentString(Object... argumentProviders) { return CommandLineUtils.createApproximateCommandLineArgumentString(parsingEngine,argumentProviders); } - + /** + * Does the current runtime in unit exceed the runtime limit, if one has been provided? + * + * @param runtime the runtime of this GATK instance in minutes + * @param unit the time unit of runtime + * @return false if not limit was requested or if runtime <= the limit, true otherwise + */ + public boolean exceedsRuntimeLimit(final long runtime, final TimeUnit unit) { + if ( runtime < 0 ) throw new IllegalArgumentException("runtime must be >= 0 but got " + runtime); + + if ( getArguments().maxRuntime == NO_RUNTIME_LIMIT ) + return false; + else { + final long actualRuntimeNano = TimeUnit.NANOSECONDS.convert(runtime, unit); + final long maxRuntimeNano = getRuntimeLimitInNanoseconds(); + return actualRuntimeNano > maxRuntimeNano; + } + } + + /** + * @return the runtime limit in nanoseconds, or -1 if no limit was specified + */ + public long getRuntimeLimitInNanoseconds() { + if ( getArguments().maxRuntime == NO_RUNTIME_LIMIT ) + return -1; + else + return TimeUnit.NANOSECONDS.convert(getArguments().maxRuntime, getArguments().maxRuntimeUnits); + } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java index 7875ced5a..e2b943582 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java @@ -31,6 +31,7 @@ import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Hidden; import org.broadinstitute.sting.commandline.Input; import org.broadinstitute.sting.commandline.IntervalBinding; +import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.downsampling.DownsampleType; import org.broadinstitute.sting.gatk.downsampling.DownsamplingMethod; import org.broadinstitute.sting.gatk.phonehome.GATKRunReport; @@ -44,6 +45,7 @@ import java.io.File; import java.util.ArrayList; import java.util.Collections; import java.util.List; +import java.util.concurrent.TimeUnit; /** * @author aaron @@ -91,7 +93,7 @@ public class GATKArgumentCollection { // -------------------------------------------------------------------------------------------------------------- // - // XXX + // General features // // -------------------------------------------------------------------------------------------------------------- @@ -143,6 +145,12 @@ public class GATKArgumentCollection { @Argument(fullName = "disableRandomization",doc="Completely eliminates randomization from nondeterministic methods. To be used mostly in the testing framework where dynamic parallelism can result in differing numbers of calls to the generator.") public boolean disableRandomization = false; + @Argument(fullName = "maxRuntime", shortName = "maxRuntime", doc="If provided, that GATK will stop execution cleanly as soon after maxRuntime has been exceeded, truncating the run but not exiting with a failure. By default the value is interpreted in minutes, but this can be changed by maxRuntimeUnits", required = false) + public long maxRuntime = GenomeAnalysisEngine.NO_RUNTIME_LIMIT; + + @Argument(fullName = "maxRuntimeUnits", shortName = "maxRuntimeUnits", doc="The TimeUnit for maxRuntime", required = false) + public TimeUnit maxRuntimeUnits = TimeUnit.MINUTES; + // -------------------------------------------------------------------------------------------------------------- // // Downsampling Arguments diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java b/public/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java index 31f2a469c..cc0161b2d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java +++ b/public/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java @@ -123,7 +123,7 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar final ReduceTree reduceTree = new ReduceTree(this); initializeWalker(walker); - while (isShardTraversePending() || isTreeReducePending()) { + while (! abortExecution() && (isShardTraversePending() || isTreeReducePending())) { // Check for errors during execution. errorTracker.throwErrorIfPending(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java b/public/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java index 5b94e0767..f3c1ae91c 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java +++ b/public/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java @@ -63,7 +63,7 @@ public class LinearMicroScheduler extends MicroScheduler { final TraversalEngine traversalEngine = borrowTraversalEngine(this); for (Shard shard : shardStrategy ) { - if ( done || shard == null ) // we ran out of shards that aren't owned + if ( abortExecution() || done || shard == null ) // we ran out of shards that aren't owned break; if(shard.getShardType() == Shard.ShardType.LOCUS) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java b/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java index df4ed9ef8..38170040a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java +++ b/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java @@ -39,6 +39,7 @@ import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation; import org.broadinstitute.sting.gatk.traversals.*; import org.broadinstitute.sting.gatk.walkers.*; +import org.broadinstitute.sting.utils.AutoFormattingTime; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; @@ -52,6 +53,7 @@ import javax.management.ObjectName; import java.io.File; import java.lang.management.ManagementFactory; import java.util.*; +import java.util.concurrent.TimeUnit; /** @@ -269,6 +271,26 @@ public abstract class MicroScheduler implements MicroSchedulerMBean { this.threadEfficiencyMonitor = threadEfficiencyMonitor; } + /** + * Should we stop all execution work and exit gracefully? + * + * Returns true in the case where some external signal or time limit has been received, indicating + * that this GATK shouldn't continue executing. This isn't a kill signal, it is really a "shutdown + * gracefully at the next opportunity" signal. Concrete implementations of the MicroScheduler + * examine this value as often as reasonable and, if it returns true, stop what they are doing + * at the next available opportunity, shutdown their resources, call notify done, and return. + * + * @return true if we should abort execution, or false otherwise + */ + protected boolean abortExecution() { + final boolean abort = engine.exceedsRuntimeLimit(progressMeter.getRuntimeInNanoseconds(), TimeUnit.NANOSECONDS); + if ( abort ) { + final AutoFormattingTime aft = new AutoFormattingTime(TimeUnit.SECONDS.convert(engine.getRuntimeLimitInNanoseconds(), TimeUnit.NANOSECONDS), 1, 4); + logger.info("Aborting execution (cleanly) because the runtime has exceeded the requested maximum " + aft); + } + return abort; + } + /** * Walks a walker over the given list of intervals. * diff --git a/public/java/src/org/broadinstitute/sting/utils/AutoFormattingTime.java b/public/java/src/org/broadinstitute/sting/utils/AutoFormattingTime.java index 8964c16cb..4455666e8 100644 --- a/public/java/src/org/broadinstitute/sting/utils/AutoFormattingTime.java +++ b/public/java/src/org/broadinstitute/sting/utils/AutoFormattingTime.java @@ -4,12 +4,21 @@ package org.broadinstitute.sting.utils; * Simple utility class that makes it convenient to print unit adjusted times */ public class AutoFormattingTime { - double timeInSeconds; // in Seconds - int precision; // for format + private final int width; // for format + private final int precision; // for format - public AutoFormattingTime(double timeInSeconds, int precision) { + double timeInSeconds; // in Seconds + private final String formatString; + + public AutoFormattingTime(double timeInSeconds, final int width, int precision) { + this.width = width; this.timeInSeconds = timeInSeconds; this.precision = precision; + this.formatString = "%" + width + "." + precision + "f %s"; + } + + public AutoFormattingTime(double timeInSeconds, int precision) { + this(timeInSeconds, 6, precision); } public AutoFormattingTime(double timeInSeconds) { @@ -20,6 +29,20 @@ public class AutoFormattingTime { return timeInSeconds; } + /** + * @return the precision (a la format's %WIDTH.PERCISIONf) + */ + public int getWidth() { + return width; + } + + /** + * @return the precision (a la format's %WIDTH.PERCISIONf) + */ + public int getPrecision() { + return precision; + } + /** * Instead of 10000 s, returns 2.8 hours * @return @@ -48,6 +71,6 @@ public class AutoFormattingTime { } } - return String.format("%6."+precision+"f %s", unitTime, unit); + return String.format(formatString, unitTime, unit); } } diff --git a/public/java/src/org/broadinstitute/sting/utils/progressmeter/ProgressMeter.java b/public/java/src/org/broadinstitute/sting/utils/progressmeter/ProgressMeter.java index 69cf52fc2..a8715e242 100755 --- a/public/java/src/org/broadinstitute/sting/utils/progressmeter/ProgressMeter.java +++ b/public/java/src/org/broadinstitute/sting/utils/progressmeter/ProgressMeter.java @@ -24,6 +24,7 @@ package org.broadinstitute.sting.utils.progressmeter; +import com.google.java.contract.Ensures; import com.google.java.contract.Invariant; import org.apache.log4j.Logger; import org.broadinstitute.sting.utils.*; @@ -200,6 +201,14 @@ public class ProgressMeter { "Location", processingUnitName, processingUnitName)); } + /** + * @return the current runtime in nanoseconds + */ + @Ensures("result >= 0") + public long getRuntimeInNanoseconds() { + return timer.getElapsedTimeNano(); + } + /** * Utility routine that prints out process information (including timing) every N records or * every M seconds, for N and M set in global variables. diff --git a/public/java/test/org/broadinstitute/sting/gatk/MaxRuntimeIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/MaxRuntimeIntegrationTest.java new file mode 100644 index 000000000..6cfd7bf46 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/gatk/MaxRuntimeIntegrationTest.java @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk; + +import org.broadinstitute.sting.WalkerTest; +import org.broadinstitute.sting.utils.SimpleTimer; +import org.testng.Assert; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.util.Arrays; +import java.util.Collections; +import java.util.concurrent.TimeUnit; + +/** + * + */ +public class MaxRuntimeIntegrationTest extends WalkerTest { + private static final long STARTUP_TIME = TimeUnit.NANOSECONDS.convert(20, TimeUnit.SECONDS); + + private class MaxRuntimeTestProvider extends TestDataProvider { + final long maxRuntime; + final TimeUnit unit; + + public MaxRuntimeTestProvider(final long maxRuntime, final TimeUnit unit) { + super(MaxRuntimeTestProvider.class); + this.maxRuntime = maxRuntime; + this.unit = unit; + setName(String.format("Max runtime test : %d of %s", maxRuntime, unit)); + } + + public long expectedMaxRuntimeNano() { + return TimeUnit.NANOSECONDS.convert(maxRuntime, unit) + STARTUP_TIME; + } + } + + @DataProvider(name = "MaxRuntimeProvider") + public Object[][] makeMaxRuntimeProvider() { + for ( final TimeUnit requestedUnits : Arrays.asList(TimeUnit.NANOSECONDS, TimeUnit.MILLISECONDS, TimeUnit.SECONDS, TimeUnit.MINUTES) ) + new MaxRuntimeTestProvider(requestedUnits.convert(30, TimeUnit.SECONDS), requestedUnits); + + return MaxRuntimeTestProvider.getTests(MaxRuntimeTestProvider.class); + } + + // + // Loop over errors to throw, make sure they are the errors we get back from the engine, regardless of NT type + // + @Test(enabled = true, dataProvider = "MaxRuntimeProvider", timeOut = 60 * 1000) + public void testMaxRuntime(final MaxRuntimeTestProvider cfg) { + WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( + "-T UnifiedGenotyper -R " + hg18Reference + + " -I " + validationDataLocation + "NA12878.WEx.downsampled20x.bam -o /dev/null" + + " -maxRuntime " + cfg.maxRuntime + " -maxRuntimeUnits " + cfg.unit, 0, + Collections.emptyList()); + final SimpleTimer timer = new SimpleTimer().start(); + executeTest("Max runtime " + cfg, spec); + final long actualRuntimeNano = timer.getElapsedTimeNano(); + + Assert.assertTrue(actualRuntimeNano < cfg.expectedMaxRuntimeNano(), + "Actual runtime " + TimeUnit.SECONDS.convert(actualRuntimeNano, TimeUnit.NANOSECONDS) + + " exceeded max. tolerated runtime " + TimeUnit.SECONDS.convert(cfg.expectedMaxRuntimeNano(), TimeUnit.NANOSECONDS) + + " given requested runtime " + cfg.maxRuntime + " " + cfg.unit); + } +} \ No newline at end of file