diff --git a/java/src/org/broadinstitute/sting/utils/interval/IntervalUtils.java b/java/src/org/broadinstitute/sting/utils/interval/IntervalUtils.java index 1d148ac95..07936d583 100644 --- a/java/src/org/broadinstitute/sting/utils/interval/IntervalUtils.java +++ b/java/src/org/broadinstitute/sting/utils/interval/IntervalUtils.java @@ -157,13 +157,26 @@ public class IntervalUtils { * @return true if the token looks like a filename, or false otherwise. */ public static boolean isIntervalFile(String str) { + return isIntervalFile(str, true); + } + + /** + * Check if string argument was intended as a file + * Accepted file extensions: .bed, .list, .picard, .interval_list, .intervals. + * @param str token to identify as a filename. + * @param checkExists if true throws an exception if the file doesn't exist. + * @return true if the token looks like a filename, or false otherwise. + */ + public static boolean isIntervalFile(String str, boolean checkExists) { // should we define list of file extensions as a public array somewhere? // is regex or endsiwth better? 
File file = new File(str); if (str.toUpperCase().endsWith(".BED") || str.toUpperCase().endsWith(".LIST") || str.toUpperCase().endsWith(".PICARD") || str.toUpperCase().endsWith(".INTERVAL_LIST") || str.toUpperCase().endsWith(".INTERVALS")) { - if (file.exists()) + if (!checkExists) + return true; + else if (file.exists()) return true; else throw new UserException.CouldNotReadInputFile(file, "The interval file does not exist."); diff --git a/java/test/org/broadinstitute/sting/utils/interval/IntervalUtilsUnitTest.java b/java/test/org/broadinstitute/sting/utils/interval/IntervalUtilsUnitTest.java index 7711759a5..5a11bf17f 100644 --- a/java/test/org/broadinstitute/sting/utils/interval/IntervalUtilsUnitTest.java +++ b/java/test/org/broadinstitute/sting/utils/interval/IntervalUtilsUnitTest.java @@ -128,6 +128,22 @@ public class IntervalUtilsUnitTest extends BaseTest { Assert.assertEquals(IntervalUtils.countIntervalArguments(reference, Arrays.asList("chr1:1-2", "chr1:4-5", "chr2:1-1", "chr3:2-2"), true), 3); } + @Test + public void testIsIntervalFile() { + Assert.assertTrue(IntervalUtils.isIntervalFile(BaseTest.validationDataLocation + "empty_intervals.list")); + Assert.assertTrue(IntervalUtils.isIntervalFile(BaseTest.validationDataLocation + "empty_intervals.list", true)); + + List<String> extensions = Arrays.asList("bed", "interval_list", "intervals", "list", "picard"); + for (String extension: extensions) { + Assert.assertTrue(IntervalUtils.isIntervalFile("test_intervals." 
+ extension, false), "Tested interval file extension: " + extension); + } + } + + @Test(expectedExceptions = UserException.CouldNotReadInputFile.class) + public void testMissingIntervalFile() { + IntervalUtils.isIntervalFile(BaseTest.validationDataLocation + "no_such_intervals.list"); + } + @Test public void testBasicScatter() { GenomeLoc chr1 = genomeLocParser.parseGenomeInterval("chr1"); diff --git a/scala/src/org/broadinstitute/sting/queue/extensions/gatk/IntervalScatterFunction.scala b/scala/src/org/broadinstitute/sting/queue/extensions/gatk/IntervalScatterFunction.scala index 5e3384020..108b7c451 100644 --- a/scala/src/org/broadinstitute/sting/queue/extensions/gatk/IntervalScatterFunction.scala +++ b/scala/src/org/broadinstitute/sting/queue/extensions/gatk/IntervalScatterFunction.scala @@ -67,8 +67,18 @@ class IntervalScatterFunction extends ScatterFunction with InProcessFunction { this.includeUnmapped = gatk.intervalsString.exists(interval => IntervalUtils.isUnmapped(interval)) } - val maxScatterCount = IntervalUtils.countIntervalArguments(this.referenceSequence, this.intervals, this.splitByContig) - this.scatterCount = maxScatterCount min originalFunction.scatterCount + this.scatterCount = originalFunction.scatterCount + if (this.intervalFilesExist) { + val maxScatterCount = IntervalUtils.countIntervalArguments(this.referenceSequence, this.intervals, this.splitByContig) + this.scatterCount = this.scatterCount min maxScatterCount + } + } + + /** + * Returns true if all interval files exist. + */ + private def intervalFilesExist = { + !intervals.exists(interval => IntervalUtils.isIntervalFile(interval, false) && !new File(interval).exists) } def initCloneInputs(cloneFunction: CloneFunction, index: Int) = {