Checking if the interval files exist before using them to calculate the minimum scatter parts.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5143 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
b7aac3b846
commit
d4f744a4d4
|
|
@ -157,13 +157,26 @@ public class IntervalUtils {
|
||||||
* @return true if the token looks like a filename, or false otherwise.
|
* @return true if the token looks like a filename, or false otherwise.
|
||||||
*/
|
*/
|
||||||
public static boolean isIntervalFile(String str) {
|
public static boolean isIntervalFile(String str) {
|
||||||
|
return isIntervalFile(str, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Check if string argument was intended as a file
|
||||||
|
* Accepted file extensions: .bed, .list, .picard, .interval_list, .intervals.
|
||||||
|
* @param str token to identify as a filename.
|
||||||
|
* @param checkExists if true throws an exception if the file doesn't exist.
|
||||||
|
* @return true if the token looks like a filename, or false otherwise.
|
||||||
|
*/
|
||||||
|
public static boolean isIntervalFile(String str, boolean checkExists) {
|
||||||
// should we define list of file extensions as a public array somewhere?
|
// should we define list of file extensions as a public array somewhere?
|
||||||
// is regex or endsWith better?
|
// is regex or endsWith better?
|
||||||
File file = new File(str);
|
File file = new File(str);
|
||||||
if (str.toUpperCase().endsWith(".BED") || str.toUpperCase().endsWith(".LIST") ||
|
if (str.toUpperCase().endsWith(".BED") || str.toUpperCase().endsWith(".LIST") ||
|
||||||
str.toUpperCase().endsWith(".PICARD") || str.toUpperCase().endsWith(".INTERVAL_LIST")
|
str.toUpperCase().endsWith(".PICARD") || str.toUpperCase().endsWith(".INTERVAL_LIST")
|
||||||
|| str.toUpperCase().endsWith(".INTERVALS")) {
|
|| str.toUpperCase().endsWith(".INTERVALS")) {
|
||||||
if (file.exists())
|
if (!checkExists)
|
||||||
|
return true;
|
||||||
|
else if (file.exists())
|
||||||
return true;
|
return true;
|
||||||
else
|
else
|
||||||
throw new UserException.CouldNotReadInputFile(file, "The interval file does not exist.");
|
throw new UserException.CouldNotReadInputFile(file, "The interval file does not exist.");
|
||||||
|
|
|
||||||
|
|
@ -128,6 +128,22 @@ public class IntervalUtilsUnitTest extends BaseTest {
|
||||||
Assert.assertEquals(IntervalUtils.countIntervalArguments(reference, Arrays.asList("chr1:1-2", "chr1:4-5", "chr2:1-1", "chr3:2-2"), true), 3);
|
Assert.assertEquals(IntervalUtils.countIntervalArguments(reference, Arrays.asList("chr1:1-2", "chr1:4-5", "chr2:1-1", "chr3:2-2"), true), 3);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testIsIntervalFile() {
|
||||||
|
Assert.assertTrue(IntervalUtils.isIntervalFile(BaseTest.validationDataLocation + "empty_intervals.list"));
|
||||||
|
Assert.assertTrue(IntervalUtils.isIntervalFile(BaseTest.validationDataLocation + "empty_intervals.list", true));
|
||||||
|
|
||||||
|
List<String> extensions = Arrays.asList("bed", "interval_list", "intervals", "list", "picard");
|
||||||
|
for (String extension: extensions) {
|
||||||
|
Assert.assertTrue(IntervalUtils.isIntervalFile("test_intervals." + extension, false), "Tested interval file extension: " + extension);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(expectedExceptions = UserException.CouldNotReadInputFile.class)
|
||||||
|
public void testMissingIntervalFile() {
|
||||||
|
IntervalUtils.isIntervalFile(BaseTest.validationDataLocation + "no_such_intervals.list");
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testBasicScatter() {
|
public void testBasicScatter() {
|
||||||
GenomeLoc chr1 = genomeLocParser.parseGenomeInterval("chr1");
|
GenomeLoc chr1 = genomeLocParser.parseGenomeInterval("chr1");
|
||||||
|
|
|
||||||
|
|
@ -67,8 +67,18 @@ class IntervalScatterFunction extends ScatterFunction with InProcessFunction {
|
||||||
this.includeUnmapped = gatk.intervalsString.exists(interval => IntervalUtils.isUnmapped(interval))
|
this.includeUnmapped = gatk.intervalsString.exists(interval => IntervalUtils.isUnmapped(interval))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
this.scatterCount = originalFunction.scatterCount
|
||||||
|
if (this.intervalFilesExist) {
|
||||||
val maxScatterCount = IntervalUtils.countIntervalArguments(this.referenceSequence, this.intervals, this.splitByContig)
|
val maxScatterCount = IntervalUtils.countIntervalArguments(this.referenceSequence, this.intervals, this.splitByContig)
|
||||||
this.scatterCount = maxScatterCount min originalFunction.scatterCount
|
this.scatterCount = this.scatterCount min maxScatterCount
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns true if all interval files exist.
|
||||||
|
*/
|
||||||
|
private def intervalFilesExist = {
|
||||||
|
!intervals.exists(interval => IntervalUtils.isIntervalFile(interval, false) && !new File(interval).exists)
|
||||||
}
|
}
|
||||||
|
|
||||||
def initCloneInputs(cloneFunction: CloneFunction, index: Int) = {
|
def initCloneInputs(cloneFunction: CloneFunction, index: Int) = {
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue