- After removing special code for intervals, they are generated as List[File] instead of File. Changed the previous checkin, which was appending to this list, to instead assign a singleton list.

- More cleanup, including removing the temporary classes and intermediate error files. Quieting any deletion errors using Apache Commons IO 2.0.
- Counting the contigs during QScript generation instead of requiring the end user to pass a separate contig interval list.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4539 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
kshakir 2010-10-21 06:37:28 +00:00
parent b88cfd2939
commit b954a5a4d5
16 changed files with 126 additions and 67 deletions

View File

@ -34,7 +34,7 @@
<!-- Commons Dependencies --> <!-- Commons Dependencies -->
<dependency org="org.apache.commons" name="commons-jexl" rev="2.0"/> <dependency org="org.apache.commons" name="commons-jexl" rev="2.0"/>
<dependency org="commons-logging" name="commons-logging" rev="1.1.1"/> <dependency org="commons-logging" name="commons-logging" rev="1.1.1"/>
<dependency org="commons-io" name="commons-io" rev="1.3.2"/> <dependency org="commons-io" name="commons-io" rev="2.0"/>
<!-- Dependencies for Queue GATK Extensions code generator living in java/src --> <!-- Dependencies for Queue GATK Extensions code generator living in java/src -->
<dependency org="commons-lang" name="commons-lang" rev="2.5"/> <dependency org="commons-lang" name="commons-lang" rev="2.5"/>

View File

@ -16,7 +16,7 @@ val MERGED_DIR = new File("/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/ma
trait UNIVERSAL_GATK_ARGS extends CommandLineGATK { trait UNIVERSAL_GATK_ARGS extends CommandLineGATK {
this.logging_level = "INFO"; this.logging_level = "INFO";
this.jarFile = gatkJarFile; this.jarFile = gatkJarFile;
this.intervals :+= new File(TARGET_INTERVAL); this.intervals = List(new File(TARGET_INTERVAL));
this.reference_sequence = referenceFile; this.reference_sequence = referenceFile;
this.jobQueue = "gsa"; this.jobQueue = "gsa";
this.et = Option(org.broadinstitute.sting.gatk.phonehome.GATKRunReport.PhoneHomeOption.STANDARD); this.et = Option(org.broadinstitute.sting.gatk.phonehome.GATKRunReport.PhoneHomeOption.STANDARD);

View File

@ -20,7 +20,7 @@ class ManySampleUGPerformanceTesting extends QScript {
trait UNIVERSAL_GATK_ARGS extends CommandLineGATK { trait UNIVERSAL_GATK_ARGS extends CommandLineGATK {
this.logging_level = "INFO"; this.logging_level = "INFO";
this.jarFile = gatkJarFile; this.jarFile = gatkJarFile;
this.intervals :+= new File(TARGET_INTERVAL); this.intervals = List(new File(TARGET_INTERVAL));
this.reference_sequence = referenceFile; this.reference_sequence = referenceFile;
this.jobQueue = "gsa"; this.jobQueue = "gsa";
this.et = Option(org.broadinstitute.sting.gatk.phonehome.GATKRunReport.PhoneHomeOption.STANDARD); this.et = Option(org.broadinstitute.sting.gatk.phonehome.GATKRunReport.PhoneHomeOption.STANDARD);

View File

@ -41,7 +41,7 @@ class ExampleUnifiedGenotyper extends QScript {
trait UnifiedGenotyperArguments extends CommandLineGATK { trait UnifiedGenotyperArguments extends CommandLineGATK {
this.jarFile = qscript.gatkJar this.jarFile = qscript.gatkJar
this.reference_sequence = qscript.referenceFile this.reference_sequence = qscript.referenceFile
this.intervals :+= qscript.intervals this.intervals = List(qscript.intervals)
// Some() is how you set the value for an scala Option. // Some() is how you set the value for an scala Option.
// Set the memory limit to 2 gigabytes on each command. // Set the memory limit to 2 gigabytes on each command.
this.memoryLimit = Some(2) this.memoryLimit = Some(2)

View File

@ -16,9 +16,6 @@ import org.broadinstitute.sting.utils.report.VE2ReportFactory.VE2TemplateType
class fullCallingPipeline extends QScript { class fullCallingPipeline extends QScript {
qscript => qscript =>
@Argument(doc="list of contigs in the reference over which indel-cleaning jobs should be scattered (ugly)", shortName="contigIntervals")
var contigIntervals: File = _
@Argument(doc="the YAML file specifying inputs, interval lists, reference sequence, etc.", shortName="Y") @Argument(doc="the YAML file specifying inputs, interval lists, reference sequence, etc.", shortName="Y")
var yamlFile: File = _ var yamlFile: File = _
@ -64,7 +61,7 @@ class fullCallingPipeline extends QScript {
private var pipeline: Pipeline = _ private var pipeline: Pipeline = _
trait CommandLineGATKArgs extends CommandLineGATK { trait CommandLineGATKArgs extends CommandLineGATK {
this.intervals :+= qscript.pipeline.getProject.getIntervalList this.intervals = List(qscript.pipeline.getProject.getIntervalList)
this.jarFile = qscript.gatkJar this.jarFile = qscript.gatkJar
this.reference_sequence = qscript.pipeline.getProject.getReferenceFile this.reference_sequence = qscript.pipeline.getProject.getReferenceFile
this.memoryLimit = Some(4) this.memoryLimit = Some(4)
@ -88,10 +85,9 @@ class fullCallingPipeline extends QScript {
//val expKind = qscript.protocol //val expKind = qscript.protocol
// count number of contigs (needed for indel cleaning parallelism) // count number of contigs (needed for indel cleaning parallelism)
var contigCount = 0 val contigCount = IntervalScatterFunction.countContigs(
for ( line <- scala.io.Source.fromFile(qscript.contigIntervals).getLines ) { qscript.pipeline.getProject.getReferenceFile,
contigCount += 1 List(qscript.pipeline.getProject.getIntervalList.toString))
}
for ( sample <- recalibratedSamples ) { for ( sample <- recalibratedSamples ) {
val sampleId = sample.getId val sampleId = sample.getId
@ -118,7 +114,6 @@ class fullCallingPipeline extends QScript {
realigner.jobOutputFile = new File(".queue/logs/Cleaning/%s/IndelRealigner.out".format(sampleId)) realigner.jobOutputFile = new File(".queue/logs/Cleaning/%s/IndelRealigner.out".format(sampleId))
realigner.analysisName = "RealignBam_"+sampleId realigner.analysisName = "RealignBam_"+sampleId
realigner.input_file = targetCreator.input_file realigner.input_file = targetCreator.input_file
realigner.intervals :+= qscript.contigIntervals
realigner.targetIntervals = targetCreator.out realigner.targetIntervals = targetCreator.out
realigner.scatterCount = contigCount realigner.scatterCount = contigCount

View File

@ -39,7 +39,7 @@ def script = {
add(new CountCovariates(bamIn, recalData) { useOriginalQualities = true } ) add(new CountCovariates(bamIn, recalData) { useOriginalQualities = true } )
val tableRecal = new TableRecalibrate(bamIn, recalData, recalBam) { useOriginalQualities = true } val tableRecal = new TableRecalibrate(bamIn, recalData, recalBam) { useOriginalQualities = true }
if ( scatter ) { if ( scatter ) {
tableRecal.intervals :+= new File("/humgen/gsa-hpprojects/GATK/data/chromosomes.hg18.interval_list") tableRecal.intervals = List(new File("/humgen/gsa-hpprojects/GATK/data/chromosomes.hg18.interval_list"))
//tableRecal.scatterClass = classOf[ContigScatterFunction] //tableRecal.scatterClass = classOf[ContigScatterFunction]
tableRecal.setupGatherFunction = { case (f: PicardBamJarFunction, _) => f.jarFile = picardMergeSamFilesJar; f.memoryLimit = Some(4) } tableRecal.setupGatherFunction = { case (f: PicardBamJarFunction, _) => f.jarFile = picardMergeSamFilesJar; f.memoryLimit = Some(4) }
tableRecal.scatterCount = 25 tableRecal.scatterCount = 25

View File

@ -75,18 +75,18 @@ class QCommandLine extends CommandLineProgram with Logging {
} }
Runtime.getRuntime.addShutdownHook(new Thread { Runtime.getRuntime.addShutdownHook(new Thread {
/** Kills running processes as the JVM shuts down. */ /** Cleanup as the JVM shuts down. */
override def run = { override def run = {
qGraph.shutdown() qGraph.shutdown()
ProcessController.shutdown() ProcessController.shutdown()
QScriptManager.deleteOutdir()
} }
}) })
if ( ! getStatus ) { if (getStatus)
qGraph.run
} else {
qGraph.checkStatus qGraph.checkStatus
} else
qGraph.run
if (qGraph.hasFailed) { if (qGraph.hasFailed) {
logger.info("Done with errors") logger.info("Done with errors")

View File

@ -11,6 +11,7 @@ import org.apache.log4j.Level
import scala.tools.nsc.util.{FakePos, NoPosition, Position} import scala.tools.nsc.util.{FakePos, NoPosition, Position}
import org.broadinstitute.sting.utils.classloader.{PackageUtils, PluginManager} import org.broadinstitute.sting.utils.classloader.{PackageUtils, PluginManager}
import org.broadinstitute.sting.queue.util.TextFormatUtils._ import org.broadinstitute.sting.queue.util.TextFormatUtils._
import org.apache.commons.io.FileUtils
/** /**
* Plugin manager for QScripts which loads QScripts into the current class loader. * Plugin manager for QScripts which loads QScripts into the current class loader.
@ -40,6 +41,8 @@ class QScriptManager extends PluginManager[QScript](classOf[QScript], "QScript",
* Plugin manager for QScripts which loads QScripts into the current classloader. * Plugin manager for QScripts which loads QScripts into the current classloader.
*/ */
object QScriptManager extends Logging { object QScriptManager extends Logging {
private val outdir = IOUtils.tempDir("Q-classes")
/** /**
* Compiles and loads the scripts in the files into the current classloader. * Compiles and loads the scripts in the files into the current classloader.
* Heavily based on scala/src/compiler/scala/tools/ant/Scalac.scala * Heavily based on scala/src/compiler/scala/tools/ant/Scalac.scala
@ -49,7 +52,6 @@ object QScriptManager extends Logging {
if (scripts.size > 0) { if (scripts.size > 0) {
val settings = new Settings((error: String) => logger.error(error)) val settings = new Settings((error: String) => logger.error(error))
val outdir = IOUtils.tempDir("Q-classes")
settings.deprecation.value = true settings.deprecation.value = true
settings.outdir.value = outdir.getPath settings.outdir.value = outdir.getPath
@ -82,6 +84,14 @@ object QScriptManager extends Logging {
} }
} }
/**
* Removes the outdir cleaning up the temporary classes.
*/
def deleteOutdir() = {
if (FileUtils.deleteQuietly(outdir))
logger.debug("Deleted " + outdir)
}
/** /**
* NSC (New Scala Compiler) reporter which logs to Log4J. * NSC (New Scala Compiler) reporter which logs to Log4J.
* Heavily based on scala/src/compiler/scala/tools/nsc/reporters/ConsoleReporter.scala * Heavily based on scala/src/compiler/scala/tools/nsc/reporters/ConsoleReporter.scala

View File

@ -1,7 +1,7 @@
package org.broadinstitute.sting.queue.engine package org.broadinstitute.sting.queue.engine
import org.broadinstitute.sting.queue.function.InProcessFunction import org.broadinstitute.sting.queue.function.InProcessFunction
import org.broadinstitute.sting.queue.util.{IOUtils, Logging} import org.broadinstitute.sting.queue.util.Logging
/** /**
* Runs a function that executes in process and does not fork out an external process. * Runs a function that executes in process and does not fork out an external process.
@ -17,8 +17,8 @@ class InProcessRunner(val function: InProcessFunction) extends JobRunner with Lo
logger.info("Starting: " + function.description) logger.info("Starting: " + function.description)
} }
function.doneOutputs.foreach(_.delete()) function.deleteLogs()
function.failOutputs.foreach(_.delete()) function.deleteOutputs()
runStatus = RunnerStatus.RUNNING runStatus = RunnerStatus.RUNNING
function.mkOutputDirectories() function.mkOutputDirectories()
function.run() function.run()

View File

@ -3,6 +3,7 @@ package org.broadinstitute.sting.queue.engine
import java.io.File import java.io.File
import org.broadinstitute.sting.queue.function.CommandLineFunction import org.broadinstitute.sting.queue.function.CommandLineFunction
import org.broadinstitute.sting.queue.util._ import org.broadinstitute.sting.queue.util._
import org.apache.commons.io.FileUtils
/** /**
* Runs jobs on an LSF compute cluster. * Runs jobs on an LSF compute cluster.
@ -72,9 +73,8 @@ class LsfJobRunner(val function: CommandLineFunction) extends DispatchJobRunner
logger.info("Starting: " + job.bsubCommand.mkString(" ")) logger.info("Starting: " + job.bsubCommand.mkString(" "))
} }
function.jobOutputFile.delete() function.deleteLogs()
if (function.jobErrorFile != null) function.deleteOutputs()
function.jobErrorFile.delete()
runStatus = RunnerStatus.RUNNING runStatus = RunnerStatus.RUNNING
Retry.attempt(() => job.run(), 1, 5, 10) Retry.attempt(() => job.run(), 1, 5, 10)
@ -139,11 +139,11 @@ class LsfJobRunner(val function: CommandLineFunction) extends DispatchJobRunner
* Removes all temporary files used for this LSF job. * Removes all temporary files used for this LSF job.
*/ */
def removeTemporaryFiles() = { def removeTemporaryFiles() = {
exec.delete() FileUtils.deleteQuietly(exec)
preExec.delete() FileUtils.deleteQuietly(preExec)
postExec.delete() FileUtils.deleteQuietly(postExec)
jobDoneFile.delete() FileUtils.deleteQuietly(jobDoneFile)
jobFailFile.delete() FileUtils.deleteQuietly(jobFailFile)
} }
/** /**

View File

@ -28,18 +28,11 @@ class ShellJobRunner(val function: CommandLineFunction) extends JobRunner with L
} }
logger.info("Output written to " + function.jobOutputFile) logger.info("Output written to " + function.jobOutputFile)
if (function.jobErrorFile != null) {
logger.info("Errors written to " + function.jobErrorFile)
} else {
if (logger.isDebugEnabled)
logger.info("Errors also written to " + function.jobOutputFile)
}
function.jobOutputFile.delete()
if (function.jobErrorFile != null) if (function.jobErrorFile != null)
function.jobErrorFile.delete() logger.info("Errors written to " + function.jobErrorFile)
function.doneOutputs.foreach(_.delete())
function.failOutputs.foreach(_.delete()) function.deleteLogs()
function.deleteOutputs()
runStatus = RunnerStatus.RUNNING runStatus = RunnerStatus.RUNNING
function.mkOutputDirectories() function.mkOutputDirectories()
job.run() job.run()

View File

@ -51,23 +51,43 @@ class IntervalScatterFunction extends ScatterFunction with InProcessFunction {
} }
def run() = { def run() = {
IntervalScatterFunction.scatter(this.intervals, this.scatterParts, this.referenceSequence, this.splitByContig) IntervalScatterFunction.scatter(this.referenceSequence, this.intervals, this.scatterParts, this.splitByContig)
} }
} }
object IntervalScatterFunction { object IntervalScatterFunction {
def scatter(intervals: List[String], scatterParts: List[File], reference: File, splitByContig: Boolean) = { private def parseLocs(referenceSource: ReferenceDataSource, intervals: List[String]) = {
val referenceSource = new ReferenceDataSource(reference)
GenomeLocParser.setupRefContigOrdering(referenceSource.getReference) GenomeLocParser.setupRefContigOrdering(referenceSource.getReference)
val locs = { val locs = {
// TODO: Abstract genome analysis engine has richer logic for parsing. We need to use it! // TODO: Abstract genome analysis engine has richer logic for parsing. We need to use it!
if (intervals.size == 0) { if (intervals.size == 0) {
GenomeLocSortedSet.createSetFromSequenceDictionary(referenceSource.getReference.getSequenceDictionary).toList GenomeLocSortedSet.createSetFromSequenceDictionary(referenceSource.getReference.getSequenceDictionary)
} else { } else {
IntervalUtils.parseIntervalArguments(intervals, false) new GenomeLocSortedSet(IntervalUtils.parseIntervalArguments(intervals, false))
} }
} }
if (locs == null || locs.size == 0)
throw new QException("Intervals are empty: " + intervals.mkString(", "))
locs.toList
}
def countContigs(reference: File, intervals: List[String]) = {
val referenceSource = new ReferenceDataSource(reference)
val locs = parseLocs(referenceSource, intervals)
var count = 0
var contig: String = null
for (loc <- locs) {
if (contig != loc.getContig) {
count += 1
contig = loc.getContig
}
}
count
}
def scatter(reference: File, intervals: List[String], scatterParts: List[File], splitByContig: Boolean) = {
val referenceSource = new ReferenceDataSource(reference)
val locs = parseLocs(referenceSource, intervals)
val fileHeader = new SAMFileHeader val fileHeader = new SAMFileHeader
fileHeader.setSequenceDictionary(referenceSource.getReference.getSequenceDictionary) fileHeader.setSequenceDictionary(referenceSource.getReference.getSequenceDictionary)
@ -75,9 +95,6 @@ object IntervalScatterFunction {
var fileIndex = -1 var fileIndex = -1
var locIndex = 0 var locIndex = 0
if (locs == null || locs.size == 0)
throw new QException("Locs produced an empty interval list: " + intervals.mkString(", "))
if (splitByContig) { if (splitByContig) {
var contig: String = null var contig: String = null
for (loc <- locs) { for (loc <- locs) {

View File

@ -3,17 +3,18 @@ package org.broadinstitute.sting.queue.function
import java.io.File import java.io.File
import java.lang.annotation.Annotation import java.lang.annotation.Annotation
import org.broadinstitute.sting.commandline._ import org.broadinstitute.sting.commandline._
import org.broadinstitute.sting.queue.util.{CollectionUtils, IOUtils, ReflectionUtils}
import org.broadinstitute.sting.queue.{QException, QSettings} import org.broadinstitute.sting.queue.{QException, QSettings}
import collection.JavaConversions._ import collection.JavaConversions._
import org.broadinstitute.sting.queue.function.scattergather.{Gather, SimpleTextGatherFunction} import org.broadinstitute.sting.queue.function.scattergather.{Gather, SimpleTextGatherFunction}
import org.broadinstitute.sting.queue.util.{Logging, CollectionUtils, IOUtils, ReflectionUtils}
import org.apache.commons.io.FileUtils
/** /**
* The base interface for all functions in Queue. * The base interface for all functions in Queue.
* Inputs and outputs are specified as Sets of values. * Inputs and outputs are specified as Sets of values.
* Inputs are matched to other outputs by using .equals() * Inputs are matched to other outputs by using .equals()
*/ */
trait QFunction { trait QFunction extends Logging {
/** /**
* Analysis function name * Analysis function name
*/ */
@ -152,13 +153,27 @@ trait QFunction {
dirs dirs
} }
/**
* Deletes the log files for this function.
*/
def deleteLogs() = {
deleteOutput(jobOutputFile)
if (jobErrorFile != null)
deleteOutput(jobErrorFile)
}
/** /**
* Deletes the output files and all the status files for this function. * Deletes the output files and all the status files for this function.
*/ */
def deleteOutputs() = { def deleteOutputs() = {
outputs.foreach(_.delete()) outputs.foreach(file => deleteOutput(file))
doneOutputs.foreach(_.delete()) doneOutputs.foreach(file => deleteOutput(file))
failOutputs.foreach(_.delete()) failOutputs.foreach(file => deleteOutput(file))
}
private def deleteOutput(file: File) = {
if (FileUtils.deleteQuietly(file))
logger.debug("Deleted " + file)
} }
/** /**

View File

@ -23,7 +23,7 @@ class BamProcessing(yaml: File, gatkJar: File, fixMatesJar: File) {
trait StandardCommandLineGATK extends CommandLineGATK { trait StandardCommandLineGATK extends CommandLineGATK {
this.reference_sequence = library.attributes.getProject.getReferenceFile this.reference_sequence = library.attributes.getProject.getReferenceFile
this.intervals :+= library.attributes.getProject.getIntervalList this.intervals = List(library.attributes.getProject.getIntervalList)
this.DBSNP = library.attributes.getProject.getDbsnpFile this.DBSNP = library.attributes.getProject.getDbsnpFile
this.memoryLimit = Some(2) this.memoryLimit = Some(2)
this.jarFile = library.gatkJar this.jarFile = library.gatkJar

View File

@ -23,7 +23,7 @@ class VariantCalling(yaml: File,gatkJar: File) {
*/ */
trait StandardCommandLineGATK extends CommandLineGATK { trait StandardCommandLineGATK extends CommandLineGATK {
this.reference_sequence = vc.attributes.getProject.getReferenceFile this.reference_sequence = vc.attributes.getProject.getReferenceFile
this.intervals :+= vc.attributes.getProject.getIntervalList this.intervals = List(vc.attributes.getProject.getIntervalList)
this.DBSNP = vc.attributes.getProject.getDbsnpFile this.DBSNP = vc.attributes.getProject.getDbsnpFile
// set global memory limit on the low side. Additional input bams will affect it. // set global memory limit on the low side. Additional input bams will affect it.
this.memoryLimit = Some(2) this.memoryLimit = Some(2)

View File

@ -19,6 +19,12 @@ class IntervalScatterFunctionUnitTest extends BaseTest {
GenomeLocParser.setupRefContigOrdering(header.getSequenceDictionary()) GenomeLocParser.setupRefContigOrdering(header.getSequenceDictionary())
} }
@Test
def testCountContigs = {
Assert.assertEquals(3, IntervalScatterFunction.countContigs(reference, List("1:1-1", "2:1-1", "3:2-2")))
Assert.assertEquals(1, IntervalScatterFunction.countContigs(reference, List(BaseTest.validationDataLocation + "chr1_b36_pilot3.interval_list")))
}
@Test @Test
def testBasicScatter = { def testBasicScatter = {
val chr1 = GenomeLocParser.parseGenomeInterval("1") val chr1 = GenomeLocParser.parseGenomeInterval("1")
@ -27,7 +33,7 @@ class IntervalScatterFunctionUnitTest extends BaseTest {
val files = (1 to 3).toList.map(index => new File(testDir + "basic." + index + ".intervals")) val files = (1 to 3).toList.map(index => new File(testDir + "basic." + index + ".intervals"))
IntervalScatterFunction.scatter(List("1", "2", "3"), files, reference, false) IntervalScatterFunction.scatter(reference, List("1", "2", "3"), files, false)
val locs1 = IntervalUtils.parseIntervalArguments(List(files(0).toString), false) val locs1 = IntervalUtils.parseIntervalArguments(List(files(0).toString), false)
val locs2 = IntervalUtils.parseIntervalArguments(List(files(1).toString), false) val locs2 = IntervalUtils.parseIntervalArguments(List(files(1).toString), false)
@ -51,7 +57,7 @@ class IntervalScatterFunctionUnitTest extends BaseTest {
val files = (1 to 3).toList.map(index => new File(testDir + "less." + index + ".intervals")) val files = (1 to 3).toList.map(index => new File(testDir + "less." + index + ".intervals"))
IntervalScatterFunction.scatter(List("1", "2", "3", "4"), files, reference, false) IntervalScatterFunction.scatter(reference, List("1", "2", "3", "4"), files, false)
val locs1 = IntervalUtils.parseIntervalArguments(List(files(0).toString), false) val locs1 = IntervalUtils.parseIntervalArguments(List(files(0).toString), false)
val locs2 = IntervalUtils.parseIntervalArguments(List(files(1).toString), false) val locs2 = IntervalUtils.parseIntervalArguments(List(files(1).toString), false)
@ -70,7 +76,7 @@ class IntervalScatterFunctionUnitTest extends BaseTest {
@Test(expected=classOf[QException]) @Test(expected=classOf[QException])
def testScatterMoreFiles = { def testScatterMoreFiles = {
val files = (1 to 3).toList.map(index => new File(testDir + "more." + index + ".intervals")) val files = (1 to 3).toList.map(index => new File(testDir + "more." + index + ".intervals"))
IntervalScatterFunction.scatter(List("1", "2"), files, reference, false) IntervalScatterFunction.scatter(reference, List("1", "2"), files, false)
} }
@Test @Test
@ -83,7 +89,7 @@ class IntervalScatterFunctionUnitTest extends BaseTest {
val files = (1 to 3).toList.map(index => new File(testDir + "split." + index + ".intervals")) val files = (1 to 3).toList.map(index => new File(testDir + "split." + index + ".intervals"))
IntervalScatterFunction.scatter(intervals, files, reference, true) IntervalScatterFunction.scatter(reference, intervals, files, true)
val locs1 = IntervalUtils.parseIntervalArguments(List(files(0).toString), false) val locs1 = IntervalUtils.parseIntervalArguments(List(files(0).toString), false)
val locs2 = IntervalUtils.parseIntervalArguments(List(files(1).toString), false) val locs2 = IntervalUtils.parseIntervalArguments(List(files(1).toString), false)
@ -99,6 +105,29 @@ class IntervalScatterFunctionUnitTest extends BaseTest {
Assert.assertEquals(chr3, locs3.get(0)) Assert.assertEquals(chr3, locs3.get(0))
} }
@Test
def testScatterOrder = {
val intervals = List("2:1-1", "1:1-1", "3:2-2")
val chr1 = GenomeLocParser.parseGenomeInterval("1:1-1")
val chr2 = GenomeLocParser.parseGenomeInterval("2:1-1")
val chr3 = GenomeLocParser.parseGenomeInterval("3:2-2")
val files = (1 to 3).toList.map(index => new File(testDir + "split." + index + ".intervals"))
IntervalScatterFunction.scatter(reference, intervals, files, true)
val locs1 = IntervalUtils.parseIntervalArguments(List(files(0).toString), false)
val locs2 = IntervalUtils.parseIntervalArguments(List(files(1).toString), false)
val locs3 = IntervalUtils.parseIntervalArguments(List(files(2).toString), false)
Assert.assertEquals(1, locs1.size)
Assert.assertEquals(1, locs2.size)
Assert.assertEquals(1, locs3.size)
Assert.assertEquals(chr1, locs1.get(0))
Assert.assertEquals(chr2, locs2.get(0))
Assert.assertEquals(chr3, locs3.get(0))
}
@Test @Test
def testBasicScatterByContig = { def testBasicScatterByContig = {
@ -108,7 +137,7 @@ class IntervalScatterFunctionUnitTest extends BaseTest {
val files = (1 to 3).toList.map(index => new File(testDir + "contig_basic." + index + ".intervals")) val files = (1 to 3).toList.map(index => new File(testDir + "contig_basic." + index + ".intervals"))
IntervalScatterFunction.scatter(List("1", "2", "3"), files, reference, true) IntervalScatterFunction.scatter(reference, List("1", "2", "3"), files, true)
val locs1 = IntervalUtils.parseIntervalArguments(List(files(0).toString), false) val locs1 = IntervalUtils.parseIntervalArguments(List(files(0).toString), false)
val locs2 = IntervalUtils.parseIntervalArguments(List(files(1).toString), false) val locs2 = IntervalUtils.parseIntervalArguments(List(files(1).toString), false)
@ -132,7 +161,7 @@ class IntervalScatterFunctionUnitTest extends BaseTest {
val files = (1 to 3).toList.map(index => new File(testDir + "contig_less." + index + ".intervals")) val files = (1 to 3).toList.map(index => new File(testDir + "contig_less." + index + ".intervals"))
IntervalScatterFunction.scatter(List("1", "2", "3", "4"), files, reference, true) IntervalScatterFunction.scatter(reference, List("1", "2", "3", "4"), files, true)
val locs1 = IntervalUtils.parseIntervalArguments(List(files(0).toString), false) val locs1 = IntervalUtils.parseIntervalArguments(List(files(0).toString), false)
val locs2 = IntervalUtils.parseIntervalArguments(List(files(1).toString), false) val locs2 = IntervalUtils.parseIntervalArguments(List(files(1).toString), false)
@ -151,7 +180,7 @@ class IntervalScatterFunctionUnitTest extends BaseTest {
@Test(expected=classOf[QException]) @Test(expected=classOf[QException])
def testScatterByContigMoreFiles = { def testScatterByContigMoreFiles = {
val files = (1 to 3).toList.map(index => new File(testDir + "contig_more." + index + ".intervals")) val files = (1 to 3).toList.map(index => new File(testDir + "contig_more." + index + ".intervals"))
IntervalScatterFunction.scatter(List("1", "2"), files, reference, true) IntervalScatterFunction.scatter(reference, List("1", "2"), files, true)
} }
@Test @Test
@ -164,7 +193,7 @@ class IntervalScatterFunctionUnitTest extends BaseTest {
val files = (1 to 3).toList.map(index => new File(testDir + "contig_split_start." + index + ".intervals")) val files = (1 to 3).toList.map(index => new File(testDir + "contig_split_start." + index + ".intervals"))
IntervalScatterFunction.scatter(intervals, files, reference, true) IntervalScatterFunction.scatter(reference, intervals, files, true)
val locs1 = IntervalUtils.parseIntervalArguments(List(files(0).toString), false) val locs1 = IntervalUtils.parseIntervalArguments(List(files(0).toString), false)
val locs2 = IntervalUtils.parseIntervalArguments(List(files(1).toString), false) val locs2 = IntervalUtils.parseIntervalArguments(List(files(1).toString), false)
@ -190,7 +219,7 @@ class IntervalScatterFunctionUnitTest extends BaseTest {
val files = (1 to 3).toList.map(index => new File(testDir + "contig_split_middle." + index + ".intervals")) val files = (1 to 3).toList.map(index => new File(testDir + "contig_split_middle." + index + ".intervals"))
IntervalScatterFunction.scatter(intervals, files, reference, true) IntervalScatterFunction.scatter(reference, intervals, files, true)
val locs1 = IntervalUtils.parseIntervalArguments(List(files(0).toString), false) val locs1 = IntervalUtils.parseIntervalArguments(List(files(0).toString), false)
val locs2 = IntervalUtils.parseIntervalArguments(List(files(1).toString), false) val locs2 = IntervalUtils.parseIntervalArguments(List(files(1).toString), false)
@ -216,7 +245,7 @@ class IntervalScatterFunctionUnitTest extends BaseTest {
val files = (1 to 3).toList.map(index => new File(testDir + "contig_split_end." + index + ".intervals")) val files = (1 to 3).toList.map(index => new File(testDir + "contig_split_end." + index + ".intervals"))
IntervalScatterFunction.scatter(intervals, files, reference, true) IntervalScatterFunction.scatter(reference, intervals, files, true)
val locs1 = IntervalUtils.parseIntervalArguments(List(files(0).toString), false) val locs1 = IntervalUtils.parseIntervalArguments(List(files(0).toString), false)
val locs2 = IntervalUtils.parseIntervalArguments(List(files(1).toString), false) val locs2 = IntervalUtils.parseIntervalArguments(List(files(1).toString), false)