gatk-3.8/scala/test/org/broadinstitute/sting/queue/pipeline/PipelineTest.scala

package org.broadinstitute.sting.queue.pipeline

import org.broadinstitute.sting.utils.Utils
import org.testng.Assert
import org.broadinstitute.sting.commandline.CommandLineProgram
import java.io.File
import org.broadinstitute.sting.queue.util.{TextFormatUtils, ProcessController}
import java.util.Date
import java.text.SimpleDateFormat
import org.broadinstitute.sting.{WalkerTest, BaseTest}
import org.broadinstitute.sting.queue.{QException, QCommandLine}

/**
 * Shared harness for Queue pipeline tests.
 *
 * Builds the Queue command line for a test, runs it through a QCommandLine instance,
 * checks for expected exceptions, and (when the "pipeline.run" system property equals "run")
 * verifies output MD5s and optionally validates the resulting VCF with VariantEval.
 *
 * Initializing this object also registers a JVM shutdown hook that shuts down the
 * ProcessController and any QCommandLine instances that are still registered as running.
 */
object PipelineTest {

  /** The path to the current Sting directory.  Useful when specifying Sting resources. */
  val currentStingDir: String = new File(".").getAbsolutePath

  /** The path to the current build of the GATK jar in the currentStingDir. */
  val currentGATK: File = new File(currentStingDir, "dist/GenomeAnalysisTK.jar")

  /**
   * Returns the top level output path to this test.
   * @param testName The name of the test passed to PipelineTest.executeTest()
   * @return the top level output path to this test.
   */
  def testDir(testName: String): String = "pipelinetests/%s/".format(testName)

  /**
   * Returns the directory where relative output files will be written for this test.
   * @param testName The name of the test passed to PipelineTest.executeTest()
   * @return the directory where relative output files will be written for this test.
   */
  def runDir(testName: String): String = testDir(testName) + "run/"

  /**
   * Returns the directory where temp files will be written for this test.
   * @param testName The name of the test passed to PipelineTest.executeTest()
   * @return the directory where temp files will be written for this test.
   */
  def tempDir(testName: String): String = testDir(testName) + "temp/"

  /**
   * Encapsulates a file MD5
   * @param testName The name of the test also passed to PipelineTest.executeTest().
   * @param filePath The file path of the output file, relative to the directory the pipeline is run in.
   * @param md5 The expected MD5
   * @return a file md5 that can be appended to the PipelineTestSpec.fileMD5s
   */
  // NOTE(review): the returned File is already prefixed with runDir(testName), while
  // executeTest(PipelineTestSpec) prefixes runDir again for each fileMD5s entry -- confirm
  // against PipelineTestSpec which form fileMD5s is expected to hold.
  def fileMD5(testName: String, filePath: String, md5: String): (File, String) =
    (new File(runDir(testName) + filePath), md5)

  /**
   * Command lines that have been started and not yet shut down.
   * Written by the thread executing tests and read by the shutdown hook thread,
   * hence volatile so the hook sees the latest set.
   */
  @volatile private var runningCommandLines = Set.empty[QCommandLine]

  /** Directory prefix under which VariantEval validation reports are written. */
  private val validationReportsDataLocation = "/humgen/gsa-hpprojects/GATK/validationreports/submitted/"

  /**
   * True when the system property "pipeline.run" is exactly "run".
   * When false, the "-run" argument is not passed to Queue and the MD5 / eval checks are skipped.
   */
  val run: Boolean = System.getProperty("pipeline.run") == "run"

  /**
   * Executes the pipeline described by the spec and, when running for real,
   * checks the output MD5s and runs the optional VariantEval validation.
   * @param pipelineTest the test specification; its name must not be null.
   * @throws QException if the spec's name is null.
   */
  def executeTest(pipelineTest: PipelineTestSpec): Unit = {
    val name = pipelineTest.name
    if (name == null)
      throw new QException("PipelineTestSpec.name is null.")
    println(Utils.dupString('-', 80))
    executeTest(name, pipelineTest.args, pipelineTest.jobQueue, pipelineTest.expectedException)
    if (run) {
      // Output files are specified relative to the run directory of this test.
      assertMatchingMD5s(name, pipelineTest.fileMD5s.map{case (file, md5) => new File(runDir(name), file) -> md5})
      if (pipelineTest.evalSpec != null)
        validateEval(name, pipelineTest.evalSpec)
      println("  => %s PASSED".format(name))
    }
    else
      println("  => %s PASSED DRY RUN".format(name))
  }

  /**
   * Checks every file against its expected MD5 and fails the test if any differ.
   * All files are checked before failing so that every mismatch is processed in one run.
   * An empty expected MD5 is never counted as a mismatch.
   * @param name the name of the test.
   * @param fileMD5s pairs of output file and expected MD5.
   */
  private def assertMatchingMD5s(name: String, fileMD5s: Traversable[(File, String)]): Unit = {
    val failed = fileMD5s.count { case (file, expectedMD5) =>
      val calculatedMD5 = BaseTest.testFileMD5(name, file, expectedMD5, false)
      expectedMD5 != "" && expectedMD5 != calculatedMD5
    }
    if (failed > 0)
      Assert.fail("%d of %d MD5%s did not match.".format(failed, fileMD5s.size, TextFormatUtils.plural(failed)))
  }

  /**
   * Runs VariantEval over the pipeline VCF, writing a timestamped report under the
   * validation reports location and asserting each validation's metric lies within [min, max].
   * @param name the name of the test.
   * @param evalSpec the evaluation specification.
   */
  private def validateEval(name: String, evalSpec: PipelineTestEvalSpec): Unit = {
    // write the report to the shared validation data location
    val formatter = new SimpleDateFormat("yyyy.MM.dd.HH.mm.ss")
    val reportLocation = "%s%s/validation.%s.eval".format(validationReportsDataLocation, name, formatter.format(new Date))
    new File(reportLocation).getParentFile.mkdirs()

    // Run variant eval generating the report and validating the pipeline vcf.
    // The -L argument is deliberately NOT part of the base command: intervals are optional
    // and are appended below only when present, so a null never becomes "-L null"
    // and the same intervals are not passed twice.
    var walkerCommand = "-T VariantEval -R %s -B:eval,VCF %s -E %s -reportType R -reportLocation %s"
      .format(evalSpec.reference, evalSpec.vcf, evalSpec.evalModules.mkString(" -E "), reportLocation)

    if (evalSpec.dbsnp != null) {
      // dbsnp given as a VCF is bound as a named track, anything else goes through -D
      val dbsnpArg = if (evalSpec.dbsnp.getName.toLowerCase.endsWith(".vcf")) "-B:dbsnp,VCF" else "-D"
      walkerCommand += " %s %s".format(dbsnpArg, evalSpec.dbsnp)
    }

    if (evalSpec.intervals != null)
      walkerCommand += " -L %s".format(evalSpec.intervals)

    for (validation <- evalSpec.validations) {
      walkerCommand += " -summary %s".format(validation.metric)
      walkerCommand += " -validate '%1$s >= %2$s' -validate '%1$s <= %3$s'".format(
        validation.metric, validation.min, validation.max)
    }

    WalkerTest.executeTest(name + "-validate", walkerCommand, null)
  }

  /**
   * execute the test
   * @param name the name of the test
   * @param args the argument list
   * @param jobQueue the queue to run the job on.  Defaults to hour if jobQueue is null.
   * @param expectedException the expected exception or null if no exception is expected.
   */
  def executeTest(name: String, args: String, jobQueue: String, expectedException: Class[_]): Unit = {
    // add the logging level to each of the integration test commands
    val baseCommand = Utils.appendArray(Utils.escapeExpressions(args),
      "-bsub", "-l", "WARN", "-tempDir", tempDir(name), "-runDir", runDir(name))

    val queueName = if (jobQueue == null) "hour" else jobQueue
    val queuedCommand = Utils.appendArray(baseCommand, "-jobQueue", queueName)

    // without "-run" Queue is only invoked as a dry run
    val command = if (run) Utils.appendArray(queuedCommand, "-run") else queuedCommand

    // run the executable
    var gotAnException = false

    val instance = new QCommandLine
    // register so the shutdown hook can stop this instance if the JVM exits mid-test
    runningCommandLines += instance
    try {
      println("Executing test %s with Queue arguments: %s".format(name, Utils.join(" ",command)))
      CommandLineProgram.start(instance, command)
    } catch {
      // Deliberately catches everything: expectedException may be any throwable type.
      case e: Throwable =>
        gotAnException = true
        if (expectedException != null) {
          // we expect an exception
          println("Wanted exception %s, saw %s".format(expectedException, e.getClass))
          if (expectedException.isInstance(e)) {
            // it's the type we expected
            println("  => %s PASSED".format(name))
          } else {
            e.printStackTrace()
            Assert.fail("Test %s expected exception %s but got %s instead".format(
              name, expectedException, e.getClass))
          }
        } else {
          // we didn't expect an exception but we got one :-(
          throw new RuntimeException(e)
        }
    } finally {
      instance.shutdown()
      runningCommandLines -= instance
    }

    // catch failures from the integration test
    if (expectedException != null) {
      if (!gotAnException)
      // we expected an exception but didn't see it
        Assert.fail("Test %s expected exception %s but none was thrown".format(name, expectedException.toString))
    } else {
      if (CommandLineProgram.result != 0)
        throw new RuntimeException("Error running the GATK with arguments: " + args)
    }
  }

  Runtime.getRuntime.addShutdownHook(new Thread {
    /** Cleanup as the JVM shuts down. */
    override def run(): Unit = {
      // Best effort: a failure in one shutdown step must not prevent the remaining ones.
      try {
        ProcessController.shutdown()
      } catch {
        case _: Throwable => /* ignore */
      }
      runningCommandLines.foreach(commandLine =>
        try {
          commandLine.shutdown()
        } catch {
          case _: Throwable => /* ignore */
        })
    }
  })
}