Merge branch 'master' of github.com:broadinstitute/gsa-unstable

This commit is contained in:
Eric Banks 2013-01-07 15:00:35 -05:00
commit 1a4b112865
3 changed files with 0 additions and 1057 deletions

View File

@ -1,496 +0,0 @@
package org.broadinstitute.sting.queue.qscripts
import org.broadinstitute.sting.queue.extensions.gatk._
import org.broadinstitute.sting.queue.QScript
import org.broadinstitute.sting.queue.extensions.picard._
import org.broadinstitute.sting.gatk.walkers.indels.IndelRealigner.ConsensusDeterminationModel
import org.broadinstitute.sting.utils.baq.BAQ.CalculationMode
import collection.JavaConversions._
import net.sf.samtools.SAMFileReader
import net.sf.samtools.SAMFileHeader.SortOrder
import org.broadinstitute.sting.queue.util.QScriptUtils
import org.broadinstitute.sting.queue.function.ListWriterFunction
import org.broadinstitute.sting.commandline.Hidden
import org.broadinstitute.sting.commandline
/**
 * Queue pipeline that takes one or more BAM files and produces per-sample
 * cleaned / deduped / recalibrated BAMs, optionally re-mapping the reads with
 * BWA (single-end, pair-end or BWA-SW) first.
 */
class DataProcessingPipeline extends QScript {
qscript =>
/****************************************************************************
* Required Parameters
****************************************************************************/
@Input(doc="input BAM file - or list of BAM files", fullName="input", shortName="i", required=true)
var input: File = _
@Input(doc="Reference fasta file", fullName="reference", shortName="R", required=true)
var reference: File = _
@Input(doc="dbsnp ROD to use (must be in VCF format)", fullName="dbsnp", shortName="D", required=true)
var dbSNP: Seq[File] = Seq()
/****************************************************************************
* Optional Parameters
****************************************************************************/
@Input(doc="extra VCF files to use as reference indels for Indel Realignment", fullName="extra_indels", shortName="indels", required=false)
var indels: Seq[File] = Seq()
@Input(doc="The path to the binary of bwa (usually BAM files have already been mapped - but if you want to remap this is the option)", fullName="path_to_bwa", shortName="bwa", required=false)
var bwaPath: File = _
@Argument(doc="the project name determines the final output (BAM file) base name. Example NA12878 yields NA12878.processed.bam", fullName="project", shortName="p", required=false)
var projectName: String = "project"
@Argument(doc="Output path for the processed BAM files.", fullName="output_directory", shortName="outputDir", required=false)
var outputDir: String = ""
@Argument(doc="the -L interval string to be used by GATK - output bams at interval only", fullName="gatk_interval_string", shortName="L", required=false)
var intervalString: String = ""
@Input(doc="an intervals file to be used by GATK - output bams at intervals only", fullName="gatk_interval_file", shortName="intervals", required=false)
var intervals: File = _
// Parsed into a ConsensusDeterminationModel by getIndelCleaningModel; any
// unrecognized value silently falls back to USE_READS.
@Argument(doc="Cleaning model: KNOWNS_ONLY, USE_READS or USE_SW", fullName="clean_model", shortName="cm", required=false)
var cleaningModel: String = "USE_READS"
@Argument(doc="Decompose input BAM file and fully realign it using BWA and assume Single Ended reads", fullName="use_bwa_single_ended", shortName="bwase", required=false)
var useBWAse: Boolean = false
@Argument(doc="Decompose input BAM file and fully realign it using BWA and assume Pair Ended reads", fullName="use_bwa_pair_ended", shortName="bwape", required=false)
var useBWApe: Boolean = false
@Argument(doc="Decompose input BAM file and fully realign it using BWA SW", fullName="use_bwa_sw", shortName="bwasw", required=false)
var useBWAsw: Boolean = false
@Argument(doc="Number of threads BWA should use", fullName="bwa_threads", shortName="bt", required=false)
var bwaThreads: Int = 1
@Argument(doc="Perform validation on the BAM files", fullName="validation", shortName="vs", required=false)
var validation: Boolean = false
/****************************************************************************
* Hidden Parameters
****************************************************************************/
@Hidden
@Argument(doc="How many ways to scatter/gather", fullName="scatter_gather", shortName="sg", required=false)
var nContigs: Int = -1
@Hidden
@Argument(doc="Define the default platform for Count Covariates -- useful for techdev purposes only.", fullName="default_platform", shortName="dp", required=false)
var defaultPlatform: String = ""
@Hidden
@Argument(doc="Run the pipeline in test mode only", fullName = "test_mode", shortName = "test", required=false)
var testMode: Boolean = false
/****************************************************************************
* Global Variables
****************************************************************************/
val queueLogDir: String = ".qlog/" // Gracefully hide Queue's output
// Resolved from the `cleaningModel` string at the start of script().
var cleanModelEnum: ConsensusDeterminationModel = ConsensusDeterminationModel.USE_READS
/****************************************************************************
* Helper classes and methods
****************************************************************************/
/**
 * Immutable holder for the BAM @RG fields that addReadGroup needs in order to
 * restore read-group information on BWA-realigned output.
 */
class ReadGroup(
  val id: String, // read group ID (@RG ID)
  val lb: String, // library (LB)
  val pl: String, // platform (PL)
  val pu: String, // platform unit (PU)
  val sm: String, // sample (SM)
  val cn: String, // sequencing center (CN)
  val ds: String  // description (DS)
)
// Utility function to merge all bam files of similar samples. Generates one BAM file per sample.
// It uses the sample information on the header of the input BAM files.
//
// Because the realignment only happens after these scripts are executed, in case you are using
// bwa realignment, this function will operate over the original bam files and output over the
// (to be realigned) bam files.
//
// Fix: the SAMFileReader opened for each BAM is now closed (the original code
// leaked one open file handle per input BAM), and the two parallel lists are
// walked with zip instead of a manually-advanced iterator.
def createSampleFiles(bamFiles: Seq[File], realignedBamFiles: Seq[File]): Map[String, Seq[File]] = {
// sample name -> all (to-be-realigned) BAM files carrying reads for that sample
val sampleTable = scala.collection.mutable.Map.empty[String, Seq[File]]
for ((bam, rBam) <- bamFiles.zip(realignedBamFiles)) {
val samReader = new SAMFileReader(bam)
try {
val readGroups = samReader.getFileHeader.getReadGroups
// only allow one sample per file. Bam files with multiple samples would require pre-processing of the file
// with PrintReads to separate the samples. Tell user to do it himself!
assert(!QScriptUtils.hasMultipleSamples(readGroups), "The pipeline requires that only one sample is present in a BAM file. Please separate the samples in " + bam)
// Fill out the sample table with the readgroups in this file
for (rg <- readGroups) {
val sample = rg.getSample
if (!sampleTable.contains(sample))
sampleTable(sample) = Seq(rBam)
else if (!sampleTable(sample).contains(rBam))
sampleTable(sample) :+= rBam
}
} finally {
samReader.close() // release the underlying file handle
}
}
sampleTable.toMap
}
// Rebuilds the Read Group string to give BWA
// Emits one addReadGroup job per read group found in the original header,
// chaining intermediate "<n>-rg.bam" files together: the first job consumes
// inBam and the last one produces outBam. `index` counts DOWN from the number
// of read groups to 1.
def addReadGroups(inBam: File, outBam: File, samReader: SAMFileReader) {
val readGroups = samReader.getFileHeader.getReadGroups
var index: Int = readGroups.length
for (rg <- readGroups) {
// First iteration reads inBam; later ones read the previous chain link.
val intermediateInBam: File = if (index == readGroups.length) { inBam } else { swapExt(outBam, ".bam", index+1 + "-rg.bam") }
// Last iteration (index == 1) writes the final outBam.
val intermediateOutBam: File = if (index > 1) {swapExt(outBam, ".bam", index + "-rg.bam") } else { outBam}
val readGroup = new ReadGroup(rg.getReadGroupId, rg.getLibrary, rg.getPlatform, rg.getPlatformUnit, rg.getSample, rg.getSequencingCenter, rg.getDescription)
add(addReadGroup(intermediateInBam, intermediateOutBam, readGroup))
index = index - 1
}
}
// Re-aligns every input BAM with the requested flavour of BWA (single-end,
// pair-end, or BWA-SW) and restores the read-group information afterwards.
// Returns one realigned, read-grouped BAM per input BAM, in input order.
def performAlignment(bams: Seq[File]): Seq[File] = {
for ((bam, i) <- bams.zipWithIndex) yield {
val index = i + 1 // 1-based suffix used in all intermediate file names
val saiFile1 = swapExt(bam, ".bam", "." + index + ".1.sai")
val saiFile2 = swapExt(bam, ".bam", "." + index + ".2.sai")
val realignedSamFile = swapExt(bam, ".bam", "." + index + ".realigned.sam")
val realignedBamFile = swapExt(bam, ".bam", "." + index + ".realigned.bam")
val rgRealignedBamFile = swapExt(bam, ".bam", "." + index + ".realigned.rg.bam")
if (useBWAse) {
// single-ended: revert to original qualities, then aln + samse
val revertedBAM = revertBAM(bam, true)
add(bwa_aln_se(revertedBAM, saiFile1),
bwa_sam_se(revertedBAM, saiFile1, realignedSamFile))
} else if (useBWApe) {
// pair-ended: one .sai per mate, then sampe
val revertedBAM = revertBAM(bam, true)
add(bwa_aln_pe(revertedBAM, saiFile1, 1),
bwa_aln_pe(revertedBAM, saiFile2, 2),
bwa_sam_pe(revertedBAM, saiFile1, saiFile2, realignedSamFile))
} else if (useBWAsw) {
// BWA-SW aligns from fastq; alignment info is kept on the reverted BAM
val revertedBAM = revertBAM(bam, false)
val fastQ = swapExt(revertedBAM, ".bam", ".fq")
add(convertToFastQ(revertedBAM, fastQ),
bwa_sw(fastQ, realignedSamFile))
}
add(sortSam(realignedSamFile, realignedBamFile, SortOrder.coordinate))
addReadGroups(realignedBamFile, rgRealignedBamFile, new SAMFileReader(bam))
rgRealignedBamFile
}
}
// Translates the `cleaningModel` command-line string into the IndelRealigner
// enum; anything unrecognized falls back to USE_READS.
def getIndelCleaningModel: ConsensusDeterminationModel =
cleaningModel match {
case "KNOWNS_ONLY" => ConsensusDeterminationModel.KNOWNS_ONLY
case "USE_SW" => ConsensusDeterminationModel.USE_SW
case _ => ConsensusDeterminationModel.USE_READS
}
// Adds one revert job per input BAM and returns the reverted files, in order.
def revertBams(bams: Seq[File], removeAlignmentInformation: Boolean): Seq[File] =
bams.map(bam => revertBAM(bam, removeAlignmentInformation))
// Adds a single Picard RevertSam job for `bam` and returns the output path
// ("<bam>.reverted.bam").
def revertBAM(bam: File, removeAlignmentInformation: Boolean): File = {
val reverted = swapExt(bam, ".bam", ".reverted.bam")
add(revert(bam, reverted, removeAlignmentInformation))
reverted
}
/****************************************************************************
* Main script
****************************************************************************/
/**
 * Pipeline entry point: optionally re-align with BWA, then per sample run
 * indel realignment, duplicate marking and base-quality recalibration, and
 * finally write a ".cohort.list" file enumerating all processed BAMs.
 */
def script() {
// final output list of processed bam files
var cohortList: Seq[File] = Seq()
// sets the model for the Indel Realigner
cleanModelEnum = getIndelCleaningModel
// keep a record of the number of contigs in the first bam file in the list
val bams = QScriptUtils.createSeqFromFile(input)
if (nContigs < 0)
nContigs = QScriptUtils.getNumberOfContigs(bams(0))
val realignedBAMs = if (useBWApe || useBWAse || useBWAsw) {performAlignment(bams)} else {revertBams(bams, false)}
// generate a BAM file per sample joining all per lane files if necessary
val sampleBAMFiles: Map[String, Seq[File]] = createSampleFiles(bams, realignedBAMs)
// if this is a 'knowns only' indel realignment run, do it only once for all samples.
val globalIntervals = new File(outputDir + projectName + ".intervals")
// BUG FIX: the original compared the *String* option `cleaningModel` against
// the ConsensusDeterminationModel enum, which can never be equal, so the
// KNOWNS_ONLY branches were dead. Compare the resolved enum instead.
if (cleanModelEnum == ConsensusDeterminationModel.KNOWNS_ONLY)
add(target(null, globalIntervals))
// put each sample through the pipeline
for ((sample, bamList) <- sampleBAMFiles) {
// BAM files generated by the pipeline
val bam = new File(qscript.projectName + "." + sample + ".bam")
val cleanedBam = swapExt(bam, ".bam", ".clean.bam")
val dedupedBam = swapExt(bam, ".bam", ".clean.dedup.bam")
val recalBam = swapExt(bam, ".bam", ".clean.dedup.recal.bam")
// Accessory files
val targetIntervals = if (cleanModelEnum == ConsensusDeterminationModel.KNOWNS_ONLY) {globalIntervals} else {swapExt(bam, ".bam", ".intervals")}
val metricsFile = swapExt(bam, ".bam", ".metrics")
val preRecalFile = swapExt(bam, ".bam", ".pre_recal.table")
val postRecalFile = swapExt(bam, ".bam", ".post_recal.table")
val preOutPath = swapExt(bam, ".bam", ".pre")
val postOutPath = swapExt(bam, ".bam", ".post")
val preValidateLog = swapExt(bam, ".bam", ".pre.validation")
val postValidateLog = swapExt(bam, ".bam", ".post.validation")
// Validation is an optional step for the BAM file generated after
// alignment and the final bam file of the pipeline.
if (validation) {
for (sampleFile <- bamList)
add(validate(sampleFile, preValidateLog),
validate(recalBam, postValidateLog))
}
if (cleanModelEnum != ConsensusDeterminationModel.KNOWNS_ONLY)
add(target(bamList, targetIntervals))
add(clean(bamList, targetIntervals, cleanedBam),
dedup(cleanedBam, dedupedBam, metricsFile),
cov(dedupedBam, preRecalFile),
recal(dedupedBam, preRecalFile, recalBam),
cov(recalBam, postRecalFile))
cohortList :+= recalBam
}
// output a BAM list with all the processed per sample files
val cohortFile = new File(qscript.outputDir + qscript.projectName + ".cohort.list")
add(writeList(cohortList, cohortFile))
}
/****************************************************************************
* Classes (GATK Walkers)
****************************************************************************/
// General arguments to non-GATK tools
trait ExternalCommonArgs extends CommandLineFunction {
// Defaults shared by every job in this pipeline: 4 GB memory limit, and mark
// outputs as intermediate so Queue may delete them once dependents finish.
this.memoryLimit = 4
this.isIntermediate = true
}
// General arguments to GATK walkers
trait CommandLineGATKArgs extends CommandLineGATK with ExternalCommonArgs {
// Every GATK walker runs against the pipeline-wide reference sequence.
this.reference_sequence = qscript.reference
}
// Common arguments for Picard tools that read/write BAM records.
trait SAMargs extends PicardBamFunction with ExternalCommonArgs {
this.maxRecordsInRam = 100000
}
// RealignerTargetCreator: builds the interval list for indel realignment.
// In KNOWNS_ONLY mode it runs once globally with inBams == null (known-site
// VCFs only); otherwise once per sample over that sample's BAMs.
case class target (inBams: Seq[File], outIntervals: File) extends RealignerTargetCreator with CommandLineGATKArgs {
if (cleanModelEnum != ConsensusDeterminationModel.KNOWNS_ONLY)
this.input_file = inBams
this.out = outIntervals
this.mismatchFraction = 0.0
this.known ++= qscript.dbSNP
// NOTE(review): `indels` is initialized to Seq() at declaration, so this null
// check looks always-true; kept as-is.
if (indels != null)
this.known ++= qscript.indels
this.scatterCount = nContigs
this.analysisName = queueLogDir + outIntervals + ".target"
this.jobName = queueLogDir + outIntervals + ".target"
}
// IndelRealigner: locally realigns reads around the target intervals using the
// known indel sites and the consensus model selected on the command line.
case class clean (inBams: Seq[File], tIntervals: File, outBam: File) extends IndelRealigner with CommandLineGATKArgs {
this.input_file = inBams
this.targetIntervals = tIntervals
this.out = outBam
this.known ++= qscript.dbSNP
if (qscript.indels != null)
this.known ++= qscript.indels
this.consensusDeterminationModel = cleanModelEnum
// Output is intermediate; skip BAM compression to save CPU.
this.compress = 0
// In test mode omit the @PG tag so outputs stay byte-stable across versions.
this.noPGTag = qscript.testMode;
this.scatterCount = nContigs
this.analysisName = queueLogDir + outBam + ".clean"
this.jobName = queueLogDir + outBam + ".clean"
}
// BaseRecalibrator: builds a base-quality recalibration table for inBam, run
// both before and after recalibration so the two tables can be compared.
case class cov (inBam: File, outRecalFile: File) extends BaseRecalibrator with CommandLineGATKArgs {
this.knownSites ++= qscript.dbSNP
this.covariate ++= Seq("ReadGroupCovariate", "QualityScoreCovariate", "CycleCovariate", "ContextCovariate")
this.input_file :+= inBam
this.disable_indel_quals = true
this.out = outRecalFile
// Techdev override: force a platform when the BAMs lack/misstate PL.
if (!defaultPlatform.isEmpty) this.default_platform = defaultPlatform
// -L as a string takes precedence over an intervals file.
if (!qscript.intervalString.isEmpty) this.intervalsString ++= Seq(qscript.intervalString)
else if (qscript.intervals != null) this.intervals :+= qscript.intervals
this.scatterCount = nContigs
this.analysisName = queueLogDir + outRecalFile + ".covariates"
this.jobName = queueLogDir + outRecalFile + ".covariates"
}
// PrintReads with -BQSR: applies the recalibration table and writes the final
// recalibrated BAM (kept — not intermediate), computing BAQ as necessary.
case class recal (inBam: File, inRecalFile: File, outBam: File) extends PrintReads with CommandLineGATKArgs {
this.input_file :+= inBam
this.BQSR = inRecalFile
this.baq = CalculationMode.CALCULATE_AS_NECESSARY
this.out = outBam
// -L as a string takes precedence over an intervals file (same rule as cov).
if (!qscript.intervalString.isEmpty) this.intervalsString ++= Seq(qscript.intervalString)
else if (qscript.intervals != null) this.intervals :+= qscript.intervals
this.scatterCount = nContigs
// This is a pipeline end product; don't let Queue delete it.
this.isIntermediate = false
this.analysisName = queueLogDir + outBam + ".recalibration"
this.jobName = queueLogDir + outBam + ".recalibration"
}
/****************************************************************************
* Classes (non-GATK programs)
****************************************************************************/
// Picard MarkDuplicates: flags duplicate reads and writes a metrics file.
case class dedup (inBam: File, outBam: File, metricsFile: File) extends MarkDuplicates with ExternalCommonArgs {
this.input :+= inBam
this.output = outBam
this.metrics = metricsFile
// Duplicate marking is memory hungry; override the 4 GB trait default.
this.memoryLimit = 16
this.analysisName = queueLogDir + outBam + ".dedup"
this.jobName = queueLogDir + outBam + ".dedup"
}
// Picard MergeSamFiles: merges several BAMs into one.
// NOTE(review): no call site is visible in this file's script(); appears to be
// kept for ad-hoc use — confirm before removing.
case class joinBams (inBams: Seq[File], outBam: File) extends MergeSamFiles with ExternalCommonArgs {
this.input = inBams
this.output = outBam
this.analysisName = queueLogDir + outBam + ".joinBams"
this.jobName = queueLogDir + outBam + ".joinBams"
}
// Picard SortSam: sorts a SAM/BAM into the requested order (used to coordinate-
// sort the SAM emitted by BWA).
case class sortSam (inSam: File, outBam: File, sortOrderP: SortOrder) extends SortSam with ExternalCommonArgs {
this.input :+= inSam
this.output = outBam
this.sortOrder = sortOrderP
this.analysisName = queueLogDir + outBam + ".sortSam"
this.jobName = queueLogDir + outBam + ".sortSam"
}
// Picard ValidateSamFile: sanity-checks a BAM against the reference and writes
// the report to outLog (kept — not intermediate).
case class validate (inBam: File, outLog: File) extends ValidateSamFile with ExternalCommonArgs {
this.input :+= inBam
this.output = outLog
this.REFERENCE_SEQUENCE = qscript.reference
this.isIntermediate = false
this.analysisName = queueLogDir + outLog + ".validate"
this.jobName = queueLogDir + outLog + ".validate"
}
// Picard AddOrReplaceReadGroups: stamps the @RG fields captured in ReadGroup
// back onto a BWA-realigned BAM (BWA output loses read-group information).
case class addReadGroup (inBam: File, outBam: File, readGroup: ReadGroup) extends AddOrReplaceReadGroups with ExternalCommonArgs {
this.input :+= inBam
this.output = outBam
this.RGID = readGroup.id
this.RGCN = readGroup.cn
this.RGDS = readGroup.ds
this.RGLB = readGroup.lb
this.RGPL = readGroup.pl
this.RGPU = readGroup.pu
this.RGSM = readGroup.sm
this.analysisName = queueLogDir + outBam + ".rg"
this.jobName = queueLogDir + outBam + ".rg"
}
// Picard RevertSam: restores original qualities and (optionally) strips the
// alignment so a BAM can be re-processed from a near-raw state. Query-name
// sorted when the alignment is removed (what BWA expects), coordinate sorted
// otherwise.
case class revert (inBam: File, outBam: File, removeAlignmentInfo: Boolean) extends RevertSam with ExternalCommonArgs {
this.output = outBam
this.input :+= inBam
this.removeAlignmentInformation = removeAlignmentInfo
this.sortOrder = if (removeAlignmentInfo) {SortOrder.queryname} else {SortOrder.coordinate}
// Fix: analysisName was missing the "." separator ("<bam>revert"), unlike
// jobName and every other job in this file.
this.analysisName = queueLogDir + outBam + ".revert"
this.jobName = queueLogDir + outBam + ".revert"
}
// Picard SamToFastq: converts a BAM to fastq for BWA-SW alignment.
case class convertToFastQ (inBam: File, outFQ: File) extends SamToFastq with ExternalCommonArgs {
this.input :+= inBam
this.fastq = outFQ
// Fix: analysisName was missing the "." separator ("<fq>convert_to_fastq"),
// unlike jobName and every other job in this file.
this.analysisName = queueLogDir + outFQ + ".convert_to_fastq"
this.jobName = queueLogDir + outFQ + ".convert_to_fastq"
}
// "bwa aln" for single-ended reads, reading directly from a BAM (-b) and
// writing the suffix-array index (.sai) file.
case class bwa_aln_se (inBam: File, outSai: File) extends CommandLineFunction with ExternalCommonArgs {
@Input(doc="bam file to be aligned") var bam = inBam
@Output(doc="output sai file") var sai = outSai
// Identical command string to before, assembled token-by-token.
def commandLine = Seq(bwaPath, "aln", "-t", bwaThreads, "-q", "5", reference, "-b", bam, ">", sai).mkString(" ")
this.analysisName = queueLogDir + outSai + ".bwa_aln_se"
this.jobName = queueLogDir + outSai + ".bwa_aln_se"
}
// "bwa aln" for one mate of a pair-ended BAM: -b1/-b2 selects which mate,
// producing one .sai per mate across two jobs.
case class bwa_aln_pe (inBam: File, outSai1: File, index: Int) extends CommandLineFunction with ExternalCommonArgs {
@Input(doc="bam file to be aligned") var bam = inBam
@Output(doc="output sai file for 1st mating pair") var sai = outSai1
// Identical command string to before, assembled token-by-token.
def commandLine = Seq(bwaPath, "aln", "-t", bwaThreads, "-q", "5", reference, "-b" + index, bam, ">", sai).mkString(" ")
this.analysisName = queueLogDir + outSai1 + ".bwa_aln_pe1"
this.jobName = queueLogDir + outSai1 + ".bwa_aln_pe1"
}
// "bwa samse": combines the single-end .sai with the BAM to produce the
// aligned SAM output.
case class bwa_sam_se (inBam: File, inSai: File, outBam: File) extends CommandLineFunction with ExternalCommonArgs {
@Input(doc="bam file to be aligned") var bam = inBam
@Input(doc="bwa alignment index file") var sai = inSai
@Output(doc="output aligned bam file") var alignedBam = outBam
// Identical command string to before, assembled token-by-token.
def commandLine = Seq(bwaPath, "samse", reference, sai, bam, ">", alignedBam).mkString(" ")
this.memoryLimit = 6
this.analysisName = queueLogDir + outBam + ".bwa_sam_se"
this.jobName = queueLogDir + outBam + ".bwa_sam_se"
}
// "bwa sampe": combines both mates' .sai files with the BAM to produce the
// aligned SAM output. The same BAM is passed twice, once per mate slot.
case class bwa_sam_pe (inBam: File, inSai1: File, inSai2:File, outBam: File) extends CommandLineFunction with ExternalCommonArgs {
@Input(doc="bam file to be aligned") var bam = inBam
@Input(doc="bwa alignment index file for 1st mating pair") var sai1 = inSai1
@Input(doc="bwa alignment index file for 2nd mating pair") var sai2 = inSai2
@Output(doc="output aligned bam file") var alignedBam = outBam
// Identical command string to before, assembled token-by-token.
def commandLine = Seq(bwaPath, "sampe", reference, sai1, sai2, bam, bam, ">", alignedBam).mkString(" ")
this.memoryLimit = 6
this.analysisName = queueLogDir + outBam + ".bwa_sam_pe"
this.jobName = queueLogDir + outBam + ".bwa_sam_pe"
}
// "bwa bwasw": long-read Smith-Waterman alignment from fastq input.
case class bwa_sw (inFastQ: File, outBam: File) extends CommandLineFunction with ExternalCommonArgs {
@Input(doc="fastq file to be aligned") var fq = inFastQ
@Output(doc="output bam file") var bam = outBam
// Identical command string to before, assembled token-by-token.
def commandLine = Seq(bwaPath, "bwasw", "-t", bwaThreads, reference, fq, ">", bam).mkString(" ")
this.analysisName = queueLogDir + outBam + ".bwasw"
this.jobName = queueLogDir + outBam + ".bwasw"
}
// Writes the final cohort ".list" file enumerating all processed BAMs.
case class writeList(inBams: Seq[File], outBamList: File) extends ListWriterFunction {
this.inputFiles = inBams
this.listFile = outBamList
this.analysisName = queueLogDir + outBamList + ".bamList"
this.jobName = queueLogDir + outBamList + ".bamList"
}
}

View File

@ -1,373 +0,0 @@
package org.broadinstitute.sting.queue.qscripts
import org.broadinstitute.sting.queue.extensions.gatk._
import org.broadinstitute.sting.queue.QScript
import org.broadinstitute.sting.gatk.phonehome.GATKRunReport
/**
 * Methods-development calling pipeline: runs the Unified Genotyper (SNPs and
 * optionally indels) over a set of preset target datasets, then VQSR
 * recalibration/cutting and VariantEval on the results.
 */
class MethodsDevelopmentCallingPipeline extends QScript {
qscript =>
@Argument(shortName="outputDir", doc="output directory", required=false)
var outputDir: String = "./"
@Argument(shortName="skipCalling", doc="skip the calling part of the pipeline and only run VQSR on preset, gold standard VCF files", required=false)
var skipCalling: Boolean = false
// Keys into targetDataSets below; an empty list means "run everything".
@Argument(shortName="dataset", doc="selects the datasets to run. If not provided, all datasets will be used", required=false)
var datasets: List[String] = Nil
@Argument(shortName="runGoldStandard", doc="run the pipeline with the goldstandard VCF files for comparison", required=false)
var runGoldStandard: Boolean = false
@Argument(shortName="noBAQ", doc="turns off BAQ calculation", required=false)
var noBAQ: Boolean = false
@Argument(shortName="noIndels", doc="do not call indels with the Unified Genotyper", required=false)
var noIndels: Boolean = false
// Negative sentinel means "use the walker's default".
@Argument(shortName="mbq", doc="The minimum Phred-Scaled quality score threshold to be considered a good base.", required=false)
var minimumBaseQuality: Int = -1
// Negative sentinel means "use the walker's default".
@Argument(shortName="deletions", doc="Maximum deletion fraction allowed at a site to call a genotype.", required=false)
var deletions: Double = -1
@Argument(shortName="sample", doc="Samples to include in Variant Eval", required=false)
var samples: List[String] = Nil
// Bundles everything the pipeline needs to know about one dataset: reference,
// resource files, the BAM list to call from, interval list, VQSR tranche
// targets, and dataset shape flags. All derived output paths live under
// outputDir (or outputDir/goldStandard/ for the gold-standard run).
class Target(
val baseName: String,
val reference: File,
val dbsnpFile: String,
val hapmapFile: String,
val maskFile: String,
val bamList: File,
val goldStandard_VCF: File,
val intervals: String,
val indelTranchTarget: Double,
val snpTrancheTarget: Double,
val isLowpass: Boolean,
val isExome: Boolean,
val nSamples: Int) {
val name = qscript.outputDir + baseName
val clusterFile = new File(name + ".clusters")
val rawSnpVCF = new File(name + ".raw.vcf")
val rawIndelVCF = new File(name + ".raw.indel.vcf")
val filteredIndelVCF = new File(name + ".filtered.indel.vcf")
val recalibratedSnpVCF = new File(name + ".snp.recalibrated.vcf")
val recalibratedIndelVCF = new File(name + ".indel.recalibrated.vcf")
val tranchesSnpFile = new File(name + ".snp.tranches")
val tranchesIndelFile = new File(name + ".indel.tranches")
val vqsrSnpRscript = name + ".snp.vqsr.r"
val vqsrIndelRscript = name + ".indel.vqsr.r"
val recalSnpFile = new File(name + ".snp.tranches.recal")
val recalIndelFile = new File(name + ".indel.tranches.recal")
// NOTE(review): the three goldStandard* paths concatenate without a "."
// separator (e.g. "<base>goldStandard.tranches"), unlike every other field
// here — looks like a missing "." but changing it would rename outputs;
// confirm before fixing.
val goldStandardRecalibratedVCF = new File(name + "goldStandard.recalibrated.vcf")
val goldStandardTranchesFile = new File(name + "goldStandard.tranches")
val goldStandardRecalFile = new File(name + "goldStandard.tranches.recal")
val evalFile = new File(name + ".snp.eval")
val evalIndelFile = new File(name + ".indel.eval")
val goldStandardName = qscript.outputDir + "goldStandard/" + baseName
val goldStandardClusterFile = new File(goldStandardName + ".clusters")
}
// Reference sequences and standard resource files (Broad filesystem paths),
// plus readability aliases for the boolean Target flags below.
val b37_decoy = new File("/humgen/1kg/reference/human_g1k_v37_decoy.fasta")
val hg19 = new File("/seq/references/Homo_sapiens_assembly19/v1/Homo_sapiens_assembly19.fasta")
val hg18 = new File("/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta")
val b36 = new File("/humgen/1kg/reference/human_b36_both.fasta")
val b37 = new File("/humgen/1kg/reference/human_g1k_v37.fasta")
val dbSNP_hg18_129 = "/humgen/gsa-hpprojects/GATK/data/Comparisons/Validated/dbSNP/dbsnp_129_hg18.rod" // Special case for NA12878 collections that can't use 132 because they are part of it.
val dbSNP_b36 = "/humgen/gsa-hpprojects/GATK/data/Comparisons/Validated/dbSNP/dbsnp_132.b36.excluding_sites_after_129.vcf"
val dbSNP_b37 = "/humgen/gsa-hpprojects/GATK/data/Comparisons/Validated/dbSNP/dbsnp_129_b37.leftAligned.vcf" // Special case for NA12878 collections that can't use 132 because they are part of it.
val hapmap_hg18 = "/humgen/gsa-hpprojects/GATK/data/Comparisons/Validated/HapMap/3.3/sites_r27_nr.hg18_fwd.vcf"
val hapmap_b36 = "/humgen/gsa-hpprojects/GATK/data/Comparisons/Validated/HapMap/3.3/sites_r27_nr.b36_fwd.vcf"
val hapmap_b37 = "/humgen/gsa-hpprojects/GATK/data/Comparisons/Validated/HapMap/3.3/sites_r27_nr.b37_fwd.vcf"
val training_hapmap_b37 = "/humgen/1kg/processing/pipeline_test_bams/hapmap3.3_training_chr20.vcf"
val omni_b36 = "/humgen/gsa-hpprojects/GATK/data/Comparisons/Validated/Omni2.5_chip/Omni25_sites_1525_samples.b36.vcf"
val omni_b37 = "/humgen/gsa-hpprojects/GATK/data/Comparisons/Validated/Omni2.5_chip/Omni25_sites_1525_samples.b37.vcf"
val indelMask_b36 = "/humgen/1kg/processing/pipeline_test_bams/pilot1.dindel.mask.b36.bed"
val indelMask_b37 = "/humgen/1kg/processing/pipeline_test_bams/pilot1.dindel.mask.b37.bed"
val training_1000G = "/humgen/1kg/processing/official_release/phase1/projectConsensus/phase1.wgs.projectConsensus.v2b.recal.highQuality.vcf"
val badSites_1000G = "/humgen/1kg/processing/official_release/phase1/projectConsensus/phase1.wgs.projectConsensus.v2b.recal.terrible.vcf"
val projectConsensus_1000G = "/humgen/1kg/processing/official_release/phase1/projectConsensus/ALL.wgs.projectConsensus_v2b.20101123.snps.sites.vcf"
val indelGoldStandardCallset = "/humgen/gsa-hpprojects/GATK/data/Comparisons/Unvalidated/GoldStandardIndel/gold.standard.indel.MillsAnd1000G.b37.vcf"
// Used as (lowPass / !lowPass etc.) in the Target rows for readability.
val lowPass: Boolean = true
val exome: Boolean = true
val indels: Boolean = true
val queueLogDir = ".qlog/"
// BUGBUG: We no longer support b36/hg18 because several of the necessary files aren't available aligned to those references
// Preset dataset catalog keyed by the -dataset argument. Each row supplies:
// reference, dbSNP/HapMap/indel-mask resources, BAM list, gold-standard VCF,
// interval list, indel/SNP tranche targets, lowpass/exome flags, sample count.
val targetDataSets: Map[String, Target] = Map(
"NA12878_gold" -> new Target("NA12878.goldStandard", hg19, dbSNP_b37, hapmap_b37, indelMask_b37,
new File("/humgen/gsa-hpprojects/dev/carneiro/NA12878/data/goldStandard.list"),
new File("/humgen/gsa-hpprojects/dev/carneiro/NA12878/analysis/snps/NA12878.HiSeq19.filtered.vcf"), // ** There is no gold standard for the gold standard **
"/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.noChrY.hg19.intervals", 90.0, 99.0, lowPass, !exome, 391),
"NA12878_wgs_b37" -> new Target("NA12878.HiSeq.WGS.b37", hg19, dbSNP_b37, hapmap_b37, indelMask_b37,
new File("/humgen/gsa-hpprojects/NA12878Collection/bams/NA12878.HiSeq.WGS.bwa.cleaned.recal.hg19.bam"),
new File("/humgen/gsa-hpprojects/dev/carneiro/NA12878/analysis/snps/NA12878.HiSeq19.filtered.vcf"),
"/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.noChrY.hg19.intervals", 90.0, 99.0, !lowPass, !exome, 1),
"NA12878_wgs_decoy" -> new Target("NA12878.HiSeq.WGS.b37_decoy", b37_decoy, dbSNP_b37, hapmap_b37, indelMask_b37,
new File("/humgen/gsa-hpprojects/NA12878Collection/bams/CEUTrio.HiSeq.WGS.b37_decoy.NA12878.clean.dedup.recal.bam"),
new File("/humgen/gsa-hpprojects/dev/carneiro/NA12878/analysis/snps/NA12878.HiSeq19.filtered.vcf"), // ** THIS GOLD STANDARD NEEDS TO BE CORRECTED **
"/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.noChrY.hg19.intervals", 90.0, 99.0, !lowPass, !exome, 1),
"NA12878_wgs_hg18" -> new Target("NA12878.HiSeq.WGS.hg18", hg18, dbSNP_hg18_129, hapmap_hg18,
"/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/1000GenomesProcessingPaper/wgs.v13/HiSeq.WGS.cleaned.indels.10.mask",
new File("/humgen/gsa-hpprojects/NA12878Collection/bams/NA12878.HiSeq.WGS.bwa.cleaned.recal.bam"),
new File("/home/radon01/depristo/work/oneOffProjects/1000GenomesProcessingPaper/wgs.v13/HiSeq.WGS.cleaned.ug.snpfiltered.indelfiltered.vcf"),
"/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.hg18.intervals", 90.0, 99.0, !lowPass, !exome, 1),
"NA12878_wex_b37" -> new Target("NA12878.HiSeq.WEx.b37", hg19, dbSNP_b37, hapmap_b37, indelMask_b37,
new File("/seq/picard_aggregation/C339/NA12878/v3/NA12878.bam"),
new File("/humgen/gsa-hpprojects/dev/carneiro/trio/analysis/snps/CEUTrio.WEx.filtered.vcf"), // ** THIS GOLD STANDARD NEEDS TO BE CORRECTED **
"/seq/references/HybSelOligos/whole_exome_agilent_1.1_refseq_plus_3_boosters/whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.interval_list", 90.0, 98.0, !lowPass, exome, 1),
"NA12878_wex_hg18" -> new Target("NA12878.HiSeq.WEx.hg18", hg18, dbSNP_hg18_129, hapmap_hg18,
"/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/1000GenomesProcessingPaper/wgs.v13/GA2.WEx.cleaned.indels.10.mask",
new File("/humgen/gsa-hpprojects/NA12878Collection/bams/NA12878.WEx.cleaned.recal.bam"),
new File("/home/radon01/depristo/work/oneOffProjects/1000GenomesProcessingPaper/wgs.v13/GA2.WEx.cleaned.ug.snpfiltered.indelfiltered.vcf"),
"/seq/references/HybSelOligos/whole_exome_agilent_1.1_refseq_plus_3_boosters/whole_exome_agilent_1.1_refseq_plus_3_boosters.targets.interval_list", 90.0, 98.0, !lowPass, exome, 1),
"NA12878_wex_decoy" -> new Target("NA12878.HiSeq.WEx.b37_decoy", b37_decoy, dbSNP_b37, hapmap_b37, indelMask_b37,
new File("/humgen/gsa-hpprojects/NA12878Collection/bams/CEUTrio.HiSeq.WEx.b37_decoy.NA12878.clean.dedup.recal.bam"),
new File("/humgen/gsa-hpprojects/dev/carneiro/trio/analysis/snps/CEUTrio.WEx.filtered.vcf"), // ** THIS GOLD STANDARD NEEDS TO BE CORRECTED **
"/seq/references/HybSelOligos/whole_exome_agilent_1.1_refseq_plus_3_boosters/whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.interval_list", 90.0, 98.0, !lowPass, exome, 1),
"CEUTrio_wex_b37" -> new Target("CEUTrio.HiSeq.WEx.b37", hg19, dbSNP_b37, hapmap_b37, indelMask_b37,
new File("/humgen/gsa-hpprojects/NA12878Collection/bams/CEUTrio.HiSeq.WEx.bwa.cleaned.recal.bam"),
new File("/humgen/gsa-hpprojects/dev/carneiro/trio/analysis/snps/CEUTrio.WEx.filtered.vcf"),
"/seq/references/HybSelOligos/whole_exome_agilent_1.1_refseq_plus_3_boosters/whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.interval_list", 90.0, 98.0, !lowPass, exome, 3),
"CEUTrio_wgs_b37" -> new Target("CEUTrio.HiSeq.WGS.b37", hg19, dbSNP_b37, hapmap_b37, indelMask_b37,
new File("/humgen/gsa-hpprojects/NA12878Collection/bams/CEUTrio.HiSeq.WGS.bwa.cleaned.recal.bam"),
new File("/humgen/gsa-hpprojects/dev/carneiro/trio/analysis/snps/CEUTrio.WEx.filtered.vcf"), // ** THIS GOLD STANDARD NEEDS TO BE CORRECTED **
"/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.hg19.intervals", 90.0, 99.0, !lowPass, !exome, 3),
"CEUTrio_wex_decoy" -> new Target("CEUTrio.HiSeq.WEx.b37_decoy", b37_decoy, dbSNP_b37, hapmap_b37, indelMask_b37,
new File("/humgen/gsa-hpprojects/NA12878Collection/bams/CEUTrio.HiSeq.WEx.b37_decoy.list"),
new File("/humgen/gsa-hpprojects/dev/carneiro/trio/analysis/snps/CEUTrio.WEx.filtered.vcf"), // ** THIS GOLD STANDARD NEEDS TO BE CORRECTED **
"/seq/references/HybSelOligos/whole_exome_agilent_1.1_refseq_plus_3_boosters/whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.interval_list", 90.0, 98.0, !lowPass, exome, 3),
"CEUTrio_wgs_decoy" -> new Target("CEUTrio.HiSeq.WGS.b37_decoy", b37_decoy, dbSNP_b37, hapmap_b37, indelMask_b37,
new File("/humgen/gsa-hpprojects/NA12878Collection/bams/CEUTrio.HiSeq.WGS.b37_decoy.list"),
new File("/humgen/gsa-hpprojects/dev/carneiro/trio/analysis/snps/CEUTrio.WEx.filtered.vcf"), // ** THIS GOLD STANDARD NEEDS TO BE CORRECTED **
"/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.hg19.intervals", 90.0, 99.0, !lowPass, !exome, 3),
"GA2hg19" -> new Target("NA12878.GA2.hg19", hg19, dbSNP_b37, hapmap_b37, indelMask_b37,
new File("/humgen/gsa-hpprojects/NA12878Collection/bams/NA12878.GA2.WGS.bwa.cleaned.hg19.bam"),
new File("/humgen/gsa-hpprojects/dev/carneiro/NA12878/analysis/snps/NA12878.GA2.hg19.filtered.vcf"),
"/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.hg19.intervals", 90.0, 99.0, !lowPass, !exome, 1),
"FIN" -> new Target("FIN", b37, dbSNP_b37, hapmap_b37, indelMask_b37,
new File("/humgen/1kg/processing/pipeline_test_bams/FIN.79sample.Nov2010.chr20.bam"),
new File("/humgen/gsa-hpprojects/dev/data/AugChr20Calls_v4_3state/ALL.august.v4.chr20.filtered.vcf"), // ** THIS GOLD STANDARD NEEDS TO BE CORRECTED **
"/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.chr20.hg19.intervals", 90.0, 99.0, lowPass, !exome, 79),
"TGPWExGdA" -> new Target("1000G.WEx.GdA", b37, dbSNP_b37, hapmap_b37, indelMask_b37,
new File("/humgen/1kg/processing/pipeline_test_bams/Barcoded_1000G_WEx_Reduced_Plate_1.cleaned.list"), // BUGBUG: reduce from 60 to 20 people
new File("/humgen/gsa-scr1/delangel/NewUG/calls/AugustRelease.filtered_Q50_QD5.0_SB0.0.allSamples.SNPs_hg19.WEx_UG_newUG_MQC.vcf"), // ** THIS GOLD STANDARD NEEDS TO BE CORRECTED **
"/seq/references/HybSelOligos/whole_exome_agilent_1.1_refseq_plus_3_boosters/whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.interval_list", 90.0, 99.0, !lowPass, exome, 96),
"LowPassN60" -> new Target("lowpass.N60", b36, dbSNP_b36, hapmap_b36, indelMask_b36,
new File("/humgen/1kg/analysis/bamsForDataProcessingPapers/lowpass_b36/lowpass.chr20.cleaned.matefixed.bam"), // the bam list to call from
new File("/home/radon01/depristo/work/oneOffProjects/VQSRCutByNRS/lowpass.N60.chr20.filtered.vcf"), // the gold standard VCF file to run through the VQSR
"/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.chr20.b36.intervals", 90.0, 99.0, lowPass, !exome, 60), // chunked interval list to use with Queue's scatter/gather functionality
"LowPassEUR363Nov" -> new Target("EUR.nov2010", b37, dbSNP_b37, hapmap_b37, indelMask_b37,
new File("/humgen/1kg/processing/pipeline_test_bams/EUR.363sample.Nov2010.chr20.bam"),
new File("/humgen/gsa-hpprojects/dev/data/AugChr20Calls_v4_3state/ALL.august.v4.chr20.filtered.vcf"), // ** THIS GOLD STANDARD NEEDS TO BE CORRECTED **
"/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.chr20.hg19.intervals", 90.0, 99.0, lowPass, !exome, 363)
)
def script = {
  // Resolve which Target configurations to run: every configured dataset by
  // default, or only the ones named via the -dataset argument.
  var targets: List[Target] = List()
  if (datasets.isEmpty)
    targetDataSets.valuesIterator.foreach(t => targets ::= t)
  else
    datasets.foreach(ds => targets ::= targetDataSets(ds))

  val goldStandard = true
  for (target <- targets) {
    if (!skipCalling) {
      // Indel branch: call, VQSR-train, tranche-cut and evaluate (unless disabled).
      if (!noIndels)
        add(new indelCall(target), new indelRecal(target), new indelCut(target), new indelEvaluation(target))
      // SNP branch: call, then VQSR recalibration, tranche cut and evaluation.
      add(new snpCall(target))
      add(new snpRecal(target, !goldStandard))
      add(new snpCut(target, !goldStandard))
      add(new snpEvaluation(target))
    }
    // Optionally push the gold-standard call set through the same VQSR steps.
    if (runGoldStandard) {
      add(new snpRecal(target, goldStandard))
      add(new snpCut(target, goldStandard))
    }
  }
}
// Arguments applied to every GATK command assembled by this pipeline.
trait UNIVERSAL_GATK_ARGS extends CommandLineGATK {
logging_level = "INFO";   // keep walker output at INFO verbosity
memoryLimit = 4;          // heap limit (GB) for each job
phone_home = GATKRunReport.PhoneHomeOption.NO_ET   // disable phone-home usage reporting
}
// Path of the on-disk index (.bai) that accompanies the given BAM file.
def bai(bam: File) = new File(bam.toString + ".bai")
// 1.) Unified Genotyper Base
// Base UnifiedGenotyper configuration shared by the SNP and indel callers:
// reference, chunked intervals, scatter width, and depth-class tuning.
class GenotyperBase (t: Target) extends UnifiedGenotyper with UNIVERSAL_GATK_ARGS {
  // Low-pass targets use relaxed confidence thresholds and lighter downsampling.
  private val lowpass = t.isLowpass
  this.reference_sequence = t.reference
  this.intervalsString ++= List(t.intervals)
  this.scatterCount = 140
  this.nt = 2
  this.dcov = if (lowpass) 50 else 250
  this.stand_call_conf = if (lowpass) 4.0 else 30.0
  this.stand_emit_conf = if (lowpass) 4.0 else 30.0
  this.input_file :+= t.bamList
  this.D = new File(t.dbsnpFile)
}
// 1a.) Call SNPs with UG
// SNP calling job; writes the raw (unrecalibrated) SNP VCF for a target.
class snpCall (t: Target) extends GenotyperBase(t) {
  // Negative command-line values mean "keep the GATK default".
  if (minimumBaseQuality >= 0)
    this.min_base_quality_score = minimumBaseQuality
  if (qscript.deletions >= 0)
    this.max_deletion_fraction = qscript.deletions
  this.glm = org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeLikelihoodsCalculationModel.Model.SNP
  // BAQ is switched off when disabled by flag or when calling exome targets.
  this.baq =
    if (noBAQ || t.isExome) org.broadinstitute.sting.utils.baq.BAQ.CalculationMode.OFF
    else org.broadinstitute.sting.utils.baq.BAQ.CalculationMode.CALCULATE_AS_NECESSARY
  this.out = t.rawSnpVCF
  this.analysisName = t.name + "_UGs"
  this.jobName = queueLogDir + t.name + ".snpcall"
}
// 1b.) Call Indels with UG
// Indel calling job; writes the raw indel VCF for a target.
class indelCall (t: Target) extends GenotyperBase(t) {
  this.memoryLimit = 6   // indel genotyping needs more heap than SNP calling
  this.glm = org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeLikelihoodsCalculationModel.Model.INDEL
  this.baq = org.broadinstitute.sting.utils.baq.BAQ.CalculationMode.OFF   // BAQ is not applied for indels
  this.out = t.rawIndelVCF
  this.analysisName = t.name + "_UGi"
  this.jobName = queueLogDir + t.name + ".indelcall"
}
// 2.) Hard Filtering for indels
// Hard-filtering job for raw indel calls (name/expression pairs mark failures).
class indelFilter (t: Target) extends VariantFiltration with UNIVERSAL_GATK_ARGS {
  this.memoryLimit = 2
  this.reference_sequence = t.reference
  this.intervalsString ++= List(t.intervals)
  this.scatterCount = 10
  this.V = t.rawIndelVCF
  this.out = t.filteredIndelVCF
  // Filter names paired with the JEXL expressions that flag failing sites.
  // InbreedingCoeff is population-wide and only meaningful with >= 10 samples.
  private val filters =
    List("IndelQD" -> "QD < 2.0",
         "IndelReadPosRankSum" -> "ReadPosRankSum < -20.0",
         "IndelFS" -> "FS > 200.0") ++
    (if (t.nSamples >= 10) List("IndelInbreedingCoeff" -> "InbreedingCoeff < -0.8") else Nil)
  this.filterName ++= filters.map(_._1)
  this.filterExpression ++= filters.map(_._2)
  this.analysisName = t.name + "_VF"
  this.jobName = queueLogDir + t.name + ".indelfilter"
}
// Shared VariantRecalibrator setup: reference, intervals, and the full ladder
// of truth-sensitivity tranches to evaluate.
class VQSRBase(t: Target) extends VariantRecalibrator with UNIVERSAL_GATK_ARGS {
this.nt = 2
this.reference_sequence = t.reference
this.intervalsString ++= List(t.intervals)
// allPoly: presumably marks training sites as all-polymorphic — NOTE(review): confirm flag semantics.
this.allPoly = true
// Tranche thresholds from keep-everything (100.0) down to the strictest cut (90.0).
this.tranche ++= List("100.0", "99.9", "99.5", "99.3", "99.0", "98.9", "98.8", "98.5", "98.4", "98.3", "98.2", "98.1", "98.0", "97.9", "97.8", "97.5", "97.0", "95.0", "90.0")
}
// VQSR model training for SNPs. With goldStandard=true the model is trained on
// the target's gold-standard VCF instead of the raw call set.
class snpRecal(t: Target, goldStandard: Boolean) extends VQSRBase(t) with UNIVERSAL_GATK_ARGS {
this.input :+= ( if ( goldStandard ) { t.goldStandard_VCF } else { t.rawSnpVCF } )
// Training resources with decreasing priors: HapMap (truth), Omni, 1000G
// training sites, dbSNP (known only), and the 1000G project consensus.
this.resource :+= new TaggedFile( t.hapmapFile, "known=false,training=true,truth=true,prior=15.0" )
this.resource :+= new TaggedFile( omni_b37, "known=false,training=true,truth=true,prior=12.0" )
this.resource :+= new TaggedFile( training_1000G, "known=false,training=true,prior=10.0" )
this.resource :+= new TaggedFile( t.dbsnpFile, "known=true,prior=2.0" )
this.resource :+= new TaggedFile( projectConsensus_1000G, "prior=8.0" )
this.use_annotation ++= List("QD", "HaplotypeScore", "MQRankSum", "ReadPosRankSum", "MQ", "FS")
if(t.nSamples >= 10)
this.use_annotation ++= List("InbreedingCoeff") // InbreedingCoeff is a population-wide statistic that requires at least 10 samples to calculate
if(!t.isExome)
this.use_annotation ++= List("DP")
else { // exome specific parameters
this.resource :+= new TaggedFile( badSites_1000G, "bad=true,prior=2.0" )
this.mG = 6
if(t.nSamples <= 3) { // very few exome samples means very few variants
this.mG = 4
this.percentBad = 0.04
}
}
// Gold-standard runs write to their own tranches/recal files so they do not
// clobber the raw-call-set outputs.
this.tranches_file = if ( goldStandard ) { t.goldStandardTranchesFile } else { t.tranchesSnpFile }
this.recal_file = if ( goldStandard ) { t.goldStandardRecalFile } else { t.recalSnpFile }
this.rscript_file = t.vqsrSnpRscript
this.mode = org.broadinstitute.sting.gatk.walkers.variantrecalibration.VariantRecalibratorArgumentCollection.Mode.SNP
this.analysisName = t.name + "_VQSRs"
this.jobName = queueLogDir + t.name + ".snprecal"
}
// VQSR model training for indels, using the indel gold-standard call set
// as the truth resource and dbSNP as known sites.
class indelRecal(t: Target) extends VQSRBase(t) with UNIVERSAL_GATK_ARGS {
  this.input :+= t.rawIndelVCF
  this.resource :+= new TaggedFile(indelGoldStandardCallset, "known=false,training=true,truth=true,prior=12.0" )
  this.resource :+= new TaggedFile( t.dbsnpFile, "known=true,prior=2.0" )
  // InbreedingCoeff is population-wide and only usable with >= 10 samples.
  private val annotations =
    if (t.nSamples >= 10) List("QD", "HaplotypeScore", "ReadPosRankSum", "FS", "InbreedingCoeff")
    else List("QD", "HaplotypeScore", "ReadPosRankSum", "FS")
  this.use_annotation ++= annotations
  this.tranches_file = t.tranchesIndelFile
  this.recal_file = t.recalIndelFile
  this.rscript_file = t.vqsrIndelRscript
  this.mode = org.broadinstitute.sting.gatk.walkers.variantrecalibration.VariantRecalibratorArgumentCollection.Mode.INDEL
  this.analysisName = t.name + "_VQSRi"
  this.jobName = queueLogDir + t.name + ".indelrecal"
}
// 4.) Apply the recalibration table to the appropriate tranches
// Shared ApplyRecalibration setup (extra heap; per-target reference/intervals).
class applyVQSRBase (t: Target) extends ApplyRecalibration with UNIVERSAL_GATK_ARGS {
this.memoryLimit = 6
this.reference_sequence = t.reference
this.intervalsString ++= List(t.intervals)
}
// Applies the SNP recalibration at the target's tranche level, for either the
// raw call set or the gold-standard run.
class snpCut (t: Target, goldStandard: Boolean) extends applyVQSRBase(t) {
  // Pick the matching input/tranches/recal triple for the requested run.
  private val (inVCF, tranches, recal) =
    if (goldStandard) (t.goldStandard_VCF, t.goldStandardTranchesFile, t.goldStandardRecalFile)
    else (t.rawSnpVCF, t.tranchesSnpFile, t.recalSnpFile)
  this.input :+= inVCF
  this.tranches_file = tranches
  this.recal_file = recal
  this.ts_filter_level = t.snpTrancheTarget
  this.mode = org.broadinstitute.sting.gatk.walkers.variantrecalibration.VariantRecalibratorArgumentCollection.Mode.SNP
  this.out = t.recalibratedSnpVCF
  this.analysisName = t.name + "_AVQSRs"
  this.jobName = queueLogDir + t.name + ".snpcut"
}
// Applies the indel recalibration at the target's indel tranche level.
class indelCut (t: Target) extends applyVQSRBase(t) {
  this.input :+= t.rawIndelVCF
  this.tranches_file = t.tranchesIndelFile
  this.recal_file = t.recalIndelFile
  this.mode = org.broadinstitute.sting.gatk.walkers.variantrecalibration.VariantRecalibratorArgumentCollection.Mode.INDEL
  this.ts_filter_level = t.indelTranchTarget
  this.out = t.recalibratedIndelVCF
  this.analysisName = t.name + "_AVQSRi"
  this.jobName = queueLogDir + t.name + ".indelcut"
}
// 5.) Variant Evaluation Base(OPTIONAL)
// Shared VariantEval setup: HapMap comparison track, dbSNP, and sample subset.
class EvalBase(t: Target) extends VariantEval with UNIVERSAL_GATK_ARGS {
this.memoryLimit = 3
this.comp :+= new TaggedFile(t.hapmapFile, "hapmap" )
this.D = new File(t.dbsnpFile)
this.reference_sequence = t.reference
this.intervalsString ++= List(t.intervals)
// `samples` comes from the enclosing QScript configuration (declared outside this view).
this.sample = samples
}
// 5a.) SNP Evaluation (OPTIONAL) based on the cut vcf
// Evaluation of the recalibrated SNP call set.
class snpEvaluation(t: Target) extends EvalBase(t) {
  this.eval :+= t.recalibratedSnpVCF
  // The Omni comparison track is only valid on the b37/hg19 coordinate system.
  if (t.reference == b37 || t.reference == hg19)
    this.comp :+= new TaggedFile( omni_b37, "omni" )
  this.out = t.evalFile
  this.analysisName = t.name + "_VEs"
  this.jobName = queueLogDir + t.name + ".snpeval"
}
// 5b.) Indel Evaluation (OPTIONAL)
// Evaluation of the recalibrated indel call set against the gold standard.
class indelEvaluation(t: Target) extends EvalBase(t) {
  this.eval :+= t.recalibratedIndelVCF
  this.comp :+= new TaggedFile(indelGoldStandardCallset, "indelGS" )
  // Disable the default evaluation modules and run only the indel-relevant ones.
  this.noEV = true
  this.evalModule = List("CompOverlap", "CountVariants", "TiTvVariantEvaluator", "ValidationReport", "IndelStatistics")
  this.out = t.evalIndelFile
  this.analysisName = t.name + "_VEi"
  // BUG FIX: queueLogDir was prepended twice, producing a bogus log path;
  // every sibling job uses a single queueLogDir prefix.
  this.jobName = queueLogDir + t.name + ".indeleval"
}
}

View File

@ -1,188 +0,0 @@
package org.broadinstitute.sting.queue.qscripts
import org.broadinstitute.sting.queue.QScript
import org.broadinstitute.sting.queue.util.QScriptUtils
import net.sf.samtools.SAMFileHeader.SortOrder
import org.broadinstitute.sting.utils.exceptions.UserException
import org.broadinstitute.sting.commandline.Hidden
import org.broadinstitute.sting.queue.extensions.picard.{ReorderSam, SortSam, AddOrReplaceReadGroups}
import org.broadinstitute.sting.queue.extensions.gatk._
/**
* Created by IntelliJ IDEA.
* User: carneiro
* Date: 4/20/11
* Time: 16:29
*/
/**
 * PacBio read processing pipeline.
 *
 * For each input file:
 *   fasta/fastq -> BWA-SW alignment -> coordinate sort -> add read groups -> BQSR
 *   BLASR BAM   -> reorder to reference -> reassign mapping qualities    -> BQSR
 *   plain BAM   ->                                                          BQSR
 *
 * BQSR runs as covariates (pass 1) -> recalibrate -> covariates (pass 2), so
 * before/after covariate tables are available for comparison.
 */
class PacbioProcessingPipeline extends QScript {

  @Input(doc="input FASTA/FASTQ/BAM file - or list of FASTA/FASTQ/BAM files. ", shortName="i", required=true)
  var input: File = _

  @Input(doc="Reference fasta file", shortName="R", required=true)
  var reference: File = _

  @Input(doc="dbsnp VCF file to use ", shortName="D", required=true)
  var dbSNP: File = _

  @Argument(doc="Number of jobs to scatter/gather. Default: 0." , shortName = "sg", required=false)
  var threads: Int = 0

  @Argument(doc="Sample Name to fill in the Read Group information (only necessary if using fasta/fastq)" , shortName = "sn", required=false)
  var sample: String = "NA"

  @Input(doc="The path to the binary of bwa to align fasta/fastq files", fullName="path_to_bwa", shortName="bwa", required=false)
  var bwaPath: File = _

  @Argument(doc="Input is a BLASR generated BAM file", shortName = "blasr", fullName="blasr_bam", required=false)
  var BLASR_BAM: Boolean = false

  @Hidden
  @Argument(doc="The default base qualities to use before recalibration. Default is Q20 (should be good for every dataset)." , shortName = "dbq", required=false)
  var dbq: Int = 20

  @Hidden
  @Argument(shortName="bwastring", required=false)
  var bwastring: String = ""

  @Hidden
  @Argument(shortName = "test", fullName = "test_mode", required = false)
  var testMode: Boolean = false

  // Prefix used to group per-job log/analysis names.
  val queueLogDir: String = ".qlog/"

  def script() {
    val fileList: Seq[File] = QScriptUtils.createSeqFromFile(input)
    for (file: File <- fileList) {
      // Decide the entry point of the pipeline from the file extension.
      val isFastq = file.endsWith(".fq") || file.endsWith(".fastq")
      val useBwa = isFastq || file.endsWith(".fasta")
      if (useBwa && bwaPath == null)
        throw new UserException("You provided a fasta/fastq file but didn't provide the path for BWA")
      // FASTQ input carries real base qualities; everything else gets the
      // default quality (dbq) assigned during the first covariate pass.
      val resetQuals = !isFastq

      // FASTA -> BAM intermediate files
      val alignedSam: File = file.getName + ".aligned.sam"
      val sortedBam: File = swapExt(alignedSam, ".sam", ".bam")
      val rgBam: File = swapExt(file, ".bam", ".rg.bam")
      val bamBase = if (useBwa) {rgBam} else {file}

      // BAM processing files
      // (removed unused locals `path1`/`path2`: they were computed but never read)
      val mqBAM: File = swapExt(bamBase, ".bam", ".mq.bam")
      val recalFile1: File = swapExt(bamBase, ".bam", ".recal1.table")
      val recalFile2: File = swapExt(bamBase, ".bam", ".recal2.table")
      val recalBam: File = swapExt(bamBase, ".bam", ".recal.bam")

      if (useBwa) {
        add(align(file, alignedSam),
            sortSam(alignedSam, sortedBam),
            addReadGroup(sortedBam, rgBam, sample))
      }
      else if (BLASR_BAM) {
        val reorderedBAM = swapExt(bamBase, ".bam", ".reordered.bam")
        add(reorder(bamBase, reorderedBAM),
            changeMQ(reorderedBAM, mqBAM))
      }

      val bam = if (BLASR_BAM) {mqBAM} else {bamBase}
      // Two covariate passes bracket the recalibration so its effect can be compared.
      add(cov(bam, recalFile1, resetQuals),
          recal(bam, recalFile1, recalBam),
          cov(recalBam, recalFile2, false))
    }
  }

  // General arguments to non-GATK tools.
  trait ExternalCommonArgs extends CommandLineFunction {
    this.memoryLimit = 4
    this.isIntermediate = true   // intermediate outputs may be removed by Queue on success
  }

  // Every GATK walker in this pipeline runs against the pipeline reference.
  trait CommandLineGATKArgs extends CommandLineGATK with ExternalCommonArgs {
    this.reference_sequence = reference
  }

  // Align a fasta/fastq with BWA-SW (long-read parameters), writing a SAM file.
  case class align(@Input inFastq: File, @Output outSam: File) extends ExternalCommonArgs {
    def commandLine = bwaPath + " bwasw -b5 -q2 -r1 -z20 -t16 " + reference + " " + inFastq + " > " + outSam
    this.memoryLimit = 8
    this.analysisName = queueLogDir + outSam + ".bwa_sam_se"
    this.jobName = queueLogDir + outSam + ".bwa_sam_se"
  }

  // Coordinate-sort a SAM into a BAM with Picard SortSam.
  case class sortSam (inSam: File, outBam: File) extends SortSam with ExternalCommonArgs {
    this.input = List(inSam)
    this.output = outBam
    this.memoryLimit = 8
    this.sortOrder = SortOrder.coordinate
    this.analysisName = queueLogDir + outBam + ".sortSam"
    this.jobName = queueLogDir + outBam + ".sortSam"
  }

  // Reorder a BAM to match the reference contig order (Picard ReorderSam).
  case class reorder (inSam: File, outSam: File) extends ReorderSam with ExternalCommonArgs {
    this.input = List(inSam)
    this.output = outSam
    this.sortReference = reference
  }

  // Reassign mapping qualities on BLASR output via the GATK read filter.
  case class changeMQ(inBam: File, outBam: File) extends PrintReads with CommandLineGATKArgs {
    this.input_file :+= inBam
    this.out = outBam
    this.read_filter :+= "ReassignMappingQuality"
  }

  // Stamp a fixed PacBio read group onto the BAM (required downstream by GATK).
  case class addReadGroup (inBam: File, outBam: File, sample: String) extends AddOrReplaceReadGroups with ExternalCommonArgs {
    @Output(doc="output bai file") var bai = swapExt(outBam, ".bam", ".bai")
    this.input = List(inBam)
    this.output = outBam
    this.RGID = "1"
    this.RGCN = "BI"
    this.RGPL = "PacBio_RS"
    this.RGSM = sample
    this.RGLB = "default_library"
    this.RGPU = "default_pu"
    this.analysisName = queueLogDir + outBam + ".rg"
    this.jobName = queueLogDir + outBam + ".rg"
  }

  // Count base-quality covariates (BaseRecalibrator pass); optionally resets
  // input base qualities to the default (dbq) for reads without real ones.
  case class cov (inBam: File, outRecalFile: File, resetQuals: Boolean) extends BaseRecalibrator with CommandLineGATKArgs {
    if (resetQuals)
      this.DBQ = dbq
    this.knownSites :+= dbSNP
    this.covariate ++= List("ReadGroupCovariate", "QualityScoreCovariate", "CycleCovariate", "ContextCovariate")
    this.input_file :+= inBam
    this.disable_indel_quals = true
    this.out = outRecalFile
    this.analysisName = queueLogDir + outRecalFile + ".covariates"
    this.jobName = queueLogDir + outRecalFile + ".covariates"
    this.scatterCount = threads
    this.read_filter :+= "BadCigar"
  }

  // Apply the recalibration table, writing the final (kept) recalibrated BAM.
  case class recal (inBam: File, inRecalFile: File, outBam: File) extends PrintReads with CommandLineGATKArgs {
    this.DBQ = dbq
    this.input_file :+= inBam
    this.BQSR = inRecalFile
    this.out = outBam
    this.isIntermediate = false   // final deliverable: do not clean up
    this.analysisName = queueLogDir + outBam + ".recalibration"
    this.jobName = queueLogDir + outBam + ".recalibration"
    this.read_filter :+= "BadCigar"
    this.scatterCount = threads
  }
}