Queue scripts for recalibration and running nSample UG jobs pre and dynamic merging
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4186 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
995cfe34fe
commit
ca503e5801
|
|
@ -0,0 +1,66 @@
|
|||
import org.broadinstitute.sting.queue.QScript
|
||||
import org.broadinstitute.sting.queue.extensions.gatk._
|
||||
import org.broadinstitute.sting.queue.extensions.samtools.SamtoolsIndexFunction
|
||||
|
||||
class ManySampleUGPerformanceTesting extends QScript {
|
||||
@Argument(doc="gatkJarFile", required=false)
|
||||
var gatkJarFile: File = new File("/home/radon01/depristo/dev/GenomeAnalysisTKStable/trunk/dist/GenomeAnalysisTK.jar")
|
||||
|
||||
@Argument(shortName = "R", doc="ref", required=false)
|
||||
var referenceFile: File = new File("/humgen/1kg/reference/human_g1k_v37.fasta")
|
||||
|
||||
val TARGET_INTERVAL = "my.intervals"
|
||||
val FULL_BAM_LIST = new File("/humgen/1kg/processing/allPopulations_chr20_june_release/allPopulations.june.bam.list")
|
||||
val MERGED_DIR = new File("/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/manySampleUGPerformance/")
|
||||
|
||||
trait UNIVERSAL_GATK_ARGS extends CommandLineGATK {
|
||||
this.logging_level = "INFO";
|
||||
this.jarFile = gatkJarFile;
|
||||
this.intervals = new File(TARGET_INTERVAL);
|
||||
this.reference_sequence = referenceFile;
|
||||
this.jobQueue = "gsa";
|
||||
this.et = Option(org.broadinstitute.sting.gatk.phonehome.GATKRunReport.PhoneHomeOption.STANDARD);
|
||||
this.dcov = Option(50);
|
||||
//this.commandDirectory = new File("results");
|
||||
}
|
||||
|
||||
def script = {
|
||||
for (nSamples <- List(1, 2, 5, 10, 50, 100, 200, 300, 400, 500)) {
|
||||
val sublist = new SliceList(nSamples)
|
||||
val mergeSublist = new MergeBAMs(sublist.list)
|
||||
|
||||
add(sublist)
|
||||
add(mergeSublist)
|
||||
add(new Index(mergeSublist.o) )
|
||||
add(new Call(sublist.list, nSamples, "dynamic_merge"))
|
||||
add(new Call(mergeSublist.o, nSamples, "pre_merge"))
|
||||
}
|
||||
}
|
||||
|
||||
class Index(bamIn: File) extends SamtoolsIndexFunction {
|
||||
this.jobQueue = "gsa"
|
||||
bamFile = bamIn
|
||||
}
|
||||
|
||||
class MergeBAMs(bamList: File) extends PrintReads with UNIVERSAL_GATK_ARGS {
|
||||
this.memoryLimit = Some(3)
|
||||
this.input_file :+= bamList.toNamedFile
|
||||
this.o = new File(MERGED_DIR + "/" + bamList.getName + ".bam")
|
||||
}
|
||||
|
||||
class Call(@Input(doc="foo") bamList: File, n: Integer, name: String) extends UnifiedGenotyper with UNIVERSAL_GATK_ARGS {
|
||||
@Output(doc="foo") var outVCF: File = new File("%s.%d.%s.vcf".format(bamList.getName, n, name))
|
||||
this.memoryLimit = Some(4)
|
||||
this.input_file :+= bamList.toNamedFile
|
||||
this.jobQueue = "gsa"
|
||||
this.stand_call_conf = Option(10.0)
|
||||
this.o = outVCF
|
||||
}
|
||||
|
||||
class SliceList(n: Integer) extends CommandLineFunction {
|
||||
@Output(doc="foo") var list: File = new File("bams.%d.list".format(n))
|
||||
this.jobQueue = "gsa"
|
||||
def commandLine = "head -n %d %s > %s".format(n, FULL_BAM_LIST, list)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1,3 +1,4 @@
|
|||
import org.broadinstitute.sting.queue.extensions.picard.PicardBamJarFunction
|
||||
import org.broadinstitute.sting.queue.extensions.gatk._
|
||||
import org.broadinstitute.sting.queue.extensions.samtools.SamtoolsIndexFunction
|
||||
import org.broadinstitute.sting.queue.QScript
|
||||
|
|
@ -16,9 +17,15 @@ class recalibrate extends QScript {
|
|||
@Argument(doc="Assume initial count covariates has completed", required=false)
|
||||
var skipInitialCountCovariates: Boolean = false
|
||||
|
||||
@Argument(doc="", required=false)
|
||||
var skipUQUpdateArg: Boolean = false
|
||||
|
||||
@Argument(shortName = "R", doc="ref")
|
||||
var referenceFile: File = _
|
||||
|
||||
@Argument(doc="X", required=false)
|
||||
var picardMergeSamFilesJar: File = new File("/seq/software/picard/current/bin/MergeSamFiles.jar")
|
||||
|
||||
trait UNIVERSAL_GATK_ARGS extends CommandLineGATK { logging_level = "INFO"; jarFile = gatkJarFile; reference_sequence = referenceFile; }
|
||||
|
||||
def script = {
|
||||
|
|
@ -33,6 +40,8 @@ def script = {
|
|||
val tableRecal = new TableRecalibrate(bamIn, recalData, recalBam) { useOriginalQualities = true }
|
||||
if ( scatter ) {
|
||||
tableRecal.intervals = new File("/humgen/gsa-hpprojects/GATK/data/chromosomes.hg18.interval_list")
|
||||
//tableRecal.scatterClass = classOf[ContigScatterFunction]
|
||||
tableRecal.setupGatherFunction = { case (f: PicardBamJarFunction, _) => f.jarFile = picardMergeSamFilesJar; f.memoryLimit = Some(4) }
|
||||
tableRecal.scatterCount = 25
|
||||
}
|
||||
add(tableRecal)
|
||||
|
|
@ -69,6 +78,7 @@ class TableRecalibrate(bamInArg: File, recalDataIn: File, bamOutArg: File) exten
|
|||
this.output_bam = bamOutArg
|
||||
this.logging_level = "INFO"
|
||||
this.memoryLimit = Some(2)
|
||||
this.skipUQUpdate = skipUQUpdateArg
|
||||
|
||||
override def dotString = "TableRecalibrate: %s => %s".format(bamInArg.getName, bamOutArg.getName, if (this.useOriginalQualities) " -OQ" else "")
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue