From ca503e5801c781eceb06444627c7bda7ea29c8c3 Mon Sep 17 00:00:00 2001 From: depristo Date: Wed, 1 Sep 2010 20:23:37 +0000 Subject: [PATCH] Queue scripts for recalibration and running nSample UG jobs pre and dynamic merging git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4186 348d0f76-0448-11de-a6fe-93d51630548a --- .../depristo/manySampleUGPerformance.scala | 66 +++++++++++++++++++ scala/qscript/recalibrate.scala | 10 +++ 2 files changed, 76 insertions(+) create mode 100755 scala/qscript/depristo/manySampleUGPerformance.scala diff --git a/scala/qscript/depristo/manySampleUGPerformance.scala b/scala/qscript/depristo/manySampleUGPerformance.scala new file mode 100755 index 000000000..df1be75a8 --- /dev/null +++ b/scala/qscript/depristo/manySampleUGPerformance.scala @@ -0,0 +1,66 @@ +import org.broadinstitute.sting.queue.QScript +import org.broadinstitute.sting.queue.extensions.gatk._ +import org.broadinstitute.sting.queue.extensions.samtools.SamtoolsIndexFunction + +class ManySampleUGPerformanceTesting extends QScript { + @Argument(doc="gatkJarFile", required=false) + var gatkJarFile: File = new File("/home/radon01/depristo/dev/GenomeAnalysisTKStable/trunk/dist/GenomeAnalysisTK.jar") + + @Argument(shortName = "R", doc="ref", required=false) + var referenceFile: File = new File("/humgen/1kg/reference/human_g1k_v37.fasta") + +val TARGET_INTERVAL = "my.intervals" +val FULL_BAM_LIST = new File("/humgen/1kg/processing/allPopulations_chr20_june_release/allPopulations.june.bam.list") +val MERGED_DIR = new File("/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/manySampleUGPerformance/") + +trait UNIVERSAL_GATK_ARGS extends CommandLineGATK { + this.logging_level = "INFO"; + this.jarFile = gatkJarFile; + this.intervals = new File(TARGET_INTERVAL); + this.reference_sequence = referenceFile; + this.jobQueue = "gsa"; + this.et = Option(org.broadinstitute.sting.gatk.phonehome.GATKRunReport.PhoneHomeOption.STANDARD); + this.dcov = Option(50); + //this.commandDirectory = new File("results"); + } + +def script = { + for (nSamples <- List(1, 2, 5, 10, 50, 100, 200, 300, 400, 500)) { + val sublist = new SliceList(nSamples) + val mergeSublist = new MergeBAMs(sublist.list) + + add(sublist) + add(mergeSublist) + add(new Index(mergeSublist.o) ) + add(new Call(sublist.list, nSamples, "dynamic_merge")) + add(new Call(mergeSublist.o, nSamples, "pre_merge")) + } +} + +class Index(bamIn: File) extends SamtoolsIndexFunction { + this.jobQueue = "gsa" + bamFile = bamIn +} + +class MergeBAMs(bamList: File) extends PrintReads with UNIVERSAL_GATK_ARGS { + this.memoryLimit = Some(3) + this.input_file :+= bamList.toNamedFile + this.o = new File(MERGED_DIR + "/" + bamList.getName + ".bam") + } + +class Call(@Input(doc="foo") bamList: File, n: Integer, name: String) extends UnifiedGenotyper with UNIVERSAL_GATK_ARGS { + @Output(doc="foo") var outVCF: File = new File("%s.%d.%s.vcf".format(bamList.getName, n, name)) + this.memoryLimit = Some(4) + this.input_file :+= bamList.toNamedFile + this.jobQueue = "gsa" + this.stand_call_conf = Option(10.0) + this.o = outVCF + } + +class SliceList(n: Integer) extends CommandLineFunction { + @Output(doc="foo") var list: File = new File("bams.%d.list".format(n)) + this.jobQueue = "gsa" + def commandLine = "head -n %d %s > %s".format(n, FULL_BAM_LIST, list) + } +} + diff --git a/scala/qscript/recalibrate.scala b/scala/qscript/recalibrate.scala index 182bed82d..f1211283b 100755 --- a/scala/qscript/recalibrate.scala +++ b/scala/qscript/recalibrate.scala @@ -1,3 +1,4 @@ +import org.broadinstitute.sting.queue.extensions.picard.PicardBamJarFunction import org.broadinstitute.sting.queue.extensions.gatk._ import org.broadinstitute.sting.queue.extensions.samtools.SamtoolsIndexFunction import org.broadinstitute.sting.queue.QScript @@ -16,9 +17,15 @@ class recalibrate extends QScript { @Argument(doc="Assume initial count covariates has completed", required=false) var skipInitialCountCovariates: Boolean = false + @Argument(doc="", required=false) + var skipUQUpdateArg: Boolean = false + @Argument(shortName = "R", doc="ref") var referenceFile: File = _ + @Argument(doc="X", required=false) + var picardMergeSamFilesJar: File = new File("/seq/software/picard/current/bin/MergeSamFiles.jar") + trait UNIVERSAL_GATK_ARGS extends CommandLineGATK { logging_level = "INFO"; jarFile = gatkJarFile; reference_sequence = referenceFile; } def script = { @@ -33,6 +40,8 @@ def script = { val tableRecal = new TableRecalibrate(bamIn, recalData, recalBam) { useOriginalQualities = true } if ( scatter ) { tableRecal.intervals = new File("/humgen/gsa-hpprojects/GATK/data/chromosomes.hg18.interval_list") + //tableRecal.scatterClass = classOf[ContigScatterFunction] + tableRecal.setupGatherFunction = { case (f: PicardBamJarFunction, _) => f.jarFile = picardMergeSamFilesJar; f.memoryLimit = Some(4) } tableRecal.scatterCount = 25 } add(tableRecal) @@ -69,6 +78,7 @@ class TableRecalibrate(bamInArg: File, recalDataIn: File, bamOutArg: File) exten this.output_bam = bamOutArg this.logging_level = "INFO" this.memoryLimit = Some(2) + this.skipUQUpdate = skipUQUpdateArg override def dotString = "TableRecalibrate: %s => %s".format(bamInArg.getName, bamOutArg.getName, if (this.useOriginalQualities) " -OQ" else "") }