From 3c1bf40a45025cd1bf56f8dbe784b69d06aeb8e3 Mon Sep 17 00:00:00 2001
From: chartl
Date: Wed, 23 Mar 2011 19:42:29 +0000
Subject: [PATCH] QScript for scatter-gathering regional association (not quite
 as easy as using the built-in extension, due to the multiplexer). Currently
 does not work due to something I'm missing re: scatter gather class, this
 commit is an interim one.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5500 348d0f76-0448-11de-a6fe-93d51630548a
---
 .../oneoffs/chartl/ScatterGatherAssociation.q | 114 +++++++++++++++++++
 1 file changed, 114 insertions(+)
 create mode 100755 scala/qscript/oneoffs/chartl/ScatterGatherAssociation.q

diff --git a/scala/qscript/oneoffs/chartl/ScatterGatherAssociation.q b/scala/qscript/oneoffs/chartl/ScatterGatherAssociation.q
new file mode 100755
index 000000000..ec16611c1
--- /dev/null
+++ b/scala/qscript/oneoffs/chartl/ScatterGatherAssociation.q
@@ -0,0 +1,114 @@
+import org.broadinstitute.sting.commandline.{Argument, Output, Input}
+import org.broadinstitute.sting.queue.extensions.gatk.CommandLineGATK
+import org.broadinstitute.sting.queue.QScript
+import org.broadinstitute.sting.utils.text.XReadLines
+import collection.JavaConversions._
+
+/**
+ * QScript that runs the GATK RegionalAssociation analysis as a single
+ * scatter-gatherable function over the BAM files named in a list file.
+ *
+ * NOTE(review): the commit message states that this does not work yet because
+ * of something missing around the scatter-gather class. Presumably the per-test
+ * outputs still need a gather strategy -- confirm against
+ * ScatterGatherableFunction before relying on this script.
+ */
+class ScatterGatherAssociation extends QScript {
+
+  @Argument(fullName="gatkJar",shortName="gatk",doc="Path to the GATK jarfile",required=true)
+  var gatkJar : File = _
+  // NOTE(review): metaData is required but is never read anywhere in this script.
+  @Argument(fullName="metaData",shortName="SM",doc="Sample meta data",required=true)
+  var metaData : File = _
+  @Argument(fullName="bamList",shortName="I",doc="list of bam files (single .list file)",required=true)
+  var bamList : File = _
+  @Argument(fullName="outputBase",shortName="o",doc="Base for output files",required=true)
+  var outBase : String = _
+  @Argument(fullName="noBedGraph",shortName="nbg",doc="Don't use bedgraph format",required=false)
+  var dontUseBedGraph : Boolean = false
+  @Argument(fullName="reference",shortName="R",doc="Reference file, if not hg19",required=false)
+  var referenceFile : File = new File("/seq/references/Homo_sapiens_assembly19/v1/Homo_sapiens_assembly19.fasta")
+  @Argument(fullName="intervals",shortName="L",doc="Interval list, if not whole-exome 1.1",required=false)
+  var intervalsFile : File = new File("/seq/references/HybSelOligos/whole_exome_agilent_1.1_refseq_plus_3_boosters/whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.interval_list")
+  @Argument(fullName="memoryLimit",shortName="M",doc="Memory limit for SG jobs",required=false)
+  var memLimit : Int = 4
+  @Argument(fullName="scatterJobs",shortName="SJ",doc="Number of scatter jobs",required=false)
+  var scatterJobs : Int = 75
+
+  // Names of the association tests; the same names appear in the output file names below.
+  val ASSOCIATION_TESTS = List("BaseQualityScore","InsertSizeDistribution","MappingQuality0",
+    "MateMappingQuality","MateOtherContig","MateSameStrand","MateUnmapped","MismatchRate",
+    "ProperPairs","ReadClipping","ReadIndels","ReadMappingQuality","ReferenceMismatches",
+    "SampleDepth")
+
+  /**
+   * RegionalAssociation GATK command with one declared output file per association test.
+   *
+   * @param base prefix of every output file name; also appended to the command line as -o
+   * @param ext  extension appended to every output file name
+   */
+  class RegionalAssociationSG(base : String, ext : String) extends CommandLineGATK with ScatterGatherableFunction{
+    this.analysis_type = "RegionalAssociation"
+
+    // Builds the output file "<base>.<test>.<ext>" for one association test.
+    private def outputFor(test : String) : File = new File("%s.%s.%s".format(base, test, ext))
+
+    // the rest are output files implicitly constructed by the multiplexer
+
+    @Output(doc="bqs")
+    var bqs : File = outputFor("BaseQualityScore")
+    @Output(doc="isd")
+    var isd : File = outputFor("InsertSizeDistribution")
+    @Output(doc="mq0")
+    var mq0 : File = outputFor("MappingQuality0")
+    @Output(doc="mmq")
+    var mmq : File = outputFor("MateMappingQuality")
+    @Output(doc="moc")
+    var moc : File = outputFor("MateOtherContig")
+    @Output(doc="mss")
+    var mss : File = outputFor("MateSameStrand")
+    @Output(doc="mu")
+    var mu : File = outputFor("MateUnmapped")
+    @Output(doc="mmr")
+    var mmr : File = outputFor("MismatchRate")
+    @Output(doc="pp")
+    var pp : File = outputFor("ProperPairs")
+    @Output(doc="rc")
+    var rc : File = outputFor("ReadClipping")
+    @Output(doc="ri")
+    var ri : File = outputFor("ReadIndels")
+    @Output(doc="rmq")
+    var rmq : File = outputFor("ReadMappingQuality")
+    @Output(doc="rm")
+    var rm : File = outputFor("ReferenceMismatches")
+    @Output(doc="sd")
+    var sd : File = outputFor("SampleDepth")
+
+    // Append " -o <base>" to the command line produced by the parent class.
+    override def commandLine = super.commandLine + " -o %s".format(base)
+  }
+
+  /** Builds the single RegionalAssociation function from the script arguments and adds it. */
+  def script = {
+    // "tdf" when bedgraph output is switched off, "bedgraph" otherwise.
+    val ext = if ( dontUseBedGraph ) "tdf" else "bedgraph"
+
+    // Read every BAM path up front so the list file is closed even if reading fails.
+    val bamListReader = new XReadLines(bamList)
+    val bamFiles = try {
+      asScalaIterable(bamListReader.readLines).map(u => new File(u)).toList
+    } finally {
+      bamListReader.close()
+    }
+
+    val association = new RegionalAssociationSG(outBase,ext)
+    association.intervals :+= intervalsFile
+    association.reference_sequence = referenceFile
+    association.jarFile = gatkJar
+    association.input_file ++= bamFiles
+    association.scatterCount = scatterJobs
+    association.memoryLimit = Some(memLimit)
+
+    add(association)
+  }
+}
\ No newline at end of file