2011-03-24 03:42:29 +08:00
import org.broadinstitute.sting.commandline.{Argument, Output, Input}
2011-03-24 04:08:53 +08:00
import org.broadinstitute.sting.queue.extensions.gatk.{IntervalScatterFunction, CommandLineGATK}
2011-03-24 03:42:29 +08:00
import org.broadinstitute.sting.queue.QScript
import org.broadinstitute.sting.utils.text.XReadLines
import collection.JavaConversions._
/**
 * Queue script that configures a single scatter-gathered GATK "RegionalAssociation" job
 * from the command-line arguments below and adds it to the pipeline.
 */
class ScatterGatherAssociation extends QScript {

  // The @Argument fields stay as `var`s: they are declared with placeholder/default values
  // and are expected to be populated from the command line before script() runs.
  @Argument(fullName="gatkJar",shortName="gatk",doc="Path to the GATK jarfile",required=true)
  var gatkJar : File = _

  @Argument(fullName="metaData",shortName="SM",doc="Sample meta data",required=true)
  var metaData : File = _

  @Argument(fullName="bamList",shortName="I",doc="list of bam files (single .list file)",required=true)
  var bamList : File = _

  @Argument(fullName="outputBase",shortName="o",doc="Base for output files",required=true)
  var outBase : String = _

  @Argument(fullName="noBedGraph",shortName="nbg",doc="Don't use bedgraph format",required=false)
  var dontUseBedGraph : Boolean = false

  @Argument(fullName="reference",shortName="R",doc="Reference file, if not hg19",required=false)
  var referenceFile : File = new File("/seq/references/Homo_sapiens_assembly19/v1/Homo_sapiens_assembly19.fasta")

  @Argument(fullName="intervals",shortName="L",doc="Interval list, if not whole-exome 1.1",required=false)
  var intervalsFile : File = new File("/seq/references/HybSelOligos/whole_exome_agilent_1.1_refseq_plus_3_boosters/whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.interval_list")

  @Argument(fullName="memoryLimit",shortName="M",doc="Memory limit for SG jobs",required=false)
  var memLimit : Int = 4

  @Argument(fullName="scatterJobs",shortName="SJ",doc="Number of scatter jobs",required=false)
  var scatterJobs : Int = 125

  // Names of the association tests; not referenced elsewhere in this class.
  val ASSOCIATION_TESTS = List("BaseQualityScore","InsertSizeDistribution","MappingQuality0",
    "MateMappingQuality","MateOtherContig","MateSameStrand","MateUnmapped","MismatchRate",
    "ProperPairs","ReadClipping","ReadIndels","ReadMappingQuality","ReferenceMismatches",
    "SampleDepth")

  /**
   * Scatter-gatherable GATK command line with analysis type "RegionalAssociation".
   *
   * @param base output base name; passed to the walker via `-o` and used as the prefix
   *             of the declared output file name
   * @param ext  file extension used as the suffix of the declared output file name
   */
  class RegionalAssociationSG(base : String, ext : String) extends CommandLineGATK with ScatterGatherableFunction {
    this.analysis_type = "RegionalAssociation"

    // When true, " -bg " is appended to the generated command line.
    @Argument(doc="useBed")
    var useBed : Boolean = true

    // the rest are output files implicitly constructed by the multiplexer
    @Output(doc="bqs")
    @Gather(classOf[SimpleTextGatherFunction])
    var bqs : File = new File(String.format("%s.%s.%s", base, "BaseQualityScore", ext))

    // NOTE(review): the outputs below are disabled, yet commandLine still passes "-AT ALL";
    // confirm that declaring only `bqs` as a gathered output is intended.
    /*
    @Output(doc="isd")
    @Gather(classOf[SimpleTextGatherFunction])
    var isd : File = new File(String.format("%s.%s.%s",base,"InsertSizeDistribution",ext))
    @Output(doc="mq0")
    @Gather(classOf[SimpleTextGatherFunction])
    var mq0 : File = new File(String.format("%s.%s.%s",base,"MappingQuality0",ext))
    @Output(doc="mmq")
    @Gather(classOf[SimpleTextGatherFunction])
    var mmq : File = new File(String.format("%s.%s.%s",base,"MateMappingQuality",ext))
    @Output(doc="moc")
    @Gather(classOf[SimpleTextGatherFunction])
    var moc : File = new File(String.format("%s.%s.%s",base,"MateOtherContig",ext))
    @Output(doc="mss")
    @Gather(classOf[SimpleTextGatherFunction])
    var mss : File = new File(String.format("%s.%s.%s",base,"MateSameStrand",ext))
    @Output(doc="mu")
    @Gather(classOf[SimpleTextGatherFunction])
    var mu : File = new File(String.format("%s.%s.%s",base,"MateUnmapped",ext))
    @Output(doc="mmr")
    @Gather(classOf[SimpleTextGatherFunction])
    var mmr : File = new File(String.format("%s.%s.%s",base,"MismatchRate",ext))
    @Output(doc="pp")
    @Gather(classOf[SimpleTextGatherFunction])
    var pp : File = new File(String.format("%s.%s.%s",base,"ProperPairs",ext))
    @Output(doc="rc")
    @Gather(classOf[SimpleTextGatherFunction])
    var rc : File = new File(String.format("%s.%s.%s",base,"ReadClipping",ext))
    @Output(doc="ri")
    @Gather(classOf[SimpleTextGatherFunction])
    var ri : File = new File(String.format("%s.%s.%s",base,"ReadIndels",ext))
    @Output(doc="rmq")
    @Gather(classOf[SimpleTextGatherFunction])
    var rmq : File = new File(String.format("%s.%s.%s",base,"ReadMappingQuality",ext))
    @Output(doc="rm")
    @Gather(classOf[SimpleTextGatherFunction])
    var rm : File = new File(String.format("%s.%s.%s",base,"ReferenceMismatches",ext))
    @Output(doc="sd")
    @Gather(classOf[SimpleTextGatherFunction])
    var sd : File = new File(String.format("%s.%s.%s",base,"SampleDepth",ext))
    @Output(doc="rai")
    @Gather(classOf[SimpleTextGatherFunction])
    var rli : File = new File(String.format("%s.%s.%s",base,"ReadsAberrantInsertSize",ext))
    @Output(doc="rwi")
    @Gather(classOf[SimpleTextGatherFunction])
    var rwi : File = new File(String.format("%s.%s.%s",base,"ReadsWithIndels",ext))
    */

    /**
     * Extends the inherited command line with " -AT ALL -o <base>", followed by
     * " -bg " when `useBed` is set.
     */
    override def commandLine = {
      val bedFlag = if ( useBed ) " -bg " else ""
      super.commandLine + " -AT ALL -o %s%s".format(base, bedFlag)
    }
  }

  /**
   * Builds one RegionalAssociationSG job from the script arguments and adds it.
   * The BAM inputs are the lines of `bamList`, each wrapped as a File.
   */
  def script = {
    // The output extension follows the requested format: "tdf" when bedgraph is disabled.
    val ext = if ( dontUseBedGraph ) "tdf" else "bedgraph"

    val association = new RegionalAssociationSG(outBase, ext)
    association.useBed = ! dontUseBedGraph
    association.sample_metadata :+= metaData
    association.intervals :+= intervalsFile
    association.reference_sequence = referenceFile
    association.jarFile = gatkJar
    association.input_file ++= asScalaIterable((new XReadLines(bamList)).readLines).map(u => new File(u)).toList
    association.scatterCount = scatterJobs
    association.memoryLimit = Some(memLimit)
    association.scatterClass = classOf[IntervalScatterFunction]
    add(association)
  }
}