// gatk-3.8/scala/qscript/oneoffs/chartl/private_mutations.q

import collection.JavaConversions._
import java.io.FileNotFoundException
import org.broadinstitute.sting.datasources.pipeline._
import org.broadinstitute.sting.queue.extensions.gatk._
import org.broadinstitute.sting.queue.library.ipf.vcf._
import org.broadinstitute.sting.queue.pipeline._
import org.broadinstitute.sting.queue.QScript
import org.broadinstitute.sting.utils.yaml.YamlUtils
/**
 * Queue script that genotypes the QC-passing samples of a project in batches,
 * hand-filters and merges the per-batch calls into a single VCF, splits that
 * VCF by the samples' "Population" tag, and runs VariantEval with the
 * "ACTransitionTable" module on the merged VCF and on each population subset.
 *
 * Pipeline stages, in the order they are added:
 *  1. UnifiedGenotyper on each batch of up to 100 samples' "recalibrated" BAMs.
 *  2. Hand filtration of each batch, with the indel/SV mask bound as "mask".
 *  3. Merge of the filtered batches into `finalMergedVCF`.
 *  4. Extraction of the variant intervals and of the AFRAMR / EURAMR+UNK samples.
 *  5. Three VariantEval runs writing `.perm.csv` reports.
 */
class private_mutations extends QScript {
  @Argument(shortName="yaml",fullName="eomiYaml",doc="Project YAML file",required=true) var eomiYaml: File = _
  @Argument(shortName="sting",fullName="stingDir",doc="path to the Sting directory",required=true) var sting: String = _
  @Argument(shortName="out",fullName="finalVCF",doc="the merged vcf to write to", required=true) var finalMergedVCF : File = _
  @Argument(shortName="mask",fullName="indelAndSVMask",doc="The indel/SV mask to apply during filtration",required=true) var filtMask : File = _

  // Location of the GATK jar; assigned at the start of script() from the Sting directory.
  var gatkjar : File = _

  /**
   * Null-safe tag comparison.
   *
   * @param sample   the sample whose tags are inspected
   * @param tag      the tag key to look up
   * @param accepted the values that count as a match
   * @return true when the sample's value for `tag` equals one of `accepted`;
   *         false when it does not, including when the tag lookup yields null
   */
  private def hasTagValue(sample: PipelineSample, tag: String, accepted: String*): Boolean =
    accepted.contains(sample.getTags.get(tag))

  /**
   * Builds a VariantEval that runs only the "ACTransitionTable" module
   * (standard modules disabled) and writes a CSV report.
   *
   * @param vcLib    library whose addTrait is applied to the new VariantEval
   * @param bindings rod bindings to append, in order
   * @param output   file the report is written to
   * @return the configured VariantEval (not yet added to the script)
   */
  private def transitionTableEval(vcLib: VariantCalling, bindings: List[RodBind], output: File): VariantEval = {
    val eval : VariantEval = vcLib.addTrait(new VariantEval)
    bindings.foreach( b => eval.rodBind :+= b )
    eval.noStandard = true
    eval.E :+= "ACTransitionTable"
    eval.out = output
    // The enum value is assigned directly; no Option wrapper is used.
    eval.reportType = org.broadinstitute.sting.utils.report.VE2ReportFactory.VE2TemplateType.CSV
    eval
  }

  /** Assembles the whole pipeline; see the class comment for the stage order. */
  def script = {
    gatkjar = new File(sting+"/dist/GenomeAnalysisTK.jar")

    val input_pipeline : Pipeline = YamlUtils.load(classOf[Pipeline],eomiYaml)
    val eomi_pipeline : Pipeline = new Pipeline
    // use only QC-positive samples (a sample with no QCStatus tag is treated as not passing)
    eomi_pipeline.setProject( input_pipeline.getProject )
    eomi_pipeline.setSamples( input_pipeline.getSamples.filter( p => hasTagValue(p,"QCStatus","PASS") ) )

    val vcLib : VariantCalling = new VariantCalling(eomi_pipeline,gatkjar)
    val pmLib : ProjectManagement = new ProjectManagement(sting)

    /*eomi_pipeline.getSamples.foreach( p =>
      if ( ! p.getBamFiles.get("recalibrated").exists) throw new FileNotFoundException(
        p.getBamFiles.get("recalibrated").getAbsolutePath+" does not exist" ))*/

    val samples : List[PipelineSample] = eomi_pipeline.getSamples.toList
    val projectName = eomi_pipeline.getProject.getName

    // Genotype in batches of at most 100 samples; batch files are numbered from 1.
    val batches : List[List[PipelineSample]] = samples.grouped(100).toList
    val genotypers : List[UnifiedGenotyper] = batches.zipWithIndex.map { case (batch, index) =>
      val batchBams = batch.map( p => p.getBamFiles.get("recalibrated") )
      vcLib.StandardUnifiedGenotyper(batchBams,new File(projectName+"_batch%d.raw.vcf".format(1+index)))
    }
    addAll(genotypers)

    // Hand-filter each batch and additionally mask sites covered by the indel/SV mask.
    val handFilters : List[VariantFiltration] = genotypers.map( g => vcLib.StandardHandfilter(g.out,swapExt(g.out,".raw.vcf",".handfiltered.vcf")))
    handFilters.foreach { filter =>
      filter.rodBind :+= new RodBind("mask","bed",filtMask)
      filter.mask = "NearIndelOrSV"
    }
    addAll(handFilters)

    addAll(pmLib.MergeBatches(handFilters.map( _.out), batches.flatten.map( p => p.getBamFiles.get("recalibrated")),
      finalMergedVCF,eomi_pipeline.getProject.getReferenceFile,20))

    // Population subsets; samples tagged "UNK" are grouped with "EURAMR".
    val afr_sams : List[PipelineSample] = samples.filter( p => hasTagValue(p,"Population","AFRAMR") )
    val eur_sams : List[PipelineSample] = samples.filter( p => hasTagValue(p,"Population","EURAMR","UNK") )

    val variant_loci : VCFExtractIntervals = new VCFExtractIntervals(finalMergedVCF,swapExt(finalMergedVCF,".vcf",".intervals.list"),false)
    add(variant_loci)

    val extract_afr : VCFExtractSamples = new VCFExtractSamples(finalMergedVCF,swapExt(finalMergedVCF,".vcf",".afr.vcf"),afr_sams.map(_.getId))
    val extract_eur : VCFExtractSamples = new VCFExtractSamples(finalMergedVCF,swapExt(finalMergedVCF,".vcf",".eur+unk.vcf"),eur_sams.map(_.getId))
    add(extract_afr)
    add(extract_eur)

    // Merged call set on its own.
    val eval_all : VariantEval = transitionTableEval(vcLib,
      List(new RodBind("evalEOMI","vcf",finalMergedVCF)),
      swapExt(finalMergedVCF,".vcf",".perm.csv"))
    add(eval_all)

    // AFR subset bound as eval, EUR+UNK subset bound as comp.
    val eval_afr : VariantEval = transitionTableEval(vcLib,
      List(new RodBind("evalAFR","VCF",extract_afr.outputVCF), new RodBind("compEUR","VCF",extract_eur.outputVCF)),
      swapExt(extract_afr.outputVCF,".vcf",".perm.csv"))
    add(eval_afr)

    // EUR+UNK subset bound as eval, AFR subset bound as comp.
    val eval_eur : VariantEval = transitionTableEval(vcLib,
      List(new RodBind("compAFR","VCF",extract_afr.outputVCF), new RodBind("evalEUR","VCF",extract_eur.outputVCF)),
      swapExt(extract_eur.outputVCF,".vcf",".perm.csv"))
    add(eval_eur)
  }
}