// gatk-3.8/scala/qscript/oneoffs/chartl/BatchMerge.q
//
// File-listing metadata carried over from the repository browser:
// 105 lines, 4.6 KiB, Plaintext, Executable File

import org.broadinstitute.sting.commandline.Hidden
import org.broadinstitute.sting.gatk.walkers.genotyper.{GenotypeLikelihoodsCalculationModel, UnifiedGenotyperEngine}
import org.broadinstitute.sting.queue.extensions.gatk._
import org.broadinstitute.sting.queue.library.ipf.vcf.{VCFSimpleMerge, VCFExtractSites,VCFExtractIntervals}
import org.broadinstitute.sting.queue.{QException, QScript}
import collection.JavaConversions._
import org.broadinstitute.sting.utils.baq.BAQ
import org.broadinstitute.sting.utils.text.XReadLines
/**
 * Queue pipeline that combines a list of VCFs and then runs the UnifiedGenotyper
 * likelihood/calling steps over a list of BAMs at the combined alleles, in batches.
 *
 * Jobs scheduled by `script`:
 *  1. CombineVariants over every VCF named in `vcfList`.
 *  2. VCFExtractIntervals on the combined VCF, producing an interval list.
 *  3. One UGCalcLikelihoods job per group of BAMs from `bamList`, restricted to that
 *     interval list and given the combined VCF as its alleles file.
 *  4. A single UGCallVariants job bound to every likelihoods VCF, written to `batchOut`.
 */
class batchMergePipeline extends QScript {
  batchMerge =>

  @Argument(doc="VCF list",shortName="vcfs") var vcfList: File = _
  @Argument(doc="bam list",shortName="bams") var bamList: File = _
  @Argument(doc="sting dir",shortName="sting") var stingDir: String = _
  @Argument(doc="reference file",shortName="ref") var ref: File = _
  @Argument(doc="batched output",shortName="batch") var batchOut: File = _
  //@Argument(doc="read UG settings from header",shortName="ugh") var ugSettingsFromHeader : Boolean = false
  @Hidden @Argument(doc="Min base q",shortName="mbq",required=false) var mbq : Int = 20
  @Hidden @Argument(doc="Min map q",shortName="mmq",required=false) var mmq : Int = 20
  @Hidden @Argument(doc="baq gap open penalty, using sets baq to calc when necessary",shortName="baqp",required=false) var baq : Int = -1

  /**
   * Builds the job graph described on the class and registers the jobs with Queue
   * via `add` / `addAll`.
   */
  def script = {
    val vcfs: List[File] = extractFileEntries(vcfList)
    val bams: List[File] = extractFileEntries(bamList)

    // Single definition of the GATK jar location, shared by every GATK job below
    // (previously rebuilt from stingDir in three separate places).
    val stingGatkJar = new File(batchMerge.stingDir + "/dist/GenomeAnalysisTK.jar")

    // Number of BAMs handed to each UGCalcLikelihoods job.
    val bamsPerBatch = 20

    trait ExtractArgs extends VCFExtractSites {
      this.keepFilters = false
      this.keepInfo = false
      this.keepQual = false
    }

    trait CombineVariantsArgs extends CombineVariants {
      this.reference_sequence = batchMerge.ref
      this.jarFile = stingGatkJar
      this.scatterCount = 10
      this.memoryLimit = 4
    }

    val combine: CombineVariants = new CombineVariants with CombineVariantsArgs
    combine.out = swapExt(batchOut, ".vcf", ".variant.combined.vcf")
    combine.rodBind ++= vcfs.map(vcf => new RodBind(vcf.getName, "vcf", vcf))
    add(combine)

    // NOTE: the per-VCF site extraction and simple-merge jobs below are constructed
    // but NOT added to the pipeline (their add calls are commented out), so they
    // never run. They are kept so the alternative path can be re-enabled.
    val getVariantAlleles: List[VCFExtractSites] =
      vcfs.map(vcf => new VCFExtractSites(vcf, swapExt(batchOut.getParent, vcf, ".vcf", ".alleles.vcf")) with ExtractArgs)
    val combineVCFs: VCFSimpleMerge = new VCFSimpleMerge
    combineVCFs.vcfs = getVariantAlleles.map(_.outVCF)
    combineVCFs.fai = new File(ref.getAbsolutePath + ".fai")
    combineVCFs.outVCF = swapExt(batchOut, ".vcf", ".pf.alleles.vcf")

    // NOTE(review): meaning of the trailing `true` flag is defined by
    // VCFExtractIntervals, which is not visible here -- confirm before changing.
    val extractIntervals: VCFExtractIntervals =
      new VCFExtractIntervals(combine.out, swapExt(combine.out, ".vcf", ".intervals.list"), true)
    //addAll(getVariantAlleles)
    //add(combineVCFs,extractIntervals)
    add(extractIntervals)

    trait CalcLikelihoodArgs extends UGCalcLikelihoods {
      this.reference_sequence = batchMerge.ref
      this.min_base_quality_score = batchMerge.mbq
      this.min_mapping_quality_score = batchMerge.mmq
      // BAQ is only configured when a non-negative gap open penalty was supplied
      // (the argument defaults to -1).
      if ( batchMerge.baq >= 0 ) {
        this.baqGapOpenPenalty = batchMerge.baq
        this.baq = BAQ.CalculationMode.CALCULATE_AS_NECESSARY
      }
      this.intervals :+= extractIntervals.listOut
      this.allelesVCF = combine.out
      this.jarFile = stingGatkJar
      this.memoryLimit = 4
      this.scatterCount = 60
      this.output_mode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_ALL_SITES
      this.genotyping_mode = GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES
    }

    // Creates the likelihood job for one batch of BAMs; the batch index is
    // embedded in the output file name so batches do not overwrite each other.
    def newUGCL(batchBams: List[File], batchIndex: Int): UGCalcLikelihoods = {
      val ugcl = new UGCalcLikelihoods with CalcLikelihoodArgs
      ugcl.input_file ++= batchBams
      ugcl.out = new File("MBatch%d.likelihoods.vcf".format(batchIndex))
      ugcl
    }

    val calcs: List[UGCalcLikelihoods] =
      bams.grouped(bamsPerBatch).toList.zipWithIndex.map { case (batchBams, batchIndex) => newUGCL(batchBams, batchIndex) }
    addAll(calcs)

    trait CallVariantsArgs extends UGCallVariants {
      this.reference_sequence = batchMerge.ref
      this.intervals :+= extractIntervals.listOut
      this.jarFile = stingGatkJar
      this.scatterCount = 30
      this.memoryLimit = 8
      this.output_mode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_ALL_SITES
      this.genotyping_mode = GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES
    }

    val cVars: UGCallVariants = new UGCallVariants with CallVariantsArgs
    // Bind every batch's likelihoods VCF under a name derived from its file name.
    cVars.rodBind ++= calcs.map(calc => new RodBind("variant" + calc.out.getName.replace(".vcf", ""), "vcf", calc.out))
    cVars.out = batchOut
    add(cVars)
  }

  /**
   * Reads a list file and returns one File per non-blank line.
   *
   * The reader is always closed, including when reading fails. Blank lines are
   * skipped so that they do not turn into empty-path File entries.
   *
   * @param in text file containing one path per line
   * @return the listed paths, in file order
   */
  override def extractFileEntries(in: File): List[File] = {
    val reader = new XReadLines(in)
    try {
      // Materialise into an immutable List before the reader is closed.
      reader.readLines.toList.filter(line => !line.trim.isEmpty).map(new File(_))
    } finally {
      reader.close()
    }
  }
}