gatk-3.8/public/scala/qscript/core/RecalibrateBaseQualities.scala

91 lines
3.1 KiB
Scala
Executable File

package core
import org.broadinstitute.sting.queue.QScript
import org.broadinstitute.sting.queue.extensions.gatk._
import net.sf.samtools.SAMFileReader
/**
* Created by IntelliJ IDEA.
* User: carneiro
* Date: 4/20/11
* Time: 16:29 PM
*/
class RecalibrateBaseQualities extends QScript {
@Input(doc="path to GenomeAnalysisTK.jar", shortName="gatk", required=true)
var GATKjar: File = _
@Input(doc="input BAM file - or list of BAM files", shortName="i", required=true)
var input: File = _
@Input(doc="path to R resources folder inside the Sting repository", fullName="path_to_r", shortName="r", required=false)
var R: String = new File("/humgen/gsa-scr1/carneiro/stable/R")
@Input(doc="Reference fasta file", shortName="R", required=false)
var reference: File = new File("/seq/references/Homo_sapiens_assembly19/v1/Homo_sapiens_assembly19.fasta")
@Input(doc="dbsnp ROD to use (VCF)", shortName="D", required=false)
var dbSNP: File = new File("/humgen/gsa-hpprojects/GATK/data/dbsnp_132_b37.leftAligned.vcf")
val queueLogDir: String = ".qlog/"
var nContigs: Int = 0
def getNumberOfContigs(bamFile: File): Int = {
val samReader = new SAMFileReader(new File(bamFile))
return samReader.getFileHeader.getSequenceDictionary.getSequences.size()
}
def script = {
nContigs = getNumberOfContigs(input)
val recalFile1: File = new File("recal1.csv")
val recalFile2: File = new File("recal2.csv")
val recalBam: File = swapExt(input, ".bam", "recal.bam")
val path1: String = "before"
val path2: String = "after"
add(cov(input, recalFile1),
recal(input, recalFile1, recalBam),
cov(recalBam, recalFile2),
analyzeCovariates(recalFile1, path1),
analyzeCovariates(recalFile2, path2))
}
trait CommandLineGATKArgs extends CommandLineGATK {
this.jarFile = GATKjar
this.reference_sequence = reference
this.memoryLimit = 4
this.isIntermediate = true
}
case class cov (inBam: File, outRecalFile: File) extends CountCovariates with CommandLineGATKArgs {
this.rodBind :+= RodBind("dbsnp", "VCF", dbSNP)
this.covariate ++= List("ReadGroupCovariate", "QualityScoreCovariate", "CycleCovariate", "DinucCovariate")
this.input_file :+= inBam
this.recal_file = outRecalFile
this.analysisName = queueLogDir + outRecalFile + ".covariates"
this.jobName = queueLogDir + outRecalFile + ".covariates"
this.scatterCount = nContigs
}
case class recal (inBam: File, inRecalFile: File, outBam: File) extends TableRecalibration with CommandLineGATKArgs {
this.input_file :+= inBam
this.recal_file = inRecalFile
this.out = outBam
this.isIntermediate = false
this.analysisName = queueLogDir + outBam + ".recalibration"
this.jobName = queueLogDir + outBam + ".recalibration"
this.scatterCount = nContigs
}
case class analyzeCovariates (inRecalFile: File, outPath: String) extends AnalyzeCovariates {
this.resources = R
this.recal_file = inRecalFile
this.output_dir = outPath.toString
this.analysisName = queueLogDir + inRecalFile + ".analyze_covariates"
this.jobName = queueLogDir + inRecalFile + ".analyze_covariates"
}
}