Making the BQSR pipeline publicly available and supported.
this is for all the Pacbio validation that is going on right no in the cancer group. They are all using this script, and I'm happy to support it.
This commit is contained in:
parent
ec9d8313ee
commit
1085df8b7b
|
|
@ -0,0 +1,91 @@
|
|||
package core
|
||||
|
||||
import org.broadinstitute.sting.queue.QScript
|
||||
import org.broadinstitute.sting.queue.extensions.gatk._
|
||||
import net.sf.samtools.SAMFileReader
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: carneiro
|
||||
* Date: 4/20/11
|
||||
* Time: 16:29 PM
|
||||
*/
|
||||
|
||||
|
||||
class RecalibrateBaseQualities extends QScript {
|
||||
|
||||
@Input(doc="path to GenomeAnalysisTK.jar", shortName="gatk", required=true)
|
||||
var GATKjar: File = _
|
||||
|
||||
@Input(doc="input BAM file - or list of BAM files", shortName="i", required=true)
|
||||
var input: File = _
|
||||
|
||||
@Input(doc="path to R resources folder inside the Sting repository", fullName="path_to_r", shortName="r", required=false)
|
||||
var R: String = new File("/humgen/gsa-scr1/carneiro/stable/R")
|
||||
|
||||
@Input(doc="Reference fasta file", shortName="R", required=false)
|
||||
var reference: File = new File("/seq/references/Homo_sapiens_assembly19/v1/Homo_sapiens_assembly19.fasta")
|
||||
|
||||
@Input(doc="dbsnp ROD to use (VCF)", shortName="D", required=false)
|
||||
var dbSNP: File = new File("/humgen/gsa-hpprojects/GATK/data/dbsnp_132_b37.leftAligned.vcf")
|
||||
|
||||
val queueLogDir: String = ".qlog/"
|
||||
var nContigs: Int = 0
|
||||
|
||||
def getNumberOfContigs(bamFile: File): Int = {
|
||||
val samReader = new SAMFileReader(new File(bamFile))
|
||||
return samReader.getFileHeader.getSequenceDictionary.getSequences.size()
|
||||
}
|
||||
|
||||
def script = {
|
||||
|
||||
nContigs = getNumberOfContigs(input)
|
||||
|
||||
val recalFile1: File = new File("recal1.csv")
|
||||
val recalFile2: File = new File("recal2.csv")
|
||||
val recalBam: File = swapExt(input, ".bam", "recal.bam")
|
||||
val path1: String = "before"
|
||||
val path2: String = "after"
|
||||
|
||||
add(cov(input, recalFile1),
|
||||
recal(input, recalFile1, recalBam),
|
||||
cov(recalBam, recalFile2),
|
||||
analyzeCovariates(recalFile1, path1),
|
||||
analyzeCovariates(recalFile2, path2))
|
||||
}
|
||||
|
||||
trait CommandLineGATKArgs extends CommandLineGATK {
|
||||
this.jarFile = GATKjar
|
||||
this.reference_sequence = reference
|
||||
this.memoryLimit = 4
|
||||
this.isIntermediate = true
|
||||
}
|
||||
|
||||
case class cov (inBam: File, outRecalFile: File) extends CountCovariates with CommandLineGATKArgs {
|
||||
this.rodBind :+= RodBind("dbsnp", "VCF", dbSNP)
|
||||
this.covariate ++= List("ReadGroupCovariate", "QualityScoreCovariate", "CycleCovariate", "DinucCovariate")
|
||||
this.input_file :+= inBam
|
||||
this.recal_file = outRecalFile
|
||||
this.analysisName = queueLogDir + outRecalFile + ".covariates"
|
||||
this.jobName = queueLogDir + outRecalFile + ".covariates"
|
||||
this.scatterCount = nContigs
|
||||
}
|
||||
|
||||
case class recal (inBam: File, inRecalFile: File, outBam: File) extends TableRecalibration with CommandLineGATKArgs {
|
||||
this.input_file :+= inBam
|
||||
this.recal_file = inRecalFile
|
||||
this.out = outBam
|
||||
this.isIntermediate = false
|
||||
this.analysisName = queueLogDir + outBam + ".recalibration"
|
||||
this.jobName = queueLogDir + outBam + ".recalibration"
|
||||
this.scatterCount = nContigs
|
||||
}
|
||||
|
||||
case class analyzeCovariates (inRecalFile: File, outPath: String) extends AnalyzeCovariates {
|
||||
this.resources = R
|
||||
this.recal_file = inRecalFile
|
||||
this.output_dir = outPath.toString
|
||||
this.analysisName = queueLogDir + inRecalFile + ".analyze_covariates"
|
||||
this.jobName = queueLogDir + inRecalFile + ".analyze_covariates"
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue