73 lines
2.5 KiB
Scala
Executable File
73 lines
2.5 KiB
Scala
Executable File
import org.broadinstitute.sting.queue.QScript
|
|
import org.broadinstitute.sting.queue.extensions.gatk.{RealignerTargetCreator, RodBind, IndelRealigner}
|
|
|
|
/**
 * Created by IntelliJ IDEA.
 * User: carneiro
 * Date: 3/17/11
 * Time: 11:29 AM
 * To change this template use File | Settings | File Templates.
 */
|
|
|
|
|
|
/**
 * Queue script that realigns reads around indels for the given BAM input.
 *
 * Two GATK walkers are configured and scheduled:
 *  1. RealignerTargetCreator writes `<input>.all_indels.intervals`
 *  2. IndelRealigner reads those intervals and writes `<input>.Qclean.bam`
 *
 * Both walkers are bound to the same dbSNP and known-indel VCF tracks and use the
 * same reference, GATK jar and scatter count.
 */
class justClean extends QScript {

  /** Path to GenomeAnalysisTK.jar; assigned to `jarFile` on both walkers. */
  @Input(doc="path to GenomeAnalysisTK.jar", shortName="gatk", required=true)
  var GATKjar: File = _

  /** Input BAM file (or list of BAM files); the output names are derived from it. */
  @Input(doc="input BAM file - or list of BAM files", shortName="i", required=true)
  var input: File = _

  /** Reference fasta; optional on the command line, defaults to the path below. */
  @Input(doc="Reference fasta file", shortName="R", required=false)
  var reference: File = new File("/seq/references/Homo_sapiens_assembly19/v1/Homo_sapiens_assembly19.fasta")

  /** dbSNP VCF bound as the "dbsnp" ROD on both walkers. */
  @Input(doc="dbsnp ROD to use (VCF)", shortName="D", required=false)
  var dbSNP: File = new File("/humgen/gsa-hpprojects/GATK/data/dbsnp_132_b37.leftAligned.vcf")

  /** Extra VCF bound as the "indels" ROD on both walkers. */
  @Input(doc="extra VCF files to use as reference indels for Indel Realignment", shortName="indels", required=false)
  var indels: File = new File("/humgen/gsa-hpprojects/GATK/data/Comparisons/Unvalidated/AFR+EUR+ASN+1KG.dindel_august_release_merged_pilot1.20110126.sites.vcf")

  /** Prefix prepended to every job name built in [[script]]. */
  val queueLogDir: String = ".qlog/"

  /**
   * Builds the two-step realignment pipeline and registers both steps with Queue.
   *
   * The target-creation step must be registered together with the realigner:
   * the realigner's `targetIntervals` is the very file the target creator writes,
   * so scheduling the realigner alone leaves that file unproduced by this script.
   */
  def script = {

    println(GATKjar)

    // Output names are derived from the input by swapping the ".bam" extension.
    val outBam = swapExt(input, ".bam", ".Qclean.bam")
    val tIntervals = swapExt(input, ".bam", ".all_indels.intervals")

    // Step 1: compute the intervals to realign.
    val target = new RealignerTargetCreator()
    target.input_file :+= input
    target.out = tIntervals
    target.reference_sequence = reference
    target.mismatchFraction = 0.0
    target.rodBind :+= RodBind("dbsnp", "VCF", dbSNP)
    target.rodBind :+= RodBind("indels", "VCF", indels)
    target.memoryLimit = 6 // presumably gigabytes -- TODO confirm against Queue docs
    target.jobName = queueLogDir + tIntervals + ".atarget"
    target.jarFile = GATKjar
    target.scatterCount = 84

    // Step 2: realign the reads over the intervals written by step 1.
    val clean = new IndelRealigner()
    clean.input_file :+= input
    clean.targetIntervals = tIntervals
    clean.out = outBam
    clean.reference_sequence = reference
    clean.rodBind :+= RodBind("dbsnp", "VCF", dbSNP)
    clean.rodBind :+= RodBind("indels", "VCF", indels)
    clean.doNotUseSW = true
    clean.compress = 0
    clean.U = org.broadinstitute.sting.gatk.arguments.ValidationExclusion.TYPE.NO_READ_ORDER_VERIFICATION // todo -- update clean with the last consensus between Tim, Matt and Eric. This is ugly!
    clean.jobName = queueLogDir + outBam + ".clean"
    clean.jarFile = GATKjar
    clean.memoryLimit = 12 // presumably gigabytes -- TODO confirm against Queue docs
    clean.scatterCount = 84

    // Previously only `clean` was added, leaving the fully configured `target`
    // unscheduled; register both so the intervals file is generated.
    add(target, clean)
  }
}
|