146 lines
6.5 KiB
Scala
Executable File
146 lines
6.5 KiB
Scala
Executable File
package oneoffs.depristo
|
|
|
|
//import org.broadinstitute.sting.datasources.pipeline.Pipeline
|
|
import org.broadinstitute.sting.queue.extensions.gatk._
|
|
import org.broadinstitute.sting.queue.QScript
|
|
import collection.JavaConversions._
|
|
import org.broadinstitute.sting.queue.extensions.picard.PicardBamFunction
|
|
import org.broadinstitute.sting.queue.function.JavaCommandLineFunction
|
|
|
|
|
|
class CleaningTest extends QScript {
|
|
qscript =>
|
|
|
|
@Input(doc="path to GATK jar", shortName="gatk", required=false)
|
|
var gatkJar: File = new File("/home/radon01/depristo/dev/GenomeAnalysisTKFromLaptop/trunk/dist/GenomeAnalysisTK.jar")
|
|
|
|
@Input(doc="the chromosome to process", shortName="chr", required=false)
|
|
var chr: String = "20"
|
|
|
|
@Input(doc="the chromosome to process", shortName="L", required=false)
|
|
var range: String = _
|
|
|
|
@Input(doc="output path", shortName="outputDir", required=false)
|
|
var outputDir: String = "/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/isizeConstrainedRealigner/"
|
|
|
|
@Input(doc="base output filename", shortName="baseName", required=false)
|
|
var baseName: String = ""
|
|
|
|
@Input(doc="path to tmp space for storing intermediate bam files", shortName="outputTmpDir", required=false)
|
|
var outputTmpDir: String = "/broad/shptmp/depristo/tmp"
|
|
|
|
@Input(doc="path to Picard FixMateInformation.jar. See http://picard.sourceforge.net/ .", required=false)
|
|
var picardFixMatesJar: File = new java.io.File("/seq/software/picard/current/bin/FixMateInformation.jar")
|
|
var picardValidateJar: File = new java.io.File("/seq/software/picard/current/bin/ValidateSamFile.jar")
|
|
var picardSortSamJar: File = new java.io.File("/seq/software/picard/current/bin/SortSam.jar")
|
|
|
|
private val tmpDir: File = new File("/broad/shptmp/depristo/tmp/")
|
|
private val reference: File = new File("/humgen/1kg/reference/human_g1k_v37.fasta")
|
|
private val dbSNP: File = new File("/humgen/gsa-hpprojects/GATK/data/dbsnp_132_b37.leftAligned.vcf")
|
|
private val dindelEURCalls: String = "/humgen/1kg/DCC/ftp/technical/working/20110111_august_dindel_indel_calls/EUR.dindel_august_release.20110110.sites.vcf.gz"
|
|
// val chromosomeLength = List(249250621,243199373,198022430,191154276,180915260,171115067,159138663,146364022,141213431,135534747,135006516,133851895,115169878,107349540,102531392,90354753,81195210,78077248,59128983,63025520,48129895,51304566)
|
|
|
|
// private var pipeline: Pipeline = _
|
|
|
|
trait CommandLineGATKArgs extends CommandLineGATK {
|
|
this.jarFile = qscript.gatkJar
|
|
this.reference_sequence = qscript.reference
|
|
this.memoryLimit = 4
|
|
this.jobTempDir = qscript.tmpDir
|
|
}
|
|
|
|
def script = {
|
|
val interval = qscript.chr
|
|
val bamList: File = new File("/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/isizeConstrainedRealigner/CEU.chr%s.list".format(qscript.chr))
|
|
//val bamList: File = new File("/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/isizeConstrainedRealigner/FIN.chr%s.3samples.list".format(qscript.chr))
|
|
val targetIntervals: File = new File("%s/chr_%s.intervals".format(outputDir, qscript.chr))
|
|
|
|
Console.println("interval " + interval)
|
|
|
|
// 1.) Create cleaning targets
|
|
var target = new RealignerTargetCreator with CommandLineGATKArgs
|
|
target.input_file :+= bamList
|
|
target.intervalsString :+= interval
|
|
target.out = targetIntervals
|
|
target.mismatchFraction = 0.0
|
|
target.rodBind :+= RodBind("dbsnp", "VCF", qscript.dbSNP)
|
|
target.rodBind :+= RodBind("indels3", "VCF", qscript.dindelEURCalls)
|
|
//target.jobName = baseName + ".target"
|
|
add(target)
|
|
|
|
for ( cm <- List(true, false) ) {
|
|
// 2.) Clean without SW
|
|
var clean = new IndelRealigner with CommandLineGATKArgs
|
|
val cleanedBam = new File(outputDir + "cleaned.cm_%b.bam".format(cm))
|
|
|
|
clean.input_file :+= bamList
|
|
clean.intervalsString :+= interval + (if ( range != null ) ":" + range else "")
|
|
clean.targetIntervals = targetIntervals
|
|
clean.out = if ( cm ) cleanedBam else new File(cleanedBam + ".intermediate.bam")
|
|
clean.doNotUseSW = true
|
|
clean.constrainMovement = cm
|
|
clean.baq = org.broadinstitute.sting.utils.baq.BAQ.CalculationMode.OFF
|
|
clean.rodBind :+= RodBind("dbsnp", "VCF", qscript.dbSNP)
|
|
clean.rodBind :+= RodBind("indels3", "VCF", qscript.dindelEURCalls)
|
|
//clean.sortInCoordinateOrderEvenThoughItIsHighlyUnsafe = true
|
|
//clean.jobName = baseName + cm + ".clean"
|
|
|
|
Console.println("CLEAN")
|
|
add(clean)
|
|
|
|
if ( ! cm ) {
|
|
// Explicitly run fix mates if the function won't be scattered.
|
|
val fixMates = new PicardBamFunction {
|
|
// Declare inputs/outputs for dependency tracking.
|
|
@Input(doc="unfixed bam") var unfixed: File = _
|
|
@Output(doc="fixed bam") var fixed: File = _
|
|
def inputBams = List(unfixed)
|
|
def outputBam = fixed
|
|
}
|
|
|
|
//fixMates.jobOutputFile = new File(".queue/logs/Cleaning/%s/FixMates.out".format(sampleId))
|
|
fixMates.memoryLimit = 4
|
|
fixMates.jarFile = qscript.picardFixMatesJar
|
|
fixMates.unfixed = clean.out
|
|
fixMates.fixed = cleanedBam
|
|
//fixMates.analysisName = "FixMates"
|
|
|
|
// Add the fix mates explicitly
|
|
Console.println("fixMates")
|
|
add(fixMates)
|
|
}
|
|
|
|
val validate = new JavaCommandLineFunction {
|
|
// Declare inputs/outputs for dependency tracking.
|
|
@Input(doc="unfixed bam") var unfixed: File = _
|
|
def inputBams = List(unfixed)
|
|
override def commandLine = super.commandLine + "%s%s%s IGNORE=INVALID_CIGAR IGNORE=MATE_NOT_FOUND".format(
|
|
optional(" VALIDATION_STRINGENCY=", "SILENT"), repeat(" INPUT=", inputBams), " TMP_DIR=" + jobTempDir)
|
|
}
|
|
|
|
//fixMates.jobOutputFile = new File(".queue/logs/Cleaning/%s/FixMates.out".format(sampleId))
|
|
validate.memoryLimit = 2
|
|
validate.jarFile = qscript.picardValidateJar
|
|
validate.unfixed = cleanedBam
|
|
add(validate)
|
|
|
|
val toQueryName = new PicardBamFunction {
|
|
// Declare inputs/outputs for dependency tracking.
|
|
@Input(doc="coordiante bam") var cobam: File = _
|
|
@Output(doc="query bam") var qnbam: File = _
|
|
def inputBams = List(cobam)
|
|
def outputBam = qnbam
|
|
}
|
|
|
|
//fixMates.jobOutputFile = new File(".queue/logs/Cleaning/%s/FixMates.out".format(sampleId))
|
|
toQueryName.memoryLimit = 4
|
|
toQueryName.jarFile = qscript.picardSortSamJar
|
|
toQueryName.cobam = cleanedBam
|
|
toQueryName.qnbam = new File(cleanedBam.getAbsolutePath + ".qn.bam")
|
|
add(toQueryName)
|
|
|
|
Console.println("loop done")
|
|
}
|
|
}
|
|
}
|