123 lines
4.7 KiB
Plaintext
Executable File
123 lines
4.7 KiB
Plaintext
Executable File
import com.sun.tools.doclets.internal.toolkit.util.DocFinder.Input
|
|
import org.broadinstitute.sting.gatk.{CommandLineGATK, DownsampleType}
|
|
import java.io.File
|
|
import net.sf.picard.reference.FastaSequenceFile
|
|
import org.broadinstitute.sting.commandline.Argument
|
|
import org.broadinstitute.sting.datasources.pipeline.Pipeline
|
|
import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeCalculationModel.Model
|
|
import org.broadinstitute.sting.queue.extensions.gatk._
|
|
import org.broadinstitute.sting.queue.extensions.picard.PicardBamJarFunction
|
|
import org.broadinstitute.sting.queue.extensions.samtools._
|
|
import org.broadinstitute.sting.queue.pipeline.{BamProcessing,VariantCalling}
|
|
import org.broadinstitute.sting.queue.{QException, QScript}
|
|
import collection.JavaConversions._
|
|
import org.broadinstitute.sting.utils.yaml.YamlUtils
|
|
import org.broadinstitute.sting.utils.report.VE2ReportFactory.VE2TemplateType
|
|
class fullCallingPipelineV2 extends QScript {
|
|
qscript =>
|
|
|
|
@Argument(doc="Number of cleaning jobs", shortName="cleaningJobs")
|
|
var cleaningJobs: Int = _
|
|
|
|
@Argument(doc="the YAML file specifying inputs, interval lists, reference sequence, etc.", shortName="Y")
|
|
var yamlFile: File = _
|
|
|
|
@Input(doc="path to trigger track (for UnifiedGenotyper)", shortName="trigger", required=false)
|
|
var trigger: File = _
|
|
|
|
@Input(doc="path to refseqTable (for GenomicAnnotator)", shortName="refseqTable")
|
|
var refseqTable: File = _
|
|
|
|
@Input(doc="path to Picard FixMateInformation.jar. See http://picard.sourceforge.net/ .", required=false)
|
|
var picardFixMatesJar: File = new java.io.File("/seq/software/picard/current/bin/FixMateInformation.jar")
|
|
|
|
@Input(doc="path to GATK jar")
|
|
var gatkJar: File = _
|
|
|
|
@Input(doc="target Ti/Tv ratio for recalibration", shortName="titv", required=true)
|
|
var target_titv: Float = _
|
|
|
|
@Input(doc="per-sample downsampling level",shortName="dcov",required=false)
|
|
var downsampling_coverage = 300
|
|
|
|
@Input(doc="level of parallelism for UnifiedGenotyper", shortName="snpScatter", required=false)
|
|
var num_snp_scatter_jobs = 20
|
|
|
|
@Input(doc="level of parallelism for IndelGenotyperV2", shortName="indelScatter", required=false)
|
|
var num_indel_scatter_jobs = 5
|
|
|
|
@Input(doc="Skip indel-cleaning for BAM files (for testing only)", shortName="skipCleaning", required=false)
|
|
var skip_cleaning = false
|
|
|
|
//@Input(doc="ADPR script")
|
|
//var adprScript: File = _
|
|
|
|
//@Input(doc="Sequencing maching name (for use by adpr)")
|
|
//var machine: String = _
|
|
|
|
//@Input(doc="Sequencing experiement type (for use by adpr)--Whole_Exome, Whole_Genome, or Hybrid_Selection")
|
|
//var protocol: String = _
|
|
|
|
private var pipeline: Pipeline = _
|
|
|
|
trait CommandLineGATKArgs extends CommandLineGATK {
|
|
this.intervals = qscript.pipeline.getProject.getIntervalList
|
|
this.jarFile = qscript.gatkJar
|
|
this.reference_sequence = qscript.pipeline.getProject.getReferenceFile
|
|
this.memoryLimit = Some(4)
|
|
}
|
|
|
|
|
|
// ------------ SETUP THE PIPELINE ----------- //
|
|
|
|
|
|
def script = {
|
|
pipeline = YamlUtils.load(classOf[Pipeline], qscript.yamlFile)
|
|
var callingLib: VariantCalling = new VariantCalling(qscript.yamlFile,qscript.gatkJar)
|
|
var cleaningLib: BamProcessing = new BamProcessing(qscript.yamlFile,qscript.gatkJar,qscript.picardFixMatesJar)
|
|
|
|
val projectBase: String = qscript.pipeline.getProject.getName
|
|
val cleanedBase: String = projectBase + ".cleaned"
|
|
val uncleanedBase: String = projectBase + ".uncleaned"
|
|
|
|
// there are commands that use all the bam files
|
|
val recalibratedSamples = qscript.pipeline.getSamples.filter(_.getBamFiles.contains("recalibrated"))
|
|
|
|
var bamsToClean: List[(File,File)] = Nil
|
|
var recalBams: List[File] = Nil
|
|
var cleanedBams: List[File] = Nil
|
|
|
|
for ( sample <- recalibratedSamples ) {
|
|
val bam = sample.getBamFiles.get("recalibrated")
|
|
recalBams :+= bam
|
|
if (!sample.getBamFiles.contains("cleaned")) {
|
|
sample.getBamFiles.put("cleaned", swapExt(bam,"bam","cleaned.bam"))
|
|
bamsToClean :+= (bam,sample.getBamFiles.get("cleaned"))
|
|
}
|
|
|
|
cleanedBams :+= sample.getBamFiles.get("cleaned")
|
|
}
|
|
|
|
if ( !qscript.skip_cleaning ) {
|
|
cleaningLib.StandardIndelRealign(bamsToClean,qscript.cleaningJobs)
|
|
}
|
|
|
|
if (!qscript.skip_cleaning) {
|
|
endToEnd(cleanedBase, cleanedBams, lib)
|
|
} else {
|
|
endToEnd(uncleanedBase, recalBams, lib)
|
|
}
|
|
}
|
|
|
|
|
|
def endToEnd(base: String, bamFiles: List[File], lib: VariantCalling) = {
|
|
var recal_vcf = base+"_snps.recal.annotated.tranched.vcf"
|
|
var handfilt_vcf = base+"_snps.handfiltered.annotated.vcf"
|
|
var indel_vcf = base+"_indel_calls.vcf"
|
|
|
|
for ( c <- lib.StandardCallingPipeline(bamFiles,indel_vcf,recal_vcf,handfilt_vcf,qscript.target_titv,qscript.refseqTable) ) {
|
|
add(c)
|
|
}
|
|
}
|
|
}
|