Full pipeline now works through DAG creation. First draft; more work to do to make it cleaner and better command-line input handling (and properties handling); but the DAG is rendered and looks good.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3898 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
chartl 2010-07-29 19:36:17 +00:00
parent 5af986e0c1
commit 7a5ee485d2
1 changed files with 15 additions and 14 deletions

View File

@ -39,8 +39,6 @@ class RealignerTargetCreator extends GatkFunction {
class IndelRealigner extends GatkFunction { class IndelRealigner extends GatkFunction {
@Input(doc="Intervals to clean") @Input(doc="Intervals to clean")
var intervalsToClean: File = _ var intervalsToClean: File = _
@Input(doc="Number of contigs in the contig intervals",required=false)
var numContigs: Int = 24
@Scatter(classOf[ContigScatterFunction]) @Scatter(classOf[ContigScatterFunction])
@Input(doc="Contig intervals") @Input(doc="Contig intervals")
var contigIntervals: File = _ var contigIntervals: File = _
@ -48,7 +46,7 @@ class IndelRealigner extends GatkFunction {
@Output(doc="Cleaned bam file") @Output(doc="Cleaned bam file")
var cleanedBam: File = _ var cleanedBam: File = _
this.scatterCount = numContigs this.javaTmpDir = parseArgs("-tmpdir") // todo -- hack, move into script or something
def commandLine = gatkCommandLine("IndelRealigner") + "--output %s -targetIntervals %s -L %s".format(cleanedBam,intervalsToClean,contigIntervals) def commandLine = gatkCommandLine("IndelRealigner") + "--output %s -targetIntervals %s -L %s".format(cleanedBam,intervalsToClean,contigIntervals)
} }
@ -95,7 +93,7 @@ class UnifiedGenotyperIndels extends GatkFunction {
var indelVCF: File = _ var indelVCF: File = _
// todo -- add inputs for the indel genotyper // todo -- add inputs for the indel genotyper
def commandLine = gatkCommandLine("UnifiedGenotyper") + "-varout %s -gm INDELS" def commandLine = gatkCommandLine("UnifiedGenotyper") + "-varout %s -gm INDELS".format(indelVCF)
} }
///////////////////////////////////////////////// /////////////////////////////////////////////////
@ -168,7 +166,8 @@ class ApplyVariantCuts extends GatkFunction {
var tranchFile: File = _ var tranchFile: File = _
// todo -- fdr inputs, etc // todo -- fdr inputs, etc
def commandLine = gatkCommandLine("ApplyVariantCuts") + "-B input,VCF,%s -outputVCF %s --tranchesFile %s --fdr_filter_level 10.0" def commandLine = gatkCommandLine("ApplyVariantCuts") +
"-B input,VCF,%s -outputVCF %s --tranchesFile %s --fdr_filter_level 10.0".format(recalibratedVCF,tranchedVCF,tranchFile)
} }
///////////////////////////////////////////////// /////////////////////////////////////////////////
@ -218,8 +217,8 @@ for ( bam <- inputs("bam") ) {
// put unclean bams in unclean genotypers // put unclean bams in unclean genotypers
uncleanSNPCalls.bamFiles += bam uncleanSNPCalls.bamFiles :+= bam
uncleanIndelCalls.bamFiles += bam uncleanIndelCalls.bamFiles :+= bam
// in advance, create the extension files // in advance, create the extension files
@ -229,20 +228,20 @@ for ( bam <- inputs("bam") ) {
// create the cleaning commands // create the cleaning commands
val targetCreator = new RealignerTargetCreator val targetCreator = new RealignerTargetCreator
targetCreator.bamFiles += bam targetCreator.bamFiles :+= bam
targetCreator.realignerIntervals = indel_targets targetCreator.realignerIntervals = indel_targets
val realigner = new IndelRealigner val realigner = new IndelRealigner
realigner.bamFiles = targetCreator.bamFiles realigner.bamFiles = targetCreator.bamFiles
realigner.contigIntervals = new File(parseArgs("-contigIntervals")) realigner.contigIntervals = new File(parseArgs("-contigIntervals"))
realigner.intervalsToClean = targetCreator.realignerIntervals realigner.intervalsToClean = targetCreator.realignerIntervals
realigner.numContigs = parseArgs("-numContigs").toInt realigner.scatterCount = parseArgs("-numContigs").toInt
realigner.cleanedBam = cleaned_bam realigner.cleanedBam = cleaned_bam
// put clean bams in clean genotypers // put clean bams in clean genotypers
cleanSNPCalls.bamFiles += realigner.cleanedBam cleanSNPCalls.bamFiles :+= realigner.cleanedBam
cleanIndelCalls.bamFiles += realigner.cleanedBam cleanIndelCalls.bamFiles :+= realigner.cleanedBam
add(targetCreator,realigner) add(targetCreator,realigner)
} }
@ -260,14 +259,16 @@ def endToEnd(base: String, snps: UnifiedGenotyper, indels: UnifiedGenotyperIndel
snps.trigger = new File(parseArgs("-trigger")) snps.trigger = new File(parseArgs("-trigger"))
// todo -- hack -- get this from the command line, or properties // todo -- hack -- get this from the command line, or properties
snps.compTracks :+= ( "comp1KG_CEU",new File("/humgen/gsa-hpprojects/GATK/data/Comparisons/Unvalidated/1kg_pilot1_projectCalls/100328.CEU.hg18.sites.vcf") ) snps.compTracks :+= ( "comp1KG_CEU",new File("/humgen/gsa-hpprojects/GATK/data/Comparisons/Unvalidated/1kg_pilot1_projectCalls/100328.CEU.hg18.sites.vcf") )
snps.scatterCount = 100 snps.scatterCount = 20
indels.indelVCF = new File(base+".indels.vcf") indels.indelVCF = new File(base+".indels.vcf")
indels.scatterCount = 100 indels.scatterCount = 20
// 1b. genomically annotate SNPs -- slow, but scatter it // 1b. genomically annotate SNPs -- slow, but scatter it
val annotated = new GenomicAnnotator val annotated = new GenomicAnnotator
annotated.inputVCF = snps.rawVCF annotated.inputVCF = snps.rawVCF
annotated.refseqTable = new File(parseArgs("-refseqTable"))
annotated.dbsnpTable = new File(parseArgs("-dbsnpTable"))
annotated.annotatedVCF = swapExt(snps.rawVCF,".vcf",".annotated.vcf") annotated.annotatedVCF = swapExt(snps.rawVCF,".vcf",".annotated.vcf")
annotated.scatterCount = 100 annotated.scatterCount = 20
// 2.a filter on cluster and near indels // 2.a filter on cluster and near indels
val masker = new VariantFiltration val masker = new VariantFiltration
masker.unfilteredVCF = annotated.annotatedVCF masker.unfilteredVCF = annotated.annotatedVCF