Full pipeline now works through DAG creation. This is a first draft: more work is needed to make it cleaner and to improve command-line input handling (and properties handling), but the DAG is rendered and looks good.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3898 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
5af986e0c1
commit
7a5ee485d2
|
|
@ -39,8 +39,6 @@ class RealignerTargetCreator extends GatkFunction {
|
||||||
class IndelRealigner extends GatkFunction {
|
class IndelRealigner extends GatkFunction {
|
||||||
@Input(doc="Intervals to clean")
|
@Input(doc="Intervals to clean")
|
||||||
var intervalsToClean: File = _
|
var intervalsToClean: File = _
|
||||||
@Input(doc="Number of contigs in the contig intervals",required=false)
|
|
||||||
var numContigs: Int = 24
|
|
||||||
@Scatter(classOf[ContigScatterFunction])
|
@Scatter(classOf[ContigScatterFunction])
|
||||||
@Input(doc="Contig intervals")
|
@Input(doc="Contig intervals")
|
||||||
var contigIntervals: File = _
|
var contigIntervals: File = _
|
||||||
|
|
@ -48,7 +46,7 @@ class IndelRealigner extends GatkFunction {
|
||||||
@Output(doc="Cleaned bam file")
|
@Output(doc="Cleaned bam file")
|
||||||
var cleanedBam: File = _
|
var cleanedBam: File = _
|
||||||
|
|
||||||
this.scatterCount = numContigs
|
this.javaTmpDir = parseArgs("-tmpdir") // todo -- hack, move into script or something
|
||||||
|
|
||||||
def commandLine = gatkCommandLine("IndelRealigner") + "--output %s -targetIntervals %s -L %s".format(cleanedBam,intervalsToClean,contigIntervals)
|
def commandLine = gatkCommandLine("IndelRealigner") + "--output %s -targetIntervals %s -L %s".format(cleanedBam,intervalsToClean,contigIntervals)
|
||||||
}
|
}
|
||||||
|
|
@ -95,7 +93,7 @@ class UnifiedGenotyperIndels extends GatkFunction {
|
||||||
var indelVCF: File = _
|
var indelVCF: File = _
|
||||||
// todo -- add inputs for the indel genotyper
|
// todo -- add inputs for the indel genotyper
|
||||||
|
|
||||||
def commandLine = gatkCommandLine("UnifiedGenotyper") + "-varout %s -gm INDELS"
|
def commandLine = gatkCommandLine("UnifiedGenotyper") + "-varout %s -gm INDELS".format(indelVCF)
|
||||||
}
|
}
|
||||||
|
|
||||||
/////////////////////////////////////////////////
|
/////////////////////////////////////////////////
|
||||||
|
|
@ -168,7 +166,8 @@ class ApplyVariantCuts extends GatkFunction {
|
||||||
var tranchFile: File = _
|
var tranchFile: File = _
|
||||||
// todo -- fdr inputs, etc
|
// todo -- fdr inputs, etc
|
||||||
|
|
||||||
def commandLine = gatkCommandLine("ApplyVariantCuts") + "-B input,VCF,%s -outputVCF %s --tranchesFile %s --fdr_filter_level 10.0"
|
def commandLine = gatkCommandLine("ApplyVariantCuts") +
|
||||||
|
"-B input,VCF,%s -outputVCF %s --tranchesFile %s --fdr_filter_level 10.0".format(recalibratedVCF,tranchedVCF,tranchFile)
|
||||||
}
|
}
|
||||||
|
|
||||||
/////////////////////////////////////////////////
|
/////////////////////////////////////////////////
|
||||||
|
|
@ -218,8 +217,8 @@ for ( bam <- inputs("bam") ) {
|
||||||
|
|
||||||
// put unclean bams in unclean genotypers
|
// put unclean bams in unclean genotypers
|
||||||
|
|
||||||
uncleanSNPCalls.bamFiles += bam
|
uncleanSNPCalls.bamFiles :+= bam
|
||||||
uncleanIndelCalls.bamFiles += bam
|
uncleanIndelCalls.bamFiles :+= bam
|
||||||
|
|
||||||
// in advance, create the extension files
|
// in advance, create the extension files
|
||||||
|
|
||||||
|
|
@ -229,20 +228,20 @@ for ( bam <- inputs("bam") ) {
|
||||||
// create the cleaning commands
|
// create the cleaning commands
|
||||||
|
|
||||||
val targetCreator = new RealignerTargetCreator
|
val targetCreator = new RealignerTargetCreator
|
||||||
targetCreator.bamFiles += bam
|
targetCreator.bamFiles :+= bam
|
||||||
targetCreator.realignerIntervals = indel_targets
|
targetCreator.realignerIntervals = indel_targets
|
||||||
|
|
||||||
val realigner = new IndelRealigner
|
val realigner = new IndelRealigner
|
||||||
realigner.bamFiles = targetCreator.bamFiles
|
realigner.bamFiles = targetCreator.bamFiles
|
||||||
realigner.contigIntervals = new File(parseArgs("-contigIntervals"))
|
realigner.contigIntervals = new File(parseArgs("-contigIntervals"))
|
||||||
realigner.intervalsToClean = targetCreator.realignerIntervals
|
realigner.intervalsToClean = targetCreator.realignerIntervals
|
||||||
realigner.numContigs = parseArgs("-numContigs").toInt
|
realigner.scatterCount = parseArgs("-numContigs").toInt
|
||||||
realigner.cleanedBam = cleaned_bam
|
realigner.cleanedBam = cleaned_bam
|
||||||
|
|
||||||
// put clean bams in clean genotypers
|
// put clean bams in clean genotypers
|
||||||
|
|
||||||
cleanSNPCalls.bamFiles += realigner.cleanedBam
|
cleanSNPCalls.bamFiles :+= realigner.cleanedBam
|
||||||
cleanIndelCalls.bamFiles += realigner.cleanedBam
|
cleanIndelCalls.bamFiles :+= realigner.cleanedBam
|
||||||
|
|
||||||
add(targetCreator,realigner)
|
add(targetCreator,realigner)
|
||||||
}
|
}
|
||||||
|
|
@ -260,14 +259,16 @@ def endToEnd(base: String, snps: UnifiedGenotyper, indels: UnifiedGenotyperIndel
|
||||||
snps.trigger = new File(parseArgs("-trigger"))
|
snps.trigger = new File(parseArgs("-trigger"))
|
||||||
// todo -- hack -- get this from the command line, or properties
|
// todo -- hack -- get this from the command line, or properties
|
||||||
snps.compTracks :+= ( "comp1KG_CEU",new File("/humgen/gsa-hpprojects/GATK/data/Comparisons/Unvalidated/1kg_pilot1_projectCalls/100328.CEU.hg18.sites.vcf") )
|
snps.compTracks :+= ( "comp1KG_CEU",new File("/humgen/gsa-hpprojects/GATK/data/Comparisons/Unvalidated/1kg_pilot1_projectCalls/100328.CEU.hg18.sites.vcf") )
|
||||||
snps.scatterCount = 100
|
snps.scatterCount = 20
|
||||||
indels.indelVCF = new File(base+".indels.vcf")
|
indels.indelVCF = new File(base+".indels.vcf")
|
||||||
indels.scatterCount = 100
|
indels.scatterCount = 20
|
||||||
// 1b. genomically annotate SNPs -- slow, but scatter it
|
// 1b. genomically annotate SNPs -- slow, but scatter it
|
||||||
val annotated = new GenomicAnnotator
|
val annotated = new GenomicAnnotator
|
||||||
annotated.inputVCF = snps.rawVCF
|
annotated.inputVCF = snps.rawVCF
|
||||||
|
annotated.refseqTable = new File(parseArgs("-refseqTable"))
|
||||||
|
annotated.dbsnpTable = new File(parseArgs("-dbsnpTable"))
|
||||||
annotated.annotatedVCF = swapExt(snps.rawVCF,".vcf",".annotated.vcf")
|
annotated.annotatedVCF = swapExt(snps.rawVCF,".vcf",".annotated.vcf")
|
||||||
annotated.scatterCount = 100
|
annotated.scatterCount = 20
|
||||||
// 2.a filter on cluster and near indels
|
// 2.a filter on cluster and near indels
|
||||||
val masker = new VariantFiltration
|
val masker = new VariantFiltration
|
||||||
masker.unfilteredVCF = annotated.annotatedVCF
|
masker.unfilteredVCF = annotated.annotatedVCF
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue