2011-03-18 01:09:53 +08:00
import org.broadinstitute.sting.queue.QScript
2011-03-19 06:06:52 +08:00
import org.broadinstitute.sting.queue.extensions.gatk. { RealignerTargetCreator , RodBind , IndelRealigner }
2011-03-18 01:09:53 +08:00
/**
 * Created by IntelliJ IDEA.
 * User: carneiro
 * Date: 3/17/11
 * Time: 11:29 AM
 * To change this template use File | Settings | File Templates.
 */
/**
 * Queue script that configures two GATK jobs for a BAM input:
 * a RealignerTargetCreator that writes an intervals file, and an
 * IndelRealigner that reads that intervals file and writes the output BAM.
 *
 * All command-line arguments are exposed through the `@Input` fields below;
 * they are `var`s because the Queue framework assigns them after construction.
 */
class justClean extends QScript {

  @Input(doc = "path to GenomeAnalysisTK.jar", shortName = "gatk", required = true)
  var GATKjar: File = _

  @Input(doc = "input BAM file - or list of BAM files", shortName = "i", required = true)
  var input: File = _

  @Input(doc = "Reference fasta file", shortName = "R", required = false)
  var reference: File = new File("/seq/references/Homo_sapiens_assembly19/v1/Homo_sapiens_assembly19.fasta")

  @Input(doc = "dbsnp ROD to use (VCF)", shortName = "D", required = false)
  var dbSNP: File = new File("/humgen/gsa-hpprojects/GATK/data/dbsnp_132_b37.leftAligned.vcf")

  @Input(doc = "extra VCF files to use as reference indels for Indel Realignment", shortName = "indels", required = false)
  var indels: File = new File("/humgen/gsa-hpprojects/GATK/data/Comparisons/Unvalidated/AFR+EUR+ASN+1KG.dindel_august_release_merged_pilot1.20110126.sites.vcf")

  // Prefix used when building the job names below.
  val queueLogDir: String = ".qlog/"

  /**
   * Builds the pipeline: configures the target-creation and realignment jobs
   * and registers them with Queue via `add`.
   */
  def script: Unit = {
    println(GATKjar)

    // Both derived files are named from the input by swapping its ".bam" extension.
    val outBam = swapExt(input, ".bam", ".Qclean.bam")
    val tIntervals = swapExt(input, ".bam", ".all_indels.intervals")

    // Job 1: writes the intervals file consumed by the realigner.
    val target = new RealignerTargetCreator()
    target.input_file :+= input
    target.out = tIntervals
    target.reference_sequence = reference
    target.mismatchFraction = 0.0
    // ROD names/types are identifiers handed to GATK, so they must not carry padding whitespace.
    target.rodBind :+= RodBind("dbsnp", "VCF", dbSNP)
    target.rodBind :+= RodBind("indels", "VCF", indels)
    target.memoryLimit = 6
    target.jobName = queueLogDir + tIntervals + ".atarget"
    target.jarFile = GATKjar
    target.scatterCount = 84

    // Job 2: realigns the input using the intervals from job 1.
    val clean = new IndelRealigner()
    clean.input_file :+= input
    clean.targetIntervals = tIntervals
    clean.out = outBam
    clean.reference_sequence = reference
    clean.rodBind :+= RodBind("dbsnp", "VCF", dbSNP)
    clean.rodBind :+= RodBind("indels", "VCF", indels)
    clean.doNotUseSW = true
    clean.compress = 0
    // todo -- update clean with the last consensus between Tim, Matt and Eric. This is ugly!
    clean.U = org.broadinstitute.sting.gatk.arguments.ValidationExclusion.TYPE.NO_READ_ORDER_VERIFICATION
    clean.jobName = queueLogDir + outBam + ".clean"
    clean.jarFile = GATKjar
    clean.memoryLimit = 12
    clean.scatterCount = 84

    // `clean` reads tIntervals, so something has to produce it. Previously only
    // `clean` was added, which left `target` configured but never scheduled.
    // Schedule `target` whenever the intervals file is not already on disk;
    // when it is, only `clean` is scheduled, exactly as before.
    if (!tIntervals.exists) add(target)
    add(clean)
  }
}