More Queue scripts for analysis

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4260 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
depristo 2010-09-12 14:04:10 +00:00
parent 5de1124997
commit 3c5b8730d5
3 changed files with 78 additions and 9 deletions

View File

@ -38,6 +38,8 @@ class Target(project: String, snpVCF: String, indelVCF: String, calledGenome: Do
def getIndelEval = getEval("indels")
}
val RELEASE = "/humgen/1kg/DCC/ftp/release/2010_07/"
var targets: List[Target] = List()
val p1Targets = List(("CEU", 2.43e9), ("YRI", 2.39e9), ("CHBJPT", 2.41e9))
@ -48,12 +50,12 @@ for ( (pop: String,called) <- p1Targets )
// pilot 2
val p2Targets = List(("CEU", 2.264e9), ("YRI", 2.214e9))
for ( (pop: String, called) <- p2Targets )
targets ::= new Target("SRP000032", "/humgen/gsa-hpprojects/1kg/releases/pilot_paper_calls/trio/snps/" + pop + ".trio.2010_03.genotypes.vcf.gz", "v1/dindel-v2/"+pop+".trio.2010_06.indel.genotypes.vcf", called, 2.85e9, pop, "pilot2")
targets ::= new Target("SRP000032", RELEASE + "trio/snps/" + pop + ".trio.2010_03.genotypes.vcf.gz", "v1/dindel-v2/"+pop+".trio.2010_06.indel.genotypes.vcf", called, 2.85e9, pop, "pilot2")
// pilot 3
for (pop <- List("CEU", "CHB", "CHD", "JPT", "LWK", "TSI", "YRI")) {
val indels = if ( pop != "LWK" ) "/humgen/gsa-hpprojects/1kg/releases/pilot_paper_calls/exon/indel/"+pop+".exon.2010_06.genotypes.vcf.gz" else null
targets ::= new Target("SRP000033", "/humgen/gsa-hpprojects/1kg/releases/pilot_paper_calls/exon/snps/" + pop + ".exon.2010_03.genotypes.vcf.gz", indels, 1.43e6, 1.43e6, pop, "pilot3", "/humgen/gsa-hpprojects/1kg/1kg_pilot3/useTheseBamsForAnalysis/pilot3.%s.cleaned.bam".format(pop))
val indels = if ( pop != "LWK" ) "exon/indel/"+pop+".exon.2010_06.genotypes.vcf.gz" else null
targets ::= new Target("SRP000033", "exon/snps/" + pop + ".exon.2010_03.genotypes.vcf.gz", indels, 1.43e6, 1.43e6, pop, "pilot3", "/humgen/gsa-hpprojects/1kg/1kg_pilot3/useTheseBamsForAnalysis/pilot3.%s.cleaned.bam".format(pop))
}
// merged files
@ -72,9 +74,9 @@ def script = stage match {
case "ALL" =>
// initial pilot1 merge -- autosomes + x
for ( (pop: String,called) <- p1Targets ) {
val auto = "/humgen/gsa-hpprojects/1kg/releases/pilot_paper_calls/low_coverage/snps/"+ pop +".low_coverage.2010_07.genotypes.vcf.gz"
val auto = RELEASE + "low_coverage/snps/"+ pop +".low_coverage.2010_07.genotypes.vcf.gz"
// todo -- remove fixed when Laura gives us the official calls
val x = "/humgen/gsa-hpprojects/1kg/releases/pilot_paper_calls/low_coverage/snps/"+ pop +".low_coverage.2010_07.xchr.fixed.genotypes.vcf"
val x = RELEASE + "low_coverage/snps/"+ pop +".low_coverage.2010_07.xchr.fixed.genotypes.vcf"
val combineSNPs = new Combine(List(auto, x), pop + ".pilot1.vcf")
add(combineSNPs)
}
@ -98,12 +100,14 @@ def script = stage match {
//add(new Combine(pilots.map(_ + ".snps.merged.vcf"), snps))
add(new Combine(pilots.map(_ + ".indels.merged.vcf"), indels))
case "EVAL" =>
// VariantEval of the SNPs
//for (target <- targets) {
// add(new VariantEval(target.getSNPVCF, target.getSNPEval))
// add(new StatPop(target))
//}
for (target <- targets) {
add(new VariantEval(target.getSNPVCF, target.getSNPEval))
//add(new StatPop(target))
}
case "DOC" =>
for (target <- targets) {
@ -124,7 +128,9 @@ class VariantEval(vcfIn: String, evalOut: String, vcfType: String = "VCF") exten
this.out = new File(evalOut)
this.DBSNP = new File("/humgen/gsa-hpprojects/GATK/data/dbsnp_129_b36.rod")
this.reportType = Some(VE2TemplateType.Grep)
this.noStandard = true;
this.evalModule :+= "CompOverlap"
this.memoryLimit = Some(3)
override def dotString = "VariantEval: " + vcfFile.getName
}

View File

@ -32,8 +32,19 @@ def script = {
add(sublist)
add(mergeSublist)
add(new Index(mergeSublist.o) )
// SNP calling
add(new Call(sublist.list, nSamples, "dynamic_merge"))
add(new Call(mergeSublist.o, nSamples, "pre_merge"))
// SNP calling -- no annotations
add(new Call(sublist.list, nSamples, "dynamic_merge_no_annotations") { this.G :+= "None"; })
add(new Call(mergeSublist.o, nSamples, "pre_merge_no_annotations") { this.G :+= "None"; })
// CountLoci
add(new MyCountLoci(sublist.list, nSamples, "dynamic_merge"))
add(new MyCountLoci(mergeSublist.o, nSamples, "pre_merge"))
}
}
@ -57,6 +68,14 @@ class Call(@Input(doc="foo") bamList: File, n: Int, name: String) extends Unifie
this.o = outVCF
}
class MyCountLoci(@Input(doc="foo") bamList: File, n: Int, name: String) extends CountLoci with UNIVERSAL_GATK_ARGS {
@Output(doc="foo") var outFile: File = new File("%s.%d.%s.txt".format(bamList.getName, n, name))
this.memoryLimit = Some(4)
this.input_file :+= bamList
this.jobQueue = "gsa"
this.o = outFile
}
class SliceList(n: Int) extends CommandLineFunction {
@Output(doc="foo") var list: File = new File("bams.%d.list".format(n))
this.jobQueue = "gsa"

View File

@ -0,0 +1,44 @@
import java.io.File
import org.broadinstitute.sting.commandline.Argument
import org.broadinstitute.sting.queue.QScript
import org.broadinstitute.sting.queue.extensions.gatk._
import org.broadinstitute.sting.queue.extensions.samtools._
class ManySampleUGPerformanceTesting extends QScript {
@Argument(doc="gatkJarFile", required=false)
var gatkJarFile: File = new File("/home/radon01/depristo/dev/GenomeAnalysisTKStable/trunk/dist/GenomeAnalysisTK.jar")
@Argument(shortName = "R", doc="ref", required=false)
var referenceFile: File = new File("/humgen/1kg/reference/human_g1k_v37.fasta")
val TARGET_INTERVAL = "my.intervals"
val TEST_BAM_LIST = new File("ten.bam.list")
val FULL_BAM_LIST = new File("/humgen/1kg/processing/allPopulations_chr20_june_release/allPopulations.june.bam.list")
val BAM_LIST = FULL_BAM_LIST
val HM3 = new File("/humgen/gsa-hpprojects/GATK/data/Comparisons/Validated/HapMap/3.2/genotypes_r27_nr.hg19_fwd.vcf")
trait UNIVERSAL_GATK_ARGS extends CommandLineGATK {
this.logging_level = "INFO";
this.jarFile = gatkJarFile;
this.intervals = new File(TARGET_INTERVAL);
this.reference_sequence = referenceFile;
this.jobQueue = "gsa";
this.et = Option(org.broadinstitute.sting.gatk.phonehome.GATKRunReport.PhoneHomeOption.STANDARD);
this.dcov = Option(50);
}
def script = {
// SNP calling
add(new MyQSample(BAM_LIST));
}
class MyQSample(@Input(doc="foo") bamList: File) extends QSample with UNIVERSAL_GATK_ARGS {
this.memoryLimit = Some(4)
this.input_file :+= bamList
//this.BTI = "genotypes"
this.nt = Option(10)
this.rodBind :+= RodBind("genotypes", "VCF", HM3)
this.o = new File("%s.qsample".format(bamList.getName))
}
}