More Queue scripts for analysis
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4260 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
5de1124997
commit
3c5b8730d5
|
|
@ -38,6 +38,8 @@ class Target(project: String, snpVCF: String, indelVCF: String, calledGenome: Do
|
|||
def getIndelEval = getEval("indels")
|
||||
}
|
||||
|
||||
val RELEASE = "/humgen/1kg/DCC/ftp/release/2010_07/"
|
||||
|
||||
var targets: List[Target] = List()
|
||||
|
||||
val p1Targets = List(("CEU", 2.43e9), ("YRI", 2.39e9), ("CHBJPT", 2.41e9))
|
||||
|
|
@ -48,12 +50,12 @@ for ( (pop: String,called) <- p1Targets )
|
|||
// pilot 2
|
||||
val p2Targets = List(("CEU", 2.264e9), ("YRI", 2.214e9))
|
||||
for ( (pop: String, called) <- p2Targets )
|
||||
targets ::= new Target("SRP000032", "/humgen/gsa-hpprojects/1kg/releases/pilot_paper_calls/trio/snps/" + pop + ".trio.2010_03.genotypes.vcf.gz", "v1/dindel-v2/"+pop+".trio.2010_06.indel.genotypes.vcf", called, 2.85e9, pop, "pilot2")
|
||||
targets ::= new Target("SRP000032", RELEASE + "trio/snps/" + pop + ".trio.2010_03.genotypes.vcf.gz", "v1/dindel-v2/"+pop+".trio.2010_06.indel.genotypes.vcf", called, 2.85e9, pop, "pilot2")
|
||||
|
||||
// pilot 3
|
||||
for (pop <- List("CEU", "CHB", "CHD", "JPT", "LWK", "TSI", "YRI")) {
|
||||
val indels = if ( pop != "LWK" ) "/humgen/gsa-hpprojects/1kg/releases/pilot_paper_calls/exon/indel/"+pop+".exon.2010_06.genotypes.vcf.gz" else null
|
||||
targets ::= new Target("SRP000033", "/humgen/gsa-hpprojects/1kg/releases/pilot_paper_calls/exon/snps/" + pop + ".exon.2010_03.genotypes.vcf.gz", indels, 1.43e6, 1.43e6, pop, "pilot3", "/humgen/gsa-hpprojects/1kg/1kg_pilot3/useTheseBamsForAnalysis/pilot3.%s.cleaned.bam".format(pop))
|
||||
val indels = if ( pop != "LWK" ) "exon/indel/"+pop+".exon.2010_06.genotypes.vcf.gz" else null
|
||||
targets ::= new Target("SRP000033", "exon/snps/" + pop + ".exon.2010_03.genotypes.vcf.gz", indels, 1.43e6, 1.43e6, pop, "pilot3", "/humgen/gsa-hpprojects/1kg/1kg_pilot3/useTheseBamsForAnalysis/pilot3.%s.cleaned.bam".format(pop))
|
||||
}
|
||||
|
||||
// merged files
|
||||
|
|
@ -72,9 +74,9 @@ def script = stage match {
|
|||
case "ALL" =>
|
||||
// initial pilot1 merge -- autosomes + x
|
||||
for ( (pop: String,called) <- p1Targets ) {
|
||||
val auto = "/humgen/gsa-hpprojects/1kg/releases/pilot_paper_calls/low_coverage/snps/"+ pop +".low_coverage.2010_07.genotypes.vcf.gz"
|
||||
val auto = RELEASE + "low_coverage/snps/"+ pop +".low_coverage.2010_07.genotypes.vcf.gz"
|
||||
// todo -- remove fixed when Laura gives us the official calls
|
||||
val x = "/humgen/gsa-hpprojects/1kg/releases/pilot_paper_calls/low_coverage/snps/"+ pop +".low_coverage.2010_07.xchr.fixed.genotypes.vcf"
|
||||
val x = RELEASE + "low_coverage/snps/"+ pop +".low_coverage.2010_07.xchr.fixed.genotypes.vcf"
|
||||
val combineSNPs = new Combine(List(auto, x), pop + ".pilot1.vcf")
|
||||
add(combineSNPs)
|
||||
}
|
||||
|
|
@ -98,12 +100,14 @@ def script = stage match {
|
|||
|
||||
//add(new Combine(pilots.map(_ + ".snps.merged.vcf"), snps))
|
||||
add(new Combine(pilots.map(_ + ".indels.merged.vcf"), indels))
|
||||
|
||||
|
||||
case "EVAL" =>
|
||||
// VariantEval of the SNPs
|
||||
//for (target <- targets) {
|
||||
// add(new VariantEval(target.getSNPVCF, target.getSNPEval))
|
||||
// add(new StatPop(target))
|
||||
//}
|
||||
for (target <- targets) {
|
||||
add(new VariantEval(target.getSNPVCF, target.getSNPEval))
|
||||
//add(new StatPop(target))
|
||||
}
|
||||
|
||||
case "DOC" =>
|
||||
for (target <- targets) {
|
||||
|
|
@ -124,7 +128,9 @@ class VariantEval(vcfIn: String, evalOut: String, vcfType: String = "VCF") exten
|
|||
this.out = new File(evalOut)
|
||||
this.DBSNP = new File("/humgen/gsa-hpprojects/GATK/data/dbsnp_129_b36.rod")
|
||||
this.reportType = Some(VE2TemplateType.Grep)
|
||||
this.noStandard = true;
|
||||
this.evalModule :+= "CompOverlap"
|
||||
this.memoryLimit = Some(3)
|
||||
|
||||
override def dotString = "VariantEval: " + vcfFile.getName
|
||||
}
|
||||
|
|
|
|||
|
|
@ -32,8 +32,19 @@ def script = {
|
|||
add(sublist)
|
||||
add(mergeSublist)
|
||||
add(new Index(mergeSublist.o) )
|
||||
|
||||
// SNP calling
|
||||
add(new Call(sublist.list, nSamples, "dynamic_merge"))
|
||||
add(new Call(mergeSublist.o, nSamples, "pre_merge"))
|
||||
|
||||
// SNP calling -- no annotations
|
||||
add(new Call(sublist.list, nSamples, "dynamic_merge_no_annotations") { this.G :+= "None"; })
|
||||
add(new Call(mergeSublist.o, nSamples, "pre_merge_no_annotations") { this.G :+= "None"; })
|
||||
|
||||
// CountLoci
|
||||
add(new MyCountLoci(sublist.list, nSamples, "dynamic_merge"))
|
||||
add(new MyCountLoci(mergeSublist.o, nSamples, "pre_merge"))
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -57,6 +68,14 @@ class Call(@Input(doc="foo") bamList: File, n: Int, name: String) extends Unifie
|
|||
this.o = outVCF
|
||||
}
|
||||
|
||||
class MyCountLoci(@Input(doc="foo") bamList: File, n: Int, name: String) extends CountLoci with UNIVERSAL_GATK_ARGS {
|
||||
@Output(doc="foo") var outFile: File = new File("%s.%d.%s.txt".format(bamList.getName, n, name))
|
||||
this.memoryLimit = Some(4)
|
||||
this.input_file :+= bamList
|
||||
this.jobQueue = "gsa"
|
||||
this.o = outFile
|
||||
}
|
||||
|
||||
class SliceList(n: Int) extends CommandLineFunction {
|
||||
@Output(doc="foo") var list: File = new File("bams.%d.list".format(n))
|
||||
this.jobQueue = "gsa"
|
||||
|
|
|
|||
|
|
@ -0,0 +1,44 @@
|
|||
import java.io.File
|
||||
import org.broadinstitute.sting.commandline.Argument
|
||||
import org.broadinstitute.sting.queue.QScript
|
||||
import org.broadinstitute.sting.queue.extensions.gatk._
|
||||
import org.broadinstitute.sting.queue.extensions.samtools._
|
||||
|
||||
class ManySampleUGPerformanceTesting extends QScript {
|
||||
@Argument(doc="gatkJarFile", required=false)
|
||||
var gatkJarFile: File = new File("/home/radon01/depristo/dev/GenomeAnalysisTKStable/trunk/dist/GenomeAnalysisTK.jar")
|
||||
|
||||
@Argument(shortName = "R", doc="ref", required=false)
|
||||
var referenceFile: File = new File("/humgen/1kg/reference/human_g1k_v37.fasta")
|
||||
|
||||
val TARGET_INTERVAL = "my.intervals"
|
||||
val TEST_BAM_LIST = new File("ten.bam.list")
|
||||
val FULL_BAM_LIST = new File("/humgen/1kg/processing/allPopulations_chr20_june_release/allPopulations.june.bam.list")
|
||||
val BAM_LIST = FULL_BAM_LIST
|
||||
val HM3 = new File("/humgen/gsa-hpprojects/GATK/data/Comparisons/Validated/HapMap/3.2/genotypes_r27_nr.hg19_fwd.vcf")
|
||||
|
||||
trait UNIVERSAL_GATK_ARGS extends CommandLineGATK {
|
||||
this.logging_level = "INFO";
|
||||
this.jarFile = gatkJarFile;
|
||||
this.intervals = new File(TARGET_INTERVAL);
|
||||
this.reference_sequence = referenceFile;
|
||||
this.jobQueue = "gsa";
|
||||
this.et = Option(org.broadinstitute.sting.gatk.phonehome.GATKRunReport.PhoneHomeOption.STANDARD);
|
||||
this.dcov = Option(50);
|
||||
}
|
||||
|
||||
def script = {
|
||||
// SNP calling
|
||||
add(new MyQSample(BAM_LIST));
|
||||
}
|
||||
|
||||
class MyQSample(@Input(doc="foo") bamList: File) extends QSample with UNIVERSAL_GATK_ARGS {
|
||||
this.memoryLimit = Some(4)
|
||||
this.input_file :+= bamList
|
||||
//this.BTI = "genotypes"
|
||||
this.nt = Option(10)
|
||||
this.rodBind :+= RodBind("genotypes", "VCF", HM3)
|
||||
this.o = new File("%s.qsample".format(bamList.getName))
|
||||
}
|
||||
}
|
||||
|
||||
Loading…
Reference in New Issue