From 122d5845d3d075690e3c1e1abb7f3d386860daba Mon Sep 17 00:00:00 2001 From: depristo Date: Wed, 27 Apr 2011 22:01:36 +0000 Subject: [PATCH] GATK Resource bundle, latest version (now with b37 -> b36 support). Oneoff scala script that assesses chip coverage of calls git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5703 348d0f76-0448-11de-a6fe-93d51630548a --- scala/qscript/core/GATKResourcesBundle.scala | 19 ++-- .../depristo/AssessChipCoverageOfPanel.q | 89 +++++++++++++++++++ 2 files changed, 99 insertions(+), 9 deletions(-) create mode 100755 scala/qscript/oneoffs/depristo/AssessChipCoverageOfPanel.q diff --git a/scala/qscript/core/GATKResourcesBundle.scala b/scala/qscript/core/GATKResourcesBundle.scala index 815779383..7b372c495 100755 --- a/scala/qscript/core/GATKResourcesBundle.scala +++ b/scala/qscript/core/GATKResourcesBundle.scala @@ -39,6 +39,7 @@ class GATKResourcesBundle extends QScript { class Reference( val name: String, val file: File ) { } var b37: Reference = _ var hg18: Reference = _ + var b36: Reference = _ var exampleFASTA: Reference = _ var refs: List[Reference] = _ @@ -54,12 +55,12 @@ class GATKResourcesBundle extends QScript { def liftover(in: File, inRef: Reference, out: File, outRef: Reference): CommandLineFunction = { //Console.printf("liftover(%s => %s)%n", inRef.name, outRef.name) (inRef.name, outRef.name) match { -// case ("b37", "hg19") => -// return new HGRFC2UCSC(in, out) + case ("b37", "hg19") => + return new LiftOverPerl(in, out, new File("chainFiles/b37Tohg19.chain"), inRef, outRef) case ("b37", "hg18") => return new LiftOverPerl(in, out, new File("chainFiles/b37tohg18.chain"), inRef, outRef) -// case ("b37", "b36") => -// return new LiftOverPerl(in, out, new File("chainFiles/b37Tob36.chain"), inRef, outRef) + case ("b37", "b36") => + return new LiftOverPerl(in, out, new File("chainFiles/b37tob36.chain"), inRef, outRef) case _ => return null } } @@ -101,10 +102,12 @@ class GATKResourcesBundle extends QScript { // b37 = new Reference("b37", new File("/humgen/1kg/reference/human_g1k_v37.fasta")) hg18 = new Reference("hg18", new File("/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta")) + b36 = new Reference("b36", new File("/humgen/1kg/reference/human_b36_both.fasta")) exampleFASTA = new Reference("exampleFASTA", new File("testdata/exampleFASTA.fasta")) - refs = List(b37, hg18, exampleFASTA) + refs = List(b37, hg18, b36, exampleFASTA) addResource(new Resource(b37.file, "", b37, false)) + addResource(new Resource(b36.file, "", b36, false)) addResource(new Resource(hg18.file, "", hg18, false)) // @@ -134,6 +137,8 @@ class GATKResourcesBundle extends QScript { addResource(new Resource("/humgen/gsa-hpprojects/GATK/data/refGene_b37.sorted.txt", "refGene", b37, true, false)) + addResource(new Resource("chainFiles/hg18tob37.chain", "", hg18, false, false)) + addResource(new Resource("chainFiles/b36tob37.chain", "", b36, false, false)) // todo -- chain files? // todo 1000G SNP and indel call sets? @@ -304,10 +309,6 @@ class GATKResourcesBundle extends QScript { oldRef.file.replace(".fasta", ""), jobTempDir) } -// class HGRFC2UCSC(@Input val in: File, @Output val out: File) extends CommandLineFunction { -// def commandLine = "python ../python/vcf_b36_to_hg18.py %s %s".format(in.getAbsolutePath, out.getAbsolutePath) -// } - def getExtension(f: File): String = { val i = f.getName.lastIndexOf('.'); if (i > 0 && i < f.getName.length() - 1) diff --git a/scala/qscript/oneoffs/depristo/AssessChipCoverageOfPanel.q b/scala/qscript/oneoffs/depristo/AssessChipCoverageOfPanel.q new file mode 100755 index 000000000..feabc48d1 --- /dev/null +++ b/scala/qscript/oneoffs/depristo/AssessChipCoverageOfPanel.q @@ -0,0 +1,89 @@ +package oneoffs.depristo + +import org.broadinstitute.sting.queue.extensions.gatk._ +import org.broadinstitute.sting.queue.QScript + +class AssessChipCoverageOfPanel extends QScript { + qscript => + @Argument(doc="Path to GATK jar",required=false,shortName="gatkjarfile") var gatkJarFile: File = new File("dist/GenomeAnalysisTK.jar") + @Argument(doc="Panel VCF",required=true,shortName="panelVCF") var panelVCF: File = _ + @Argument(doc="BAM File",required=true, shortName="bam") var bam: File = null + @Argument(doc="Bundle path",required=false, shortName="bundle") var bundle: File = new File("/humgen/gsa-hpprojects/GATK/bundle/current/b37/") + + @Argument(shortName = "R", doc="ref", required=true) + var referenceFile: File = _ + + @Argument(shortName = "L", doc="intervals", required=false) + val TARGET_INTERVAL: String = null; + + def HM3_VCF: File = new File(bundle + "/hapmap_3.3.b37.sites.vcf") + def OMNI_VCF: File = new File(bundle + "/1000G_omni2.5.b37.sites.vcf") + + trait GATKArgs extends CommandLineGATK { + this.logging_level = "INFO"; + this.jarFile = gatkJarFile; + if ( TARGET_INTERVAL != null ) + this.intervalsString = List(TARGET_INTERVAL); + this.reference_sequence = referenceFile; + this.memoryLimit = 2 + } + + // -------------------------------------------------------------------------------- + // + // GENOTYPING SPECIFIC SITES IN A BAM FILE + // + // -------------------------------------------------------------------------------- + + class GenotypeBAMAtSites(@Input bam: File, @Input sitesVCF: File, @Output genotypesVCF: File) extends UnifiedGenotyper with GATKArgs { + this.input_file = List(bam) + this.o = genotypesVCF + this.stand_call_conf = 0.0 + this.out_mode = org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_ALL_SITES + this.gt_mode = org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES + this.rodBind :+= new RodBind("alleles","VCF",sitesVCF) + + // we only want chromosome counts annotations + this.BTI = "alleles" + this.G = List("none") + this.A :+= "ChromosomeCounts" + this.nsl = true + + // make sure we have the right intervals + this.BTIMR = org.broadinstitute.sting.utils.interval.IntervalSetRule.INTERSECTION + } + + // todo -- include AF from 1000G in annotation + // todo -- distance from SNP to nearest chip SNP + + class EvalCalls(@Input vcf: File) extends VariantEval with GATKArgs { + this.o = swapExt(vcf, ".vcf", ".vcf.eval") + this.rodBind :+= RodBind("eval", "VCF", vcf) + this.rodBind :+= RodBind("compOMNI", "VCF", OMNI_VCF) + this.rodBind :+= RodBind("compHapMap3", "VCF", HM3_VCF) + } + + class AnnotateCalls(@Input vcf: File, @Output file: File) extends VariantAnnotator with GATKArgs { + this.o = file + this.rodBind :+= RodBind("variant", "VCF", vcf) + this.rodBind :+= RodBind("compOMNI", "VCF", OMNI_VCF) + this.rodBind :+= RodBind("compHapMap3", "VCF", HM3_VCF) + } + + class MakeTable(@Input vcf: File, @Output table: File) extends VariantsToTable with GATKArgs { + this.o = table + this.rodBind :+= RodBind("variants", "VCF", vcf) + this.allowMissingData = true + this.fields = List("CHROM", "POS", "REF", "ALT", "TRANSITION", "HapMap3", "OMNI", "AC", "AN", "AF") + } + + def script = { + val genotyped = new File(swapExt(bam, ".bam", "_genotyped_at." + panelVCF.getName).getName) + val annotated = new File(swapExt(genotyped, ".vcf", ".annotated.vcf")) + val table = new File(swapExt(annotated, ".vcf", ".vcf.table")) + + add(new GenotypeBAMAtSites(bam, panelVCF, genotyped)) + add(new AnnotateCalls(genotyped, annotated)) + add(new EvalCalls(annotated)) + add(new MakeTable(annotated, table)) + } +} \ No newline at end of file