GATK Resource bundle, latest version (now with b37 -> b36 support). Oneoff scala script that assesses chip coverage of calls
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5703 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
6cce3e00f3
commit
122d5845d3
|
|
@ -39,6 +39,7 @@ class GATKResourcesBundle extends QScript {
|
|||
class Reference( val name: String, val file: File ) { }
|
||||
var b37: Reference = _
|
||||
var hg18: Reference = _
|
||||
var b36: Reference = _
|
||||
var exampleFASTA: Reference = _
|
||||
var refs: List[Reference] = _
|
||||
|
||||
|
|
@ -54,12 +55,12 @@ class GATKResourcesBundle extends QScript {
|
|||
def liftover(in: File, inRef: Reference, out: File, outRef: Reference): CommandLineFunction = {
|
||||
//Console.printf("liftover(%s => %s)%n", inRef.name, outRef.name)
|
||||
(inRef.name, outRef.name) match {
|
||||
// case ("b37", "hg19") =>
|
||||
// return new HGRFC2UCSC(in, out)
|
||||
case ("b37", "hg19") =>
|
||||
return new LiftOverPerl(in, out, new File("chainFiles/b37Tohg19.chain"), inRef, outRef)
|
||||
case ("b37", "hg18") =>
|
||||
return new LiftOverPerl(in, out, new File("chainFiles/b37tohg18.chain"), inRef, outRef)
|
||||
// case ("b37", "b36") =>
|
||||
// return new LiftOverPerl(in, out, new File("chainFiles/b37Tob36.chain"), inRef, outRef)
|
||||
case ("b37", "b36") =>
|
||||
return new LiftOverPerl(in, out, new File("chainFiles/b37tob36.chain"), inRef, outRef)
|
||||
case _ => return null
|
||||
}
|
||||
}
|
||||
|
|
@ -101,10 +102,12 @@ class GATKResourcesBundle extends QScript {
|
|||
//
|
||||
b37 = new Reference("b37", new File("/humgen/1kg/reference/human_g1k_v37.fasta"))
|
||||
hg18 = new Reference("hg18", new File("/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta"))
|
||||
b36 = new Reference("b36", new File("/humgen/1kg/reference/human_b36_both.fasta"))
|
||||
exampleFASTA = new Reference("exampleFASTA", new File("testdata/exampleFASTA.fasta"))
|
||||
refs = List(b37, hg18, exampleFASTA)
|
||||
refs = List(b37, hg18, b36, exampleFASTA)
|
||||
|
||||
addResource(new Resource(b37.file, "", b37, false))
|
||||
addResource(new Resource(b36.file, "", b36, false))
|
||||
addResource(new Resource(hg18.file, "", hg18, false))
|
||||
|
||||
//
|
||||
|
|
@ -134,6 +137,8 @@ class GATKResourcesBundle extends QScript {
|
|||
addResource(new Resource("/humgen/gsa-hpprojects/GATK/data/refGene_b37.sorted.txt",
|
||||
"refGene", b37, true, false))
|
||||
|
||||
addResource(new Resource("chainFiles/hg18tob37.chain", "", hg18, false, false))
|
||||
addResource(new Resource("chainFiles/b36tob37.chain", "", b36, false, false))
|
||||
|
||||
// todo -- chain files?
|
||||
// todo 1000G SNP and indel call sets?
|
||||
|
|
@ -304,10 +309,6 @@ class GATKResourcesBundle extends QScript {
|
|||
oldRef.file.replace(".fasta", ""), jobTempDir)
|
||||
}
|
||||
|
||||
// class HGRFC2UCSC(@Input val in: File, @Output val out: File) extends CommandLineFunction {
|
||||
// def commandLine = "python ../python/vcf_b36_to_hg18.py %s %s".format(in.getAbsolutePath, out.getAbsolutePath)
|
||||
// }
|
||||
|
||||
def getExtension(f: File): String = {
|
||||
val i = f.getName.lastIndexOf('.');
|
||||
if (i > 0 && i < f.getName.length() - 1)
|
||||
|
|
|
|||
|
|
@ -0,0 +1,89 @@
|
|||
package oneoffs.depristo
|
||||
|
||||
import org.broadinstitute.sting.queue.extensions.gatk._
|
||||
import org.broadinstitute.sting.queue.QScript
|
||||
|
||||
class AssessChipCoverageOfPanel extends QScript {
|
||||
qscript =>
|
||||
@Argument(doc="Path to GATK jar",required=false,shortName="gatkjarfile") var gatkJarFile: File = new File("dist/GenomeAnalysisTK.jar")
|
||||
@Argument(doc="Panel VCF",required=true,shortName="panelVCF") var panelVCF: File = _
|
||||
@Argument(doc="BAM File",required=true, shortName="bam") var bam: File = null
|
||||
@Argument(doc="Bundle path",required=false, shortName="bundle") var bundle: File = new File("/humgen/gsa-hpprojects/GATK/bundle/current/b37/")
|
||||
|
||||
@Argument(shortName = "R", doc="ref", required=true)
|
||||
var referenceFile: File = _
|
||||
|
||||
@Argument(shortName = "L", doc="intervals", required=false)
|
||||
val TARGET_INTERVAL: String = null;
|
||||
|
||||
def HM3_VCF: File = new File(bundle + "/hapmap_3.3.b37.sites.vcf")
|
||||
def OMNI_VCF: File = new File(bundle + "/1000G_omni2.5.b37.sites.vcf")
|
||||
|
||||
trait GATKArgs extends CommandLineGATK {
|
||||
this.logging_level = "INFO";
|
||||
this.jarFile = gatkJarFile;
|
||||
if ( TARGET_INTERVAL != null )
|
||||
this.intervalsString = List(TARGET_INTERVAL);
|
||||
this.reference_sequence = referenceFile;
|
||||
this.memoryLimit = 2
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// GENOTYPING SPECIFIC SITES IN A BAM FILE
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
class GenotypeBAMAtSites(@Input bam: File, @Input sitesVCF: File, @Output genotypesVCF: File) extends UnifiedGenotyper with GATKArgs {
|
||||
this.input_file = List(bam)
|
||||
this.o = genotypesVCF
|
||||
this.stand_call_conf = 0.0
|
||||
this.out_mode = org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_ALL_SITES
|
||||
this.gt_mode = org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES
|
||||
this.rodBind :+= new RodBind("alleles","VCF",sitesVCF)
|
||||
|
||||
// we only want chromosome counts annotations
|
||||
this.BTI = "alleles"
|
||||
this.G = List("none")
|
||||
this.A :+= "ChromosomeCounts"
|
||||
this.nsl = true
|
||||
|
||||
// make sure we have the right intervals
|
||||
this.BTIMR = org.broadinstitute.sting.utils.interval.IntervalSetRule.INTERSECTION
|
||||
}
|
||||
|
||||
// todo -- include AF from 1000G in annotation
|
||||
// todo -- distance from SNP to nearest chip SNP
|
||||
|
||||
class EvalCalls(@Input vcf: File) extends VariantEval with GATKArgs {
|
||||
this.o = swapExt(vcf, ".vcf", ".vcf.eval")
|
||||
this.rodBind :+= RodBind("eval", "VCF", vcf)
|
||||
this.rodBind :+= RodBind("compOMNI", "VCF", OMNI_VCF)
|
||||
this.rodBind :+= RodBind("compHapMap3", "VCF", HM3_VCF)
|
||||
}
|
||||
|
||||
class AnnotateCalls(@Input vcf: File, @Output file: File) extends VariantAnnotator with GATKArgs {
|
||||
this.o = file
|
||||
this.rodBind :+= RodBind("variant", "VCF", vcf)
|
||||
this.rodBind :+= RodBind("compOMNI", "VCF", OMNI_VCF)
|
||||
this.rodBind :+= RodBind("compHapMap3", "VCF", HM3_VCF)
|
||||
}
|
||||
|
||||
class MakeTable(@Input vcf: File, @Output table: File) extends VariantsToTable with GATKArgs {
|
||||
this.o = table
|
||||
this.rodBind :+= RodBind("variants", "VCF", vcf)
|
||||
this.allowMissingData = true
|
||||
this.fields = List("CHROM", "POS", "REF", "ALT", "TRANSITION", "HapMap3", "OMNI", "AC", "AN", "AF")
|
||||
}
|
||||
|
||||
def script = {
|
||||
val genotyped = new File(swapExt(bam, ".bam", "_genotyped_at." + panelVCF.getName).getName)
|
||||
val annotated = new File(swapExt(genotyped, ".vcf", ".annotated.vcf"))
|
||||
val table = new File(swapExt(annotated, ".vcf", ".vcf.table"))
|
||||
|
||||
add(new GenotypeBAMAtSites(bam, panelVCF, genotyped))
|
||||
add(new AnnotateCalls(genotyped, annotated))
|
||||
add(new EvalCalls(annotated))
|
||||
add(new MakeTable(annotated, table))
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue