From 8bb92160b50b871888159e8b66f70cc301d2ba39 Mon Sep 17 00:00:00 2001 From: carneiro Date: Tue, 3 May 2011 21:19:42 +0000 Subject: [PATCH] Script to identify mendelian violations in the CEU Trio and follow up with supposedly incorrect SNP calls git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5744 348d0f76-0448-11de-a6fe-93d51630548a --- .../oneoffs/carneiro/mendelianViolation.scala | 56 +++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100755 scala/qscript/oneoffs/carneiro/mendelianViolation.scala diff --git a/scala/qscript/oneoffs/carneiro/mendelianViolation.scala b/scala/qscript/oneoffs/carneiro/mendelianViolation.scala new file mode 100755 index 000000000..e285bfa60 --- /dev/null +++ b/scala/qscript/oneoffs/carneiro/mendelianViolation.scala @@ -0,0 +1,56 @@ +import java.io.File +import org.broadinstitute.sting.commandline.Argument +import org.broadinstitute.sting.queue.extensions.gatk.{SelectVariants, RodBind} +import org.broadinstitute.sting.queue.QScript + +/* +* Created by IntelliJ IDEA. 
+* User: carneiro
+* Date: 4/12/11
+* Time: 11:24 AM
+*/
+
+/**
+ * Queue script that chains two SelectVariants jobs:
+ *
+ *  1. Run SelectVariants on the trio VCF with the `family` and `mvq` options set,
+ *     writing the result to `trio_violations.vcf`.
+ *  2. Run SelectVariants on the daughter VCF with the first job's output bound as
+ *     the "conc" track, writing the result to `daughter_violations.vcf`.
+ *
+ * NOTE(review): the first job presumably keeps only sites that are Mendelian
+ * violations for the given family, and the second presumably keeps daughter calls
+ * that are concordant with those sites -- confirm against the SelectVariants
+ * documentation for the GATK build referenced by `GATKjar`.
+ *
+ * All arguments are optional and default to the author's own file locations.
+ */
+class mendelianViolation extends QScript
+{
+
+  /** Trio call set that the first SelectVariants job reads. */
+  @Argument(shortName="trio", doc="input trio VCF file", required=false)
+  var trio: File = new File("/home/radon00/carneiro/prj/trio/analysis/calls/CEUTrio.WEx.recalibrated.vcf")
+
+  /** Daughter call set that the second SelectVariants job reads. */
+  @Argument(shortName="daughter", doc="daughter input VCF file", required=false)
+  var daughter: File = new File("/humgen/gsa-hpprojects/dev/rpoplin/perfectCalls/NA12878/chart/sting/NA12878.HiSeq19.recalibrated.hapmapCut.vcf")
+
+  /** Family string handed unchanged to SelectVariants' `family` option. */
+  @Argument(shortName="family", doc="family string", required=false)
+  var family: String = "NA12891+NA12892=NA12878"
+
+  /** Value handed unchanged to SelectVariants' `mvq` option. */
+  @Argument(shortName="mvq", doc="mendelian violation quality", required=false)
+  var mvq: Double = 20
+
+  /** GATK jar used to run both jobs. */
+  @Input(doc="path to GenomeAnalysisTK.jar", shortName="gatk", required=false)
+  var GATKjar: File = new File("/humgen/gsa-scr1/carneiro/stable/dist/GenomeAnalysisTK.jar")
+
+  /**
+   * Builds the two SelectVariants jobs and registers them with Queue via `add`.
+   */
+  def script: Unit = {
+    val reference = new File("/seq/references/Homo_sapiens_assembly19/v1/Homo_sapiens_assembly19.fasta")
+    // Both outputs are relative paths, written explicitly as Files rather than
+    // relying on an implicit String-to-File conversion.
+    val trioViolations = new File("trio_violations.vcf")
+    val daughterViolations = new File("daughter_violations.vcf")
+
+    // Creates a SelectVariants job carrying the settings both jobs share:
+    // reference, GATK jar, memory limit, and the given output file.
+    def selectVariantsInto(output: File): SelectVariants = {
+      val walker = new SelectVariants()
+      walker.reference_sequence = reference
+      walker.out = output
+      walker.jarFile = GATKjar
+      walker.memoryLimit = 4
+      walker
+    }
+
+    // Job 1: trio VCF, with the family and mvq options set.
+    val mv = selectVariantsInto(trioViolations)
+    mv.rodBind :+= RodBind("variant", "VCF", trio)
+    mv.family = family
+    mv.mvq = mvq
+
+    // Job 2: daughter VCF, with job 1's output bound under the name "conc" and
+    // that same name passed as the `conc` option.
+    val intersection = selectVariantsInto(daughterViolations)
+    intersection.rodBind :+= RodBind("variant", "VCF", daughter)
+    intersection.rodBind :+= RodBind("conc", "VCF", trioViolations)
+    intersection.conc = "conc"
+
+    add(mv, intersection)
+  }
+}