From bffb8bb01f0838cd10e8ff64b83869c9218ab73f Mon Sep 17 00:00:00 2001 From: chartl Date: Fri, 8 Oct 2010 14:04:53 +0000 Subject: [PATCH] The SVN repository is not for dumb analysis-specific scripts. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4460 348d0f76-0448-11de-a6fe-93d51630548a --- scala/qscript/chartl/omni_bootstrap_refine.q | 124 ------------------- 1 file changed, 124 deletions(-) delete mode 100755 scala/qscript/chartl/omni_bootstrap_refine.q diff --git a/scala/qscript/chartl/omni_bootstrap_refine.q b/scala/qscript/chartl/omni_bootstrap_refine.q deleted file mode 100755 index e20fcf550..000000000 --- a/scala/qscript/chartl/omni_bootstrap_refine.q +++ /dev/null @@ -1,124 +0,0 @@ -import java.io.{FileReader, File, BufferedReader} -import net.sf.picard.reference.FastaSequenceFile -import org.broadinstitute.sting.datasources.pipeline.Pipeline -import org.broadinstitute.sting.gatk.DownsampleType -import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeCalculationModel.Model -import org.broadinstitute.sting.queue.extensions.gatk._ -import org.broadinstitute.sting.queue.extensions.picard.PicardBamJarFunction -import org.broadinstitute.sting.queue.extensions.samtools._ -import org.broadinstitute.sting.queue.{QException, QScript} -import collection.JavaConversions._ -import org.broadinstitute.sting.utils.yaml.YamlUtils -import org.broadinstitute.sting.utils.report.VE2ReportFactory.VE2TemplateType - -class omni_bootstrap_refine extends QScript { - => script - - val ref = new File("/humgen/1kg/reference/human_g1k_v37.fasta") - val gatkJar = new File("/humgen/gsa-scr1/chartl/sting/dist/GenomeAnalysisTK.jar") - - trait FooTrait extends CommandLineGATK { - this.reference_sequence = script.ref - this.jarFile = script.gatkJar - this.intervalsString :+= "20" - } - - class SampleOverlap extends CommandLineFunction { - @Input(doc="omni vcf") var omni: File = _ - @Input(doc="other vcf") var other: File = _ - @Output(doc="output sn file") var out : File = _ - - def commandLine = "%s ; %s ; %s ; %s".format(this.cmd1, this.cmd2, this.cmd3, this.cmd4) - - def cmd1 : String = { - return "head -n 500 %s | grep #CHR | cut -f10- | tr '\t' '\n' | sort > temp1".format(omni.getAbsolutePath) - } - - def cmd2 : String = { - return "head -n 500 %s | grep #CHR | cut -f10- | tr '\t' '\n'| sort > temp2".format(other.getAbsolutePath) - } - - def cmd3 : String = { - return "cat temp1 temp2 | sort | uniq -c | awk '{if($1 == 2) print $2}' > %s".format(out.getAbsolutePath) - } - - def cmd4 : String = { - return "rm temp1 temp2" - } - } - - class BeagleRefinement extends CommandLineFunction { - @Input(doc="The beagle input file") var beagleInput: File = _ - var beagleOutputBase: String = _ - var beagleMemoryGigs: Int = 4 - - /** - * Note: These get set - */ - @Output(doc="The beagle phased file") var beaglePhasedFile: File = _ - @Output(doc="The beagle likelihood file") var beagleLikelihoods: File = _ - @Output(doc="The beagle r2 file") var beagleRSquared: File = _ - var beagleOutputDir: String = _ - - def freezeOutputs = { - if ( beagleInput.getParent == null ) { - beagleOutputDir = "" - } else { - beagleOutputDir = beagleInput.getParent - } - beaglePhasedFile = new File(beagleOutputDir+beagleOutputBase+"."+beagleInput.getName+".phased.gz") - beagleLikelihoods = new File(beagleOutputDir+beagleOutputBase+"."+beagleInput.getName+".gprobs.gz") - beagleRSquared = new File(beagleOutputDir+beagleOutputBase+"."+beagleInput.getName+".r2") - } - - def commandLine = "java -Djava.io.tmpdir=%s -Xmx%dg -jar %s like=%s out=%s".format(beagleInput.getParent,beagleMemoryGigs,beagleJar,beagleInput.getAbsolutePath,beagleOutputBase) - } - - def runme(pop: File, omni: File, base: String) : List[CommandLineFunction] = { - var clf : List[CommandLineFunction] = Nil - val s_overlap = new File(base+"_sample_overlap.txt") - var calcOverlap = new SampleOverlap - calcOverlap.omni = omni - calcOverlap.other = pop - calcOverlap.out = s_overlap - - clf += calcOverlap - - val subset_omni = swapExt(omni,".vcf","_%s_subset.vcf".format(base)) - val subset_pop = swapExt(pop,".vcf","_%s_subset.vcf".format(base)) - - var omSubset = new SelectVariants with FooTrait - omSubset.variantVCF = omni - omSubset.sample = s_overlap.getAbsolutePath - omSubset.out = subset_omni - - clf += omSubset - - var popSubset = new SelectVariants with FooTrait - popSubset.variantVCF = pop - popSubset.sample = s_overlap.getAbsolutePath - popSubset.out = subset_pop - - clf += popSubset - - var bootRefine = new ProduceBeagleInput with FooTrait - bootRefine.variantVCF = popSubset - bootRefine.rodBind :+= new RodBind("validation","VCF",subset_omni) - bootRefine.bootstrap_vcf = swapExt(subset_omni,".vcf",".boot.vcf") - bootRefine.bootstrap = Some(2) - bootRefine.validation_genotype_ptrue = Some(0.98); - bootRefine.out = new File(base+".beagle_input.beagle") - - clf += bootRefine - - var runBeagle = new BeagleRefinement - runBeagle.beagleInput = bootRefine.out - runBeagle.beagleOutputBase = base+"_beagle" - runBeagle.freezeOutputs() - - clf += runBeagle - - var putBack = new BeagleOutputToVCF with FooTrait - putBack. - } -} \ No newline at end of file