diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java index 22b6a9b72..f2c14c99b 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java @@ -914,5 +914,5 @@ public class VariantEvalWalker extends RodWalker implements Tr return eval.hasGenotypes() ? eval.getNSamples() : nSamples; } - protected Logger getLogger() { return logger; } + public Logger getLogger() { return logger; } } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/DesignFileGeneratorWalker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/DesignFileGeneratorWalker.java index ea904d26e..5dcfa7cd9 100644 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/DesignFileGeneratorWalker.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/DesignFileGeneratorWalker.java @@ -19,7 +19,7 @@ import java.io.PrintStream; * Takes an interval list and annotates intervals with genes and exons falling within that interval * Was written in order to annotate the Whole Exome Agilent designs at the Broad institute * Bind the refseq rod as -B refseq,refseq,/path/to/refGene.txt - * Bind the interval list as -B interval_list,intervals,/path/to/intervals.interval_list + * Bind the interval list as -B interval_list,bed,/path/to/intervals.interval_list * Bind the additional files file as -B gene*,bed,/path/to/other/file.bed * @Author chartl * @Date Apr 26, 2010 @@ -39,7 +39,7 @@ public class DesignFileGeneratorWalker extends RodWalker { return null; } - List intervalsList= tracker.getReferenceMetaData("interval_list"); + List intervalsList= tracker.getGATKFeatureMetaData("interval_list",false); List refseqList = tracker.getReferenceMetaData("refseq"); List bedList = tracker.getGATKFeatureMetaData("gene",false); diff --git a/scala/qscript/chartl/fullCallingPipelineV2.q b/scala/qscript/chartl/fullCallingPipelineV2.q index 0f0efe01c..48c92418b 100755 --- a/scala/qscript/chartl/fullCallingPipelineV2.q +++ b/scala/qscript/chartl/fullCallingPipelineV2.q @@ -61,6 +61,4 @@ class fullCallingPipelineV2 extends QScript { addAll(lib.StandardCallingPipeline(bamFiles,indel_vcf,recal_vcf,handfilt_vcf,fcp.pipelineArgs.target_titv,fcp.pipelineArgs.refseqTable)) } - - def addAll(clfs: List[CommandLineFunction]) = { clfs.foreach(c => add(c)) } } diff --git a/scala/qscript/chartl/private_mutations.q b/scala/qscript/chartl/private_mutations.q new file mode 100755 index 000000000..d4fe3bd70 --- /dev/null +++ b/scala/qscript/chartl/private_mutations.q @@ -0,0 +1,57 @@ +import collection.JavaConversions._ +import java.io.FileNotFoundException +import org.broadinstitute.sting.datasources.pipeline._ +import org.broadinstitute.sting.queue.extensions.gatk.{VariantFiltration, UnifiedGenotyper} +import org.broadinstitute.sting.queue.library.clf.vcf._ +import org.broadinstitute.sting.queue.pipeline._ +import org.broadinstitute.sting.queue.QScript +import org.broadinstitute.sting.utils.yaml.YamlUtils + +class private_mutations extends QScript { + @Argument(shortName="yaml",fullName="eomiYaml",doc="Project YAML file",required=true) var eomiYaml: File = _ + @Argument(shortName="sting",fullName="stingDir",doc="path to the Sting directory",required=true) var sting: String = _ + @Argument(shortName="out",fullName="finalVCF",doc="the merged vcf to write to", required=true) var finalMergedVCF : File = _ + + var gatkjar : File = _ + def script = { + gatkjar = new File(sting+"/dist/GenomeAnalysisTK.jar") + var input_pipeline : Pipeline = YamlUtils.load(classOf[Pipeline],eomiYaml) + var eomi_pipeline : Pipeline = new Pipeline + // use only QC-positive samples + eomi_pipeline.setProject( input_pipeline.getProject ) + eomi_pipeline.setSamples( input_pipeline.getSamples.filter( p => p.getTags.get("QCStatus").equals("PASS")) ) + var vcLib : VariantCalling = new VariantCalling(eomi_pipeline,gatkjar) + var pmLib : ProjectManagement = new ProjectManagement(sting) + + /*eomi_pipeline.getSamples.foreach( p => + if ( ! p.getBamFiles.get("recalibrated").exists) throw new FileNotFoundException( + p.getBamFiles.get("recalibrated").getAbsolutePath+" does not exist" ))*/ + + var batches : List[List[PipelineSample]] = eomi_pipeline.getSamples.toList.grouped(100).toList + var genotypers : List[UnifiedGenotyper] = batches.map( pl => pl.map( p => p.getBamFiles.get("recalibrated") ) ).zipWithIndex.map( + b => vcLib.StandardUnifiedGenotyper(b._1,new File(eomi_pipeline.getProject.getName+"_batch%d.raw.vcf".format(1+b._2)))) + addAll(genotypers) + + var handFilters : List[VariantFiltration] = genotypers.map( g => vcLib.StandardHandfilter(g.out,swapExt(g.out,".raw.vcf",".handfiltered.vcf"))) + + addAll(handFilters) + + addAll(pmLib.MergeBatches(handFilters.map( _.out), batches.flatten.map( p => p.getBamFiles.get("recalibrated")), + finalMergedVCF,eomi_pipeline.getProject.getReferenceFile,20)) + + var afr_sams : List[PipelineSample] = eomi_pipeline.getSamples.toList.filter( p => p.getTags.get("Population").equals("AFRAMR")) + var eur_sams : List[PipelineSample] = eomi_pipeline.getSamples.toList.filter( p => p.getTags.get("Population").equals("EURAMR") || + p.getTags.get("Population").equals("UNK")) + + var variant_loci : VCFExtractIntervals = new VCFExtractIntervals(finalMergedVCF,swapExt(finalMergedVCF,".vcf",".intervals.list"),false) + + add(variant_loci) + + var extract_afr : VCFExtractSamples = new VCFExtractSamples(finalMergedVCF,swapExt(finalMergedVCF,".vcf",".afr.vcf"),afr_sams.map(_.getId)) + var extract_eur : VCFExtractSamples = new VCFExtractSamples(finalMergedVCF,swapExt(finalMergedVCF,".vcf",".eur+unk.vcf"),eur_sams.map(_.getId)) + + add(extract_afr) + add(extract_eur) + } + +} \ No newline at end of file diff --git a/scala/src/org/broadinstitute/sting/queue/QScript.scala b/scala/src/org/broadinstitute/sting/queue/QScript.scala index 3736abdaa..8cb9221f2 100755 --- a/scala/src/org/broadinstitute/sting/queue/QScript.scala +++ b/scala/src/org/broadinstitute/sting/queue/QScript.scala @@ -69,6 +69,10 @@ trait QScript extends Logging { functions.foreach(function => function.addOrder = QScript.nextAddOrder) this.functions ++= functions } + + def addAll(functions: List[QFunction] ) = { + functions.foreach( f => add(f) ) + } } object QScript { diff --git a/scala/src/org/broadinstitute/sting/queue/library/clf/vcf/VCFExtractIntervals.scala b/scala/src/org/broadinstitute/sting/queue/library/clf/vcf/VCFExtractIntervals.scala new file mode 100755 index 000000000..ffb4476d5 --- /dev/null +++ b/scala/src/org/broadinstitute/sting/queue/library/clf/vcf/VCFExtractIntervals.scala @@ -0,0 +1,21 @@ +package org.broadinstitute.sting.queue.library.clf.vcf + +import java.io.File +import org.broadinstitute.sting.commandline.{Argument, Output, Input} +import org.broadinstitute.sting.queue.function.CommandLineFunction + +class VCFExtractIntervals(inVCF: File, outList: File, passOnly: Boolean ) extends CommandLineFunction { + def this(vcf: File, list: File ) = this(vcf,list,false) + def this(vcf: File) = this(vcf, new File(vcf.getAbsolutePath.replace("vcf","intervals.list")),false) + def this(vcf: File, removeFilters: Boolean) = this(vcf, new File(vcf.getAbsolutePath.replace("vcf","intervals.list")), removeFilters) + + @Input(doc="The VCF from which to extract an interval list") var inputVCF : File = inVCF + @Output(doc="The file to write the interval list to") var outputList : File = outList + @Argument(doc="Whether to use all sites, or only the unfiltered sites") var usePFOnly: Boolean = passOnly + + def commandLine = { + if ( usePFOnly ) "grep PASS %s | awk '{print $1\":\"$2}' > %s".format(inputVCF.getAbsolutePath,outputList.getAbsolutePath) + else "grep -v \\\\# %s | awk '{print $1\":\"$2}' > %s".format(inputVCF.getAbsolutePath,outputList.getAbsolutePath) + } + +} \ No newline at end of file diff --git a/scala/src/org/broadinstitute/sting/queue/library/clf/vcf/VCFExtractSamples.scala b/scala/src/org/broadinstitute/sting/queue/library/clf/vcf/VCFExtractSamples.scala new file mode 100755 index 000000000..9c784e108 --- /dev/null +++ b/scala/src/org/broadinstitute/sting/queue/library/clf/vcf/VCFExtractSamples.scala @@ -0,0 +1,33 @@ +package org.broadinstitute.sting.queue.library.clf.vcf + +import java.io.File +import collection.JavaConversions._ +import org.broadinstitute.sting.commandline._ +import org.broadinstitute.sting.queue.function.CommandLineFunction +import org.broadinstitute.sting.utils.text.XReadLines + +class VCFExtractSamples(inVCF: File, outVCF: File, samples: List[String]) extends CommandLineFunction { + @Input(doc="input VCF from which to extract samples") var inputVCF : File = inVCF + @Output(doc="output VCF to write extracted samples to") var outputVCF : File = outVCF + @Argument(doc="List of samples to extract from the VCF") var sampleList : List[String] = samples + + var sampleGrep : String = _ + + def this(in: File, out: File, samples: File) = this(in,out, (new XReadLines(samples)).readLines.toList) + + override def freezeFieldValues = { + this.logger.warn("Note: Using VCFExtractSamples invalidates AC/AF/AN annotations. This is an explicit warning.") + sampleGrep = "\"" + sampleList.foldLeft[String](sampleList.get(0))( (a,b) => a + "|" + b) + "\"" + super.freezeFieldValues + } + + def commandLine = { + + var first : String = "head -n 500 %s | grep \\\\#\\\\# > %s".format(inputVCF.getAbsolutePath,outputVCF.getAbsolutePath) + var second : String = "head -n 500 %s | grep \\\\#CHR | tr '\\t' '\\n' | awk '{print NF\"\\t\"$1}' ".format(inputVCF) + second += "| egrep %s | awk '{print $1}' | tr '\\n' ',' | xargs -i cut -f1-9,\\{\\} %s | grep -v \\\\#\\\\# >> %s".format(sampleGrep,inputVCF.getAbsolutePath,outputVCF.getAbsolutePath) + + first+" ; "+second + } + +} \ No newline at end of file diff --git a/scala/src/org/broadinstitute/sting/queue/pipeline/ProjectManagement.scala b/scala/src/org/broadinstitute/sting/queue/pipeline/ProjectManagement.scala index 8592305f4..3e6379149 100755 --- a/scala/src/org/broadinstitute/sting/queue/pipeline/ProjectManagement.scala +++ b/scala/src/org/broadinstitute/sting/queue/pipeline/ProjectManagement.scala @@ -16,6 +16,8 @@ class ProjectManagement(stingPath: String) { var stingDirPath : String = stingPath + def this(f : File) = this(f.getAbsolutePath) + class PassFilterAlleles(vcf_files: List[File], out_list: File) extends CommandLineFunction { @Input(doc="List of VCFs to extract PF sites from") var vcfs = vcf_files @Output(doc="The file to write the site list to") var out_vcf = out_list @@ -24,7 +26,7 @@ class ProjectManagement(stingPath: String) { def commandLine = { "egrep \"FORMAT|format\" %s | cut -f1-8 > %s ; grep PASS %s | tr ':' '\\t' | awk '{print $2\"\\t\"$3\"\\t\"$4\"\\t\"$5\"\\t\"$6\"\\t.\\t.\\t.\"}' | sort -n -k2,2 | uniq | perl %s - %s.fai >> %s".format( - vcf_files(1).getAbsolutePath, out_vcf.getAbsolutePath, vcf_files.foldLeft[String]("")( (b,a) => b + " " + a.getAbsolutePath), sortByRef, ref.getAbsolutePath, out_vcf.getAbsolutePath + vcf_files(0).getAbsolutePath, out_vcf.getAbsolutePath, vcf_files.foldLeft[String]("")( (b,a) => b + " " + a.getAbsolutePath), sortByRef, ref.getAbsolutePath, out_vcf.getAbsolutePath ) } } @@ -50,7 +52,7 @@ class ProjectManagement(stingPath: String) { cmds :+= ints - var calcs: List[UGCalcLikelihoods] = allBams.grouped(size).toList.zipWithIndex.map(u => LikelihoodCalc(u._1,ref,ints.intervals,pfSites.out_vcf, new File("batch%d.likelihoods.vcf".format(u._2)))) + var calcs: List[UGCalcLikelihoods] = allBams.grouped(size).toList.zipWithIndex.map(u => LikelihoodCalc(u._1,ref,ints.intervals,pfSites.out_vcf, new File("MBatch%d.likelihoods.vcf".format(u._2)))) cmds ++= calcs