diff --git a/scala/qscript/chartl/private_mutations.q b/scala/qscript/chartl/private_mutations.q
index 9afd5c900..ea4edadd8 100755
--- a/scala/qscript/chartl/private_mutations.q
+++ b/scala/qscript/chartl/private_mutations.q
@@ -2,7 +2,7 @@ import collection.JavaConversions._
 import java.io.FileNotFoundException
 import org.broadinstitute.sting.datasources.pipeline._
 import org.broadinstitute.sting.queue.extensions.gatk._
-import org.broadinstitute.sting.queue.library.clf.vcf._
+import org.broadinstitute.sting.queue.library.ipf.vcf._
 import org.broadinstitute.sting.queue.pipeline._
 import org.broadinstitute.sting.queue.QScript
 import org.broadinstitute.sting.utils.yaml.YamlUtils
@@ -63,7 +63,7 @@ class private_mutations extends QScript {
     eval_all.out = swapExt(finalMergedVCF,".vcf",".perm.csv")
     eval_all.reportType = Some(org.broadinstitute.sting.utils.report.VE2ReportFactory.VE2TemplateType.CSV)
 
-    //add(eval_all)
+    add(eval_all)
 
     var eval_afr : VariantEval = vcLib.addTrait(new VariantEval)
     eval_afr.rodBind :+= new RodBind("evalAFR","VCF",extract_afr.outputVCF)
@@ -73,7 +73,7 @@ class private_mutations extends QScript {
     eval_afr.reportType = Some(org.broadinstitute.sting.utils.report.VE2ReportFactory.VE2TemplateType.CSV)
     eval_afr.noStandard = true
 
-    //add(eval_afr)
+    add(eval_afr)
 
     var eval_eur : VariantEval = vcLib.addTrait(new VariantEval)
     eval_eur.rodBind :+= new RodBind("compAFR","VCF",extract_afr.outputVCF)
@@ -83,7 +83,7 @@ class private_mutations extends QScript {
     eval_eur.reportType = Some(org.broadinstitute.sting.utils.report.VE2ReportFactory.VE2TemplateType.CSV)
     eval_eur.noStandard = true
 
-    //add(eval_eur)
+    add(eval_eur)
 
   }
-}
\ No newline at end of file
+}
diff --git a/scala/qscript/lib/Vcf2Table.q b/scala/qscript/lib/Vcf2Table.q
new file mode 100755
index 000000000..c6028b2b4
--- /dev/null
+++ b/scala/qscript/lib/Vcf2Table.q
@@ -0,0 +1,22 @@
+import org.broadinstitute.sting.commandline.Hidden
+import org.broadinstitute.sting.queue.QScript
+
+import org.broadinstitute.sting.queue.library.ipf.vcf.VCFInfoToTable
+import collection.JavaConversions._
+
+class Vcf2Table extends QScript {
+  @Argument(shortName="vcf",doc="VCF file",required=true) var vcf : File = _
+  @Argument(shortName="f",doc="Info fields to extract",required=false) var fields : java.util.List[String] = new java.util.ArrayList[String]
+  @Argument(shortName="o",doc="Output file",required=true) var output : File = _
+  @Argument(shortName="useFilters",doc="Use filtered sites?",required=false) var useFilters : Boolean = false
+  @Hidden @Argument(shortName="pass",doc="set the hack filter string to this value",required=false) var filterString : String = "PASS"
+  @Hidden @Argument(shortName="notfound",doc="set the hack no entry string to this value",required=false) var keyNotFound : String = "NA"
+
+  def script = {
+    var vcf2table : VCFInfoToTable = new VCFInfoToTable(vcf,output,fields,useFilters)
+    vcf2table.PF_KEY = filterString
+    vcf2table.NO_KEY = keyNotFound
+    add(vcf2table)
+    this.functions.foreach(u => logger.debug("added: %s%n".format(u.toString)))
+  }
+}
\ No newline at end of file
diff --git a/scala/src/org/broadinstitute/sting/queue/library/ipf/vcf/VCFInfoToTable.scala b/scala/src/org/broadinstitute/sting/queue/library/ipf/vcf/VCFInfoToTable.scala
new file mode 100755
--- /dev/null
+++ b/scala/src/org/broadinstitute/sting/queue/library/ipf/vcf/VCFInfoToTable.scala
@@ -0,0 +1,81 @@
+package org.broadinstitute.sting.queue.library.ipf.vcf
+
+import org.broadinstitute.sting.commandline._
+import org.broadinstitute.sting.utils.text.XReadLines
+import collection.JavaConversions._
+import java.io.{PrintStream, PrintWriter, File}
+import collection.immutable.HashSet
+import collection.mutable.HashMap
+import org.broadinstitute.sting.queue.function.InProcessFunction
+
+/**
+ * In-process function that writes selected INFO annotations of a VCF to a tab-delimited table.
+ *
+ * One row is written per non-header record, with one column per requested annotation in the
+ * order given; an annotation absent from a record is written as NO_KEY. No header row is written.
+ *
+ * @param vcf        VCF file to read
+ * @param table      file the table is written to
+ * @param annots     INFO keys to extract, in output column order
+ * @param keepFilter if true, records whose FILTER column is not PF_KEY are written as well
+ */
+class VCFInfoToTable(vcf: File, table: File, annots: List[String], keepFilter: Boolean) extends InProcessFunction {
+  def this(in: File, out: File, anns: List[String]) = this(in,out,anns,true)
+  def this(in: File, out: File) = this(in,out,Nil,true)
+  def this(in: File) = this(in, new File(in.getAbsolutePath.replace(".vcf",".info.table")), Nil, true)
+  def this(in: File, anns: List[String]) = this(in, new File(in.getAbsolutePath.replace(".vcf",".info.table")),anns,true)
+  def this(in: File, anns: java.util.List[String]) = this(in, new File(in.getAbsolutePath.replace(".vcf",".info.table")),anns.toList,true)
+  def this(in: File, anns: List[String], keep: Boolean) = this(in, new File(in.getAbsolutePath.replace(".vcf",".info.table")),anns,keep)
+  def this(in: File, anns: java.util.List[String], keep: Boolean) = this(in, new File(in.getAbsolutePath.replace(".vcf",".info.table")),anns.toList,keep)
+  def this(in: File, out: File, anns: java.util.List[String], keep: Boolean) = this(in,out,anns.toList,keep)
+
+  @Input(doc="VCF file from which to extract annotion") var inVCF: File = vcf
+  @Output(doc="Table file to which to write") var outTable: File = table
+  @Argument(doc="Annotations to extract from info field") var annotations: List[String] = annots
+  @Argument(doc="Keep filtered records?") var keepFilteredRecs: Boolean = keepFilter
+
+  // set as vars so pipelines can hack these values
+  var NO_KEY : String = "NA"
+  var PF_KEY : String = "PASS"
+
+  var out : PrintWriter = _
+  var annotation_set : HashSet[String] = new HashSet[String]
+
+  //todo -- Khalid: Why is run not being called?
+  /** Reads inVCF line by line and writes the extracted annotation table to outTable. */
+  def run = {
+    logger.debug("RUN IS CALLED")
+    annotation_set ++= annotations
+    out = new PrintWriter(new PrintStream(outTable))
+    try {
+      asScalaIterator(new XReadLines(inVCF)).foreach(lineToTable)
+    } finally {
+      // the writer is buffered: without close() the table can be left empty or truncated
+      out.close()
+    }
+  }
+
+  /**
+   * Writes one table row for a VCF record; header lines (starting with '#') are ignored.
+   *
+   * @param line a single line of the VCF, tab-delimited
+   */
+  def lineToTable(line : String) = {
+    if ( ! line.startsWith("#") ) {
+      val spline = line.split("\t")
+      // a record needs at least the 8 fixed VCF columns; shorter lines (e.g. blank ones) are skipped
+      if ( spline.length > 7 && ( keepFilteredRecs || spline(6).equals(PF_KEY) ) ) {
+        val iMap = new HashMap[String,String]
+        spline(7).split(";").foreach( entry => {
+          // limit 2 keeps '=' inside a value; a flag entry has no '=' and is recorded as "true"
+          val kv = entry.split("=",2)
+          if ( annotation_set.contains(kv(0)) ) {
+            iMap += new Tuple2(kv(0), if ( kv.length > 1 ) kv(1) else "true")
+          }
+        })
+        // getOrElse unwraps the Option (get printed "Some(x)"); mkString also copes with no annotations
+        out.print("%s%n".format(annotations.map( u => iMap.getOrElse(u,NO_KEY) ).mkString("\t")))
+      }
+    }
+  }
+}