Commit for Khalid -- this should be a Scala version of vcf2table, but for some reason the run method isn't getting called.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4841 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
chartl 2010-12-15 00:44:15 +00:00
parent 01323447c6
commit 2217837845
3 changed files with 83 additions and 5 deletions

View File

@ -2,7 +2,7 @@ import collection.JavaConversions._
import java.io.FileNotFoundException
import org.broadinstitute.sting.datasources.pipeline._
import org.broadinstitute.sting.queue.extensions.gatk._
import org.broadinstitute.sting.queue.library.clf.vcf._
import org.broadinstitute.sting.queue.library.ipf.vcf._
import org.broadinstitute.sting.queue.pipeline._
import org.broadinstitute.sting.queue.QScript
import org.broadinstitute.sting.utils.yaml.YamlUtils
@ -63,7 +63,7 @@ class private_mutations extends QScript {
eval_all.out = swapExt(finalMergedVCF,".vcf",".perm.csv")
eval_all.reportType = Some(org.broadinstitute.sting.utils.report.VE2ReportFactory.VE2TemplateType.CSV)
//add(eval_all)
add(eval_all)
var eval_afr : VariantEval = vcLib.addTrait(new VariantEval)
eval_afr.rodBind :+= new RodBind("evalAFR","VCF",extract_afr.outputVCF)
@ -73,7 +73,7 @@ class private_mutations extends QScript {
eval_afr.reportType = Some(org.broadinstitute.sting.utils.report.VE2ReportFactory.VE2TemplateType.CSV)
eval_afr.noStandard = true
//add(eval_afr)
add(eval_afr)
var eval_eur : VariantEval = vcLib.addTrait(new VariantEval)
eval_eur.rodBind :+= new RodBind("compAFR","VCF",extract_afr.outputVCF)
@ -83,7 +83,7 @@ class private_mutations extends QScript {
eval_eur.reportType = Some(org.broadinstitute.sting.utils.report.VE2ReportFactory.VE2TemplateType.CSV)
eval_eur.noStandard = true
//add(eval_eur)
add(eval_eur)
}
}
}

View File

@ -0,0 +1,22 @@
import org.broadinstitute.sting.commandline.Hidden
import org.broadinstitute.sting.queue.QScript
import org.broadinstitute.sting.queue.library.ipf.vcf.VCFInfoToTable
import collection.JavaConversions._
/**
 * QScript that builds a single VCFInfoToTable function from command-line
 * arguments and adds it to the script.
 *
 * The hidden `pass` and `notfound` arguments are copied onto the function's
 * PF_KEY and NO_KEY fields before it is added.
 */
class Vcf2Table extends QScript {
  @Argument(shortName="vcf",doc="VCF file",required=true) var vcf : File = _
  @Argument(shortName="f",doc="Info fields to extract",required=false) var fields : java.util.List[String] = new java.util.ArrayList[String]
  @Argument(shortName="o",doc="Output file",required=true) var output : File = _
  @Argument(shortName="useFilters",doc="Use filtered sites?",required=false) var useFilters : Boolean = false
  @Hidden @Argument(shortName="pass",doc="set the hack filter string to this value",required=false) var filterString : String = "PASS"
  @Hidden @Argument(shortName="notfound",doc="set the hack no entry string to this value",required=false) var keyNotFound : String = "NA"

  /** Creates the conversion function, applies the key overrides, adds it, and logs every added function. */
  def script = {
    val converter = new VCFInfoToTable(vcf, output, fields, useFilters)
    converter.PF_KEY = filterString
    converter.NO_KEY = keyNotFound
    add(converter)

    // Debug aid: list everything that has been added to this script so far.
    for (added <- this.functions) {
      logger.debug("added: %s%n".format(added.toString))
    }
  }
}

View File

@ -0,0 +1,56 @@
package org.broadinstitute.sting.queue.library.ipf.vcf
import org.broadinstitute.sting.commandline._
import org.broadinstitute.sting.utils.text.XReadLines
import collection.JavaConversions._
import java.io.{PrintStream, PrintWriter, File}
import collection.immutable.HashSet
import collection.mutable.HashMap
import org.broadinstitute.sting.queue.function.InProcessFunction
/**
 * In-process function that extracts selected INFO annotations from a VCF file
 * and writes them as a tab-separated table, one row per retained record.
 *
 * Columns appear in the order given by `annotations`. A requested annotation
 * that is absent from a record is written as NO_KEY.
 *
 * @param vcf        VCF file to read
 * @param table      table file to write
 * @param annots     INFO keys to extract, in output column order
 * @param keepFilter when true, records are written regardless of their FILTER
 *                   value; when false, only records whose FILTER equals PF_KEY
 */
class VCFInfoToTable(vcf: File, table: File, annots: List[String], keepFilter: Boolean) extends InProcessFunction {
  // Convenience constructors. When no output file is given, the table path is
  // the input's absolute path with ".vcf" replaced by ".info.table".
  def this(in: File, out: File, anns: List[String]) = this(in,out,anns,true)
  def this(in: File, out: File) = this(in,out,Nil,true)
  def this(in: File) = this(in, new File(in.getAbsolutePath.replace(".vcf",".info.table")), Nil, true)
  def this(in: File, anns: List[String]) = this(in, new File(in.getAbsolutePath.replace(".vcf",".info.table")),anns,true)
  def this(in: File, anns: java.util.List[String]) = this(in, new File(in.getAbsolutePath.replace(".vcf",".info.table")),anns.toList,true)
  def this(in: File, anns: List[String], keep: Boolean) = this(in, new File(in.getAbsolutePath.replace(".vcf",".info.table")),anns,keep)
  def this(in: File, anns: java.util.List[String], keep: Boolean) = this(in, new File(in.getAbsolutePath.replace(".vcf",".info.table")),anns.toList,keep)
  def this(in: File, out: File, anns: java.util.List[String], keep: Boolean) = this(in,out,anns.toList,keep)

  @Input(doc="VCF file from which to extract annotion") var inVCF: File = vcf
  @Output(doc="Table file to which to write") var outTable: File = table
  @Argument(doc="Annotations to extract from info field") var annotations: List[String] = annots
  @Argument(doc="Keep filtered records?") var keepFilteredRecs: Boolean = keepFilter

  // set as vars so pipelines can hack these values
  var NO_KEY : String = "NA"
  var PF_KEY : String = "PASS"

  // Writer for the output table; only valid while run is executing.
  var out : PrintWriter = _
  // Set view of `annotations`, filled in by run, used for fast membership tests.
  var annotation_set : HashSet[String] = new HashSet[String]

  //todo -- Khalid: Why is run not being called?
  /**
   * Reads the input VCF line by line and writes the table.
   *
   * The writer is always closed, even if a line fails to parse, so that
   * buffered rows are flushed to disk and the file handle is released.
   */
  def run: Unit = {
    logger.debug("RUN IS CALLED")
    annotation_set ++= annotations
    out = new PrintWriter(new PrintStream(outTable))
    try {
      asScalaIterator(new XReadLines(inVCF)).foreach(lineToTable)
    } finally {
      out.close()
    }
  }

  /**
   * Writes one table row for a VCF data line; header lines (starting with '#')
   * and, unless keepFilteredRecs is set, records whose FILTER column differs
   * from PF_KEY are skipped.
   *
   * @param line a single raw line from the VCF file
   */
  def lineToTable(line : String): Unit = {
    if ( ! line.startsWith("#") ) {
      val spline = line.split("\t")
      // Column 6 is FILTER and column 7 is INFO in the tab-separated VCF layout.
      if ( spline(6).equals(PF_KEY) || keepFilteredRecs ) {
        val iMap = new HashMap[String,String]
        spline(7).split(";").foreach( entry => {
          // Limit 2 keeps any further '=' characters inside the value.
          val keyValue = entry.split("=", 2)
          if ( annotation_set.contains(keyValue(0)) ) {
            // Flag-style entries have no "=value" part; record their presence
            // as "true" instead of failing on the missing array element.
            iMap(keyValue(0)) = if ( keyValue.length > 1 ) keyValue(1) else "true"
          }
        })
        // getOrElse yields the bare value (not an Option wrapper), and mkString
        // also copes with an empty annotation list.
        out.print("%s%n".format(annotations.map( u => iMap.getOrElse(u, NO_KEY) ).mkString("\t")))
      }
    }
  }
}