Thanks to Mark: VCFInfoToTable removed in favor of a more flexible walker. Slight change to the argument structure of the walker to make it play more nicely with Queue: the field list parsing is pushed into the command line system (i.e. the variable is exposed as a List<String> and not a String, so Queue doesn't have to join a list into a string only to have it broken out again). This also allows the user to specify -F field1 -F field2 -F field3 if he/she so desires.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4842 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
2217837845
commit
3e75431bc8
|
|
@ -47,7 +47,7 @@ public class VariantsToTable extends RodWalker<Integer, Integer> {
|
|||
protected PrintStream out;
|
||||
|
||||
@Argument(fullName="fields", shortName="F", doc="Fields to emit from the VCF, allows any VCF field, any info field, and some meta fields like nHets", required=true)
|
||||
public String FIELDS;
|
||||
public List<String> fieldsToTake = null;
|
||||
|
||||
@Argument(fullName="showFiltered", shortName="raw", doc="Include filtered records")
|
||||
public boolean showFiltered = false;
|
||||
|
|
@ -59,11 +59,7 @@ public class VariantsToTable extends RodWalker<Integer, Integer> {
|
|||
@Argument(fullName="ignoreMultiAllelic", shortName="IMA", doc="If provided, we will not require the site to be biallelic", required=false)
|
||||
public boolean ignoreMultiAllelic = false;
|
||||
|
||||
private List<String> fieldsToTake;
|
||||
|
||||
public void initialize() {
|
||||
fieldsToTake = Arrays.asList(FIELDS.split(","));
|
||||
|
||||
out.println(Utils.join("\t", fieldsToTake));
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,7 +1,6 @@
|
|||
import org.broadinstitute.sting.commandline.Hidden
|
||||
import org.broadinstitute.sting.queue.extensions.gatk.{RodBind, VariantsToTable}
|
||||
import org.broadinstitute.sting.queue.QScript
|
||||
|
||||
import org.broadinstitute.sting.queue.library.ipf.vcf.VCFInfoToTable
|
||||
import collection.JavaConversions._
|
||||
|
||||
class Vcf2Table extends QScript {
|
||||
|
|
@ -9,14 +8,21 @@ class Vcf2Table extends QScript {
|
|||
@Argument(shortName="f",doc="Info fields to extract",required=false) var fields : java.util.List[String] = new java.util.ArrayList[String]
|
||||
@Argument(shortName="o",doc="Output file",required=true) var output : File = _
|
||||
@Argument(shortName="useFilters",doc="Use filtered sites?",required=false) var useFilters : Boolean = false
|
||||
@Hidden @Argument(shortName="pass",doc="set the hack filter string to this value",required=false) var filterString : String = "PASS"
|
||||
@Hidden @Argument(shortName="notfound",doc="set the hack no entry string to this value",required=false) var keyNotFound : String = "NA"
|
||||
@Argument(shortName="r",doc="Reference file") var ref : File = _
|
||||
@Argument(shortName="i",doc="Intervals",required=false) var ints : java.util.List[File] = new java.util.ArrayList[File]
|
||||
@Argument(shortName="g",doc="gatk jar",required=true) var gatk: File = _
|
||||
|
||||
|
||||
def script = {
|
||||
var vcf2table : VCFInfoToTable = new VCFInfoToTable(vcf,output,fields,useFilters)
|
||||
vcf2table.PF_KEY = filterString
|
||||
vcf2table.NO_KEY = keyNotFound
|
||||
var vcf2table : VariantsToTable = new VariantsToTable
|
||||
vcf2table.rodBind :+= new RodBind("variant","vcf",vcf)
|
||||
vcf2table.reference_sequence = ref
|
||||
vcf2table.intervals = ints.toList
|
||||
vcf2table.raw = useFilters
|
||||
vcf2table.out = output
|
||||
vcf2table.F = fields.toList
|
||||
vcf2table.jarFile = gatk
|
||||
add(vcf2table)
|
||||
this.functions.foreach(u => logger.debug("added: %s%n".format(u.toString)))
|
||||
|
||||
}
|
||||
}
|
||||
|
|
@ -1,56 +0,0 @@
|
|||
package org.broadinstitute.sting.queue.library.ipf.vcf
|
||||
|
||||
import org.broadinstitute.sting.commandline._
|
||||
import org.broadinstitute.sting.utils.text.XReadLines
|
||||
import collection.JavaConversions._
|
||||
import java.io.{PrintStream, PrintWriter, File}
|
||||
import collection.immutable.HashSet
|
||||
import collection.mutable.HashMap
|
||||
import org.broadinstitute.sting.queue.function.InProcessFunction
|
||||
|
||||
/**
 * In-process function that copies selected INFO-field annotations from a VCF
 * file into a tab-delimited table, writing one row per retained record.
 *
 * Each row holds the values of the requested annotations, in the order given by
 * `annotations`; an annotation missing from a record is written as `NO_KEY`.
 * Records whose FILTER column differs from `PF_KEY` are dropped unless
 * `keepFilteredRecs` is set.
 *
 * @param vcf        VCF file to read
 * @param table      file the table is written to
 * @param annots     INFO keys to extract, in output-column order
 * @param keepFilter when true, records are emitted regardless of their FILTER value
 */
class VCFInfoToTable(vcf: File, table: File, annots: List[String], keepFilter: Boolean) extends InProcessFunction {
  // Convenience constructors. When no output file is supplied, the table path is
  // the input path with ".vcf" replaced by ".info.table"; filtered records are
  // kept unless a `keep` flag is passed explicitly.
  def this(in: File, out: File, anns: List[String]) = this(in,out,anns,true)
  def this(in: File, out: File) = this(in,out,Nil,true)
  def this(in: File) = this(in, new File(in.getAbsolutePath.replace(".vcf",".info.table")), Nil, true)
  def this(in: File, anns: List[String]) = this(in, new File(in.getAbsolutePath.replace(".vcf",".info.table")),anns,true)
  def this(in: File, anns: java.util.List[String]) = this(in, new File(in.getAbsolutePath.replace(".vcf",".info.table")),anns.toList,true)
  def this(in: File, anns: List[String], keep: Boolean) = this(in, new File(in.getAbsolutePath.replace(".vcf",".info.table")),anns,keep)
  def this(in: File, anns: java.util.List[String], keep: Boolean) = this(in, new File(in.getAbsolutePath.replace(".vcf",".info.table")),anns.toList,keep)
  def this(in: File, out: File, anns: java.util.List[String], keep: Boolean) = this(in,out,anns.toList,keep)

  @Input(doc="VCF file from which to extract annotion") var inVCF: File = vcf
  @Output(doc="Table file to which to write") var outTable: File = table
  @Argument(doc="Annotations to extract from info field") var annotations: List[String] = annots
  @Argument(doc="Keep filtered records?") var keepFilteredRecs: Boolean = keepFilter

  // set as vars so pipelines can hack these values
  var NO_KEY : String = "NA"
  var PF_KEY : String = "PASS"

  var out : PrintWriter = _
  var annotation_set : HashSet[String] = new HashSet[String]

  //todo -- Khalid: Why is run not being called?
  /**
   * Opens the output table, converts every line of the input VCF, and closes
   * the writer afterwards.
   */
  def run = {
    logger.debug("RUN IS CALLED")
    annotation_set ++= annotations
    out = new PrintWriter(new PrintStream(outTable))
    try {
      asScalaIterator(new XReadLines(inVCF)).foreach(lineToTable)
    } finally {
      // The writer buffers its output; without an explicit close the rows may
      // never reach the file, and the handle would leak if a line fails.
      out.close()
    }
  }

  /**
   * Writes one table row for a VCF data line. Header lines (starting with "#")
   * and blank lines produce no output.
   *
   * @param line a single raw line of the VCF file
   */
  def lineToTable(line : String) = {
    if ( ! line.startsWith("#") && line.trim.length > 0 ) {
      val spline = line.split("\t")
      // column 6 is compared against the pass-filter key, column 7 holds the INFO entries
      if ( spline(6).equals(PF_KEY) || keepFilteredRecs ) {
        val iMap = new HashMap[String,String]
        spline(7).split(";").foreach( entry => {
          // limit of 2 keeps everything after the first "=" as the value
          val kv = entry.split("=", 2)
          if ( annotation_set.contains(kv(0)) ) {
            // A requested entry without "=" is a value-less flag; record it as
            // "true" rather than failing on the missing value.
            iMap(kv(0)) = if ( kv.length > 1 ) kv(1) else "true"
          }
        })
        // getOrElse unwraps the value (a bare get would print "Some(...)"), and
        // mkString copes with an empty annotation list where reduceLeft throws.
        out.print("%s%n".format(annotations.map( u => iMap.getOrElse(u, NO_KEY) ).mkString("\t")))
      }
    }
  }
}
|
||||
Loading…
Reference in New Issue