In-process versions of sample extraction and interval-list conversion for VCF files. This required an in-process-function branch of the queue library.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4827 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
chartl 2010-12-13 17:36:53 +00:00
parent e219f6a4b5
commit f795b25c47
3 changed files with 68 additions and 1 deletions

View File

@ -2,7 +2,7 @@ import collection.JavaConversions._
import java.io.FileNotFoundException
import org.broadinstitute.sting.datasources.pipeline._
import org.broadinstitute.sting.queue.extensions.gatk._
import org.broadinstitute.sting.queue.library.clf.vcf._
import org.broadinstitute.sting.queue.library.ipf.vcf._
import org.broadinstitute.sting.queue.pipeline._
import org.broadinstitute.sting.queue.QScript
import org.broadinstitute.sting.utils.yaml.YamlUtils

View File

@ -0,0 +1,31 @@
package org.broadinstitute.sting.queue.library.ipf.vcf
import collection.JavaConversions._
import org.broadinstitute.sting.queue.function.InProcessFunction
import org.broadinstitute.sting.commandline._
import org.broadinstitute.sting.utils.text.XReadLines
import java.io.{PrintStream, PrintWriter, File}
/**
 * In-process function that converts a VCF file into an interval list, writing one
 * "contig:position" line (columns 1 and 2 of the record) per retained record.
 *
 * @param inVCF          the VCF to read
 * @param outList        the interval list to write
 * @param useFilterSites when true every record is emitted; when false only records
 *                       whose seventh column (FILTER) equals "PASS" are emitted
 */
class VCFExtractIntervals(inVCF: File, outList: File, useFilterSites: Boolean) extends InProcessFunction {
  /** Converts `in` to `out`, keeping filtered sites. */
  def this(in : File, out: File) = this(in,out,true)

  /**
   * Converts `in`, keeping filtered sites, and derives the output path from the input path:
   * a trailing ".vcf" is swapped for ".intervals.list" (the suffix is simply appended when the
   * path does not end in ".vcf"). Only the end of the path is rewritten, so a ".vcf" occurring
   * in a directory name is left alone and the output can never alias the input file.
   */
  def this(in : File) = this(in,new File(in.getAbsolutePath.replaceAll("\\.vcf$","") + ".intervals.list"),true)

  @Input(doc="The VCF to convert to an interval list") var vcfIn : File = inVCF
  @Output(doc="The intervals file to write to") var listOut : File = outList
  @Argument(doc="Keep filtered sites?") var keepFilters : Boolean = useFilterSites

  /** Writer for the interval list; only open while `run` is executing. */
  var out : PrintWriter = _

  /**
   * Streams every line of `vcfIn` through `vcf2int`, then closes the writer so that
   * buffered output is flushed to `listOut` even if processing fails part-way.
   */
  def run : Unit = {
    out = new PrintWriter(listOut)
    try {
      asScalaIterator(new XReadLines(vcfIn)).foreach(vcf2int)
    } finally {
      out.close()
    }
  }

  /**
   * Emits "contig:position" for a single VCF line. Header lines (starting with '#') and
   * blank lines are skipped. The FILTER column is only inspected when `keepFilters` is
   * false, so records lacking that column are still accepted when filters are kept.
   *
   * @param vcfLine one raw, tab-delimited line of the VCF
   */
  def vcf2int( vcfLine: String ) : Unit = {
    if ( vcfLine.trim.length > 0 && ! vcfLine.startsWith("#") ) {
      val spline = vcfLine.split("\t")
      if ( keepFilters || spline(6).equals("PASS") ) {
        out.print("%s:%s%n".format(spline(0),spline(1)))
      }
    }
  }
}

View File

@ -0,0 +1,36 @@
package org.broadinstitute.sting.queue.library.ipf.vcf
import org.broadinstitute.sting.queue.function.InProcessFunction
import org.broadinstitute.sting.utils.text.XReadLines
import collection.JavaConversions._
import org.broadinstitute.sting.commandline._
import java.io.{PrintWriter, PrintStream, File}
/**
 * In-process function that writes a copy of a VCF restricted to a subset of its samples.
 * "##" meta lines are copied verbatim; for the "#CHROM" header and every record only the
 * first nine columns plus the columns of the requested samples are written.
 *
 * @param inVCF   the VCF to read
 * @param outVCF  the VCF to write
 * @param samples names of the samples whose columns should be kept
 */
class VCFExtractSamples(inVCF: File, outVCF: File, samples: List[String]) extends InProcessFunction {
  /** Reads the sample names, one per line, from the `samples` file. */
  def this(in: File, out: File, samples: File) = this(in,out, (new XReadLines(samples)).readLines.toList)

  @Input(doc="VCF from which to extract samples") var inputVCF : File = inVCF
  @Output(doc="VCF to which to write the sample-subset vcf") var outputVCF : File = outVCF
  @Argument(doc="The samples to extract from the VCF") var extractSamples : List[String] = samples

  /** Writer for the subset VCF; only open while `run` is executing. */
  var out : PrintWriter = _

  /** Indices of the columns to emit; the first nine until the "#CHROM" header has been seen. */
  var columns : List[Int] = (0 to 8).toList

  /**
   * Streams every line of `inputVCF` through `subset`, then closes the writer so that
   * buffered output is flushed to `outputVCF` even if processing fails part-way.
   */
  def run : Unit = {
    out = new PrintWriter(outputVCF)
    try {
      asScalaIterator(new XReadLines(inputVCF)).foreach(subset)
    } finally {
      out.close()
    }
  }

  /**
   * Processes a single VCF line: "##" lines are echoed unchanged, blank lines are skipped,
   * and all other lines are reduced to the selected columns.
   *
   * @param line one raw, tab-delimited line of the VCF
   */
  def subset( line : String ) : Unit = {
    if ( line.startsWith("##") ) {
      out.print("%s%n".format(line))
    } else if ( line.trim.length > 0 ) {
      val spline = line.split("\t")
      if ( spline(0).equals("#CHROM") ) {
        // Rebuild (rather than append to) the selection so a repeated header or a second run
        // cannot duplicate columns. Matching uses the extractSamples argument field and only
        // looks past the nine leading columns, so a sample named like one of those headers
        // cannot re-select that column.
        columns = (0 to 8).toList ++ spline.zipWithIndex.filter( p => p._2 > 8 && extractSamples.contains(p._1) ).map( p => p._2 )
      }
      out.print("%s%n".format(columns.map(p => spline(p)).mkString("\t")))
    }
  }
}