In-process versions of sample extraction and interval-list conversion for VCF files. Required an in-process-function branch of the queue library.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4827 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
e219f6a4b5
commit
f795b25c47
|
|
@ -2,7 +2,7 @@ import collection.JavaConversions._
|
|||
import java.io.FileNotFoundException
|
||||
import org.broadinstitute.sting.datasources.pipeline._
|
||||
import org.broadinstitute.sting.queue.extensions.gatk._
|
||||
import org.broadinstitute.sting.queue.library.clf.vcf._
|
||||
import org.broadinstitute.sting.queue.library.ipf.vcf._
|
||||
import org.broadinstitute.sting.queue.pipeline._
|
||||
import org.broadinstitute.sting.queue.QScript
|
||||
import org.broadinstitute.sting.utils.yaml.YamlUtils
|
||||
|
|
|
|||
|
|
@ -0,0 +1,31 @@
|
|||
package org.broadinstitute.sting.queue.library.ipf.vcf
|
||||
|
||||
import collection.JavaConversions._
|
||||
import org.broadinstitute.sting.queue.function.InProcessFunction
|
||||
import org.broadinstitute.sting.commandline._
|
||||
import org.broadinstitute.sting.utils.text.XReadLines
|
||||
import java.io.{PrintStream, PrintWriter, File}
|
||||
|
||||
/**
 * In-process function that converts a VCF file into a list of single-site intervals.
 *
 * Every record (non-header) line of the input is written to the output as "CHROM:POS",
 * one interval per line. Header lines (anything starting with '#') are skipped.
 *
 * @param inVCF          the VCF file to read
 * @param outList        the interval list file to write
 * @param useFilterSites when true every record is emitted; when false only records whose
 *                       FILTER column (7th tab-separated field) equals "PASS" are emitted
 */
class VCFExtractIntervals(inVCF: File, outList: File, useFilterSites: Boolean) extends InProcessFunction {
  /** Converts `in` to `out`, keeping filtered sites. */
  def this(in : File, out: File) = this(in,out,true)

  /**
   * Converts `in`, keeping filtered sites. The output path is the absolute path of `in`
   * with every occurrence of ".vcf" replaced by ".intervals.list".
   * NOTE(review): if the path contains no ".vcf" the output path equals the input path.
   */
  def this(in : File) = this(in,new File(in.getAbsolutePath.replace(".vcf",".intervals.list")),true)

  @Input(doc="The VCF to convert to an interval list") var vcfIn : File = inVCF
  @Output(doc="The intervals file to write to") var listOut : File = outList
  @Argument(doc="Keep filtered sites?") var keepFilters : Boolean = useFilterSites

  /** Writer for the interval list; only valid while `run` is executing. */
  var out : PrintWriter = _

  /** Streams the input VCF line by line and writes one interval per retained record. */
  def run = {
    out = new PrintWriter(new PrintStream(listOut))
    try {
      asScalaIterator(new XReadLines(vcfIn)).foreach(vcf2int)
    } finally {
      // The writer is buffered: without close() the tail of the output (or all of it,
      // for small inputs) may never reach disk, and the file handle would leak.
      out.close()
    }
  }

  /**
   * Writes the "CHROM:POS" interval for a single VCF line, if the line is a record that
   * should be kept.
   *
   * @param vcfLine one line of the VCF, header or record
   */
  def vcf2int( vcfLine: String ) : Unit = {
    if ( ! vcfLine.startsWith("#") ) {
      val fields = vcfLine.split("\t")
      // keepFilters is tested first so the FILTER column is only read when it matters.
      if ( keepFilters || fields(6).equals("PASS") ) {
        out.println(fields(0) + ":" + fields(1))
      }
    }
  }
}
|
||||
|
|
@ -0,0 +1,36 @@
|
|||
package org.broadinstitute.sting.queue.library.ipf.vcf
|
||||
|
||||
import org.broadinstitute.sting.queue.function.InProcessFunction
|
||||
import org.broadinstitute.sting.utils.text.XReadLines
|
||||
import collection.JavaConversions._
|
||||
import org.broadinstitute.sting.commandline._
|
||||
import java.io.{PrintWriter, PrintStream, File}
|
||||
|
||||
/**
 * In-process function that writes a copy of a VCF restricted to a subset of its samples.
 *
 * Meta-information lines (starting with "##") are copied unchanged. For the "#CHROM"
 * header line and every following line, the first nine tab-separated columns are kept,
 * followed by the columns of the requested samples in the order they appear in the header.
 *
 * @param inVCF   the VCF file to read
 * @param outVCF  the VCF file to write
 * @param samples names of the samples to keep
 */
class VCFExtractSamples(inVCF: File, outVCF: File, samples: List[String]) extends InProcessFunction {
  /** Same as the primary constructor, with the sample names read from `samples`, one per line. */
  def this(in: File, out: File, samples: File) = this(in,out, (new XReadLines(samples)).readLines.toList)

  @Input(doc="VCF from which to extract samples") var inputVCF : File = inVCF
  @Output(doc="VCF to which to write the sample-subset vcf") var outputVCF : File = outVCF
  @Argument(doc="The samples to extract from the VCF") var extractSamples : List[String] = samples

  /** Indices of the columns that precede the per-sample columns (CHROM through FORMAT). */
  private val fixedColumns : List[Int] = (0 to 8).toList

  /** Writer for the subset VCF; only valid while `run` is executing. */
  var out : PrintWriter = _

  /** Indices of the columns to emit; recomputed whenever a "#CHROM" header line is seen. */
  var columns : List[Int] = fixedColumns

  /** Streams the input VCF line by line and writes the sample-subset VCF. */
  def run = {
    out = new PrintWriter(new PrintStream(outputVCF))
    try {
      asScalaIterator(new XReadLines(inputVCF)).foreach(subset)
    } finally {
      // The writer is buffered: without close() the tail of the output may never reach
      // disk, and the file handle would leak.
      out.close()
    }
  }

  /**
   * Writes the subset of a single VCF line to the output.
   *
   * @param line one line of the VCF: meta-information, header or record
   */
  def subset( line : String ) {
    if ( line.startsWith("##") ) {
      out.println(line)
    } else {
      val fields = line.split("\t")
      if ( fields(0).equals("#CHROM") ) {
        // Rebuild rather than append, so a repeated header or a second run cannot
        // duplicate columns. Only columns after the fixed ones are candidates, so a sample
        // named like a fixed column (e.g. "INFO") cannot select that column again.
        // extractSamples is consulted so changes to the @Argument field are honoured.
        val sampleColumns = fields.zipWithIndex.filter( p => p._2 > 8 && extractSamples.contains(p._1) ).map( p => p._2 )
        columns = fixedColumns ++ sampleColumns
      }

      out.println(columns.map(idx => fields(idx)).mkString("\t"))
    }
  }
}
|
||||
Loading…
Reference in New Issue