In-process versions of sample extraction and interval-list conversion for VCF files. Required an in-process-function branch of the queue library.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4827 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
e219f6a4b5
commit
f795b25c47
|
|
@ -2,7 +2,7 @@ import collection.JavaConversions._
|
||||||
import java.io.FileNotFoundException
|
import java.io.FileNotFoundException
|
||||||
import org.broadinstitute.sting.datasources.pipeline._
|
import org.broadinstitute.sting.datasources.pipeline._
|
||||||
import org.broadinstitute.sting.queue.extensions.gatk._
|
import org.broadinstitute.sting.queue.extensions.gatk._
|
||||||
import org.broadinstitute.sting.queue.library.clf.vcf._
|
import org.broadinstitute.sting.queue.library.ipf.vcf._
|
||||||
import org.broadinstitute.sting.queue.pipeline._
|
import org.broadinstitute.sting.queue.pipeline._
|
||||||
import org.broadinstitute.sting.queue.QScript
|
import org.broadinstitute.sting.queue.QScript
|
||||||
import org.broadinstitute.sting.utils.yaml.YamlUtils
|
import org.broadinstitute.sting.utils.yaml.YamlUtils
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,31 @@
|
||||||
|
package org.broadinstitute.sting.queue.library.ipf.vcf
|
||||||
|
|
||||||
|
import collection.JavaConversions._
|
||||||
|
import org.broadinstitute.sting.queue.function.InProcessFunction
|
||||||
|
import org.broadinstitute.sting.commandline._
|
||||||
|
import org.broadinstitute.sting.utils.text.XReadLines
|
||||||
|
import java.io.{PrintStream, PrintWriter, File}
|
||||||
|
|
||||||
|
/**
 * In-process function that converts a VCF file into an interval list, writing one
 * "contig:position" entry per data line of the input.
 *
 * @param inVCF          the VCF file to read
 * @param outList        the file the interval list is written to
 * @param useFilterSites when true every data line is emitted; when false only lines
 *                       whose seventh tab-separated column equals "PASS" are emitted
 */
class VCFExtractIntervals(inVCF: File, outList: File, useFilterSites: Boolean) extends InProcessFunction {
  /** Converts `in` to `out`, keeping filtered sites. */
  def this(in : File, out: File) = this(in,out,true)

  /**
   * Converts `in`, deriving the output path by replacing ".vcf" with ".intervals.list"
   * in the absolute input path; filtered sites are kept.
   */
  // NOTE(review): String.replace substitutes EVERY ".vcf" occurrence in the path, and when the
  // path contains none the output path equals the input path -- confirm callers only pass
  // *.vcf files.
  def this(in : File) = this(in,new File(in.getAbsolutePath.replace(".vcf",".intervals.list")),true)

  @Input(doc="The VCF to convert to an interval list") var vcfIn : File = inVCF
  @Output(doc="The intervals file to write to") var listOut : File = outList
  @Argument(doc="Keep filtered sites?") var keepFilters : Boolean = useFilterSites

  /** Writer for the interval list; opened at the start of `run` and closed when it finishes. */
  var out : PrintWriter = _

  /**
   * Reads `vcfIn` line by line and writes the matching intervals to `listOut`.
   * The writer is always closed, even if a line fails to parse, so buffered
   * output is flushed and the file handle is released.
   */
  def run: Unit = {
    out = new PrintWriter(listOut)
    try {
      asScalaIterator(new XReadLines(vcfIn)).foreach(vcf2int)
    } finally {
      out.close()
    }
  }

  /**
   * Writes "contig:position" for a single VCF line when it is a data line that
   * should be kept. Header lines (starting with "#") and empty lines are ignored.
   *
   * @param vcfLine one raw line of the input VCF
   */
  def vcf2int( vcfLine: String ) : Unit = {
    // Header and blank lines carry no site, so they are skipped before any splitting.
    if ( ! vcfLine.isEmpty && ! vcfLine.startsWith("#") ) {
      val fields = vcfLine.split("\t")
      // keepFilters is tested first so the FILTER column is only read when it matters.
      if ( keepFilters || fields(6).equals("PASS") ) {
        out.print("%s:%s%n".format(fields(0),fields(1)))
      }
    }
  }
}
|
||||||
|
|
@ -0,0 +1,36 @@
|
||||||
|
package org.broadinstitute.sting.queue.library.ipf.vcf
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.queue.function.InProcessFunction
|
||||||
|
import org.broadinstitute.sting.utils.text.XReadLines
|
||||||
|
import collection.JavaConversions._
|
||||||
|
import org.broadinstitute.sting.commandline._
|
||||||
|
import java.io.{PrintWriter, PrintStream, File}
|
||||||
|
|
||||||
|
/**
 * In-process function that writes a copy of a VCF restricted to a subset of its
 * sample columns. Lines starting with "##" are copied unchanged; the "#CHROM"
 * header line and all data lines are reduced to the leading fixed columns plus
 * the columns of the requested samples.
 *
 * @param inVCF   the VCF file to read
 * @param outVCF  the file the sample-subset VCF is written to
 * @param samples names of the samples to keep, as they appear in the "#CHROM" header line
 */
class VCFExtractSamples(inVCF: File, outVCF: File, samples: List[String]) extends InProcessFunction {
  /** Same as the primary constructor, reading the sample names from the lines of the `samples` file. */
  def this(in: File, out: File, samples: File) = this(in,out, (new XReadLines(samples)).readLines.toList)

  @Input(doc="VCF from which to extract samples") var inputVCF : File = inVCF
  @Output(doc="VCF to which to write the sample-subset vcf") var outputVCF : File = outVCF
  @Argument(doc="The samples to extract from the VCF") var extractSamples : List[String] = samples

  /** Writer for the subset VCF; opened at the start of `run` and closed when it finishes. */
  var out : PrintWriter = _

  /** Indices of the columns copied to the output; recomputed when the "#CHROM" header line is read. */
  var columns : List[Int] = (0 to 8).toList

  /**
   * Reads `inputVCF` line by line and writes the subset to `outputVCF`.
   * The writer is always closed, even if a line fails to parse, so buffered
   * output is flushed and the file handle is released.
   */
  def run: Unit = {
    out = new PrintWriter(outputVCF)
    try {
      asScalaIterator(new XReadLines(inputVCF)).foreach(subset)
    } finally {
      out.close()
    }
  }

  /**
   * Processes a single line of the input VCF: "##" lines are echoed as-is, empty
   * lines are ignored, and every other line is written with only the selected columns.
   *
   * @param line one raw line of the input VCF
   */
  def subset( line : String ) : Unit = {
    if ( line.startsWith("##") ) {
      out.print("%s%n".format(line))
    } else if ( ! line.isEmpty ) {
      val fields = line.split("\t")
      if ( fields(0).equals("#CHROM") ) {
        // Rebuild the selection from scratch so a second header line or a repeated run
        // cannot accumulate duplicate columns. Only as many leading columns as the header
        // actually has are kept, and sample names are matched only past the ninth column
        // so a sample that shares its name with a fixed column is not selected twice.
        val leading = (0 until (fields.length min 9)).toList
        val wanted = fields.zipWithIndex
          .filter { case (name, idx) => idx > 8 && extractSamples.contains(name) }
          .map { case (_, idx) => idx }
        columns = leading ++ wanted
      }

      out.print("%s%n".format(columns.map(idx => fields(idx)).mkString("\t")))
    }
  }
}
|
||||||
Loading…
Reference in New Issue