diff --git a/scala/qscript/chartl/private_mutations.q b/scala/qscript/chartl/private_mutations.q
index 0e92c341f..1388a0037 100755
--- a/scala/qscript/chartl/private_mutations.q
+++ b/scala/qscript/chartl/private_mutations.q
@@ -2,7 +2,7 @@ import collection.JavaConversions._
 import java.io.FileNotFoundException
 import org.broadinstitute.sting.datasources.pipeline._
 import org.broadinstitute.sting.queue.extensions.gatk._
-import org.broadinstitute.sting.queue.library.clf.vcf._
+import org.broadinstitute.sting.queue.library.ipf.vcf._
 import org.broadinstitute.sting.queue.pipeline._
 import org.broadinstitute.sting.queue.QScript
 import org.broadinstitute.sting.utils.yaml.YamlUtils
diff --git a/scala/src/org/broadinstitute/sting/queue/library/ipf/vcf/VCFExtractIntervals.scala b/scala/src/org/broadinstitute/sting/queue/library/ipf/vcf/VCFExtractIntervals.scala
new file mode 100755
index 000000000..2dd057cb6
--- /dev/null
+++ b/scala/src/org/broadinstitute/sting/queue/library/ipf/vcf/VCFExtractIntervals.scala
@@ -0,0 +1,72 @@
+package org.broadinstitute.sting.queue.library.ipf.vcf
+
+import collection.JavaConversions._
+import org.broadinstitute.sting.queue.function.InProcessFunction
+import org.broadinstitute.sting.commandline._
+import org.broadinstitute.sting.utils.text.XReadLines
+import java.io.{PrintStream, PrintWriter, File}
+
+/**
+ * Writes one "contig:position" line per selected VCF record into an interval list.
+ *
+ * @param inVCF the VCF to read
+ * @param outList the interval list to write
+ * @param useFilterSites if true, records whose FILTER column is not PASS are written too
+ */
+class VCFExtractIntervals(inVCF: File, outList: File, useFilterSites: Boolean) extends InProcessFunction {
+  /** Keeps filtered sites. */
+  def this(in : File, out: File) = this(in,out,true)
+  /** Keeps filtered sites and derives the output path from the input file name. */
+  def this(in : File) = this(in,VCFExtractIntervals.defaultIntervalsFile(in),true)
+
+  @Input(doc="The VCF to convert to an interval list") var vcfIn : File = inVCF
+  @Output(doc="The intervals file to write to") var listOut : File = outList
+  @Argument(doc="Keep filtered sites?") var keepFilters : Boolean = useFilterSites
+
+  // Opened by run and closed when run finishes; vcf2int writes through it.
+  var out : PrintWriter = _
+
+  /** Feeds every line of vcfIn to vcf2int and always closes the output writer. */
+  def run = {
+    out = new PrintWriter(new PrintStream(listOut))
+    try {
+      // NOTE(review): the XReadLines reader is never closed here; confirm whether it needs to be.
+      asScalaIterator(new XReadLines(vcfIn)).foreach(vcf2int)
+    } finally {
+      // PrintWriter buffers its output; closing flushes it and releases the file handle.
+      out.close()
+    }
+  }
+
+  /**
+   * Writes "contig:position" for one VCF line. Lines starting with "#" and empty lines are
+   * skipped; unless keepFilters is set, only records whose FILTER column is PASS are written.
+   */
+  def vcf2int( vcfLine: String ) : Unit = {
+    if ( vcfLine.length > 0 && ! vcfLine.startsWith("#") ) {
+      val fields = vcfLine.split("\t")
+      // Test keepFilters first so the FILTER column (index 6) is only read when it decides the outcome.
+      if ( keepFilters || fields(6).equals("PASS") ) {
+        out.print("%s:%s%n".format(fields(0),fields(1)))
+      }
+    }
+  }
+
+}
+
+object VCFExtractIntervals {
+  /**
+   * Default output location: same directory as the VCF, with the last ".vcf" in the file name
+   * replaced by ".intervals.list". The suffix is appended when the name contains no ".vcf",
+   * so the derived output can never be the input file itself.
+   */
+  private def defaultIntervalsFile(vcf: File): File = {
+    val absolute = vcf.getAbsoluteFile
+    val name = absolute.getName
+    val at = name.lastIndexOf(".vcf")
+    val listName =
+      if ( at >= 0 ) name.substring(0, at) + ".intervals.list" + name.substring(at + ".vcf".length)
+      else name + ".intervals.list"
+    new File(absolute.getParentFile, listName)
+  }
+}
\ No newline at end of file
diff --git a/scala/src/org/broadinstitute/sting/queue/library/ipf/vcf/VCFExtractSamples.scala b/scala/src/org/broadinstitute/sting/queue/library/ipf/vcf/VCFExtractSamples.scala
new file mode 100755
index 000000000..b9f75b0b6
--- /dev/null
+++ b/scala/src/org/broadinstitute/sting/queue/library/ipf/vcf/VCFExtractSamples.scala
@@ -0,0 +1,61 @@
+package org.broadinstitute.sting.queue.library.ipf.vcf
+
+import org.broadinstitute.sting.queue.function.InProcessFunction
+import org.broadinstitute.sting.utils.text.XReadLines
+import collection.JavaConversions._
+import org.broadinstitute.sting.commandline._
+import java.io.{PrintWriter, PrintStream, File}
+
+/**
+ * Copies a VCF, keeping the nine fixed columns plus only the requested sample columns.
+ *
+ * @param inVCF the VCF to read
+ * @param outVCF the VCF to write
+ * @param samples names of the samples whose columns are kept
+ */
+class VCFExtractSamples(inVCF: File, outVCF: File, samples: List[String]) extends InProcessFunction {
+  /** Reads the sample names from a file, one name per line. */
+  def this(in: File, out: File, samples: File) = this(in,out, (new XReadLines(samples)).readLines.toList)
+
+  @Input(doc="VCF from which to extract samples") var inputVCF : File = inVCF
+  @Output(doc="VCF to which to write the sample-subset vcf") var outputVCF : File = outVCF
+  @Argument(doc="The samples to extract from the VCF") var extractSamples : List[String] = samples
+
+  // Opened by run and closed when run finishes; subset writes through it.
+  var out : PrintWriter = _
+  // Indices of the columns to emit; rebuilt by subset when it sees the #CHROM header line.
+  var columns : List[Int] = (0 to 8).toList
+
+  /** Feeds every line of inputVCF to subset and always closes the output writer. */
+  def run = {
+    out = new PrintWriter(new PrintStream(outputVCF))
+    try {
+      asScalaIterator(new XReadLines(inputVCF)).foreach(subset)
+    } finally {
+      // PrintWriter buffers its output; closing flushes it and releases the file handle.
+      out.close()
+    }
+  }
+
+  /**
+   * Writes one input line to the output. "##" lines are copied unchanged and empty lines are
+   * dropped; every other line is split on tabs and only the selected columns are written.
+   * The #CHROM header line selects the sample columns before it is itself written.
+   */
+  def subset( line : String ) : Unit = {
+    if ( line.startsWith("##") ) {
+      out.print("%s%n".format(line))
+    } else if ( line.length > 0 ) {
+      val fields = line.split("\t")
+      if ( fields(0).equals("#CHROM") ) {
+        // Use the @Argument field rather than the constructor parameter so a reassigned value is honoured.
+        val wanted = extractSamples.toSet
+        // Columns 0-8 are always kept, so only later columns are candidates; this avoids duplicates.
+        val sampleColumns = fields.zipWithIndex.drop(9).collect { case (name, index) if wanted.contains(name) => index }
+        columns = (0 to 8).toList ++ sampleColumns
+      }
+
+      out.print("%s%n".format(columns.map(p => fields(p)).mkString("\t")))
+    }
+  }
+}
\ No newline at end of file