From 4d9bc84bd5fb59316bd01e0dcd6bb670c1dfe49f Mon Sep 17 00:00:00 2001
From: chartl
Date: Mon, 31 Jan 2011 19:18:31 +0000
Subject: [PATCH] Initial commit of in-process helper functions for making the BCM more robust

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5144 348d0f76-0448-11de-a6fe-93d51630548a
---
 .../sting/queue/library/ipf/SortByRef.scala   | 65 ++++++++++++++++++
 .../library/ipf/vcf/VCFExtractSites.scala     | 67 +++++++++++++++++++
 2 files changed, 132 insertions(+)
 create mode 100755 scala/src/org/broadinstitute/sting/queue/library/ipf/SortByRef.scala
 create mode 100755 scala/src/org/broadinstitute/sting/queue/library/ipf/vcf/VCFExtractSites.scala

diff --git a/scala/src/org/broadinstitute/sting/queue/library/ipf/SortByRef.scala b/scala/src/org/broadinstitute/sting/queue/library/ipf/SortByRef.scala
new file mode 100755
index 000000000..2ffedbc31
--- /dev/null
+++ b/scala/src/org/broadinstitute/sting/queue/library/ipf/SortByRef.scala
@@ -0,0 +1,65 @@
+package org.broadinstitute.sting.queue.library.ipf
+
+import collection.JavaConversions._
+import org.broadinstitute.sting.queue.function.InProcessFunction
+import org.broadinstitute.sting.commandline._
+import org.broadinstitute.sting.utils.text.XReadLines
+import java.io.{PrintStream, PrintWriter, File}
+import collection.immutable.HashMap
+
+
+/**
+ * Sorts the lines of a delimited text file into the contig order given by a
+ * reference fasta index. Blank lines and comment lines sort first; Scala's
+ * sortBy is stable, so lines with the same key keep their input order.
+ */
+class SortByRef( input: File, reference: File, output: File ) extends InProcessFunction {
+  @Input(doc="The file to be sorted") var inFile: File = input
+  @Input(doc="The reference fasta index") var fai: File = reference
+  @Output(doc="The file to write the sorted file to") var outFile : File = output
+  @Argument(doc="The character or expression that separates entries") var separator : String = "\t"
+  @Argument(doc="The position of the contig in the file (1-based)") var pos: Int = 1
+  @Argument(doc="Comment characters (lines will be ignored)") var comment: List[String] = List("#")
+
+  // Contig name -> 0-based rank in the index; filled on first use by contigVal
+  var contigMap: HashMap[String,Int] = new HashMap[String,Int]
+
+  /**
+   * Computes the sort key of a line.
+   *
+   * @param line a line of the input file
+   * @return -1 for a blank line or a line starting with one of the comment
+   *         prefixes, otherwise the rank of the line's contig in the index
+   * @throws NoSuchElementException if the contig is not listed in the index
+   */
+  def contigVal( line : String ) : Int = {
+    // The prefixes are Strings, so test them with startsWith: List.contains on
+    // the line's first Char can never be equal to a String element.
+    if ( line.length == 0 || comment.exists( prefix => line.startsWith(prefix) ) ) {
+      -1
+    } else {
+      if ( contigMap.isEmpty ) { // index not loaded yet
+        loadContigs()
+      }
+      val contig = line.split(separator)(pos-1)
+      contigMap.getOrElse( contig,
+        throw new NoSuchElementException("Contig '" + contig + "' is not listed in " + fai) )
+    }
+  }
+
+  // Ranks the first tab-separated column of each index line in file order.
+  private def loadContigs() {
+    val names = ( new XReadLines(fai) ).readLines.map( entry => entry.split("\t").head )
+    names.zipWithIndex.foreach( ranked => contigMap += ranked )
+  }
+
+  /** Writes the lines of inFile to outFile, ordered by contigVal. */
+  def run = {
+    val w : PrintWriter = new PrintWriter(new PrintStream(outFile))
+    try {
+      ( new XReadLines(inFile) ).readLines.sortBy(contigVal).foreach( u => w.println(u) )
+    } finally {
+      w.close() // flushes the buffered output; without it lines can be lost
+    }
+  }
+}
diff --git a/scala/src/org/broadinstitute/sting/queue/library/ipf/vcf/VCFExtractSites.scala b/scala/src/org/broadinstitute/sting/queue/library/ipf/vcf/VCFExtractSites.scala
new file mode 100755
index 000000000..61e7076f9
--- /dev/null
+++ b/scala/src/org/broadinstitute/sting/queue/library/ipf/vcf/VCFExtractSites.scala
@@ -0,0 +1,67 @@
+package org.broadinstitute.sting.queue.library.ipf.vcf
+
+import collection.JavaConversions._
+import org.broadinstitute.sting.queue.function.InProcessFunction
+import org.broadinstitute.sting.commandline._
+import org.broadinstitute.sting.utils.text.XReadLines
+import java.io.{PrintStream, PrintWriter, File}
+
+
+/**
+ * Copies a VCF while keeping only the first eight tab-separated columns of
+ * the column header line and of every record that is retained.
+ */
+class VCFExtractSites( vcf: File, output: File) extends InProcessFunction {
+  @Input(doc="VCF file from which to extract sites") var inVCF: File = vcf
+  @Output(doc="Sites VCF file to write to") var outVCF: File = output
+  @Argument(doc="Keep non-PASS sites") var keepFilters: Boolean = false
+  @Argument(doc="Keep info field") var keepInfo : Boolean = true
+  @Argument(doc="Keep qual field") var keepQual : Boolean = true
+
+  /**
+   * Converts one input line to the text written for it.
+   *
+   * @param line a line of the input VCF
+   * @return "##" lines unchanged; a "#" column header line cut to eight
+   *         columns; a retained record cut to eight columns, with QUAL and
+   *         INFO replaced by "." when they are not kept; "" for blank lines
+   *         and for records whose FILTER is not PASS unless keepFilters is set
+   * @throws ArrayIndexOutOfBoundsException if a record has too few columns
+   */
+  def lineMap( line: String ) : String = {
+    if ( line.startsWith("##") ) {
+      line
+    } else if ( line.startsWith("#") ) {
+      // The column header has no FILTER value to test, so it must not go
+      // through the PASS check, and its column names are never blanked out.
+      line.split("\t",9).take(8).mkString("\t")
+    } else if ( line.trim.length == 0 ) {
+      ""
+    } else {
+      val spline = line.split("\t",9)
+      if ( spline(6) == "PASS" || keepFilters ) {
+        if ( ! keepInfo ) {
+          spline(7) = "."
+        }
+        if ( ! keepQual ) {
+          spline(5) = "."
+        }
+        spline.take(8).mkString("\t")
+      } else {
+        ""
+      }
+    }
+  }
+
+  /** Writes every line of inVCF that lineMap does not drop to outVCF. */
+  def run {
+    val w: PrintWriter = new PrintWriter( new PrintStream(outVCF) )
+    try {
+      val kept = ( new XReadLines(inVCF) ).readLines().map(lineMap).view.filter( u => u != "" )
+      kept.foreach( u => w.println(u) )
+    } finally {
+      w.close() // flushes the buffered output; without it lines can be lost
+    }
+  }
+
+}