Initial commit of in-process helper functions for making the BCM more robust

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5144 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
chartl 2011-01-31 19:18:31 +00:00
parent d4f744a4d4
commit 4d9bc84bd5
2 changed files with 76 additions and 0 deletions

View File

@ -0,0 +1,37 @@
package org.broadinstitute.sting.queue.library.ipf
import collection.JavaConversions._
import org.broadinstitute.sting.queue.function.InProcessFunction
import org.broadinstitute.sting.commandline._
import org.broadinstitute.sting.utils.text.XReadLines
import java.io.{PrintStream, PrintWriter, File}
import collection.immutable.HashMap
/**
 * In-process function that sorts the lines of a delimited text file by contig,
 * using the order in which contigs are listed in a reference fasta index.
 *
 * Lines recognised as comments (and empty lines) receive the key -1 and
 * therefore end up ahead of every data line; Scala's sortBy is stable, so lines
 * sharing a key keep their original relative order.
 *
 * @param input     the file to be sorted
 * @param reference the reference fasta index (.fai); the first tab-separated
 *                  column of each of its lines is taken as a contig name
 * @param output    the file the sorted lines are written to
 */
class SortByRef( input: File, reference: File, output: File ) extends InProcessFunction {
  @Input(doc="The file to be sorted") var inFile: File = input
  @Input(doc="The reference fasta index") var fai: File = reference
  @Output(doc="The file to write the sorted file to") var outFile : File = output
  @Argument(doc="The character or expression that separates entries") var separator : String = "\t"
  @Argument(doc="The position of the contig in the file (1-based)") var pos: Int = 1
  @Argument(doc="Comment characters (lines will be ignored)") var comment: List[String] = List("#")

  // Contig name -> rank (0-based line number in the fai); filled lazily by contigVal.
  var contigMap: HashMap[String,Int] = new HashMap[String,Int];

  /**
   * Computes the sort key of a single line.
   *
   * @param line a line read from the input file
   * @return -1 for an empty line or a line starting with one of the `comment`
   *         prefixes; otherwise the rank of the line's contig in the fai
   * @throws NoSuchElementException if the contig is not listed in the fai
   */
  def contigVal( line : String ) : Int = {
    // The previous check compared a Char against a List[String], which can never
    // match; compare string prefixes instead. Empty prefixes are ignored so that
    // they do not classify every line as a comment.
    val isComment = comment.exists( c => c.length > 0 && line.startsWith(c) )
    if ( line.length == 0 || isComment ) {
      -1
    } else {
      if ( contigMap.size < 1 ) { // no contigs loaded yet
        ( new XReadLines(fai) ).readLines.map( u => u.split("\t").head ).zipWithIndex.foreach( u => contigMap += u )
      }
      // String.split interprets `separator` as a regular expression.
      contigMap( line.split(separator)(pos-1) )
    }
  }

  /** Reads every line of inFile, sorts them by contigVal and writes them to outFile. */
  def run = {
    val w : PrintWriter = new PrintWriter(new PrintStream(outFile))
    try {
      ( new XReadLines(inFile) ).readLines.sortBy(contigVal).foreach( u => w.println(u) )
    } finally {
      // PrintWriter buffers its output; without close() the tail of the file is lost.
      w.close()
    }
  }
}

View File

@ -0,0 +1,39 @@
package org.broadinstitute.sting.queue.library.ipf.vcf
import collection.JavaConversions._
import org.broadinstitute.sting.queue.function.InProcessFunction
import org.broadinstitute.sting.commandline._
import org.broadinstitute.sting.utils.text.XReadLines
import java.io.{PrintStream, PrintWriter, File}
/**
 * In-process function that reduces a VCF to a sites-only VCF by keeping just
 * the first eight tab-separated columns of each record.
 *
 * @param vcf    VCF file from which to extract sites
 * @param output sites VCF file to write to
 */
class VCFExtractSites( vcf: File, output: File) extends InProcessFunction {
  @Input(doc="VCF file from which to extract sites") var inVCF: File = vcf
  @Output(doc="Sites VCF file to write to") var outVCF: File = output
  @Argument(doc="Keep non-PASS sites") var keepFilters: Boolean = false
  @Argument(doc="Keep info field") var keepInfo : Boolean = true
  @Argument(doc="Keep qual field") var keepQual : Boolean = true

  /**
   * Transforms one input line into the line to be written.
   *
   * @param line a line read from the input VCF
   * @return the line unchanged when it starts with "##"; the first eight columns
   *         when it is the single-"#" column-header line; the first eight columns
   *         (with INFO and/or QUAL replaced by "." when keepInfo/keepQual are
   *         false) for a record whose FILTER column is "PASS" or when
   *         keepFilters is set; the empty string for lines that should be dropped
   */
  def lineMap( line: String ) : String = {
    if ( line.startsWith("##") ) {
      line
    } else if ( line.startsWith("#") ) {
      // Column-header line: previously it was treated as a record, so it was
      // dropped (its FILTER column reads "FILTER", not "PASS") or had its
      // column names overwritten with ".". Keep the site columns untouched.
      line.split("\t",9).take(8).mkString("\t")
    } else if ( line.trim.length == 0 ) {
      // Blank lines carry no record; drop them instead of failing on spline(6).
      ""
    } else {
      val spline = line.split("\t",9)
      if ( spline(6) == "PASS" || keepFilters ) {
        if ( ! keepInfo ) {
          spline(7) = "."
        }
        if ( ! keepQual ) {
          spline(5) = "."
        }
        spline.slice(0,8).mkString("\t")
      } else {
        ""
      }
    }
  }

  /** Maps every line of inVCF through lineMap and writes the non-empty results to outVCF. */
  def run {
    val w: PrintWriter = new PrintWriter( new PrintStream(outVCF) )
    try {
      ( new XReadLines(inVCF) ).readLines().map(lineMap).view.filter( u => u != "" ).foreach( u => w.println(u) )
    } finally {
      // PrintWriter buffers its output; without close() the tail of the file is lost.
      w.close()
    }
  }
}