Initial commit of in-process helper functions for making the BCM more robust
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5144 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
d4f744a4d4
commit
4d9bc84bd5
|
|
@ -0,0 +1,37 @@
|
|||
package org.broadinstitute.sting.queue.library.ipf
|
||||
|
||||
import collection.JavaConversions._
|
||||
import org.broadinstitute.sting.queue.function.InProcessFunction
|
||||
import org.broadinstitute.sting.commandline._
|
||||
import org.broadinstitute.sting.utils.text.XReadLines
|
||||
import java.io.{PrintStream, PrintWriter, File}
|
||||
import collection.immutable.HashMap
|
||||
|
||||
|
||||
/**
 * In-process function that rewrites a delimited text file with its lines ordered
 * by contig, using the contig order found in a reference fasta index (.fai).
 *
 * Comment lines (lines starting with any prefix in `comment`) and blank lines are
 * given the key -1, so they are written before every data line. Data lines are
 * keyed by the 0-based position of their contig in the .fai file.
 *
 * @param input     the file to be sorted
 * @param reference the reference fasta index whose first tab-separated column lists the contigs
 * @param output    the file the sorted lines are written to
 */
class SortByRef( input: File, reference: File, output: File ) extends InProcessFunction {
  @Input(doc="The file to be sorted") var inFile: File = input
  @Input(doc="The reference fasta index") var fai: File = reference
  @Output(doc="The file to write the sorted file to") var outFile : File = output
  @Argument(doc="The character or expression that separates entries") var separator : String = "\t"
  @Argument(doc="The position of the contig in the file (1-based)") var pos: Int = 1
  @Argument(doc="Comment characters (lines will be ignored)") var comment: List[String] = List("#")

  // Contig name -> 0-based rank in the .fai file; filled lazily on the first data line.
  var contigMap: HashMap[String,Int] = new HashMap[String,Int]

  /**
   * Computes the sort key for one line of the input file.
   *
   * @param line a line read from `inFile`
   * @return -1 for blank or comment lines, otherwise the rank of the line's contig in the .fai
   * @throws java.util.NoSuchElementException if the line's contig is not listed in the .fai
   */
  def contigVal( line : String ) : Int = {
    // `comment` holds String prefixes, so compare with startsWith; comparing against
    // line.charAt(0) (a Char) can never match a String and fails on empty lines.
    if ( line.length == 0 || comment.exists( prefix => line.startsWith(prefix) ) ) {
      -1
    } else {
      if ( contigMap.size < 1 ) { // no contigs loaded yet
        ( new XReadLines(fai) ).readLines.map( u => u.split("\t").head ).zipWithIndex.foreach( u => contigMap += u )
      }

      // `separator` is handed to String.split, i.e. it is interpreted as a regular expression.
      val contig = line.split(separator)(pos-1)
      contigMap.getOrElse( contig,
        throw new java.util.NoSuchElementException("Contig '" + contig + "' not found in reference index " + fai) )
    }
  }

  /**
   * Reads every line of `inFile`, sorts the lines by `contigVal`, and writes them to `outFile`.
   * The writer is always closed so that buffered output reaches the file.
   */
  def run = {
    val w : PrintWriter = new PrintWriter(new PrintStream(outFile))
    try {
      ( new XReadLines(inFile) ).readLines.sortBy(contigVal).foreach( u => w.println(u) )
    } finally {
      w.close()
    }
  }
}
|
||||
|
|
@ -0,0 +1,39 @@
|
|||
package org.broadinstitute.sting.queue.library.ipf.vcf
|
||||
|
||||
import collection.JavaConversions._
|
||||
import org.broadinstitute.sting.queue.function.InProcessFunction
|
||||
import org.broadinstitute.sting.commandline._
|
||||
import org.broadinstitute.sting.utils.text.XReadLines
|
||||
import java.io.{PrintStream, PrintWriter, File}
|
||||
|
||||
|
||||
/**
 * In-process function that writes a sites-only copy of a VCF file: every record is
 * cut down to its first eight tab-separated columns (through INFO), dropping FORMAT
 * and the per-sample columns.
 *
 * @param vcf    VCF file from which to extract sites
 * @param output sites VCF file to write to
 */
class VCFExtractSites( vcf: File, output: File) extends InProcessFunction {
  @Input(doc="VCF file from which to extract sites") var inVCF: File = vcf
  @Output(doc="Sites VCF file to write to") var outVCF: File = output
  @Argument(doc="Keep non-PASS sites") var keepFilters: Boolean = false
  @Argument(doc="Keep info field") var keepInfo : Boolean = true
  @Argument(doc="Keep qual field") var keepQual : Boolean = true

  /**
   * Converts one line of the input VCF into the line to emit.
   *
   * @param line a line read from `inVCF`
   * @return the line to write, or "" when the line should be dropped
   *         (blank lines, and non-PASS records unless `keepFilters` is set)
   */
  def lineMap( line: String ) : String = {
    if ( line.length == 0 ) {
      // Blank lines carry no record; drop them instead of failing on the column lookup.
      ""
    } else if ( line.startsWith("##") ) {
      // Meta-information lines pass through untouched.
      line
    } else {
      // Limit 9: columns 1-8 are split out, everything after stays in one trailing element.
      val spline = line.split("\t",9)

      if ( line.startsWith("#") ) {
        // Column-header line: always keep it, with the column names unmodified,
        // rather than testing its FILTER column as if it were a record.
        spline.slice(0,8).mkString("\t")
      } else if ( spline(6) == "PASS" || keepFilters ) {
        if ( ! keepInfo ) {
          spline(7) = "."
        }
        if ( ! keepQual ) {
          spline(5) = "."
        }
        spline.slice(0,8).mkString("\t")
      } else {
        ""
      }
    }
  }

  /**
   * Reads `inVCF`, maps every line through `lineMap`, and writes the non-empty results
   * to `outVCF`. The writer is always closed so that buffered output reaches the file.
   */
  def run {
    val w: PrintWriter = new PrintWriter( new PrintStream(outVCF) )
    try {
      ( new XReadLines(inVCF) ).readLines().map(lineMap).view.filter( u => u != "" ).foreach( u => w.println(u) )
    } finally {
      w.close()
    }
  }

}
|
||||
Loading…
Reference in New Issue