Initial commit of in-process helper functions for making the BCM more robust
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5144 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
d4f744a4d4
commit
4d9bc84bd5
|
|
@ -0,0 +1,37 @@
|
||||||
|
package org.broadinstitute.sting.queue.library.ipf
|
||||||
|
|
||||||
|
import collection.JavaConversions._
|
||||||
|
import org.broadinstitute.sting.queue.function.InProcessFunction
|
||||||
|
import org.broadinstitute.sting.commandline._
|
||||||
|
import org.broadinstitute.sting.utils.text.XReadLines
|
||||||
|
import java.io.{PrintStream, PrintWriter, File}
|
||||||
|
import collection.immutable.HashMap
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * In-process function that reorders the lines of a delimited text file so that they follow
 * the contig order listed in a reference fasta index (.fai).
 *
 * Lines are ranked only by the contig named in column `pos`; comment lines and blank lines
 * are ranked ahead of every contig. Ordering of lines that share a rank is whatever
 * `sortBy` yields (documented as stable in the Scala collections library).
 *
 * @param input     the file to be sorted
 * @param reference the reference fasta index; the first tab-separated field of each line is
 *                  taken as a contig name, and its line number as the contig's rank
 * @param output    the file the sorted lines are written to
 */
class SortByRef( input: File, reference: File, output: File ) extends InProcessFunction {
  @Input(doc="The file to be sorted") var inFile: File = input
  @Input(doc="The reference fasta index") var fai: File = reference
  @Output(doc="The file to write the sorted file to") var outFile : File = output
  @Argument(doc="The character or expression that separates entries") var separator : String = "\t"
  @Argument(doc="The position of the contig in the file (1-based)") var pos: Int = 1
  @Argument(doc="Comment characters (lines will be ignored)") var comment: List[String] = List("#")

  /** Contig name -> rank (0-based line number in the index). Filled lazily by [[contigVal]]. */
  var contigMap: HashMap[String,Int] = new HashMap[String,Int]

  /**
   * Computes the sort key for one input line.
   *
   * @param line a line of the input file
   * @return -1 for a blank line or a line starting with any of the `comment` prefixes;
   *         otherwise the rank of the contig found in column `pos`
   * @throws NoSuchElementException if the contig is not present in the index
   */
  def contigVal( line : String ) : Int = {
    // `comment` holds Strings, so compare by prefix; the previous Char-vs-String `contains`
    // could never match, and `charAt(0)` failed on empty lines.
    if ( line.isEmpty || comment.exists( prefix => line.startsWith(prefix) ) ) {
      -1
    } else {
      if ( contigMap.isEmpty ) { // index not loaded yet
        ( new XReadLines(fai) ).readLines.map( entry => entry.split("\t").head ).zipWithIndex.foreach( ranked => contigMap += ranked )
      }
      // `separator` is handed to String.split, i.e. it is interpreted as a regular expression
      contigMap( line.split(separator)(pos-1) )
    }
  }

  /** Reads `inFile`, sorts its lines by [[contigVal]] and writes them to `outFile`. */
  def run = {
    val writer = new PrintWriter(new PrintStream(outFile))
    try {
      ( new XReadLines(inFile) ).readLines.sortBy(contigVal).foreach( sortedLine => writer.println(sortedLine) )
    } finally {
      // PrintWriter buffers; without close() the tail of the output may never reach disk
      writer.close()
    }
  }
}
|
||||||
|
|
@ -0,0 +1,39 @@
|
||||||
|
package org.broadinstitute.sting.queue.library.ipf.vcf
|
||||||
|
|
||||||
|
import collection.JavaConversions._
|
||||||
|
import org.broadinstitute.sting.queue.function.InProcessFunction
|
||||||
|
import org.broadinstitute.sting.commandline._
|
||||||
|
import org.broadinstitute.sting.utils.text.XReadLines
|
||||||
|
import java.io.{PrintStream, PrintWriter, File}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * In-process function that reduces a VCF to a sites-only VCF: every record is cut down to
 * its first eight tab-separated columns, dropping everything from the ninth column onward.
 *
 * @param vcf    the VCF file from which to extract sites
 * @param output the sites VCF file to write
 */
class VCFExtractSites( vcf: File, output: File) extends InProcessFunction {
  @Input(doc="VCF file from which to extract sites") var inVCF: File = vcf
  @Output(doc="Sites VCF file to write to") var outVCF: File = output
  @Argument(doc="Keep non-PASS sites") var keepFilters: Boolean = false
  @Argument(doc="Keep info field") var keepInfo : Boolean = true
  @Argument(doc="Keep qual field") var keepQual : Boolean = true

  /**
   * Transforms a single VCF line into its sites-only form.
   *
   * @param line one line of the input VCF
   * @return the line unchanged for `##` meta lines; the first eight column names for the
   *         `#` column header line; the first eight columns for a kept record (with the
   *         sixth and/or eighth column replaced by "." when `keepQual` / `keepInfo` are
   *         off); or "" when the line is blank or the record is filtered out
   */
  def lineMap( line: String ) : String = {
    if ( line.startsWith("##") ) {
      line
    } else if ( line.startsWith("#") ) {
      // Column header: previously treated as a record, so it was dropped (its FILTER column
      // is not "PASS") or had its column names overwritten with "." — keep it, truncated.
      line.split("\t",9).take(8).mkString("\t")
    } else if ( line.isEmpty ) {
      "" // blank lines carry no record; "" is the "drop this line" marker used by run
    } else {
      val fields = line.split("\t",9) // limit 9: everything past column 8 stays in one unused chunk
      if ( fields(6) == "PASS" || keepFilters ) {
        if ( ! keepInfo ) {
          fields(7) = "."
        }
        if ( ! keepQual ) {
          fields(5) = "."
        }
        fields.slice(0,8).mkString("\t")
      } else {
        ""
      }
    }
  }

  /** Streams `inVCF` through [[lineMap]] and writes every non-empty result to `outVCF`. */
  def run {
    val writer = new PrintWriter( new PrintStream(outVCF) )
    try {
      ( new XReadLines(inVCF) ).readLines().map(lineMap).view.filter( mapped => mapped != "" ).foreach( mapped => writer.println(mapped) )
    } finally {
      // PrintWriter buffers; without close() the tail of the output may never reach disk
      writer.close()
    }
  }

}
|
||||||
Loading…
Reference in New Issue