Bug fixes in the in-process functions (spoiled by Python: I was not closing my writers). SortByRef now works somewhat like the Perl script does, rather than doing a memory-expensive sort. Adding a QTools QScript, which is kind of clunky and will be used mostly for integration tests of these in-process functions (IPFs), pending some better way to construct argument collections and function accessors at compile time.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5182 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
9ddc95c833
commit
5398cf620a
|
|
@ -0,0 +1,68 @@
|
|||
import org.broadinstitute.sting.queue.library.ipf.vcf.{VCFExtractIntervals, VCFExtractSamples, VCFSimpleMerge, VCFExtractSites}
|
||||
import org.broadinstitute.sting.queue.library.ipf.SortByRef
|
||||
import org.broadinstitute.sting.queue.QScript
|
||||
import collection.JavaConversions._
|
||||
|
||||
// todo -- should the argument collection on which this runs be generated at compile-time into extensions??
|
||||
// todo -- maybe a compile-time generated enum of available library functions? (ipf of course)
|
||||
/**
 * QScript that builds exactly one library in-process function, selected by name with -T,
 * and hands it to `add`.
 *
 * Recognised tool names: VCFExtractSites, VCFSimpleMerge, VCFExtractSamples,
 * VCFExtractIntervals, SortByRef. Any other name is rejected with an
 * IllegalArgumentException instead of silently producing an empty pipeline.
 *
 * Several arguments are optional on the command line because only some tools use them;
 * each tool checks the ones it needs and reports a missing one by flag name.
 */
class QTools extends QScript {
  @Argument(doc="Tool to run",shortName="T", required=true) var qtool : String = _
  @Argument(doc="input VCF",shortName="ivcf",required=false) var inVCF : File = _
  @Argument(doc="input VCF files",shortName="vcfs",required=false) var inVCFs : String = _
  @Argument(doc="output file",shortName="out",required=true) var output : File = _
  @Argument(doc="reference file",shortName="ref",required=false) var ref : File = _
  @Argument(doc="The samples to extract",shortName="sm",required=false) var samples : String = _
  @Argument(doc="Keep filtered sites when merging or extracting?",shortName="kf",required=false) var keepFilters : Boolean = false
  // todo -- additional arguments or argument collection

  /** Dispatches on the -T tool name to the matching run* builder. */
  def script: Unit = {
    qtool match {
      case "VCFExtractSites" => runVCFExtractSites
      case "VCFSimpleMerge" => runVCFSimpleMerge
      case "VCFExtractSamples" => runVCFExtractSamples
      case "VCFExtractIntervals" => runVCFExtractIntervals
      case "SortByRef" => runSortByRef
      case unknown =>
        throw new IllegalArgumentException("Unknown tool '" + unknown + "'. Supported tools: " +
          "VCFExtractSites, VCFSimpleMerge, VCFExtractSamples, VCFExtractIntervals, SortByRef")
    }
  }

  /** Returns `value`, or fails with a message naming the missing command-line flag when it was never set. */
  private def requireArg[T <: AnyRef]( value : T, flag : String ) : T =
    Option(value).getOrElse(
      throw new IllegalArgumentException("Tool " + qtool + " requires the -" + flag + " argument") )

  /** Splits a required comma-separated argument into its entries. */
  private def commaSeparated( value : String, flag : String ) : List[String] =
    requireArg(value, flag).split(",").toList

  /** The file at the path of -ref with ".fai" appended. */
  private def faiFile : File = new File(requireArg(ref, "ref").getAbsolutePath+".fai")

  /** Builds a VCFExtractSites from -ivcf to -out. */
  def runVCFExtractSites = {
    val ves : VCFExtractSites = new VCFExtractSites(requireArg(inVCF, "ivcf"),output)
    add(ves)
  }

  /** Builds a VCFSimpleMerge over the comma-separated -vcfs files, using the .fai next to -ref. */
  def runVCFSimpleMerge = {
    val vsm : VCFSimpleMerge = new VCFSimpleMerge
    vsm.vcfs = commaSeparated(inVCFs, "vcfs").map(new File(_))
    vsm.outVCF = output
    vsm.fai = faiFile

    add(vsm)
  }

  /** Builds a VCFExtractSamples from -ivcf to -out for the comma-separated -sm samples. */
  def runVCFExtractSamples = {
    val ves : VCFExtractSamples = new VCFExtractSamples(requireArg(inVCF, "ivcf"),output,commaSeparated(samples, "sm"))
    add(ves)
  }

  /** Builds a VCFExtractIntervals from -ivcf to -out, passing along -kf. */
  def runVCFExtractIntervals = {
    val vei : VCFExtractIntervals = new VCFExtractIntervals(requireArg(inVCF, "ivcf"),output,keepFilters)
    add(vei)
  }

  /** Builds a SortByRef from -ivcf to -out, using the .fai next to -ref. */
  def runSortByRef = {
    val sbr : SortByRef = new SortByRef(requireArg(inVCF, "ivcf"),faiFile,output)
    add(sbr)
  }
}
|
||||
|
|
@ -14,24 +14,44 @@ class SortByRef( input: File, reference: File, output: File ) extends InProcessF
|
|||
@Output(doc="The file to write the sorted file to") var outFile : File = output
|
||||
@Argument(doc="The character or expression that separates entries") var separator : String = "\t"
|
||||
@Argument(doc="The position of the contig in the file (1-based)") var pos: Int = 1
|
||||
@Argument(doc="Comment characters (lines will be ignored)") var comment: List[String] = List("#")
|
||||
@Argument(doc="Comment characters (lines will be brought to file head)") var comment: List[String] = List("#")
|
||||
|
||||
var contigMap: HashMap[String,Int] = new HashMap[String,Int];
|
||||
val COMMENT_STRING = "@#!"
|
||||
|
||||
def contigVal( line : String ) : Int = {
|
||||
if ( comment.contains(line.charAt(0)) ) {
|
||||
return -1;
|
||||
}
|
||||
var contigMap: List[(String,PrintWriter,File)] = Nil;
|
||||
|
||||
/**
 * Creates the temp-file bucket for one entry.
 *
 * @param line a tab-delimited line; only the text before the first tab is used
 * @return (text before the first tab, writer onto a fresh temp file, that temp file)
 */
def entryToTriplet( line : String ) : (String,PrintWriter,File) = {
  val ctig : String = line.split("\t",2)(0)
  val tmpf : File = File.createTempFile("sbr",".tmp")
  // Make sure the scratch file does not outlive the JVM even if nobody deletes it explicitly.
  tmpf.deleteOnExit()
  val pw : PrintWriter = new PrintWriter(new PrintStream(tmpf))
  (ctig,pw,tmpf)
}
|
||||
|
||||
/**
 * Chooses the temp-file writer that a line of the input should be written to.
 *
 * On first use the buckets are created: one for comment lines (keyed by COMMENT_STRING),
 * followed by one per line of the file `fai`, in that file's order.
 *
 * @param line one line of the input
 * @return the comment bucket's writer when the line starts with one of the `comment` strings;
 *         the writer whose key equals field `pos` (1-based, split on `separator`) otherwise;
 *         the first bucket's writer, after logging, when no key matches
 */
def contigVal( line : String ) : PrintWriter = {

  if ( contigMap.isEmpty ) { // no contigs
    contigMap :+= entryToTriplet(COMMENT_STRING+"\t.")
    contigMap ++= ( new XReadLines(fai)).readLines.map( entryToTriplet(_)).toList
  }

  // Guard the charAt: an empty line is not a comment, it falls through to the "no match" path.
  if ( line.length > 0 && comment.contains(line.charAt(0).toString) ) {
    contigMap.find( u => u._1.equals(COMMENT_STRING)).head._2
  } else {
    // Split once, not once per bucket; a line with too few fields simply has no key.
    val fields = line.split(separator)
    val key : Option[String] = if ( pos >= 1 && fields.length >= pos ) Some(fields(pos-1)) else None

    key.flatMap( k => contigMap.find( u => u._1.equals(k) ) ) match {
      case Some(entry) => entry._2
      case None =>
        System.out.println("Empty match for "+line)
        contigMap(0)._2
    }
  }
}
|
||||
|
||||
/**
 * Writes `inFile` to `outFile` grouped by bucket: every input line is appended to the temp
 * file chosen by `contigVal`, then the temp files are concatenated in `contigMap` order.
 * Writers are closed and temp files removed even when a step fails.
 */
def run: Unit = {
  val w : PrintWriter = new PrintWriter(new PrintStream(outFile))
  try {
    System.out.println("Writing to temp files...")
    try {
      ( new XReadLines(inFile) ).readLines.foreach( u => contigVal(u).println(u) )
    } finally {
      // Flush and release every bucket before any of them is read back.
      contigMap.foreach( u => u._2.close() )
    }
    System.out.println("Concatenating...")
    // Open one temp file at a time rather than all of them up front.
    contigMap.foreach( bucket => asScalaIterator(new XReadLines(bucket._3)).foreach( l => w.println(l) ) )
  } finally {
    w.close()
    contigMap.foreach( u => u._3.delete() )
    // Drop the closed writers so a later call rebuilds its buckets from scratch.
    contigMap = Nil
  }
}
|
||||
}
|
||||
|
|
@ -19,6 +19,7 @@ class VCFExtractIntervals(inVCF: File, outList: File, useFilterSites: Boolean) e
|
|||
/**
 * Opens `out` on `listOut`, passes every line read from `vcfIn` to `vcf2int`, and closes
 * `out` afterwards, including when processing a line throws.
 */
def run: Unit = {
  out = new PrintWriter(new PrintStream(listOut))
  try {
    asScalaIterator(new XReadLines(vcfIn)).foreach(vcf2int)
  } finally {
    out.close()
  }
}
|
||||
|
||||
def vcf2int( vcfLine: String ) : Unit = {
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@ import collection.JavaConversions._
|
|||
import org.broadinstitute.sting.commandline._
|
||||
import java.io.{PrintWriter, PrintStream, File}
|
||||
|
||||
class VCFExtractSamples(inVCF: File, outVCF: File, samples: List[String]) extends InProcessFunction {
|
||||
class VCFExtractSamples(inVCF: File, outVCF: File, samples: List[String]) extends InProcessFunction {
|
||||
def this(in: File, out: File, samples: File) = this(in,out, (new XReadLines(samples)).readLines.toList)
|
||||
|
||||
@Input(doc="VCF from which to extract samples") var inputVCF : File = inVCF
|
||||
|
|
@ -19,6 +19,7 @@ class VCFExtractSamples(inVCF: File, outVCF: File, samples: List[String]) extend
|
|||
/**
 * Opens `out` on `outputVCF`, passes every line read from `inputVCF` to `subset`, and closes
 * `out` afterwards, including when processing a line throws.
 */
def run: Unit = {
  out = new PrintWriter(new PrintStream(outputVCF))
  try {
    asScalaIterator(new XReadLines(inputVCF)).foreach(subset)
  } finally {
    out.close()
  }
}
|
||||
|
||||
def subset( line : String ) {
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@ class VCFExtractSites( vcf: File, output: File) extends InProcessFunction {
|
|||
def lineMap( line: String ) : String = {
|
||||
if ( line.startsWith("##") ) { return line }
|
||||
val spline = line.split("\t",9)
|
||||
if ( spline(0).startsWith("#")) { return spline.slice(0,8).reduceLeft( _+"\t"+_) }
|
||||
|
||||
if ( spline(6) == "PASS" || keepFilters ) {
|
||||
if ( ! keepInfo ) {
|
||||
|
|
@ -31,9 +32,34 @@ class VCFExtractSites( vcf: File, output: File) extends InProcessFunction {
|
|||
return ""
|
||||
}
|
||||
|
||||
/**
 * Tracing wrapper around `lineMap`: prints the line going in and the result coming out on
 * standard output.
 *
 * @param line the line to map
 * @return exactly what `lineMap(line)` returned
 */
def lineMapDebug( line: String ) : String = {
  val trace = System.out
  trace.printf("Input: %s%n ",line)
  val mapped = lineMap(line)
  trace.printf("Output: %s%n",mapped)
  mapped
}
|
||||
|
||||
/**
 * Tracing predicate: reports on standard output whether a line is kept.
 *
 * @param line the line to test
 * @return false for the empty string, true for anything else
 */
def debugFilter ( line : String ) : Boolean = {
  System.out.printf("Filter In: %s%n",line)
  val kept = line != ""
  if ( kept ) System.out.printf("Not filtered %n")
  else System.out.printf("Filtered%n")
  kept
}
|
||||
|
||||
/**
 * Tracing print: echoes the line on standard output, then writes it with `println` to `k`.
 *
 * @param line the line to write
 * @param k    the writer that receives the line
 */
def debugPrint(line: String, k : PrintWriter) : Unit = {
  val trace = System.out
  trace.printf("Into print: %s%n",line)
  k.println(line)
}
|
||||
|
||||
/**
 * Streams `inVCF` through `lineMap` and writes every non-empty result to `outVCF`.
 * Lines are processed one at a time through an iterator, and the writer is closed even
 * when mapping a line throws.
 */
def run: Unit = {
  val w: PrintWriter = new PrintWriter( new PrintStream(outVCF) )
  try {
    asScalaIterator[String](new XReadLines(inVCF)).map(lineMap).filter( u => u != "" ).foreach( u => w.println(u) )
  } finally {
    w.close()
  }
}
|
||||
|
||||
}
|
||||
Loading…
Reference in New Issue