PAC sets the refseq file to optional.
Additional ipf for expanding interval lists. Either there's heavy latency on the toro -- or it's not working properly yet (e.g. the system print in the same scope as the file print outputs a line, but no file shows up on the system) git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4874 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
f0ab7b849a
commit
6db86ae0c6
|
|
@ -0,0 +1,83 @@
|
|||
package org.broadinstitute.sting.queue.library.ipf
|
||||
|
||||
import org.broadinstitute.sting.queue.function.InProcessFunction
|
||||
import org.broadinstitute.sting.commandline._
|
||||
import java.io.{PrintStream, File}
|
||||
import collection.JavaConversions._
|
||||
import org.broadinstitute.sting.utils.text.XReadLines
|
||||
import net.sf.picard.reference.FastaSequenceFile
|
||||
import org.broadinstitute.sting.utils.{GenomeLoc, GenomeLocParser}
|
||||
|
||||
// todo -- this is unsafe. Need to use a reference dictionary to ensure no off-contig targets are created
|
||||
class ExpandIntervals(in : File, start: Int, size: Int, out: File, ref: File, opType: String) extends InProcessFunction {
|
||||
@Input(doc="The interval list to expand") val inList : File = in
|
||||
@Input(doc="The reference sequence") val refDict : File = ref
|
||||
@Argument(doc="Number of basepair to start the expanded interval") val startInt : Int = start
|
||||
@Argument(doc="Number of baispair to stop the expanded interval") val sizeInt : Int = size
|
||||
@Output(doc="The output intervals file to write to") val outList : File = out
|
||||
@Argument(doc="The output format for the intervals") val outTypeStr = opType
|
||||
|
||||
val output : PrintStream = new PrintStream(outList)
|
||||
val parser : GenomeLocParser = new GenomeLocParser(new FastaSequenceFile(ref,true))
|
||||
val xrl : XReadLines = new XReadLines(inList)
|
||||
val outType = if ( outTypeStr.equals("INTERVALS") ) IntervalOutputType.INTERVALS else IntervalOutputType.BED
|
||||
|
||||
var previous : GenomeLoc = _
|
||||
var current : GenomeLoc = _
|
||||
var next : GenomeLoc = _
|
||||
|
||||
def run = {
|
||||
// todo -- there's got to be a special view that gives prevous, current, next
|
||||
asScalaIterable(xrl).filter( ! _.startsWith("@")).map(parseGenomeInterval(_)).foreach(expand(_))
|
||||
}
|
||||
|
||||
def expand(loc : GenomeLoc) : Unit = {
|
||||
if ( start > 0 ) {
|
||||
previous = current
|
||||
current = next
|
||||
next = loc
|
||||
if ( current == null ) {
|
||||
return
|
||||
}
|
||||
val new1 = parser.createGenomeLoc(current.getContig,current.getStart-startInt-sizeInt,current.getStart-startInt)
|
||||
val new2 = parser.createGenomeLoc(current.getContig,current.getStop+startInt,current.getStop+startInt+sizeInt)
|
||||
if ( ok(new1) ) {
|
||||
System.out.println("Printing! %s".format(repr(new1)))
|
||||
output.print("%s%n".format(repr(new1)))
|
||||
}
|
||||
if ( ok(new2) ) {
|
||||
System.out.println("Printing! %s".format(repr(new2)))
|
||||
output.print("%s%n".format(repr(new2)))
|
||||
}
|
||||
previous = current
|
||||
} else {
|
||||
output.print("%s%n".format(repr(parser.createGenomeLoc(loc.getContig,loc.getStart-sizeInt,loc.getStop+sizeInt))))
|
||||
}
|
||||
}
|
||||
|
||||
def ok( loc : GenomeLoc ) : Boolean = {
|
||||
//System.out.println("%s - %s - %s".format(repr(next),repr(loc),repr(previous)))
|
||||
( next == null || loc.distance(next) >= start) && (previous == null || loc.distance(previous) >= start)
|
||||
}
|
||||
|
||||
def repr(loc : GenomeLoc) : String = {
|
||||
if ( loc == null ) return "null"
|
||||
if ( outType == IntervalOutputType.INTERVALS ) {
|
||||
return "%s:%d-%d".format(loc.getContig,loc.getStart,loc.getStop)
|
||||
} else if ( outType == IntervalOutputType.BED ) {
|
||||
return "%s\t%d\t%d".format(loc.getContig,loc.getStart-1,loc.getStop)
|
||||
} else {
|
||||
return "FORMAT?"
|
||||
}
|
||||
}
|
||||
|
||||
def parseGenomeInterval( s : String ) : GenomeLoc = {
|
||||
val sp = s.split("\\s+")
|
||||
if ( s.contains(":") ) parser.parseGenomeInterval(s) else parser.createGenomeLoc(sp(0),sp(1).toInt,sp(2).toInt)
|
||||
}
|
||||
|
||||
object IntervalOutputType extends Enumeration("INTERVALS","BED") {
|
||||
type IntervalOutputType = Value
|
||||
val INTERVALS,BED = Value
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,54 @@
|
|||
package org.broadinstitute.sting.queue.library.ipf.intervals
|
||||
|
||||
import org.broadinstitute.sting.queue.function.InProcessFunction
|
||||
import org.broadinstitute.sting.commandline._
|
||||
import java.io.{PrintStream, File}
|
||||
import net.sf.samtools.{SAMSequenceRecord, SAMFileHeader, SAMSequenceDictionary}
|
||||
import org.broadinstitute.sting.utils.text.XReadLines
|
||||
import org.broadinstitute.sting.utils.{GenomeLoc, GenomeLocParser}
|
||||
|
||||
abstract class IntersectIntervals(iVals: List[File], outFile: File) extends InProcessFunction {
|
||||
/* @Input(doc="List of interval files to find the intersection of") val intervals : List[File] = iVals
|
||||
@Output(doc="Output interval file to which to write") val output : File = outFile
|
||||
@Argument(doc="List of contigs for the interval list (defaults to human b37 autosome); IN ORDER")
|
||||
var contigs : List[String] = (new Range(1,22)).map(u => "%d".format(u)) ::: List("X","Y","MT")
|
||||
@Argument(doc="Assume the input interval lists are sorted in the proper order") var assumeSorted = false
|
||||
|
||||
|
||||
val out : PrintStream = new PrintStream(output)
|
||||
|
||||
def run = {
|
||||
var dict : SAMSequenceDictionary = new SAMSequenceDictionary
|
||||
contigs.map( u => new SAMSequenceRecord(u, Integer.MAX_VALUE ) ).foreach(u => dict.addSequence(u))
|
||||
val parser : GenomeLocParser = new GenomeLocParser(dict)
|
||||
val locList : IntervalIterator = new IntervalIterator(intervals,parser,assumeSorted)
|
||||
|
||||
}
|
||||
|
||||
class IntervalIterator(ivals : List[File], parser: GenomeLocParser, sorted: Boolean) extends Iterable[GenomeLoc] {
|
||||
val readers : List[XReadLinesBuffer] = ivals.map(f => new XReadLinesBuffer(new XReadLines(f), parser))
|
||||
val filesAreSorted : Boolean = sorted
|
||||
|
||||
def hasNext = ! readers.filter( _.hasNext ).isEmpty
|
||||
def next = if ( assumeSorted ) nextElement else nextSortedElement
|
||||
def nextElement =
|
||||
|
||||
|
||||
}
|
||||
|
||||
class XReadLinesBuffer( x: XReadLines, p: GenomeLocParser) extends Iterable[GenomeLoc] {
|
||||
val xrl: XReadLines = x
|
||||
val parser : GenomeLocParser = p
|
||||
var current : GenomeLoc = _
|
||||
|
||||
def hasNext : Boolean = curLine != null || x.hasNext
|
||||
def next : String = {
|
||||
if ( curLine == null && x.hasNext ) curLine = x.next
|
||||
val retLine = curLine
|
||||
if ( x.hasNext ) curLine = x.next else curLine = null
|
||||
retLine
|
||||
}
|
||||
|
||||
} */
|
||||
|
||||
}
|
||||
|
|
@ -15,7 +15,7 @@ class PipelineArgumentCollection {
|
|||
@Input(doc="path to trigger track (for UnifiedGenotyper)", shortName="trigger", required=false)
|
||||
var trigger: File = _
|
||||
|
||||
@Input(doc="path to refseqTable (for GenomicAnnotator)", shortName="refseqTable")
|
||||
@Input(doc="path to refseqTable (for GenomicAnnotator)", shortName="refseqTable",required=false)
|
||||
var refseqTable: File = _
|
||||
|
||||
@Input(doc="path to Picard FixMateInformation.jar. See http://picard.sourceforge.net/ .", required=false)
|
||||
|
|
|
|||
Loading…
Reference in New Issue