First draft of a script to measure the performance of read walkers when merging
BAM files dynamically.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5570 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
b722ebf244
commit
798fb6a7a2
|
|
@ -0,0 +1,81 @@
|
|||
import java.io.PrintWriter
|
||||
import org.broadinstitute.sting.queue.QScript
|
||||
import org.broadinstitute.sting.queue.extensions.gatk._
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException
|
||||
|
||||
/**
 * A Queue pipeline that runs PrintReads over a growing number of BAM files.
 *
 * The single -I argument names a text file that lists BAM files, one per line.
 * For each N in 1, 1 + step_size, ... up to min(max_bams, number of listed lines),
 * the script writes the first N lines to <test_case>/NNN_bams/bams.list and adds
 * one PrintReads job that reads that list and discards its output.
 *
 * NOTE: This code is an unsupported example for soliciting feedback on how to improve Queue.
 * Future syntax will simplify running the GATK so please expect the syntax below to change significantly.
 */
class PrintReadsAcrossManySamples extends QScript {
  // The full packaged jar should be used.
  // You can build this jar via 'ant package' and then find it under
  // 'Sting/dist/packages/GenomeAnalysisTK-*/GenomeAnalysisTK.jar'
  @Input(doc="The path to the packaged GenomeAnalysisTK.jar file.", shortName="gatk")
  var gatkJar: File = null

  @Input(doc="The reference file for the bam files.", shortName="R")
  var referenceFile: File = null

  // NOTE: Do not initialize List, Set, or Option to null
  // as you won't be able to update the collection.
  // By default set:
  //   List[T] = Nil
  //   Set[T] = Set.empty[T]
  //   Option[T] = None
  @Input(doc="One or more bam files.", shortName="I")
  var bamFiles: List[File] = Nil

  @Input(doc="Name of the test case", fullName="test_case",required=false)
  var testCaseName: String = "."

  @Input(doc="Max number of bam files to process", fullName="max_bams",required=false)
  var maxBams = 1

  @Input(doc="Step size",fullName="step_size",required=false)
  var stepSize = 1

  // This trait allows us set the variables below in one place,
  // and then reuse this trait on each CommandLineGATK function below.
  trait PrintReadsAcrossManySamplesArguments extends CommandLineGATK {
    this.jarFile = PrintReadsAcrossManySamples.this.gatkJar
    this.reference_sequence = PrintReadsAcrossManySamples.this.referenceFile
    this.memoryLimit = 8
  }

  /**
   * Builds one PrintReads job per BAM count, each with its own working directory and bams.list.
   *
   * Throws ReviewedStingException when -I does not name exactly one list file, when
   * step_size is smaller than 1, or when a per-run directory cannot be created.
   */
  def script = {
    if (bamFiles.size != 1)
      throw new ReviewedStingException("-I argument must consist of exactly one file containing a list of BAM files.")
    // A step of 0 makes Range throw an opaque error and a negative step silently yields no jobs.
    if (stepSize < 1)
      throw new ReviewedStingException("--step_size must be at least 1 but was %d.".format(stepSize))

    val bamPaths = readLines(bamFiles.head)

    for (numBams <- 1 to math.min(maxBams, bamPaths.size) by stepSize) {
      val dir = new File(testCaseName + "/%03d_bams".format(numBams))
      // mkdirs also creates a missing test-case directory; fail loudly instead of
      // letting the PrintWriter below report a confusing FileNotFoundException.
      if (!dir.isDirectory && !dir.mkdirs())
        throw new ReviewedStingException("Unable to create directory " + dir)

      val file = new File(dir, "bams.list")
      writeLines(file, bamPaths.take(numBams))

      // Create the function that we can run.
      // The reference sequence, jar and memory limit come from the mixed-in trait.
      val printreads = new PrintReads with PrintReadsAcrossManySamplesArguments

      printreads.jobOutputFile = new File(dir, "PrintReads.out")
      printreads.input_file = List(file)
      // The written reads are discarded.
      printreads.out = new File("/dev/null")

      add(printreads)
    }
  }

  /** Reads every line of the given file, always closing the underlying source. */
  private def readLines(listFile: File): List[String] = {
    val source = scala.io.Source.fromFile(listFile)
    try {
      source.getLines().toList
    } finally {
      source.close()
    }
  }

  /** Writes the given lines to the file, one per line, closing the writer even on failure. */
  private def writeLines(target: File, lines: List[String]): Unit = {
    val writer = new PrintWriter(target)
    try {
      lines.foreach(line => writer.println(line))
    } finally {
      writer.close()
    }
  }
}
|
||||
|
|
@ -0,0 +1,6 @@
|
|||
#!/bin/sh
# Launches the PrintReadsAcrossManySamples Queue script with fixed jar, reference
# and BAM-list paths, requesting up to 2000 BAMs in steps of 10.
# Any arguments given to this wrapper are appended to the Queue command line;
# "$@" forwards all of them without word splitting (previously only $1 was passed).
java -Djava.io.tmpdir=/broad/shptmp/hanna -jar ~/src/Sting/dist/Queue.jar \
    --script PrintReadsAcrossManySamples.q \
    -gatk ~/src/Sting/dist/GenomeAnalysisTK.jar \
    -R /humgen/1kg/reference/human_g1k_v37.fasta \
    -I ~/tests/1600samples/1kg_t2d.list --max_bams 2000 --step_size 10 \
    -bsub -jobQueue week "$@"
|
||||
Loading…
Reference in New Issue