Changed fullCallingPipeline to output all contigs in the reference if scattering.

When the cleaner interval scatter count is set to one, the intervals are now explicitly set to Nil.
TODO: Need to add an option that lets the user choose from the command line whether to scatter all contigs or just those in the intervals list. For now, roughly the same behavior can be obtained by setting the interval scatter count equal to the number of contigs+1, assuming the random contigs come at the end of the sequence dictionary.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4565 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
kshakir 2010-10-24 03:01:06 +00:00
parent 3039c0de3c
commit 80259b9e20
2 changed files with 6 additions and 6 deletions

View File

@ -42,8 +42,8 @@ class fullCallingPipeline extends QScript {
@Input(doc="level of parallelism for UnifiedGenotyper", shortName="snpScatter", required=false) @Input(doc="level of parallelism for UnifiedGenotyper", shortName="snpScatter", required=false)
var num_snp_scatter_jobs = 20 var num_snp_scatter_jobs = 20
@Input(doc="level of parallelism for IndelGenotyperV2", shortName="indelScatter", required=false) //@Input(doc="level of parallelism for IndelGenotyperV2", shortName="indelScatter", required=false)
var num_indel_scatter_jobs = 5 //var num_indel_scatter_jobs = 5
@Input(doc="Skip indel-cleaning for BAM files (for testing only)", shortName="skipCleaning", required=false) @Input(doc="Skip indel-cleaning for BAM files (for testing only)", shortName="skipCleaning", required=false)
var skip_cleaning = false var skip_cleaning = false
@ -92,8 +92,7 @@ class fullCallingPipeline extends QScript {
// get contigs (needed for indel cleaning parallelism) // get contigs (needed for indel cleaning parallelism)
val contigs = IntervalScatterFunction.distinctContigs( val contigs = IntervalScatterFunction.distinctContigs(
qscript.pipeline.getProject.getReferenceFile, qscript.pipeline.getProject.getReferenceFile)
List(qscript.pipeline.getProject.getIntervalList.toString))
for ( sample <- recalibratedSamples ) { for ( sample <- recalibratedSamples ) {
val sampleId = sample.getId val sampleId = sample.getId
@ -122,7 +121,7 @@ class fullCallingPipeline extends QScript {
realigner.input_file = targetCreator.input_file realigner.input_file = targetCreator.input_file
realigner.targetIntervals = targetCreator.out realigner.targetIntervals = targetCreator.out
realigner.intervals = Nil realigner.intervals = Nil
realigner.intervalsString = contigs realigner.intervalsString = Nil
realigner.scatterCount = { realigner.scatterCount = {
if (num_cleaner_scatter_jobs.isDefined) if (num_cleaner_scatter_jobs.isDefined)
num_cleaner_scatter_jobs.get min contigs.size num_cleaner_scatter_jobs.get min contigs.size
@ -132,6 +131,7 @@ class fullCallingPipeline extends QScript {
// if scatter count is > 1, do standard scatter gather, if not, explicitly set up fix mates // if scatter count is > 1, do standard scatter gather, if not, explicitly set up fix mates
if (realigner.scatterCount > 1) { if (realigner.scatterCount > 1) {
realigner.intervalsString = contigs
realigner.out = cleaned_bam realigner.out = cleaned_bam
// While gathering run fix mates. // While gathering run fix mates.
realigner.setupScatterFunction = { realigner.setupScatterFunction = {

View File

@ -74,7 +74,7 @@ object IntervalScatterFunction {
locs.toList locs.toList
} }
def distinctContigs(reference: File, intervals: List[String]) = { def distinctContigs(reference: File, intervals: List[String] = Nil) = {
val referenceSource = new ReferenceDataSource(reference) val referenceSource = new ReferenceDataSource(reference)
val locs = parseLocs(referenceSource, intervals) val locs = parseLocs(referenceSource, intervals)
var contig: String = null var contig: String = null