From 80259b9e2055b5ca08428dc30fe4b7606add1d5f Mon Sep 17 00:00:00 2001 From: kshakir Date: Sun, 24 Oct 2010 03:01:06 +0000 Subject: [PATCH] Changed fullCallingPipeline to output all contigs in the reference if scattering. When the cleaner interval scatter count is set to one, the intervals are explicitly set to Nil. TODO: Need to add an option that lets the user choose from the command line to scatter all contigs or just those in the intervals list. For now, roughly the same behavior can be obtained by setting the interval scatter count equal to the number of contigs+1, assuming the random contigs come at the end of the sequence dictionary. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4565 348d0f76-0448-11de-a6fe-93d51630548a --- scala/qscript/fullCallingPipeline.q | 10 +++++----- .../extensions/gatk/IntervalScatterFunction.scala | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/scala/qscript/fullCallingPipeline.q b/scala/qscript/fullCallingPipeline.q index eae68b9f3..cc51cd09c 100755 --- a/scala/qscript/fullCallingPipeline.q +++ b/scala/qscript/fullCallingPipeline.q @@ -42,8 +42,8 @@ class fullCallingPipeline extends QScript { @Input(doc="level of parallelism for UnifiedGenotyper", shortName="snpScatter", required=false) var num_snp_scatter_jobs = 20 - @Input(doc="level of parallelism for IndelGenotyperV2", shortName="indelScatter", required=false) - var num_indel_scatter_jobs = 5 + //@Input(doc="level of parallelism for IndelGenotyperV2", shortName="indelScatter", required=false) + //var num_indel_scatter_jobs = 5 @Input(doc="Skip indel-cleaning for BAM files (for testing only)", shortName="skipCleaning", required=false) var skip_cleaning = false @@ -92,8 +92,7 @@ class fullCallingPipeline extends QScript { // get contigs (needed for indel cleaning parallelism) val contigs = IntervalScatterFunction.distinctContigs( - qscript.pipeline.getProject.getReferenceFile, - List(qscript.pipeline.getProject.getIntervalList.toString)) + 
qscript.pipeline.getProject.getReferenceFile) for ( sample <- recalibratedSamples ) { val sampleId = sample.getId @@ -122,7 +121,7 @@ class fullCallingPipeline extends QScript { realigner.input_file = targetCreator.input_file realigner.targetIntervals = targetCreator.out realigner.intervals = Nil - realigner.intervalsString = contigs + realigner.intervalsString = Nil realigner.scatterCount = { if (num_cleaner_scatter_jobs.isDefined) num_cleaner_scatter_jobs.get min contigs.size @@ -132,6 +131,7 @@ class fullCallingPipeline extends QScript { // if scatter count is > 1, do standard scatter gather, if not, explicitly set up fix mates if (realigner.scatterCount > 1) { + realigner.intervalsString = contigs realigner.out = cleaned_bam // While gathering run fix mates. realigner.setupScatterFunction = { diff --git a/scala/src/org/broadinstitute/sting/queue/extensions/gatk/IntervalScatterFunction.scala b/scala/src/org/broadinstitute/sting/queue/extensions/gatk/IntervalScatterFunction.scala index 9d6f801cb..a760b220b 100644 --- a/scala/src/org/broadinstitute/sting/queue/extensions/gatk/IntervalScatterFunction.scala +++ b/scala/src/org/broadinstitute/sting/queue/extensions/gatk/IntervalScatterFunction.scala @@ -74,7 +74,7 @@ object IntervalScatterFunction { locs.toList } - def distinctContigs(reference: File, intervals: List[String]) = { + def distinctContigs(reference: File, intervals: List[String] = Nil) = { val referenceSource = new ReferenceDataSource(reference) val locs = parseLocs(referenceSource, intervals) var contig: String = null