diff --git a/build.xml b/build.xml
index d640e6339..224bb6417 100644
--- a/build.xml
+++ b/build.xml
@@ -12,8 +12,8 @@
-
-
+
+
@@ -66,8 +66,8 @@
-
-
+
+
@@ -113,7 +113,7 @@
-
+
@@ -143,7 +143,7 @@
-
+
@@ -211,21 +211,21 @@
-
-
+
+
Generating Queue GATK extensions...
-
+
-
+
-
-
+
+
Building Queue GATK extensions...
-
+
@@ -245,7 +245,7 @@
-
+
@@ -295,21 +295,21 @@
-
+
+
+
+
+
+
+
-
-
-
-
-
-
@@ -352,12 +352,6 @@
-
-
-
-
-
-
@@ -556,7 +550,7 @@
-
+
@@ -564,14 +558,11 @@
-
+
+
+
-
-
-
-
-
diff --git a/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java b/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java
index fe8821a61..33e8d3a22 100755
--- a/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java
+++ b/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java
@@ -171,7 +171,7 @@ public class GATKArgumentCollection {
public IntervalMergingRule intervalMerging = IntervalMergingRule.ALL;
@ElementList(required = false)
- @Argument(fullName = "read_group_black_list", shortName="rgbl", doc="Filters out read groups matching : or a .txt file containing the filter strings one per line.", required = false)
+ @Input(fullName = "read_group_black_list", shortName="rgbl", doc="Filters out read groups matching : or a .txt file containing the filter strings one per line.", required = false)
public List readGroupBlackList = null;
/**
diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java b/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java
index 153138768..ac8a1a384 100755
--- a/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java
+++ b/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java
@@ -28,7 +28,7 @@ package org.broadinstitute.sting.gatk.walkers.indels;
import net.sf.samtools.*;
import net.sf.samtools.util.StringUtil;
import org.broad.tribble.util.variantcontext.VariantContext;
-import org.broadinstitute.sting.commandline.Hidden;
+import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
import org.broadinstitute.sting.utils.interval.IntervalUtils;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
@@ -44,8 +44,6 @@ import org.broadinstitute.sting.utils.text.TextFormattingUtils;
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import org.broadinstitute.sting.utils.collections.Pair;
-import org.broadinstitute.sting.commandline.Argument;
-import org.broadinstitute.sting.commandline.CommandLineUtils;
import java.io.File;
import java.io.FileWriter;
@@ -63,7 +61,7 @@ public class IndelRealigner extends ReadWalker {
public static final String ORIGINAL_POSITION_TAG = "OP";
public static final String PROGRAM_RECORD_NAME = "GATK IndelRealigner";
- @Argument(fullName="targetIntervals", shortName="targetIntervals", doc="intervals file output from RealignerTargetCreator", required=true)
+ @Input(fullName="targetIntervals", shortName="targetIntervals", doc="intervals file output from RealignerTargetCreator", required=true)
protected String intervalsFile = null;
@Argument(fullName="LODThresholdForCleaning", shortName="LOD", doc="LOD threshold above which the cleaner will clean", required=false)
@@ -72,7 +70,7 @@ public class IndelRealigner extends ReadWalker {
@Argument(fullName="entropyThreshold", shortName="entropy", doc="percentage of mismatches at a locus to be considered having high entropy", required=false)
protected double MISMATCH_THRESHOLD = 0.15;
- @Argument(fullName="output", shortName="O", required=false, doc="Output bam")
+ @Output(fullName="output", shortName="O", required=false, doc="Output bam")
protected String writerFilename = null;
@Argument(fullName="bam_compression", shortName="compress", required=false, doc="Compression level to use for output bams [default:5]")
@@ -115,15 +113,15 @@ public class IndelRealigner extends ReadWalker {
// DEBUGGING OPTIONS FOLLOW
@Hidden
- @Argument(fullName="indelsFileForDebugging", shortName="indels", required=false, doc="Output file (text) for the indels found; FOR DEBUGGING PURPOSES ONLY")
+ @Output(fullName="indelsFileForDebugging", shortName="indels", required=false, doc="Output file (text) for the indels found; FOR DEBUGGING PURPOSES ONLY")
protected String OUT_INDELS = null;
@Hidden
- @Argument(fullName="statisticsFileForDebugging", shortName="stats", doc="print out statistics (what does or doesn't get cleaned); FOR DEBUGGING PURPOSES ONLY", required=false)
+ @Output(fullName="statisticsFileForDebugging", shortName="stats", doc="print out statistics (what does or doesn't get cleaned); FOR DEBUGGING PURPOSES ONLY", required=false)
protected String OUT_STATS = null;
@Hidden
- @Argument(fullName="SNPsFileForDebugging", shortName="snps", doc="print out whether mismatching columns do or don't get cleaned out; FOR DEBUGGING PURPOSES ONLY", required=false)
+ @Output(fullName="SNPsFileForDebugging", shortName="snps", doc="print out whether mismatching columns do or don't get cleaned out; FOR DEBUGGING PURPOSES ONLY", required=false)
protected String OUT_SNPS = null;
// the intervals input by the user
diff --git a/packages/Queue.xml b/packages/Queue.xml
index 951064070..c7a6dda72 100644
--- a/packages/Queue.xml
+++ b/packages/Queue.xml
@@ -11,8 +11,8 @@
-
-
+
+
diff --git a/scala/qscript/kshakir/CleanBamFile.scala b/scala/qscript/kshakir/CleanBamFile.scala
new file mode 100644
index 000000000..729cea827
--- /dev/null
+++ b/scala/qscript/kshakir/CleanBamFile.scala
@@ -0,0 +1,157 @@
+import org.broadinstitute.sting.queue.extensions.picard.PicardBamJarFunction
+import org.broadinstitute.sting.queue.QScript
+import org.broadinstitute.sting.queue.extensions.gatk._
+
+/**
+ * Queue script that cleans a recalibrated BAM file.
+ *
+ * Pipeline: optionally convert a read group blacklist, run
+ * RealignerTargetCreator, run IndelRealigner, fix mates with the supplied
+ * Picard jar, then pass the final ".cleaned.bam" to BamIndexFunction.
+ */
+class CleanBamFile extends QScript {
+  qscript =>
+
+  @Argument(doc="gatk jar", shortName="gatk")
+  var gatkJar: File = _
+
+  @Argument(doc="fix mates jar", shortName="fixMates")
+  var fixMatesJar: File = _
+
+  @Input(doc="Script that can merge text files, for example Sting/shell/mergeText.sh.", shortName="MTS")
+  var mergeTextScript: String = _
+
+  @Argument(doc="base name for output files", shortName="base")
+  var baseName: String = _
+
+  @Input(doc="reference genome", shortName="R")
+  var referenceFile: File = _
+
+  @Input(doc="recalibrated bam", shortName="I")
+  var recalibratedBam: File = _
+
+  @Argument(doc="read group blacklist conversion script that can convert firehose outputs to a GATK blacklist file.", shortName="RGBLS")
+  var readGroupBlackListScript: String = _
+
+  @Argument(doc="read group blacklist", shortName="RGBL", required=false)
+  var readGroupBlackList: String = _
+
+  @Argument(doc="intervals", shortName="L", required=false)
+  var intervals: File = _
+
+  @Argument(doc="Script that can split the interval file by contig, for example Sting/python/splitIntervalsByContig.py.", shortName="RTCSS")
+  var targetCreatorScatterScript: String = _
+
+  @Argument(doc="RealignerTargetCreator scatter count. " +
+    "Best if it is either 1 or the number of contigs in the interval list. " +
+    "If used the compute farm must also be used.", shortName="RTCSC")
+  var targetCreatorScatterCount = 0
+
+  @Argument(doc="Script that can split the intervals evenly, for example Sting/shell/splitIntervals.sh.", shortName="IRSS")
+  var indelRealignerScatterScript: String = _
+
+  @Argument(doc="IndelRealigner scatter count.", shortName="IRSC")
+  var indelRealignerScatterCount = 0
+
+  @Input(doc="dbsnp file", shortName="D")
+  var dbsnpFile: File = _
+
+  /** Arguments shared by every GATK walker invoked from this script. */
+  trait GATKCommonArgs extends CommandLineGATK {
+    this.jarFile = qscript.gatkJar
+    this.reference_sequence = qscript.referenceFile
+    this.intervals = qscript.intervals
+    this.input_file :+= qscript.recalibratedBam
+    this.cleanupTempDirectories = true
+  }
+
+  /** Returns a file named by appending suffix to the base output name. */
+  def baseFile(suffix: String) = new File(baseName + suffix)
+
+  /** Builds the cleaning pipeline and adds each of its functions to the script. */
+  def script = {
+    val blacklistConverter = new CommandLineFunction {
+      @Output(doc="blacklist file") var blacklistFile: File = _
+      def commandLine = readGroupBlackListScript + " " + blacklistFile + " " + readGroupBlackList
+    }
+
+    if (readGroupBlackList != null) {
+      blacklistConverter.blacklistFile = baseFile(".blacklist.txt")
+      add(blacklistConverter)
+    }
+
+    // The converter output is only set when a blacklist was supplied; wrapping
+    // it in Option keeps a null file out of the walkers' blacklist arguments.
+    val blacklistFiles = Option(blacklistConverter.blacklistFile).toList
+
+    // RealignerTargetCreator: output goes to the ".merged.intervals" file.
+    val targetCreator = new RealignerTargetCreator with GATKCommonArgs
+    targetCreator.memoryLimit = Some(2)
+    targetCreator.read_group_black_list ++= blacklistFiles
+    targetCreator.out = baseFile(".merged.intervals")
+    targetCreator.scatterCount = targetCreatorScatterCount
+    targetCreator.setupScatterFunction = {
+      case (scatter: IntervalScatterFunction, _) =>
+        scatter.splitIntervalsScript = targetCreatorScatterScript
+    }
+    targetCreator.setupGatherFunction = {
+      case (gather: SimpleTextGatherFunction, _) =>
+        gather.mergeTextScript = mergeTextScript
+    }
+
+    // IndelRealigner: uses the target intervals produced above and writes
+    // its debugging statistics to the ".indel.stats" file.
+    val realigner = new IndelRealigner with GATKCommonArgs
+    realigner.memoryLimit = Some(4)
+    realigner.read_group_black_list ++= blacklistFiles
+    realigner.statisticsFileForDebugging = baseFile(".indel.stats")
+    realigner.maxReadsInRam = Some(200000)
+    realigner.targetIntervals = targetCreator.out
+    realigner.DBSNP = dbsnpFile
+    realigner.scatterCount = indelRealignerScatterCount
+
+    val bamIndex = new BamIndexFunction
+
+    if (realigner.scatterCount > 1) {
+      realigner.output = baseFile(".cleaned.bam")
+      // While gathering run fix mates.
+      realigner.setupScatterFunction = {
+        case (scatter: IntervalScatterFunction, _) =>
+          scatter.splitIntervalsScript = indelRealignerScatterScript
+      }
+      realigner.setupGatherFunction = {
+        case (gather: PicardBamJarFunction, _) =>
+          gather.memoryLimit = Some(4)
+          gather.jarFile = fixMatesJar
+          // Don't pass this AS=true to fix mates!
+          gather.assumeSorted = None
+        case (gather: SimpleTextGatherFunction, _) =>
+          gather.mergeTextScript = mergeTextScript
+      }
+
+      bamIndex.bamFile = realigner.output
+    } else {
+      realigner.output = baseFile(".unfixed.cleaned.bam")
+
+      // Explicitly run fix mates if the function won't be scattered.
+      val fixMates = new PicardBamJarFunction {
+        // Declare inputs/outputs for dependency tracking.
+        @Input(doc="unfixed bam") var unfixed: File = _
+        @Output(doc="fixed bam") var fixed: File = _
+        def inputBams = List(unfixed)
+        def outputBam = fixed
+      }
+      fixMates.memoryLimit = Some(4)
+      fixMates.jarFile = fixMatesJar
+      fixMates.unfixed = realigner.output
+      fixMates.fixed = baseFile(".cleaned.bam")
+
+      bamIndex.bamFile = fixMates.fixed
+
+      // Add the fix mates explicitly
+      add(fixMates)
+    }
+
+    add(targetCreator, realigner, bamIndex)
+  }
+}
diff --git a/scala/src/org/broadinstitute/sting/queue/QSettings.scala b/scala/src/org/broadinstitute/sting/queue/QSettings.scala
index 1ed32f1c7..b97bf176f 100644
--- a/scala/src/org/broadinstitute/sting/queue/QSettings.scala
+++ b/scala/src/org/broadinstitute/sting/queue/QSettings.scala
@@ -12,7 +12,7 @@ class QSettings {
var jobNamePrefix: String = QSettings.processNamePrefix
@Argument(fullName="job_queue", shortName="jobQueue", doc="Default queue for compute farm jobs.", required=false)
- var jobQueue: String = "broad"
+ var jobQueue: String = _
@Argument(fullName="job_project", shortName="jobProject", doc="Default project for compute farm jobs.", required=false)
var jobProject: String = "Queue"
diff --git a/scala/src/org/broadinstitute/sting/queue/extensions/gatk/BamGatherFunction.scala b/scala/src/org/broadinstitute/sting/queue/extensions/gatk/BamGatherFunction.scala
index 13ce477c4..8a442f6f5 100644
--- a/scala/src/org/broadinstitute/sting/queue/extensions/gatk/BamGatherFunction.scala
+++ b/scala/src/org/broadinstitute/sting/queue/extensions/gatk/BamGatherFunction.scala
@@ -1,17 +1,14 @@
package org.broadinstitute.sting.queue.extensions.gatk
-import org.broadinstitute.sting.queue.function.JarCommandLineFunction
-import org.broadinstitute.sting.commandline.Argument
import org.broadinstitute.sting.queue.function.scattergather.GatherFunction
+import org.broadinstitute.sting.queue.extensions.picard.PicardBamJarFunction
/**
- * Merges BAM files using Picards MergeSampFiles.jar.
+ * Merges BAM files using Picard's MergeSamFiles.jar.
* At the Broad the jar can be found at /seq/software/picard/current/bin/MergeSamFiles.jar. Outside the broad see http://picard.sourceforge.net/")
*/
-class BamGatherFunction extends GatherFunction with JarCommandLineFunction {
- @Argument(doc="Compression level 1-9", required=false)
- var compressionLevel: Option[Int] = None
-
- override def commandLine = super.commandLine + "%s%s%s".format(
- optional(" COMPRESSION_LEVEL=", compressionLevel), " AS=true VALIDATION_STRINGENCY=SILENT SO=coordinate OUTPUT=" + originalOutput, repeat(" INPUT=", gatherParts))
+class BamGatherFunction extends GatherFunction with PicardBamJarFunction {
+ this.assumeSorted = Some(true) // value for the ASSUME_SORTED= argument built by PicardBamJarFunction
+ protected def inputBams = gatherParts // the gathered parts are the jar's input BAMs
+ protected def outputBam = originalOutput // the jar's output BAM is the original output
}
diff --git a/scala/src/org/broadinstitute/sting/queue/extensions/picard/PicardBamJarFunction.scala b/scala/src/org/broadinstitute/sting/queue/extensions/picard/PicardBamJarFunction.scala
new file mode 100644
index 000000000..efd861f9b
--- /dev/null
+++ b/scala/src/org/broadinstitute/sting/queue/extensions/picard/PicardBamJarFunction.scala
@@ -0,0 +1,40 @@
+package org.broadinstitute.sting.queue.extensions.picard
+
+import org.broadinstitute.sting.queue.function.JarCommandLineFunction
+import java.io.File
+
+/**
+ * Wraps a Picard jar that operates on BAM files.
+ * See http://picard.sourceforge.net/ for more info.
+ *
+ * Since the jar files take slightly different arguments
+ * some values are optional.
+ */
+trait PicardBamJarFunction extends JarCommandLineFunction {
+  var validationStringency = "SILENT"
+  var sortOrder = "coordinate"
+  var compressionLevel: Option[Int] = None
+  var maxRecordsInRam: Option[Int] = None
+  var assumeSorted: Option[Boolean] = None
+
+  /** BAM files passed to the jar, one INPUT= argument each. */
+  protected def inputBams: List[File]
+
+  /** BAM file passed to the jar as OUTPUT=. */
+  protected def outputBam: File
+
+  /**
+   * Appends the Picard arguments to the base jar command line.
+   * The pieces are concatenated directly instead of going through a format
+   * string, so every argument is emitted no matter how many there are.
+   */
+  override def commandLine = super.commandLine + List(
+    optional(" COMPRESSION_LEVEL=", compressionLevel),
+    optional(" VALIDATION_STRINGENCY=", validationStringency),
+    optional(" SO=", sortOrder),
+    optional(" MAX_RECORDS_IN_RAM=", maxRecordsInRam),
+    optional(" ASSUME_SORTED=", assumeSorted),
+    " OUTPUT=" + outputBam,
+    repeat(" INPUT=", inputBams),
+    " TMP_DIR=" + jobTempDir).mkString
+}