From ca8458644371eb47596a22a385b92679924a9b33 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Mon, 24 Sep 2012 16:15:57 -0400 Subject: [PATCH 02/93] Adding default intellij configuration files --- .idea/.name | 1 + .idea/ant.xml | 15 + .idea/codeStyleSettings.xml | 13 + .idea/compiler.xml | 21 + .idea/copyright/profiles_settings.xml | 5 + .idea/encodings.xml | 5 + .idea/highlighting.xml | 8 + .idea/inspectionProfiles/Project_Default.xml | 11 + .../inspectionProfiles/profiles_settings.xml | 7 + .idea/libraries/GATK_libraries.xml | 13 + .idea/misc.xml | 32 ++ .idea/modules.xml | 9 + .idea/scopes/scope_settings.xml | 5 + .idea/uiDesigner.xml | 125 ++++++ .idea/vcs.xml | 10 + .idea/workspace.xml | 386 ++++++++++++++++++ cmi-gatk.iml | 23 ++ 17 files changed, 689 insertions(+) create mode 100644 .idea/.name create mode 100644 .idea/ant.xml create mode 100644 .idea/codeStyleSettings.xml create mode 100644 .idea/compiler.xml create mode 100644 .idea/copyright/profiles_settings.xml create mode 100644 .idea/encodings.xml create mode 100644 .idea/highlighting.xml create mode 100644 .idea/inspectionProfiles/Project_Default.xml create mode 100644 .idea/inspectionProfiles/profiles_settings.xml create mode 100644 .idea/libraries/GATK_libraries.xml create mode 100644 .idea/misc.xml create mode 100644 .idea/modules.xml create mode 100644 .idea/scopes/scope_settings.xml create mode 100644 .idea/uiDesigner.xml create mode 100644 .idea/vcs.xml create mode 100644 .idea/workspace.xml create mode 100644 cmi-gatk.iml diff --git a/.idea/.name b/.idea/.name new file mode 100644 index 000000000..7014f65a5 --- /dev/null +++ b/.idea/.name @@ -0,0 +1 @@ +cmi-gatk \ No newline at end of file diff --git a/.idea/ant.xml b/.idea/ant.xml new file mode 100644 index 000000000..4674eeac9 --- /dev/null +++ b/.idea/ant.xml @@ -0,0 +1,15 @@ + + + + + + + + + + + + + + + diff --git a/.idea/codeStyleSettings.xml b/.idea/codeStyleSettings.xml new file mode 100644 index 000000000..9178b389f --- /dev/null +++ b/.idea/codeStyleSettings.xml @@ -0,0 +1,13 @@ + + + + + + + diff --git a/.idea/compiler.xml b/.idea/compiler.xml new file mode 100644 index 000000000..ded2e9a1d --- /dev/null +++ b/.idea/compiler.xml @@ -0,0 +1,21 @@ + + + + + + diff --git a/.idea/copyright/profiles_settings.xml b/.idea/copyright/profiles_settings.xml new file mode 100644 index 000000000..3572571ad --- /dev/null +++ b/.idea/copyright/profiles_settings.xml @@ -0,0 +1,5 @@ + + + + + \ No newline at end of file diff --git a/.idea/encodings.xml b/.idea/encodings.xml new file mode 100644 index 000000000..e206d70d8 --- /dev/null +++ b/.idea/encodings.xml @@ -0,0 +1,5 @@ + + + + + diff --git a/.idea/highlighting.xml b/.idea/highlighting.xml new file mode 100644 index 000000000..f33b64d94 --- /dev/null +++ b/.idea/highlighting.xml @@ -0,0 +1,8 @@ + + + + + + diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml new file mode 100644 index 000000000..b8c243dbe --- /dev/null +++ b/.idea/inspectionProfiles/Project_Default.xml @@ -0,0 +1,11 @@ + + + + \ No newline at end of file diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 000000000..3b312839b --- /dev/null +++ b/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,7 @@ + + + + \ No newline at end of file diff --git a/.idea/libraries/GATK_libraries.xml b/.idea/libraries/GATK_libraries.xml new file mode 100644 index 000000000..970d0a3dc --- /dev/null +++ b/.idea/libraries/GATK_libraries.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 000000000..afd7f3778 --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,32 @@ + + + + + + + + + + http://www.w3.org/1999/xhtml + + + + + + + diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 000000000..09caa2933 --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,9 @@ + + + + + + + + + diff --git a/.idea/scopes/scope_settings.xml b/.idea/scopes/scope_settings.xml new file mode 100644 index 000000000..922003b84 --- /dev/null +++ b/.idea/scopes/scope_settings.xml @@ -0,0 +1,5 @@ + + + + \ No newline at end of file diff --git a/.idea/uiDesigner.xml b/.idea/uiDesigner.xml new file mode 100644 index 000000000..3b0002030 --- /dev/null +++ b/.idea/uiDesigner.xml @@ -0,0 +1,125 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 000000000..cbc984988 --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,10 @@ + + + + + + + + + diff --git a/.idea/workspace.xml b/.idea/workspace.xml new file mode 100644 index 000000000..87ab79287 --- /dev/null +++ b/.idea/workspace.xml @@ -0,0 +1,386 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + localhost + 5050 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + cmi-gatk + + + + + + + + GATK libraries + + + + + + + + + diff --git a/cmi-gatk.iml b/cmi-gatk.iml new file mode 100644 index 000000000..e63aff535 --- /dev/null +++ b/cmi-gatk.iml @@ -0,0 +1,23 @@ + + + + + + + + + + + + + + + + + + + + + + + From 4aad135f8c14e0d8d60fa4782024b1a5f29dd5dc Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Mon, 24 Sep 2012 17:01:17 -0400 Subject: [PATCH 03/93] Generic input file name recognition (still need to implement support to FastQ, but it now can at least accept it) --- .../qscripts/DataProcessingPipeline.scala | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala index 56f6460fb..c21db30ce 100755 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala @@ -96,6 +96,7 @@ class DataProcessingPipeline extends QScript { var cleanModelEnum: ConsensusDeterminationModel = ConsensusDeterminationModel.USE_READS + val bwaParameters: String = " -q 5 -l 32 -k 2 -t 4 -o 1 " @@ -165,12 +166,15 @@ class DataProcessingPipeline extends QScript { var realignedBams: Seq[File] = Seq() var index = 1 for (bam <- bams) { - // first revert the BAM file to the original qualities - val saiFile1 = swapExt(bam, ".bam", "." + index + ".1.sai") - val saiFile2 = swapExt(bam, ".bam", "." + index + ".2.sai") - val realignedSamFile = swapExt(bam, ".bam", "." + index + ".realigned.sam") - val realignedBamFile = swapExt(bam, ".bam", "." + index + ".realigned.bam") - val rgRealignedBamFile = swapExt(bam, ".bam", "." + index + ".realigned.rg.bam") + val extension = bam.toString.substring(bam.toString.length - 4) + + + + val saiFile1 = swapExt(bam, extension, "." + index + ".1.sai") + val saiFile2 = swapExt(bam, extension, "." + index + ".2.sai") + val realignedSamFile = swapExt(bam, extension, "." + index + ".realigned.sam") + val realignedBamFile = swapExt(bam, extension, "." + index + ".realigned.bam") + val rgRealignedBamFile = swapExt(bam, extension, "." + index + ".realigned.rg.bam") if (useBWAse) { val revertedBAM = revertBAM(bam, true) @@ -444,7 +448,7 @@ class DataProcessingPipeline extends QScript { case class bwa_aln_se (inBam: File, outSai: File) extends CommandLineFunction with ExternalCommonArgs { @Input(doc="bam file to be aligned") var bam = inBam @Output(doc="output sai file") var sai = outSai - def commandLine = bwaPath + " aln -t " + bwaThreads + " -q 5 " + reference + " -b " + bam + " > " + sai + def commandLine = bwaPath + " aln -t " + bwaThreads + bwaParameters + reference + " -b " + bam + " > " + sai this.analysisName = queueLogDir + outSai + ".bwa_aln_se" this.jobName = queueLogDir + outSai + ".bwa_aln_se" } @@ -452,7 +456,7 @@ class DataProcessingPipeline extends QScript { case class bwa_aln_pe (inBam: File, outSai1: File, index: Int) extends CommandLineFunction with ExternalCommonArgs { @Input(doc="bam file to be aligned") var bam = inBam @Output(doc="output sai file for 1st mating pair") var sai = outSai1 - def commandLine = bwaPath + " aln -t " + bwaThreads + " -q 5 " + reference + " -b" + index + " " + bam + " > " + sai + def commandLine = bwaPath + " aln -t " + bwaThreads + bwaParameters + reference + " -b" + index + " " + bam + " > " + sai this.analysisName = queueLogDir + outSai1 + ".bwa_aln_pe1" this.jobName = queueLogDir + outSai1 + ".bwa_aln_pe1" } From 4324bd72fdec5b9215ec10a6bd41b60c83135157 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Tue, 25 Sep 2012 10:51:53 -0400 Subject: [PATCH 04/93] Updating Intellij enviroment and adding Scala --- .idea/libraries/GATK_libraries.xml | 1 - .idea/misc.xml | 2 +- .idea/workspace.xml | 221 ++++++++++++++++++++++++----- cmi-gatk.iml | 10 +- 4 files changed, 192 insertions(+), 42 deletions(-) diff --git a/.idea/libraries/GATK_libraries.xml b/.idea/libraries/GATK_libraries.xml index 970d0a3dc..b363bbe6c 100644 --- a/.idea/libraries/GATK_libraries.xml +++ b/.idea/libraries/GATK_libraries.xml @@ -6,7 +6,6 @@ - diff --git a/.idea/misc.xml b/.idea/misc.xml index afd7f3778..a79280c52 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -24,7 +24,7 @@ http://www.w3.org/1999/xhtml - + diff --git a/.idea/workspace.xml b/.idea/workspace.xml index 87ab79287..f6d4567fd 100644 --- a/.idea/workspace.xml +++ b/.idea/workspace.xml @@ -1,7 +1,12 @@ - + + + + + + + + + + + + + + - + @@ -112,33 +140,7 @@ - - - - - - - - - - - - - - - - - - - - - + @@ -147,7 +149,7 @@ - + @@ -228,8 +230,9 @@ - + + @@ -286,7 +289,7 @@ + + - + + + + + + + + + + + + + + + + + - + @@ -333,6 +464,18 @@ + + + Detection + + + + + @@ -346,6 +489,7 @@ + 1.6 diff --git a/cmi-gatk.iml b/cmi-gatk.iml index e63aff535..4dbee1336 100644 --- a/cmi-gatk.iml +++ b/cmi-gatk.iml @@ -1,5 +1,13 @@ + + + + + + @@ -17,7 +25,7 @@ - + From 65b100f9b0de9ba03a35f1bb51b1c8e55af92513 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Tue, 25 Sep 2012 12:02:34 -0400 Subject: [PATCH 05/93] Reverting the DPP to the original version, going to create a new simplified version for CMI in private. --- .../qscripts/DataProcessingPipeline.scala | 20 ++++++++----------- 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala index c21db30ce..56f6460fb 100755 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala @@ -96,7 +96,6 @@ class DataProcessingPipeline extends QScript { var cleanModelEnum: ConsensusDeterminationModel = ConsensusDeterminationModel.USE_READS - val bwaParameters: String = " -q 5 -l 32 -k 2 -t 4 -o 1 " @@ -166,15 +165,12 @@ class DataProcessingPipeline extends QScript { var realignedBams: Seq[File] = Seq() var index = 1 for (bam <- bams) { - val extension = bam.toString.substring(bam.toString.length - 4) - - - - val saiFile1 = swapExt(bam, extension, "." + index + ".1.sai") - val saiFile2 = swapExt(bam, extension, "." + index + ".2.sai") - val realignedSamFile = swapExt(bam, extension, "." + index + ".realigned.sam") - val realignedBamFile = swapExt(bam, extension, "." + index + ".realigned.bam") - val rgRealignedBamFile = swapExt(bam, extension, "." + index + ".realigned.rg.bam") + // first revert the BAM file to the original qualities + val saiFile1 = swapExt(bam, ".bam", "." + index + ".1.sai") + val saiFile2 = swapExt(bam, ".bam", "." + index + ".2.sai") + val realignedSamFile = swapExt(bam, ".bam", "." + index + ".realigned.sam") + val realignedBamFile = swapExt(bam, ".bam", "." + index + ".realigned.bam") + val rgRealignedBamFile = swapExt(bam, ".bam", "." + index + ".realigned.rg.bam") if (useBWAse) { val revertedBAM = revertBAM(bam, true) @@ -448,7 +444,7 @@ class DataProcessingPipeline extends QScript { case class bwa_aln_se (inBam: File, outSai: File) extends CommandLineFunction with ExternalCommonArgs { @Input(doc="bam file to be aligned") var bam = inBam @Output(doc="output sai file") var sai = outSai - def commandLine = bwaPath + " aln -t " + bwaThreads + bwaParameters + reference + " -b " + bam + " > " + sai + def commandLine = bwaPath + " aln -t " + bwaThreads + " -q 5 " + reference + " -b " + bam + " > " + sai this.analysisName = queueLogDir + outSai + ".bwa_aln_se" this.jobName = queueLogDir + outSai + ".bwa_aln_se" } @@ -456,7 +452,7 @@ class DataProcessingPipeline extends QScript { case class bwa_aln_pe (inBam: File, outSai1: File, index: Int) extends CommandLineFunction with ExternalCommonArgs { @Input(doc="bam file to be aligned") var bam = inBam @Output(doc="output sai file for 1st mating pair") var sai = outSai1 - def commandLine = bwaPath + " aln -t " + bwaThreads + bwaParameters + reference + " -b" + index + " " + bam + " > " + sai + def commandLine = bwaPath + " aln -t " + bwaThreads + " -q 5 " + reference + " -b" + index + " " + bam + " > " + sai this.analysisName = queueLogDir + outSai1 + ".bwa_aln_pe1" this.jobName = queueLogDir + outSai1 + ".bwa_aln_pe1" } From cb8d4c97e119bd76b382dcb5cc69277700456897 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Tue, 25 Sep 2012 17:13:50 -0400 Subject: [PATCH 06/93] First implementation of a generic 'bundled' Data Processing Pipeline for germline and cancer. not ready for prime time yet! --- .../src/org/broadinstitute/sting/queue/util/QScriptUtils.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/public/scala/src/org/broadinstitute/sting/queue/util/QScriptUtils.scala b/public/scala/src/org/broadinstitute/sting/queue/util/QScriptUtils.scala index 1529d9951..f684e533f 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/util/QScriptUtils.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/util/QScriptUtils.scala @@ -57,7 +57,8 @@ object QScriptUtils { for (file <- fromFile(in).getLines()) if (!file.startsWith("#") && !file.isEmpty ) list :+= new File(file.trim()) - list.sortWith(_.compareTo(_) < 0) +// list.sortWith(_.compareTo(_) < 0) + list } /** From c9c2682f8688d5978b001d21eee4fd7f111c9350 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Tue, 25 Sep 2012 17:18:44 -0400 Subject: [PATCH 08/93] removing annoying xml from IDEA configuration --- .idea/workspace.xml | 529 -------------------------------------------- 1 file changed, 529 deletions(-) delete mode 100644 .idea/workspace.xml diff --git a/.idea/workspace.xml b/.idea/workspace.xml deleted file mode 100644 index f6d4567fd..000000000 --- a/.idea/workspace.xml +++ /dev/null @@ -1,529 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - localhost - 5050 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Detection - - - - - - - - - - - - - - - 1.6 - - - - - - - - cmi-gatk - - - - - - - - GATK libraries - - - - - - - - - From 3e68fee76489a6667d070210c88aa0e3509ad2a8 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Thu, 27 Sep 2012 11:04:56 -0400 Subject: [PATCH 11/93] Removed the intellij files from the root and made an example package for new users. This allows users to start at the same page and then change it as they see fit without interfering with the repo (thanks guillermo!) --- .idea/.name | 1 - .idea/ant.xml | 15 --- .idea/codeStyleSettings.xml | 13 -- .idea/compiler.xml | 21 --- .idea/copyright/profiles_settings.xml | 5 - .idea/encodings.xml | 5 - .idea/highlighting.xml | 8 -- .idea/inspectionProfiles/Project_Default.xml | 11 -- .../inspectionProfiles/profiles_settings.xml | 7 - .idea/libraries/GATK_libraries.xml | 12 -- .idea/misc.xml | 32 ----- .idea/modules.xml | 9 -- .idea/scopes/scope_settings.xml | 5 - .idea/uiDesigner.xml | 125 ------------------ .idea/vcs.xml | 10 -- cmi-gatk.iml | 31 ----- intellij_example.tar.bz2 | Bin 0 -> 7520 bytes 17 files changed, 310 deletions(-) delete mode 100644 .idea/.name delete mode 100644 .idea/ant.xml delete mode 100644 .idea/codeStyleSettings.xml delete mode 100644 .idea/compiler.xml delete mode 100644 .idea/copyright/profiles_settings.xml delete mode 100644 .idea/encodings.xml delete mode 100644 .idea/highlighting.xml delete mode 100644 .idea/inspectionProfiles/Project_Default.xml delete mode 100644 .idea/inspectionProfiles/profiles_settings.xml delete mode 100644 .idea/libraries/GATK_libraries.xml delete mode 100644 .idea/misc.xml delete mode 100644 .idea/modules.xml delete mode 100644 .idea/scopes/scope_settings.xml delete mode 100644 .idea/uiDesigner.xml delete mode 100644 .idea/vcs.xml delete mode 100644 cmi-gatk.iml create mode 100644 intellij_example.tar.bz2 diff --git a/.idea/.name b/.idea/.name deleted file mode 100644 index 7014f65a5..000000000 --- a/.idea/.name +++ /dev/null @@ -1 +0,0 @@ -cmi-gatk \ No newline at end of file diff --git a/.idea/ant.xml b/.idea/ant.xml deleted file mode 100644 index 4674eeac9..000000000 --- a/.idea/ant.xml +++ /dev/null @@ -1,15 +0,0 @@ - - - - - - - - - - - - - - - diff --git a/.idea/codeStyleSettings.xml b/.idea/codeStyleSettings.xml deleted file mode 100644 index 9178b389f..000000000 --- a/.idea/codeStyleSettings.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - diff --git a/.idea/compiler.xml b/.idea/compiler.xml deleted file mode 100644 index ded2e9a1d..000000000 --- a/.idea/compiler.xml +++ /dev/null @@ -1,21 +0,0 @@ - - - - - - diff --git a/.idea/copyright/profiles_settings.xml b/.idea/copyright/profiles_settings.xml deleted file mode 100644 index 3572571ad..000000000 --- a/.idea/copyright/profiles_settings.xml +++ /dev/null @@ -1,5 +0,0 @@ - - - - - \ No newline at end of file diff --git a/.idea/encodings.xml b/.idea/encodings.xml deleted file mode 100644 index e206d70d8..000000000 --- a/.idea/encodings.xml +++ /dev/null @@ -1,5 +0,0 @@ - - - - - diff --git a/.idea/highlighting.xml b/.idea/highlighting.xml deleted file mode 100644 index f33b64d94..000000000 --- a/.idea/highlighting.xml +++ /dev/null @@ -1,8 +0,0 @@ - - - - - - diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml deleted file mode 100644 index b8c243dbe..000000000 --- a/.idea/inspectionProfiles/Project_Default.xml +++ /dev/null @@ -1,11 +0,0 @@ - - - - \ No newline at end of file diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml deleted file mode 100644 index 3b312839b..000000000 --- a/.idea/inspectionProfiles/profiles_settings.xml +++ /dev/null @@ -1,7 +0,0 @@ - - - - \ No newline at end of file diff --git a/.idea/libraries/GATK_libraries.xml b/.idea/libraries/GATK_libraries.xml deleted file mode 100644 index b363bbe6c..000000000 --- a/.idea/libraries/GATK_libraries.xml +++ /dev/null @@ -1,12 +0,0 @@ - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml deleted file mode 100644 index a79280c52..000000000 --- a/.idea/misc.xml +++ /dev/null @@ -1,32 +0,0 @@ - - - - - - - - - - http://www.w3.org/1999/xhtml - - - - - - - diff --git a/.idea/modules.xml b/.idea/modules.xml deleted file mode 100644 index 09caa2933..000000000 --- a/.idea/modules.xml +++ /dev/null @@ -1,9 +0,0 @@ - - - - - - - - - diff --git a/.idea/scopes/scope_settings.xml b/.idea/scopes/scope_settings.xml deleted file mode 100644 index 922003b84..000000000 --- a/.idea/scopes/scope_settings.xml +++ /dev/null @@ -1,5 +0,0 @@ - - - - \ No newline at end of file diff --git a/.idea/uiDesigner.xml b/.idea/uiDesigner.xml deleted file mode 100644 index 3b0002030..000000000 --- a/.idea/uiDesigner.xml +++ /dev/null @@ -1,125 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/.idea/vcs.xml b/.idea/vcs.xml deleted file mode 100644 index cbc984988..000000000 --- a/.idea/vcs.xml +++ /dev/null @@ -1,10 +0,0 @@ - - - - - - - - - diff --git a/cmi-gatk.iml b/cmi-gatk.iml deleted file mode 100644 index 4dbee1336..000000000 --- a/cmi-gatk.iml +++ /dev/null @@ -1,31 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/intellij_example.tar.bz2 b/intellij_example.tar.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..bce16045cd1cc476305c5e59d07ff9b94b8e5d73 GIT binary patch literal 7520 zcmV-m9iQStT4*^jL0KkKS)7!cJOD4F|M~yi5CDHu|NsC0|M36+|L{No06+`?06+jh z1{h!`o8{-7h4qB5wR>^y>D#MWyS_S4UPrYW)`~XXKBY)=w?n(T?w=>u4%}F@T^prq zM&}*98+P}1CC>JGY3}jFJo}ytO>BKWz24nKq=%-|-tIdTv1Z%Ou|py0B#;mw z0$?VFLqIe|HdE0LQ`8zCr9V{lAE^M-CYlc@B!L1ZL{xsM>S#SrDYXw!Hq`?_27mwn z0006)At_HHdU~eyLFyU;NYu?W zO%G7?YBUW!LqkS@9-*KBcwhGq&L2>c7{2Bv%KPh~XqovPD`P$wqr0wA{OX>F7ZKmy zDsL?$v;dI_RDWe46~zQU)lvw>NPvKQ{r?Z1(}ww2_)M*fQwCl2HJP1e>Ri?dCZpMN zP=M?SA|wk1NPtvh=J3} zDw+~D0aFk#a8yBk1VvNe{^Q2@ATLqre5hlqO+ zJH6U_g@n*il^HZ-l$Xlw^mBWD_ji=uWfv%71y1==(-ozyjYp|LKHs?f9BLgry=7~; zQzc6vctrO5Ohh{&v$XFjif1`0L{s*Olu^?q8x}#rqzyaoVB#B4g(81Jx};qlYMjj? z2dYy`yjW&t6?wYx0N9Uh!RRNLa}rLdMi`DPNP-P&$TYbGk_IqjZvxK0qQ|cbe@$>KA+XQo15<95gH&cGF>(8JUi?q#;jjQhX7>?$D_)jV*fSGa99hISqAgCirHOBRlz z5-9I|sR$Ai`Yfa3X$-2y?KsEHOGX z@HXt%cKK^%iU{@ItVyi`Mvy~M#VuYFejK)2%tD1XPotWR1C6g7^>_3=M{GBTR=x6} zL$ie9VdKIYD#p_7X2Ij5c!ACsYkPJWj&|WWp}5G;)cO~hDhd@kB;Qp{=JQuL+IA<+ z9G6K-0B_4%_wIgYeR?@~V~!zKJsytphBh&$h7$B()y02uDWc?iHKJ3Oou2*+MfMx^=gIf{X3|lWppAz z-I7p3R)QX!Idb;|10w?J`_#t4+9oPM5Ep6hKa7!-%b*Z%Ne=e{WH2)WCKzJBSF^U& zK`^`eM3i5TFVExdQsTLk;G@qq9{>}@N65UeiIk4 z@~k{rAI=Uv%!sgaYY$=o+l0!JKSDr9R$?^eCXzi64~|&0Ce%+awpg_DqW1T;JnT{d zru{VON@j0;c#A;BkSsr#?|iam4$XGUx$RTb)H_pMY<00{Yt{q!{n-}(G#x&{RZq!9 z?#T+_N>%`kP|_a@fmo2-OJrXX!d!y~G&Dn3dYM^vWSZJ!#jw;(nuR5VhC?^k`y8?h4xtX)+j?PU}@iN>ljD+(Qmb z8dniBZ(_g=D1mLVH)}(M5H0vcNC?Cv1(;-)C)j(xXY2nS+gLe%zZ*W^p1!|_zv=O} z_Q!+DzkQ#@*8b@JY?61J{_8LQ7Wa7yo_=DdhrF->(?ZCJE#uL2ybpcSMKqaVBwSv) zNU6k^_VtahV8JC~!U6{d!bq&!4p|_$D-~)<6%s3$x=9Tg-hN~ZTTL>hPndt*gp7cg z+wR+roDg$1Hu?=(-&o%#Ayf15RN!Z%05j!NTZ&r35#?&$A)o_Kq_+aG2Tmv-^Ti}kzPUMz}gW$fs9 zJC4FgjTw|ACp)ypVq7XolvK9GOTU706q(I(#Eu(UtE5jUBYT&==_1{Y?P%VlGrzMb zBE|_o9do|DW~w@PU%$xNzAis=G|wxF)AvA^EC+4@aDj~7z-#+Yu_mAdbYOG_n;S_f z2MF%vrdGcJHml~P2xvLQOIQJlnTe)5Z8JtNk{R2)nVMB@y3m5+0?^4LZPW(|mtd*? zDv;N1W_Rb+ea4+?lrAx`155T=*|7;B>>wiy;Ml)7#VrgvNDTg;;hzJ_HNI$RTTqf4 zfyMeU1R+7uY;dsXgq*(jmUH&D>*C?4NtOzLb-!$s%bXb_Sue0mN5$wkpEM3kv8IvGA=_?Zzv1%_C1tsyl^1@RKH{a zciZw1@6q0f3Zxbb;ED{RY}JFbgv`jo;vnjTP++40u_0!9X$&Jll(K*-u?XPE#-34@ z(2*iq168t)2LIl%)odnH8EI_Yl*{iaXWjeIC?DX z@F63H#+Z$15ur99k-Q~?3g|8Zc=x8AR4D_lS&{5(Wb(9OWn~6P1uWA@lOmW&8qFIU z7SBN;v4-l*AW>A(2|!UvMJG5MwJuL6M+i`eC_Z*#ky3?khBR#txTdfRi~`O|X#<>q zay;fL4M@CAa10!#6DOq2oZBZ*`QRkS_ry;K+IvB5vHgUgDqb| z3@cbx6TJE0H(;83Aleb%_>debXaSWl^D1it&Z*F@y=N{NvpYj~%SKvwWbjz}(2QH5 zzP2fmjwex!Q0DDKn@@wk;d+8XInO!CB-!he7tkDYlzvB5io3y3~eaI5w=k}~_n&-pi{RD^=|p|INIK%M7^Ju>U^q=8=L%d;bA7r& zav%4cJ$b-?7%3m*P6<7LL;|e%;!{W=n0z(DI%^o;Bei5F?M_70?60j?x99A# z3qM)=cXkVG2kmpPJ8oP#mdXvhiT3f-MRyJ&15Gf(d7`7!iiTN|-|lq+clXLmg&CS~L$9jF&hP0Bm8xWJJK=$OMFB>@>4P6z#Xn3dcffp+LAC z!`V2YWzCFixsCZ_7L;F_QL9T?&p+$c)*hPKS8gSO~VTFj|ro3G{Iuof@swuwn#16JUnNC5#N!PbpVLE zvff%F#RxrAU($v^EeV!2DVBm~xc6xi-$OudTP+k6K2v02fE5E45$L!=gi;#?e7UiT zMhuXoWDk1Yu>gn#4)=DU3)lONdeFJ7$d8PZ2g9kss?ltvQp=qrImnG5r+V3zF4fd( z=B;z~@$<2{hrSQ|A=lv6Q712!p!qTbzdD`k(bs>{2Pp3?yqCWFJqT zzsdD8a3Kh21F_vQhkQ6nV4J6$yI@VnB-^q@1Poy1V4pzHT>!|B!-$_&1xU~WX+}BS| zI+H9JIpulj0|c`K4h+Jv+i8{#sDpzsY3kK&nf6^uxxRKVpRY}nq>h`5p#2eSW1JCB zoQDl9g0bddzNVPmI`lIIC+n3Z-%)6 z)m1D@3`N7B@Sgj)AR36~a4}*AFBJoK>y5WL$p%MGX_9PAb!p#rxvRNmYjZFf_p^MM zS*F2;S$4gmw~WV5MV$7|80zk2IN^|p!(1CUj&QiGuRFLn?P!*xF?*d9(+V$}IXT1} zTha_h(muFR1txBW5H`^S1RVSVru%s#2xv)Akeo8AEDyF+y-8{s&ujwd5cgA z{vZA1IN}ss`1mWy37bexU#GAM?R5hGixGW*YN1F6 zNe^y$@Ot{b0s=j%3iJAec(;rNi2(`#p%tYeftrrHgl%C`y`dwy83`F7-YoY_ox@e$ zL&B&_2$DC2^*`wAL)aSaJF>S~WkZ7Y-Du;z{ zz=U=xW(E3CCn_nOid4kc4!;tEos&Z9t35>kz>s9yLG>gOM({gKT^#n%!cAcT@WPry3G z2V$fF(Bh#$fe>JI^|(v(OqCu~Gq|s3b?7=j5C#Z=5wJo%cpU|9AqFF9LZ;v@MFUXd zs2N){bdrWWIl=TxEeH;2AWyYeEYN~qcFD}6*o@`j5Unt1JJgj_YN^CRhB!bw5Oy*P z1DcHm8GutysIu}&!*1M~s!zPA&Oi}v=#?bEF!B<|)OQu(7P)!*MrromGcLP%0kRC2-f z-Ma&_=fdV8BrOEsXnl*Tw^mqWLHgJM!Kwz2*{PSy3 zu1y0}WCf==J8l%kErz3IvwaFAt0TVM@-$^1MCp&5TKidOUd1p zYf^#|AzIhOeK`tn48Ym)u!~-}_Q+l&S8a}MP zqt<;`1h6{%dl7fl4{{^4=5Z9~GklcT&KW~EA3Cu3&;?mZn=%p-SGX&lf>n(5j&iOY zYQ5cz$?+OcwiV^|gbZ zDl{t&8X}C%;tMPtc~k`|q$FrGfm%YMX(X@VI=uFI0&+ldLE%k6`SmVPkC_HWF>Jjk zUDTu0qvTmpsIKA3Q+lPhpdN}HV1^2u*J2-*FWMiW;5$l`j~ao*gwCP$RM5i#_C_Ex zH)zO<;CO6Q=Wwgery}Pl$N)(r*fN-s03O~z4|h@!W6)yYC(`*Uf$0mj(1PL>cZ3#Z z4TbDihB0%-2YpbP;Si{_nnei&VL3_(rX}Qwue;<9@ZzbMbgFgWaWzMX7*HysutU{s z6$yK$fP%s@0>K!oAjF959~anz@iKxw9tVi2%a|O!4?k>B;`D(%h5u(vcUQi+mcKd#qyCaVfaiWCOtfxqGP5gZ9C@~hnd`7h<>b=e_+%r-y-BvC8wl_K{AeW^0CX@V5&D!>>QsSU^i-+=i>^k-=!j-OGaIbfVpkOo zov0DlGYvzkWdki>W|%{31zZ$_CjdJbXwoesiiTpR&LP}<$G(Ags#wwbgBY)fP=_5T zQ4Xh%0B4=sfb<{^vtZvf!~pRs?Q2_tCyTUQVcvR>^r#ARkO%Guv=4>qj_B@o4jqbM zt7aXvQ8X%2q25s!VhE5U6mV4RggF8x`vC+b>2V@PWF#OH>&BrAM934Jfz;}KW^E!s zf_sHg^L}8s>!NOo>yjx%-@oa32x!;2(Ek5!w#C`ezx9s#*X(t`x1;-X6+ z7Tr_H3_VZlT{4sbK3ug=pGr!XjFT71U-2y+_{;q+T4bJi(b= zap}Jvh}lP|*qy&ou8QcP>pMaQngHZp=&Vk$3eem_f*S=$R8;1V4~ZCnf}#?r8iXVQ z0j{X_oxDt(h>0TcAk-a!;E`5M2WU_pz^PFx0iuHO=~{qY8>t!!C^m$obS{f0N*n5P z90a#3kBG3kQg+Y~XP`YbFs-J>`*$M_gdyA-C_|>(hkfw|+l<+w zf}KDpwJ=nwn0Cr`@CL~cY#kjD!*FzDfO#Xz58x^D4{2FCkP+ji120 z`AK?g`I(q&&XIvsdP8c|-6RwyD9PaUmZHX{h)-f%GL}>`AdodPFC>U>q}W4M$7Yrv zXn_GT_PIBf+CkyE8OEjAE9XU38OMGBuvkEl2#MCGQP6b&cqsd};rC<;+96%VZpibB za5i6A1WKt|DOYLN0p46>D$r5;!~>XE;UL^8L$+U03KV+NtWE_Trs&B5hj{M;05y_z( zIB@{aYt}fs#2_Ui8xH+<1|!JJl0XMS2m}_23jO|IA2?LhL%8il_4O)!cmxPzpqfGY zc__a@$5HV6afV(GYNQppH5ZI_4QL$Cz*M(x41|T~>WW%5dc!&O=BaR(D8S87Uc(Wb z4GAcqP$MD;MhMW2k||&T_-I1Sc7quRcUP6)AC5{qas)meG;bs!BEU|AAWLmX=fy+6 z$p=H}qdcCy^{S?BA;kyA^aH=II~+xSl}Oz`ntcF$U;6-Xk#{x0N-aG@bNDK>cMb(tan literal 0 HcmV?d00001 From a640afa995a7be890d1753085e568b58f9e449d2 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Thu, 27 Sep 2012 11:09:41 -0400 Subject: [PATCH 12/93] adding some directories to gitignore --- .gitignore | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.gitignore b/.gitignore index 8623fa076..927caf98d 100644 --- a/.gitignore +++ b/.gitignore @@ -18,3 +18,8 @@ queueScatterGather /bar* integrationtests/ public/testdata/onTheFlyOutputTest.vcf +build/ +dist/ +dump/ +lib/ +out/ From dca7c7fa9cb183a57a08952147847ad97f334cd4 Mon Sep 17 00:00:00 2001 From: Kristian Cibulskis Date: Wed, 3 Oct 2012 16:25:34 -0400 Subject: [PATCH 15/93] initial cancer pipeline with mutations and partial indel support --- .../queue/extensions/cancer/MuTect.scala | 378 ++++++++++++++++++ 1 file changed, 378 insertions(+) create mode 100644 public/scala/src/org/broadinstitute/sting/queue/extensions/cancer/MuTect.scala diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/cancer/MuTect.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/cancer/MuTect.scala new file mode 100644 index 000000000..623d397d4 --- /dev/null +++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/cancer/MuTect.scala @@ -0,0 +1,378 @@ +package org.broadinstitute.sting.queue.extensions.cancer + +import java.io.File +import org.broadinstitute.sting.commandline.Argument +import org.broadinstitute.sting.commandline.Gather +import org.broadinstitute.sting.commandline.Input +import org.broadinstitute.sting.commandline.Output +import org.broadinstitute.sting.queue.function.scattergather.ScatterGatherableFunction +import org.broadinstitute.sting.queue.extensions.gatk.{LocusScatterFunction, TaggedFile} + +class MuTect extends org.broadinstitute.sting.queue.extensions.gatk.CommandLineGATK with ScatterGatherableFunction { + analysisName = "MuTect" + analysis_type = "MuTect" + scatterClass = classOf[LocusScatterFunction] + + /** used for debugging, basically exit as soon as we get the reads */ + @Argument(fullName="noop", shortName="", doc="used for debugging, basically exit as soon as we get the reads", required=false, exclusiveOf="", validation="") + var noop: Boolean = _ + + /** add many additional columns of statistics to the output file */ + @Argument(fullName="enable_extended_output", shortName="", doc="add many additional columns of statistics to the output file", required=false, exclusiveOf="", validation="") + var enable_extended_output: Boolean = _ + + /** used when running the caller on a normal (as if it were a tumor) to detect artifacts */ + @Argument(fullName="artifact_detection_mode", shortName="", doc="used when running the caller on a normal (as if it were a tumor) to detect artifacts", required=false, exclusiveOf="", validation="") + var artifact_detection_mode: Boolean = _ + + /** name to use for tumor in output files */ + @Argument(fullName="tumor_sample_name", shortName="", doc="name to use for tumor in output files", required=false, exclusiveOf="", validation="") + var tumor_sample_name: String = _ + + /** if the tumor bam contains multiple samples, only use read groups with SM equal to this value */ + @Argument(fullName="bam_tumor_sample_name", shortName="", doc="if the tumor bam contains multiple samples, only use read groups with SM equal to this value", required=false, exclusiveOf="", validation="") + var bam_tumor_sample_name: String = _ + + /** name to use for normal in output files */ + @Argument(fullName="normal_sample_name", shortName="", doc="name to use for normal in output files", required=false, exclusiveOf="", validation="") + var normal_sample_name: String = _ + + /** force output for each site */ + @Argument(fullName="force_output", shortName="", doc="force output for each site", required=false, exclusiveOf="", validation="") + var force_output: Boolean = _ + + /** force output for all alleles at each site */ + @Argument(fullName="force_alleles", shortName="", doc="force output for all alleles at each site", required=false, exclusiveOf="", validation="") + var force_alleles: Boolean = _ + + /** Initial LOD threshold for calling tumor variant */ + @Argument(fullName="initial_tumor_lod", shortName="", doc="Initial LOD threshold for calling tumor variant", required=false, exclusiveOf="", validation="") + var initial_tumor_lod: Option[Float] = None + + /** Format string for initial_tumor_lod */ + @Argument(fullName="initial_tumor_lodFormat", shortName="", doc="Format string for initial_tumor_lod", required=false, exclusiveOf="", validation="") + var initial_tumor_lodFormat: String = "%s" + + /** LOD threshold for calling tumor variant */ + @Argument(fullName="tumor_lod", shortName="", doc="LOD threshold for calling tumor variant", required=false, exclusiveOf="", validation="") + var tumor_lod: Option[Float] = None + + /** Format string for tumor_lod */ + @Argument(fullName="tumor_lodFormat", shortName="", doc="Format string for tumor_lod", required=false, exclusiveOf="", validation="") + var tumor_lodFormat: String = "%s" + + /** estimate of fraction (0-1) of physical contamination with other unrelated samples */ + @Argument(fullName="fraction_contamination", shortName="", doc="estimate of fraction (0-1) of physical contamination with other unrelated samples", required=false, exclusiveOf="", validation="") + var fraction_contamination: Option[Float] = None + + /** Format string for fraction_contamination */ + @Argument(fullName="fraction_contaminationFormat", shortName="", doc="Format string for fraction_contamination", required=false, exclusiveOf="", validation="") + var fraction_contaminationFormat: String = "%s" + + /** minimum fraction of cells which are presumed to have a mutation, used to handle non-clonality and contamination */ + @Argument(fullName="minimum_mutation_cell_fraction", shortName="", doc="minimum fraction of cells which are presumed to have a mutation, used to handle non-clonality and contamination", required=false, exclusiveOf="", validation="") + var minimum_mutation_cell_fraction: Option[Float] = None + + /** Format string for minimum_mutation_cell_fraction */ + @Argument(fullName="minimum_mutation_cell_fractionFormat", shortName="", doc="Format string for minimum_mutation_cell_fraction", required=false, exclusiveOf="", validation="") + var minimum_mutation_cell_fractionFormat: String = "%s" + + /** LOD threshold for calling normal non-germline */ + @Argument(fullName="normal_lod", shortName="", doc="LOD threshold for calling normal non-germline", required=false, exclusiveOf="", validation="") + var normal_lod: Option[Float] = None + + /** Format string for normal_lod */ + @Argument(fullName="normal_lodFormat", shortName="", doc="Format string for normal_lod", required=false, exclusiveOf="", validation="") + var normal_lodFormat: String = "%s" + + /** LOD threshold for calling normal non-variant */ + @Argument(fullName="normal_artifact_lod", shortName="", doc="LOD threshold for calling normal non-variant", required=false, exclusiveOf="", validation="") + var normal_artifact_lod: Option[Float] = None + + /** Format string for normal_artifact_lod */ + @Argument(fullName="normal_artifact_lodFormat", shortName="", doc="Format string for normal_artifact_lod", required=false, exclusiveOf="", validation="") + var normal_artifact_lodFormat: String = "%s" + + /** LOD threshold for calling strand bias */ + @Argument(fullName="strand_artifact_lod", shortName="", doc="LOD threshold for calling strand bias", required=false, exclusiveOf="", validation="") + var strand_artifact_lod: Option[Float] = None + + /** Format string for strand_artifact_lod */ + @Argument(fullName="strand_artifact_lodFormat", shortName="", doc="Format string for strand_artifact_lod", required=false, exclusiveOf="", validation="") + var strand_artifact_lodFormat: String = "%s" + + /** power threshold for calling strand bias */ + @Argument(fullName="strand_artifact_power_threshold", shortName="", doc="power threshold for calling strand bias", required=false, exclusiveOf="", validation="") + var strand_artifact_power_threshold: Option[Float] = None + + /** Format string for strand_artifact_power_threshold */ + @Argument(fullName="strand_artifact_power_thresholdFormat", shortName="", doc="Format string for strand_artifact_power_threshold", required=false, exclusiveOf="", validation="") + var strand_artifact_power_thresholdFormat: String = "%s" + + /** LOD threshold for calling normal non-variant at dbsnp sites */ + @Argument(fullName="dbsnp_normal_lod", shortName="", doc="LOD threshold for calling normal non-variant at dbsnp sites", required=false, exclusiveOf="", validation="") + var dbsnp_normal_lod: Option[Float] = None + + /** Format string for dbsnp_normal_lod */ + @Argument(fullName="dbsnp_normal_lodFormat", shortName="", doc="Format string for dbsnp_normal_lod", required=false, exclusiveOf="", validation="") + var dbsnp_normal_lodFormat: String = "%s" + + /** Power threshold for normal to determine germline vs variant */ + @Argument(fullName="somatic_classification_normal_power_threshold", shortName="", doc="Power threshold for normal to determine germline vs variant", required=false, exclusiveOf="", validation="") + var somatic_classification_normal_power_threshold: Option[Float] = None + + /** Format string for somatic_classification_normal_power_threshold */ + @Argument(fullName="somatic_classification_normal_power_thresholdFormat", shortName="", doc="Format string for somatic_classification_normal_power_threshold", required=false, exclusiveOf="", validation="") + var somatic_classification_normal_power_thresholdFormat: String = "%s" + + /** minimum allele fraction to be considered in normal, useful for normal sample contaminated with tumor */ + @Argument(fullName="minimum_normal_allele_fraction", shortName="", doc="minimum allele fraction to be considered in normal, useful for normal sample contaminated with tumor", required=false, exclusiveOf="", validation="") + var minimum_normal_allele_fraction: Option[Float] = None + + /** Format string for minimum_normal_allele_fraction */ + @Argument(fullName="minimum_normal_allele_fractionFormat", shortName="", doc="Format string for minimum_normal_allele_fraction", required=false, exclusiveOf="", validation="") + var minimum_normal_allele_fractionFormat: String = "%s" + + /** for computational efficiency, reject sites with allelic fraction below this threshold */ + @Argument(fullName="tumor_f_pretest", shortName="", doc="for computational efficiency, reject sites with allelic fraction below this threshold", required=false, exclusiveOf="", validation="") + var tumor_f_pretest: Option[Float] = None + + /** Format string for tumor_f_pretest */ + @Argument(fullName="tumor_f_pretestFormat", shortName="", doc="Format string for tumor_f_pretest", required=false, exclusiveOf="", validation="") + var tumor_f_pretestFormat: String = "%s" + + /** threshold for minimum base quality score */ + @Argument(fullName="min_qscore", shortName="", doc="threshold for minimum base quality score", required=false, exclusiveOf="", validation="") + var min_qscore: Option[Int] = None + + /** how many gapped events (ins/del) are allowed in proximity to this candidate */ + @Argument(fullName="gap_events_threshold", shortName="", doc="how many gapped events (ins/del) are allowed in proximity to this candidate", required=false, exclusiveOf="", validation="") + var gap_events_threshold: Option[Int] = None + + /** if this fraction or more of the bases in a read are soft/hard clipped, do not use this read for mutation calling */ + @Argument(fullName="heavily_clipped_read_fraction", shortName="", doc="if this fraction or more of the bases in a read are soft/hard clipped, do not use this read for mutation calling", required=false, exclusiveOf="", validation="") + var heavily_clipped_read_fraction: Option[Float] = None + + /** Format string for heavily_clipped_read_fraction */ + @Argument(fullName="heavily_clipped_read_fractionFormat", shortName="", doc="Format string for heavily_clipped_read_fraction", required=false, exclusiveOf="", validation="") + var heavily_clipped_read_fractionFormat: String = "%s" + + /** pvalue threshold for fishers exact test of clipping bias in mutant reads vs ref reads */ + @Argument(fullName="clipping_bias_pvalue_threshold", shortName="", doc="pvalue threshold for fishers exact test of clipping bias in mutant reads vs ref reads", required=false, exclusiveOf="", validation="") + var clipping_bias_pvalue_threshold: Option[Float] = None + + /** Format string for clipping_bias_pvalue_threshold */ + @Argument(fullName="clipping_bias_pvalue_thresholdFormat", shortName="", doc="Format string for clipping_bias_pvalue_threshold", required=false, exclusiveOf="", validation="") + var clipping_bias_pvalue_thresholdFormat: String = "%s" + + /** threshold for determining if there is relatedness between the alt and ref allele read piles */ + @Argument(fullName="fraction_mapq0_threshold", shortName="", doc="threshold for determining if there is relatedness between the alt and ref allele read piles", required=false, exclusiveOf="", validation="") + var fraction_mapq0_threshold: Option[Float] = None + + /** Format string for fraction_mapq0_threshold */ + @Argument(fullName="fraction_mapq0_thresholdFormat", shortName="", doc="Format string for fraction_mapq0_threshold", required=false, exclusiveOf="", validation="") + var fraction_mapq0_thresholdFormat: String = "%s" + + /** threshold for clustered read position artifact median */ + @Argument(fullName="pir_median_threshold", shortName="", doc="threshold for clustered read position artifact median", required=false, exclusiveOf="", validation="") + var pir_median_threshold: Option[Double] = None + + /** Format string for pir_median_threshold */ + @Argument(fullName="pir_median_thresholdFormat", shortName="", doc="Format string for pir_median_threshold", required=false, exclusiveOf="", validation="") + var pir_median_thresholdFormat: String = "%s" + + /** threshold for clustered read position artifact MAD */ + @Argument(fullName="pir_mad_threshold", shortName="", doc="threshold for clustered read position artifact MAD", required=false, exclusiveOf="", validation="") + var pir_mad_threshold: Option[Double] = None + + /** Format string for pir_mad_threshold */ + @Argument(fullName="pir_mad_thresholdFormat", shortName="", doc="Format string for pir_mad_threshold", required=false, exclusiveOf="", validation="") + var pir_mad_thresholdFormat: String = "%s" + + /** required minimum value for tumor alt allele maximum mapping quality score */ + @Argument(fullName="required_maximum_alt_allele_mapping_quality_score", shortName="", doc="required minimum value for tumor alt allele maximum mapping quality score", required=false, exclusiveOf="", validation="") + var required_maximum_alt_allele_mapping_quality_score: Option[Int] = None + + /** threshold for maximum alternate allele counts in normal */ + @Argument(fullName="max_alt_alleles_in_normal_count", shortName="", doc="threshold for maximum alternate allele counts in normal", required=false, exclusiveOf="", validation="") + var max_alt_alleles_in_normal_count: Option[Int] = None + + /** threshold for maximum alternate allele quality score sum in normal */ + @Argument(fullName="max_alt_alleles_in_normal_qscore_sum", shortName="", doc="threshold for maximum alternate allele quality score sum in normal", required=false, exclusiveOf="", validation="") + var max_alt_alleles_in_normal_qscore_sum: Option[Int] = None + + /** threshold for maximum alternate allele fraction in normal */ + @Argument(fullName="max_alt_allele_in_normal_fraction", shortName="", doc="threshold for maximum alternate allele fraction in normal", required=false, exclusiveOf="", validation="") + var max_alt_allele_in_normal_fraction: Option[Double] = None + + /** Format string for max_alt_allele_in_normal_fraction */ + @Argument(fullName="max_alt_allele_in_normal_fractionFormat", shortName="", doc="Format string for max_alt_allele_in_normal_fraction", required=false, exclusiveOf="", validation="") + var max_alt_allele_in_normal_fractionFormat: String = "%s" + + /** Phred scale quality score constant to use in power calculations */ + @Argument(fullName="power_constant_qscore", shortName="", doc="Phred scale quality score constant to use in power calculations", required=false, exclusiveOf="", validation="") + var power_constant_qscore: Option[Int] = None + + /** Absolute Copy Number Data, as defined by Absolute, to use in power calculations */ + @Argument(fullName="absolute_copy_number_data", shortName="", doc="Absolute Copy Number Data, as defined by Absolute, to use in power calculations", required=false, exclusiveOf="", validation="") + var absolute_copy_number_data: File = _ + + /** Allelic fraction constant to use in power calculations */ + @Argument(fullName="power_constant_af", shortName="", doc="Allelic fraction constant to use in power calculations", required=false, exclusiveOf="", validation="") + var power_constant_af: Option[Double] = None + + /** Format string for power_constant_af */ + @Argument(fullName="power_constant_afFormat", shortName="", doc="Format string for power_constant_af", required=false, exclusiveOf="", validation="") + var power_constant_afFormat: String = "%s" + + /** Call-stats output */ + @Output(fullName="out", shortName="o", doc="Call-stats output", required=false, exclusiveOf="", validation="") + @Gather(classOf[org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction]) + var out: File = _ + + /** + * Short name of out + * @return Short name of out + */ + def o = this.out + + /** + * Short name of out + * @param value Short name of out + */ + def o_=(value: File) { this.out = value } + + /** VCF file of DBSNP information */ + @Input(fullName="dbsnp", shortName="dbsnp", doc="VCF file of DBSNP information", required=false, exclusiveOf="", validation="") + var dbsnp: Seq[File] = Nil + + /** Dependencies on any indexes of dbsnp */ + @Input(fullName="dbsnpIndexes", shortName="", doc="Dependencies on any indexes of dbsnp", required=false, exclusiveOf="", validation="") + private var dbsnpIndexes: Seq[File] = Nil + + /** VCF file of COSMIC sites */ + @Input(fullName="cosmic", shortName="cosmic", doc="VCF file of COSMIC sites", required=false, exclusiveOf="", validation="") + var cosmic: Seq[File] = Nil + + /** Dependencies on any indexes of cosmic */ + @Input(fullName="cosmicIndexes", shortName="", doc="Dependencies on any indexes of cosmic", required=false, exclusiveOf="", validation="") + private var cosmicIndexes: Seq[File] = Nil + + /** VCF file of sites observed in normal */ + @Input(fullName="normal_panel", shortName="normal_panel", doc="VCF file of sites observed in normal", required=false, exclusiveOf="", validation="") + var normal_panel: Seq[File] = Nil + + /** Dependencies on any indexes of normal_panel */ + @Input(fullName="normal_panelIndexes", shortName="", doc="Dependencies on any indexes of normal_panel", required=false, exclusiveOf="", validation="") + private var normal_panelIndexes: Seq[File] = Nil + + /** write out coverage in WIGGLE format to this file */ + @Output(fullName="coverage_file", shortName="cov", doc="write out coverage in WIGGLE format to this file", required=false, exclusiveOf="", validation="") + @Gather(classOf[org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction]) + var coverage_file: File = _ + + /** + * Short name of coverage_file + * @return Short name of coverage_file + */ + def cov = this.coverage_file + + /** + * Short name of coverage_file + * @param value Short name of coverage_file + */ + def cov_=(value: File) { this.coverage_file = value } + + /** write out 20x of Q20 coverage in WIGGLE format to this file */ + @Output(fullName="coverage_20_q20_file", shortName="cov_q20", doc="write out 20x of Q20 coverage in WIGGLE format to this file", required=false, exclusiveOf="", validation="") + @Gather(classOf[org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction]) + var coverage_20_q20_file: File = _ + + /** + * Short name of coverage_20_q20_file + * @return Short name of coverage_20_q20_file + */ + def cov_q20 = this.coverage_20_q20_file + + /** + * Short name of coverage_20_q20_file + * @param value Short name of coverage_20_q20_file + */ + def cov_q20_=(value: File) { this.coverage_20_q20_file = value } + + /** write out power in WIGGLE format to this file */ + @Output(fullName="power_file", shortName="pow", doc="write out power in WIGGLE format to this file", required=false, exclusiveOf="", validation="") + @Gather(classOf[org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction]) + var power_file: File = _ + + /** + * Short name of power_file + * @return Short name of power_file + */ + def pow = this.power_file + + /** + * Short name of power_file + * @param value Short name of power_file + */ + def pow_=(value: File) { this.power_file = value } + + /** write out tumor read depth in WIGGLE format to this file */ + @Output(fullName="tumor_depth_file", shortName="tdf", doc="write out tumor read depth in WIGGLE format to this file", required=false, exclusiveOf="", validation="") + @Gather(classOf[org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction]) + var tumor_depth_file: File = _ + + /** + * Short name of tumor_depth_file + * @return Short name of tumor_depth_file + */ + def tdf = this.tumor_depth_file + + /** + * Short name of tumor_depth_file + * @param value Short name of tumor_depth_file + */ + def tdf_=(value: File) { this.tumor_depth_file = value } + + /** write out normal read depth in WIGGLE format to this file */ + @Output(fullName="normal_depth_file", shortName="ndf", doc="write out normal read depth in WIGGLE format to this file", required=false, exclusiveOf="", validation="") + @Gather(classOf[org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction]) + var normal_depth_file: File = _ + + /** + * Short name of normal_depth_file + * @return Short name of normal_depth_file + */ + def ndf = this.normal_depth_file + + /** + * Short name of normal_depth_file + * @param value Short name of normal_depth_file + */ + def ndf_=(value: File) { this.normal_depth_file = value } + + /** if a read has mismatching number of bases and base qualities, filter out the read instead of blowing up. */ + @Argument(fullName="filter_mismatching_base_and_quals", shortName="filterMBQ", doc="if a read has mismatching number of bases and base qualities, filter out the read instead of blowing up.", required=false, exclusiveOf="", validation="") + var filter_mismatching_base_and_quals: Boolean = _ + + /** + * Short name of filter_mismatching_base_and_quals + * @return Short name of filter_mismatching_base_and_quals + */ + def filterMBQ = this.filter_mismatching_base_and_quals + + /** + * Short name of filter_mismatching_base_and_quals + * @param value Short name of filter_mismatching_base_and_quals + */ + def filterMBQ_=(value: Boolean) { this.filter_mismatching_base_and_quals = value } + + override def freezeFieldValues() { + super.freezeFieldValues() + dbsnpIndexes ++= dbsnp.filter(orig => orig != null).map(orig => new File(orig.getPath + ".idx")) + cosmicIndexes ++= cosmic.filter(orig => orig != null).map(orig => new File(orig.getPath + ".idx")) + normal_panelIndexes ++= normal_panel.filter(orig => orig != null).map(orig => new File(orig.getPath + ".idx")) + } + + override def commandLine = super.commandLine + conditional(noop, "--noop", escape=true, format="%s") + conditional(enable_extended_output, "--enable_extended_output", escape=true, format="%s") + conditional(artifact_detection_mode, "--artifact_detection_mode", escape=true, format="%s") + optional("--tumor_sample_name", tumor_sample_name, spaceSeparated=true, escape=true, format="%s") + optional("--bam_tumor_sample_name", bam_tumor_sample_name, spaceSeparated=true, escape=true, format="%s") + optional("--normal_sample_name", normal_sample_name, spaceSeparated=true, escape=true, format="%s") + conditional(force_output, "--force_output", escape=true, format="%s") + conditional(force_alleles, "--force_alleles", escape=true, format="%s") + optional("--initial_tumor_lod", initial_tumor_lod, spaceSeparated=true, escape=true, format=initial_tumor_lodFormat) + optional("--tumor_lod", tumor_lod, spaceSeparated=true, escape=true, format=tumor_lodFormat) + optional("--fraction_contamination", fraction_contamination, spaceSeparated=true, escape=true, format=fraction_contaminationFormat) + optional("--minimum_mutation_cell_fraction", minimum_mutation_cell_fraction, spaceSeparated=true, escape=true, format=minimum_mutation_cell_fractionFormat) + optional("--normal_lod", normal_lod, spaceSeparated=true, escape=true, format=normal_lodFormat) + optional("--normal_artifact_lod", normal_artifact_lod, spaceSeparated=true, escape=true, format=normal_artifact_lodFormat) + optional("--strand_artifact_lod", strand_artifact_lod, spaceSeparated=true, escape=true, format=strand_artifact_lodFormat) + optional("--strand_artifact_power_threshold", strand_artifact_power_threshold, spaceSeparated=true, escape=true, format=strand_artifact_power_thresholdFormat) + optional("--dbsnp_normal_lod", dbsnp_normal_lod, spaceSeparated=true, escape=true, format=dbsnp_normal_lodFormat) + optional("--somatic_classification_normal_power_threshold", somatic_classification_normal_power_threshold, spaceSeparated=true, escape=true, format=somatic_classification_normal_power_thresholdFormat) + optional("--minimum_normal_allele_fraction", minimum_normal_allele_fraction, spaceSeparated=true, escape=true, format=minimum_normal_allele_fractionFormat) + optional("--tumor_f_pretest", tumor_f_pretest, spaceSeparated=true, escape=true, format=tumor_f_pretestFormat) + optional("--min_qscore", min_qscore, spaceSeparated=true, escape=true, format="%s") + optional("--gap_events_threshold", gap_events_threshold, spaceSeparated=true, escape=true, format="%s") + optional("--heavily_clipped_read_fraction", heavily_clipped_read_fraction, spaceSeparated=true, escape=true, format=heavily_clipped_read_fractionFormat) + optional("--clipping_bias_pvalue_threshold", clipping_bias_pvalue_threshold, spaceSeparated=true, escape=true, format=clipping_bias_pvalue_thresholdFormat) + optional("--fraction_mapq0_threshold", fraction_mapq0_threshold, spaceSeparated=true, escape=true, format=fraction_mapq0_thresholdFormat) + optional("--pir_median_threshold", pir_median_threshold, spaceSeparated=true, escape=true, format=pir_median_thresholdFormat) + optional("--pir_mad_threshold", pir_mad_threshold, spaceSeparated=true, escape=true, format=pir_mad_thresholdFormat) + optional("--required_maximum_alt_allele_mapping_quality_score", required_maximum_alt_allele_mapping_quality_score, spaceSeparated=true, escape=true, format="%s") + optional("--max_alt_alleles_in_normal_count", max_alt_alleles_in_normal_count, spaceSeparated=true, escape=true, format="%s") + optional("--max_alt_alleles_in_normal_qscore_sum", max_alt_alleles_in_normal_qscore_sum, spaceSeparated=true, escape=true, format="%s") + optional("--max_alt_allele_in_normal_fraction", max_alt_allele_in_normal_fraction, spaceSeparated=true, escape=true, format=max_alt_allele_in_normal_fractionFormat) + optional("--power_constant_qscore", power_constant_qscore, spaceSeparated=true, escape=true, format="%s") + optional("--absolute_copy_number_data", absolute_copy_number_data, spaceSeparated=true, escape=true, format="%s") + optional("--power_constant_af", power_constant_af, spaceSeparated=true, escape=true, format=power_constant_afFormat) + optional("-o", out, spaceSeparated=true, escape=true, format="%s") + repeat("-dbsnp", dbsnp, formatPrefix=TaggedFile.formatCommandLineParameter, spaceSeparated=true, escape=true, format="%s") + repeat("-cosmic", cosmic, formatPrefix=TaggedFile.formatCommandLineParameter, spaceSeparated=true, escape=true, format="%s") + repeat("-normal_panel", normal_panel, formatPrefix=TaggedFile.formatCommandLineParameter, spaceSeparated=true, escape=true, format="%s") + optional("-cov", coverage_file, spaceSeparated=true, escape=true, format="%s") + optional("-cov_q20", coverage_20_q20_file, spaceSeparated=true, escape=true, format="%s") + optional("-pow", power_file, spaceSeparated=true, escape=true, format="%s") + optional("-tdf", tumor_depth_file, spaceSeparated=true, escape=true, format="%s") + optional("-ndf", normal_depth_file, spaceSeparated=true, escape=true, format="%s") + conditional(filter_mismatching_base_and_quals, "-filterMBQ", escape=true, format="%s") +} From 0afde9906a1c043d3644e47e45603b9bf9e6a382 Mon Sep 17 00:00:00 2001 From: Kristian Cibulskis Date: Wed, 3 Oct 2012 16:25:34 -0400 Subject: [PATCH 17/93] initial cancer pipeline with mutations and partial indel support --- .../queue/extensions/cancer/MuTect.scala | 378 ++++++++++++++++++ 1 file changed, 378 insertions(+) create mode 100644 public/scala/src/org/broadinstitute/sting/queue/extensions/cancer/MuTect.scala diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/cancer/MuTect.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/cancer/MuTect.scala new file mode 100644 index 000000000..623d397d4 --- /dev/null +++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/cancer/MuTect.scala @@ -0,0 +1,378 @@ +package org.broadinstitute.sting.queue.extensions.cancer + +import java.io.File +import org.broadinstitute.sting.commandline.Argument +import org.broadinstitute.sting.commandline.Gather +import org.broadinstitute.sting.commandline.Input +import org.broadinstitute.sting.commandline.Output +import org.broadinstitute.sting.queue.function.scattergather.ScatterGatherableFunction +import org.broadinstitute.sting.queue.extensions.gatk.{LocusScatterFunction, TaggedFile} + +class MuTect extends org.broadinstitute.sting.queue.extensions.gatk.CommandLineGATK with ScatterGatherableFunction { + analysisName = "MuTect" + analysis_type = "MuTect" + scatterClass = classOf[LocusScatterFunction] + + /** used for debugging, basically exit as soon as we get the reads */ + @Argument(fullName="noop", shortName="", doc="used for debugging, basically exit as soon as we get the reads", required=false, exclusiveOf="", validation="") + var noop: Boolean = _ + + /** add many additional columns of statistics to the output file */ + @Argument(fullName="enable_extended_output", shortName="", doc="add many additional columns of statistics to the output file", required=false, exclusiveOf="", validation="") + var enable_extended_output: Boolean = _ + + /** used when running the caller on a normal (as if it were a tumor) to detect artifacts */ + @Argument(fullName="artifact_detection_mode", shortName="", doc="used when running the caller on a normal (as if it were a tumor) to detect artifacts", required=false, exclusiveOf="", validation="") + var artifact_detection_mode: Boolean = _ + + /** name to use for tumor in output files */ + @Argument(fullName="tumor_sample_name", shortName="", doc="name to use for tumor in output files", required=false, exclusiveOf="", validation="") + var tumor_sample_name: String = _ + + /** if the tumor bam contains multiple samples, only use read groups with SM equal to this value */ + @Argument(fullName="bam_tumor_sample_name", shortName="", doc="if the tumor bam contains multiple samples, only use read groups with SM equal to this value", required=false, exclusiveOf="", validation="") + var bam_tumor_sample_name: String = _ + + /** name to use for normal in output files */ + @Argument(fullName="normal_sample_name", shortName="", doc="name to use for normal in output files", required=false, exclusiveOf="", validation="") + var normal_sample_name: String = _ + + /** force output for each site */ + @Argument(fullName="force_output", shortName="", doc="force output for each site", required=false, exclusiveOf="", validation="") + var force_output: Boolean = _ + + /** force output for all alleles at each site */ + @Argument(fullName="force_alleles", shortName="", doc="force output for all alleles at each site", required=false, exclusiveOf="", validation="") + var force_alleles: Boolean = _ + + /** Initial LOD threshold for calling tumor variant */ + @Argument(fullName="initial_tumor_lod", shortName="", doc="Initial LOD threshold for calling tumor variant", required=false, exclusiveOf="", validation="") + var initial_tumor_lod: Option[Float] = None + + /** Format string for initial_tumor_lod */ + @Argument(fullName="initial_tumor_lodFormat", shortName="", doc="Format string for initial_tumor_lod", required=false, exclusiveOf="", validation="") + var initial_tumor_lodFormat: String = "%s" + + /** LOD threshold for calling tumor variant */ + @Argument(fullName="tumor_lod", shortName="", doc="LOD threshold for calling tumor variant", required=false, exclusiveOf="", validation="") + var tumor_lod: Option[Float] = None + + /** Format string for tumor_lod */ + @Argument(fullName="tumor_lodFormat", shortName="", doc="Format string for tumor_lod", required=false, exclusiveOf="", validation="") + var tumor_lodFormat: String = "%s" + + /** estimate of fraction (0-1) of physical contamination with other unrelated samples */ + @Argument(fullName="fraction_contamination", shortName="", doc="estimate of fraction (0-1) of physical contamination with other unrelated samples", required=false, exclusiveOf="", validation="") + var fraction_contamination: Option[Float] = None + + /** Format string for fraction_contamination */ + @Argument(fullName="fraction_contaminationFormat", shortName="", doc="Format string for fraction_contamination", required=false, exclusiveOf="", validation="") + var fraction_contaminationFormat: String = "%s" + + /** minimum fraction of cells which are presumed to have a mutation, used to handle non-clonality and contamination */ + @Argument(fullName="minimum_mutation_cell_fraction", shortName="", doc="minimum fraction of cells which are presumed to have a mutation, used to handle non-clonality and contamination", required=false, exclusiveOf="", validation="") + var minimum_mutation_cell_fraction: Option[Float] = None + + /** Format string for minimum_mutation_cell_fraction */ + @Argument(fullName="minimum_mutation_cell_fractionFormat", shortName="", doc="Format string for minimum_mutation_cell_fraction", required=false, exclusiveOf="", validation="") + var minimum_mutation_cell_fractionFormat: String = "%s" + + /** LOD threshold for calling normal non-germline */ + @Argument(fullName="normal_lod", shortName="", doc="LOD threshold for calling normal non-germline", required=false, exclusiveOf="", validation="") + var normal_lod: Option[Float] = None + + /** Format string for normal_lod */ + @Argument(fullName="normal_lodFormat", shortName="", doc="Format string for normal_lod", required=false, exclusiveOf="", validation="") + var normal_lodFormat: String = "%s" + + /** LOD threshold for calling normal non-variant */ + @Argument(fullName="normal_artifact_lod", shortName="", doc="LOD threshold for calling normal non-variant", required=false, exclusiveOf="", validation="") + var normal_artifact_lod: Option[Float] = None + + /** Format string for normal_artifact_lod */ + @Argument(fullName="normal_artifact_lodFormat", shortName="", doc="Format string for normal_artifact_lod", required=false, exclusiveOf="", validation="") + var normal_artifact_lodFormat: String = "%s" + + /** LOD threshold for calling strand bias */ + @Argument(fullName="strand_artifact_lod", shortName="", doc="LOD threshold for calling strand bias", required=false, exclusiveOf="", validation="") + var strand_artifact_lod: Option[Float] = None + + /** Format string for strand_artifact_lod */ + @Argument(fullName="strand_artifact_lodFormat", shortName="", doc="Format string for strand_artifact_lod", required=false, exclusiveOf="", validation="") + var strand_artifact_lodFormat: String = "%s" + + /** power threshold for calling strand bias */ + @Argument(fullName="strand_artifact_power_threshold", shortName="", doc="power threshold for calling strand bias", required=false, exclusiveOf="", validation="") + var strand_artifact_power_threshold: Option[Float] = None + + /** Format string for strand_artifact_power_threshold */ + @Argument(fullName="strand_artifact_power_thresholdFormat", shortName="", doc="Format string for strand_artifact_power_threshold", required=false, exclusiveOf="", validation="") + var strand_artifact_power_thresholdFormat: String = "%s" + + /** LOD threshold for calling normal non-variant at dbsnp sites */ + @Argument(fullName="dbsnp_normal_lod", shortName="", doc="LOD threshold for calling normal non-variant at dbsnp sites", required=false, exclusiveOf="", validation="") + var dbsnp_normal_lod: Option[Float] = None + + /** Format string for dbsnp_normal_lod */ + @Argument(fullName="dbsnp_normal_lodFormat", shortName="", doc="Format string for dbsnp_normal_lod", required=false, exclusiveOf="", validation="") + var dbsnp_normal_lodFormat: String = "%s" + + /** Power threshold for normal to determine germline vs variant */ + @Argument(fullName="somatic_classification_normal_power_threshold", shortName="", doc="Power threshold for normal to determine germline vs variant", required=false, exclusiveOf="", validation="") + var somatic_classification_normal_power_threshold: Option[Float] = None + + /** Format string for somatic_classification_normal_power_threshold */ + @Argument(fullName="somatic_classification_normal_power_thresholdFormat", shortName="", doc="Format string for somatic_classification_normal_power_threshold", required=false, exclusiveOf="", validation="") + var somatic_classification_normal_power_thresholdFormat: String = "%s" + + /** minimum allele fraction to be considered in normal, useful for normal sample contaminated with tumor */ + @Argument(fullName="minimum_normal_allele_fraction", shortName="", doc="minimum allele fraction to be considered in normal, useful for normal sample contaminated with tumor", required=false, exclusiveOf="", validation="") + var minimum_normal_allele_fraction: Option[Float] = None + + /** Format string for minimum_normal_allele_fraction */ + @Argument(fullName="minimum_normal_allele_fractionFormat", shortName="", doc="Format string for minimum_normal_allele_fraction", required=false, exclusiveOf="", validation="") + var minimum_normal_allele_fractionFormat: String = "%s" + + /** for computational efficiency, reject sites with allelic fraction below this threshold */ + @Argument(fullName="tumor_f_pretest", shortName="", doc="for computational efficiency, reject sites with allelic fraction below this threshold", required=false, exclusiveOf="", validation="") + var tumor_f_pretest: Option[Float] = None + + /** Format string for tumor_f_pretest */ + @Argument(fullName="tumor_f_pretestFormat", shortName="", doc="Format string for tumor_f_pretest", required=false, exclusiveOf="", validation="") + var tumor_f_pretestFormat: String = "%s" + + /** threshold for minimum base quality score */ + @Argument(fullName="min_qscore", shortName="", doc="threshold for minimum base quality score", required=false, exclusiveOf="", validation="") + var min_qscore: Option[Int] = None + + /** how many gapped events (ins/del) are allowed in proximity to this candidate */ + @Argument(fullName="gap_events_threshold", shortName="", doc="how many gapped events (ins/del) are allowed in proximity to this candidate", required=false, exclusiveOf="", validation="") + var gap_events_threshold: Option[Int] = None + + /** if this fraction or more of the bases in a read are soft/hard clipped, do not use this read for mutation calling */ + @Argument(fullName="heavily_clipped_read_fraction", shortName="", doc="if this fraction or more of the bases in a read are soft/hard clipped, do not use this read for mutation calling", required=false, exclusiveOf="", validation="") + var heavily_clipped_read_fraction: Option[Float] = None + + /** Format string for heavily_clipped_read_fraction */ + @Argument(fullName="heavily_clipped_read_fractionFormat", shortName="", doc="Format string for heavily_clipped_read_fraction", required=false, exclusiveOf="", validation="") + var heavily_clipped_read_fractionFormat: String = "%s" + + /** pvalue threshold for fishers exact test of clipping bias in mutant reads vs ref reads */ + @Argument(fullName="clipping_bias_pvalue_threshold", shortName="", doc="pvalue threshold for fishers exact test of clipping bias in mutant reads vs ref reads", required=false, exclusiveOf="", validation="") + var clipping_bias_pvalue_threshold: Option[Float] = None + + /** Format string for clipping_bias_pvalue_threshold */ + @Argument(fullName="clipping_bias_pvalue_thresholdFormat", shortName="", doc="Format string for clipping_bias_pvalue_threshold", required=false, exclusiveOf="", validation="") + var clipping_bias_pvalue_thresholdFormat: String = "%s" + + /** threshold for determining if there is relatedness between the alt and ref allele read piles */ + @Argument(fullName="fraction_mapq0_threshold", shortName="", doc="threshold for determining if there is relatedness between the alt and ref allele read piles", required=false, exclusiveOf="", validation="") + var fraction_mapq0_threshold: Option[Float] = None + + /** Format string for fraction_mapq0_threshold */ + @Argument(fullName="fraction_mapq0_thresholdFormat", shortName="", doc="Format string for fraction_mapq0_threshold", required=false, exclusiveOf="", validation="") + var fraction_mapq0_thresholdFormat: String = "%s" + + /** threshold for clustered read position artifact median */ + @Argument(fullName="pir_median_threshold", shortName="", doc="threshold for clustered read position artifact median", required=false, exclusiveOf="", validation="") + var pir_median_threshold: Option[Double] = None + + /** Format string for pir_median_threshold */ + @Argument(fullName="pir_median_thresholdFormat", shortName="", doc="Format string for pir_median_threshold", required=false, exclusiveOf="", validation="") + var pir_median_thresholdFormat: String = "%s" + + /** threshold for clustered read position artifact MAD */ + @Argument(fullName="pir_mad_threshold", shortName="", doc="threshold for clustered read position artifact MAD", required=false, exclusiveOf="", validation="") + var pir_mad_threshold: Option[Double] = None + + /** Format string for pir_mad_threshold */ + @Argument(fullName="pir_mad_thresholdFormat", shortName="", doc="Format string for pir_mad_threshold", required=false, exclusiveOf="", validation="") + var pir_mad_thresholdFormat: String = "%s" + + /** required minimum value for tumor alt allele maximum mapping quality score */ + @Argument(fullName="required_maximum_alt_allele_mapping_quality_score", shortName="", doc="required minimum value for tumor alt allele maximum mapping quality score", required=false, exclusiveOf="", validation="") + var required_maximum_alt_allele_mapping_quality_score: Option[Int] = None + + /** threshold for maximum alternate allele counts in normal */ + @Argument(fullName="max_alt_alleles_in_normal_count", shortName="", doc="threshold for maximum alternate allele counts in normal", required=false, exclusiveOf="", validation="") + var max_alt_alleles_in_normal_count: Option[Int] = None + + /** threshold for maximum alternate allele quality score sum in normal */ + @Argument(fullName="max_alt_alleles_in_normal_qscore_sum", shortName="", doc="threshold for maximum alternate allele quality score sum in normal", required=false, exclusiveOf="", validation="") + var max_alt_alleles_in_normal_qscore_sum: Option[Int] = None + + /** threshold for maximum alternate allele fraction in normal */ + @Argument(fullName="max_alt_allele_in_normal_fraction", shortName="", doc="threshold for maximum alternate allele fraction in normal", required=false, exclusiveOf="", validation="") + var max_alt_allele_in_normal_fraction: Option[Double] = None + + /** Format string for max_alt_allele_in_normal_fraction */ + @Argument(fullName="max_alt_allele_in_normal_fractionFormat", shortName="", doc="Format string for max_alt_allele_in_normal_fraction", required=false, exclusiveOf="", validation="") + var max_alt_allele_in_normal_fractionFormat: String = "%s" + + /** Phred scale quality score constant to use in power calculations */ + @Argument(fullName="power_constant_qscore", shortName="", doc="Phred scale quality score constant to use in power calculations", required=false, exclusiveOf="", validation="") + var power_constant_qscore: Option[Int] = None + + /** Absolute Copy Number Data, as defined by Absolute, to use in power calculations */ + @Argument(fullName="absolute_copy_number_data", shortName="", doc="Absolute Copy Number Data, as defined by Absolute, to use in power calculations", required=false, exclusiveOf="", validation="") + var absolute_copy_number_data: File = _ + + /** Allelic fraction constant to use in power calculations */ + @Argument(fullName="power_constant_af", shortName="", doc="Allelic fraction constant to use in power calculations", required=false, exclusiveOf="", validation="") + var power_constant_af: Option[Double] = None + + /** Format string for power_constant_af */ + @Argument(fullName="power_constant_afFormat", shortName="", doc="Format string for power_constant_af", required=false, exclusiveOf="", validation="") + var power_constant_afFormat: String = "%s" + + /** Call-stats output */ + @Output(fullName="out", shortName="o", doc="Call-stats output", required=false, exclusiveOf="", validation="") + @Gather(classOf[org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction]) + var out: File = _ + + /** + * Short name of out + * @return Short name of out + */ + def o = this.out + + /** + * Short name of out + * @param value Short name of out + */ + def o_=(value: File) { this.out = value } + + /** VCF file of DBSNP information */ + @Input(fullName="dbsnp", shortName="dbsnp", doc="VCF file of DBSNP information", required=false, exclusiveOf="", validation="") + var dbsnp: Seq[File] = Nil + + /** Dependencies on any indexes of dbsnp */ + @Input(fullName="dbsnpIndexes", shortName="", doc="Dependencies on any indexes of dbsnp", required=false, exclusiveOf="", validation="") + private var dbsnpIndexes: Seq[File] = Nil + + /** VCF file of COSMIC sites */ + @Input(fullName="cosmic", shortName="cosmic", doc="VCF file of COSMIC sites", required=false, exclusiveOf="", validation="") + var cosmic: Seq[File] = Nil + + /** Dependencies on any indexes of cosmic */ + @Input(fullName="cosmicIndexes", shortName="", doc="Dependencies on any indexes of cosmic", required=false, exclusiveOf="", validation="") + private var cosmicIndexes: Seq[File] = Nil + + /** VCF file of sites observed in normal */ + @Input(fullName="normal_panel", shortName="normal_panel", doc="VCF file of sites observed in normal", required=false, exclusiveOf="", validation="") + var normal_panel: Seq[File] = Nil + + /** Dependencies on any indexes of normal_panel */ + @Input(fullName="normal_panelIndexes", shortName="", doc="Dependencies on any indexes of normal_panel", required=false, exclusiveOf="", validation="") + private var normal_panelIndexes: Seq[File] = Nil + + /** write out coverage in WIGGLE format to this file */ + @Output(fullName="coverage_file", shortName="cov", doc="write out coverage in WIGGLE format to this file", required=false, exclusiveOf="", validation="") + @Gather(classOf[org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction]) + var coverage_file: File = _ + + /** + * Short name of coverage_file + * @return Short name of coverage_file + */ + def cov = this.coverage_file + + /** + * Short name of coverage_file + * @param value Short name of coverage_file + */ + def cov_=(value: File) { this.coverage_file = value } + + /** write out 20x of Q20 coverage in WIGGLE format to this file */ + @Output(fullName="coverage_20_q20_file", shortName="cov_q20", doc="write out 20x of Q20 coverage in WIGGLE format to this file", required=false, exclusiveOf="", validation="") + @Gather(classOf[org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction]) + var coverage_20_q20_file: File = _ + + /** + * Short name of coverage_20_q20_file + * @return Short name of coverage_20_q20_file + */ + def cov_q20 = this.coverage_20_q20_file + + /** + * Short name of coverage_20_q20_file + * @param value Short name of coverage_20_q20_file + */ + def cov_q20_=(value: File) { this.coverage_20_q20_file = value } + + /** write out power in WIGGLE format to this file */ + @Output(fullName="power_file", shortName="pow", doc="write out power in WIGGLE format to this file", required=false, exclusiveOf="", validation="") + @Gather(classOf[org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction]) + var power_file: File = _ + + /** + * Short name of power_file + * @return Short name of power_file + */ + def pow = this.power_file + + /** + * Short name of power_file + * @param value Short name of power_file + */ + def pow_=(value: File) { this.power_file = value } + + /** write out tumor read depth in WIGGLE format to this file */ + @Output(fullName="tumor_depth_file", shortName="tdf", doc="write out tumor read depth in WIGGLE format to this file", required=false, exclusiveOf="", validation="") + @Gather(classOf[org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction]) + var tumor_depth_file: File = _ + + /** + * Short name of tumor_depth_file + * @return Short name of tumor_depth_file + */ + def tdf = this.tumor_depth_file + + /** + * Short name of tumor_depth_file + * @param value Short name of tumor_depth_file + */ + def tdf_=(value: File) { this.tumor_depth_file = value } + + /** write out normal read depth in WIGGLE format to this file */ + @Output(fullName="normal_depth_file", shortName="ndf", doc="write out normal read depth in WIGGLE format to this file", required=false, exclusiveOf="", validation="") + @Gather(classOf[org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction]) + var normal_depth_file: File = _ + + /** + * Short name of normal_depth_file + * @return Short name of normal_depth_file + */ + def ndf = this.normal_depth_file + + /** + * Short name of normal_depth_file + * @param value Short name of normal_depth_file + */ + def ndf_=(value: File) { this.normal_depth_file = value } + + /** if a read has mismatching number of bases and base qualities, filter out the read instead of blowing up. */ + @Argument(fullName="filter_mismatching_base_and_quals", shortName="filterMBQ", doc="if a read has mismatching number of bases and base qualities, filter out the read instead of blowing up.", required=false, exclusiveOf="", validation="") + var filter_mismatching_base_and_quals: Boolean = _ + + /** + * Short name of filter_mismatching_base_and_quals + * @return Short name of filter_mismatching_base_and_quals + */ + def filterMBQ = this.filter_mismatching_base_and_quals + + /** + * Short name of filter_mismatching_base_and_quals + * @param value Short name of filter_mismatching_base_and_quals + */ + def filterMBQ_=(value: Boolean) { this.filter_mismatching_base_and_quals = value } + + override def freezeFieldValues() { + super.freezeFieldValues() + dbsnpIndexes ++= dbsnp.filter(orig => orig != null).map(orig => new File(orig.getPath + ".idx")) + cosmicIndexes ++= cosmic.filter(orig => orig != null).map(orig => new File(orig.getPath + ".idx")) + normal_panelIndexes ++= normal_panel.filter(orig => orig != null).map(orig => new File(orig.getPath + ".idx")) + } + + override def commandLine = super.commandLine + conditional(noop, "--noop", escape=true, format="%s") + conditional(enable_extended_output, "--enable_extended_output", escape=true, format="%s") + conditional(artifact_detection_mode, "--artifact_detection_mode", escape=true, format="%s") + optional("--tumor_sample_name", tumor_sample_name, spaceSeparated=true, escape=true, format="%s") + optional("--bam_tumor_sample_name", bam_tumor_sample_name, spaceSeparated=true, escape=true, format="%s") + optional("--normal_sample_name", normal_sample_name, spaceSeparated=true, escape=true, format="%s") + conditional(force_output, "--force_output", escape=true, format="%s") + conditional(force_alleles, "--force_alleles", escape=true, format="%s") + optional("--initial_tumor_lod", initial_tumor_lod, spaceSeparated=true, escape=true, format=initial_tumor_lodFormat) + optional("--tumor_lod", tumor_lod, spaceSeparated=true, escape=true, format=tumor_lodFormat) + optional("--fraction_contamination", fraction_contamination, spaceSeparated=true, escape=true, format=fraction_contaminationFormat) + optional("--minimum_mutation_cell_fraction", minimum_mutation_cell_fraction, spaceSeparated=true, escape=true, format=minimum_mutation_cell_fractionFormat) + optional("--normal_lod", normal_lod, spaceSeparated=true, escape=true, format=normal_lodFormat) + optional("--normal_artifact_lod", normal_artifact_lod, spaceSeparated=true, escape=true, format=normal_artifact_lodFormat) + optional("--strand_artifact_lod", strand_artifact_lod, spaceSeparated=true, escape=true, format=strand_artifact_lodFormat) + optional("--strand_artifact_power_threshold", strand_artifact_power_threshold, spaceSeparated=true, escape=true, format=strand_artifact_power_thresholdFormat) + optional("--dbsnp_normal_lod", dbsnp_normal_lod, spaceSeparated=true, escape=true, format=dbsnp_normal_lodFormat) + optional("--somatic_classification_normal_power_threshold", somatic_classification_normal_power_threshold, spaceSeparated=true, escape=true, format=somatic_classification_normal_power_thresholdFormat) + optional("--minimum_normal_allele_fraction", minimum_normal_allele_fraction, spaceSeparated=true, escape=true, format=minimum_normal_allele_fractionFormat) + optional("--tumor_f_pretest", tumor_f_pretest, spaceSeparated=true, escape=true, format=tumor_f_pretestFormat) + optional("--min_qscore", min_qscore, spaceSeparated=true, escape=true, format="%s") + optional("--gap_events_threshold", gap_events_threshold, spaceSeparated=true, escape=true, format="%s") + optional("--heavily_clipped_read_fraction", heavily_clipped_read_fraction, spaceSeparated=true, escape=true, format=heavily_clipped_read_fractionFormat) + optional("--clipping_bias_pvalue_threshold", clipping_bias_pvalue_threshold, spaceSeparated=true, escape=true, format=clipping_bias_pvalue_thresholdFormat) + optional("--fraction_mapq0_threshold", fraction_mapq0_threshold, spaceSeparated=true, escape=true, format=fraction_mapq0_thresholdFormat) + optional("--pir_median_threshold", pir_median_threshold, spaceSeparated=true, escape=true, format=pir_median_thresholdFormat) + optional("--pir_mad_threshold", pir_mad_threshold, spaceSeparated=true, escape=true, format=pir_mad_thresholdFormat) + optional("--required_maximum_alt_allele_mapping_quality_score", required_maximum_alt_allele_mapping_quality_score, spaceSeparated=true, escape=true, format="%s") + optional("--max_alt_alleles_in_normal_count", max_alt_alleles_in_normal_count, spaceSeparated=true, escape=true, format="%s") + optional("--max_alt_alleles_in_normal_qscore_sum", max_alt_alleles_in_normal_qscore_sum, spaceSeparated=true, escape=true, format="%s") + optional("--max_alt_allele_in_normal_fraction", max_alt_allele_in_normal_fraction, spaceSeparated=true, escape=true, format=max_alt_allele_in_normal_fractionFormat) + optional("--power_constant_qscore", power_constant_qscore, spaceSeparated=true, escape=true, format="%s") + optional("--absolute_copy_number_data", absolute_copy_number_data, spaceSeparated=true, escape=true, format="%s") + optional("--power_constant_af", power_constant_af, spaceSeparated=true, escape=true, format=power_constant_afFormat) + optional("-o", out, spaceSeparated=true, escape=true, format="%s") + repeat("-dbsnp", dbsnp, formatPrefix=TaggedFile.formatCommandLineParameter, spaceSeparated=true, escape=true, format="%s") + repeat("-cosmic", cosmic, formatPrefix=TaggedFile.formatCommandLineParameter, spaceSeparated=true, escape=true, format="%s") + repeat("-normal_panel", normal_panel, formatPrefix=TaggedFile.formatCommandLineParameter, spaceSeparated=true, escape=true, format="%s") + optional("-cov", coverage_file, spaceSeparated=true, escape=true, format="%s") + optional("-cov_q20", coverage_20_q20_file, spaceSeparated=true, escape=true, format="%s") + optional("-pow", power_file, spaceSeparated=true, escape=true, format="%s") + optional("-tdf", tumor_depth_file, spaceSeparated=true, escape=true, format="%s") + optional("-ndf", normal_depth_file, spaceSeparated=true, escape=true, format="%s") + conditional(filter_mismatching_base_and_quals, "-filterMBQ", escape=true, format="%s") +} From 3ffba77656606f4378daf0474b18794741b4423f Mon Sep 17 00:00:00 2001 From: Scott Frazer Date: Thu, 4 Oct 2012 11:37:54 -0400 Subject: [PATCH 18/93] Revert "initial cancer pipeline with mutations and partial indel support" This reverts commit 4a2e5b1fcc3ad53dbb26d43eed1220b0257e9901. --- .../queue/extensions/cancer/MuTect.scala | 378 ------------------ 1 file changed, 378 deletions(-) delete mode 100644 public/scala/src/org/broadinstitute/sting/queue/extensions/cancer/MuTect.scala diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/cancer/MuTect.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/cancer/MuTect.scala deleted file mode 100644 index 623d397d4..000000000 --- a/public/scala/src/org/broadinstitute/sting/queue/extensions/cancer/MuTect.scala +++ /dev/null @@ -1,378 +0,0 @@ -package org.broadinstitute.sting.queue.extensions.cancer - -import java.io.File -import org.broadinstitute.sting.commandline.Argument -import org.broadinstitute.sting.commandline.Gather -import org.broadinstitute.sting.commandline.Input -import org.broadinstitute.sting.commandline.Output -import org.broadinstitute.sting.queue.function.scattergather.ScatterGatherableFunction -import org.broadinstitute.sting.queue.extensions.gatk.{LocusScatterFunction, TaggedFile} - -class MuTect extends org.broadinstitute.sting.queue.extensions.gatk.CommandLineGATK with ScatterGatherableFunction { - analysisName = "MuTect" - analysis_type = "MuTect" - scatterClass = classOf[LocusScatterFunction] - - /** used for debugging, basically exit as soon as we get the reads */ - @Argument(fullName="noop", shortName="", doc="used for debugging, basically exit as soon as we get the reads", required=false, exclusiveOf="", validation="") - var noop: Boolean = _ - - /** add many additional columns of statistics to the output file */ - @Argument(fullName="enable_extended_output", shortName="", doc="add many additional columns of statistics to the output file", required=false, exclusiveOf="", validation="") - var enable_extended_output: Boolean = _ - - /** used when running the caller on a normal (as if it were a tumor) to detect artifacts */ - @Argument(fullName="artifact_detection_mode", shortName="", doc="used when running the caller on a normal (as if it were a tumor) to detect artifacts", required=false, exclusiveOf="", validation="") - var artifact_detection_mode: Boolean = _ - - /** name to use for tumor in output files */ - @Argument(fullName="tumor_sample_name", shortName="", doc="name to use for tumor in output files", required=false, exclusiveOf="", validation="") - var tumor_sample_name: String = _ - - /** if the tumor bam contains multiple samples, only use read groups with SM equal to this value */ - @Argument(fullName="bam_tumor_sample_name", shortName="", doc="if the tumor bam contains multiple samples, only use read groups with SM equal to this value", required=false, exclusiveOf="", validation="") - var bam_tumor_sample_name: String = _ - - /** name to use for normal in output files */ - @Argument(fullName="normal_sample_name", shortName="", doc="name to use for normal in output files", required=false, exclusiveOf="", validation="") - var normal_sample_name: String = _ - - /** force output for each site */ - @Argument(fullName="force_output", shortName="", doc="force output for each site", required=false, exclusiveOf="", validation="") - var force_output: Boolean = _ - - /** force output for all alleles at each site */ - @Argument(fullName="force_alleles", shortName="", doc="force output for all alleles at each site", required=false, exclusiveOf="", validation="") - var force_alleles: Boolean = _ - - /** Initial LOD threshold for calling tumor variant */ - @Argument(fullName="initial_tumor_lod", shortName="", doc="Initial LOD threshold for calling tumor variant", required=false, exclusiveOf="", validation="") - var initial_tumor_lod: Option[Float] = None - - /** Format string for initial_tumor_lod */ - @Argument(fullName="initial_tumor_lodFormat", shortName="", doc="Format string for initial_tumor_lod", required=false, exclusiveOf="", validation="") - var initial_tumor_lodFormat: String = "%s" - - /** LOD threshold for calling tumor variant */ - @Argument(fullName="tumor_lod", shortName="", doc="LOD threshold for calling tumor variant", required=false, exclusiveOf="", validation="") - var tumor_lod: Option[Float] = None - - /** Format string for tumor_lod */ - @Argument(fullName="tumor_lodFormat", shortName="", doc="Format string for tumor_lod", required=false, exclusiveOf="", validation="") - var tumor_lodFormat: String = "%s" - - /** estimate of fraction (0-1) of physical contamination with other unrelated samples */ - @Argument(fullName="fraction_contamination", shortName="", doc="estimate of fraction (0-1) of physical contamination with other unrelated samples", required=false, exclusiveOf="", validation="") - var fraction_contamination: Option[Float] = None - - /** Format string for fraction_contamination */ - @Argument(fullName="fraction_contaminationFormat", shortName="", doc="Format string for fraction_contamination", required=false, exclusiveOf="", validation="") - var fraction_contaminationFormat: String = "%s" - - /** minimum fraction of cells which are presumed to have a mutation, used to handle non-clonality and contamination */ - @Argument(fullName="minimum_mutation_cell_fraction", shortName="", doc="minimum fraction of cells which are presumed to have a mutation, used to handle non-clonality and contamination", required=false, exclusiveOf="", validation="") - var minimum_mutation_cell_fraction: Option[Float] = None - - /** Format string for minimum_mutation_cell_fraction */ - @Argument(fullName="minimum_mutation_cell_fractionFormat", shortName="", doc="Format string for minimum_mutation_cell_fraction", required=false, exclusiveOf="", validation="") - var minimum_mutation_cell_fractionFormat: String = "%s" - - /** LOD threshold for calling normal non-germline */ - @Argument(fullName="normal_lod", shortName="", doc="LOD threshold for calling normal non-germline", required=false, exclusiveOf="", validation="") - var normal_lod: Option[Float] = None - - /** Format string for normal_lod */ - @Argument(fullName="normal_lodFormat", shortName="", doc="Format string for normal_lod", required=false, exclusiveOf="", validation="") - var normal_lodFormat: String = "%s" - - /** LOD threshold for calling normal non-variant */ - @Argument(fullName="normal_artifact_lod", shortName="", doc="LOD threshold for calling normal non-variant", required=false, exclusiveOf="", validation="") - var normal_artifact_lod: Option[Float] = None - - /** Format string for normal_artifact_lod */ - @Argument(fullName="normal_artifact_lodFormat", shortName="", doc="Format string for normal_artifact_lod", required=false, exclusiveOf="", validation="") - var normal_artifact_lodFormat: String = "%s" - - /** LOD threshold for calling strand bias */ - @Argument(fullName="strand_artifact_lod", shortName="", doc="LOD threshold for calling strand bias", required=false, exclusiveOf="", validation="") - var strand_artifact_lod: Option[Float] = None - - /** Format string for strand_artifact_lod */ - @Argument(fullName="strand_artifact_lodFormat", shortName="", doc="Format string for strand_artifact_lod", required=false, exclusiveOf="", validation="") - var strand_artifact_lodFormat: String = "%s" - - /** power threshold for calling strand bias */ - @Argument(fullName="strand_artifact_power_threshold", shortName="", doc="power threshold for calling strand bias", required=false, exclusiveOf="", validation="") - var strand_artifact_power_threshold: Option[Float] = None - - /** Format string for strand_artifact_power_threshold */ - @Argument(fullName="strand_artifact_power_thresholdFormat", shortName="", doc="Format string for strand_artifact_power_threshold", required=false, exclusiveOf="", validation="") - var strand_artifact_power_thresholdFormat: String = "%s" - - /** LOD threshold for calling normal non-variant at dbsnp sites */ - @Argument(fullName="dbsnp_normal_lod", shortName="", doc="LOD threshold for calling normal non-variant at dbsnp sites", required=false, exclusiveOf="", validation="") - var dbsnp_normal_lod: Option[Float] = None - - /** Format string for dbsnp_normal_lod */ - @Argument(fullName="dbsnp_normal_lodFormat", shortName="", doc="Format string for dbsnp_normal_lod", required=false, exclusiveOf="", validation="") - var dbsnp_normal_lodFormat: String = "%s" - - /** Power threshold for normal to determine germline vs variant */ - @Argument(fullName="somatic_classification_normal_power_threshold", shortName="", doc="Power threshold for normal to determine germline vs variant", required=false, exclusiveOf="", validation="") - var somatic_classification_normal_power_threshold: Option[Float] = None - - /** Format string for somatic_classification_normal_power_threshold */ - @Argument(fullName="somatic_classification_normal_power_thresholdFormat", shortName="", doc="Format string for somatic_classification_normal_power_threshold", required=false, exclusiveOf="", validation="") - var somatic_classification_normal_power_thresholdFormat: String = "%s" - - /** minimum allele fraction to be considered in normal, useful for normal sample contaminated with tumor */ - @Argument(fullName="minimum_normal_allele_fraction", shortName="", doc="minimum allele fraction to be considered in normal, useful for normal sample contaminated with tumor", required=false, exclusiveOf="", validation="") - var minimum_normal_allele_fraction: Option[Float] = None - - /** Format string for minimum_normal_allele_fraction */ - @Argument(fullName="minimum_normal_allele_fractionFormat", shortName="", doc="Format string for minimum_normal_allele_fraction", required=false, exclusiveOf="", validation="") - var minimum_normal_allele_fractionFormat: String = "%s" - - /** for computational efficiency, reject sites with allelic fraction below this threshold */ - @Argument(fullName="tumor_f_pretest", shortName="", doc="for computational efficiency, reject sites with allelic fraction below this threshold", required=false, exclusiveOf="", validation="") - var tumor_f_pretest: Option[Float] = None - - /** Format string for tumor_f_pretest */ - @Argument(fullName="tumor_f_pretestFormat", shortName="", doc="Format string for tumor_f_pretest", required=false, exclusiveOf="", validation="") - var tumor_f_pretestFormat: String = "%s" - - /** threshold for minimum base quality score */ - @Argument(fullName="min_qscore", shortName="", doc="threshold for minimum base quality score", required=false, exclusiveOf="", validation="") - var min_qscore: Option[Int] = None - - /** how many gapped events (ins/del) are allowed in proximity to this candidate */ - @Argument(fullName="gap_events_threshold", shortName="", doc="how many gapped events (ins/del) are allowed in proximity to this candidate", required=false, exclusiveOf="", validation="") - var gap_events_threshold: Option[Int] = None - - /** if this fraction or more of the bases in a read are soft/hard clipped, do not use this read for mutation calling */ - @Argument(fullName="heavily_clipped_read_fraction", shortName="", doc="if this fraction or more of the bases in a read are soft/hard clipped, do not use this read for mutation calling", required=false, exclusiveOf="", validation="") - var heavily_clipped_read_fraction: Option[Float] = None - - /** Format string for heavily_clipped_read_fraction */ - @Argument(fullName="heavily_clipped_read_fractionFormat", shortName="", doc="Format string for heavily_clipped_read_fraction", required=false, exclusiveOf="", validation="") - var heavily_clipped_read_fractionFormat: String = "%s" - - /** pvalue threshold for fishers exact test of clipping bias in mutant reads vs ref reads */ - @Argument(fullName="clipping_bias_pvalue_threshold", shortName="", doc="pvalue threshold for fishers exact test of clipping bias in mutant reads vs ref reads", required=false, exclusiveOf="", validation="") - var clipping_bias_pvalue_threshold: Option[Float] = None - - /** Format string for clipping_bias_pvalue_threshold */ - @Argument(fullName="clipping_bias_pvalue_thresholdFormat", shortName="", doc="Format string for clipping_bias_pvalue_threshold", required=false, exclusiveOf="", validation="") - var clipping_bias_pvalue_thresholdFormat: String = "%s" - - /** threshold for determining if there is relatedness between the alt and ref allele read piles */ - @Argument(fullName="fraction_mapq0_threshold", shortName="", doc="threshold for determining if there is relatedness between the alt and ref allele read piles", required=false, exclusiveOf="", validation="") - var fraction_mapq0_threshold: Option[Float] = None - - /** Format string for fraction_mapq0_threshold */ - @Argument(fullName="fraction_mapq0_thresholdFormat", shortName="", doc="Format string for fraction_mapq0_threshold", required=false, exclusiveOf="", validation="") - var fraction_mapq0_thresholdFormat: String = "%s" - - /** threshold for clustered read position artifact median */ - @Argument(fullName="pir_median_threshold", shortName="", doc="threshold for clustered read position artifact median", required=false, exclusiveOf="", validation="") - var pir_median_threshold: Option[Double] = None - - /** Format string for pir_median_threshold */ - @Argument(fullName="pir_median_thresholdFormat", shortName="", doc="Format string for pir_median_threshold", required=false, exclusiveOf="", validation="") - var pir_median_thresholdFormat: String = "%s" - - /** threshold for clustered read position artifact MAD */ - @Argument(fullName="pir_mad_threshold", shortName="", doc="threshold for clustered read position artifact MAD", required=false, exclusiveOf="", validation="") - var pir_mad_threshold: Option[Double] = None - - /** Format string for pir_mad_threshold */ - @Argument(fullName="pir_mad_thresholdFormat", shortName="", doc="Format string for pir_mad_threshold", required=false, exclusiveOf="", validation="") - var pir_mad_thresholdFormat: String = "%s" - - /** required minimum value for tumor alt allele maximum mapping quality score */ - @Argument(fullName="required_maximum_alt_allele_mapping_quality_score", shortName="", doc="required minimum value for tumor alt allele maximum mapping quality score", required=false, exclusiveOf="", validation="") - var required_maximum_alt_allele_mapping_quality_score: Option[Int] = None - - /** threshold for maximum alternate allele counts in normal */ - @Argument(fullName="max_alt_alleles_in_normal_count", shortName="", doc="threshold for maximum alternate allele counts in normal", required=false, exclusiveOf="", validation="") - var max_alt_alleles_in_normal_count: Option[Int] = None - - /** threshold for maximum alternate allele quality score sum in normal */ - @Argument(fullName="max_alt_alleles_in_normal_qscore_sum", shortName="", doc="threshold for maximum alternate allele quality score sum in normal", required=false, exclusiveOf="", validation="") - var max_alt_alleles_in_normal_qscore_sum: Option[Int] = None - - /** threshold for maximum alternate allele fraction in normal */ - @Argument(fullName="max_alt_allele_in_normal_fraction", shortName="", doc="threshold for maximum alternate allele fraction in normal", required=false, exclusiveOf="", validation="") - var max_alt_allele_in_normal_fraction: Option[Double] = None - - /** Format string for max_alt_allele_in_normal_fraction */ - @Argument(fullName="max_alt_allele_in_normal_fractionFormat", shortName="", doc="Format string for max_alt_allele_in_normal_fraction", required=false, exclusiveOf="", validation="") - var max_alt_allele_in_normal_fractionFormat: String = "%s" - - /** Phred scale quality score constant to use in power calculations */ - @Argument(fullName="power_constant_qscore", shortName="", doc="Phred scale quality score constant to use in power calculations", required=false, exclusiveOf="", validation="") - var power_constant_qscore: Option[Int] = None - - /** Absolute Copy Number Data, as defined by Absolute, to use in power calculations */ - @Argument(fullName="absolute_copy_number_data", shortName="", doc="Absolute Copy Number Data, as defined by Absolute, to use in power calculations", required=false, exclusiveOf="", validation="") - var absolute_copy_number_data: File = _ - - /** Allelic fraction constant to use in power calculations */ - @Argument(fullName="power_constant_af", shortName="", doc="Allelic fraction constant to use in power calculations", required=false, exclusiveOf="", validation="") - var power_constant_af: Option[Double] = None - - /** Format string for power_constant_af */ - @Argument(fullName="power_constant_afFormat", shortName="", doc="Format string for power_constant_af", required=false, exclusiveOf="", validation="") - var power_constant_afFormat: String = "%s" - - /** Call-stats output */ - @Output(fullName="out", shortName="o", doc="Call-stats output", required=false, exclusiveOf="", validation="") - @Gather(classOf[org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction]) - var out: File = _ - - /** - * Short name of out - * @return Short name of out - */ - def o = this.out - - /** - * Short name of out - * @param value Short name of out - */ - def o_=(value: File) { this.out = value } - - /** VCF file of DBSNP information */ - @Input(fullName="dbsnp", shortName="dbsnp", doc="VCF file of DBSNP information", required=false, exclusiveOf="", validation="") - var dbsnp: Seq[File] = Nil - - /** Dependencies on any indexes of dbsnp */ - @Input(fullName="dbsnpIndexes", shortName="", doc="Dependencies on any indexes of dbsnp", required=false, exclusiveOf="", validation="") - private var dbsnpIndexes: Seq[File] = Nil - - /** VCF file of COSMIC sites */ - @Input(fullName="cosmic", shortName="cosmic", doc="VCF file of COSMIC sites", required=false, exclusiveOf="", validation="") - var cosmic: Seq[File] = Nil - - /** Dependencies on any indexes of cosmic */ - @Input(fullName="cosmicIndexes", shortName="", doc="Dependencies on any indexes of cosmic", required=false, exclusiveOf="", validation="") - private var cosmicIndexes: Seq[File] = Nil - - /** VCF file of sites observed in normal */ - @Input(fullName="normal_panel", shortName="normal_panel", doc="VCF file of sites observed in normal", required=false, exclusiveOf="", validation="") - var normal_panel: Seq[File] = Nil - - /** Dependencies on any indexes of normal_panel */ - @Input(fullName="normal_panelIndexes", shortName="", doc="Dependencies on any indexes of normal_panel", required=false, exclusiveOf="", validation="") - private var normal_panelIndexes: Seq[File] = Nil - - /** write out coverage in WIGGLE format to this file */ - @Output(fullName="coverage_file", shortName="cov", doc="write out coverage in WIGGLE format to this file", required=false, exclusiveOf="", validation="") - @Gather(classOf[org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction]) - var coverage_file: File = _ - - /** - * Short name of coverage_file - * @return Short name of coverage_file - */ - def cov = this.coverage_file - - /** - * Short name of coverage_file - * @param value Short name of coverage_file - */ - def cov_=(value: File) { this.coverage_file = value } - - /** write out 20x of Q20 coverage in WIGGLE format to this file */ - @Output(fullName="coverage_20_q20_file", shortName="cov_q20", doc="write out 20x of Q20 coverage in WIGGLE format to this file", required=false, exclusiveOf="", validation="") - @Gather(classOf[org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction]) - var coverage_20_q20_file: File = _ - - /** - * Short name of coverage_20_q20_file - * @return Short name of coverage_20_q20_file - */ - def cov_q20 = this.coverage_20_q20_file - - /** - * Short name of coverage_20_q20_file - * @param value Short name of coverage_20_q20_file - */ - def cov_q20_=(value: File) { this.coverage_20_q20_file = value } - - /** write out power in WIGGLE format to this file */ - @Output(fullName="power_file", shortName="pow", doc="write out power in WIGGLE format to this file", required=false, exclusiveOf="", validation="") - @Gather(classOf[org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction]) - var power_file: File = _ - - /** - * Short name of power_file - * @return Short name of power_file - */ - def pow = this.power_file - - /** - * Short name of power_file - * @param value Short name of power_file - */ - def pow_=(value: File) { this.power_file = value } - - /** write out tumor read depth in WIGGLE format to this file */ - @Output(fullName="tumor_depth_file", shortName="tdf", doc="write out tumor read depth in WIGGLE format to this file", required=false, exclusiveOf="", validation="") - @Gather(classOf[org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction]) - var tumor_depth_file: File = _ - - /** - * Short name of tumor_depth_file - * @return Short name of tumor_depth_file - */ - def tdf = this.tumor_depth_file - - /** - * Short name of tumor_depth_file - * @param value Short name of tumor_depth_file - */ - def tdf_=(value: File) { this.tumor_depth_file = value } - - /** write out normal read depth in WIGGLE format to this file */ - @Output(fullName="normal_depth_file", shortName="ndf", doc="write out normal read depth in WIGGLE format to this file", required=false, exclusiveOf="", validation="") - @Gather(classOf[org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction]) - var normal_depth_file: File = _ - - /** - * Short name of normal_depth_file - * @return Short name of normal_depth_file - */ - def ndf = this.normal_depth_file - - /** - * Short name of normal_depth_file - * @param value Short name of normal_depth_file - */ - def ndf_=(value: File) { this.normal_depth_file = value } - - /** if a read has mismatching number of bases and base qualities, filter out the read instead of blowing up. */ - @Argument(fullName="filter_mismatching_base_and_quals", shortName="filterMBQ", doc="if a read has mismatching number of bases and base qualities, filter out the read instead of blowing up.", required=false, exclusiveOf="", validation="") - var filter_mismatching_base_and_quals: Boolean = _ - - /** - * Short name of filter_mismatching_base_and_quals - * @return Short name of filter_mismatching_base_and_quals - */ - def filterMBQ = this.filter_mismatching_base_and_quals - - /** - * Short name of filter_mismatching_base_and_quals - * @param value Short name of filter_mismatching_base_and_quals - */ - def filterMBQ_=(value: Boolean) { this.filter_mismatching_base_and_quals = value } - - override def freezeFieldValues() { - super.freezeFieldValues() - dbsnpIndexes ++= dbsnp.filter(orig => orig != null).map(orig => new File(orig.getPath + ".idx")) - cosmicIndexes ++= cosmic.filter(orig => orig != null).map(orig => new File(orig.getPath + ".idx")) - normal_panelIndexes ++= normal_panel.filter(orig => orig != null).map(orig => new File(orig.getPath + ".idx")) - } - - override def commandLine = super.commandLine + conditional(noop, "--noop", escape=true, format="%s") + conditional(enable_extended_output, "--enable_extended_output", escape=true, format="%s") + conditional(artifact_detection_mode, "--artifact_detection_mode", escape=true, format="%s") + optional("--tumor_sample_name", tumor_sample_name, spaceSeparated=true, escape=true, format="%s") + optional("--bam_tumor_sample_name", bam_tumor_sample_name, spaceSeparated=true, escape=true, format="%s") + optional("--normal_sample_name", normal_sample_name, spaceSeparated=true, escape=true, format="%s") + conditional(force_output, "--force_output", escape=true, format="%s") + conditional(force_alleles, "--force_alleles", escape=true, format="%s") + optional("--initial_tumor_lod", initial_tumor_lod, spaceSeparated=true, escape=true, format=initial_tumor_lodFormat) + optional("--tumor_lod", tumor_lod, spaceSeparated=true, escape=true, format=tumor_lodFormat) + optional("--fraction_contamination", fraction_contamination, spaceSeparated=true, escape=true, format=fraction_contaminationFormat) + optional("--minimum_mutation_cell_fraction", minimum_mutation_cell_fraction, spaceSeparated=true, escape=true, format=minimum_mutation_cell_fractionFormat) + optional("--normal_lod", normal_lod, spaceSeparated=true, escape=true, format=normal_lodFormat) + optional("--normal_artifact_lod", normal_artifact_lod, spaceSeparated=true, escape=true, format=normal_artifact_lodFormat) + optional("--strand_artifact_lod", strand_artifact_lod, spaceSeparated=true, escape=true, format=strand_artifact_lodFormat) + optional("--strand_artifact_power_threshold", strand_artifact_power_threshold, spaceSeparated=true, escape=true, format=strand_artifact_power_thresholdFormat) + optional("--dbsnp_normal_lod", dbsnp_normal_lod, spaceSeparated=true, escape=true, format=dbsnp_normal_lodFormat) + optional("--somatic_classification_normal_power_threshold", somatic_classification_normal_power_threshold, spaceSeparated=true, escape=true, format=somatic_classification_normal_power_thresholdFormat) + optional("--minimum_normal_allele_fraction", minimum_normal_allele_fraction, spaceSeparated=true, escape=true, format=minimum_normal_allele_fractionFormat) + optional("--tumor_f_pretest", tumor_f_pretest, spaceSeparated=true, escape=true, format=tumor_f_pretestFormat) + optional("--min_qscore", min_qscore, spaceSeparated=true, escape=true, format="%s") + optional("--gap_events_threshold", gap_events_threshold, spaceSeparated=true, escape=true, format="%s") + optional("--heavily_clipped_read_fraction", heavily_clipped_read_fraction, spaceSeparated=true, escape=true, format=heavily_clipped_read_fractionFormat) + optional("--clipping_bias_pvalue_threshold", clipping_bias_pvalue_threshold, spaceSeparated=true, escape=true, format=clipping_bias_pvalue_thresholdFormat) + optional("--fraction_mapq0_threshold", fraction_mapq0_threshold, spaceSeparated=true, escape=true, format=fraction_mapq0_thresholdFormat) + optional("--pir_median_threshold", pir_median_threshold, spaceSeparated=true, escape=true, format=pir_median_thresholdFormat) + optional("--pir_mad_threshold", pir_mad_threshold, spaceSeparated=true, escape=true, format=pir_mad_thresholdFormat) + optional("--required_maximum_alt_allele_mapping_quality_score", required_maximum_alt_allele_mapping_quality_score, spaceSeparated=true, escape=true, format="%s") + optional("--max_alt_alleles_in_normal_count", max_alt_alleles_in_normal_count, spaceSeparated=true, escape=true, format="%s") + optional("--max_alt_alleles_in_normal_qscore_sum", max_alt_alleles_in_normal_qscore_sum, spaceSeparated=true, escape=true, format="%s") + optional("--max_alt_allele_in_normal_fraction", max_alt_allele_in_normal_fraction, spaceSeparated=true, escape=true, format=max_alt_allele_in_normal_fractionFormat) + optional("--power_constant_qscore", power_constant_qscore, spaceSeparated=true, escape=true, format="%s") + optional("--absolute_copy_number_data", absolute_copy_number_data, spaceSeparated=true, escape=true, format="%s") + optional("--power_constant_af", power_constant_af, spaceSeparated=true, escape=true, format=power_constant_afFormat) + optional("-o", out, spaceSeparated=true, escape=true, format="%s") + repeat("-dbsnp", dbsnp, formatPrefix=TaggedFile.formatCommandLineParameter, spaceSeparated=true, escape=true, format="%s") + repeat("-cosmic", cosmic, formatPrefix=TaggedFile.formatCommandLineParameter, spaceSeparated=true, escape=true, format="%s") + repeat("-normal_panel", normal_panel, formatPrefix=TaggedFile.formatCommandLineParameter, spaceSeparated=true, escape=true, format="%s") + optional("-cov", coverage_file, spaceSeparated=true, escape=true, format="%s") + optional("-cov_q20", coverage_20_q20_file, spaceSeparated=true, escape=true, format="%s") + optional("-pow", power_file, spaceSeparated=true, escape=true, format="%s") + optional("-tdf", tumor_depth_file, spaceSeparated=true, escape=true, format="%s") + optional("-ndf", normal_depth_file, spaceSeparated=true, escape=true, format="%s") + conditional(filter_mismatching_base_and_quals, "-filterMBQ", escape=true, format="%s") -} From f9095c7ab74d59b35b85750886c99711b44f143c Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Mon, 24 Sep 2012 17:01:17 -0400 Subject: [PATCH 20/93] Generic input file name recognition (still need to implement support to FastQ, but it now can at least accept it) --- .../qscripts/DataProcessingPipeline.scala | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala index 56f6460fb..c21db30ce 100755 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala @@ -96,6 +96,7 @@ class DataProcessingPipeline extends QScript { var cleanModelEnum: ConsensusDeterminationModel = ConsensusDeterminationModel.USE_READS + val bwaParameters: String = " -q 5 -l 32 -k 2 -t 4 -o 1 " @@ -165,12 +166,15 @@ class DataProcessingPipeline extends QScript { var realignedBams: Seq[File] = Seq() var index = 1 for (bam <- bams) { - // first revert the BAM file to the original qualities - val saiFile1 = swapExt(bam, ".bam", "." + index + ".1.sai") - val saiFile2 = swapExt(bam, ".bam", "." + index + ".2.sai") - val realignedSamFile = swapExt(bam, ".bam", "." + index + ".realigned.sam") - val realignedBamFile = swapExt(bam, ".bam", "." + index + ".realigned.bam") - val rgRealignedBamFile = swapExt(bam, ".bam", "." + index + ".realigned.rg.bam") + val extension = bam.toString.substring(bam.toString.length - 4) + + + + val saiFile1 = swapExt(bam, extension, "." + index + ".1.sai") + val saiFile2 = swapExt(bam, extension, "." + index + ".2.sai") + val realignedSamFile = swapExt(bam, extension, "." + index + ".realigned.sam") + val realignedBamFile = swapExt(bam, extension, "." + index + ".realigned.bam") + val rgRealignedBamFile = swapExt(bam, extension, "." + index + ".realigned.rg.bam") if (useBWAse) { val revertedBAM = revertBAM(bam, true) @@ -444,7 +448,7 @@ class DataProcessingPipeline extends QScript { case class bwa_aln_se (inBam: File, outSai: File) extends CommandLineFunction with ExternalCommonArgs { @Input(doc="bam file to be aligned") var bam = inBam @Output(doc="output sai file") var sai = outSai - def commandLine = bwaPath + " aln -t " + bwaThreads + " -q 5 " + reference + " -b " + bam + " > " + sai + def commandLine = bwaPath + " aln -t " + bwaThreads + bwaParameters + reference + " -b " + bam + " > " + sai this.analysisName = queueLogDir + outSai + ".bwa_aln_se" this.jobName = queueLogDir + outSai + ".bwa_aln_se" } @@ -452,7 +456,7 @@ class DataProcessingPipeline extends QScript { case class bwa_aln_pe (inBam: File, outSai1: File, index: Int) extends CommandLineFunction with ExternalCommonArgs { @Input(doc="bam file to be aligned") var bam = inBam @Output(doc="output sai file for 1st mating pair") var sai = outSai1 - def commandLine = bwaPath + " aln -t " + bwaThreads + " -q 5 " + reference + " -b" + index + " " + bam + " > " + sai + def commandLine = bwaPath + " aln -t " + bwaThreads + bwaParameters + reference + " -b" + index + " " + bam + " > " + sai this.analysisName = queueLogDir + outSai1 + ".bwa_aln_pe1" this.jobName = queueLogDir + outSai1 + ".bwa_aln_pe1" } From 08b6d1559c2d072541dec3f960c8978e0b952fba Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Tue, 25 Sep 2012 12:02:34 -0400 Subject: [PATCH 21/93] Reverting the DPP to the original version, going to create a new simplified version for CMI in private. --- .../qscripts/DataProcessingPipeline.scala | 20 ++++++++----------- 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala index c21db30ce..56f6460fb 100755 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala @@ -96,7 +96,6 @@ class DataProcessingPipeline extends QScript { var cleanModelEnum: ConsensusDeterminationModel = ConsensusDeterminationModel.USE_READS - val bwaParameters: String = " -q 5 -l 32 -k 2 -t 4 -o 1 " @@ -166,15 +165,12 @@ class DataProcessingPipeline extends QScript { var realignedBams: Seq[File] = Seq() var index = 1 for (bam <- bams) { - val extension = bam.toString.substring(bam.toString.length - 4) - - - - val saiFile1 = swapExt(bam, extension, "." + index + ".1.sai") - val saiFile2 = swapExt(bam, extension, "." + index + ".2.sai") - val realignedSamFile = swapExt(bam, extension, "." + index + ".realigned.sam") - val realignedBamFile = swapExt(bam, extension, "." + index + ".realigned.bam") - val rgRealignedBamFile = swapExt(bam, extension, "." + index + ".realigned.rg.bam") + // first revert the BAM file to the original qualities + val saiFile1 = swapExt(bam, ".bam", "." + index + ".1.sai") + val saiFile2 = swapExt(bam, ".bam", "." + index + ".2.sai") + val realignedSamFile = swapExt(bam, ".bam", "." + index + ".realigned.sam") + val realignedBamFile = swapExt(bam, ".bam", "." + index + ".realigned.bam") + val rgRealignedBamFile = swapExt(bam, ".bam", "." + index + ".realigned.rg.bam") if (useBWAse) { val revertedBAM = revertBAM(bam, true) @@ -448,7 +444,7 @@ class DataProcessingPipeline extends QScript { case class bwa_aln_se (inBam: File, outSai: File) extends CommandLineFunction with ExternalCommonArgs { @Input(doc="bam file to be aligned") var bam = inBam @Output(doc="output sai file") var sai = outSai - def commandLine = bwaPath + " aln -t " + bwaThreads + bwaParameters + reference + " -b " + bam + " > " + sai + def commandLine = bwaPath + " aln -t " + bwaThreads + " -q 5 " + reference + " -b " + bam + " > " + sai this.analysisName = queueLogDir + outSai + ".bwa_aln_se" this.jobName = queueLogDir + outSai + ".bwa_aln_se" } @@ -456,7 +452,7 @@ class DataProcessingPipeline extends QScript { case class bwa_aln_pe (inBam: File, outSai1: File, index: Int) extends CommandLineFunction with ExternalCommonArgs { @Input(doc="bam file to be aligned") var bam = inBam @Output(doc="output sai file for 1st mating pair") var sai = outSai1 - def commandLine = bwaPath + " aln -t " + bwaThreads + bwaParameters + reference + " -b" + index + " " + bam + " > " + sai + def commandLine = bwaPath + " aln -t " + bwaThreads + " -q 5 " + reference + " -b" + index + " " + bam + " > " + sai this.analysisName = queueLogDir + outSai1 + ".bwa_aln_pe1" this.jobName = queueLogDir + outSai1 + ".bwa_aln_pe1" } From 0c177092231c623dca8c0e84fb47a4af94092817 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Tue, 25 Sep 2012 17:13:50 -0400 Subject: [PATCH 22/93] First implementation of a generic 'bundled' Data Processing Pipeline for germline and cancer. not ready for prime time yet! --- .../src/org/broadinstitute/sting/queue/util/QScriptUtils.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/public/scala/src/org/broadinstitute/sting/queue/util/QScriptUtils.scala b/public/scala/src/org/broadinstitute/sting/queue/util/QScriptUtils.scala index 1529d9951..f684e533f 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/util/QScriptUtils.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/util/QScriptUtils.scala @@ -57,7 +57,8 @@ object QScriptUtils { for (file <- fromFile(in).getLines()) if (!file.startsWith("#") && !file.isEmpty ) list :+= new File(file.trim()) - list.sortWith(_.compareTo(_) < 0) +// list.sortWith(_.compareTo(_) < 0) + list } /** From 2311606de4addf07c65540735c8b09b1385f30db Mon Sep 17 00:00:00 2001 From: Kristian Cibulskis Date: Wed, 3 Oct 2012 16:25:34 -0400 Subject: [PATCH 29/93] initial cancer pipeline with mutations and partial indel support --- .../queue/extensions/cancer/MuTect.scala | 378 ++++++++++++++++++ 1 file changed, 378 insertions(+) create mode 100644 public/scala/src/org/broadinstitute/sting/queue/extensions/cancer/MuTect.scala diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/cancer/MuTect.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/cancer/MuTect.scala new file mode 100644 index 000000000..623d397d4 --- /dev/null +++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/cancer/MuTect.scala @@ -0,0 +1,378 @@ +package org.broadinstitute.sting.queue.extensions.cancer + +import java.io.File +import org.broadinstitute.sting.commandline.Argument +import org.broadinstitute.sting.commandline.Gather +import org.broadinstitute.sting.commandline.Input +import org.broadinstitute.sting.commandline.Output +import org.broadinstitute.sting.queue.function.scattergather.ScatterGatherableFunction +import org.broadinstitute.sting.queue.extensions.gatk.{LocusScatterFunction, TaggedFile} + +class MuTect extends org.broadinstitute.sting.queue.extensions.gatk.CommandLineGATK with ScatterGatherableFunction { + analysisName = "MuTect" + analysis_type = "MuTect" + scatterClass = classOf[LocusScatterFunction] + + /** used for debugging, basically exit as soon as we get the reads */ + @Argument(fullName="noop", shortName="", doc="used for debugging, basically exit as soon as we get the reads", required=false, exclusiveOf="", validation="") + var noop: Boolean = _ + + /** add many additional columns of statistics to the output file */ + @Argument(fullName="enable_extended_output", shortName="", doc="add many additional columns of statistics to the output file", required=false, exclusiveOf="", validation="") + var enable_extended_output: Boolean = _ + + /** used when running the caller on a normal (as if it were a tumor) to detect artifacts */ + @Argument(fullName="artifact_detection_mode", shortName="", doc="used when running the caller on a normal (as if it were a tumor) to detect artifacts", required=false, exclusiveOf="", validation="") + var artifact_detection_mode: Boolean = _ + + /** name to use for tumor in output files */ + @Argument(fullName="tumor_sample_name", shortName="", doc="name to use for tumor in output files", required=false, exclusiveOf="", validation="") + var tumor_sample_name: String = _ + + /** if the tumor bam contains multiple samples, only use read groups with SM equal to this value */ + @Argument(fullName="bam_tumor_sample_name", shortName="", doc="if the tumor bam contains multiple samples, only use read groups with SM equal to this value", required=false, exclusiveOf="", validation="") + var bam_tumor_sample_name: String = _ + + /** name to use for normal in output files */ + @Argument(fullName="normal_sample_name", shortName="", doc="name to use for normal in output files", required=false, exclusiveOf="", validation="") + var normal_sample_name: String = _ + + /** force output for each site */ + @Argument(fullName="force_output", shortName="", doc="force output for each site", required=false, exclusiveOf="", validation="") + var force_output: Boolean = _ + + /** force output for all alleles at each site */ + @Argument(fullName="force_alleles", shortName="", doc="force output for all alleles at each site", required=false, exclusiveOf="", validation="") + var force_alleles: Boolean = _ + + /** Initial LOD threshold for calling tumor variant */ + @Argument(fullName="initial_tumor_lod", shortName="", doc="Initial LOD threshold for calling tumor variant", required=false, exclusiveOf="", validation="") + var initial_tumor_lod: Option[Float] = None + + /** Format string for initial_tumor_lod */ + @Argument(fullName="initial_tumor_lodFormat", shortName="", doc="Format string for initial_tumor_lod", required=false, exclusiveOf="", validation="") + var initial_tumor_lodFormat: String = "%s" + + /** LOD threshold for calling tumor variant */ + @Argument(fullName="tumor_lod", shortName="", doc="LOD threshold for calling tumor variant", required=false, exclusiveOf="", validation="") + var tumor_lod: Option[Float] = None + + /** Format string for tumor_lod */ + @Argument(fullName="tumor_lodFormat", shortName="", doc="Format string for tumor_lod", required=false, exclusiveOf="", validation="") + var tumor_lodFormat: String = "%s" + + /** estimate of fraction (0-1) of physical contamination with other unrelated samples */ + @Argument(fullName="fraction_contamination", shortName="", doc="estimate of fraction (0-1) of physical contamination with other unrelated samples", required=false, exclusiveOf="", validation="") + var fraction_contamination: Option[Float] = None + + /** Format string for fraction_contamination */ + @Argument(fullName="fraction_contaminationFormat", shortName="", doc="Format string for fraction_contamination", required=false, exclusiveOf="", validation="") + var fraction_contaminationFormat: String = "%s" + + /** minimum fraction of cells which are presumed to have a mutation, used to handle non-clonality and contamination */ + @Argument(fullName="minimum_mutation_cell_fraction", shortName="", doc="minimum fraction of cells which are presumed to have a mutation, used to handle non-clonality and contamination", required=false, exclusiveOf="", validation="") + var minimum_mutation_cell_fraction: Option[Float] = None + + /** Format string for minimum_mutation_cell_fraction */ + @Argument(fullName="minimum_mutation_cell_fractionFormat", shortName="", doc="Format string for minimum_mutation_cell_fraction", required=false, exclusiveOf="", validation="") + var minimum_mutation_cell_fractionFormat: String = "%s" + + /** LOD threshold for calling normal non-germline */ + @Argument(fullName="normal_lod", shortName="", doc="LOD threshold for calling normal non-germline", required=false, exclusiveOf="", validation="") + var normal_lod: Option[Float] = None + + /** Format string for normal_lod */ + @Argument(fullName="normal_lodFormat", shortName="", doc="Format string for normal_lod", required=false, exclusiveOf="", validation="") + var normal_lodFormat: String = "%s" + + /** LOD threshold for calling normal non-variant */ + @Argument(fullName="normal_artifact_lod", shortName="", doc="LOD threshold for calling normal non-variant", required=false, exclusiveOf="", validation="") + var normal_artifact_lod: Option[Float] = None + + /** Format string for normal_artifact_lod */ + @Argument(fullName="normal_artifact_lodFormat", shortName="", doc="Format string for normal_artifact_lod", required=false, exclusiveOf="", validation="") + var normal_artifact_lodFormat: String = "%s" + + /** LOD threshold for calling strand bias */ + @Argument(fullName="strand_artifact_lod", shortName="", doc="LOD threshold for calling strand bias", required=false, exclusiveOf="", validation="") + var strand_artifact_lod: Option[Float] = None + + /** Format string for strand_artifact_lod */ + @Argument(fullName="strand_artifact_lodFormat", shortName="", doc="Format string for strand_artifact_lod", required=false, exclusiveOf="", validation="") + var strand_artifact_lodFormat: String = "%s" + + /** power threshold for calling strand bias */ + @Argument(fullName="strand_artifact_power_threshold", shortName="", doc="power threshold for calling strand bias", required=false, exclusiveOf="", validation="") + var strand_artifact_power_threshold: Option[Float] = None + + /** Format string for strand_artifact_power_threshold */ + @Argument(fullName="strand_artifact_power_thresholdFormat", shortName="", doc="Format string for strand_artifact_power_threshold", required=false, exclusiveOf="", validation="") + var strand_artifact_power_thresholdFormat: String = "%s" + + /** LOD threshold for calling normal non-variant at dbsnp sites */ + @Argument(fullName="dbsnp_normal_lod", shortName="", doc="LOD threshold for calling normal non-variant at dbsnp sites", required=false, exclusiveOf="", validation="") + var dbsnp_normal_lod: Option[Float] = None + + /** Format string for dbsnp_normal_lod */ + @Argument(fullName="dbsnp_normal_lodFormat", shortName="", doc="Format string for dbsnp_normal_lod", required=false, exclusiveOf="", validation="") + var dbsnp_normal_lodFormat: String = "%s" + + /** Power threshold for normal to determine germline vs variant */ + @Argument(fullName="somatic_classification_normal_power_threshold", shortName="", doc="Power threshold for normal to determine germline vs variant", required=false, exclusiveOf="", validation="") + var somatic_classification_normal_power_threshold: Option[Float] = None + + /** Format string for somatic_classification_normal_power_threshold */ + @Argument(fullName="somatic_classification_normal_power_thresholdFormat", shortName="", doc="Format string for somatic_classification_normal_power_threshold", required=false, exclusiveOf="", validation="") + var somatic_classification_normal_power_thresholdFormat: String = "%s" + + /** minimum allele fraction to be considered in normal, useful for normal sample contaminated with tumor */ + @Argument(fullName="minimum_normal_allele_fraction", shortName="", doc="minimum allele fraction to be considered in normal, useful for normal sample contaminated with tumor", required=false, exclusiveOf="", validation="") + var minimum_normal_allele_fraction: Option[Float] = None + + /** Format string for minimum_normal_allele_fraction */ + @Argument(fullName="minimum_normal_allele_fractionFormat", shortName="", doc="Format string for minimum_normal_allele_fraction", required=false, exclusiveOf="", validation="") + var minimum_normal_allele_fractionFormat: String = "%s" + + /** for computational efficiency, reject sites with allelic fraction below this threshold */ + @Argument(fullName="tumor_f_pretest", shortName="", doc="for computational efficiency, reject sites with allelic fraction below this threshold", required=false, exclusiveOf="", validation="") + var tumor_f_pretest: Option[Float] = None + + /** Format string for tumor_f_pretest */ + @Argument(fullName="tumor_f_pretestFormat", shortName="", doc="Format string for tumor_f_pretest", required=false, exclusiveOf="", validation="") + var tumor_f_pretestFormat: String = "%s" + + /** threshold for minimum base quality score */ + @Argument(fullName="min_qscore", shortName="", doc="threshold for minimum base quality score", required=false, exclusiveOf="", validation="") + var min_qscore: Option[Int] = None + + /** how many gapped events (ins/del) are allowed in proximity to this candidate */ + @Argument(fullName="gap_events_threshold", shortName="", doc="how many gapped events (ins/del) are allowed in proximity to this candidate", required=false, exclusiveOf="", validation="") + var gap_events_threshold: Option[Int] = None + + /** if this fraction or more of the bases in a read are soft/hard clipped, do not use this read for mutation calling */ + @Argument(fullName="heavily_clipped_read_fraction", shortName="", doc="if this fraction or more of the bases in a read are soft/hard clipped, do not use this read for mutation calling", required=false, exclusiveOf="", validation="") + var heavily_clipped_read_fraction: Option[Float] = None + + /** Format string for heavily_clipped_read_fraction */ + @Argument(fullName="heavily_clipped_read_fractionFormat", shortName="", doc="Format string for heavily_clipped_read_fraction", required=false, exclusiveOf="", validation="") + var heavily_clipped_read_fractionFormat: String = "%s" + + /** pvalue threshold for fishers exact test of clipping bias in mutant reads vs ref reads */ + @Argument(fullName="clipping_bias_pvalue_threshold", shortName="", doc="pvalue threshold for fishers exact test of clipping bias in mutant reads vs ref reads", required=false, exclusiveOf="", validation="") + var clipping_bias_pvalue_threshold: Option[Float] = None + + /** Format string for clipping_bias_pvalue_threshold */ + @Argument(fullName="clipping_bias_pvalue_thresholdFormat", shortName="", doc="Format string for clipping_bias_pvalue_threshold", required=false, exclusiveOf="", validation="") + var clipping_bias_pvalue_thresholdFormat: String = "%s" + + /** threshold for determining if there is relatedness between the alt and ref allele read piles */ + @Argument(fullName="fraction_mapq0_threshold", shortName="", doc="threshold for determining if there is relatedness between the alt and ref allele read piles", required=false, exclusiveOf="", validation="") + var fraction_mapq0_threshold: Option[Float] = None + + /** Format string for fraction_mapq0_threshold */ + @Argument(fullName="fraction_mapq0_thresholdFormat", shortName="", doc="Format string for fraction_mapq0_threshold", required=false, exclusiveOf="", validation="") + var fraction_mapq0_thresholdFormat: String = "%s" + + /** threshold for clustered read position artifact median */ + @Argument(fullName="pir_median_threshold", shortName="", doc="threshold for clustered read position artifact median", required=false, exclusiveOf="", validation="") + var pir_median_threshold: Option[Double] = None + + /** Format string for pir_median_threshold */ + @Argument(fullName="pir_median_thresholdFormat", shortName="", doc="Format string for pir_median_threshold", required=false, exclusiveOf="", validation="") + var pir_median_thresholdFormat: String = "%s" + + /** threshold for clustered read position artifact MAD */ + @Argument(fullName="pir_mad_threshold", shortName="", doc="threshold for clustered read position artifact MAD", required=false, exclusiveOf="", validation="") + var pir_mad_threshold: Option[Double] = None + + /** Format string for pir_mad_threshold */ + @Argument(fullName="pir_mad_thresholdFormat", shortName="", doc="Format string for pir_mad_threshold", required=false, exclusiveOf="", validation="") + var pir_mad_thresholdFormat: String = "%s" + + /** required minimum value for tumor alt allele maximum mapping quality score */ + @Argument(fullName="required_maximum_alt_allele_mapping_quality_score", shortName="", doc="required minimum value for tumor alt allele maximum mapping quality score", required=false, exclusiveOf="", validation="") + var required_maximum_alt_allele_mapping_quality_score: Option[Int] = None + + /** threshold for maximum alternate allele counts in normal */ + @Argument(fullName="max_alt_alleles_in_normal_count", shortName="", doc="threshold for maximum alternate allele counts in normal", required=false, exclusiveOf="", validation="") + var max_alt_alleles_in_normal_count: Option[Int] = None + + /** threshold for maximum alternate allele quality score sum in normal */ + @Argument(fullName="max_alt_alleles_in_normal_qscore_sum", shortName="", doc="threshold for maximum alternate allele quality score sum in normal", required=false, exclusiveOf="", validation="") + var max_alt_alleles_in_normal_qscore_sum: Option[Int] = None + + /** threshold for maximum alternate allele fraction in normal */ + @Argument(fullName="max_alt_allele_in_normal_fraction", shortName="", doc="threshold for maximum alternate allele fraction in normal", required=false, exclusiveOf="", validation="") + var max_alt_allele_in_normal_fraction: Option[Double] = None + + /** Format string for max_alt_allele_in_normal_fraction */ + @Argument(fullName="max_alt_allele_in_normal_fractionFormat", shortName="", doc="Format string for max_alt_allele_in_normal_fraction", required=false, exclusiveOf="", validation="") + var max_alt_allele_in_normal_fractionFormat: String = "%s" + + /** Phred scale quality score constant to use in power calculations */ + @Argument(fullName="power_constant_qscore", shortName="", doc="Phred scale quality score constant to use in power calculations", required=false, exclusiveOf="", validation="") + var power_constant_qscore: Option[Int] = None + + /** Absolute Copy Number Data, as defined by Absolute, to use in power calculations */ + @Argument(fullName="absolute_copy_number_data", shortName="", doc="Absolute Copy Number Data, as defined by Absolute, to use in power calculations", required=false, exclusiveOf="", validation="") + var absolute_copy_number_data: File = _ + + /** Allelic fraction constant to use in power calculations */ + @Argument(fullName="power_constant_af", shortName="", doc="Allelic fraction constant to use in power calculations", required=false, exclusiveOf="", validation="") + var power_constant_af: Option[Double] = None + + /** Format string for power_constant_af */ + @Argument(fullName="power_constant_afFormat", shortName="", doc="Format string for power_constant_af", required=false, exclusiveOf="", validation="") + var power_constant_afFormat: String = "%s" + + /** Call-stats output */ + @Output(fullName="out", shortName="o", doc="Call-stats output", required=false, exclusiveOf="", validation="") + @Gather(classOf[org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction]) + var out: File = _ + + /** + * Short name of out + * @return Short name of out + */ + def o = this.out + + /** + * Short name of out + * @param value Short name of out + */ + def o_=(value: File) { this.out = value } + + /** VCF file of DBSNP information */ + @Input(fullName="dbsnp", shortName="dbsnp", doc="VCF file of DBSNP information", required=false, exclusiveOf="", validation="") + var dbsnp: Seq[File] = Nil + + /** Dependencies on any indexes of dbsnp */ + @Input(fullName="dbsnpIndexes", shortName="", doc="Dependencies on any indexes of dbsnp", required=false, exclusiveOf="", validation="") + private var dbsnpIndexes: Seq[File] = Nil + + /** VCF file of COSMIC sites */ + @Input(fullName="cosmic", shortName="cosmic", doc="VCF file of COSMIC sites", required=false, exclusiveOf="", validation="") + var cosmic: Seq[File] = Nil + + /** Dependencies on any indexes of cosmic */ + @Input(fullName="cosmicIndexes", shortName="", doc="Dependencies on any indexes of cosmic", required=false, exclusiveOf="", validation="") + private var cosmicIndexes: Seq[File] = Nil + + /** VCF file of sites observed in normal */ + @Input(fullName="normal_panel", shortName="normal_panel", doc="VCF file of sites observed in normal", required=false, exclusiveOf="", validation="") + var normal_panel: Seq[File] = Nil + + /** Dependencies on any indexes of normal_panel */ + @Input(fullName="normal_panelIndexes", shortName="", doc="Dependencies on any indexes of normal_panel", required=false, exclusiveOf="", validation="") + private var normal_panelIndexes: Seq[File] = Nil + + /** write out coverage in WIGGLE format to this file */ + @Output(fullName="coverage_file", shortName="cov", doc="write out coverage in WIGGLE format to this file", required=false, exclusiveOf="", validation="") + @Gather(classOf[org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction]) + var coverage_file: File = _ + + /** + * Short name of coverage_file + * @return Short name of coverage_file + */ + def cov = this.coverage_file + + /** + * Short name of coverage_file + * @param value Short name of coverage_file + */ + def cov_=(value: File) { this.coverage_file = value } + + /** write out 20x of Q20 coverage in WIGGLE format to this file */ + @Output(fullName="coverage_20_q20_file", shortName="cov_q20", doc="write out 20x of Q20 coverage in WIGGLE format to this file", required=false, exclusiveOf="", validation="") + @Gather(classOf[org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction]) + var coverage_20_q20_file: File = _ + + /** + * Short name of coverage_20_q20_file + * @return Short name of coverage_20_q20_file + */ + def cov_q20 = this.coverage_20_q20_file + + /** + * Short name of coverage_20_q20_file + * @param value Short name of coverage_20_q20_file + */ + def cov_q20_=(value: File) { this.coverage_20_q20_file = value } + + /** write out power in WIGGLE format to this file */ + @Output(fullName="power_file", shortName="pow", doc="write out power in WIGGLE format to this file", required=false, exclusiveOf="", validation="") + @Gather(classOf[org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction]) + var power_file: File = _ + + /** + * Short name of power_file + * @return Short name of power_file + */ + def pow = this.power_file + + /** + * Short name of power_file + * @param value Short name of power_file + */ + def pow_=(value: File) { this.power_file = value } + + /** write out tumor read depth in WIGGLE format to this file */ + @Output(fullName="tumor_depth_file", shortName="tdf", doc="write out tumor read depth in WIGGLE format to this file", required=false, exclusiveOf="", validation="") + @Gather(classOf[org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction]) + var tumor_depth_file: File = _ + + /** + * Short name of tumor_depth_file + * @return Short name of tumor_depth_file + */ + def tdf = this.tumor_depth_file + + /** + * Short name of tumor_depth_file + * @param value Short name of tumor_depth_file + */ + def tdf_=(value: File) { this.tumor_depth_file = value } + + /** write out normal read depth in WIGGLE format to this file */ + @Output(fullName="normal_depth_file", shortName="ndf", doc="write out normal read depth in WIGGLE format to this file", required=false, exclusiveOf="", validation="") + @Gather(classOf[org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction]) + var normal_depth_file: File = _ + + /** + * Short name of normal_depth_file + * @return Short name of normal_depth_file + */ + def ndf = this.normal_depth_file + + /** + * Short name of normal_depth_file + * @param value Short name of normal_depth_file + */ + def ndf_=(value: File) { this.normal_depth_file = value } + + /** if a read has mismatching number of bases and base qualities, filter out the read instead of blowing up. */ + @Argument(fullName="filter_mismatching_base_and_quals", shortName="filterMBQ", doc="if a read has mismatching number of bases and base qualities, filter out the read instead of blowing up.", required=false, exclusiveOf="", validation="") + var filter_mismatching_base_and_quals: Boolean = _ + + /** + * Short name of filter_mismatching_base_and_quals + * @return Short name of filter_mismatching_base_and_quals + */ + def filterMBQ = this.filter_mismatching_base_and_quals + + /** + * Short name of filter_mismatching_base_and_quals + * @param value Short name of filter_mismatching_base_and_quals + */ + def filterMBQ_=(value: Boolean) { this.filter_mismatching_base_and_quals = value } + + override def freezeFieldValues() { + super.freezeFieldValues() + dbsnpIndexes ++= dbsnp.filter(orig => orig != null).map(orig => new File(orig.getPath + ".idx")) + cosmicIndexes ++= cosmic.filter(orig => orig != null).map(orig => new File(orig.getPath + ".idx")) + normal_panelIndexes ++= normal_panel.filter(orig => orig != null).map(orig => new File(orig.getPath + ".idx")) + } + + override def commandLine = super.commandLine + conditional(noop, "--noop", escape=true, format="%s") + conditional(enable_extended_output, "--enable_extended_output", escape=true, format="%s") + conditional(artifact_detection_mode, "--artifact_detection_mode", escape=true, format="%s") + optional("--tumor_sample_name", tumor_sample_name, spaceSeparated=true, escape=true, format="%s") + optional("--bam_tumor_sample_name", bam_tumor_sample_name, spaceSeparated=true, escape=true, format="%s") + optional("--normal_sample_name", normal_sample_name, spaceSeparated=true, escape=true, format="%s") + conditional(force_output, "--force_output", escape=true, format="%s") + conditional(force_alleles, "--force_alleles", escape=true, format="%s") + optional("--initial_tumor_lod", initial_tumor_lod, spaceSeparated=true, escape=true, format=initial_tumor_lodFormat) + optional("--tumor_lod", tumor_lod, spaceSeparated=true, escape=true, format=tumor_lodFormat) + optional("--fraction_contamination", fraction_contamination, spaceSeparated=true, escape=true, format=fraction_contaminationFormat) + optional("--minimum_mutation_cell_fraction", minimum_mutation_cell_fraction, spaceSeparated=true, escape=true, format=minimum_mutation_cell_fractionFormat) + optional("--normal_lod", normal_lod, spaceSeparated=true, escape=true, format=normal_lodFormat) + optional("--normal_artifact_lod", normal_artifact_lod, spaceSeparated=true, escape=true, format=normal_artifact_lodFormat) + optional("--strand_artifact_lod", strand_artifact_lod, spaceSeparated=true, escape=true, format=strand_artifact_lodFormat) + optional("--strand_artifact_power_threshold", strand_artifact_power_threshold, spaceSeparated=true, escape=true, format=strand_artifact_power_thresholdFormat) + optional("--dbsnp_normal_lod", dbsnp_normal_lod, spaceSeparated=true, escape=true, format=dbsnp_normal_lodFormat) + optional("--somatic_classification_normal_power_threshold", somatic_classification_normal_power_threshold, spaceSeparated=true, escape=true, format=somatic_classification_normal_power_thresholdFormat) + optional("--minimum_normal_allele_fraction", minimum_normal_allele_fraction, spaceSeparated=true, escape=true, format=minimum_normal_allele_fractionFormat) + optional("--tumor_f_pretest", tumor_f_pretest, spaceSeparated=true, escape=true, format=tumor_f_pretestFormat) + optional("--min_qscore", min_qscore, spaceSeparated=true, escape=true, format="%s") + optional("--gap_events_threshold", gap_events_threshold, spaceSeparated=true, escape=true, format="%s") + optional("--heavily_clipped_read_fraction", heavily_clipped_read_fraction, spaceSeparated=true, escape=true, format=heavily_clipped_read_fractionFormat) + optional("--clipping_bias_pvalue_threshold", clipping_bias_pvalue_threshold, spaceSeparated=true, escape=true, format=clipping_bias_pvalue_thresholdFormat) + optional("--fraction_mapq0_threshold", fraction_mapq0_threshold, spaceSeparated=true, escape=true, format=fraction_mapq0_thresholdFormat) + optional("--pir_median_threshold", pir_median_threshold, spaceSeparated=true, escape=true, format=pir_median_thresholdFormat) + optional("--pir_mad_threshold", pir_mad_threshold, spaceSeparated=true, escape=true, format=pir_mad_thresholdFormat) + optional("--required_maximum_alt_allele_mapping_quality_score", required_maximum_alt_allele_mapping_quality_score, spaceSeparated=true, escape=true, format="%s") + optional("--max_alt_alleles_in_normal_count", max_alt_alleles_in_normal_count, spaceSeparated=true, escape=true, format="%s") + optional("--max_alt_alleles_in_normal_qscore_sum", max_alt_alleles_in_normal_qscore_sum, spaceSeparated=true, escape=true, format="%s") + optional("--max_alt_allele_in_normal_fraction", max_alt_allele_in_normal_fraction, spaceSeparated=true, escape=true, format=max_alt_allele_in_normal_fractionFormat) + optional("--power_constant_qscore", power_constant_qscore, spaceSeparated=true, escape=true, format="%s") + optional("--absolute_copy_number_data", absolute_copy_number_data, spaceSeparated=true, escape=true, format="%s") + optional("--power_constant_af", power_constant_af, spaceSeparated=true, escape=true, format=power_constant_afFormat) + optional("-o", out, spaceSeparated=true, escape=true, format="%s") + repeat("-dbsnp", dbsnp, formatPrefix=TaggedFile.formatCommandLineParameter, spaceSeparated=true, escape=true, format="%s") + repeat("-cosmic", cosmic, formatPrefix=TaggedFile.formatCommandLineParameter, spaceSeparated=true, escape=true, format="%s") + repeat("-normal_panel", normal_panel, formatPrefix=TaggedFile.formatCommandLineParameter, spaceSeparated=true, escape=true, format="%s") + optional("-cov", coverage_file, spaceSeparated=true, escape=true, format="%s") + optional("-cov_q20", coverage_20_q20_file, spaceSeparated=true, escape=true, format="%s") + optional("-pow", power_file, spaceSeparated=true, escape=true, format="%s") + optional("-tdf", tumor_depth_file, spaceSeparated=true, escape=true, format="%s") + optional("-ndf", normal_depth_file, spaceSeparated=true, escape=true, format="%s") + conditional(filter_mismatching_base_and_quals, "-filterMBQ", escape=true, format="%s") +} From 88297606f019da0e4b9725d5cad58abd1924a2d7 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Wed, 10 Oct 2012 13:20:30 -0400 Subject: [PATCH 31/93] Adding intellij example configuration files --- intellij_example.tar.bz2 | Bin 0 -> 7520 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 intellij_example.tar.bz2 diff --git a/intellij_example.tar.bz2 b/intellij_example.tar.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..bce16045cd1cc476305c5e59d07ff9b94b8e5d73 GIT binary patch literal 7520 zcmV-m9iQStT4*^jL0KkKS)7!cJOD4F|M~yi5CDHu|NsC0|M36+|L{No06+`?06+jh z1{h!`o8{-7h4qB5wR>^y>D#MWyS_S4UPrYW)`~XXKBY)=w?n(T?w=>u4%}F@T^prq zM&}*98+P}1CC>JGY3}jFJo}ytO>BKWz24nKq=%-|-tIdTv1Z%Ou|py0B#;mw z0$?VFLqIe|HdE0LQ`8zCr9V{lAE^M-CYlc@B!L1ZL{xsM>S#SrDYXw!Hq`?_27mwn z0006)At_HHdU~eyLFyU;NYu?W zO%G7?YBUW!LqkS@9-*KBcwhGq&L2>c7{2Bv%KPh~XqovPD`P$wqr0wA{OX>F7ZKmy zDsL?$v;dI_RDWe46~zQU)lvw>NPvKQ{r?Z1(}ww2_)M*fQwCl2HJP1e>Ri?dCZpMN zP=M?SA|wk1NPtvh=J3} zDw+~D0aFk#a8yBk1VvNe{^Q2@ATLqre5hlqO+ zJH6U_g@n*il^HZ-l$Xlw^mBWD_ji=uWfv%71y1==(-ozyjYp|LKHs?f9BLgry=7~; zQzc6vctrO5Ohh{&v$XFjif1`0L{s*Olu^?q8x}#rqzyaoVB#B4g(81Jx};qlYMjj? z2dYy`yjW&t6?wYx0N9Uh!RRNLa}rLdMi`DPNP-P&$TYbGk_IqjZvxK0qQ|cbe@$>KA+XQo15<95gH&cGF>(8JUi?q#;jjQhX7>?$D_)jV*fSGa99hISqAgCirHOBRlz z5-9I|sR$Ai`Yfa3X$-2y?KsEHOGX z@HXt%cKK^%iU{@ItVyi`Mvy~M#VuYFejK)2%tD1XPotWR1C6g7^>_3=M{GBTR=x6} zL$ie9VdKIYD#p_7X2Ij5c!ACsYkPJWj&|WWp}5G;)cO~hDhd@kB;Qp{=JQuL+IA<+ z9G6K-0B_4%_wIgYeR?@~V~!zKJsytphBh&$h7$B()y02uDWc?iHKJ3Oou2*+MfMx^=gIf{X3|lWppAz z-I7p3R)QX!Idb;|10w?J`_#t4+9oPM5Ep6hKa7!-%b*Z%Ne=e{WH2)WCKzJBSF^U& zK`^`eM3i5TFVExdQsTLk;G@qq9{>}@N65UeiIk4 z@~k{rAI=Uv%!sgaYY$=o+l0!JKSDr9R$?^eCXzi64~|&0Ce%+awpg_DqW1T;JnT{d zru{VON@j0;c#A;BkSsr#?|iam4$XGUx$RTb)H_pMY<00{Yt{q!{n-}(G#x&{RZq!9 z?#T+_N>%`kP|_a@fmo2-OJrXX!d!y~G&Dn3dYM^vWSZJ!#jw;(nuR5VhC?^k`y8?h4xtX)+j?PU}@iN>ljD+(Qmb z8dniBZ(_g=D1mLVH)}(M5H0vcNC?Cv1(;-)C)j(xXY2nS+gLe%zZ*W^p1!|_zv=O} z_Q!+DzkQ#@*8b@JY?61J{_8LQ7Wa7yo_=DdhrF->(?ZCJE#uL2ybpcSMKqaVBwSv) zNU6k^_VtahV8JC~!U6{d!bq&!4p|_$D-~)<6%s3$x=9Tg-hN~ZTTL>hPndt*gp7cg z+wR+roDg$1Hu?=(-&o%#Ayf15RN!Z%05j!NTZ&r35#?&$A)o_Kq_+aG2Tmv-^Ti}kzPUMz}gW$fs9 zJC4FgjTw|ACp)ypVq7XolvK9GOTU706q(I(#Eu(UtE5jUBYT&==_1{Y?P%VlGrzMb zBE|_o9do|DW~w@PU%$xNzAis=G|wxF)AvA^EC+4@aDj~7z-#+Yu_mAdbYOG_n;S_f z2MF%vrdGcJHml~P2xvLQOIQJlnTe)5Z8JtNk{R2)nVMB@y3m5+0?^4LZPW(|mtd*? zDv;N1W_Rb+ea4+?lrAx`155T=*|7;B>>wiy;Ml)7#VrgvNDTg;;hzJ_HNI$RTTqf4 zfyMeU1R+7uY;dsXgq*(jmUH&D>*C?4NtOzLb-!$s%bXb_Sue0mN5$wkpEM3kv8IvGA=_?Zzv1%_C1tsyl^1@RKH{a zciZw1@6q0f3Zxbb;ED{RY}JFbgv`jo;vnjTP++40u_0!9X$&Jll(K*-u?XPE#-34@ z(2*iq168t)2LIl%)odnH8EI_Yl*{iaXWjeIC?DX z@F63H#+Z$15ur99k-Q~?3g|8Zc=x8AR4D_lS&{5(Wb(9OWn~6P1uWA@lOmW&8qFIU z7SBN;v4-l*AW>A(2|!UvMJG5MwJuL6M+i`eC_Z*#ky3?khBR#txTdfRi~`O|X#<>q zay;fL4M@CAa10!#6DOq2oZBZ*`QRkS_ry;K+IvB5vHgUgDqb| z3@cbx6TJE0H(;83Aleb%_>debXaSWl^D1it&Z*F@y=N{NvpYj~%SKvwWbjz}(2QH5 zzP2fmjwex!Q0DDKn@@wk;d+8XInO!CB-!he7tkDYlzvB5io3y3~eaI5w=k}~_n&-pi{RD^=|p|INIK%M7^Ju>U^q=8=L%d;bA7r& zav%4cJ$b-?7%3m*P6<7LL;|e%;!{W=n0z(DI%^o;Bei5F?M_70?60j?x99A# z3qM)=cXkVG2kmpPJ8oP#mdXvhiT3f-MRyJ&15Gf(d7`7!iiTN|-|lq+clXLmg&CS~L$9jF&hP0Bm8xWJJK=$OMFB>@>4P6z#Xn3dcffp+LAC z!`V2YWzCFixsCZ_7L;F_QL9T?&p+$c)*hPKS8gSO~VTFj|ro3G{Iuof@swuwn#16JUnNC5#N!PbpVLE zvff%F#RxrAU($v^EeV!2DVBm~xc6xi-$OudTP+k6K2v02fE5E45$L!=gi;#?e7UiT zMhuXoWDk1Yu>gn#4)=DU3)lONdeFJ7$d8PZ2g9kss?ltvQp=qrImnG5r+V3zF4fd( z=B;z~@$<2{hrSQ|A=lv6Q712!p!qTbzdD`k(bs>{2Pp3?yqCWFJqT zzsdD8a3Kh21F_vQhkQ6nV4J6$yI@VnB-^q@1Poy1V4pzHT>!|B!-$_&1xU~WX+}BS| zI+H9JIpulj0|c`K4h+Jv+i8{#sDpzsY3kK&nf6^uxxRKVpRY}nq>h`5p#2eSW1JCB zoQDl9g0bddzNVPmI`lIIC+n3Z-%)6 z)m1D@3`N7B@Sgj)AR36~a4}*AFBJoK>y5WL$p%MGX_9PAb!p#rxvRNmYjZFf_p^MM zS*F2;S$4gmw~WV5MV$7|80zk2IN^|p!(1CUj&QiGuRFLn?P!*xF?*d9(+V$}IXT1} zTha_h(muFR1txBW5H`^S1RVSVru%s#2xv)Akeo8AEDyF+y-8{s&ujwd5cgA z{vZA1IN}ss`1mWy37bexU#GAM?R5hGixGW*YN1F6 zNe^y$@Ot{b0s=j%3iJAec(;rNi2(`#p%tYeftrrHgl%C`y`dwy83`F7-YoY_ox@e$ zL&B&_2$DC2^*`wAL)aSaJF>S~WkZ7Y-Du;z{ zz=U=xW(E3CCn_nOid4kc4!;tEos&Z9t35>kz>s9yLG>gOM({gKT^#n%!cAcT@WPry3G z2V$fF(Bh#$fe>JI^|(v(OqCu~Gq|s3b?7=j5C#Z=5wJo%cpU|9AqFF9LZ;v@MFUXd zs2N){bdrWWIl=TxEeH;2AWyYeEYN~qcFD}6*o@`j5Unt1JJgj_YN^CRhB!bw5Oy*P z1DcHm8GutysIu}&!*1M~s!zPA&Oi}v=#?bEF!B<|)OQu(7P)!*MrromGcLP%0kRC2-f z-Ma&_=fdV8BrOEsXnl*Tw^mqWLHgJM!Kwz2*{PSy3 zu1y0}WCf==J8l%kErz3IvwaFAt0TVM@-$^1MCp&5TKidOUd1p zYf^#|AzIhOeK`tn48Ym)u!~-}_Q+l&S8a}MP zqt<;`1h6{%dl7fl4{{^4=5Z9~GklcT&KW~EA3Cu3&;?mZn=%p-SGX&lf>n(5j&iOY zYQ5cz$?+OcwiV^|gbZ zDl{t&8X}C%;tMPtc~k`|q$FrGfm%YMX(X@VI=uFI0&+ldLE%k6`SmVPkC_HWF>Jjk zUDTu0qvTmpsIKA3Q+lPhpdN}HV1^2u*J2-*FWMiW;5$l`j~ao*gwCP$RM5i#_C_Ex zH)zO<;CO6Q=Wwgery}Pl$N)(r*fN-s03O~z4|h@!W6)yYC(`*Uf$0mj(1PL>cZ3#Z z4TbDihB0%-2YpbP;Si{_nnei&VL3_(rX}Qwue;<9@ZzbMbgFgWaWzMX7*HysutU{s z6$yK$fP%s@0>K!oAjF959~anz@iKxw9tVi2%a|O!4?k>B;`D(%h5u(vcUQi+mcKd#qyCaVfaiWCOtfxqGP5gZ9C@~hnd`7h<>b=e_+%r-y-BvC8wl_K{AeW^0CX@V5&D!>>QsSU^i-+=i>^k-=!j-OGaIbfVpkOo zov0DlGYvzkWdki>W|%{31zZ$_CjdJbXwoesiiTpR&LP}<$G(Ags#wwbgBY)fP=_5T zQ4Xh%0B4=sfb<{^vtZvf!~pRs?Q2_tCyTUQVcvR>^r#ARkO%Guv=4>qj_B@o4jqbM zt7aXvQ8X%2q25s!VhE5U6mV4RggF8x`vC+b>2V@PWF#OH>&BrAM934Jfz;}KW^E!s zf_sHg^L}8s>!NOo>yjx%-@oa32x!;2(Ek5!w#C`ezx9s#*X(t`x1;-X6+ z7Tr_H3_VZlT{4sbK3ug=pGr!XjFT71U-2y+_{;q+T4bJi(b= zap}Jvh}lP|*qy&ou8QcP>pMaQngHZp=&Vk$3eem_f*S=$R8;1V4~ZCnf}#?r8iXVQ z0j{X_oxDt(h>0TcAk-a!;E`5M2WU_pz^PFx0iuHO=~{qY8>t!!C^m$obS{f0N*n5P z90a#3kBG3kQg+Y~XP`YbFs-J>`*$M_gdyA-C_|>(hkfw|+l<+w zf}KDpwJ=nwn0Cr`@CL~cY#kjD!*FzDfO#Xz58x^D4{2FCkP+ji120 z`AK?g`I(q&&XIvsdP8c|-6RwyD9PaUmZHX{h)-f%GL}>`AdodPFC>U>q}W4M$7Yrv zXn_GT_PIBf+CkyE8OEjAE9XU38OMGBuvkEl2#MCGQP6b&cqsd};rC<;+96%VZpibB za5i6A1WKt|DOYLN0p46>D$r5;!~>XE;UL^8L$+U03KV+NtWE_Trs&B5hj{M;05y_z( zIB@{aYt}fs#2_Ui8xH+<1|!JJl0XMS2m}_23jO|IA2?LhL%8il_4O)!cmxPzpqfGY zc__a@$5HV6afV(GYNQppH5ZI_4QL$Cz*M(x41|T~>WW%5dc!&O=BaR(D8S87Uc(Wb z4GAcqP$MD;MhMW2k||&T_-I1Sc7quRcUP6)AC5{qas)meG;bs!BEU|AAWLmX=fy+6 z$p=H}qdcCy^{S?BA;kyA^aH=II~+xSl}Oz`ntcF$U;6-Xk#{x0N-aG@bNDK>cMb(tan literal 0 HcmV?d00001 From f085f5d46a79645606fbfc296cdc5aae73c67ae4 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Mon, 24 Sep 2012 16:15:57 -0400 Subject: [PATCH 32/93] Adding default intellij configuration files --- .idea/.name | 1 + .idea/ant.xml | 15 + .idea/codeStyleSettings.xml | 13 + .idea/compiler.xml | 21 + .idea/copyright/profiles_settings.xml | 5 + .idea/encodings.xml | 5 + .idea/highlighting.xml | 8 + .idea/inspectionProfiles/Project_Default.xml | 11 + .../inspectionProfiles/profiles_settings.xml | 7 + .idea/libraries/GATK_libraries.xml | 13 + .idea/misc.xml | 32 ++ .idea/modules.xml | 9 + .idea/scopes/scope_settings.xml | 5 + .idea/uiDesigner.xml | 125 ++++++ .idea/vcs.xml | 10 + .idea/workspace.xml | 386 ++++++++++++++++++ cmi-gatk.iml | 23 ++ 17 files changed, 689 insertions(+) create mode 100644 .idea/.name create mode 100644 .idea/ant.xml create mode 100644 .idea/codeStyleSettings.xml create mode 100644 .idea/compiler.xml create mode 100644 .idea/copyright/profiles_settings.xml create mode 100644 .idea/encodings.xml create mode 100644 .idea/highlighting.xml create mode 100644 .idea/inspectionProfiles/Project_Default.xml create mode 100644 .idea/inspectionProfiles/profiles_settings.xml create mode 100644 .idea/libraries/GATK_libraries.xml create mode 100644 .idea/misc.xml create mode 100644 .idea/modules.xml create mode 100644 .idea/scopes/scope_settings.xml create mode 100644 .idea/uiDesigner.xml create mode 100644 .idea/vcs.xml create mode 100644 .idea/workspace.xml create mode 100644 cmi-gatk.iml diff --git a/.idea/.name b/.idea/.name new file mode 100644 index 000000000..7014f65a5 --- /dev/null +++ b/.idea/.name @@ -0,0 +1 @@ +cmi-gatk \ No newline at end of file diff --git a/.idea/ant.xml b/.idea/ant.xml new file mode 100644 index 000000000..4674eeac9 --- /dev/null +++ b/.idea/ant.xml @@ -0,0 +1,15 @@ + + + + + + + + + + + + + + + diff --git a/.idea/codeStyleSettings.xml b/.idea/codeStyleSettings.xml new file mode 100644 index 000000000..9178b389f --- /dev/null +++ b/.idea/codeStyleSettings.xml @@ -0,0 +1,13 @@ + + + + + + + diff --git a/.idea/compiler.xml b/.idea/compiler.xml new file mode 100644 index 000000000..ded2e9a1d --- /dev/null +++ b/.idea/compiler.xml @@ -0,0 +1,21 @@ + + + + + + diff --git a/.idea/copyright/profiles_settings.xml b/.idea/copyright/profiles_settings.xml new file mode 100644 index 000000000..3572571ad --- /dev/null +++ b/.idea/copyright/profiles_settings.xml @@ -0,0 +1,5 @@ + + + + + \ No newline at end of file diff --git a/.idea/encodings.xml b/.idea/encodings.xml new file mode 100644 index 000000000..e206d70d8 --- /dev/null +++ b/.idea/encodings.xml @@ -0,0 +1,5 @@ + + + + + diff --git a/.idea/highlighting.xml b/.idea/highlighting.xml new file mode 100644 index 000000000..f33b64d94 --- /dev/null +++ b/.idea/highlighting.xml @@ -0,0 +1,8 @@ + + + + + + diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml new file mode 100644 index 000000000..b8c243dbe --- /dev/null +++ b/.idea/inspectionProfiles/Project_Default.xml @@ -0,0 +1,11 @@ + + + + \ No newline at end of file diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 000000000..3b312839b --- /dev/null +++ b/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,7 @@ + + + + \ No newline at end of file diff --git a/.idea/libraries/GATK_libraries.xml b/.idea/libraries/GATK_libraries.xml new file mode 100644 index 000000000..970d0a3dc --- /dev/null +++ b/.idea/libraries/GATK_libraries.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 000000000..afd7f3778 --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,32 @@ + + + + + + + + + + http://www.w3.org/1999/xhtml + + + + + + + diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 000000000..09caa2933 --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,9 @@ + + + + + + + + + diff --git a/.idea/scopes/scope_settings.xml b/.idea/scopes/scope_settings.xml new file mode 100644 index 000000000..922003b84 --- /dev/null +++ b/.idea/scopes/scope_settings.xml @@ -0,0 +1,5 @@ + + + + \ No newline at end of file diff --git a/.idea/uiDesigner.xml b/.idea/uiDesigner.xml new file mode 100644 index 000000000..3b0002030 --- /dev/null +++ b/.idea/uiDesigner.xml @@ -0,0 +1,125 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 000000000..cbc984988 --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,10 @@ + + + + + + + + + diff --git a/.idea/workspace.xml b/.idea/workspace.xml new file mode 100644 index 000000000..87ab79287 --- /dev/null +++ b/.idea/workspace.xml @@ -0,0 +1,386 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + localhost + 5050 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + cmi-gatk + + + + + + + + GATK libraries + + + + + + + + + diff --git a/cmi-gatk.iml b/cmi-gatk.iml new file mode 100644 index 000000000..e63aff535 --- /dev/null +++ b/cmi-gatk.iml @@ -0,0 +1,23 @@ + + + + + + + + + + + + + + + + + + + + + + + From e29bcab42e9ba75276d20b9402d5d881271ce04d Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Tue, 25 Sep 2012 10:51:53 -0400 Subject: [PATCH 33/93] Updating Intellij enviroment and adding Scala --- .idea/libraries/GATK_libraries.xml | 1 - .idea/misc.xml | 2 +- .idea/workspace.xml | 221 ++++++++++++++++++++++++----- cmi-gatk.iml | 10 +- 4 files changed, 192 insertions(+), 42 deletions(-) diff --git a/.idea/libraries/GATK_libraries.xml b/.idea/libraries/GATK_libraries.xml index 970d0a3dc..b363bbe6c 100644 --- a/.idea/libraries/GATK_libraries.xml +++ b/.idea/libraries/GATK_libraries.xml @@ -6,7 +6,6 @@ - diff --git a/.idea/misc.xml b/.idea/misc.xml index afd7f3778..a79280c52 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -24,7 +24,7 @@ http://www.w3.org/1999/xhtml - + diff --git a/.idea/workspace.xml b/.idea/workspace.xml index 87ab79287..f6d4567fd 100644 --- a/.idea/workspace.xml +++ b/.idea/workspace.xml @@ -1,7 +1,12 @@ - + + + + + + + + + + + + + + - + @@ -112,33 +140,7 @@ - - - - - - - - - - - - - - - - - - - - - + @@ -147,7 +149,7 @@ - + @@ -228,8 +230,9 @@ - + + @@ -286,7 +289,7 @@ + + - + + + + + + + + + + + + + + + + + - + @@ -333,6 +464,18 @@ + + + Detection + + + + + @@ -346,6 +489,7 @@ + 1.6 diff --git a/cmi-gatk.iml b/cmi-gatk.iml index e63aff535..4dbee1336 100644 --- a/cmi-gatk.iml +++ b/cmi-gatk.iml @@ -1,5 +1,13 @@ + + + + + + @@ -17,7 +25,7 @@ - + From fdf29503fb9bb6906d0e0b7ad41b6045aab2f38f Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Tue, 25 Sep 2012 17:18:44 -0400 Subject: [PATCH 34/93] removing annoying xml from IDEA configuration --- .idea/workspace.xml | 529 -------------------------------------------- 1 file changed, 529 deletions(-) delete mode 100644 .idea/workspace.xml diff --git a/.idea/workspace.xml b/.idea/workspace.xml deleted file mode 100644 index f6d4567fd..000000000 --- a/.idea/workspace.xml +++ /dev/null @@ -1,529 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - localhost - 5050 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Detection - - - - - - - - - - - - - - - 1.6 - - - - - - - - cmi-gatk - - - - - - - - GATK libraries - - - - - - - - - From 29195cd3aab9a47118f71516ce55949b979d9967 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Thu, 27 Sep 2012 11:04:56 -0400 Subject: [PATCH 35/93] Removed the intellij files from the root and made an example package for new users. This allows users to start at the same page and then change it as they see fit without interfering with the repo (thanks guillermo!) --- .idea/.name | 1 - .idea/ant.xml | 15 --- .idea/codeStyleSettings.xml | 13 -- .idea/compiler.xml | 21 --- .idea/copyright/profiles_settings.xml | 5 - .idea/encodings.xml | 5 - .idea/highlighting.xml | 8 -- .idea/inspectionProfiles/Project_Default.xml | 11 -- .../inspectionProfiles/profiles_settings.xml | 7 - .idea/libraries/GATK_libraries.xml | 12 -- .idea/misc.xml | 32 ----- .idea/modules.xml | 9 -- .idea/scopes/scope_settings.xml | 5 - .idea/uiDesigner.xml | 125 ------------------ .idea/vcs.xml | 10 -- cmi-gatk.iml | 31 ----- 16 files changed, 310 deletions(-) delete mode 100644 .idea/.name delete mode 100644 .idea/ant.xml delete mode 100644 .idea/codeStyleSettings.xml delete mode 100644 .idea/compiler.xml delete mode 100644 .idea/copyright/profiles_settings.xml delete mode 100644 .idea/encodings.xml delete mode 100644 .idea/highlighting.xml delete mode 100644 .idea/inspectionProfiles/Project_Default.xml delete mode 100644 .idea/inspectionProfiles/profiles_settings.xml delete mode 100644 .idea/libraries/GATK_libraries.xml delete mode 100644 .idea/misc.xml delete mode 100644 .idea/modules.xml delete mode 100644 .idea/scopes/scope_settings.xml delete mode 100644 .idea/uiDesigner.xml delete mode 100644 .idea/vcs.xml delete mode 100644 cmi-gatk.iml diff --git a/.idea/.name b/.idea/.name deleted file mode 100644 index 7014f65a5..000000000 --- a/.idea/.name +++ /dev/null @@ -1 +0,0 @@ -cmi-gatk \ No newline at end of file diff --git a/.idea/ant.xml b/.idea/ant.xml deleted file mode 100644 index 4674eeac9..000000000 --- a/.idea/ant.xml +++ /dev/null @@ -1,15 +0,0 @@ - - - - - - - - - - - - - - - diff --git a/.idea/codeStyleSettings.xml b/.idea/codeStyleSettings.xml deleted file mode 100644 index 9178b389f..000000000 --- a/.idea/codeStyleSettings.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - diff --git a/.idea/compiler.xml b/.idea/compiler.xml deleted file mode 100644 index ded2e9a1d..000000000 --- a/.idea/compiler.xml +++ /dev/null @@ -1,21 +0,0 @@ - - - - - - diff --git a/.idea/copyright/profiles_settings.xml b/.idea/copyright/profiles_settings.xml deleted file mode 100644 index 3572571ad..000000000 --- a/.idea/copyright/profiles_settings.xml +++ /dev/null @@ -1,5 +0,0 @@ - - - - - \ No newline at end of file diff --git a/.idea/encodings.xml b/.idea/encodings.xml deleted file mode 100644 index e206d70d8..000000000 --- a/.idea/encodings.xml +++ /dev/null @@ -1,5 +0,0 @@ - - - - - diff --git a/.idea/highlighting.xml b/.idea/highlighting.xml deleted file mode 100644 index f33b64d94..000000000 --- a/.idea/highlighting.xml +++ /dev/null @@ -1,8 +0,0 @@ - - - - - - diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml deleted file mode 100644 index b8c243dbe..000000000 --- a/.idea/inspectionProfiles/Project_Default.xml +++ /dev/null @@ -1,11 +0,0 @@ - - - - \ No newline at end of file diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml deleted file mode 100644 index 3b312839b..000000000 --- a/.idea/inspectionProfiles/profiles_settings.xml +++ /dev/null @@ -1,7 +0,0 @@ - - - - \ No newline at end of file diff --git a/.idea/libraries/GATK_libraries.xml b/.idea/libraries/GATK_libraries.xml deleted file mode 100644 index b363bbe6c..000000000 --- a/.idea/libraries/GATK_libraries.xml +++ /dev/null @@ -1,12 +0,0 @@ - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml deleted file mode 100644 index a79280c52..000000000 --- a/.idea/misc.xml +++ /dev/null @@ -1,32 +0,0 @@ - - - - - - - - - - http://www.w3.org/1999/xhtml - - - - - - - diff --git a/.idea/modules.xml b/.idea/modules.xml deleted file mode 100644 index 09caa2933..000000000 --- a/.idea/modules.xml +++ /dev/null @@ -1,9 +0,0 @@ - - - - - - - - - diff --git a/.idea/scopes/scope_settings.xml b/.idea/scopes/scope_settings.xml deleted file mode 100644 index 922003b84..000000000 --- a/.idea/scopes/scope_settings.xml +++ /dev/null @@ -1,5 +0,0 @@ - - - - \ No newline at end of file diff --git a/.idea/uiDesigner.xml b/.idea/uiDesigner.xml deleted file mode 100644 index 3b0002030..000000000 --- a/.idea/uiDesigner.xml +++ /dev/null @@ -1,125 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/.idea/vcs.xml b/.idea/vcs.xml deleted file mode 100644 index cbc984988..000000000 --- a/.idea/vcs.xml +++ /dev/null @@ -1,10 +0,0 @@ - - - - - - - - - diff --git a/cmi-gatk.iml b/cmi-gatk.iml deleted file mode 100644 index 4dbee1336..000000000 --- a/cmi-gatk.iml +++ /dev/null @@ -1,31 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - From e9eaa33c0b3699472da7287a2c6e23cc6b1ac08f Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Thu, 27 Sep 2012 11:09:41 -0400 Subject: [PATCH 36/93] adding some directories to gitignore --- .gitignore | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/.gitignore b/.gitignore index 456794cea..927caf98d 100644 --- a/.gitignore +++ b/.gitignore @@ -18,10 +18,8 @@ queueScatterGather /bar* integrationtests/ public/testdata/onTheFlyOutputTest.vcf -private/testdata/onTheFlyOutputTest.vcf -lib -html -gatkdocs -dist -build -resources +build/ +dist/ +dump/ +lib/ +out/ From af5a6fdaced7814b7af3d6e858897c6b0eadb8ed Mon Sep 17 00:00:00 2001 From: Guillermo del Angel Date: Thu, 11 Oct 2012 11:09:49 -0400 Subject: [PATCH 37/93] Resolve [DEV-7]: add single-sample VCF calling at end of FASTQ-BAM pipeline. Initial steps of [DEV-4]: queue extensions for Picard QC metrics --- .../picard/CalculateHsMetrics.scala | 60 +++++++++++++++++++ .../picard/CollectGcBiasMetrics.scala | 32 ++++++++++ .../picard/CollectMultipleMetrics.scala | 36 +++++++++++ 3 files changed, 128 insertions(+) create mode 100644 public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CalculateHsMetrics.scala create mode 100644 public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CollectGcBiasMetrics.scala create mode 100644 public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CollectMultipleMetrics.scala diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CalculateHsMetrics.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CalculateHsMetrics.scala new file mode 100644 index 000000000..75e9300dc --- /dev/null +++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CalculateHsMetrics.scala @@ -0,0 +1,60 @@ +package org.broadinstitute.sting.queue.extensions.picard + +import org.broadinstitute.sting.commandline.{Argument, Output, Input} +import java.io.File + +/** + * Created with IntelliJ IDEA. + * User: delangel + * Date: 10/9/12 + * Time: 5:59 PM + * To change this template use File | Settings | File Templates. + */ +class CalculateHsMetrics extends org.broadinstitute.sting.queue.function.JavaCommandLineFunction with PicardBamFunction { + analysisName = "CalculateHsMetrics" + javaMainClass = "net.sf.picard.sam.CalculateHsMetrics" + + @Input(doc="The input SAM or BAM files to analyze. Must be coordinate sorted.", shortName = "input", fullName = "input_bam_files", required = true) + var input: Seq[File] = Nil + + @Output(doc="The output file to write statistics to", shortName = "output", fullName = "output_file", required = true) + var output: File = _ + + @Argument(doc="Interval list with targets", shortName = "targets", fullName = "target_list", required = true) + var targets: File = _ + + @Argument(doc="Interval list with baits", shortName = "baits", fullName = "bait_list", required = true) + var baits: File = _ + + @Argument(doc="Reference file", shortName = "reference", fullName = "reference", required = true) + var reference: File = _ + /* + @Argument(doc = "Maximum number of file handles to keep open when spilling read ends to disk. Set this number a little lower than the per-process maximum number of file that may be open. This number can be found by executing the 'ulimit -n' command on a Unix system.", shortName = "max_file_handles", fullName ="max_file_handles_for_read_ends_maps", required=false) + var MAX_FILE_HANDLES_FOR_READ_ENDS_MAP: Int = -1; + + @Argument(doc = "This number, plus the maximum RAM available to the JVM, determine the memory footprint used by some of the sorting collections. If you are running out of memory, try reducing this number.", shortName = "sorting_ratio", fullName = "sorting_collection_size_ratio", required = false) + var SORTING_COLLECTION_SIZE_RATIO: Double = -1 + */ + override def freezeFieldValues() { + super.freezeFieldValues() +// if (outputIndex == null && output != null) + // outputIndex = new File(output.getName.stripSuffix(".bam") + ".bai") + } + + val level = "SAMPLE" + + override def inputBams = input + override def outputBam = output + //this.sortOrder = null + //this.createIndex = Some(true) + override def commandLine = super.commandLine + + required("BAIT_INTERVALS=" + baits) + + required("TARGET_INTERVALS=" + targets) + + required("REFERENCE_SEQUENCE=" + reference) + + optional("METRIC_ACCUMULATION_LEVEL="+level)/*+ + conditional(REMOVE_DUPLICATES, "REMOVE_DUPLICATES=true") + + conditional(MAX_FILE_HANDLES_FOR_READ_ENDS_MAP > 0, "MAX_FILE_HANDLES_FOR_READ_ENDS_MAP=" + MAX_FILE_HANDLES_FOR_READ_ENDS_MAP.toString) + + conditional(SORTING_COLLECTION_SIZE_RATIO > 0, "SORTING_COLLECTION_SIZE_RATIO=" + SORTING_COLLECTION_SIZE_RATIO.toString) */ + + +} diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CollectGcBiasMetrics.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CollectGcBiasMetrics.scala new file mode 100644 index 000000000..de2b0af9e --- /dev/null +++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CollectGcBiasMetrics.scala @@ -0,0 +1,32 @@ +package org.broadinstitute.sting.queue.extensions.picard + +import org.broadinstitute.sting.commandline.{Argument, Output, Input} +import java.io.File + +/** + * Created with IntelliJ IDEA. + * User: delangel + * Date: 10/10/12 + * Time: 10:37 AM + * To change this template use File | Settings | File Templates. + */ +class CollectGcBiasMetrics extends org.broadinstitute.sting.queue.function.JavaCommandLineFunction with PicardBamFunction { + analysisName = "CalculateGcMetrics" + javaMainClass = "net.sf.picard.sam.CalculateGcMetrics" + + @Input(doc="The input SAM or BAM files to analyze. Must be coordinate sorted.", shortName = "input", fullName = "input_bam_files", required = true) + var input: Seq[File] = Nil + + @Output(doc="The output file to write statistics to", shortName = "output", fullName = "output_file", required = true) + var output: File = _ + + @Argument(doc="Reference file", shortName = "reference", fullName = "reference", required = true) + var reference: File = _ + + override def inputBams = input + override def outputBam = output + override def commandLine = super.commandLine + + required("CHART_OUTPUT=" + output+".pdf") + + required("REFERENCE_SEQUENCE=" + reference) + + required("ASSUME_SORTED=true") +} diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CollectMultipleMetrics.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CollectMultipleMetrics.scala new file mode 100644 index 000000000..a9af4e858 --- /dev/null +++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CollectMultipleMetrics.scala @@ -0,0 +1,36 @@ +package org.broadinstitute.sting.queue.extensions.picard + +import org.broadinstitute.sting.commandline.{Argument, Output, Input} +import java.io.File + +/** + * Created with IntelliJ IDEA. + * User: delangel + * Date: 10/10/12 + * Time: 10:37 AM + * To change this template use File | Settings | File Templates. + */ +class CollectMultipleMetrics extends org.broadinstitute.sting.queue.function.JavaCommandLineFunction with PicardBamFunction{ + analysisName = "CalculateMultipleMetrics" + javaMainClass = "net.sf.picard.sam.CalculateMultipleMetrics" + + @Input(doc="The input SAM or BAM files to analyze. Must be coordinate sorted.", shortName = "input", fullName = "input_bam_files", required = true) + var input: Seq[File] = Nil + + @Output(doc="The output file to write statistics to", shortName = "output", fullName = "output_file", required = true) + var output: File = _ + + @Argument(doc="Reference file", shortName = "reference", fullName = "reference", required = true) + var reference: File = _ + + override def inputBams = input + override def outputBam = output + override def commandLine = super.commandLine + + required("REFERENCE_SEQUENCE=" + reference) + + required("ASSUME_SORTED=true") + + required("PROGRAM=QualityScoreDistribution") + + required("PROGRAM=MeanQualityByCycle") + + required("PROGRAM=CollectAlignmentSummaryMetrics" ) + + +} From ef3882f43995afb9c2ac63a04e38b99f1fde462f Mon Sep 17 00:00:00 2001 From: Ami Levy Moonshine Date: Thu, 11 Oct 2012 14:51:41 -0400 Subject: [PATCH 38/93] PhaseByTransmission: small typo /n. variantCallQC_summaryTablesOnly.R: small changes (more to come) /n GeneralCallingPipeline.scala: the new pipeline script. It is not as clean as I want it to be, but it works. I still going to work on it a little bit more. Also, it does not include yet: (1) the RR step (2) need better eval step (3) need to include other targets (currently it eork on the CEU Trio) --- .../sting/gatk/walkers/phasing/PhaseByTransmission.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java index 00acf854a..7ebfec49e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java @@ -36,7 +36,7 @@ import java.util.*; *
    *
  • In parent/child pairs: If an individual genotype is missing at one site, the other one is phased if it is homozygous. No phasing probability is emitted.
  • *
  • In trios: If the child is missing, parents are treated as separate individuals and phased if homozygous. No phasing probability is emitted.
  • - *
  • In trios: If one of the parents is missing, it is handled like a parent/child pair. Phasing is done unless both the parent and child are heterozygous and a phasing probabilitt is emitted.
  • + *
  • In trios: If one of the parents is missing, it is handled like a parent/child pair. Phasing is done unless both the parent and child are heterozygous and a phasing probability is emitted.
  • *
  • In trios: If two individuals are missing, the remaining individual is phased if it is homozygous. No phasing probability is emitted.
  • *
* From c1706ef0ef42bd6a7986009e7664c453313e8cbc Mon Sep 17 00:00:00 2001 From: Kristian Cibulskis Date: Fri, 12 Oct 2012 14:18:12 -0400 Subject: [PATCH 40/93] upgraded mutation caller with VCF output raw indel calls (non filtered,non vcf) --- .../queue/extensions/cancer/MuTect.scala | 33 +++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/cancer/MuTect.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/cancer/MuTect.scala index 623d397d4..1193e7dec 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/extensions/cancer/MuTect.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/cancer/MuTect.scala @@ -6,7 +6,7 @@ import org.broadinstitute.sting.commandline.Gather import org.broadinstitute.sting.commandline.Input import org.broadinstitute.sting.commandline.Output import org.broadinstitute.sting.queue.function.scattergather.ScatterGatherableFunction -import org.broadinstitute.sting.queue.extensions.gatk.{LocusScatterFunction, TaggedFile} +import org.broadinstitute.sting.queue.extensions.gatk.{TaggedFile, VcfGatherFunction, LocusScatterFunction} class MuTect extends org.broadinstitute.sting.queue.extensions.gatk.CommandLineGATK with ScatterGatherableFunction { analysisName = "MuTect" @@ -45,6 +45,10 @@ class MuTect extends org.broadinstitute.sting.queue.extensions.gatk.CommandLineG @Argument(fullName="force_alleles", shortName="", doc="force output for all alleles at each site", required=false, exclusiveOf="", validation="") var force_alleles: Boolean = _ + /** only emit passing calls */ + @Argument(fullName="only_passing_calls", shortName="", doc="only emit passing calls", required=false, exclusiveOf="", validation="") + var only_passing_calls: Boolean = _ + /** Initial LOD threshold for calling tumor variant */ @Argument(fullName="initial_tumor_lod", shortName="", doc="Initial LOD threshold for calling tumor variant", required=false, exclusiveOf="", validation="") var initial_tumor_lod: Option[Float] = None @@ -242,6 +246,28 @@ class MuTect extends org.broadinstitute.sting.queue.extensions.gatk.CommandLineG */ def o_=(value: File) { this.out = value } + /** VCF output of mutation candidates */ + @Output(fullName="vcf", shortName="vcf", doc="VCF output of mutation candidates", required=false, exclusiveOf="", validation="") + @Gather(classOf[VcfGatherFunction]) + var vcf: File = _ + + /** Automatically generated index for vcf */ + @Output(fullName="vcfIndex", shortName="", doc="Automatically generated index for vcf", required=false, exclusiveOf="", validation="") + @Gather(enabled=false) + private var vcfIndex: File = _ + + /** Don't output the usual VCF header tag with the command line. FOR DEBUGGING PURPOSES ONLY. This option is required in order to pass integration tests. */ + @Argument(fullName="no_cmdline_in_header", shortName="no_cmdline_in_header", doc="Don't output the usual VCF header tag with the command line. FOR DEBUGGING PURPOSES ONLY. This option is required in order to pass integration tests.", required=false, exclusiveOf="", validation="") + var no_cmdline_in_header: Boolean = _ + + /** Just output sites without genotypes (i.e. only the first 8 columns of the VCF) */ + @Argument(fullName="sites_only", shortName="sites_only", doc="Just output sites without genotypes (i.e. only the first 8 columns of the VCF)", required=false, exclusiveOf="", validation="") + var sites_only: Boolean = _ + + /** force BCF output, regardless of the file's extension */ + @Argument(fullName="bcf", shortName="bcf", doc="force BCF output, regardless of the file's extension", required=false, exclusiveOf="", validation="") + var bcf: Boolean = _ + /** VCF file of DBSNP information */ @Input(fullName="dbsnp", shortName="dbsnp", doc="VCF file of DBSNP information", required=false, exclusiveOf="", validation="") var dbsnp: Seq[File] = Nil @@ -369,10 +395,13 @@ class MuTect extends org.broadinstitute.sting.queue.extensions.gatk.CommandLineG override def freezeFieldValues() { super.freezeFieldValues() + if (vcf != null && !org.broadinstitute.sting.utils.io.IOUtils.isSpecialFile(vcf)) + if (!org.broadinstitute.sting.gatk.io.stubs.VCFWriterArgumentTypeDescriptor.isCompressed(vcf.getPath)) + vcfIndex = new File(vcf.getPath + ".idx") dbsnpIndexes ++= dbsnp.filter(orig => orig != null).map(orig => new File(orig.getPath + ".idx")) cosmicIndexes ++= cosmic.filter(orig => orig != null).map(orig => new File(orig.getPath + ".idx")) normal_panelIndexes ++= normal_panel.filter(orig => orig != null).map(orig => new File(orig.getPath + ".idx")) } - override def commandLine = super.commandLine + conditional(noop, "--noop", escape=true, format="%s") + conditional(enable_extended_output, "--enable_extended_output", escape=true, format="%s") + conditional(artifact_detection_mode, "--artifact_detection_mode", escape=true, format="%s") + optional("--tumor_sample_name", tumor_sample_name, spaceSeparated=true, escape=true, format="%s") + optional("--bam_tumor_sample_name", bam_tumor_sample_name, spaceSeparated=true, escape=true, format="%s") + optional("--normal_sample_name", normal_sample_name, spaceSeparated=true, escape=true, format="%s") + conditional(force_output, "--force_output", escape=true, format="%s") + conditional(force_alleles, "--force_alleles", escape=true, format="%s") + optional("--initial_tumor_lod", initial_tumor_lod, spaceSeparated=true, escape=true, format=initial_tumor_lodFormat) + optional("--tumor_lod", tumor_lod, spaceSeparated=true, escape=true, format=tumor_lodFormat) + optional("--fraction_contamination", fraction_contamination, spaceSeparated=true, escape=true, format=fraction_contaminationFormat) + optional("--minimum_mutation_cell_fraction", minimum_mutation_cell_fraction, spaceSeparated=true, escape=true, format=minimum_mutation_cell_fractionFormat) + optional("--normal_lod", normal_lod, spaceSeparated=true, escape=true, format=normal_lodFormat) + optional("--normal_artifact_lod", normal_artifact_lod, spaceSeparated=true, escape=true, format=normal_artifact_lodFormat) + optional("--strand_artifact_lod", strand_artifact_lod, spaceSeparated=true, escape=true, format=strand_artifact_lodFormat) + optional("--strand_artifact_power_threshold", strand_artifact_power_threshold, spaceSeparated=true, escape=true, format=strand_artifact_power_thresholdFormat) + optional("--dbsnp_normal_lod", dbsnp_normal_lod, spaceSeparated=true, escape=true, format=dbsnp_normal_lodFormat) + optional("--somatic_classification_normal_power_threshold", somatic_classification_normal_power_threshold, spaceSeparated=true, escape=true, format=somatic_classification_normal_power_thresholdFormat) + optional("--minimum_normal_allele_fraction", minimum_normal_allele_fraction, spaceSeparated=true, escape=true, format=minimum_normal_allele_fractionFormat) + optional("--tumor_f_pretest", tumor_f_pretest, spaceSeparated=true, escape=true, format=tumor_f_pretestFormat) + optional("--min_qscore", min_qscore, spaceSeparated=true, escape=true, format="%s") + optional("--gap_events_threshold", gap_events_threshold, spaceSeparated=true, escape=true, format="%s") + optional("--heavily_clipped_read_fraction", heavily_clipped_read_fraction, spaceSeparated=true, escape=true, format=heavily_clipped_read_fractionFormat) + optional("--clipping_bias_pvalue_threshold", clipping_bias_pvalue_threshold, spaceSeparated=true, escape=true, format=clipping_bias_pvalue_thresholdFormat) + optional("--fraction_mapq0_threshold", fraction_mapq0_threshold, spaceSeparated=true, escape=true, format=fraction_mapq0_thresholdFormat) + optional("--pir_median_threshold", pir_median_threshold, spaceSeparated=true, escape=true, format=pir_median_thresholdFormat) + optional("--pir_mad_threshold", pir_mad_threshold, spaceSeparated=true, escape=true, format=pir_mad_thresholdFormat) + optional("--required_maximum_alt_allele_mapping_quality_score", required_maximum_alt_allele_mapping_quality_score, spaceSeparated=true, escape=true, format="%s") + optional("--max_alt_alleles_in_normal_count", max_alt_alleles_in_normal_count, spaceSeparated=true, escape=true, format="%s") + optional("--max_alt_alleles_in_normal_qscore_sum", max_alt_alleles_in_normal_qscore_sum, spaceSeparated=true, escape=true, format="%s") + optional("--max_alt_allele_in_normal_fraction", max_alt_allele_in_normal_fraction, spaceSeparated=true, escape=true, format=max_alt_allele_in_normal_fractionFormat) + optional("--power_constant_qscore", power_constant_qscore, spaceSeparated=true, escape=true, format="%s") + optional("--absolute_copy_number_data", absolute_copy_number_data, spaceSeparated=true, escape=true, format="%s") + optional("--power_constant_af", power_constant_af, spaceSeparated=true, escape=true, format=power_constant_afFormat) + optional("-o", out, spaceSeparated=true, escape=true, format="%s") + repeat("-dbsnp", dbsnp, formatPrefix=TaggedFile.formatCommandLineParameter, spaceSeparated=true, escape=true, format="%s") + repeat("-cosmic", cosmic, formatPrefix=TaggedFile.formatCommandLineParameter, spaceSeparated=true, escape=true, format="%s") + repeat("-normal_panel", normal_panel, formatPrefix=TaggedFile.formatCommandLineParameter, spaceSeparated=true, escape=true, format="%s") + optional("-cov", coverage_file, spaceSeparated=true, escape=true, format="%s") + optional("-cov_q20", coverage_20_q20_file, spaceSeparated=true, escape=true, format="%s") + optional("-pow", power_file, spaceSeparated=true, escape=true, format="%s") + optional("-tdf", tumor_depth_file, spaceSeparated=true, escape=true, format="%s") + optional("-ndf", normal_depth_file, spaceSeparated=true, escape=true, format="%s") + conditional(filter_mismatching_base_and_quals, "-filterMBQ", escape=true, format="%s") + override def commandLine = super.commandLine + conditional(noop, "--noop", escape=true, format="%s") + conditional(enable_extended_output, "--enable_extended_output", escape=true, format="%s") + conditional(artifact_detection_mode, "--artifact_detection_mode", escape=true, format="%s") + optional("--tumor_sample_name", tumor_sample_name, spaceSeparated=true, escape=true, format="%s") + optional("--bam_tumor_sample_name", bam_tumor_sample_name, spaceSeparated=true, escape=true, format="%s") + optional("--normal_sample_name", normal_sample_name, spaceSeparated=true, escape=true, format="%s") + conditional(force_output, "--force_output", escape=true, format="%s") + conditional(force_alleles, "--force_alleles", escape=true, format="%s") + conditional(only_passing_calls, "--only_passing_calls", escape=true, format="%s") + optional("--initial_tumor_lod", initial_tumor_lod, spaceSeparated=true, escape=true, format=initial_tumor_lodFormat) + optional("--tumor_lod", tumor_lod, spaceSeparated=true, escape=true, format=tumor_lodFormat) + optional("--fraction_contamination", fraction_contamination, spaceSeparated=true, escape=true, format=fraction_contaminationFormat) + optional("--minimum_mutation_cell_fraction", minimum_mutation_cell_fraction, spaceSeparated=true, escape=true, format=minimum_mutation_cell_fractionFormat) + optional("--normal_lod", normal_lod, spaceSeparated=true, escape=true, format=normal_lodFormat) + optional("--normal_artifact_lod", normal_artifact_lod, spaceSeparated=true, escape=true, format=normal_artifact_lodFormat) + optional("--strand_artifact_lod", strand_artifact_lod, spaceSeparated=true, escape=true, format=strand_artifact_lodFormat) + optional("--strand_artifact_power_threshold", strand_artifact_power_threshold, spaceSeparated=true, escape=true, format=strand_artifact_power_thresholdFormat) + optional("--dbsnp_normal_lod", dbsnp_normal_lod, spaceSeparated=true, escape=true, format=dbsnp_normal_lodFormat) + optional("--somatic_classification_normal_power_threshold", somatic_classification_normal_power_threshold, spaceSeparated=true, escape=true, format=somatic_classification_normal_power_thresholdFormat) + optional("--minimum_normal_allele_fraction", minimum_normal_allele_fraction, spaceSeparated=true, escape=true, format=minimum_normal_allele_fractionFormat) + optional("--tumor_f_pretest", tumor_f_pretest, spaceSeparated=true, escape=true, format=tumor_f_pretestFormat) + optional("--min_qscore", min_qscore, spaceSeparated=true, escape=true, format="%s") + optional("--gap_events_threshold", gap_events_threshold, spaceSeparated=true, escape=true, format="%s") + optional("--heavily_clipped_read_fraction", heavily_clipped_read_fraction, spaceSeparated=true, escape=true, format=heavily_clipped_read_fractionFormat) + optional("--clipping_bias_pvalue_threshold", clipping_bias_pvalue_threshold, spaceSeparated=true, escape=true, format=clipping_bias_pvalue_thresholdFormat) + optional("--fraction_mapq0_threshold", fraction_mapq0_threshold, spaceSeparated=true, escape=true, format=fraction_mapq0_thresholdFormat) + optional("--pir_median_threshold", pir_median_threshold, spaceSeparated=true, escape=true, format=pir_median_thresholdFormat) + optional("--pir_mad_threshold", pir_mad_threshold, spaceSeparated=true, escape=true, format=pir_mad_thresholdFormat) + optional("--required_maximum_alt_allele_mapping_quality_score", required_maximum_alt_allele_mapping_quality_score, spaceSeparated=true, escape=true, format="%s") + optional("--max_alt_alleles_in_normal_count", max_alt_alleles_in_normal_count, spaceSeparated=true, escape=true, format="%s") + optional("--max_alt_alleles_in_normal_qscore_sum", max_alt_alleles_in_normal_qscore_sum, spaceSeparated=true, escape=true, format="%s") + optional("--max_alt_allele_in_normal_fraction", max_alt_allele_in_normal_fraction, spaceSeparated=true, escape=true, format=max_alt_allele_in_normal_fractionFormat) + optional("--power_constant_qscore", power_constant_qscore, spaceSeparated=true, escape=true, format="%s") + optional("--absolute_copy_number_data", absolute_copy_number_data, spaceSeparated=true, escape=true, format="%s") + optional("--power_constant_af", power_constant_af, spaceSeparated=true, escape=true, format=power_constant_afFormat) + optional("-o", out, spaceSeparated=true, escape=true, format="%s") + optional("-vcf", vcf, spaceSeparated=true, escape=true, format="%s") + conditional(no_cmdline_in_header, "-no_cmdline_in_header", escape=true, format="%s") + conditional(sites_only, "-sites_only", escape=true, format="%s") + conditional(bcf, "-bcf", escape=true, format="%s") + repeat("-dbsnp", dbsnp, formatPrefix=TaggedFile.formatCommandLineParameter, spaceSeparated=true, escape=true, format="%s") + repeat("-cosmic", cosmic, formatPrefix=TaggedFile.formatCommandLineParameter, spaceSeparated=true, escape=true, format="%s") + repeat("-normal_panel", normal_panel, formatPrefix=TaggedFile.formatCommandLineParameter, spaceSeparated=true, escape=true, format="%s") + optional("-cov", coverage_file, spaceSeparated=true, escape=true, format="%s") + optional("-cov_q20", coverage_20_q20_file, spaceSeparated=true, escape=true, format="%s") + optional("-pow", power_file, spaceSeparated=true, escape=true, format="%s") + optional("-tdf", tumor_depth_file, spaceSeparated=true, escape=true, format="%s") + optional("-ndf", normal_depth_file, spaceSeparated=true, escape=true, format="%s") + conditional(filter_mismatching_base_and_quals, "-filterMBQ", escape=true, format="%s") } From 05111eeaef41d3d3d5c2483b16728a76fc8f8a6e Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Wed, 10 Oct 2012 15:00:17 -0400 Subject: [PATCH 42/93] Making nContigs parameter hidden in ReduceReads For now, the het reduction should only be performed for diploids (n=2). We haven't really tested it for other ploidy so it should remain hidden until someone braves it out. --- .../compression/reducereads/ReduceReads.java | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java index 1b3e68647..5810bc94f 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java @@ -181,15 +181,6 @@ public class ReduceReads extends ReadWalker, ReduceRea @Argument(fullName = "minimum_del_proportion_to_trigger_variant", shortName = "mindel", doc = "", required = false) private double minIndelProportionToTriggerVariant = 0.05; - /** - * Minimum proportion of indels in a site to trigger a variant region. Anything below this will be - * considered consensus. - */ - @Argument(fullName = "contigs", shortName = "ctg", doc = "", required = false) - private int nContigs = 2; - - - /** * Downsamples the coverage of a variable region approximately (guarantees the minimum to be equal to this). * A value of 0 turns downsampling off. @@ -197,6 +188,14 @@ public class ReduceReads extends ReadWalker, ReduceRea @Argument(fullName = "downsample_coverage", shortName = "ds", doc = "", required = false) private int downsampleCoverage = 250; + /** + * Number of chromossomes in the sample (this is used for the polyploid consensus compression). Only + * tested for humans (or organisms with n=2). Use at your own risk! + */ + @Hidden + @Argument(fullName = "contigs", shortName = "ctg", doc = "", required = false) + private int nContigs = 2; + @Hidden @Argument(fullName = "", shortName = "dl", doc = "", required = false) private int debugLevel = 0; From 274ac4836f3357a9cc0d0d37a0f9c6f98050542f Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Fri, 12 Oct 2012 13:50:10 -0400 Subject: [PATCH 43/93] Allowing the GATK to have non-required outputs Modified the SAMFileWriterArgumentTypeDescriptor to accept output bam files that are null if they're not required (in the @Output annotation). This change enables the nWayOut parameter for the IndeRealigner and ReduceReads to operate optionally while maintaining the original single way out. [#DEV-10 transition:31 resolution:1] --- .../SAMFileWriterArgumentTypeDescriptor.java | 36 +++++++++---------- .../gatk/walkers/indels/IndelRealigner.java | 4 +-- .../indels/IndelRealignerIntegrationTest.java | 10 ++++++ 3 files changed, 27 insertions(+), 23 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterArgumentTypeDescriptor.java index 8566f6c63..dcf2704f5 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterArgumentTypeDescriptor.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterArgumentTypeDescriptor.java @@ -124,32 +124,28 @@ public class SAMFileWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor // This parser has been passed a null filename and the GATK is not responsible for creating a type default for the object; // therefore, the user must have failed to specify a type default - if(writerFileName == null) { - if(!source.isRequired()) - throw new MissingArgumentValueException(bamArgumentDefinition); - if(generateMD5) + if(writerFileName == null && generateMD5) throw new ArgumentException("MD5 generation specified, but no output file specified. If md5 generation is desired, please specify a BAM output file and an md5 file will be written alongside."); - } // Create the stub and set parameters. - SAMFileWriterStub stub; - if ( writerFileName != null ) + SAMFileWriterStub stub = null; // stub = new SAMFileWriterStub(engine, defaultOutputStream); + + if ( writerFileName != null ) { stub = new SAMFileWriterStub(engine, new File(writerFileName)); - else - stub = new SAMFileWriterStub(engine, defaultOutputStream); - if ( compressionLevel != null ) - stub.setCompressionLevel(compressionLevel); - if ( indexOnTheFly ) - stub.setIndexOnTheFly(indexOnTheFly); - if ( generateMD5 ) - stub.setGenerateMD5(generateMD5); - if ( simplifyBAM ) - stub.setSimplifyBAM(simplifyBAM); + if ( compressionLevel != null ) + stub.setCompressionLevel(compressionLevel); + if ( indexOnTheFly ) + stub.setIndexOnTheFly(indexOnTheFly); + if ( generateMD5 ) + stub.setGenerateMD5(generateMD5); + if ( simplifyBAM ) + stub.setSimplifyBAM(simplifyBAM); - // WARNING: Side effects required by engine! - parsingEngine.addTags(stub,getArgumentTags(matches)); - engine.addOutput(stub); + // WARNING: Side effects required by engine! + parsingEngine.addTags(stub,getArgumentTags(matches)); + engine.addOutput(stub); + } return stub; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java index 76d8d85c2..998894fbf 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java @@ -370,8 +370,6 @@ public class IndelRealigner extends ReadWalker { currentInterval = intervals.hasNext() ? intervals.next() : null; - writerToUse = writer; - if ( N_WAY_OUT != null ) { boolean createIndex = true; @@ -383,9 +381,9 @@ public class IndelRealigner extends ReadWalker { createIndex, generateMD5s,createProgramRecord(),KEEP_ALL_PG_RECORDS); } } else { - // set up the output writer setupWriter(getToolkit().getSAMFileHeader()); + writerToUse = writer; } manager = new ConstrainedMateFixingManager(writerToUse, getToolkit().getGenomeLocParser(), MAX_ISIZE_FOR_MOVEMENT, MAX_POS_MOVE_ALLOWED, MAX_RECORDS_IN_MEMORY); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerIntegrationTest.java index 040845828..9b464cfec 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerIntegrationTest.java @@ -113,4 +113,14 @@ public class IndelRealignerIntegrationTest extends WalkerTest { executeTest(String.format("realigner [%s]", entry.getKey()), spec); } } + + @Test + public void testNWayOut() { + WalkerTestSpec spec1 = new WalkerTestSpec( + baseCommandPrefix + " -nWayOut .clean.bam ", + 1, + Arrays.asList("d41d8cd98f00b204e9800998ecf8427e")); + executeTest("test realigner nWayOut", spec1); + } + } From 429c96e72356aeb7554b305c73e38f486eb4a436 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Mon, 24 Sep 2012 17:01:17 -0400 Subject: [PATCH 50/93] Generic input file name recognition (still need to implement support to FastQ, but it now can at least accept it) --- .../qscripts/DataProcessingPipeline.scala | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala index 56f6460fb..c21db30ce 100755 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala @@ -96,6 +96,7 @@ class DataProcessingPipeline extends QScript { var cleanModelEnum: ConsensusDeterminationModel = ConsensusDeterminationModel.USE_READS + val bwaParameters: String = " -q 5 -l 32 -k 2 -t 4 -o 1 " @@ -165,12 +166,15 @@ class DataProcessingPipeline extends QScript { var realignedBams: Seq[File] = Seq() var index = 1 for (bam <- bams) { - // first revert the BAM file to the original qualities - val saiFile1 = swapExt(bam, ".bam", "." + index + ".1.sai") - val saiFile2 = swapExt(bam, ".bam", "." + index + ".2.sai") - val realignedSamFile = swapExt(bam, ".bam", "." + index + ".realigned.sam") - val realignedBamFile = swapExt(bam, ".bam", "." + index + ".realigned.bam") - val rgRealignedBamFile = swapExt(bam, ".bam", "." + index + ".realigned.rg.bam") + val extension = bam.toString.substring(bam.toString.length - 4) + + + + val saiFile1 = swapExt(bam, extension, "." + index + ".1.sai") + val saiFile2 = swapExt(bam, extension, "." + index + ".2.sai") + val realignedSamFile = swapExt(bam, extension, "." + index + ".realigned.sam") + val realignedBamFile = swapExt(bam, extension, "." + index + ".realigned.bam") + val rgRealignedBamFile = swapExt(bam, extension, "." + index + ".realigned.rg.bam") if (useBWAse) { val revertedBAM = revertBAM(bam, true) @@ -444,7 +448,7 @@ class DataProcessingPipeline extends QScript { case class bwa_aln_se (inBam: File, outSai: File) extends CommandLineFunction with ExternalCommonArgs { @Input(doc="bam file to be aligned") var bam = inBam @Output(doc="output sai file") var sai = outSai - def commandLine = bwaPath + " aln -t " + bwaThreads + " -q 5 " + reference + " -b " + bam + " > " + sai + def commandLine = bwaPath + " aln -t " + bwaThreads + bwaParameters + reference + " -b " + bam + " > " + sai this.analysisName = queueLogDir + outSai + ".bwa_aln_se" this.jobName = queueLogDir + outSai + ".bwa_aln_se" } @@ -452,7 +456,7 @@ class DataProcessingPipeline extends QScript { case class bwa_aln_pe (inBam: File, outSai1: File, index: Int) extends CommandLineFunction with ExternalCommonArgs { @Input(doc="bam file to be aligned") var bam = inBam @Output(doc="output sai file for 1st mating pair") var sai = outSai1 - def commandLine = bwaPath + " aln -t " + bwaThreads + " -q 5 " + reference + " -b" + index + " " + bam + " > " + sai + def commandLine = bwaPath + " aln -t " + bwaThreads + bwaParameters + reference + " -b" + index + " " + bam + " > " + sai this.analysisName = queueLogDir + outSai1 + ".bwa_aln_pe1" this.jobName = queueLogDir + outSai1 + ".bwa_aln_pe1" } From f1fb51b222d2f81091389367d600c7ea2b4f913d Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Tue, 25 Sep 2012 12:02:34 -0400 Subject: [PATCH 51/93] Reverting the DPP to the original version, going to create a new simplified version for CMI in private. --- .../qscripts/DataProcessingPipeline.scala | 20 ++++++++----------- 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala index c21db30ce..56f6460fb 100755 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala @@ -96,7 +96,6 @@ class DataProcessingPipeline extends QScript { var cleanModelEnum: ConsensusDeterminationModel = ConsensusDeterminationModel.USE_READS - val bwaParameters: String = " -q 5 -l 32 -k 2 -t 4 -o 1 " @@ -166,15 +165,12 @@ class DataProcessingPipeline extends QScript { var realignedBams: Seq[File] = Seq() var index = 1 for (bam <- bams) { - val extension = bam.toString.substring(bam.toString.length - 4) - - - - val saiFile1 = swapExt(bam, extension, "." + index + ".1.sai") - val saiFile2 = swapExt(bam, extension, "." + index + ".2.sai") - val realignedSamFile = swapExt(bam, extension, "." + index + ".realigned.sam") - val realignedBamFile = swapExt(bam, extension, "." + index + ".realigned.bam") - val rgRealignedBamFile = swapExt(bam, extension, "." + index + ".realigned.rg.bam") + // first revert the BAM file to the original qualities + val saiFile1 = swapExt(bam, ".bam", "." + index + ".1.sai") + val saiFile2 = swapExt(bam, ".bam", "." + index + ".2.sai") + val realignedSamFile = swapExt(bam, ".bam", "." + index + ".realigned.sam") + val realignedBamFile = swapExt(bam, ".bam", "." + index + ".realigned.bam") + val rgRealignedBamFile = swapExt(bam, ".bam", "." + index + ".realigned.rg.bam") if (useBWAse) { val revertedBAM = revertBAM(bam, true) @@ -448,7 +444,7 @@ class DataProcessingPipeline extends QScript { case class bwa_aln_se (inBam: File, outSai: File) extends CommandLineFunction with ExternalCommonArgs { @Input(doc="bam file to be aligned") var bam = inBam @Output(doc="output sai file") var sai = outSai - def commandLine = bwaPath + " aln -t " + bwaThreads + bwaParameters + reference + " -b " + bam + " > " + sai + def commandLine = bwaPath + " aln -t " + bwaThreads + " -q 5 " + reference + " -b " + bam + " > " + sai this.analysisName = queueLogDir + outSai + ".bwa_aln_se" this.jobName = queueLogDir + outSai + ".bwa_aln_se" } @@ -456,7 +452,7 @@ class DataProcessingPipeline extends QScript { case class bwa_aln_pe (inBam: File, outSai1: File, index: Int) extends CommandLineFunction with ExternalCommonArgs { @Input(doc="bam file to be aligned") var bam = inBam @Output(doc="output sai file for 1st mating pair") var sai = outSai1 - def commandLine = bwaPath + " aln -t " + bwaThreads + bwaParameters + reference + " -b" + index + " " + bam + " > " + sai + def commandLine = bwaPath + " aln -t " + bwaThreads + " -q 5 " + reference + " -b" + index + " " + bam + " > " + sai this.analysisName = queueLogDir + outSai1 + ".bwa_aln_pe1" this.jobName = queueLogDir + outSai1 + ".bwa_aln_pe1" } From 322ea1262c29d0b125cd69844a5abd15ef88928b Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Tue, 25 Sep 2012 17:13:50 -0400 Subject: [PATCH 52/93] First implementation of a generic 'bundled' Data Processing Pipeline for germline and cancer. not ready for prime time yet! --- .../src/org/broadinstitute/sting/queue/util/QScriptUtils.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/public/scala/src/org/broadinstitute/sting/queue/util/QScriptUtils.scala b/public/scala/src/org/broadinstitute/sting/queue/util/QScriptUtils.scala index 1529d9951..f684e533f 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/util/QScriptUtils.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/util/QScriptUtils.scala @@ -57,7 +57,8 @@ object QScriptUtils { for (file <- fromFile(in).getLines()) if (!file.startsWith("#") && !file.isEmpty ) list :+= new File(file.trim()) - list.sortWith(_.compareTo(_) < 0) +// list.sortWith(_.compareTo(_) < 0) + list } /** From 658f3551712aa5e2f8cdc0ba78458c685a900a65 Mon Sep 17 00:00:00 2001 From: Kristian Cibulskis Date: Wed, 3 Oct 2012 16:25:34 -0400 Subject: [PATCH 59/93] initial cancer pipeline with mutations and partial indel support --- .../queue/extensions/cancer/MuTect.scala | 378 ++++++++++++++++++ 1 file changed, 378 insertions(+) create mode 100644 public/scala/src/org/broadinstitute/sting/queue/extensions/cancer/MuTect.scala diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/cancer/MuTect.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/cancer/MuTect.scala new file mode 100644 index 000000000..623d397d4 --- /dev/null +++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/cancer/MuTect.scala @@ -0,0 +1,378 @@ +package org.broadinstitute.sting.queue.extensions.cancer + +import java.io.File +import org.broadinstitute.sting.commandline.Argument +import org.broadinstitute.sting.commandline.Gather +import org.broadinstitute.sting.commandline.Input +import org.broadinstitute.sting.commandline.Output +import org.broadinstitute.sting.queue.function.scattergather.ScatterGatherableFunction +import org.broadinstitute.sting.queue.extensions.gatk.{LocusScatterFunction, TaggedFile} + +class MuTect extends org.broadinstitute.sting.queue.extensions.gatk.CommandLineGATK with ScatterGatherableFunction { + analysisName = "MuTect" + analysis_type = "MuTect" + scatterClass = classOf[LocusScatterFunction] + + /** used for debugging, basically exit as soon as we get the reads */ + @Argument(fullName="noop", shortName="", doc="used for debugging, basically exit as soon as we get the reads", required=false, exclusiveOf="", validation="") + var noop: Boolean = _ + + /** add many additional columns of statistics to the output file */ + @Argument(fullName="enable_extended_output", shortName="", doc="add many additional columns of statistics to the output file", required=false, exclusiveOf="", validation="") + var enable_extended_output: Boolean = _ + + /** used when running the caller on a normal (as if it were a tumor) to detect artifacts */ + @Argument(fullName="artifact_detection_mode", shortName="", doc="used when running the caller on a normal (as if it were a tumor) to detect artifacts", required=false, exclusiveOf="", validation="") + var artifact_detection_mode: Boolean = _ + + /** name to use for tumor in output files */ + @Argument(fullName="tumor_sample_name", shortName="", doc="name to use for tumor in output files", required=false, exclusiveOf="", validation="") + var tumor_sample_name: String = _ + + /** if the tumor bam contains multiple samples, only use read groups with SM equal to this value */ + @Argument(fullName="bam_tumor_sample_name", shortName="", doc="if the tumor bam contains multiple samples, only use read groups with SM equal to this value", required=false, exclusiveOf="", validation="") + var bam_tumor_sample_name: String = _ + + /** name to use for normal in output files */ + @Argument(fullName="normal_sample_name", shortName="", doc="name to use for normal in output files", required=false, exclusiveOf="", validation="") + var normal_sample_name: String = _ + + /** force output for each site */ + @Argument(fullName="force_output", shortName="", doc="force output for each site", required=false, exclusiveOf="", validation="") + var force_output: Boolean = _ + + /** force output for all alleles at each site */ + @Argument(fullName="force_alleles", shortName="", doc="force output for all alleles at each site", required=false, exclusiveOf="", validation="") + var force_alleles: Boolean = _ + + /** Initial LOD threshold for calling tumor variant */ + @Argument(fullName="initial_tumor_lod", shortName="", doc="Initial LOD threshold for calling tumor variant", required=false, exclusiveOf="", validation="") + var initial_tumor_lod: Option[Float] = None + + /** Format string for initial_tumor_lod */ + @Argument(fullName="initial_tumor_lodFormat", shortName="", doc="Format string for initial_tumor_lod", required=false, exclusiveOf="", validation="") + var initial_tumor_lodFormat: String = "%s" + + /** LOD threshold for calling tumor variant */ + @Argument(fullName="tumor_lod", shortName="", doc="LOD threshold for calling tumor variant", required=false, exclusiveOf="", validation="") + var tumor_lod: Option[Float] = None + + /** Format string for tumor_lod */ + @Argument(fullName="tumor_lodFormat", shortName="", doc="Format string for tumor_lod", required=false, exclusiveOf="", validation="") + var tumor_lodFormat: String = "%s" + + /** estimate of fraction (0-1) of physical contamination with other unrelated samples */ + @Argument(fullName="fraction_contamination", shortName="", doc="estimate of fraction (0-1) of physical contamination with other unrelated samples", required=false, exclusiveOf="", validation="") + var fraction_contamination: Option[Float] = None + + /** Format string for fraction_contamination */ + @Argument(fullName="fraction_contaminationFormat", shortName="", doc="Format string for fraction_contamination", required=false, exclusiveOf="", validation="") + var fraction_contaminationFormat: String = "%s" + + /** minimum fraction of cells which are presumed to have a mutation, used to handle non-clonality and contamination */ + @Argument(fullName="minimum_mutation_cell_fraction", shortName="", doc="minimum fraction of cells which are presumed to have a mutation, used to handle non-clonality and contamination", required=false, exclusiveOf="", validation="") + var minimum_mutation_cell_fraction: Option[Float] = None + + /** Format string for minimum_mutation_cell_fraction */ + @Argument(fullName="minimum_mutation_cell_fractionFormat", shortName="", doc="Format string for minimum_mutation_cell_fraction", required=false, exclusiveOf="", validation="") + var minimum_mutation_cell_fractionFormat: String = "%s" + + /** LOD threshold for calling normal non-germline */ + @Argument(fullName="normal_lod", shortName="", doc="LOD threshold for calling normal non-germline", required=false, exclusiveOf="", validation="") + var normal_lod: Option[Float] = None + + /** Format string for normal_lod */ + @Argument(fullName="normal_lodFormat", shortName="", doc="Format string for normal_lod", required=false, exclusiveOf="", validation="") + var normal_lodFormat: String = "%s" + + /** LOD threshold for calling normal non-variant */ + @Argument(fullName="normal_artifact_lod", shortName="", doc="LOD threshold for calling normal non-variant", required=false, exclusiveOf="", validation="") + var normal_artifact_lod: Option[Float] = None + + /** Format string for normal_artifact_lod */ + @Argument(fullName="normal_artifact_lodFormat", shortName="", doc="Format string for normal_artifact_lod", required=false, exclusiveOf="", validation="") + var normal_artifact_lodFormat: String = "%s" + + /** LOD threshold for calling strand bias */ + @Argument(fullName="strand_artifact_lod", shortName="", doc="LOD threshold for calling strand bias", required=false, exclusiveOf="", validation="") + var strand_artifact_lod: Option[Float] = None + + /** Format string for strand_artifact_lod */ + @Argument(fullName="strand_artifact_lodFormat", shortName="", doc="Format string for strand_artifact_lod", required=false, exclusiveOf="", validation="") + var strand_artifact_lodFormat: String = "%s" + + /** power threshold for calling strand bias */ + @Argument(fullName="strand_artifact_power_threshold", shortName="", doc="power threshold for calling strand bias", required=false, exclusiveOf="", validation="") + var strand_artifact_power_threshold: Option[Float] = None + + /** Format string for strand_artifact_power_threshold */ + @Argument(fullName="strand_artifact_power_thresholdFormat", shortName="", doc="Format string for strand_artifact_power_threshold", required=false, exclusiveOf="", validation="") + var strand_artifact_power_thresholdFormat: String = "%s" + + /** LOD threshold for calling normal non-variant at dbsnp sites */ + @Argument(fullName="dbsnp_normal_lod", shortName="", doc="LOD threshold for calling normal non-variant at dbsnp sites", required=false, exclusiveOf="", validation="") + var dbsnp_normal_lod: Option[Float] = None + + /** Format string for dbsnp_normal_lod */ + @Argument(fullName="dbsnp_normal_lodFormat", shortName="", doc="Format string for dbsnp_normal_lod", required=false, exclusiveOf="", validation="") + var dbsnp_normal_lodFormat: String = "%s" + + /** Power threshold for normal to determine germline vs variant */ + @Argument(fullName="somatic_classification_normal_power_threshold", shortName="", doc="Power threshold for normal to determine germline vs variant", required=false, exclusiveOf="", validation="") + var somatic_classification_normal_power_threshold: Option[Float] = None + + /** Format string for somatic_classification_normal_power_threshold */ + @Argument(fullName="somatic_classification_normal_power_thresholdFormat", shortName="", doc="Format string for somatic_classification_normal_power_threshold", required=false, exclusiveOf="", validation="") + var somatic_classification_normal_power_thresholdFormat: String = "%s" + + /** minimum allele fraction to be considered in normal, useful for normal sample contaminated with tumor */ + @Argument(fullName="minimum_normal_allele_fraction", shortName="", doc="minimum allele fraction to be considered in normal, useful for normal sample contaminated with tumor", required=false, exclusiveOf="", validation="") + var minimum_normal_allele_fraction: Option[Float] = None + + /** Format string for minimum_normal_allele_fraction */ + @Argument(fullName="minimum_normal_allele_fractionFormat", shortName="", doc="Format string for minimum_normal_allele_fraction", required=false, exclusiveOf="", validation="") + var minimum_normal_allele_fractionFormat: String = "%s" + + /** for computational efficiency, reject sites with allelic fraction below this threshold */ + @Argument(fullName="tumor_f_pretest", shortName="", doc="for computational efficiency, reject sites with allelic fraction below this threshold", required=false, exclusiveOf="", validation="") + var tumor_f_pretest: Option[Float] = None + + /** Format string for tumor_f_pretest */ + @Argument(fullName="tumor_f_pretestFormat", shortName="", doc="Format string for tumor_f_pretest", required=false, exclusiveOf="", validation="") + var tumor_f_pretestFormat: String = "%s" + + /** threshold for minimum base quality score */ + @Argument(fullName="min_qscore", shortName="", doc="threshold for minimum base quality score", required=false, exclusiveOf="", validation="") + var min_qscore: Option[Int] = None + + /** how many gapped events (ins/del) are allowed in proximity to this candidate */ + @Argument(fullName="gap_events_threshold", shortName="", doc="how many gapped events (ins/del) are allowed in proximity to this candidate", required=false, exclusiveOf="", validation="") + var gap_events_threshold: Option[Int] = None + + /** if this fraction or more of the bases in a read are soft/hard clipped, do not use this read for mutation calling */ + @Argument(fullName="heavily_clipped_read_fraction", shortName="", doc="if this fraction or more of the bases in a read are soft/hard clipped, do not use this read for mutation calling", required=false, exclusiveOf="", validation="") + var heavily_clipped_read_fraction: Option[Float] = None + + /** Format string for heavily_clipped_read_fraction */ + @Argument(fullName="heavily_clipped_read_fractionFormat", shortName="", doc="Format string for heavily_clipped_read_fraction", required=false, exclusiveOf="", validation="") + var heavily_clipped_read_fractionFormat: String = "%s" + + /** pvalue threshold for fishers exact test of clipping bias in mutant reads vs ref reads */ + @Argument(fullName="clipping_bias_pvalue_threshold", shortName="", doc="pvalue threshold for fishers exact test of clipping bias in mutant reads vs ref reads", required=false, exclusiveOf="", validation="") + var clipping_bias_pvalue_threshold: Option[Float] = None + + /** Format string for clipping_bias_pvalue_threshold */ + @Argument(fullName="clipping_bias_pvalue_thresholdFormat", shortName="", doc="Format string for clipping_bias_pvalue_threshold", required=false, exclusiveOf="", validation="") + var clipping_bias_pvalue_thresholdFormat: String = "%s" + + /** threshold for determining if there is relatedness between the alt and ref allele read piles */ + @Argument(fullName="fraction_mapq0_threshold", shortName="", doc="threshold for determining if there is relatedness between the alt and ref allele read piles", required=false, exclusiveOf="", validation="") + var fraction_mapq0_threshold: Option[Float] = None + + /** Format string for fraction_mapq0_threshold */ + @Argument(fullName="fraction_mapq0_thresholdFormat", shortName="", doc="Format string for fraction_mapq0_threshold", required=false, exclusiveOf="", validation="") + var fraction_mapq0_thresholdFormat: String = "%s" + + /** threshold for clustered read position artifact median */ + @Argument(fullName="pir_median_threshold", shortName="", doc="threshold for clustered read position artifact median", required=false, exclusiveOf="", validation="") + var pir_median_threshold: Option[Double] = None + + /** Format string for pir_median_threshold */ + @Argument(fullName="pir_median_thresholdFormat", shortName="", doc="Format string for pir_median_threshold", required=false, exclusiveOf="", validation="") + var pir_median_thresholdFormat: String = "%s" + + /** threshold for clustered read position artifact MAD */ + @Argument(fullName="pir_mad_threshold", shortName="", doc="threshold for clustered read position artifact MAD", required=false, exclusiveOf="", validation="") + var pir_mad_threshold: Option[Double] = None + + /** Format string for pir_mad_threshold */ + @Argument(fullName="pir_mad_thresholdFormat", shortName="", doc="Format string for pir_mad_threshold", required=false, exclusiveOf="", validation="") + var pir_mad_thresholdFormat: String = "%s" + + /** required minimum value for tumor alt allele maximum mapping quality score */ + @Argument(fullName="required_maximum_alt_allele_mapping_quality_score", shortName="", doc="required minimum value for tumor alt allele maximum mapping quality score", required=false, exclusiveOf="", validation="") + var required_maximum_alt_allele_mapping_quality_score: Option[Int] = None + + /** threshold for maximum alternate allele counts in normal */ + @Argument(fullName="max_alt_alleles_in_normal_count", shortName="", doc="threshold for maximum alternate allele counts in normal", required=false, exclusiveOf="", validation="") + var max_alt_alleles_in_normal_count: Option[Int] = None + + /** threshold for maximum alternate allele quality score sum in normal */ + @Argument(fullName="max_alt_alleles_in_normal_qscore_sum", shortName="", doc="threshold for maximum alternate allele quality score sum in normal", required=false, exclusiveOf="", validation="") + var max_alt_alleles_in_normal_qscore_sum: Option[Int] = None + + /** threshold for maximum alternate allele fraction in normal */ + @Argument(fullName="max_alt_allele_in_normal_fraction", shortName="", doc="threshold for maximum alternate allele fraction in normal", required=false, exclusiveOf="", validation="") + var max_alt_allele_in_normal_fraction: Option[Double] = None + + /** Format string for max_alt_allele_in_normal_fraction */ + @Argument(fullName="max_alt_allele_in_normal_fractionFormat", shortName="", doc="Format string for max_alt_allele_in_normal_fraction", required=false, exclusiveOf="", validation="") + var max_alt_allele_in_normal_fractionFormat: String = "%s" + + /** Phred scale quality score constant to use in power calculations */ + @Argument(fullName="power_constant_qscore", shortName="", doc="Phred scale quality score constant to use in power calculations", required=false, exclusiveOf="", validation="") + var power_constant_qscore: Option[Int] = None + + /** Absolute Copy Number Data, as defined by Absolute, to use in power calculations */ + @Argument(fullName="absolute_copy_number_data", shortName="", doc="Absolute Copy Number Data, as defined by Absolute, to use in power calculations", required=false, exclusiveOf="", validation="") + var absolute_copy_number_data: File = _ + + /** Allelic fraction constant to use in power calculations */ + @Argument(fullName="power_constant_af", shortName="", doc="Allelic fraction constant to use in power calculations", required=false, exclusiveOf="", validation="") + var power_constant_af: Option[Double] = None + + /** Format string for power_constant_af */ + @Argument(fullName="power_constant_afFormat", shortName="", doc="Format string for power_constant_af", required=false, exclusiveOf="", validation="") + var power_constant_afFormat: String = "%s" + + /** Call-stats output */ + @Output(fullName="out", shortName="o", doc="Call-stats output", required=false, exclusiveOf="", validation="") + @Gather(classOf[org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction]) + var out: File = _ + + /** + * Short name of out + * @return Short name of out + */ + def o = this.out + + /** + * Short name of out + * @param value Short name of out + */ + def o_=(value: File) { this.out = value } + + /** VCF file of DBSNP information */ + @Input(fullName="dbsnp", shortName="dbsnp", doc="VCF file of DBSNP information", required=false, exclusiveOf="", validation="") + var dbsnp: Seq[File] = Nil + + /** Dependencies on any indexes of dbsnp */ + @Input(fullName="dbsnpIndexes", shortName="", doc="Dependencies on any indexes of dbsnp", required=false, exclusiveOf="", validation="") + private var dbsnpIndexes: Seq[File] = Nil + + /** VCF file of COSMIC sites */ + @Input(fullName="cosmic", shortName="cosmic", doc="VCF file of COSMIC sites", required=false, exclusiveOf="", validation="") + var cosmic: Seq[File] = Nil + + /** Dependencies on any indexes of cosmic */ + @Input(fullName="cosmicIndexes", shortName="", doc="Dependencies on any indexes of cosmic", required=false, exclusiveOf="", validation="") + private var cosmicIndexes: Seq[File] = Nil + + /** VCF file of sites observed in normal */ + @Input(fullName="normal_panel", shortName="normal_panel", doc="VCF file of sites observed in normal", required=false, exclusiveOf="", validation="") + var normal_panel: Seq[File] = Nil + + /** Dependencies on any indexes of normal_panel */ + @Input(fullName="normal_panelIndexes", shortName="", doc="Dependencies on any indexes of normal_panel", required=false, exclusiveOf="", validation="") + private var normal_panelIndexes: Seq[File] = Nil + + /** write out coverage in WIGGLE format to this file */ + @Output(fullName="coverage_file", shortName="cov", doc="write out coverage in WIGGLE format to this file", required=false, exclusiveOf="", validation="") + @Gather(classOf[org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction]) + var coverage_file: File = _ + + /** + * Short name of coverage_file + * @return Short name of coverage_file + */ + def cov = this.coverage_file + + /** + * Short name of coverage_file + * @param value Short name of coverage_file + */ + def cov_=(value: File) { this.coverage_file = value } + + /** write out 20x of Q20 coverage in WIGGLE format to this file */ + @Output(fullName="coverage_20_q20_file", shortName="cov_q20", doc="write out 20x of Q20 coverage in WIGGLE format to this file", required=false, exclusiveOf="", validation="") + @Gather(classOf[org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction]) + var coverage_20_q20_file: File = _ + + /** + * Short name of coverage_20_q20_file + * @return Short name of coverage_20_q20_file + */ + def cov_q20 = this.coverage_20_q20_file + + /** + * Short name of coverage_20_q20_file + * @param value Short name of coverage_20_q20_file + */ + def cov_q20_=(value: File) { this.coverage_20_q20_file = value } + + /** write out power in WIGGLE format to this file */ + @Output(fullName="power_file", shortName="pow", doc="write out power in WIGGLE format to this file", required=false, exclusiveOf="", validation="") + @Gather(classOf[org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction]) + var power_file: File = _ + + /** + * Short name of power_file + * @return Short name of power_file + */ + def pow = this.power_file + + /** + * Short name of power_file + * @param value Short name of power_file + */ + def pow_=(value: File) { this.power_file = value } + + /** write out tumor read depth in WIGGLE format to this file */ + @Output(fullName="tumor_depth_file", shortName="tdf", doc="write out tumor read depth in WIGGLE format to this file", required=false, exclusiveOf="", validation="") + @Gather(classOf[org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction]) + var tumor_depth_file: File = _ + + /** + * Short name of tumor_depth_file + * @return Short name of tumor_depth_file + */ + def tdf = this.tumor_depth_file + + /** + * Short name of tumor_depth_file + * @param value Short name of tumor_depth_file + */ + def tdf_=(value: File) { this.tumor_depth_file = value } + + /** write out normal read depth in WIGGLE format to this file */ + @Output(fullName="normal_depth_file", shortName="ndf", doc="write out normal read depth in WIGGLE format to this file", required=false, exclusiveOf="", validation="") + @Gather(classOf[org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction]) + var normal_depth_file: File = _ + + /** + * Short name of normal_depth_file + * @return Short name of normal_depth_file + */ + def ndf = this.normal_depth_file + + /** + * Short name of normal_depth_file + * @param value Short name of normal_depth_file + */ + def ndf_=(value: File) { this.normal_depth_file = value } + + /** if a read has mismatching number of bases and base qualities, filter out the read instead of blowing up. */ + @Argument(fullName="filter_mismatching_base_and_quals", shortName="filterMBQ", doc="if a read has mismatching number of bases and base qualities, filter out the read instead of blowing up.", required=false, exclusiveOf="", validation="") + var filter_mismatching_base_and_quals: Boolean = _ + + /** + * Short name of filter_mismatching_base_and_quals + * @return Short name of filter_mismatching_base_and_quals + */ + def filterMBQ = this.filter_mismatching_base_and_quals + + /** + * Short name of filter_mismatching_base_and_quals + * @param value Short name of filter_mismatching_base_and_quals + */ + def filterMBQ_=(value: Boolean) { this.filter_mismatching_base_and_quals = value } + + override def freezeFieldValues() { + super.freezeFieldValues() + dbsnpIndexes ++= dbsnp.filter(orig => orig != null).map(orig => new File(orig.getPath + ".idx")) + cosmicIndexes ++= cosmic.filter(orig => orig != null).map(orig => new File(orig.getPath + ".idx")) + normal_panelIndexes ++= normal_panel.filter(orig => orig != null).map(orig => new File(orig.getPath + ".idx")) + } + + override def commandLine = super.commandLine + conditional(noop, "--noop", escape=true, format="%s") + conditional(enable_extended_output, "--enable_extended_output", escape=true, format="%s") + conditional(artifact_detection_mode, "--artifact_detection_mode", escape=true, format="%s") + optional("--tumor_sample_name", tumor_sample_name, spaceSeparated=true, escape=true, format="%s") + optional("--bam_tumor_sample_name", bam_tumor_sample_name, spaceSeparated=true, escape=true, format="%s") + optional("--normal_sample_name", normal_sample_name, spaceSeparated=true, escape=true, format="%s") + conditional(force_output, "--force_output", escape=true, format="%s") + conditional(force_alleles, "--force_alleles", escape=true, format="%s") + optional("--initial_tumor_lod", initial_tumor_lod, spaceSeparated=true, escape=true, format=initial_tumor_lodFormat) + optional("--tumor_lod", tumor_lod, spaceSeparated=true, escape=true, format=tumor_lodFormat) + optional("--fraction_contamination", fraction_contamination, spaceSeparated=true, escape=true, format=fraction_contaminationFormat) + optional("--minimum_mutation_cell_fraction", minimum_mutation_cell_fraction, spaceSeparated=true, escape=true, format=minimum_mutation_cell_fractionFormat) + optional("--normal_lod", normal_lod, spaceSeparated=true, escape=true, format=normal_lodFormat) + optional("--normal_artifact_lod", normal_artifact_lod, spaceSeparated=true, escape=true, format=normal_artifact_lodFormat) + optional("--strand_artifact_lod", strand_artifact_lod, spaceSeparated=true, escape=true, format=strand_artifact_lodFormat) + optional("--strand_artifact_power_threshold", strand_artifact_power_threshold, spaceSeparated=true, escape=true, format=strand_artifact_power_thresholdFormat) + optional("--dbsnp_normal_lod", dbsnp_normal_lod, spaceSeparated=true, escape=true, format=dbsnp_normal_lodFormat) + optional("--somatic_classification_normal_power_threshold", somatic_classification_normal_power_threshold, spaceSeparated=true, escape=true, format=somatic_classification_normal_power_thresholdFormat) + optional("--minimum_normal_allele_fraction", minimum_normal_allele_fraction, spaceSeparated=true, escape=true, format=minimum_normal_allele_fractionFormat) + optional("--tumor_f_pretest", tumor_f_pretest, spaceSeparated=true, escape=true, format=tumor_f_pretestFormat) + optional("--min_qscore", min_qscore, spaceSeparated=true, escape=true, format="%s") + optional("--gap_events_threshold", gap_events_threshold, spaceSeparated=true, escape=true, format="%s") + optional("--heavily_clipped_read_fraction", heavily_clipped_read_fraction, spaceSeparated=true, escape=true, format=heavily_clipped_read_fractionFormat) + optional("--clipping_bias_pvalue_threshold", clipping_bias_pvalue_threshold, spaceSeparated=true, escape=true, format=clipping_bias_pvalue_thresholdFormat) + optional("--fraction_mapq0_threshold", fraction_mapq0_threshold, spaceSeparated=true, escape=true, format=fraction_mapq0_thresholdFormat) + optional("--pir_median_threshold", pir_median_threshold, spaceSeparated=true, escape=true, format=pir_median_thresholdFormat) + optional("--pir_mad_threshold", pir_mad_threshold, spaceSeparated=true, escape=true, format=pir_mad_thresholdFormat) + optional("--required_maximum_alt_allele_mapping_quality_score", required_maximum_alt_allele_mapping_quality_score, spaceSeparated=true, escape=true, format="%s") + optional("--max_alt_alleles_in_normal_count", max_alt_alleles_in_normal_count, spaceSeparated=true, escape=true, format="%s") + optional("--max_alt_alleles_in_normal_qscore_sum", max_alt_alleles_in_normal_qscore_sum, spaceSeparated=true, escape=true, format="%s") + optional("--max_alt_allele_in_normal_fraction", max_alt_allele_in_normal_fraction, spaceSeparated=true, escape=true, format=max_alt_allele_in_normal_fractionFormat) + optional("--power_constant_qscore", power_constant_qscore, spaceSeparated=true, escape=true, format="%s") + optional("--absolute_copy_number_data", absolute_copy_number_data, spaceSeparated=true, escape=true, format="%s") + optional("--power_constant_af", power_constant_af, spaceSeparated=true, escape=true, format=power_constant_afFormat) + optional("-o", out, spaceSeparated=true, escape=true, format="%s") + repeat("-dbsnp", dbsnp, formatPrefix=TaggedFile.formatCommandLineParameter, spaceSeparated=true, escape=true, format="%s") + repeat("-cosmic", cosmic, formatPrefix=TaggedFile.formatCommandLineParameter, spaceSeparated=true, escape=true, format="%s") + repeat("-normal_panel", normal_panel, formatPrefix=TaggedFile.formatCommandLineParameter, spaceSeparated=true, escape=true, format="%s") + optional("-cov", coverage_file, spaceSeparated=true, escape=true, format="%s") + optional("-cov_q20", coverage_20_q20_file, spaceSeparated=true, escape=true, format="%s") + optional("-pow", power_file, spaceSeparated=true, escape=true, format="%s") + optional("-tdf", tumor_depth_file, spaceSeparated=true, escape=true, format="%s") + optional("-ndf", normal_depth_file, spaceSeparated=true, escape=true, format="%s") + conditional(filter_mismatching_base_and_quals, "-filterMBQ", escape=true, format="%s") +} From 22b79fb4dda0c75d9c2a868bc2c0f0b8daa7f504 Mon Sep 17 00:00:00 2001 From: Guillermo del Angel Date: Thu, 11 Oct 2012 11:09:49 -0400 Subject: [PATCH 61/93] Resolve [DEV-7]: add single-sample VCF calling at end of FASTQ-BAM pipeline. Initial steps of [DEV-4]: queue extensions for Picard QC metrics --- .../picard/CalculateHsMetrics.scala | 60 +++++++++++++++++++ .../picard/CollectGcBiasMetrics.scala | 32 ++++++++++ .../picard/CollectMultipleMetrics.scala | 36 +++++++++++ 3 files changed, 128 insertions(+) create mode 100644 public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CalculateHsMetrics.scala create mode 100644 public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CollectGcBiasMetrics.scala create mode 100644 public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CollectMultipleMetrics.scala diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CalculateHsMetrics.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CalculateHsMetrics.scala new file mode 100644 index 000000000..75e9300dc --- /dev/null +++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CalculateHsMetrics.scala @@ -0,0 +1,60 @@ +package org.broadinstitute.sting.queue.extensions.picard + +import org.broadinstitute.sting.commandline.{Argument, Output, Input} +import java.io.File + +/** + * Created with IntelliJ IDEA. + * User: delangel + * Date: 10/9/12 + * Time: 5:59 PM + * To change this template use File | Settings | File Templates. + */ +class CalculateHsMetrics extends org.broadinstitute.sting.queue.function.JavaCommandLineFunction with PicardBamFunction { + analysisName = "CalculateHsMetrics" + javaMainClass = "net.sf.picard.sam.CalculateHsMetrics" + + @Input(doc="The input SAM or BAM files to analyze. Must be coordinate sorted.", shortName = "input", fullName = "input_bam_files", required = true) + var input: Seq[File] = Nil + + @Output(doc="The output file to write statistics to", shortName = "output", fullName = "output_file", required = true) + var output: File = _ + + @Argument(doc="Interval list with targets", shortName = "targets", fullName = "target_list", required = true) + var targets: File = _ + + @Argument(doc="Interval list with baits", shortName = "baits", fullName = "bait_list", required = true) + var baits: File = _ + + @Argument(doc="Reference file", shortName = "reference", fullName = "reference", required = true) + var reference: File = _ + /* + @Argument(doc = "Maximum number of file handles to keep open when spilling read ends to disk. Set this number a little lower than the per-process maximum number of file that may be open. This number can be found by executing the 'ulimit -n' command on a Unix system.", shortName = "max_file_handles", fullName ="max_file_handles_for_read_ends_maps", required=false) + var MAX_FILE_HANDLES_FOR_READ_ENDS_MAP: Int = -1; + + @Argument(doc = "This number, plus the maximum RAM available to the JVM, determine the memory footprint used by some of the sorting collections. If you are running out of memory, try reducing this number.", shortName = "sorting_ratio", fullName = "sorting_collection_size_ratio", required = false) + var SORTING_COLLECTION_SIZE_RATIO: Double = -1 + */ + override def freezeFieldValues() { + super.freezeFieldValues() +// if (outputIndex == null && output != null) + // outputIndex = new File(output.getName.stripSuffix(".bam") + ".bai") + } + + val level = "SAMPLE" + + override def inputBams = input + override def outputBam = output + //this.sortOrder = null + //this.createIndex = Some(true) + override def commandLine = super.commandLine + + required("BAIT_INTERVALS=" + baits) + + required("TARGET_INTERVALS=" + targets) + + required("REFERENCE_SEQUENCE=" + reference) + + optional("METRIC_ACCUMULATION_LEVEL="+level)/*+ + conditional(REMOVE_DUPLICATES, "REMOVE_DUPLICATES=true") + + conditional(MAX_FILE_HANDLES_FOR_READ_ENDS_MAP > 0, "MAX_FILE_HANDLES_FOR_READ_ENDS_MAP=" + MAX_FILE_HANDLES_FOR_READ_ENDS_MAP.toString) + + conditional(SORTING_COLLECTION_SIZE_RATIO > 0, "SORTING_COLLECTION_SIZE_RATIO=" + SORTING_COLLECTION_SIZE_RATIO.toString) */ + + +} diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CollectGcBiasMetrics.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CollectGcBiasMetrics.scala new file mode 100644 index 000000000..de2b0af9e --- /dev/null +++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CollectGcBiasMetrics.scala @@ -0,0 +1,32 @@ +package org.broadinstitute.sting.queue.extensions.picard + +import org.broadinstitute.sting.commandline.{Argument, Output, Input} +import java.io.File + +/** + * Created with IntelliJ IDEA. + * User: delangel + * Date: 10/10/12 + * Time: 10:37 AM + * To change this template use File | Settings | File Templates. + */ +class CollectGcBiasMetrics extends org.broadinstitute.sting.queue.function.JavaCommandLineFunction with PicardBamFunction { + analysisName = "CalculateGcMetrics" + javaMainClass = "net.sf.picard.sam.CalculateGcMetrics" + + @Input(doc="The input SAM or BAM files to analyze. Must be coordinate sorted.", shortName = "input", fullName = "input_bam_files", required = true) + var input: Seq[File] = Nil + + @Output(doc="The output file to write statistics to", shortName = "output", fullName = "output_file", required = true) + var output: File = _ + + @Argument(doc="Reference file", shortName = "reference", fullName = "reference", required = true) + var reference: File = _ + + override def inputBams = input + override def outputBam = output + override def commandLine = super.commandLine + + required("CHART_OUTPUT=" + output+".pdf") + + required("REFERENCE_SEQUENCE=" + reference) + + required("ASSUME_SORTED=true") +} diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CollectMultipleMetrics.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CollectMultipleMetrics.scala new file mode 100644 index 000000000..a9af4e858 --- /dev/null +++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CollectMultipleMetrics.scala @@ -0,0 +1,36 @@ +package org.broadinstitute.sting.queue.extensions.picard + +import org.broadinstitute.sting.commandline.{Argument, Output, Input} +import java.io.File + +/** + * Created with IntelliJ IDEA. + * User: delangel + * Date: 10/10/12 + * Time: 10:37 AM + * To change this template use File | Settings | File Templates. + */ +class CollectMultipleMetrics extends org.broadinstitute.sting.queue.function.JavaCommandLineFunction with PicardBamFunction{ + analysisName = "CalculateMultipleMetrics" + javaMainClass = "net.sf.picard.sam.CalculateMultipleMetrics" + + @Input(doc="The input SAM or BAM files to analyze. Must be coordinate sorted.", shortName = "input", fullName = "input_bam_files", required = true) + var input: Seq[File] = Nil + + @Output(doc="The output file to write statistics to", shortName = "output", fullName = "output_file", required = true) + var output: File = _ + + @Argument(doc="Reference file", shortName = "reference", fullName = "reference", required = true) + var reference: File = _ + + override def inputBams = input + override def outputBam = output + override def commandLine = super.commandLine + + required("REFERENCE_SEQUENCE=" + reference) + + required("ASSUME_SORTED=true") + + required("PROGRAM=QualityScoreDistribution") + + required("PROGRAM=MeanQualityByCycle") + + required("PROGRAM=CollectAlignmentSummaryMetrics" ) + + +} From dad7ca281eaae6ba1318e295d1ad9cac8ef732ae Mon Sep 17 00:00:00 2001 From: Kristian Cibulskis Date: Fri, 12 Oct 2012 14:18:12 -0400 Subject: [PATCH 63/93] upgraded mutation caller with VCF output raw indel calls (non filtered,non vcf) --- .../queue/extensions/cancer/MuTect.scala | 33 +++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/cancer/MuTect.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/cancer/MuTect.scala index 623d397d4..1193e7dec 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/extensions/cancer/MuTect.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/cancer/MuTect.scala @@ -6,7 +6,7 @@ import org.broadinstitute.sting.commandline.Gather import org.broadinstitute.sting.commandline.Input import org.broadinstitute.sting.commandline.Output import org.broadinstitute.sting.queue.function.scattergather.ScatterGatherableFunction -import org.broadinstitute.sting.queue.extensions.gatk.{LocusScatterFunction, TaggedFile} +import org.broadinstitute.sting.queue.extensions.gatk.{TaggedFile, VcfGatherFunction, LocusScatterFunction} class MuTect extends org.broadinstitute.sting.queue.extensions.gatk.CommandLineGATK with ScatterGatherableFunction { analysisName = "MuTect" @@ -45,6 +45,10 @@ class MuTect extends org.broadinstitute.sting.queue.extensions.gatk.CommandLineG @Argument(fullName="force_alleles", shortName="", doc="force output for all alleles at each site", required=false, exclusiveOf="", validation="") var force_alleles: Boolean = _ + /** only emit passing calls */ + @Argument(fullName="only_passing_calls", shortName="", doc="only emit passing calls", required=false, exclusiveOf="", validation="") + var only_passing_calls: Boolean = _ + /** Initial LOD threshold for calling tumor variant */ @Argument(fullName="initial_tumor_lod", shortName="", doc="Initial LOD threshold for calling tumor variant", required=false, exclusiveOf="", validation="") var initial_tumor_lod: Option[Float] = None @@ -242,6 +246,28 @@ class MuTect extends org.broadinstitute.sting.queue.extensions.gatk.CommandLineG */ def o_=(value: File) { this.out = value } + /** VCF output of mutation candidates */ + @Output(fullName="vcf", shortName="vcf", doc="VCF output of mutation candidates", required=false, exclusiveOf="", validation="") + @Gather(classOf[VcfGatherFunction]) + var vcf: File = _ + + /** Automatically generated index for vcf */ + @Output(fullName="vcfIndex", shortName="", doc="Automatically generated index for vcf", required=false, exclusiveOf="", validation="") + @Gather(enabled=false) + private var vcfIndex: File = _ + + /** Don't output the usual VCF header tag with the command line. FOR DEBUGGING PURPOSES ONLY. This option is required in order to pass integration tests. */ + @Argument(fullName="no_cmdline_in_header", shortName="no_cmdline_in_header", doc="Don't output the usual VCF header tag with the command line. FOR DEBUGGING PURPOSES ONLY. This option is required in order to pass integration tests.", required=false, exclusiveOf="", validation="") + var no_cmdline_in_header: Boolean = _ + + /** Just output sites without genotypes (i.e. only the first 8 columns of the VCF) */ + @Argument(fullName="sites_only", shortName="sites_only", doc="Just output sites without genotypes (i.e. only the first 8 columns of the VCF)", required=false, exclusiveOf="", validation="") + var sites_only: Boolean = _ + + /** force BCF output, regardless of the file's extension */ + @Argument(fullName="bcf", shortName="bcf", doc="force BCF output, regardless of the file's extension", required=false, exclusiveOf="", validation="") + var bcf: Boolean = _ + /** VCF file of DBSNP information */ @Input(fullName="dbsnp", shortName="dbsnp", doc="VCF file of DBSNP information", required=false, exclusiveOf="", validation="") var dbsnp: Seq[File] = Nil @@ -369,10 +395,13 @@ class MuTect extends org.broadinstitute.sting.queue.extensions.gatk.CommandLineG override def freezeFieldValues() { super.freezeFieldValues() + if (vcf != null && !org.broadinstitute.sting.utils.io.IOUtils.isSpecialFile(vcf)) + if (!org.broadinstitute.sting.gatk.io.stubs.VCFWriterArgumentTypeDescriptor.isCompressed(vcf.getPath)) + vcfIndex = new File(vcf.getPath + ".idx") dbsnpIndexes ++= dbsnp.filter(orig => orig != null).map(orig => new File(orig.getPath + ".idx")) cosmicIndexes ++= cosmic.filter(orig => orig != null).map(orig => new File(orig.getPath + ".idx")) normal_panelIndexes ++= normal_panel.filter(orig => orig != null).map(orig => new File(orig.getPath + ".idx")) } - override def commandLine = super.commandLine + conditional(noop, "--noop", escape=true, format="%s") + conditional(enable_extended_output, "--enable_extended_output", escape=true, format="%s") + conditional(artifact_detection_mode, "--artifact_detection_mode", escape=true, format="%s") + optional("--tumor_sample_name", tumor_sample_name, spaceSeparated=true, escape=true, format="%s") + optional("--bam_tumor_sample_name", bam_tumor_sample_name, spaceSeparated=true, escape=true, format="%s") + optional("--normal_sample_name", normal_sample_name, spaceSeparated=true, escape=true, format="%s") + conditional(force_output, "--force_output", escape=true, format="%s") + conditional(force_alleles, "--force_alleles", escape=true, format="%s") + optional("--initial_tumor_lod", initial_tumor_lod, spaceSeparated=true, escape=true, format=initial_tumor_lodFormat) + optional("--tumor_lod", tumor_lod, spaceSeparated=true, escape=true, format=tumor_lodFormat) + optional("--fraction_contamination", fraction_contamination, spaceSeparated=true, escape=true, format=fraction_contaminationFormat) + optional("--minimum_mutation_cell_fraction", minimum_mutation_cell_fraction, spaceSeparated=true, escape=true, format=minimum_mutation_cell_fractionFormat) + optional("--normal_lod", normal_lod, spaceSeparated=true, escape=true, format=normal_lodFormat) + optional("--normal_artifact_lod", normal_artifact_lod, spaceSeparated=true, escape=true, format=normal_artifact_lodFormat) + optional("--strand_artifact_lod", strand_artifact_lod, spaceSeparated=true, escape=true, format=strand_artifact_lodFormat) + optional("--strand_artifact_power_threshold", strand_artifact_power_threshold, spaceSeparated=true, escape=true, format=strand_artifact_power_thresholdFormat) + optional("--dbsnp_normal_lod", dbsnp_normal_lod, spaceSeparated=true, escape=true, format=dbsnp_normal_lodFormat) + optional("--somatic_classification_normal_power_threshold", somatic_classification_normal_power_threshold, spaceSeparated=true, escape=true, format=somatic_classification_normal_power_thresholdFormat) + optional("--minimum_normal_allele_fraction", minimum_normal_allele_fraction, spaceSeparated=true, escape=true, format=minimum_normal_allele_fractionFormat) + optional("--tumor_f_pretest", tumor_f_pretest, spaceSeparated=true, escape=true, format=tumor_f_pretestFormat) + optional("--min_qscore", min_qscore, spaceSeparated=true, escape=true, format="%s") + optional("--gap_events_threshold", gap_events_threshold, spaceSeparated=true, escape=true, format="%s") + optional("--heavily_clipped_read_fraction", heavily_clipped_read_fraction, spaceSeparated=true, escape=true, format=heavily_clipped_read_fractionFormat) + optional("--clipping_bias_pvalue_threshold", clipping_bias_pvalue_threshold, spaceSeparated=true, escape=true, format=clipping_bias_pvalue_thresholdFormat) + optional("--fraction_mapq0_threshold", fraction_mapq0_threshold, spaceSeparated=true, escape=true, format=fraction_mapq0_thresholdFormat) + optional("--pir_median_threshold", pir_median_threshold, spaceSeparated=true, escape=true, format=pir_median_thresholdFormat) + optional("--pir_mad_threshold", pir_mad_threshold, spaceSeparated=true, escape=true, format=pir_mad_thresholdFormat) + optional("--required_maximum_alt_allele_mapping_quality_score", required_maximum_alt_allele_mapping_quality_score, spaceSeparated=true, escape=true, format="%s") + optional("--max_alt_alleles_in_normal_count", max_alt_alleles_in_normal_count, spaceSeparated=true, escape=true, format="%s") + optional("--max_alt_alleles_in_normal_qscore_sum", max_alt_alleles_in_normal_qscore_sum, spaceSeparated=true, escape=true, format="%s") + optional("--max_alt_allele_in_normal_fraction", max_alt_allele_in_normal_fraction, spaceSeparated=true, escape=true, format=max_alt_allele_in_normal_fractionFormat) + optional("--power_constant_qscore", power_constant_qscore, spaceSeparated=true, escape=true, format="%s") + optional("--absolute_copy_number_data", absolute_copy_number_data, spaceSeparated=true, escape=true, format="%s") + optional("--power_constant_af", power_constant_af, spaceSeparated=true, escape=true, format=power_constant_afFormat) + optional("-o", out, spaceSeparated=true, escape=true, format="%s") + repeat("-dbsnp", dbsnp, formatPrefix=TaggedFile.formatCommandLineParameter, spaceSeparated=true, escape=true, format="%s") + repeat("-cosmic", cosmic, formatPrefix=TaggedFile.formatCommandLineParameter, spaceSeparated=true, escape=true, format="%s") + repeat("-normal_panel", normal_panel, formatPrefix=TaggedFile.formatCommandLineParameter, spaceSeparated=true, escape=true, format="%s") + optional("-cov", coverage_file, spaceSeparated=true, escape=true, format="%s") + optional("-cov_q20", coverage_20_q20_file, spaceSeparated=true, escape=true, format="%s") + optional("-pow", power_file, spaceSeparated=true, escape=true, format="%s") + optional("-tdf", tumor_depth_file, spaceSeparated=true, escape=true, format="%s") + optional("-ndf", normal_depth_file, spaceSeparated=true, escape=true, format="%s") + conditional(filter_mismatching_base_and_quals, "-filterMBQ", escape=true, format="%s") + override def commandLine = super.commandLine + conditional(noop, "--noop", escape=true, format="%s") + conditional(enable_extended_output, "--enable_extended_output", escape=true, format="%s") + conditional(artifact_detection_mode, "--artifact_detection_mode", escape=true, format="%s") + optional("--tumor_sample_name", tumor_sample_name, spaceSeparated=true, escape=true, format="%s") + optional("--bam_tumor_sample_name", bam_tumor_sample_name, spaceSeparated=true, escape=true, format="%s") + optional("--normal_sample_name", normal_sample_name, spaceSeparated=true, escape=true, format="%s") + conditional(force_output, "--force_output", escape=true, format="%s") + conditional(force_alleles, "--force_alleles", escape=true, format="%s") + conditional(only_passing_calls, "--only_passing_calls", escape=true, format="%s") + optional("--initial_tumor_lod", initial_tumor_lod, spaceSeparated=true, escape=true, format=initial_tumor_lodFormat) + optional("--tumor_lod", tumor_lod, spaceSeparated=true, escape=true, format=tumor_lodFormat) + optional("--fraction_contamination", fraction_contamination, spaceSeparated=true, escape=true, format=fraction_contaminationFormat) + optional("--minimum_mutation_cell_fraction", minimum_mutation_cell_fraction, spaceSeparated=true, escape=true, format=minimum_mutation_cell_fractionFormat) + optional("--normal_lod", normal_lod, spaceSeparated=true, escape=true, format=normal_lodFormat) + optional("--normal_artifact_lod", normal_artifact_lod, spaceSeparated=true, escape=true, format=normal_artifact_lodFormat) + optional("--strand_artifact_lod", strand_artifact_lod, spaceSeparated=true, escape=true, format=strand_artifact_lodFormat) + optional("--strand_artifact_power_threshold", strand_artifact_power_threshold, spaceSeparated=true, escape=true, format=strand_artifact_power_thresholdFormat) + optional("--dbsnp_normal_lod", dbsnp_normal_lod, spaceSeparated=true, escape=true, format=dbsnp_normal_lodFormat) + optional("--somatic_classification_normal_power_threshold", somatic_classification_normal_power_threshold, spaceSeparated=true, escape=true, format=somatic_classification_normal_power_thresholdFormat) + optional("--minimum_normal_allele_fraction", minimum_normal_allele_fraction, spaceSeparated=true, escape=true, format=minimum_normal_allele_fractionFormat) + optional("--tumor_f_pretest", tumor_f_pretest, spaceSeparated=true, escape=true, format=tumor_f_pretestFormat) + optional("--min_qscore", min_qscore, spaceSeparated=true, escape=true, format="%s") + optional("--gap_events_threshold", gap_events_threshold, spaceSeparated=true, escape=true, format="%s") + optional("--heavily_clipped_read_fraction", heavily_clipped_read_fraction, spaceSeparated=true, escape=true, format=heavily_clipped_read_fractionFormat) + optional("--clipping_bias_pvalue_threshold", clipping_bias_pvalue_threshold, spaceSeparated=true, escape=true, format=clipping_bias_pvalue_thresholdFormat) + optional("--fraction_mapq0_threshold", fraction_mapq0_threshold, spaceSeparated=true, escape=true, format=fraction_mapq0_thresholdFormat) + optional("--pir_median_threshold", pir_median_threshold, spaceSeparated=true, escape=true, format=pir_median_thresholdFormat) + optional("--pir_mad_threshold", pir_mad_threshold, spaceSeparated=true, escape=true, format=pir_mad_thresholdFormat) + optional("--required_maximum_alt_allele_mapping_quality_score", required_maximum_alt_allele_mapping_quality_score, spaceSeparated=true, escape=true, format="%s") + optional("--max_alt_alleles_in_normal_count", max_alt_alleles_in_normal_count, spaceSeparated=true, escape=true, format="%s") + optional("--max_alt_alleles_in_normal_qscore_sum", max_alt_alleles_in_normal_qscore_sum, spaceSeparated=true, escape=true, format="%s") + optional("--max_alt_allele_in_normal_fraction", max_alt_allele_in_normal_fraction, spaceSeparated=true, escape=true, format=max_alt_allele_in_normal_fractionFormat) + optional("--power_constant_qscore", power_constant_qscore, spaceSeparated=true, escape=true, format="%s") + optional("--absolute_copy_number_data", absolute_copy_number_data, spaceSeparated=true, escape=true, format="%s") + optional("--power_constant_af", power_constant_af, spaceSeparated=true, escape=true, format=power_constant_afFormat) + optional("-o", out, spaceSeparated=true, escape=true, format="%s") + optional("-vcf", vcf, spaceSeparated=true, escape=true, format="%s") + conditional(no_cmdline_in_header, "-no_cmdline_in_header", escape=true, format="%s") + conditional(sites_only, "-sites_only", escape=true, format="%s") + conditional(bcf, "-bcf", escape=true, format="%s") + repeat("-dbsnp", dbsnp, formatPrefix=TaggedFile.formatCommandLineParameter, spaceSeparated=true, escape=true, format="%s") + repeat("-cosmic", cosmic, formatPrefix=TaggedFile.formatCommandLineParameter, spaceSeparated=true, escape=true, format="%s") + repeat("-normal_panel", normal_panel, formatPrefix=TaggedFile.formatCommandLineParameter, spaceSeparated=true, escape=true, format="%s") + optional("-cov", coverage_file, spaceSeparated=true, escape=true, format="%s") + optional("-cov_q20", coverage_20_q20_file, spaceSeparated=true, escape=true, format="%s") + optional("-pow", power_file, spaceSeparated=true, escape=true, format="%s") + optional("-tdf", tumor_depth_file, spaceSeparated=true, escape=true, format="%s") + optional("-ndf", normal_depth_file, spaceSeparated=true, escape=true, format="%s") + conditional(filter_mismatching_base_and_quals, "-filterMBQ", escape=true, format="%s") } From a234bacb02ee401efb493403e8afcd6b789fec4a Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Wed, 10 Oct 2012 15:00:17 -0400 Subject: [PATCH 65/93] Making nContigs parameter hidden in ReduceReads For now, the het reduction should only be performed for diploids (n=2). We haven't really tested it for other ploidy so it should remain hidden until someone braves it out. --- .../compression/reducereads/ReduceReads.java | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java index 1b3e68647..5810bc94f 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java @@ -181,15 +181,6 @@ public class ReduceReads extends ReadWalker, ReduceRea @Argument(fullName = "minimum_del_proportion_to_trigger_variant", shortName = "mindel", doc = "", required = false) private double minIndelProportionToTriggerVariant = 0.05; - /** - * Minimum proportion of indels in a site to trigger a variant region. Anything below this will be - * considered consensus. - */ - @Argument(fullName = "contigs", shortName = "ctg", doc = "", required = false) - private int nContigs = 2; - - - /** * Downsamples the coverage of a variable region approximately (guarantees the minimum to be equal to this). * A value of 0 turns downsampling off. @@ -197,6 +188,14 @@ public class ReduceReads extends ReadWalker, ReduceRea @Argument(fullName = "downsample_coverage", shortName = "ds", doc = "", required = false) private int downsampleCoverage = 250; + /** + * Number of chromossomes in the sample (this is used for the polyploid consensus compression). Only + * tested for humans (or organisms with n=2). Use at your own risk! + */ + @Hidden + @Argument(fullName = "contigs", shortName = "ctg", doc = "", required = false) + private int nContigs = 2; + @Hidden @Argument(fullName = "", shortName = "dl", doc = "", required = false) private int debugLevel = 0; From 80d92e0c636a58b9cfc948ab42f650a54955ba6d Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Fri, 12 Oct 2012 13:50:10 -0400 Subject: [PATCH 66/93] Allowing the GATK to have non-required outputs Modified the SAMFileWriterArgumentTypeDescriptor to accept output bam files that are null if they're not required (in the @Output annotation). This change enables the nWayOut parameter for the IndeRealigner and ReduceReads to operate optionally while maintaining the original single way out. [#DEV-10 transition:31 resolution:1] --- .../SAMFileWriterArgumentTypeDescriptor.java | 36 +++++++++---------- .../gatk/walkers/indels/IndelRealigner.java | 4 +-- .../indels/IndelRealignerIntegrationTest.java | 10 ++++++ 3 files changed, 27 insertions(+), 23 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterArgumentTypeDescriptor.java index 8566f6c63..dcf2704f5 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterArgumentTypeDescriptor.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterArgumentTypeDescriptor.java @@ -124,32 +124,28 @@ public class SAMFileWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor // This parser has been passed a null filename and the GATK is not responsible for creating a type default for the object; // therefore, the user must have failed to specify a type default - if(writerFileName == null) { - if(!source.isRequired()) - throw new MissingArgumentValueException(bamArgumentDefinition); - if(generateMD5) + if(writerFileName == null && generateMD5) throw new ArgumentException("MD5 generation specified, but no output file specified. If md5 generation is desired, please specify a BAM output file and an md5 file will be written alongside."); - } // Create the stub and set parameters. - SAMFileWriterStub stub; - if ( writerFileName != null ) + SAMFileWriterStub stub = null; // stub = new SAMFileWriterStub(engine, defaultOutputStream); + + if ( writerFileName != null ) { stub = new SAMFileWriterStub(engine, new File(writerFileName)); - else - stub = new SAMFileWriterStub(engine, defaultOutputStream); - if ( compressionLevel != null ) - stub.setCompressionLevel(compressionLevel); - if ( indexOnTheFly ) - stub.setIndexOnTheFly(indexOnTheFly); - if ( generateMD5 ) - stub.setGenerateMD5(generateMD5); - if ( simplifyBAM ) - stub.setSimplifyBAM(simplifyBAM); + if ( compressionLevel != null ) + stub.setCompressionLevel(compressionLevel); + if ( indexOnTheFly ) + stub.setIndexOnTheFly(indexOnTheFly); + if ( generateMD5 ) + stub.setGenerateMD5(generateMD5); + if ( simplifyBAM ) + stub.setSimplifyBAM(simplifyBAM); - // WARNING: Side effects required by engine! - parsingEngine.addTags(stub,getArgumentTags(matches)); - engine.addOutput(stub); + // WARNING: Side effects required by engine! + parsingEngine.addTags(stub,getArgumentTags(matches)); + engine.addOutput(stub); + } return stub; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java index 76d8d85c2..998894fbf 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java @@ -370,8 +370,6 @@ public class IndelRealigner extends ReadWalker { currentInterval = intervals.hasNext() ? intervals.next() : null; - writerToUse = writer; - if ( N_WAY_OUT != null ) { boolean createIndex = true; @@ -383,9 +381,9 @@ public class IndelRealigner extends ReadWalker { createIndex, generateMD5s,createProgramRecord(),KEEP_ALL_PG_RECORDS); } } else { - // set up the output writer setupWriter(getToolkit().getSAMFileHeader()); + writerToUse = writer; } manager = new ConstrainedMateFixingManager(writerToUse, getToolkit().getGenomeLocParser(), MAX_ISIZE_FOR_MOVEMENT, MAX_POS_MOVE_ALLOWED, MAX_RECORDS_IN_MEMORY); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerIntegrationTest.java index 040845828..9b464cfec 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerIntegrationTest.java @@ -113,4 +113,14 @@ public class IndelRealignerIntegrationTest extends WalkerTest { executeTest(String.format("realigner [%s]", entry.getKey()), spec); } } + + @Test + public void testNWayOut() { + WalkerTestSpec spec1 = new WalkerTestSpec( + baseCommandPrefix + " -nWayOut .clean.bam ", + 1, + Arrays.asList("d41d8cd98f00b204e9800998ecf8427e")); + executeTest("test realigner nWayOut", spec1); + } + } From 69194e50322ac5f889ab6621aa29f7f24f42f24f Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Mon, 15 Oct 2012 13:24:58 -0400 Subject: [PATCH 71/93] Adding intellij example files to the repo --- intellij_example.tar.bz2 | Bin 0 -> 7520 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 intellij_example.tar.bz2 diff --git a/intellij_example.tar.bz2 b/intellij_example.tar.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..bce16045cd1cc476305c5e59d07ff9b94b8e5d73 GIT binary patch literal 7520 zcmV-m9iQStT4*^jL0KkKS)7!cJOD4F|M~yi5CDHu|NsC0|M36+|L{No06+`?06+jh z1{h!`o8{-7h4qB5wR>^y>D#MWyS_S4UPrYW)`~XXKBY)=w?n(T?w=>u4%}F@T^prq zM&}*98+P}1CC>JGY3}jFJo}ytO>BKWz24nKq=%-|-tIdTv1Z%Ou|py0B#;mw z0$?VFLqIe|HdE0LQ`8zCr9V{lAE^M-CYlc@B!L1ZL{xsM>S#SrDYXw!Hq`?_27mwn z0006)At_HHdU~eyLFyU;NYu?W zO%G7?YBUW!LqkS@9-*KBcwhGq&L2>c7{2Bv%KPh~XqovPD`P$wqr0wA{OX>F7ZKmy zDsL?$v;dI_RDWe46~zQU)lvw>NPvKQ{r?Z1(}ww2_)M*fQwCl2HJP1e>Ri?dCZpMN zP=M?SA|wk1NPtvh=J3} zDw+~D0aFk#a8yBk1VvNe{^Q2@ATLqre5hlqO+ zJH6U_g@n*il^HZ-l$Xlw^mBWD_ji=uWfv%71y1==(-ozyjYp|LKHs?f9BLgry=7~; zQzc6vctrO5Ohh{&v$XFjif1`0L{s*Olu^?q8x}#rqzyaoVB#B4g(81Jx};qlYMjj? z2dYy`yjW&t6?wYx0N9Uh!RRNLa}rLdMi`DPNP-P&$TYbGk_IqjZvxK0qQ|cbe@$>KA+XQo15<95gH&cGF>(8JUi?q#;jjQhX7>?$D_)jV*fSGa99hISqAgCirHOBRlz z5-9I|sR$Ai`Yfa3X$-2y?KsEHOGX z@HXt%cKK^%iU{@ItVyi`Mvy~M#VuYFejK)2%tD1XPotWR1C6g7^>_3=M{GBTR=x6} zL$ie9VdKIYD#p_7X2Ij5c!ACsYkPJWj&|WWp}5G;)cO~hDhd@kB;Qp{=JQuL+IA<+ z9G6K-0B_4%_wIgYeR?@~V~!zKJsytphBh&$h7$B()y02uDWc?iHKJ3Oou2*+MfMx^=gIf{X3|lWppAz z-I7p3R)QX!Idb;|10w?J`_#t4+9oPM5Ep6hKa7!-%b*Z%Ne=e{WH2)WCKzJBSF^U& zK`^`eM3i5TFVExdQsTLk;G@qq9{>}@N65UeiIk4 z@~k{rAI=Uv%!sgaYY$=o+l0!JKSDr9R$?^eCXzi64~|&0Ce%+awpg_DqW1T;JnT{d zru{VON@j0;c#A;BkSsr#?|iam4$XGUx$RTb)H_pMY<00{Yt{q!{n-}(G#x&{RZq!9 z?#T+_N>%`kP|_a@fmo2-OJrXX!d!y~G&Dn3dYM^vWSZJ!#jw;(nuR5VhC?^k`y8?h4xtX)+j?PU}@iN>ljD+(Qmb z8dniBZ(_g=D1mLVH)}(M5H0vcNC?Cv1(;-)C)j(xXY2nS+gLe%zZ*W^p1!|_zv=O} z_Q!+DzkQ#@*8b@JY?61J{_8LQ7Wa7yo_=DdhrF->(?ZCJE#uL2ybpcSMKqaVBwSv) zNU6k^_VtahV8JC~!U6{d!bq&!4p|_$D-~)<6%s3$x=9Tg-hN~ZTTL>hPndt*gp7cg z+wR+roDg$1Hu?=(-&o%#Ayf15RN!Z%05j!NTZ&r35#?&$A)o_Kq_+aG2Tmv-^Ti}kzPUMz}gW$fs9 zJC4FgjTw|ACp)ypVq7XolvK9GOTU706q(I(#Eu(UtE5jUBYT&==_1{Y?P%VlGrzMb zBE|_o9do|DW~w@PU%$xNzAis=G|wxF)AvA^EC+4@aDj~7z-#+Yu_mAdbYOG_n;S_f z2MF%vrdGcJHml~P2xvLQOIQJlnTe)5Z8JtNk{R2)nVMB@y3m5+0?^4LZPW(|mtd*? zDv;N1W_Rb+ea4+?lrAx`155T=*|7;B>>wiy;Ml)7#VrgvNDTg;;hzJ_HNI$RTTqf4 zfyMeU1R+7uY;dsXgq*(jmUH&D>*C?4NtOzLb-!$s%bXb_Sue0mN5$wkpEM3kv8IvGA=_?Zzv1%_C1tsyl^1@RKH{a zciZw1@6q0f3Zxbb;ED{RY}JFbgv`jo;vnjTP++40u_0!9X$&Jll(K*-u?XPE#-34@ z(2*iq168t)2LIl%)odnH8EI_Yl*{iaXWjeIC?DX z@F63H#+Z$15ur99k-Q~?3g|8Zc=x8AR4D_lS&{5(Wb(9OWn~6P1uWA@lOmW&8qFIU z7SBN;v4-l*AW>A(2|!UvMJG5MwJuL6M+i`eC_Z*#ky3?khBR#txTdfRi~`O|X#<>q zay;fL4M@CAa10!#6DOq2oZBZ*`QRkS_ry;K+IvB5vHgUgDqb| z3@cbx6TJE0H(;83Aleb%_>debXaSWl^D1it&Z*F@y=N{NvpYj~%SKvwWbjz}(2QH5 zzP2fmjwex!Q0DDKn@@wk;d+8XInO!CB-!he7tkDYlzvB5io3y3~eaI5w=k}~_n&-pi{RD^=|p|INIK%M7^Ju>U^q=8=L%d;bA7r& zav%4cJ$b-?7%3m*P6<7LL;|e%;!{W=n0z(DI%^o;Bei5F?M_70?60j?x99A# z3qM)=cXkVG2kmpPJ8oP#mdXvhiT3f-MRyJ&15Gf(d7`7!iiTN|-|lq+clXLmg&CS~L$9jF&hP0Bm8xWJJK=$OMFB>@>4P6z#Xn3dcffp+LAC z!`V2YWzCFixsCZ_7L;F_QL9T?&p+$c)*hPKS8gSO~VTFj|ro3G{Iuof@swuwn#16JUnNC5#N!PbpVLE zvff%F#RxrAU($v^EeV!2DVBm~xc6xi-$OudTP+k6K2v02fE5E45$L!=gi;#?e7UiT zMhuXoWDk1Yu>gn#4)=DU3)lONdeFJ7$d8PZ2g9kss?ltvQp=qrImnG5r+V3zF4fd( z=B;z~@$<2{hrSQ|A=lv6Q712!p!qTbzdD`k(bs>{2Pp3?yqCWFJqT zzsdD8a3Kh21F_vQhkQ6nV4J6$yI@VnB-^q@1Poy1V4pzHT>!|B!-$_&1xU~WX+}BS| zI+H9JIpulj0|c`K4h+Jv+i8{#sDpzsY3kK&nf6^uxxRKVpRY}nq>h`5p#2eSW1JCB zoQDl9g0bddzNVPmI`lIIC+n3Z-%)6 z)m1D@3`N7B@Sgj)AR36~a4}*AFBJoK>y5WL$p%MGX_9PAb!p#rxvRNmYjZFf_p^MM zS*F2;S$4gmw~WV5MV$7|80zk2IN^|p!(1CUj&QiGuRFLn?P!*xF?*d9(+V$}IXT1} zTha_h(muFR1txBW5H`^S1RVSVru%s#2xv)Akeo8AEDyF+y-8{s&ujwd5cgA z{vZA1IN}ss`1mWy37bexU#GAM?R5hGixGW*YN1F6 zNe^y$@Ot{b0s=j%3iJAec(;rNi2(`#p%tYeftrrHgl%C`y`dwy83`F7-YoY_ox@e$ zL&B&_2$DC2^*`wAL)aSaJF>S~WkZ7Y-Du;z{ zz=U=xW(E3CCn_nOid4kc4!;tEos&Z9t35>kz>s9yLG>gOM({gKT^#n%!cAcT@WPry3G z2V$fF(Bh#$fe>JI^|(v(OqCu~Gq|s3b?7=j5C#Z=5wJo%cpU|9AqFF9LZ;v@MFUXd zs2N){bdrWWIl=TxEeH;2AWyYeEYN~qcFD}6*o@`j5Unt1JJgj_YN^CRhB!bw5Oy*P z1DcHm8GutysIu}&!*1M~s!zPA&Oi}v=#?bEF!B<|)OQu(7P)!*MrromGcLP%0kRC2-f z-Ma&_=fdV8BrOEsXnl*Tw^mqWLHgJM!Kwz2*{PSy3 zu1y0}WCf==J8l%kErz3IvwaFAt0TVM@-$^1MCp&5TKidOUd1p zYf^#|AzIhOeK`tn48Ym)u!~-}_Q+l&S8a}MP zqt<;`1h6{%dl7fl4{{^4=5Z9~GklcT&KW~EA3Cu3&;?mZn=%p-SGX&lf>n(5j&iOY zYQ5cz$?+OcwiV^|gbZ zDl{t&8X}C%;tMPtc~k`|q$FrGfm%YMX(X@VI=uFI0&+ldLE%k6`SmVPkC_HWF>Jjk zUDTu0qvTmpsIKA3Q+lPhpdN}HV1^2u*J2-*FWMiW;5$l`j~ao*gwCP$RM5i#_C_Ex zH)zO<;CO6Q=Wwgery}Pl$N)(r*fN-s03O~z4|h@!W6)yYC(`*Uf$0mj(1PL>cZ3#Z z4TbDihB0%-2YpbP;Si{_nnei&VL3_(rX}Qwue;<9@ZzbMbgFgWaWzMX7*HysutU{s z6$yK$fP%s@0>K!oAjF959~anz@iKxw9tVi2%a|O!4?k>B;`D(%h5u(vcUQi+mcKd#qyCaVfaiWCOtfxqGP5gZ9C@~hnd`7h<>b=e_+%r-y-BvC8wl_K{AeW^0CX@V5&D!>>QsSU^i-+=i>^k-=!j-OGaIbfVpkOo zov0DlGYvzkWdki>W|%{31zZ$_CjdJbXwoesiiTpR&LP}<$G(Ags#wwbgBY)fP=_5T zQ4Xh%0B4=sfb<{^vtZvf!~pRs?Q2_tCyTUQVcvR>^r#ARkO%Guv=4>qj_B@o4jqbM zt7aXvQ8X%2q25s!VhE5U6mV4RggF8x`vC+b>2V@PWF#OH>&BrAM934Jfz;}KW^E!s zf_sHg^L}8s>!NOo>yjx%-@oa32x!;2(Ek5!w#C`ezx9s#*X(t`x1;-X6+ z7Tr_H3_VZlT{4sbK3ug=pGr!XjFT71U-2y+_{;q+T4bJi(b= zap}Jvh}lP|*qy&ou8QcP>pMaQngHZp=&Vk$3eem_f*S=$R8;1V4~ZCnf}#?r8iXVQ z0j{X_oxDt(h>0TcAk-a!;E`5M2WU_pz^PFx0iuHO=~{qY8>t!!C^m$obS{f0N*n5P z90a#3kBG3kQg+Y~XP`YbFs-J>`*$M_gdyA-C_|>(hkfw|+l<+w zf}KDpwJ=nwn0Cr`@CL~cY#kjD!*FzDfO#Xz58x^D4{2FCkP+ji120 z`AK?g`I(q&&XIvsdP8c|-6RwyD9PaUmZHX{h)-f%GL}>`AdodPFC>U>q}W4M$7Yrv zXn_GT_PIBf+CkyE8OEjAE9XU38OMGBuvkEl2#MCGQP6b&cqsd};rC<;+96%VZpibB za5i6A1WKt|DOYLN0p46>D$r5;!~>XE;UL^8L$+U03KV+NtWE_Trs&B5hj{M;05y_z( zIB@{aYt}fs#2_Ui8xH+<1|!JJl0XMS2m}_23jO|IA2?LhL%8il_4O)!cmxPzpqfGY zc__a@$5HV6afV(GYNQppH5ZI_4QL$Cz*M(x41|T~>WW%5dc!&O=BaR(D8S87Uc(Wb z4GAcqP$MD;MhMW2k||&T_-I1Sc7quRcUP6)AC5{qas)meG;bs!BEU|AAWLmX=fy+6 z$p=H}qdcCy^{S?BA;kyA^aH=II~+xSl}Oz`ntcF$U;6-Xk#{x0N-aG@bNDK>cMb(tan literal 0 HcmV?d00001 From 213cc00abe2207398d9b5cca168b78bf0edf6434 Mon Sep 17 00:00:00 2001 From: kshakir Date: Mon, 15 Oct 2012 15:03:33 -0400 Subject: [PATCH 72/93] Refactored argument matching to support other plugins in addition to file lists. Added plugin support for sending Queue status messages. Argument parsing can store subclasses of java.io.File, for example RemoteFile. --- ivy.xml | 4 +- .../sting/commandline/ArgumentMatch.java | 28 ++--- .../commandline/ArgumentMatchFileValue.java | 27 +++++ .../commandline/ArgumentMatchSource.java | 42 ++++---- .../commandline/ArgumentMatchSourceType.java | 4 +- .../commandline/ArgumentMatchStringValue.java | 24 +++++ .../sting/commandline/ArgumentMatchValue.java | 18 ++++ .../commandline/ArgumentTypeDescriptor.java | 34 +++--- .../sting/commandline/CommandLineProgram.java | 2 +- .../sting/commandline/ParsedArgs.java | 13 +++ .../sting/commandline/ParsedListArgs.java | 30 ++++++ .../sting/commandline/ParsingEngine.java | 102 +++++++++++------- .../ParsingEngineArgumentFiles.java | 30 ++++++ .../ParsingEngineArgumentProvider.java | 12 +++ .../OutputStreamArgumentTypeDescriptor.java | 2 +- .../SAMFileReaderArgumentTypeDescriptor.java | 12 +-- .../SAMFileWriterArgumentTypeDescriptor.java | 13 ++- .../VCFWriterArgumentTypeDescriptor.java | 6 +- .../sting/utils/help/HelpFormatter.java | 18 ++-- .../ArgumentMatchSiteUnitTest.java | 2 +- .../ArgumentMatchSourceUnitTest.java | 16 +-- .../sting/queue/QCommandLine.scala | 64 ++++++++--- .../broadinstitute/sting/queue/QScript.scala | 31 +++++- .../sting/queue/engine/QStatusMessenger.scala | 10 ++ .../queue/extensions/gatk/GATKIntervals.scala | 2 +- .../sting/queue/util/RemoteFile.scala | 13 +++ 26 files changed, 409 insertions(+), 150 deletions(-) create mode 100644 public/java/src/org/broadinstitute/sting/commandline/ArgumentMatchFileValue.java create mode 100644 public/java/src/org/broadinstitute/sting/commandline/ArgumentMatchStringValue.java create mode 100644 public/java/src/org/broadinstitute/sting/commandline/ArgumentMatchValue.java create mode 100644 public/java/src/org/broadinstitute/sting/commandline/ParsedArgs.java create mode 100644 public/java/src/org/broadinstitute/sting/commandline/ParsedListArgs.java create mode 100644 public/java/src/org/broadinstitute/sting/commandline/ParsingEngineArgumentFiles.java create mode 100644 public/java/src/org/broadinstitute/sting/commandline/ParsingEngineArgumentProvider.java create mode 100644 public/scala/src/org/broadinstitute/sting/queue/engine/QStatusMessenger.scala create mode 100644 public/scala/src/org/broadinstitute/sting/queue/util/RemoteFile.scala diff --git a/ivy.xml b/ivy.xml index 0761cb411..1e3346ff5 100644 --- a/ivy.xml +++ b/ivy.xml @@ -78,8 +78,8 @@ - - + + diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatch.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatch.java index c0823e5c5..6c8fb1f4d 100755 --- a/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatch.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatch.java @@ -46,7 +46,7 @@ public class ArgumentMatch implements Iterable { /** * Maps indices of command line arguments to values paired with that argument. */ - public final SortedMap> sites = new TreeMap>(); + public final SortedMap> sites = new TreeMap>(); /** * An ordered, freeform collection of tags. @@ -90,11 +90,11 @@ public class ArgumentMatch implements Iterable { * @param value Value for the argument at this position. * @param tags ordered freeform text tags associated with this argument. */ - private ArgumentMatch(final String label, final ArgumentDefinition definition, final ArgumentMatchSite site, final String value, final Tags tags) { + private ArgumentMatch(final String label, final ArgumentDefinition definition, final ArgumentMatchSite site, final ArgumentMatchValue value, final Tags tags) { this.label = label; this.definition = definition; - ArrayList values = new ArrayList(); + ArrayList values = new ArrayList(); if( value != null ) values.add(value); sites.put(site,values ); @@ -131,11 +131,11 @@ public class ArgumentMatch implements Iterable { */ @SuppressWarnings("unchecked") ArgumentMatch transform(Multiplexer multiplexer, Object key) { - SortedMap> newIndices = new TreeMap>(); - for(Map.Entry> site: sites.entrySet()) { - List newEntries = new ArrayList(); - for(String entry: site.getValue()) - newEntries.add(multiplexer.transformArgument(key,entry)); + SortedMap> newIndices = new TreeMap>(); + for(Map.Entry> site: sites.entrySet()) { + List newEntries = new ArrayList(); + for(ArgumentMatchValue entry: site.getValue()) + newEntries.add(new ArgumentMatchStringValue(multiplexer.transformArgument(key,entry.asString()))); newIndices.put(site.getKey(),newEntries); } ArgumentMatch newArgumentMatch = new ArgumentMatch(label,definition); @@ -165,7 +165,7 @@ public class ArgumentMatch implements Iterable { /** * Iterate over each available token. */ - private Iterator tokenIterator = null; + private Iterator tokenIterator = null; /** * The next site to return. Null if none remain. @@ -175,7 +175,7 @@ public class ArgumentMatch implements Iterable { /** * The next token to return. Null if none remain. */ - String nextToken = null; + ArgumentMatchValue nextToken = null; { siteIterator = sites.keySet().iterator(); @@ -254,9 +254,9 @@ public class ArgumentMatch implements Iterable { * @param site site of the command-line argument to which this value is mated. * @param value Text representation of value to add. */ - public void addValue( ArgumentMatchSite site, String value ) { + public void addValue( ArgumentMatchSite site, ArgumentMatchValue value ) { if( !sites.containsKey(site) || sites.get(site) == null ) - sites.put(site, new ArrayList() ); + sites.put(site, new ArrayList() ); sites.get(site).add(value); } @@ -275,8 +275,8 @@ public class ArgumentMatch implements Iterable { * Return the values associated with this argument match. * @return A collection of the string representation of these value. */ - public List values() { - List values = new ArrayList(); + public List values() { + List values = new ArrayList(); for( ArgumentMatchSite site: sites.keySet() ) { if( sites.get(site) != null ) values.addAll(sites.get(site)); diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatchFileValue.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatchFileValue.java new file mode 100644 index 000000000..344b6829a --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatchFileValue.java @@ -0,0 +1,27 @@ +package org.broadinstitute.sting.commandline; + +import java.io.File; + +/** + * Holds a reference to a file as an argument match value. + * + * This is useful when the type of the stored file may be a subclass of java.io.File, + * for example a Queue RemoteFile. + */ +public class ArgumentMatchFileValue extends ArgumentMatchValue { + private final File file; + + public ArgumentMatchFileValue(File file) { + this.file = file; + } + + @Override + public String asString() { + return file == null ? null : file.getAbsolutePath(); + } + + @Override + public File asFile() { + return file; + } +} diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatchSource.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatchSource.java index ed2700006..9dfb3afbe 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatchSource.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatchSource.java @@ -24,38 +24,36 @@ package org.broadinstitute.sting.commandline; -import java.io.File; - /** - * Where an argument match originated, via the commandline or a file. + * Where an argument match originated, via the commandline or a custom provider. */ public class ArgumentMatchSource implements Comparable { public static final ArgumentMatchSource COMMAND_LINE = new ArgumentMatchSource(ArgumentMatchSourceType.CommandLine, null); private final ArgumentMatchSourceType type; - private final File file; + private final String description; /** * Creates an argument match source from the specified file. - * @param file File specifying the arguments. Must not be null. + * @param description Where the arguments originated. */ - public ArgumentMatchSource(File file) { - this(ArgumentMatchSourceType.File, file); + public ArgumentMatchSource(String description) { + this(ArgumentMatchSourceType.Provider, description); } - private ArgumentMatchSource(ArgumentMatchSourceType type, File file) { - if (type == ArgumentMatchSourceType.File && file == null) - throw new IllegalArgumentException("An argument match source of type File cannot have a null file."); + private ArgumentMatchSource(ArgumentMatchSourceType type, String description) { + if (type == ArgumentMatchSourceType.Provider && description == null) + throw new IllegalArgumentException("An argument match source provider cannot have a null description."); this.type = type; - this.file = file; + this.description = description; } public ArgumentMatchSourceType getType() { return type; } - public File getFile() { - return file; + public String getDescription() { + return description; } @Override @@ -65,13 +63,13 @@ public class ArgumentMatchSource implements Comparable { ArgumentMatchSource that = (ArgumentMatchSource) o; - return (type == that.type) && (file == null ? that.file == null : file.equals(that.file)); + return (type == that.type) && (description == null ? that.description == null : description.equals(that.description)); } @Override public int hashCode() { int result = type != null ? type.hashCode() : 0; - result = 31 * result + (file != null ? file.hashCode() : 0); + result = 31 * result + (description != null ? description.hashCode() : 0); return result; } @@ -84,15 +82,15 @@ public class ArgumentMatchSource implements Comparable { if (comp != 0) return comp; - File f1 = this.file; - File f2 = that.file; + String d1 = this.description; + String d2 = that.description; - if ((f1 == null) ^ (f2 == null)) { - // If one of the files is null and the other is not - // put the null file first - return f1 == null ? -1 : 1; + if ((d1 == null) ^ (d2 == null)) { + // If one of the descriptions is null and the other is not + // put the null description first + return d1 == null ? -1 : 1; } - return f1 == null ? 0 : f1.compareTo(f2); + return d1 == null ? 0 : d1.compareTo(d2); } } diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatchSourceType.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatchSourceType.java index 3ff6e21d4..118316473 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatchSourceType.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatchSourceType.java @@ -25,8 +25,8 @@ package org.broadinstitute.sting.commandline; /** - * Type of where an argument match originated, via the commandline or a file. + * Type of where an argument match originated, via the commandline or a some other provider. */ public enum ArgumentMatchSourceType { - CommandLine, File + CommandLine, Provider } diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatchStringValue.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatchStringValue.java new file mode 100644 index 000000000..bb2015c3b --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatchStringValue.java @@ -0,0 +1,24 @@ +package org.broadinstitute.sting.commandline; + +import java.io.File; + +/** + * Argument values that originated from a string. + */ +public class ArgumentMatchStringValue extends ArgumentMatchValue { + private final String value; + + public ArgumentMatchStringValue(String value) { + this.value = value; + } + + @Override + public String asString() { + return value; + } + + @Override + public File asFile() { + return value == null ? null : new File(value); + } +} diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatchValue.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatchValue.java new file mode 100644 index 000000000..bed4edfa6 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatchValue.java @@ -0,0 +1,18 @@ +package org.broadinstitute.sting.commandline; + +import java.io.File; + +/** + * Returns argument values as either strings or values. + */ +public abstract class ArgumentMatchValue { + /** + * @return the value of this argument as a String object. + */ + public abstract String asString(); + + /** + * @return the value of this argument as a File object. + */ + public abstract File asFile(); +} diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java index dd4a151bf..4b9774806 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java @@ -215,8 +215,8 @@ public abstract class ArgumentTypeDescriptor { * @param matches The matches for the given argument. * @return The value of the argument if available, or null if not present. */ - protected String getArgumentValue( ArgumentDefinition definition, ArgumentMatches matches ) { - Collection argumentValues = getArgumentValues( definition, matches ); + protected ArgumentMatchValue getArgumentValue( ArgumentDefinition definition, ArgumentMatches matches ) { + Collection argumentValues = getArgumentValues( definition, matches ); if( argumentValues.size() > 1 ) throw new UserException.CommandLineException("Multiple values associated with given definition, but this argument expects only one: " + definition.fullName); return argumentValues.size() > 0 ? argumentValues.iterator().next() : null; @@ -244,8 +244,8 @@ public abstract class ArgumentTypeDescriptor { * @param matches The matches for the given argument. * @return The value of the argument if available, or an empty collection if not present. */ - protected Collection getArgumentValues( ArgumentDefinition definition, ArgumentMatches matches ) { - Collection values = new ArrayList(); + protected Collection getArgumentValues( ArgumentDefinition definition, ArgumentMatches matches ) { + Collection values = new ArrayList(); for( ArgumentMatch match: matches ) { if( match.definition.equals(definition) ) values.addAll(match.values()); @@ -310,7 +310,7 @@ public abstract class ArgumentTypeDescriptor { */ protected Object parseBinding(ArgumentSource source, Type type, ArgumentMatches matches, Tags tags) { ArgumentDefinition defaultDefinition = createDefaultArgumentDefinition(source); - String value = getArgumentValue(defaultDefinition, matches); + ArgumentMatchValue value = getArgumentValue(defaultDefinition, matches); @SuppressWarnings("unchecked") Class parameterType = JVMUtils.getParameterizedTypeClass(type); String name = defaultDefinition.fullName; @@ -328,7 +328,7 @@ public abstract class ArgumentTypeDescriptor { * @param fieldName The name of the field that was parsed. Used for error reporting. * @return The newly created binding object of type bindingClass. */ - public static Object parseBinding(String value, Class parameterType, Type bindingClass, + public static Object parseBinding(ArgumentMatchValue value, Class parameterType, Type bindingClass, String bindingName, Tags tags, String fieldName) { try { String tribbleType = null; @@ -337,7 +337,7 @@ public abstract class ArgumentTypeDescriptor { throw new UserException.CommandLineException( String.format("Unexpected number of positional tags for argument %s : %s. " + "Rod bindings only support -X:type and -X:name,type argument styles", - value, fieldName)); + value.asString(), fieldName)); } else if ( tags.getPositionalTags().size() == 2 ) { // -X:name,type style bindingName = tags.getPositionalTags().get(0); @@ -366,7 +366,7 @@ public abstract class ArgumentTypeDescriptor { if ( tribbleType == null ) { // try to determine the file type dynamically - File file = new File(value); + File file = value.asFile(); if ( file.canRead() && file.isFile() ) { FeatureManager.FeatureDescriptor featureDescriptor = manager.getByFiletype(file); if ( featureDescriptor != null ) { @@ -379,7 +379,7 @@ public abstract class ArgumentTypeDescriptor { // IntervalBinding can be created from a normal String Class rawType = (makeRawTypeIfNecessary(bindingClass)); try { - return rawType.getConstructor(String.class).newInstance(value); + return rawType.getConstructor(String.class).newInstance(value.asString()); } catch (NoSuchMethodException e) { /* ignore */ } @@ -399,14 +399,14 @@ public abstract class ArgumentTypeDescriptor { } Constructor ctor = (makeRawTypeIfNecessary(bindingClass)).getConstructor(Class.class, String.class, String.class, String.class, Tags.class); - return ctor.newInstance(parameterType, bindingName, value, tribbleType, tags); + return ctor.newInstance(parameterType, bindingName, value.asString(), tribbleType, tags); } catch (Exception e) { if ( e instanceof UserException ) throw ((UserException)e); else throw new UserException.CommandLineException( String.format("Failed to parse value %s for argument %s. Message: %s", - value, fieldName, e.getMessage())); + value.asString(), fieldName, e.getMessage())); } } } @@ -517,7 +517,7 @@ class SimpleArgumentTypeDescriptor extends ArgumentTypeDescriptor { return true; ArgumentDefinition defaultDefinition = createDefaultArgumentDefinition(source); - String value = getArgumentValue( defaultDefinition, matches ); + ArgumentMatchValue value = getArgumentValue(defaultDefinition, matches); Object result; Tags tags = getArgumentTags(matches); @@ -527,12 +527,12 @@ class SimpleArgumentTypeDescriptor extends ArgumentTypeDescriptor { Method valueOf = primitiveToWrapperMap.get(type).getMethod("valueOf",String.class); if(value == null) throw new MissingArgumentValueException(createDefaultArgumentDefinition(source)); - result = valueOf.invoke(null,value.trim()); + result = valueOf.invoke(null,value.asString().trim()); } else if (type.isEnum()) { Object[] vals = type.getEnumConstants(); Object defaultEnumeration = null; // as we look at options, record the default option if it exists for (Object val : vals) { - if (String.valueOf(val).equalsIgnoreCase(value)) return val; + if (String.valueOf(val).equalsIgnoreCase(value.asString())) return val; try { if (type.getField(val.toString()).isAnnotationPresent(EnumerationArgumentDefault.class)) defaultEnumeration = val; } catch (NoSuchFieldException e) { throw new ReviewedStingException("parsing " + type.toString() + "doesn't contain the field " + val.toString()); } } @@ -544,10 +544,12 @@ class SimpleArgumentTypeDescriptor extends ArgumentTypeDescriptor { else if (value == null) throw new MissingArgumentValueException(createDefaultArgumentDefinition(source)); else - throw new UnknownEnumeratedValueException(createDefaultArgumentDefinition(source),value); + throw new UnknownEnumeratedValueException(createDefaultArgumentDefinition(source),value.asString()); + } else if (type.equals(File.class)) { + result = value.asFile(); } else { Constructor ctor = type.getConstructor(String.class); - result = ctor.newInstance(value); + result = ctor.newInstance(value.asString()); } } catch (UserException e) { throw e; diff --git a/public/java/src/org/broadinstitute/sting/commandline/CommandLineProgram.java b/public/java/src/org/broadinstitute/sting/commandline/CommandLineProgram.java index 15ec9dfe5..d77ae67cf 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/CommandLineProgram.java +++ b/public/java/src/org/broadinstitute/sting/commandline/CommandLineProgram.java @@ -174,7 +174,7 @@ public abstract class CommandLineProgram { ParsingEngine parser = clp.parser = new ParsingEngine(clp); parser.addArgumentSource(clp.getClass()); - Map> parsedArgs; + Map parsedArgs; // process the args if (clp.canAddArgumentsDynamically()) { diff --git a/public/java/src/org/broadinstitute/sting/commandline/ParsedArgs.java b/public/java/src/org/broadinstitute/sting/commandline/ParsedArgs.java new file mode 100644 index 000000000..9ab315175 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/commandline/ParsedArgs.java @@ -0,0 +1,13 @@ +package org.broadinstitute.sting.commandline; + +/** + * Represents a collection of parsed arguments for an argument source. + * + * Useful for printing out help documents. + */ +public abstract class ParsedArgs { + /** + * @return A compact description of the arguments from an provider/source. + */ + public abstract String getDescription(); +} diff --git a/public/java/src/org/broadinstitute/sting/commandline/ParsedListArgs.java b/public/java/src/org/broadinstitute/sting/commandline/ParsedListArgs.java new file mode 100644 index 000000000..a77e73bcf --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/commandline/ParsedListArgs.java @@ -0,0 +1,30 @@ +package org.broadinstitute.sting.commandline; + +import org.apache.commons.lang.StringUtils; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +/** + * A list of string arguments, usually from the command line or an args list file. + */ +public class ParsedListArgs extends ParsedArgs { + private final List args = new ArrayList(); + + public ParsedListArgs() { + } + + public ParsedListArgs(List args) { + this.args.addAll(args); + } + + public void add(String... args) { + this.args.addAll(Arrays.asList(args)); + } + + @Override + public String getDescription() { + return StringUtils.join(this.args, " "); + } +} diff --git a/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java b/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java index 0fac195e1..a8b729be4 100755 --- a/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java @@ -30,6 +30,7 @@ import org.apache.commons.io.FileUtils; import org.apache.log4j.Logger; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.classloader.JVMUtils; +import org.broadinstitute.sting.utils.classloader.PluginManager; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; @@ -61,7 +62,7 @@ public class ParsingEngine { * Indicates as best as possible where command-line text remains unmatched * to existing arguments. */ - ArgumentMatches argumentMatches = null; + private ArgumentMatches argumentMatches = null; /** * Techniques for parsing and for argument lookup. @@ -88,7 +89,10 @@ public class ParsingEngine { /** * List of tags associated with the given instantiation of the command-line argument. */ - private final Map tags = new IdentityHashMap(); + private final Map tags = new IdentityHashMap(); + + private PluginManager argumentProviderPluginManager = + new PluginManager(ParsingEngineArgumentProvider.class); /** * our log, which we want to capture anything from org.broadinstitute.sting @@ -105,7 +109,10 @@ public class ParsingEngine { argumentTypeDescriptors.addAll(clp.getArgumentTypeDescriptors()); argumentTypeDescriptors.addAll(STANDARD_ARGUMENT_TYPE_DESCRIPTORS); - addArgumentSource(ParsingEngineArgumentFiles.class); + List> providers = argumentProviderPluginManager.getPlugins(); + for (Class provider: providers) { + addArgumentSource(provider); + } } /** @@ -117,6 +124,10 @@ public class ParsingEngine { addArgumentSource(null, source); } + public ArgumentMatches getArgumentMatches() { + return argumentMatches; + } + /** * Add an argument source. Argument sources are expected to have * any number of fields with an @Argument annotation attached. @@ -156,29 +167,30 @@ public class ParsingEngine { * @param tokens Tokens passed on the command line. * @return The parsed arguments by file. */ - public SortedMap> parse( String[] tokens ) { + public SortedMap parse( String[] tokens ) { argumentMatches = new ArgumentMatches(); - SortedMap> parsedArgs = new TreeMap>(); + SortedMap parsedArgs = new TreeMap(); List cmdLineTokens = Arrays.asList(tokens); parse(ArgumentMatchSource.COMMAND_LINE, cmdLineTokens, argumentMatches, parsedArgs); - ParsingEngineArgumentFiles argumentFiles = new ParsingEngineArgumentFiles(); + List providers = argumentProviderPluginManager.createAllTypes(); - // Load the arguments ONLY into the argument files. - // Validation may optionally run on the rest of the arguments. - loadArgumentsIntoObject(argumentFiles); + for (ParsingEngineArgumentProvider provider: providers) { + // Load the arguments ONLY into the provider. + // Validation may optionally run on the rest of the arguments. + loadArgumentsIntoObject(provider); + } - for (File file: argumentFiles.files) { - List fileTokens = getArguments(file); - parse(new ArgumentMatchSource(file), fileTokens, argumentMatches, parsedArgs); + for (ParsingEngineArgumentProvider provider: providers) { + provider.parse(this, parsedArgs); } return parsedArgs; } - private void parse(ArgumentMatchSource matchSource, List tokens, - ArgumentMatches argumentMatches, SortedMap> parsedArgs) { + public void parse(ArgumentMatchSource matchSource, List tokens, + ArgumentMatches argumentMatches, SortedMap parsedArgs) { ArgumentMatchSite lastArgumentMatchSite = new ArgumentMatchSite(matchSource, -1); int i = 0; @@ -195,19 +207,44 @@ public class ParsingEngine { } else { if( argumentMatches.hasMatch(lastArgumentMatchSite) && - !argumentMatches.getMatch(lastArgumentMatchSite).hasValueAtSite(lastArgumentMatchSite)) - argumentMatches.getMatch(lastArgumentMatchSite).addValue( lastArgumentMatchSite, token ); + !argumentMatches.getMatch(lastArgumentMatchSite).hasValueAtSite(lastArgumentMatchSite)) + argumentMatches.getMatch(lastArgumentMatchSite).addValue( lastArgumentMatchSite, new ArgumentMatchStringValue(token) ); else - argumentMatches.MissingArgument.addValue( site, token ); + argumentMatches.MissingArgument.addValue( site, new ArgumentMatchStringValue(token) ); } i++; } - parsedArgs.put(matchSource, tokens); + parsedArgs.put(matchSource, new ParsedListArgs(tokens)); } - private List getArguments(File file) { + public void parsePairs(ArgumentMatchSource matchSource, List> tokens, + ArgumentMatches argumentMatches, ParsedArgs matchSourceArgs, + SortedMap parsedArgs) { + int i = 0; + for (Pair pair: tokens) { + + ArgumentMatchSite site = new ArgumentMatchSite(matchSource, i); + List matchers = Arrays.asList(ArgumentDefinitions.FullNameDefinitionMatcher, ArgumentDefinitions.ShortNameDefinitionMatcher); + ArgumentDefinition definition = null; + for (DefinitionMatcher matcher: matchers) { + definition = argumentDefinitions.findArgumentDefinition( pair.getFirst(), matcher ); + if (definition != null) + break; + } + if (definition == null) + continue; + ArgumentMatch argumentMatch = new ArgumentMatch(pair.getFirst(), definition, site, new Tags()); + argumentMatches.mergeInto(argumentMatch); + argumentMatch.addValue(site, pair.getSecond()); + i++; + } + + parsedArgs.put(matchSource, matchSourceArgs); + } + + protected List getArguments(File file) { try { if (file.getAbsolutePath().endsWith(".list")) { return getListArguments(file); @@ -283,9 +320,9 @@ public class ParsingEngine { // Ensure that the field contents meet the validation criteria specified by the regular expression. for( ArgumentMatch verifiableMatch: verifiableMatches ) { - for( String value: verifiableMatch.values() ) { - if( verifiableArgument.validation != null && !value.matches(verifiableArgument.validation) ) - invalidValues.add( new Pair(verifiableArgument, value) ); + for( ArgumentMatchValue value: verifiableMatch.values() ) { + if( verifiableArgument.validation != null && !value.asString().matches(verifiableArgument.validation) ) + invalidValues.add( new Pair(verifiableArgument, value.asString()) ); } } } @@ -629,21 +666,21 @@ class UnmatchedArgumentException extends ArgumentException { private static String formatArguments( ArgumentMatch invalidValues ) { StringBuilder sb = new StringBuilder(); for( ArgumentMatchSite site: invalidValues.sites.keySet() ) - for( String value: invalidValues.sites.get(site) ) { + for( ArgumentMatchValue value: invalidValues.sites.get(site) ) { switch (site.getSource().getType()) { case CommandLine: sb.append( String.format("%nInvalid argument value '%s' at position %d.", - value, site.getIndex()) ); + value.asString(), site.getIndex()) ); break; - case File: - sb.append( String.format("%nInvalid argument value '%s' in file %s at position %d.", - value, site.getSource().getFile().getAbsolutePath(), site.getIndex()) ); + case Provider: + sb.append( String.format("%nInvalid argument value '%s' in %s at position %d.", + value.asString(), site.getSource().getDescription(), site.getIndex()) ); break; default: throw new RuntimeException( String.format("Unexpected argument match source type: %s", site.getSource().getType())); } - if(value != null && Utils.dupString(' ',value.length()).equals(value)) + if(value.asString() != null && Utils.dupString(' ',value.asString().length()).equals(value.asString())) sb.append(" Please make sure any line continuation backslashes on your command line are not followed by whitespace."); } return sb.toString(); @@ -696,12 +733,3 @@ class UnknownEnumeratedValueException extends ArgumentException { return String.format("Invalid value %s specified for argument %s; valid options are (%s).", argumentPassed, definition.fullName, Utils.join(",",definition.validOptions)); } } - -/** - * Container class to store the list of argument files. - * The files will be parsed after the command line arguments. - */ -class ParsingEngineArgumentFiles { - @Argument(fullName = "arg_file", shortName = "args", doc = "Reads arguments from the specified file", required = false) - public List files = new ArrayList(); -} diff --git a/public/java/src/org/broadinstitute/sting/commandline/ParsingEngineArgumentFiles.java b/public/java/src/org/broadinstitute/sting/commandline/ParsingEngineArgumentFiles.java new file mode 100644 index 000000000..3f3921937 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/commandline/ParsingEngineArgumentFiles.java @@ -0,0 +1,30 @@ +package org.broadinstitute.sting.commandline; + +import java.io.File; +import java.util.ArrayList; +import java.util.List; +import java.util.SortedMap; + +/** + * Container class to store the list of argument files. + * The files will be parsed after the command line arguments. + */ +public class ParsingEngineArgumentFiles extends ParsingEngineArgumentProvider { + @Argument(fullName = "arg_file", shortName = "args", doc = "Reads arguments from the specified file", required = false) + public List files = new ArrayList(); + + @Override + public void parse(ParsingEngine parsingEngine, SortedMap parsedArgs) { + ArgumentMatches argumentMatches = parsingEngine.getArgumentMatches(); + for (File file: this.files) { + List fileTokens = parsingEngine.getArguments(file); + parsingEngine.parse(new ArgumentMatchFileSource(file), fileTokens, argumentMatches, parsedArgs); + } + } +} + +class ArgumentMatchFileSource extends ArgumentMatchSource { + ArgumentMatchFileSource(File file) { + super("file " + file.getAbsolutePath()); + } +} diff --git a/public/java/src/org/broadinstitute/sting/commandline/ParsingEngineArgumentProvider.java b/public/java/src/org/broadinstitute/sting/commandline/ParsingEngineArgumentProvider.java new file mode 100644 index 000000000..a57f8b08a --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/commandline/ParsingEngineArgumentProvider.java @@ -0,0 +1,12 @@ +package org.broadinstitute.sting.commandline; + +import java.util.List; +import java.util.SortedMap; + +/** + * A class that can parse arguments for the engine + */ +public abstract class ParsingEngineArgumentProvider { + public abstract void parse(ParsingEngine parsingEngine, SortedMap parsedArgs); +} + diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/OutputStreamArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/OutputStreamArgumentTypeDescriptor.java index da4eb3955..ac01468eb 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/OutputStreamArgumentTypeDescriptor.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/OutputStreamArgumentTypeDescriptor.java @@ -86,7 +86,7 @@ public class OutputStreamArgumentTypeDescriptor extends ArgumentTypeDescriptor { @Override public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches ) { ArgumentDefinition definition = createDefaultArgumentDefinition(source); - String fileName = getArgumentValue( definition, matches ); + String fileName = getArgumentValue( definition, matches ).asString(); // This parser has been passed a null filename and the GATK is not responsible for creating a type default for the object; // therefore, the user must have failed to specify a type default diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileReaderArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileReaderArgumentTypeDescriptor.java index 83d1b7eb2..f13cb8fa8 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileReaderArgumentTypeDescriptor.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileReaderArgumentTypeDescriptor.java @@ -25,15 +25,11 @@ package org.broadinstitute.sting.gatk.io.stubs; import net.sf.samtools.SAMFileReader; -import org.broadinstitute.sting.commandline.ArgumentMatches; -import org.broadinstitute.sting.commandline.ArgumentSource; -import org.broadinstitute.sting.commandline.ArgumentTypeDescriptor; -import org.broadinstitute.sting.commandline.ParsingEngine; +import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.sam.SAMFileReaderBuilder; -import java.io.File; import java.lang.reflect.Type; /** @@ -47,7 +43,7 @@ public class SAMFileReaderArgumentTypeDescriptor extends ArgumentTypeDescriptor /** * Create a new SAMFileReader argument, notifying the given engine when that argument has been created. - * @param engine + * @param engine engine */ public SAMFileReaderArgumentTypeDescriptor( GenomeAnalysisEngine engine ) { this.engine = engine; @@ -62,12 +58,12 @@ public class SAMFileReaderArgumentTypeDescriptor extends ArgumentTypeDescriptor public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches ) { SAMFileReaderBuilder builder = new SAMFileReaderBuilder(); - String readerFileName = getArgumentValue( createDefaultArgumentDefinition(source), matches ); + ArgumentMatchValue readerFileName = getArgumentValue( createDefaultArgumentDefinition(source), matches ); if( readerFileName == null ) throw new UserException.CommandLineException("SAM file compression was supplied, but no associated writer was supplied with it."); - builder.setSAMFile(new File(readerFileName)); + builder.setSAMFile(readerFileName.asFile()); // WARNING: Skipping required side-effect because stub is impossible to generate. engine.addInput(source, builder); diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterArgumentTypeDescriptor.java index dcf2704f5..2ea4bdfb0 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterArgumentTypeDescriptor.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterArgumentTypeDescriptor.java @@ -31,7 +31,6 @@ import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.io.StingSAMFileWriter; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import java.io.File; import java.io.OutputStream; import java.lang.annotation.Annotation; import java.lang.reflect.Type; @@ -111,10 +110,10 @@ public class SAMFileWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches ) { // Extract all possible parameters that could be passed to a BAM file writer? ArgumentDefinition bamArgumentDefinition = createBAMArgumentDefinition(source); - String writerFileName = getArgumentValue( bamArgumentDefinition, matches ); + ArgumentMatchValue writerFileName = getArgumentValue( bamArgumentDefinition, matches ); - String compressionLevelText = getArgumentValue( createBAMCompressionArgumentDefinition(source), matches ); - Integer compressionLevel = compressionLevelText != null ? Integer.valueOf(compressionLevelText) : null; + ArgumentMatchValue compressionLevelText = getArgumentValue( createBAMCompressionArgumentDefinition(source), matches ); + Integer compressionLevel = compressionLevelText != null ? Integer.valueOf(compressionLevelText.asString()) : null; boolean indexOnTheFly = !argumentIsPresent(disableWriteIndexArgumentDefinition(source),matches); boolean generateMD5 = argumentIsPresent(this.enableMD5GenerationArgumentDefinition(source),matches); @@ -124,14 +123,14 @@ public class SAMFileWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor // This parser has been passed a null filename and the GATK is not responsible for creating a type default for the object; // therefore, the user must have failed to specify a type default - if(writerFileName == null && generateMD5) + if(writerFileName.asFile() == null && generateMD5) throw new ArgumentException("MD5 generation specified, but no output file specified. If md5 generation is desired, please specify a BAM output file and an md5 file will be written alongside."); // Create the stub and set parameters. SAMFileWriterStub stub = null; // stub = new SAMFileWriterStub(engine, defaultOutputStream); - if ( writerFileName != null ) { - stub = new SAMFileWriterStub(engine, new File(writerFileName)); + if ( writerFileName.asFile() != null ) { + stub = new SAMFileWriterStub(engine, writerFileName.asFile()); if ( compressionLevel != null ) stub.setCompressionLevel(compressionLevel); diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterArgumentTypeDescriptor.java index 5e1132d45..43350ccc1 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterArgumentTypeDescriptor.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterArgumentTypeDescriptor.java @@ -138,8 +138,8 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor { public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches ) { ArgumentDefinition defaultArgumentDefinition = createDefaultArgumentDefinition(source); // Get the filename for the genotype file, if it exists. If not, we'll need to send output to out. - String writerFileName = getArgumentValue(defaultArgumentDefinition,matches); - File writerFile = writerFileName != null ? new File(writerFileName) : null; + ArgumentMatchValue writerFileName = getArgumentValue(defaultArgumentDefinition,matches); + File writerFile = writerFileName != null ? writerFileName.asFile() : null; // This parser has been passed a null filename and the GATK is not responsible for creating a type default for the object; // therefore, the user must have failed to specify a type default @@ -151,7 +151,7 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor { ? new VariantContextWriterStub(engine, writerFile, argumentSources) : new VariantContextWriterStub(engine, defaultOutputStream, argumentSources); - stub.setCompressed(isCompressed(writerFileName)); + stub.setCompressed(isCompressed(writerFileName.asString())); stub.setDoNotWriteGenotypes(argumentIsPresent(createSitesOnlyArgumentDefinition(),matches)); stub.setSkipWritingCommandLineHeader(argumentIsPresent(createNoCommandLineHeaderArgumentDefinition(),matches)); stub.setForceBCF(argumentIsPresent(createBCFArgumentDefinition(),matches)); diff --git a/public/java/src/org/broadinstitute/sting/utils/help/HelpFormatter.java b/public/java/src/org/broadinstitute/sting/utils/help/HelpFormatter.java index 25ef8ccd2..0f6808718 100755 --- a/public/java/src/org/broadinstitute/sting/utils/help/HelpFormatter.java +++ b/public/java/src/org/broadinstitute/sting/utils/help/HelpFormatter.java @@ -26,10 +26,7 @@ package org.broadinstitute.sting.utils.help; import org.apache.log4j.Logger; -import org.broadinstitute.sting.commandline.ArgumentDefinition; -import org.broadinstitute.sting.commandline.ArgumentDefinitionGroup; -import org.broadinstitute.sting.commandline.ArgumentDefinitions; -import org.broadinstitute.sting.commandline.ArgumentMatchSource; +import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.text.TextFormattingUtils; @@ -273,9 +270,9 @@ public class HelpFormatter { * Generate a standard header for the logger * * @param applicationDetails details of the application to run. - * @param parsedArgs the command line arguments passed in + * @param parsedArgs the arguments passed in */ - public static void generateHeaderInformation(ApplicationDetails applicationDetails, Map> parsedArgs) { + public static void generateHeaderInformation(ApplicationDetails applicationDetails, Map parsedArgs) { DateFormat dateFormat = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss"); java.util.Date date = new java.util.Date(); @@ -286,19 +283,16 @@ public class HelpFormatter { for (String headerLine : applicationDetails.applicationHeader) logger.info(headerLine); logger.debug("Current directory: " + System.getProperty("user.dir")); - for (Map.Entry> entry: parsedArgs.entrySet()) { + for (Map.Entry entry: parsedArgs.entrySet()) { ArgumentMatchSource matchSource = entry.getKey(); final String sourceName; switch (matchSource.getType()) { case CommandLine: sourceName = "Program"; break; - case File: sourceName = matchSource.getFile().getPath(); break; + case Provider: sourceName = matchSource.getDescription(); break; default: throw new RuntimeException("Unexpected argument match source type: " + matchSource.getType()); } - String output = sourceName + " Args:"; - for (String str : entry.getValue()) { - output = output + " " + str; - } + String output = sourceName + " Args: " + entry.getValue().getDescription(); logger.info(output); } logger.info("Date/Time: " + dateFormat.format(date)); diff --git a/public/java/test/org/broadinstitute/sting/commandline/ArgumentMatchSiteUnitTest.java b/public/java/test/org/broadinstitute/sting/commandline/ArgumentMatchSiteUnitTest.java index 99d6b88f3..b1e788dc5 100644 --- a/public/java/test/org/broadinstitute/sting/commandline/ArgumentMatchSiteUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/commandline/ArgumentMatchSiteUnitTest.java @@ -39,7 +39,7 @@ public class ArgumentMatchSiteUnitTest { @Test public void testFile() { - ArgumentMatchSource source = new ArgumentMatchSource(new File("test")); + ArgumentMatchSource source = new ArgumentMatchFileSource(new File("test")); ArgumentMatchSite site = new ArgumentMatchSite(source, 1); Assert.assertEquals(site.getSource(), source); Assert.assertEquals(site.getIndex(), 1); diff --git a/public/java/test/org/broadinstitute/sting/commandline/ArgumentMatchSourceUnitTest.java b/public/java/test/org/broadinstitute/sting/commandline/ArgumentMatchSourceUnitTest.java index 4bc7eb822..a183b2001 100644 --- a/public/java/test/org/broadinstitute/sting/commandline/ArgumentMatchSourceUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/commandline/ArgumentMatchSourceUnitTest.java @@ -35,15 +35,15 @@ public class ArgumentMatchSourceUnitTest extends BaseTest { public void testCommandLine() { ArgumentMatchSource source = ArgumentMatchSource.COMMAND_LINE; Assert.assertEquals(source.getType(), ArgumentMatchSourceType.CommandLine); - Assert.assertNull(source.getFile()); + Assert.assertNull(source.getDescription()); } @Test public void testFile() { File f = new File("test"); - ArgumentMatchSource source = new ArgumentMatchSource(f); - Assert.assertEquals(source.getType(), ArgumentMatchSourceType.File); - Assert.assertEquals(source.getFile(), f); + ArgumentMatchSource source = new ArgumentMatchFileSource(f); + Assert.assertEquals(source.getType(), ArgumentMatchSourceType.Provider); + Assert.assertEquals(source.getDescription(), "file " + f.getAbsolutePath()); } @Test(expectedExceptions = IllegalArgumentException.class) @@ -54,8 +54,8 @@ public class ArgumentMatchSourceUnitTest extends BaseTest { @Test public void testEquals() { ArgumentMatchSource cmdLine = ArgumentMatchSource.COMMAND_LINE; - ArgumentMatchSource fileA = new ArgumentMatchSource(new File("a")); - ArgumentMatchSource fileB = new ArgumentMatchSource(new File("b")); + ArgumentMatchSource fileA = new ArgumentMatchFileSource(new File("a")); + ArgumentMatchSource fileB = new ArgumentMatchFileSource(new File("b")); Assert.assertFalse(cmdLine.equals(null)); @@ -75,8 +75,8 @@ public class ArgumentMatchSourceUnitTest extends BaseTest { @Test public void testCompareTo() { ArgumentMatchSource cmdLine = ArgumentMatchSource.COMMAND_LINE; - ArgumentMatchSource fileA = new ArgumentMatchSource(new File("a")); - ArgumentMatchSource fileB = new ArgumentMatchSource(new File("b")); + ArgumentMatchSource fileA = new ArgumentMatchFileSource(new File("a")); + ArgumentMatchSource fileB = new ArgumentMatchFileSource(new File("b")); Assert.assertTrue(cmdLine.compareTo(cmdLine) == 0); Assert.assertTrue(cmdLine.compareTo(fileA) < 0); diff --git a/public/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala b/public/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala index d0379d022..f4c4b613f 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala @@ -28,7 +28,7 @@ import function.QFunction import java.io.File import org.broadinstitute.sting.commandline._ import org.broadinstitute.sting.queue.util._ -import org.broadinstitute.sting.queue.engine.{QGraphSettings, QGraph} +import org.broadinstitute.sting.queue.engine.{QStatusMessenger, QGraphSettings, QGraph} import collection.JavaConversions._ import org.broadinstitute.sting.utils.classloader.PluginManager import org.broadinstitute.sting.utils.exceptions.UserException @@ -90,12 +90,16 @@ class QCommandLine extends CommandLineProgram with Logging { private var qScriptClasses: File = _ private var shuttingDown = false - private lazy val pluginManager = { + private lazy val qScriptPluginManager = { qScriptClasses = IOUtils.tempDir("Q-Classes-", "", settings.qSettings.tempDirectory) qScriptManager.loadScripts(scripts, qScriptClasses) new PluginManager[QScript](classOf[QScript], Seq(qScriptClasses.toURI.toURL)) } + private lazy val qStatusMessengerPluginManager = { + new PluginManager[QStatusMessenger](classOf[QStatusMessenger]) + } + QFunction.parsingEngine = new ParsingEngine(this) /** @@ -103,15 +107,25 @@ class QCommandLine extends CommandLineProgram with Logging { * functions, and then builds and runs a QGraph based on the dependencies. */ def execute = { + val allStatusMessengers = qStatusMessengerPluginManager.createAllTypes() + if (settings.qSettings.runName == null) settings.qSettings.runName = FilenameUtils.removeExtension(scripts.head.getName) if (IOUtils.isDefaultTempDir(settings.qSettings.tempDirectory)) settings.qSettings.tempDirectory = IOUtils.absolute(settings.qSettings.runDirectory, ".queue/tmp") qGraph.initializeWithSettings(settings) - val allQScripts = pluginManager.createAllTypes() + for (statusMessenger <- allStatusMessengers) { + loadArgumentsIntoObject(statusMessenger) + } + + for (statusMessenger <- allStatusMessengers) { + statusMessenger.started() + } + + val allQScripts = qScriptPluginManager.createAllTypes() for (script <- allQScripts) { - logger.info("Scripting " + pluginManager.getName(script.getClass.asSubclass(classOf[QScript]))) + logger.info("Scripting " + qScriptPluginManager.getName(script.getClass.asSubclass(classOf[QScript]))) loadArgumentsIntoObject(script) script.qSettings = settings.qSettings try { @@ -124,6 +138,10 @@ class QCommandLine extends CommandLineProgram with Logging { logger.info("Added " + script.functions.size + " functions") } + if (settings.run) { + allQScripts.foreach(_.pullInputs()) + } + // Execute the job graph qGraph.run() @@ -142,11 +160,18 @@ class QCommandLine extends CommandLineProgram with Logging { logger.info("Writing final jobs report...") qGraph.writeJobsReport() - if (!qGraph.success) { + if (!success) { logger.info("Done with errors") qGraph.logFailed() + for (statusMessenger <- allStatusMessengers) + statusMessenger.exit("Done with errors") 1 } else { + if (settings.run) { + allQScripts.foreach(_.pushOutputs()) + for (statusMessenger <- allStatusMessengers) + statusMessenger.done() + } 0 } } @@ -158,19 +183,30 @@ class QCommandLine extends CommandLineProgram with Logging { override def canAddArgumentsDynamically = true /** - * Returns the list of QScripts passed in via -S so that their - * arguments can be inspected before QScript.script is called. - * @return Array of QScripts passed in. + * Returns the list of QScripts passed in via -S and other plugins + * so that their arguments can be inspected before QScript.script is called. + * @return Array of dynamic sources */ - override def getArgumentSources = - pluginManager.getPlugins.toIterable.toArray.asInstanceOf[Array[Class[_]]] + override def getArgumentSources = { + var plugins = Seq.empty[Class[_]] + plugins ++= qScriptPluginManager.getPlugins + plugins ++= qStatusMessengerPluginManager.getPlugins + plugins.toArray + } /** - * Returns the name of a QScript - * @return The name of a QScript + * Returns the name of a script/plugin + * @return The name of a script/plugin */ - override def getArgumentSourceName(source: Class[_]) = - pluginManager.getName(source.asSubclass(classOf[QScript])) + override def getArgumentSourceName(source: Class[_]) = { + if (classOf[QScript].isAssignableFrom(source)) + qScriptPluginManager.getName(source.asSubclass(classOf[QScript])) + else if (classOf[QStatusMessenger].isAssignableFrom(source)) + qStatusMessengerPluginManager.getName(source.asSubclass(classOf[QStatusMessenger])) + else + null + + } /** * Returns a ScalaCompoundArgumentTypeDescriptor that can parse argument sources into scala collections. diff --git a/public/scala/src/org/broadinstitute/sting/queue/QScript.scala b/public/scala/src/org/broadinstitute/sting/queue/QScript.scala index 6f887ea00..c59220d4b 100755 --- a/public/scala/src/org/broadinstitute/sting/queue/QScript.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/QScript.scala @@ -27,7 +27,9 @@ package org.broadinstitute.sting.queue import engine.JobRunInfo import org.broadinstitute.sting.queue.function.QFunction import annotation.target.field -import util.{StringFileConversions, PrimitiveOptionConversions, Logging} +import util.{ReflectionUtils, StringFileConversions, PrimitiveOptionConversions, Logging} +import org.broadinstitute.sting.utils.classloader.JVMUtils +import java.lang.reflect.Field /** * Defines a Queue pipeline as a collection of CommandLineFunctions. @@ -106,6 +108,33 @@ trait QScript extends Logging with PrimitiveOptionConversions with StringFileCon def addAll(functions: Seq[QFunction]) { functions.foreach( f => add(f) ) } + + def pullInputs() { + val inputs = getInputs + inputs.filter(_.isInstanceOf[RemoteFile]).map(_.asInstanceOf[RemoteFile]).foreach(_.pullToLocal()) + } + + def pushOutputs() { + val outputs = getOutputs + outputs.filter(_.isInstanceOf[RemoteFile]).map(_.asInstanceOf[RemoteFile]).foreach(_.pushToRemote()) + } + + private def getInputs: Seq[File] = { + getFieldValues(classOf[Input]) + } + + private def getOutputs: Seq[File] = { + getFieldValues(classOf[Output]) + } + + private def getFieldValues(annotation: Class[_ <: java.lang.annotation.Annotation]): Seq[File] = { + val filtered: Seq[Field] = fields.filter(field => ReflectionUtils.hasAnnotation(field, annotation)) + val files = filtered.filter(field => classOf[File].isAssignableFrom(field.getType)).map(field => ReflectionUtils.getValue(this, field).asInstanceOf[File]) + val seqFiles = filtered.filter(field => classOf[Seq[File]].isAssignableFrom(field.getType)).map(field => ReflectionUtils.getValue(this, field).asInstanceOf[Seq[File]]) + seqFiles.foldLeft(files)(_ ++ _).filter(_ != null) + } + + private lazy val fields = collection.JavaConversions.asScalaBuffer(JVMUtils.getAllFields(this.getClass)).toSeq } object QScript { diff --git a/public/scala/src/org/broadinstitute/sting/queue/engine/QStatusMessenger.scala b/public/scala/src/org/broadinstitute/sting/queue/engine/QStatusMessenger.scala new file mode 100644 index 000000000..c61f2ef1f --- /dev/null +++ b/public/scala/src/org/broadinstitute/sting/queue/engine/QStatusMessenger.scala @@ -0,0 +1,10 @@ +package org.broadinstitute.sting.queue.engine + +/** + * Plugin to sends QStatus messages + */ +trait QStatusMessenger { + def started() + def done() + def exit(message: String) +} diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/GATKIntervals.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/GATKIntervals.scala index e619c0a02..395a34c60 100755 --- a/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/GATKIntervals.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/GATKIntervals.scala @@ -92,6 +92,6 @@ object GATKIntervals { } private def createBinding(interval: String, argumentName: String, tags: Tags): IntervalBinding[Feature] = { - ArgumentTypeDescriptor.parseBinding(interval, classOf[Feature], classOf[IntervalBinding[Feature]], argumentName, tags, argumentName).asInstanceOf[IntervalBinding[Feature]] + ArgumentTypeDescriptor.parseBinding(new ArgumentMatchStringValue(interval), classOf[Feature], classOf[IntervalBinding[Feature]], argumentName, tags, argumentName).asInstanceOf[IntervalBinding[Feature]] } } diff --git a/public/scala/src/org/broadinstitute/sting/queue/util/RemoteFile.scala b/public/scala/src/org/broadinstitute/sting/queue/util/RemoteFile.scala new file mode 100644 index 000000000..cfe848ba8 --- /dev/null +++ b/public/scala/src/org/broadinstitute/sting/queue/util/RemoteFile.scala @@ -0,0 +1,13 @@ +package org.broadinstitute.sting.queue.util + +import java.io.File +import org.broadinstitute.sting.utils.io.FileExtension + +/** + * An extension of java.io.File that can be pulled from or pushed to a remote location. + */ +trait RemoteFile extends File with FileExtension { + def pullToLocal() + def pushToRemote() + def deleteRemote() +} From c4ee31075c1268eb990c36744e14139a5b1a8d80 Mon Sep 17 00:00:00 2001 From: kshakir Date: Mon, 15 Oct 2012 15:29:40 -0400 Subject: [PATCH 73/93] Fixed package error and a few deprecated scala warnings. --- .../scala/src/org/broadinstitute/sting/queue/QScript.scala | 2 +- .../org/broadinstitute/sting/queue/QScriptManager.scala | 7 ++++--- .../broadinstitute/sting/queue/engine/FunctionEdge.scala | 2 +- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/public/scala/src/org/broadinstitute/sting/queue/QScript.scala b/public/scala/src/org/broadinstitute/sting/queue/QScript.scala index c59220d4b..da24b854e 100755 --- a/public/scala/src/org/broadinstitute/sting/queue/QScript.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/QScript.scala @@ -27,7 +27,7 @@ package org.broadinstitute.sting.queue import engine.JobRunInfo import org.broadinstitute.sting.queue.function.QFunction import annotation.target.field -import util.{ReflectionUtils, StringFileConversions, PrimitiveOptionConversions, Logging} +import util._ import org.broadinstitute.sting.utils.classloader.JVMUtils import java.lang.reflect.Field diff --git a/public/scala/src/org/broadinstitute/sting/queue/QScriptManager.scala b/public/scala/src/org/broadinstitute/sting/queue/QScriptManager.scala index 74487917f..2528c0572 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/QScriptManager.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/QScriptManager.scala @@ -11,6 +11,7 @@ import org.apache.log4j.Level import scala.tools.nsc.util.{FakePos, NoPosition, Position} import org.broadinstitute.sting.queue.util.TextFormatUtils._ import org.broadinstitute.sting.utils.classloader.JVMUtils +import tools.util.StringOps /** * Plugin manager for QScripts which loads QScripts into the current class loader. @@ -63,7 +64,7 @@ object QScriptManager extends Logging { * Heavily based on scala/src/compiler/scala/tools/nsc/reporters/ConsoleReporter.scala */ private class Log4JReporter(val settings: Settings) extends AbstractReporter { - def displayPrompt { throw new UnsupportedOperationException("Unable to prompt the user. Prompting should be off.") } + def displayPrompt() { throw new UnsupportedOperationException("Unable to prompt the user. Prompting should be off.") } /** * Displays the message at position with severity. @@ -98,9 +99,9 @@ object QScriptManager extends Logging { */ def printSummary() { if (WARNING.count > 0) - printMessage(Level.WARN, countElementsAsString(WARNING.count, "warning") + " found") + printMessage(Level.WARN, StringOps.countElementsAsString(WARNING.count, "warning") + " found") if (ERROR.count > 0) - printMessage(Level.ERROR, countElementsAsString(ERROR.count, "error") + " found") + printMessage(Level.ERROR, StringOps.countElementsAsString(ERROR.count, "error") + " found") } /** diff --git a/public/scala/src/org/broadinstitute/sting/queue/engine/FunctionEdge.scala b/public/scala/src/org/broadinstitute/sting/queue/engine/FunctionEdge.scala index 2d4ff60f5..62c016812 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/engine/FunctionEdge.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/engine/FunctionEdge.scala @@ -185,7 +185,7 @@ class FunctionEdge(val function: QFunction, val inputs: QNode, val outputs: QNod val tailLines = IOUtils.tail(errorFile, maxLines) val nl = "%n".format() val summary = if (tailLines.size > maxLines) "Last %d lines".format(maxLines) else "Contents" - this.function.jobErrorLines = collection.JavaConversions.asScalaIterable(tailLines).toSeq + this.function.jobErrorLines = collection.JavaConversions.collectionAsScalaIterable(tailLines).toSeq logger.error("%s of %s:%n%s".format(summary, errorFile, StringUtils.join(tailLines, nl))) } else { logger.error("Unable to access log file: %s".format(errorFile)) From 9fcf71c031d382b9eaa4bb26e59bfa588abd3909 Mon Sep 17 00:00:00 2001 From: kshakir Date: Tue, 16 Oct 2012 02:21:38 -0400 Subject: [PATCH 75/93] Updated google reflections due to stale slf4j version conflicting with other projects also trying to use Queue as a component. Added targets to build.xml to effectively 'mvn install' packaged GATK/Queue from ant. TODO: Versions during 'mvn install' are hardcoded at 0.0.1 until a better versioning scheme that works with maven dependencies has been identified. --- build.xml | 42 +++++++++++++++++-- ivy.xml | 3 +- .../sting/utils/classloader/JVMUtils.java | 7 ++-- .../utils/classloader/PluginManager.java | 14 +++---- settings/ivysettings.xml | 1 - 5 files changed, 49 insertions(+), 18 deletions(-) diff --git a/build.xml b/build.xml index 7e7415f08..c6b1afc56 100644 --- a/build.xml +++ b/build.xml @@ -22,7 +22,9 @@ ~ OTHER DEALINGS IN THE SOFTWARE. --> - + Compile and distribute the Sting toolkit @@ -250,11 +252,14 @@ + + - + + @@ -262,6 +267,15 @@ uri="antlib:org.apache.ivy.ant" classpath="${ivy.jar.dir}/${ivy.jar.file}"/> + + + + + @@ -295,7 +309,7 @@ - + @@ -942,6 +956,28 @@ + + + + + + + + + + + + + + + + + + + diff --git a/ivy.xml b/ivy.xml index 1e3346ff5..1d2f95dc1 100644 --- a/ivy.xml +++ b/ivy.xml @@ -46,7 +46,8 @@ - + + diff --git a/public/java/src/org/broadinstitute/sting/utils/classloader/JVMUtils.java b/public/java/src/org/broadinstitute/sting/utils/classloader/JVMUtils.java index dd12ce761..49851249c 100755 --- a/public/java/src/org/broadinstitute/sting/utils/classloader/JVMUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/classloader/JVMUtils.java @@ -32,7 +32,6 @@ import org.reflections.util.ClasspathHelper; import java.io.File; import java.io.IOException; -import java.lang.annotation.Annotation; import java.lang.reflect.*; import java.net.URL; import java.util.*; @@ -198,7 +197,7 @@ public class JVMUtils { * @return the list of class path urls. */ public static Set getClasspathURLs() { - return ClasspathHelper.getUrlsForManifestsCurrentClasspath(); + return ClasspathHelper.forManifest(); } /** @@ -240,8 +239,8 @@ public class JVMUtils { /** * Returns a comma-separated list of the names of the interfaces implemented by this class * - * @param covClass - * @return + * @param covClass class + * @return names of interfaces */ public static String classInterfaces(final Class covClass) { final List interfaces = new ArrayList(); diff --git a/public/java/src/org/broadinstitute/sting/utils/classloader/PluginManager.java b/public/java/src/org/broadinstitute/sting/utils/classloader/PluginManager.java index 82fb6b8d6..43cc800d8 100644 --- a/public/java/src/org/broadinstitute/sting/utils/classloader/PluginManager.java +++ b/public/java/src/org/broadinstitute/sting/utils/classloader/PluginManager.java @@ -25,8 +25,6 @@ package org.broadinstitute.sting.utils.classloader; -import ch.qos.logback.classic.Level; -import ch.qos.logback.classic.Logger; import org.broadinstitute.sting.gatk.WalkerManager; import org.broadinstitute.sting.gatk.filters.FilterManager; import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException; @@ -35,7 +33,6 @@ import org.broadinstitute.sting.utils.exceptions.UserException; import org.reflections.Reflections; import org.reflections.scanners.SubTypesScanner; import org.reflections.util.ConfigurationBuilder; -import org.slf4j.LoggerFactory; import java.io.File; import java.lang.reflect.Constructor; @@ -57,9 +54,8 @@ public class PluginManager { private static final Reflections defaultReflections; static { - // turn off logging in the reflections library - they talk too much (to the wrong logger factory as well, logback) - Logger logger = (ch.qos.logback.classic.Logger) LoggerFactory.getLogger(Reflections.class); - logger.setLevel(Level.OFF); + // turn off logging in the reflections library - they talk too much + Reflections.log = null; Set classPathUrls = new LinkedHashSet(); @@ -179,9 +175,9 @@ public class PluginManager { /** * Sorts, in place, the list of plugins according to getName() on each element * - * @param unsortedPlugins + * @param unsortedPlugins unsorted plugins */ - private final void sortPlugins(final List> unsortedPlugins) { + private void sortPlugins(final List> unsortedPlugins) { Collections.sort(unsortedPlugins, new ComparePluginsByName()); } @@ -235,7 +231,7 @@ public class PluginManager { * @param plugin Name of the plugin for which to search. * @return True if the plugin exists, false otherwise. */ - public boolean exists(Class plugin) { + public boolean exists(Class plugin) { return pluginsByName.containsValue(plugin); } diff --git a/settings/ivysettings.xml b/settings/ivysettings.xml index e17342442..ce7667140 100644 --- a/settings/ivysettings.xml +++ b/settings/ivysettings.xml @@ -7,7 +7,6 @@ - From f93b2791511070afa5af1808426305365a4eb28f Mon Sep 17 00:00:00 2001 From: kshakir Date: Tue, 16 Oct 2012 18:49:10 -0400 Subject: [PATCH 79/93] Moved the class field caching from QScript to a ClassFieldCache utility. Using ClassFieldCache to pull values from QScript for passing to done() method of QStatusMessenger. --- .../sting/queue/QCommandLine.scala | 11 +- .../broadinstitute/sting/queue/QScript.scala | 37 ++-- .../sting/queue/engine/QStatusMessenger.scala | 5 +- .../extensions/gatk/BamGatherFunction.scala | 7 +- .../extensions/gatk/VcfGatherFunction.scala | 5 +- .../sting/queue/function/QFunction.scala | 80 +------- .../scattergather/CloneFunction.scala | 9 +- .../sting/queue/util/ClassFieldCache.scala | 183 ++++++++++++++++++ 8 files changed, 227 insertions(+), 110 deletions(-) create mode 100644 public/scala/src/org/broadinstitute/sting/queue/util/ClassFieldCache.scala diff --git a/public/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala b/public/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala index f4c4b613f..5b84bfd16 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala @@ -100,7 +100,7 @@ class QCommandLine extends CommandLineProgram with Logging { new PluginManager[QStatusMessenger](classOf[QStatusMessenger]) } - QFunction.parsingEngine = new ParsingEngine(this) + ClassFieldCache.parsingEngine = new ParsingEngine(this) /** * Takes the QScripts passed in, runs their script() methods, retrieves their generated @@ -127,6 +127,9 @@ class QCommandLine extends CommandLineProgram with Logging { for (script <- allQScripts) { logger.info("Scripting " + qScriptPluginManager.getName(script.getClass.asSubclass(classOf[QScript]))) loadArgumentsIntoObject(script) + // TODO: Pulling inputs can be time/io expensive! Some scripts are using the files to generate functions-- even for dry runs-- so pull it all down for now. + //if (settings.run) + script.pullInputs() script.qSettings = settings.qSettings try { script.script() @@ -138,10 +141,6 @@ class QCommandLine extends CommandLineProgram with Logging { logger.info("Added " + script.functions.size + " functions") } - if (settings.run) { - allQScripts.foreach(_.pullInputs()) - } - // Execute the job graph qGraph.run() @@ -170,7 +169,7 @@ class QCommandLine extends CommandLineProgram with Logging { if (settings.run) { allQScripts.foreach(_.pushOutputs()) for (statusMessenger <- allStatusMessengers) - statusMessenger.done() + statusMessenger.done(allQScripts.map(_.remoteOutputs)) } 0 } diff --git a/public/scala/src/org/broadinstitute/sting/queue/QScript.scala b/public/scala/src/org/broadinstitute/sting/queue/QScript.scala index da24b854e..ee2089dc5 100755 --- a/public/scala/src/org/broadinstitute/sting/queue/QScript.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/QScript.scala @@ -28,8 +28,7 @@ import engine.JobRunInfo import org.broadinstitute.sting.queue.function.QFunction import annotation.target.field import util._ -import org.broadinstitute.sting.utils.classloader.JVMUtils -import java.lang.reflect.Field +import org.broadinstitute.sting.commandline.ArgumentSource /** * Defines a Queue pipeline as a collection of CommandLineFunctions. @@ -110,31 +109,29 @@ trait QScript extends Logging with PrimitiveOptionConversions with StringFileCon } def pullInputs() { - val inputs = getInputs - inputs.filter(_.isInstanceOf[RemoteFile]).map(_.asInstanceOf[RemoteFile]).foreach(_.pullToLocal()) + val inputs = ClassFieldCache.getFieldFiles(this, inputFields) + filterRemoteFiles(inputs).foreach(_.pullToLocal()) } def pushOutputs() { - val outputs = getOutputs - outputs.filter(_.isInstanceOf[RemoteFile]).map(_.asInstanceOf[RemoteFile]).foreach(_.pushToRemote()) + val outputs = ClassFieldCache.getFieldFiles(this, outputFields) + filterRemoteFiles(outputs).foreach(_.pushToRemote()) } - private def getInputs: Seq[File] = { - getFieldValues(classOf[Input]) - } + def remoteOutputs: Map[ArgumentSource, Seq[RemoteFile]] = + outputFields.map(field => (field -> filterRemoteFiles(ClassFieldCache.getFieldFiles(this, field)))).filter(tuple => !tuple._2.isEmpty).toMap - private def getOutputs: Seq[File] = { - getFieldValues(classOf[Output]) - } + private def filterRemoteFiles(fields: Seq[File]): Seq[RemoteFile] = + fields.filter(field => field != null && field.isInstanceOf[RemoteFile]).map(_.asInstanceOf[RemoteFile]) - private def getFieldValues(annotation: Class[_ <: java.lang.annotation.Annotation]): Seq[File] = { - val filtered: Seq[Field] = fields.filter(field => ReflectionUtils.hasAnnotation(field, annotation)) - val files = filtered.filter(field => classOf[File].isAssignableFrom(field.getType)).map(field => ReflectionUtils.getValue(this, field).asInstanceOf[File]) - val seqFiles = filtered.filter(field => classOf[Seq[File]].isAssignableFrom(field.getType)).map(field => ReflectionUtils.getValue(this, field).asInstanceOf[Seq[File]]) - seqFiles.foldLeft(files)(_ ++ _).filter(_ != null) - } - - private lazy val fields = collection.JavaConversions.asScalaBuffer(JVMUtils.getAllFields(this.getClass)).toSeq + /** The complete list of fields. */ + def functionFields: Seq[ArgumentSource] = ClassFieldCache.classFunctionFields(this.getClass) + /** The @Input fields. */ + def inputFields: Seq[ArgumentSource] = ClassFieldCache.classInputFields(this.getClass) + /** The @Output fields. */ + def outputFields: Seq[ArgumentSource] = ClassFieldCache.classOutputFields(this.getClass) + /** The @Argument fields. */ + def argumentFields: Seq[ArgumentSource] = ClassFieldCache.classArgumentFields(this.getClass) } object QScript { diff --git a/public/scala/src/org/broadinstitute/sting/queue/engine/QStatusMessenger.scala b/public/scala/src/org/broadinstitute/sting/queue/engine/QStatusMessenger.scala index c61f2ef1f..eeabe6d1d 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/engine/QStatusMessenger.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/engine/QStatusMessenger.scala @@ -1,10 +1,13 @@ package org.broadinstitute.sting.queue.engine +import org.broadinstitute.sting.commandline.ArgumentSource +import org.broadinstitute.sting.queue.util.RemoteFile + /** * Plugin to sends QStatus messages */ trait QStatusMessenger { def started() - def done() + def done(files: Seq[Map[ArgumentSource, Seq[RemoteFile]]]) def exit(message: String) } diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/BamGatherFunction.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/BamGatherFunction.scala index 9522ec86c..a59f273ad 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/BamGatherFunction.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/BamGatherFunction.scala @@ -28,6 +28,7 @@ import org.broadinstitute.sting.queue.function.scattergather.GatherFunction import org.broadinstitute.sting.queue.extensions.picard.PicardBamFunction import org.broadinstitute.sting.queue.function.{RetryMemoryLimit, QFunction} import org.broadinstitute.sting.gatk.io.stubs.SAMFileWriterArgumentTypeDescriptor +import org.broadinstitute.sting.queue.util.ClassFieldCache /** * Merges BAM files using net.sf.picard.sam.MergeSamFiles. @@ -47,13 +48,13 @@ class BamGatherFunction extends GatherFunction with PicardBamFunction with Retry // bam_compression and index_output_bam_on_the_fly from SAMFileWriterArgumentTypeDescriptor // are added by the GATKExtensionsGenerator to the subclass of CommandLineGATK - val compression = QFunction.findField(originalFunction.getClass, SAMFileWriterArgumentTypeDescriptor.COMPRESSION_FULLNAME) + val compression = ClassFieldCache.findField(originalFunction.getClass, SAMFileWriterArgumentTypeDescriptor.COMPRESSION_FULLNAME) this.compressionLevel = originalGATK.getFieldValue(compression).asInstanceOf[Option[Int]] - val disableIndex = QFunction.findField(originalFunction.getClass, SAMFileWriterArgumentTypeDescriptor.DISABLE_INDEXING_FULLNAME) + val disableIndex = ClassFieldCache.findField(originalFunction.getClass, SAMFileWriterArgumentTypeDescriptor.DISABLE_INDEXING_FULLNAME) this.createIndex = Some(!originalGATK.getFieldValue(disableIndex).asInstanceOf[Boolean]) - val enableMD5 = QFunction.findField(originalFunction.getClass, SAMFileWriterArgumentTypeDescriptor.ENABLE_MD5_FULLNAME) + val enableMD5 = ClassFieldCache.findField(originalFunction.getClass, SAMFileWriterArgumentTypeDescriptor.ENABLE_MD5_FULLNAME) this.createMD5 = Some(originalGATK.getFieldValue(enableMD5).asInstanceOf[Boolean]) super.freezeFieldValues() diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/VcfGatherFunction.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/VcfGatherFunction.scala index 75be4d773..fb22554f0 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/VcfGatherFunction.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/VcfGatherFunction.scala @@ -27,6 +27,7 @@ package org.broadinstitute.sting.queue.extensions.gatk import org.broadinstitute.sting.queue.function.scattergather.GatherFunction import org.broadinstitute.sting.queue.function.{RetryMemoryLimit, QFunction} import org.broadinstitute.sting.gatk.io.stubs.VCFWriterArgumentTypeDescriptor +import org.broadinstitute.sting.queue.util.ClassFieldCache /** * Merges a vcf text file. @@ -46,10 +47,10 @@ class VcfGatherFunction extends CombineVariants with GatherFunction with RetryMe // NO_HEADER and sites_only from VCFWriterArgumentTypeDescriptor // are added by the GATKExtensionsGenerator to the subclass of CommandLineGATK - val noHeader = QFunction.findField(originalFunction.getClass, VCFWriterArgumentTypeDescriptor.NO_HEADER_ARG_NAME) + val noHeader = ClassFieldCache.findField(originalFunction.getClass, VCFWriterArgumentTypeDescriptor.NO_HEADER_ARG_NAME) this.no_cmdline_in_header = originalGATK.getFieldValue(noHeader).asInstanceOf[Boolean] - val sitesOnly = QFunction.findField(originalFunction.getClass, VCFWriterArgumentTypeDescriptor.SITES_ONLY_ARG_NAME) + val sitesOnly = ClassFieldCache.findField(originalFunction.getClass, VCFWriterArgumentTypeDescriptor.SITES_ONLY_ARG_NAME) this.sites_only = originalGATK.getFieldValue(sitesOnly).asInstanceOf[Boolean] // ensure that the gather function receives the same unsafe parameter as the scattered function diff --git a/public/scala/src/org/broadinstitute/sting/queue/function/QFunction.scala b/public/scala/src/org/broadinstitute/sting/queue/function/QFunction.scala index aae846534..3849b976a 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/function/QFunction.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/function/QFunction.scala @@ -28,7 +28,6 @@ import java.io.File import java.lang.annotation.Annotation import org.broadinstitute.sting.commandline._ import org.broadinstitute.sting.queue.{QException, QSettings} -import collection.JavaConversions._ import java.lang.IllegalStateException import org.broadinstitute.sting.queue.util._ import org.broadinstitute.sting.utils.io.IOUtils @@ -194,13 +193,13 @@ trait QFunction extends Logging with QJobReport { def failOutputs: Seq[File] = statusPrefixes.map(path => new File(path + ".fail")) /** The complete list of fields on this CommandLineFunction. */ - def functionFields = QFunction.classFields(this.functionFieldClass).functionFields + def functionFields: Seq[ArgumentSource] = ClassFieldCache.classFunctionFields(this.functionFieldClass) /** The @Input fields on this CommandLineFunction. */ - def inputFields = QFunction.classFields(this.functionFieldClass).inputFields + def inputFields: Seq[ArgumentSource] = ClassFieldCache.classInputFields(this.functionFieldClass) /** The @Output fields on this CommandLineFunction. */ - def outputFields = QFunction.classFields(this.functionFieldClass).outputFields + def outputFields: Seq[ArgumentSource] = ClassFieldCache.classOutputFields(this.functionFieldClass) /** The @Argument fields on this CommandLineFunction. */ - def argumentFields = QFunction.classFields(this.functionFieldClass).argumentFields + def argumentFields: Seq[ArgumentSource] = ClassFieldCache.classArgumentFields(this.functionFieldClass) /** * Returns the class that should be used for looking up fields. @@ -475,79 +474,12 @@ trait QFunction extends Logging with QJobReport { * @param source Field to get the value for. * @return value of the field. */ - def getFieldValue(source: ArgumentSource) = ReflectionUtils.getValue(invokeObj(source), source.field) + def getFieldValue(source: ArgumentSource) = ClassFieldCache.getFieldValue(this, source) /** * Gets the value of a field. * @param source Field to set the value for. * @return value of the field. */ - def setFieldValue(source: ArgumentSource, value: Any) = ReflectionUtils.setValue(invokeObj(source), source.field, value) - - /** - * Walks gets the fields in this object or any collections in that object - * recursively to find the object holding the field to be retrieved or set. - * @param source Field find the invoke object for. - * @return Object to invoke the field on. - */ - private def invokeObj(source: ArgumentSource) = source.parentFields.foldLeft[AnyRef](this)(ReflectionUtils.getValue(_, _)) -} - -object QFunction { - var parsingEngine: ParsingEngine = _ - - /** - * The list of fields defined on a class - * @param clazz The class to lookup fields. - */ - private class ClassFields(clazz: Class[_]) { - /** The complete list of fields on this CommandLineFunction. */ - val functionFields: Seq[ArgumentSource] = parsingEngine.extractArgumentSources(clazz).toSeq - /** The @Input fields on this CommandLineFunction. */ - val inputFields = functionFields.filter(source => ReflectionUtils.hasAnnotation(source.field, classOf[Input])) - /** The @Output fields on this CommandLineFunction. */ - val outputFields = functionFields.filter(source => ReflectionUtils.hasAnnotation(source.field, classOf[Output])) - /** The @Argument fields on this CommandLineFunction. */ - val argumentFields = functionFields.filter(source => ReflectionUtils.hasAnnotation(source.field, classOf[Argument])) - } - - /** - * The mapping from class to fields. - */ - private var classFieldsMap = Map.empty[Class[_], ClassFields] - - /** - * Returns the field on clazz. - * @param clazz Class to search. - * @param name Name of the field to return. - * @return Argument source for the field. - */ - def findField(clazz: Class[_], name: String) = { - classFields(clazz).functionFields.find(_.field.getName == name) match { - case Some(source) => source - case None => throw new QException("Could not find a field on class %s with name %s".format(clazz, name)) - } - } - - /** - * Returns the fields for a class. - * @param clazz Class to retrieve fields for. - * @return the fields for the class. - */ - private def classFields(clazz: Class[_]) = { - classFieldsMap.get(clazz) match { - case Some(classFields) => classFields - case None => - val classFields = new ClassFields(clazz) - classFieldsMap += clazz -> classFields - classFields - } - } - - /** - * Returns the Seq of fields for a QFunction class. - * @param clazz Class to retrieve fields for. - * @return the fields of the class. - */ - def classFunctionFields(clazz: Class[_]) = classFields(clazz).functionFields + def setFieldValue(source: ArgumentSource, value: Any) = ClassFieldCache.setFieldValue(this, source, value) } diff --git a/public/scala/src/org/broadinstitute/sting/queue/function/scattergather/CloneFunction.scala b/public/scala/src/org/broadinstitute/sting/queue/function/scattergather/CloneFunction.scala index 686188e72..91cacbb71 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/function/scattergather/CloneFunction.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/function/scattergather/CloneFunction.scala @@ -25,13 +25,14 @@ package org.broadinstitute.sting.queue.function.scattergather import org.broadinstitute.sting.commandline.ArgumentSource -import org.broadinstitute.sting.queue.function.{QFunction, CommandLineFunction} +import org.broadinstitute.sting.queue.function.CommandLineFunction +import org.broadinstitute.sting.queue.util.ClassFieldCache /** * Shadow clones another command line function. */ object CloneFunction { - private lazy val cloneFunctionFields = QFunction.classFunctionFields(classOf[CloneFunction]) + private lazy val cloneFunctionFields = ClassFieldCache.classFunctionFields(classOf[CloneFunction]) } class CloneFunction extends CommandLineFunction { @@ -76,7 +77,7 @@ class CloneFunction extends CommandLineFunction { def commandLine = withScatterPart(() => originalFunction.commandLine) def getFieldValue(field: String): AnyRef = { - val source = QFunction.findField(originalFunction.getClass, field) + val source = ClassFieldCache.findField(originalFunction.getClass, field) getFieldValue(source) } @@ -98,7 +99,7 @@ class CloneFunction extends CommandLineFunction { } def setFieldValue(field: String, value: Any) { - val source = QFunction.findField(originalFunction.getClass, field) + val source = ClassFieldCache.findField(originalFunction.getClass, field) setFieldValue(source, value) } diff --git a/public/scala/src/org/broadinstitute/sting/queue/util/ClassFieldCache.scala b/public/scala/src/org/broadinstitute/sting/queue/util/ClassFieldCache.scala new file mode 100644 index 000000000..870dd5617 --- /dev/null +++ b/public/scala/src/org/broadinstitute/sting/queue/util/ClassFieldCache.scala @@ -0,0 +1,183 @@ +package org.broadinstitute.sting.queue.util + +import org.broadinstitute.sting.commandline._ +import scala.Some +import org.broadinstitute.sting.queue.QException +import collection.JavaConversions._ +import java.io.File + +/** + * Utilities and a static cache of argument fields for various classes populated by the parsingEngine. + * Because this class works with the ParsingEngine it can walk @ArgumentCollection hierarchies. + */ +object ClassFieldCache { + var parsingEngine: ParsingEngine = _ + + + // + // Field caching + // + + /** + * The list of fields defined on a class + * @param clazz The class to lookup fields. + */ + private class ClassFields(clazz: Class[_]) { + /** The complete list of fields on this CommandLineFunction. */ + val functionFields: Seq[ArgumentSource] = parsingEngine.extractArgumentSources(clazz).toSeq + /** The @Input fields on this CommandLineFunction. */ + val inputFields: Seq[ArgumentSource] = functionFields.filter(source => ReflectionUtils.hasAnnotation(source.field, classOf[Input])) + /** The @Output fields on this CommandLineFunction. */ + val outputFields: Seq[ArgumentSource] = functionFields.filter(source => ReflectionUtils.hasAnnotation(source.field, classOf[Output])) + /** The @Argument fields on this CommandLineFunction. */ + val argumentFields: Seq[ArgumentSource] = functionFields.filter(source => ReflectionUtils.hasAnnotation(source.field, classOf[Argument])) + } + + /** + * The mapping from class to fields. + */ + private var classFieldsMap = Map.empty[Class[_], ClassFields] + + /** + * Returns the fields for a class. + * @param clazz Class to retrieve fields for. + * @return the fields for the class. + */ + private def classFields(clazz: Class[_]): ClassFields = { + classFieldsMap.get(clazz) match { + case Some(classFields) => classFields + case None => + val classFields = new ClassFields(clazz) + classFieldsMap += clazz -> classFields + classFields + } + } + + /** + * Returns the field on clazz. + * @param clazz Class to search. + * @param name Name of the field to return. + * @return Argument source for the field. + */ + def findField(clazz: Class[_], name: String): ArgumentSource = { + classFields(clazz).functionFields.find(_.field.getName == name) match { + case Some(source) => source + case None => throw new QException("Could not find a field on class %s with name %s".format(clazz, name)) + } + } + + /** + * Returns the Seq of fields for a QFunction class. + * @param clazz Class to retrieve fields for. + * @return the fields of the class. + */ + def classFunctionFields(clazz: Class[_]): Seq[ArgumentSource] = classFields(clazz).functionFields + + /** + * Returns the Seq of inputs for a QFunction class. + * @param clazz Class to retrieve inputs for. + * @return the inputs of the class. + */ + def classInputFields(clazz: Class[_]): Seq[ArgumentSource] = classFields(clazz).inputFields + + /** + * Returns the Seq of outputs for a QFunction class. + * @param clazz Class to retrieve outputs for. + * @return the outputs of the class. + */ + def classOutputFields(clazz: Class[_]): Seq[ArgumentSource] = classFields(clazz).outputFields + + /** + * Returns the Seq of arguments for a QFunction class. + * @param clazz Class to retrieve arguments for. + * @return the arguments of the class. + */ + def classArgumentFields(clazz: Class[_]): Seq[ArgumentSource] = classFields(clazz).argumentFields + + + // + // get/set fields as AnyRef + // + + /** + * Gets the value of a field. + * @param obj Top level object storing the source info. + * @param source Field to get the value for. + * @return value of the field. + */ + def getFieldValue(obj: AnyRef, source: ArgumentSource) = ReflectionUtils.getValue(invokeObj(obj, source), source.field) + + /** + * Gets the value of a field. + * @param obj Top level object storing the source info. + * @param source Field to set the value for. + * @return value of the field. + */ + def setFieldValue(obj: AnyRef, source: ArgumentSource, value: Any) = ReflectionUtils.setValue(invokeObj(obj, source), source.field, value) + + /** + * Walks gets the fields in this object or any collections in that object + * recursively to find the object holding the field to be retrieved or set. + * @param obj Top level object storing the source info. + * @param source Field find the invoke object for. + * @return Object to invoke the field on. + */ + private def invokeObj(obj: AnyRef, source: ArgumentSource) = source.parentFields.foldLeft[AnyRef](obj)(ReflectionUtils.getValue(_, _)) + + + // + // get/set fields as java.io.File + // + + /** + * Gets the files from the fields. The fields must be a File, a FileExtension, or a Seq or Set of either. + * @param obj Top level object storing the source info. + * @param fields Fields to get files. + * @return for the fields. + */ + def getFieldFiles(obj: AnyRef, fields: Seq[ArgumentSource]): Seq[File] = { + var files: Seq[File] = Nil + for (field <- fields) + files ++= getFieldFiles(obj, field) + files.distinct + } + + /** + * Gets the files from the field. The field must be a File, a FileExtension, or a Seq or Set of either. + * @param obj Top level object storing the source info. + * @param field Field to get files. + * @return for the field. + */ + def getFieldFiles(obj: AnyRef, field: ArgumentSource): Seq[File] = { + var files: Seq[File] = Nil + CollectionUtils.foreach(getFieldValue(obj, field), (fieldValue) => { + val file = fieldValueToFile(field, fieldValue) + if (file != null) + files :+= file + }) + files.distinct + } + + /** + * Gets the file from the field. The field must be a File or a FileExtension and not a Seq or Set. + * @param obj Top level object storing the source info. + * @param field Field to get the file. + * @return for the field. + */ + def getFieldFile(obj: AnyRef, field: ArgumentSource): File = + fieldValueToFile(field, getFieldValue(obj, field)) + + /** + * Converts the field value to a file. The field must be a File or a FileExtension. + * @param field Field to get the file. + * @param value Value of the File or FileExtension or null. + * @return Null if value is null, otherwise the File. + * @throws QException if the value is not a File or FileExtension. + */ + private def fieldValueToFile(field: ArgumentSource, value: Any): File = value match { + case file: File => file + case null => null + case unknown => throw new QException("Non-file found. Try removing the annotation, change the annotation to @Argument, or extend File with FileExtension: %s: %s".format(field.field, unknown)) + } + +} From 0196dbeacac298f04c65e024ab5addd137205d9e Mon Sep 17 00:00:00 2001 From: kshakir Date: Wed, 17 Oct 2012 09:52:17 -0400 Subject: [PATCH 80/93] Added more logging to push/pull of RemoteFiles. --- .../src/org/broadinstitute/sting/queue/QScript.scala | 10 ++++++++-- .../broadinstitute/sting/queue/util/RemoteFile.scala | 1 + 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/public/scala/src/org/broadinstitute/sting/queue/QScript.scala b/public/scala/src/org/broadinstitute/sting/queue/QScript.scala index ee2089dc5..2dcfb916c 100755 --- a/public/scala/src/org/broadinstitute/sting/queue/QScript.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/QScript.scala @@ -110,12 +110,18 @@ trait QScript extends Logging with PrimitiveOptionConversions with StringFileCon def pullInputs() { val inputs = ClassFieldCache.getFieldFiles(this, inputFields) - filterRemoteFiles(inputs).foreach(_.pullToLocal()) + for (remoteFile <- filterRemoteFiles(inputs)) { + logger.info("Pulling %s from %s".format(remoteFile.getAbsolutePath, remoteFile.remoteDescription)) + remoteFile.pullToLocal() + } } def pushOutputs() { val outputs = ClassFieldCache.getFieldFiles(this, outputFields) - filterRemoteFiles(outputs).foreach(_.pushToRemote()) + for (remoteFile <- filterRemoteFiles(outputs)) { + logger.info("Pushing %s to %s".format(remoteFile.getAbsolutePath, remoteFile.remoteDescription)) + remoteFile.pushToRemote() + } } def remoteOutputs: Map[ArgumentSource, Seq[RemoteFile]] = diff --git a/public/scala/src/org/broadinstitute/sting/queue/util/RemoteFile.scala b/public/scala/src/org/broadinstitute/sting/queue/util/RemoteFile.scala index cfe848ba8..9d94975ba 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/util/RemoteFile.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/util/RemoteFile.scala @@ -10,4 +10,5 @@ trait RemoteFile extends File with FileExtension { def pullToLocal() def pushToRemote() def deleteRemote() + def remoteDescription: String } From c9e7a947c26e0e632b3161eb457408fd75339717 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 17 Oct 2012 08:34:47 -0400 Subject: [PATCH 82/93] Improve interface of ExactCallLogger, use it to have a more informative AFCalcPerformanceTest --- .../afcalc/AFCalcPerformanceTest.java | 21 +++++- .../genotyper/afcalc/AFCalcResult.java | 13 ++++ .../genotyper/afcalc/ExactCallLogger.java | 73 +++++++++++-------- 3 files changed, 73 insertions(+), 34 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcPerformanceTest.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcPerformanceTest.java index fab26c9d2..e9ed6b153 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcPerformanceTest.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcPerformanceTest.java @@ -10,6 +10,7 @@ import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.SimpleTimer; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; +import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.Genotype; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder; @@ -225,12 +226,24 @@ public class AFCalcPerformanceTest { final AFCalcTestBuilder testBuilder = new AFCalcTestBuilder(call.vc.getNSamples(), 1, AFCalcFactory.Calculation.EXACT_INDEPENDENT, AFCalcTestBuilder.PriorType.human); + logger.info(call); final SimpleTimer timer = new SimpleTimer().start(); final AFCalcResult result = testBuilder.makeModel().getLog10PNonRef(call.vc, testBuilder.makePriors()); - call.newNanoTime = timer.getElapsedTimeNano(); - call.newPNonRef = result.getLog10PosteriorOfAFGT0(); - logger.info(call); - logger.info("\t\t" + result); + final long newNanoTime = timer.getElapsedTimeNano(); + if ( call.originalCall.anyPolymorphic(-1) || result.anyPolymorphic(-1) ) { + logger.info("**** ONE IS POLY"); + } + logger.info("\t\t getLog10PosteriorOfAFGT0: " + call.originalCall.getLog10PosteriorOfAFGT0() + " vs " + result.getLog10PosteriorOfAFGT0()); + final double speedup = call.runtime / (1.0 * newNanoTime); + logger.info("\t\t runtime: " + call.runtime + " vs " + newNanoTime + " speedup " + String.format("%.2f", speedup) + "x"); + for ( final Allele a : call.originalCall.getAllelesUsedInGenotyping() ) { + if ( a.isNonReference() ) { + final String warningmeMLE = call.originalCall.getAlleleCountAtMLE(a) != result.getAlleleCountAtMLE(a) ? " DANGER-MLE-DIFFERENT" : ""; + logger.info("\t\t MLE " + a + ": " + call.originalCall.getAlleleCountAtMLE(a) + " vs " + result.getAlleleCountAtMLE(a) + warningmeMLE); + final String warningmePost = call.originalCall.getLog10PosteriorOfAFGt0ForAllele(a) == 0 && result.getLog10PosteriorOfAFGt0ForAllele(a) < -10 ? " DANGER-POSTERIORS-DIFFERENT" : ""; + logger.info("\t\t Posterior " + a + ": " + call.originalCall.getLog10PosteriorOfAFGt0ForAllele(a) + " vs " + result.getLog10PosteriorOfAFGt0ForAllele(a) + warningmePost); + } + } } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResult.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResult.java index c737416c5..7cacb2060 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResult.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResult.java @@ -238,6 +238,19 @@ public class AFCalcResult { return getLog10PosteriorOfAFGt0ForAllele(allele) >= log10minPNonRef; } + /** + * Are any of the alleles polymorphic w.r.t. #isPolymorphic? + * + * @param log10minPNonRef the confidence threshold, in log10 space + * @return true if any are poly, false otherwise + */ + public boolean anyPolymorphic(final double log10minPNonRef) { + for ( final Allele a : getAllelesUsedInGenotyping() ) + if ( a.isNonReference() && isPolymorphic(a, log10minPNonRef) ) + return true; + return false; + } + /** * Returns the log10 probability that allele is segregating * diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactCallLogger.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactCallLogger.java index 3794ba240..9394c99d7 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactCallLogger.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactCallLogger.java @@ -1,20 +1,18 @@ package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc; -import org.broadinstitute.sting.utils.AutoFormattingTime; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.Utils; +import com.google.java.contract.Requires; +import org.apache.commons.lang.ArrayUtils; +import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.variantcontext.*; import java.io.*; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.LinkedList; -import java.util.List; +import java.util.*; /** * Allows us to write out and read in information about exact calls (site, alleles, PLs, etc) in tabular format + * + * Once opened, calls can be writen to disk with printCallInfo */ public class ExactCallLogger implements Cloneable { private PrintStream callReport = null; @@ -38,32 +36,28 @@ public class ExactCallLogger implements Cloneable { */ public static class ExactCall { final VariantContext vc; - final long origNanoTime; - long newNanoTime = -1; - final double origPNonRef; - double newPNonRef = -1; + final long runtime; + final AFCalcResult originalCall; - public ExactCall(VariantContext vc, long origNanoTime, double origPNonRef) { + public ExactCall(VariantContext vc, final long runtime, final AFCalcResult originalCall) { this.vc = vc; - this.origNanoTime = origNanoTime; - this.origPNonRef = origPNonRef; + this.runtime = runtime; + this.originalCall = originalCall; } @Override public String toString() { - return String.format("ExactCall %s:%d alleles=%s nSamples=%s orig.pNonRef=%.2f orig.runtime=%s new.pNonRef=%.2f new.runtime=%s", + return String.format("ExactCall %s:%d alleles=%s nSamples=%s orig.pNonRef=%.2f orig.runtime=%s", vc.getChr(), vc.getStart(), vc.getAlleles(), vc.getNSamples(), - origPNonRef, - new AutoFormattingTime(origNanoTime / 1e9).toString(), - newPNonRef, - newNanoTime == -1 ? "not.run" : new AutoFormattingTime(newNanoTime / 1e9).toString()); + originalCall.getLog10PosteriorOfAFGT0(), + new AutoFormattingTime(runtime / 1e9).toString()); } } - protected void printCallInfo(final VariantContext vc, - final double[] log10AlleleFrequencyPriors, - final long runtimeNano, - final AFCalcResult result) { + protected final void printCallInfo(final VariantContext vc, + final double[] log10AlleleFrequencyPriors, + final long runtimeNano, + final AFCalcResult result) { printCallElement(vc, "type", "ignore", vc.getType()); int allelei = 0; @@ -90,6 +84,7 @@ public class ExactCallLogger implements Cloneable { callReport.flush(); } + @Requires({"vc != null", "variable != null", "key != null", "value != null", "callReport != null"}) private void printCallElement(final VariantContext vc, final Object variable, final Object key, @@ -102,10 +97,10 @@ public class ExactCallLogger implements Cloneable { * Read in a list of ExactCall objects from reader, keeping only those * with starts in startsToKeep or all sites (if this is empty) * - * @param reader - * @param startsToKeep - * @param parser - * @return + * @param reader a just-opened reader sitting at the start of the file + * @param startsToKeep a list of start position of the calls to keep, or empty if all calls should be kept + * @param parser a genome loc parser to create genome locs + * @return a list of ExactCall objects in reader * @throws IOException */ public static List readExactLog(final BufferedReader reader, final List startsToKeep, GenomeLocParser parser) throws IOException { @@ -118,10 +113,17 @@ public class ExactCallLogger implements Cloneable { // skip the header line reader.readLine(); + // skip the first "type" line + reader.readLine(); + while (true) { final VariantContextBuilder builder = new VariantContextBuilder(); final List alleles = new ArrayList(); final List genotypes = new ArrayList(); + final double[] posteriors = new double[2]; + final double[] priors = MathUtils.normalizeFromLog10(new double[]{0.5, 0.5}, true); + final List mle = new ArrayList(); + final Map log10pNonRefByAllele = new HashMap(); long runtimeNano = -1; GenomeLoc currentLoc = null; @@ -139,13 +141,15 @@ public class ExactCallLogger implements Cloneable { if (currentLoc == null) currentLoc = lineLoc; - if (variable.equals("log10PosteriorOfAFzero")) { + if (variable.equals("type")) { if (startsToKeep.isEmpty() || startsToKeep.contains(currentLoc.getStart())) { builder.alleles(alleles); final int stop = currentLoc.getStart() + alleles.get(0).length() - 1; builder.chr(currentLoc.getContig()).start(currentLoc.getStart()).stop(stop); builder.genotypes(genotypes); - calls.add(new ExactCall(builder.make(), runtimeNano, Double.valueOf(value))); + final int[] mleInts = ArrayUtils.toPrimitive(mle.toArray(new Integer[]{})); + final AFCalcResult result = new AFCalcResult(mleInts, 1, alleles, posteriors, priors, log10pNonRefByAllele); + calls.add(new ExactCall(builder.make(), runtimeNano, result)); } break; } else if (variable.equals("allele")) { @@ -155,6 +159,15 @@ public class ExactCallLogger implements Cloneable { final GenotypeBuilder gb = new GenotypeBuilder(key); gb.PL(GenotypeLikelihoods.fromPLField(value).getAsPLs()); genotypes.add(gb.make()); + } else if (variable.equals("log10PosteriorOfAFEq0")) { + posteriors[0] = Double.valueOf(value); + } else if (variable.equals("log10PosteriorOfAFGt0")) { + posteriors[1] = Double.valueOf(value); + } else if (variable.equals("MLE")) { + mle.add(Integer.valueOf(value)); + } else if (variable.equals("pNonRefByAllele")) { + final Allele a = Allele.create(key); + log10pNonRefByAllele.put(a, Double.valueOf(value)); } else if (variable.equals("runtime.nano")) { runtimeNano = Long.valueOf(value); } else { From fa93681f513992b387387ec018f24d66232cc2dd Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 17 Oct 2012 14:14:41 -0400 Subject: [PATCH 83/93] Scalability test for EXACT models --- .../genotyper/afcalc/AFCalcTestBuilder.java | 6 +- .../afcalc/AFCalcPerformanceUnitTest.java | 87 +++++++++++++++++++ 2 files changed, 92 insertions(+), 1 deletion(-) create mode 100644 protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcPerformanceUnitTest.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcTestBuilder.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcTestBuilder.java index b4d105507..cfb67164d 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcTestBuilder.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcTestBuilder.java @@ -45,12 +45,16 @@ public class AFCalcTestBuilder { human } + public int getNumAltAlleles() { + return numAltAlleles; + } + public int getnSamples() { return nSamples; } public AFCalc makeModel() { - return AFCalcFactory.createAFCalc(modelType, nSamples, 4, 4, 2); + return AFCalcFactory.createAFCalc(modelType, nSamples, getNumAltAlleles(), getNumAltAlleles(), 2); } public double[] makePriors() { diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcPerformanceUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcPerformanceUnitTest.java new file mode 100644 index 000000000..556b7451f --- /dev/null +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcPerformanceUnitTest.java @@ -0,0 +1,87 @@ +package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc; + +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.collections.Pair; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.testng.Assert; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +public class AFCalcPerformanceUnitTest extends BaseTest { + @DataProvider(name = "ScalingTests") + public Object[][] makepolyTestProviderLotsOfAlleles() { + List tests = new ArrayList(); + + // list of all high-quality models in the system + final List biAllelicModels = Arrays.asList( + AFCalcFactory.Calculation.EXACT_INDEPENDENT, + AFCalcFactory.Calculation.EXACT_REFERENCE); + + final List multiAllelicModels = Arrays.asList( + AFCalcFactory.Calculation.EXACT_INDEPENDENT); + +// for ( final int nonTypePLs : Arrays.asList(100) ) { +// for ( final int nSamples : Arrays.asList(10000) ) { +// final List alleleCounts = Arrays.asList(50); +// for ( final int nAltAlleles : Arrays.asList(1) ) { + for ( final int nonTypePLs : Arrays.asList(100) ) { + for ( final int nSamples : Arrays.asList(100, 1000) ) { + final List alleleCounts = Arrays.asList(0, 1, 2, 3, 4, 5, 10, 50, 500); + for ( final int nAltAlleles : Arrays.asList(1, 2, 3) ) { + final List models = nAltAlleles > 1 ? multiAllelicModels : biAllelicModels; + for ( final AFCalcFactory.Calculation model : models ) { + for ( final List ACs : Utils.makePermutations(alleleCounts, nAltAlleles, true) ) { + if ( MathUtils.sum(ACs) < nSamples * 2 ) { + final AFCalcTestBuilder testBuilder + = new AFCalcTestBuilder(nSamples, nAltAlleles, model, AFCalcTestBuilder.PriorType.human); + tests.add(new Object[]{testBuilder, ACs, nonTypePLs}); + } + } + } + } + } + } + + return tests.toArray(new Object[][]{}); + } + + private Pair estNumberOfEvaluations(final AFCalcTestBuilder testBuilder, final VariantContext vc, final int nonTypePL) { + final int evalOverhead = 2; // 2 + final int maxEvalsPerSamplePerAC = 3; + + int minEvals = 0, maxEvals = 0; + + for ( final Allele alt : vc.getAlternateAlleles() ) { + final int AC = vc.getCalledChrCount(alt); + minEvals += AC + evalOverhead; // everyone is hom-var + maxEvals += AC * maxEvalsPerSamplePerAC + 10; + } + + return new Pair(minEvals, maxEvals); + } + + @Test(dataProvider = "ScalingTests") + private void testScaling(final AFCalcTestBuilder testBuilder, final List ACs, final int nonTypePL) { + final AFCalc calc = testBuilder.makeModel(); + final double[] priors = testBuilder.makePriors(); + final VariantContext vc = testBuilder.makeACTest(ACs, 0, nonTypePL); + final AFCalcResult result = calc.getLog10PNonRef(vc, priors); + final Pair expectedNEvaluation = estNumberOfEvaluations(testBuilder, vc, nonTypePL); + final int minEvals = expectedNEvaluation.getFirst(); + final int maxEvals = expectedNEvaluation.getSecond(); + + logger.warn(" min " + minEvals + " obs " + result.getnEvaluations() + " max " + maxEvals + " for test " + testBuilder + " sum(ACs)=" + (int)MathUtils.sum(ACs)); + + Assert.assertTrue(result.getnEvaluations() >= minEvals, + "Actual number of evaluations " + result.getnEvaluations() + " < min number of evals " + minEvals); + Assert.assertTrue(result.getnEvaluations() <= maxEvals, + "Actual number of evaluations " + result.getnEvaluations() + " > max number of evals " + minEvals); + } +} \ No newline at end of file From 8288c30e364b1aa858ef56411195fcdef9364b99 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 17 Oct 2012 14:14:55 -0400 Subject: [PATCH 84/93] Use buffered output for ExactCallLogger --- .../sting/gatk/walkers/genotyper/afcalc/ExactCallLogger.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactCallLogger.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactCallLogger.java index 9394c99d7..f13fe4429 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactCallLogger.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactCallLogger.java @@ -24,7 +24,7 @@ public class ExactCallLogger implements Cloneable { */ public ExactCallLogger(final File outputFile) { try { - callReport = new PrintStream(new FileOutputStream(outputFile)); + callReport = new PrintStream(new BufferedOutputStream(new FileOutputStream(outputFile), 10000000)); callReport.println(Utils.join("\t", Arrays.asList("loc", "variable", "key", "value"))); } catch (FileNotFoundException e) { throw new UserException.CouldNotCreateOutputFile(outputFile, e); From 97abb98c0bb98e27337449f1d3125c97554e56ae Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 18 Oct 2012 10:19:21 -0400 Subject: [PATCH 85/93] Bugfix for bad nt / nct argument detection in MicroScheduler --- .../sting/gatk/executive/MicroScheduler.java | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java b/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java index 07d9df79a..223e11680 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java +++ b/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java @@ -157,18 +157,22 @@ public abstract class MicroScheduler implements MicroSchedulerMBean { if ( ! (walker instanceof TreeReducible) ) { throw badNT("nt", engine, walker); - } else { - return new HierarchicalMicroScheduler(engine, walker, reads, reference, rods, threadAllocation); } + } + + if ( threadAllocation.getNumCPUThreadsPerDataThread() > 1 && ! (walker instanceof NanoSchedulable) ) { + throw badNT("nct", engine, walker); + } + + if ( threadAllocation.getNumDataThreads() > 1 ) { + return new HierarchicalMicroScheduler(engine, walker, reads, reference, rods, threadAllocation); } else { - if ( threadAllocation.getNumCPUThreadsPerDataThread() > 1 && ! (walker instanceof NanoSchedulable) ) - throw badNT("nct", engine, walker); return new LinearMicroScheduler(engine, walker, reads, reference, rods, threadAllocation); } } private static UserException badNT(final String parallelArg, final GenomeAnalysisEngine engine, final Walker walker) { - throw new UserException.BadArgumentValue("nt", + throw new UserException.BadArgumentValue(parallelArg, String.format("The analysis %s currently does not support parallel execution with %s. " + "Please run your analysis without the %s option.", engine.getWalkerName(walker.getClass()), parallelArg, parallelArg)); } From f20fa9d0823b031f7c7a6d952d54da3804954598 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 18 Oct 2012 10:19:34 -0400 Subject: [PATCH 86/93] SelectVariants is actually NanoSchedulable --- .../sting/gatk/walkers/variantutils/SelectVariants.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java index 15c17988c..98f56be1f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java @@ -31,6 +31,7 @@ import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgume import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.NanoSchedulable; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.gatk.walkers.TreeReducible; import org.broadinstitute.sting.gatk.walkers.annotator.ChromosomeCounts; @@ -42,11 +43,11 @@ import org.broadinstitute.sting.utils.MendelianViolation; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.codecs.vcf.*; -import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; -import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.text.XReadLines; import org.broadinstitute.sting.utils.variantcontext.*; +import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter; import java.io.File; import java.io.FileNotFoundException; @@ -188,7 +189,7 @@ import java.util.*; * */ @DocumentedGATKFeature( groupName = "Variant Evaluation and Manipulation Tools", extraDocs = {CommandLineGATK.class} ) -public class SelectVariants extends RodWalker implements TreeReducible { +public class SelectVariants extends RodWalker implements TreeReducible, NanoSchedulable { @ArgumentCollection protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection(); /** From d3fc797cfe2af278d88f06f3580f3247818a8918 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 18 Oct 2012 10:42:20 -0400 Subject: [PATCH 87/93] SelectVariants is actually *NOT* NanoSchedulable --- .../gatk/walkers/variantutils/SelectVariants.java | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java index 98f56be1f..c7b1d0fc7 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java @@ -31,7 +31,6 @@ import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgume import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.NanoSchedulable; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.gatk.walkers.TreeReducible; import org.broadinstitute.sting.gatk.walkers.annotator.ChromosomeCounts; @@ -189,7 +188,7 @@ import java.util.*; * */ @DocumentedGATKFeature( groupName = "Variant Evaluation and Manipulation Tools", extraDocs = {CommandLineGATK.class} ) -public class SelectVariants extends RodWalker implements TreeReducible, NanoSchedulable { +public class SelectVariants extends RodWalker implements TreeReducible { @ArgumentCollection protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection(); /** @@ -543,9 +542,11 @@ public class SelectVariants extends RodWalker implements TreeR VariantContext sub = subsetRecord(vc, EXCLUDE_NON_VARIANTS); if ( REGENOTYPE && sub.isPolymorphicInSamples() && hasPLs(sub) ) { - final VariantContextBuilder builder = new VariantContextBuilder(UG_engine.calculateGenotypes(sub)).filters(sub.getFiltersMaybeNull()); - addAnnotations(builder, sub); - sub = builder.make(); + synchronized (UG_engine) { + final VariantContextBuilder builder = new VariantContextBuilder(UG_engine.calculateGenotypes(sub)).filters(sub.getFiltersMaybeNull()); + addAnnotations(builder, sub); + sub = builder.make(); + } } if ( (!EXCLUDE_NON_VARIANTS || sub.isPolymorphicInSamples()) && (!EXCLUDE_FILTERED || !sub.isFiltered()) ) { From 3504f71b6b6be2ac7b97a5886c3609ca4c2caee6 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Thu, 18 Oct 2012 13:58:38 -0400 Subject: [PATCH 89/93] Fixing a null pointer exception bug for DEV-10 --- .../gatk/io/stubs/SAMFileWriterArgumentTypeDescriptor.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterArgumentTypeDescriptor.java index 2ea4bdfb0..00c6ddae8 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterArgumentTypeDescriptor.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterArgumentTypeDescriptor.java @@ -123,13 +123,13 @@ public class SAMFileWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor // This parser has been passed a null filename and the GATK is not responsible for creating a type default for the object; // therefore, the user must have failed to specify a type default - if(writerFileName.asFile() == null && generateMD5) + if(writerFileName != null && writerFileName.asFile() == null && generateMD5) throw new ArgumentException("MD5 generation specified, but no output file specified. If md5 generation is desired, please specify a BAM output file and an md5 file will be written alongside."); // Create the stub and set parameters. SAMFileWriterStub stub = null; // stub = new SAMFileWriterStub(engine, defaultOutputStream); - if ( writerFileName.asFile() != null ) { + if (writerFileName != null && writerFileName.asFile() != null ) { stub = new SAMFileWriterStub(engine, writerFileName.asFile()); if ( compressionLevel != null ) From b4e69239dd0504ac7dc088c5a7726bb6b66e63a6 Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Thu, 18 Oct 2012 14:31:15 -0400 Subject: [PATCH 90/93] In order to be considered an informative read in the PerReadAlleleLikelihoodMap it has to be informative compared to all other alleles not just the worst allele. Also, fixing a bug when there is only one allele in the map. --- ...GenotyperGeneralPloidyIntegrationTest.java | 4 ++-- .../HaplotypeCallerIntegrationTest.java | 4 ++-- .../genotyper/PerReadAlleleLikelihoodMap.java | 19 ++++++++----------- .../UnifiedGenotyperIntegrationTest.java | 11 +++++------ 4 files changed, 17 insertions(+), 21 deletions(-) diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidyIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidyIntegrationTest.java index 219c36a05..989f06ec5 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidyIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidyIntegrationTest.java @@ -70,12 +70,12 @@ public class UnifiedGenotyperGeneralPloidyIntegrationTest extends WalkerTest { @Test(enabled = true) public void testINDEL_maxAltAlleles2_ploidy3_Pools_noRef() { - PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","7d6f319b9edcb1ff8c290fef150a2df8"); + PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","9acfe0019efdc91217ee070acb071228"); } @Test(enabled = true) public void testINDEL_maxAltAlleles2_ploidy1_Pools_noRef() { - PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","INDEL","dd02890123e07e7412a49475cb6280f1"); + PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","INDEL","c1d4dd793f61710a1b1fc5d82803210f"); } @Test(enabled = true) diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index be8fd2fb2..a8ea4b7da 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -21,7 +21,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerMultiSample() { - HCTest(CEUTRIO_BAM, "", "8c52c0955099cca3215a0d78fd455894"); + HCTest(CEUTRIO_BAM, "", "75013fa6a884104f0b1797502b636698"); } @Test @@ -31,7 +31,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerMultiSampleGGA() { - HCTest(CEUTRIO_BAM, "--max_alternate_alleles_for_indels 3 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", "61c1a0fb62d909229af6b5a91dad8b35"); + HCTest(CEUTRIO_BAM, "--max_alternate_alleles_for_indels 3 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", "3cd3363976b1937d801f9f82996f4abe"); } private void HCTestComplexVariants(String bam, String args, String md5) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PerReadAlleleLikelihoodMap.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PerReadAlleleLikelihoodMap.java index 9c0062876..a83adc275 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PerReadAlleleLikelihoodMap.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PerReadAlleleLikelihoodMap.java @@ -113,23 +113,20 @@ public class PerReadAlleleLikelihoodMap { return likelihoodReadMap.get(p.getRead()); } - public static Allele getMostLikelyAllele(Map alleleMap) { - double minLike = Double.POSITIVE_INFINITY, maxLike = Double.NEGATIVE_INFINITY; + public static Allele getMostLikelyAllele( final Map alleleMap ) { + double maxLike = Double.NEGATIVE_INFINITY; + double prevMaxLike = Double.NEGATIVE_INFINITY; Allele mostLikelyAllele = Allele.NO_CALL; - for (Map.Entry el : alleleMap.entrySet()) { + for (final Map.Entry el : alleleMap.entrySet()) { if (el.getValue() > maxLike) { + prevMaxLike = maxLike; maxLike = el.getValue(); mostLikelyAllele = el.getKey(); + } else if( el.getValue() > prevMaxLike ) { + prevMaxLike = el.getValue(); } - - if (el.getValue() < minLike) - minLike = el.getValue(); - } - if (maxLike-minLike > INDEL_LIKELIHOOD_THRESH) - return mostLikelyAllele; - else - return Allele.NO_CALL; + return (maxLike - prevMaxLike > INDEL_LIKELIHOOD_THRESH ? mostLikelyAllele : Allele.NO_CALL ); } } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index df088a4ad..72724e46a 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -60,7 +60,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultipleSNPAlleles() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + privateTestDir + "multiallelic.snps.bam -o %s -L " + privateTestDir + "multiallelic.snps.intervals", 1, - Arrays.asList("26af30187316f742878c85f0ed091837")); + Arrays.asList("48b4f4b05461be276bffc91350f08cbc")); executeTest("test Multiple SNP alleles", spec); } @@ -76,7 +76,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testReverseTrim() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm INDEL -I " + validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam -o %s -L 20:10289124 -L 20:10090289", 1, - Arrays.asList("aa9cf96ab8f5aa844387e3aef1f27249")); + Arrays.asList("04affcc9d720ee17bc221759707e0cd2")); executeTest("test reverse trim", spec); } @@ -84,7 +84,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMismatchedPLs() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm INDEL -I " + privateTestDir + "mismatchedPLs.bam -o %s -L 1:24020341", 1, - Arrays.asList("d210ee1baa75dd4a0c63aef6b1fa7a8a")); + Arrays.asList("112e7bedfd284d4d9390aa006118c733")); executeTest("test mismatched PLs", spec); } @@ -343,13 +343,13 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSampleIndels1() { WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( baseCommandIndels + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1, - Arrays.asList("7fc488fe16dea9f023bfcfdaa908a548")); + Arrays.asList("863ee56b3594f09795644127f2f9539f")); List result = executeTest("test MultiSample Pilot1 CEU indels", spec1).getFirst(); WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1, - Arrays.asList("f3ff7fe0f15f31eadd726c711d6bf3de")); + Arrays.asList("503ca1b75cc7b2679eaa80f7b5e7ef1c")); executeTest("test MultiSample Pilot1 CEU indels using GENOTYPE_GIVEN_ALLELES", spec2); } @@ -452,5 +452,4 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { Arrays.asList("bbf16e1873e525ee5975021cfb8988cf")); executeTest("test calling on a ReducedRead BAM", spec); } - } From 3db38c5a93d80ede80cb8eb53bb630a1a6ee9490 Mon Sep 17 00:00:00 2001 From: Guillermo del Angel Date: Thu, 18 Oct 2012 15:42:14 -0400 Subject: [PATCH 92/93] Bug fix: inbreeding coeff shouldn't be computed in ref-only sites --- .../sting/gatk/walkers/annotator/InbreedingCoeff.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/InbreedingCoeff.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/InbreedingCoeff.java index 64be64afa..9a4de3c36 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/InbreedingCoeff.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/InbreedingCoeff.java @@ -48,7 +48,7 @@ public class InbreedingCoeff extends InfoFieldAnnotation implements StandardAnno private Map calculateIC(final VariantContext vc) { final GenotypesContext genotypes = (founderIds == null || founderIds.isEmpty()) ? vc.getGenotypes() : vc.getGenotypes(founderIds); - if ( genotypes == null || genotypes.size() < MIN_SAMPLES ) + if ( genotypes == null || genotypes.size() < MIN_SAMPLES || !vc.isVariant()) return null; int idxAA = 0, idxAB = 1, idxBB = 2; From 403654d40aa8cafc05061ce09f0678e3ce55e768 Mon Sep 17 00:00:00 2001 From: Khalid Shakir Date: Thu, 18 Oct 2012 16:57:15 -0400 Subject: [PATCH 93/93] Fixed null checkes in ArgumentTypeDescriptor due to ArgumentMatchValue updates. Fixed @Arguments such as scatter count that were labeled as java.io.File via incorrect @Input annotation. --- .../commandline/ArgumentTypeDescriptor.java | 6 ++--- .../qscripts/DataProcessingPipeline.scala | 25 ++++++++++--------- 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java index 4b9774806..54ade61f6 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java @@ -532,7 +532,7 @@ class SimpleArgumentTypeDescriptor extends ArgumentTypeDescriptor { Object[] vals = type.getEnumConstants(); Object defaultEnumeration = null; // as we look at options, record the default option if it exists for (Object val : vals) { - if (String.valueOf(val).equalsIgnoreCase(value.asString())) return val; + if (String.valueOf(val).equalsIgnoreCase(value == null ? null : value.asString())) return val; try { if (type.getField(val.toString()).isAnnotationPresent(EnumerationArgumentDefault.class)) defaultEnumeration = val; } catch (NoSuchFieldException e) { throw new ReviewedStingException("parsing " + type.toString() + "doesn't contain the field " + val.toString()); } } @@ -546,10 +546,10 @@ class SimpleArgumentTypeDescriptor extends ArgumentTypeDescriptor { else throw new UnknownEnumeratedValueException(createDefaultArgumentDefinition(source),value.asString()); } else if (type.equals(File.class)) { - result = value.asFile(); + result = value == null ? null : value.asFile(); } else { Constructor ctor = type.getConstructor(String.class); - result = ctor.newInstance(value.asString()); + result = ctor.newInstance(value == null ? null : value.asString()); } } catch (UserException e) { throw e; diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala index 56f6460fb..165e6a4e9 100755 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala @@ -13,6 +13,7 @@ import net.sf.samtools.SAMFileHeader.SortOrder import org.broadinstitute.sting.queue.util.QScriptUtils import org.broadinstitute.sting.queue.function.ListWriterFunction import org.broadinstitute.sting.commandline.Hidden +import org.broadinstitute.sting.commandline class DataProcessingPipeline extends QScript { qscript => @@ -41,34 +42,34 @@ class DataProcessingPipeline extends QScript { @Input(doc="The path to the binary of bwa (usually BAM files have already been mapped - but if you want to remap this is the option)", fullName="path_to_bwa", shortName="bwa", required=false) var bwaPath: File = _ - @Input(doc="the project name determines the final output (BAM file) base name. Example NA12878 yields NA12878.processed.bam", fullName="project", shortName="p", required=false) + @Argument(doc="the project name determines the final output (BAM file) base name. Example NA12878 yields NA12878.processed.bam", fullName="project", shortName="p", required=false) var projectName: String = "project" - @Input(doc="Output path for the processed BAM files.", fullName="output_directory", shortName="outputDir", required=false) + @Argument(doc="Output path for the processed BAM files.", fullName="output_directory", shortName="outputDir", required=false) var outputDir: String = "" - @Input(doc="the -L interval string to be used by GATK - output bams at interval only", fullName="gatk_interval_string", shortName="L", required=false) + @Argument(doc="the -L interval string to be used by GATK - output bams at interval only", fullName="gatk_interval_string", shortName="L", required=false) var intervalString: String = "" @Input(doc="an intervals file to be used by GATK - output bams at intervals only", fullName="gatk_interval_file", shortName="intervals", required=false) var intervals: File = _ - @Input(doc="Cleaning model: KNOWNS_ONLY, USE_READS or USE_SW", fullName="clean_model", shortName="cm", required=false) + @Argument(doc="Cleaning model: KNOWNS_ONLY, USE_READS or USE_SW", fullName="clean_model", shortName="cm", required=false) var cleaningModel: String = "USE_READS" - @Input(doc="Decompose input BAM file and fully realign it using BWA and assume Single Ended reads", fullName="use_bwa_single_ended", shortName="bwase", required=false) + @Argument(doc="Decompose input BAM file and fully realign it using BWA and assume Single Ended reads", fullName="use_bwa_single_ended", shortName="bwase", required=false) var useBWAse: Boolean = false - @Input(doc="Decompose input BAM file and fully realign it using BWA and assume Pair Ended reads", fullName="use_bwa_pair_ended", shortName="bwape", required=false) + @Argument(doc="Decompose input BAM file and fully realign it using BWA and assume Pair Ended reads", fullName="use_bwa_pair_ended", shortName="bwape", required=false) var useBWApe: Boolean = false - @Input(doc="Decompose input BAM file and fully realign it using BWA SW", fullName="use_bwa_sw", shortName="bwasw", required=false) + @Argument(doc="Decompose input BAM file and fully realign it using BWA SW", fullName="use_bwa_sw", shortName="bwasw", required=false) var useBWAsw: Boolean = false - @Input(doc="Number of threads BWA should use", fullName="bwa_threads", shortName="bt", required=false) + @Argument(doc="Number of threads BWA should use", fullName="bwa_threads", shortName="bt", required=false) var bwaThreads: Int = 1 - @Input(doc="Perform validation on the BAM files", fullName="validation", shortName="vs", required=false) + @Argument(doc="Perform validation on the BAM files", fullName="validation", shortName="vs", required=false) var validation: Boolean = false @@ -76,15 +77,15 @@ class DataProcessingPipeline extends QScript { * Hidden Parameters ****************************************************************************/ @Hidden - @Input(doc="How many ways to scatter/gather", fullName="scatter_gather", shortName="sg", required=false) + @Argument(doc="How many ways to scatter/gather", fullName="scatter_gather", shortName="sg", required=false) var nContigs: Int = -1 @Hidden - @Input(doc="Define the default platform for Count Covariates -- useful for techdev purposes only.", fullName="default_platform", shortName="dp", required=false) + @Argument(doc="Define the default platform for Count Covariates -- useful for techdev purposes only.", fullName="default_platform", shortName="dp", required=false) var defaultPlatform: String = "" @Hidden - @Input(doc="Run the pipeline in test mode only", fullName = "test_mode", shortName = "test", required=false) + @Argument(doc="Run the pipeline in test mode only", fullName = "test_mode", shortName = "test", required=false) var testMode: Boolean = false