From ca8458644371eb47596a22a385b92679924a9b33 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Mon, 24 Sep 2012 16:15:57 -0400 Subject: [PATCH 01/43] Adding default intellij configuration files --- .idea/.name | 1 + .idea/ant.xml | 15 + .idea/codeStyleSettings.xml | 13 + .idea/compiler.xml | 21 + .idea/copyright/profiles_settings.xml | 5 + .idea/encodings.xml | 5 + .idea/highlighting.xml | 8 + .idea/inspectionProfiles/Project_Default.xml | 11 + .../inspectionProfiles/profiles_settings.xml | 7 + .idea/libraries/GATK_libraries.xml | 13 + .idea/misc.xml | 32 ++ .idea/modules.xml | 9 + .idea/scopes/scope_settings.xml | 5 + .idea/uiDesigner.xml | 125 ++++++ .idea/vcs.xml | 10 + .idea/workspace.xml | 386 ++++++++++++++++++ cmi-gatk.iml | 23 ++ 17 files changed, 689 insertions(+) create mode 100644 .idea/.name create mode 100644 .idea/ant.xml create mode 100644 .idea/codeStyleSettings.xml create mode 100644 .idea/compiler.xml create mode 100644 .idea/copyright/profiles_settings.xml create mode 100644 .idea/encodings.xml create mode 100644 .idea/highlighting.xml create mode 100644 .idea/inspectionProfiles/Project_Default.xml create mode 100644 .idea/inspectionProfiles/profiles_settings.xml create mode 100644 .idea/libraries/GATK_libraries.xml create mode 100644 .idea/misc.xml create mode 100644 .idea/modules.xml create mode 100644 .idea/scopes/scope_settings.xml create mode 100644 .idea/uiDesigner.xml create mode 100644 .idea/vcs.xml create mode 100644 .idea/workspace.xml create mode 100644 cmi-gatk.iml diff --git a/.idea/.name b/.idea/.name new file mode 100644 index 000000000..7014f65a5 --- /dev/null +++ b/.idea/.name @@ -0,0 +1 @@ +cmi-gatk \ No newline at end of file diff --git a/.idea/ant.xml b/.idea/ant.xml new file mode 100644 index 000000000..4674eeac9 --- /dev/null +++ b/.idea/ant.xml @@ -0,0 +1,15 @@ + + + + + + + + + + + + + + + diff --git a/.idea/codeStyleSettings.xml b/.idea/codeStyleSettings.xml new file mode 100644 index 000000000..9178b389f --- 
/dev/null +++ b/.idea/codeStyleSettings.xml @@ -0,0 +1,13 @@ + + + + + + + diff --git a/.idea/compiler.xml b/.idea/compiler.xml new file mode 100644 index 000000000..ded2e9a1d --- /dev/null +++ b/.idea/compiler.xml @@ -0,0 +1,21 @@ + + + + + + diff --git a/.idea/copyright/profiles_settings.xml b/.idea/copyright/profiles_settings.xml new file mode 100644 index 000000000..3572571ad --- /dev/null +++ b/.idea/copyright/profiles_settings.xml @@ -0,0 +1,5 @@ + + + + + \ No newline at end of file diff --git a/.idea/encodings.xml b/.idea/encodings.xml new file mode 100644 index 000000000..e206d70d8 --- /dev/null +++ b/.idea/encodings.xml @@ -0,0 +1,5 @@ + + + + + diff --git a/.idea/highlighting.xml b/.idea/highlighting.xml new file mode 100644 index 000000000..f33b64d94 --- /dev/null +++ b/.idea/highlighting.xml @@ -0,0 +1,8 @@ + + + + + + diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml new file mode 100644 index 000000000..b8c243dbe --- /dev/null +++ b/.idea/inspectionProfiles/Project_Default.xml @@ -0,0 +1,11 @@ + + + + \ No newline at end of file diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 000000000..3b312839b --- /dev/null +++ b/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,7 @@ + + + + \ No newline at end of file diff --git a/.idea/libraries/GATK_libraries.xml b/.idea/libraries/GATK_libraries.xml new file mode 100644 index 000000000..970d0a3dc --- /dev/null +++ b/.idea/libraries/GATK_libraries.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 000000000..afd7f3778 --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,32 @@ + + + + + + + + + + http://www.w3.org/1999/xhtml + + + + + + + diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 000000000..09caa2933 --- /dev/null +++ b/.idea/modules.xml @@ 
-0,0 +1,9 @@ + + + + + + + + + diff --git a/.idea/scopes/scope_settings.xml b/.idea/scopes/scope_settings.xml new file mode 100644 index 000000000..922003b84 --- /dev/null +++ b/.idea/scopes/scope_settings.xml @@ -0,0 +1,5 @@ + + + + \ No newline at end of file diff --git a/.idea/uiDesigner.xml b/.idea/uiDesigner.xml new file mode 100644 index 000000000..3b0002030 --- /dev/null +++ b/.idea/uiDesigner.xml @@ -0,0 +1,125 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 000000000..cbc984988 --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,10 @@ + + + + + + + + + diff --git a/.idea/workspace.xml b/.idea/workspace.xml new file mode 100644 index 000000000..87ab79287 --- /dev/null +++ b/.idea/workspace.xml @@ -0,0 +1,386 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + localhost + 5050 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + cmi-gatk + + + + + + + + GATK libraries + + + + + + + + + diff --git a/cmi-gatk.iml b/cmi-gatk.iml new file mode 100644 index 000000000..e63aff535 --- /dev/null +++ b/cmi-gatk.iml @@ -0,0 +1,23 @@ + + + + + + + + + + + + + + + + + + + + + + + From 4aad135f8c14e0d8d60fa4782024b1a5f29dd5dc Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Mon, 24 Sep 2012 17:01:17 -0400 Subject: [PATCH 02/43] Generic input file name recognition (still need to implement support to FastQ, but it now can at least accept it) --- .../qscripts/DataProcessingPipeline.scala | 20 
+++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala index 56f6460fb..c21db30ce 100755 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala @@ -96,6 +96,7 @@ class DataProcessingPipeline extends QScript { var cleanModelEnum: ConsensusDeterminationModel = ConsensusDeterminationModel.USE_READS + val bwaParameters: String = " -q 5 -l 32 -k 2 -t 4 -o 1 " @@ -165,12 +166,15 @@ class DataProcessingPipeline extends QScript { var realignedBams: Seq[File] = Seq() var index = 1 for (bam <- bams) { - // first revert the BAM file to the original qualities - val saiFile1 = swapExt(bam, ".bam", "." + index + ".1.sai") - val saiFile2 = swapExt(bam, ".bam", "." + index + ".2.sai") - val realignedSamFile = swapExt(bam, ".bam", "." + index + ".realigned.sam") - val realignedBamFile = swapExt(bam, ".bam", "." + index + ".realigned.bam") - val rgRealignedBamFile = swapExt(bam, ".bam", "." + index + ".realigned.rg.bam") + val extension = bam.toString.substring(bam.toString.length - 4) + + + + val saiFile1 = swapExt(bam, extension, "." + index + ".1.sai") + val saiFile2 = swapExt(bam, extension, "." + index + ".2.sai") + val realignedSamFile = swapExt(bam, extension, "." + index + ".realigned.sam") + val realignedBamFile = swapExt(bam, extension, "." + index + ".realigned.bam") + val rgRealignedBamFile = swapExt(bam, extension, "." 
+ index + ".realigned.rg.bam") if (useBWAse) { val revertedBAM = revertBAM(bam, true) @@ -444,7 +448,7 @@ class DataProcessingPipeline extends QScript { case class bwa_aln_se (inBam: File, outSai: File) extends CommandLineFunction with ExternalCommonArgs { @Input(doc="bam file to be aligned") var bam = inBam @Output(doc="output sai file") var sai = outSai - def commandLine = bwaPath + " aln -t " + bwaThreads + " -q 5 " + reference + " -b " + bam + " > " + sai + def commandLine = bwaPath + " aln -t " + bwaThreads + bwaParameters + reference + " -b " + bam + " > " + sai this.analysisName = queueLogDir + outSai + ".bwa_aln_se" this.jobName = queueLogDir + outSai + ".bwa_aln_se" } @@ -452,7 +456,7 @@ class DataProcessingPipeline extends QScript { case class bwa_aln_pe (inBam: File, outSai1: File, index: Int) extends CommandLineFunction with ExternalCommonArgs { @Input(doc="bam file to be aligned") var bam = inBam @Output(doc="output sai file for 1st mating pair") var sai = outSai1 - def commandLine = bwaPath + " aln -t " + bwaThreads + " -q 5 " + reference + " -b" + index + " " + bam + " > " + sai + def commandLine = bwaPath + " aln -t " + bwaThreads + bwaParameters + reference + " -b" + index + " " + bam + " > " + sai this.analysisName = queueLogDir + outSai1 + ".bwa_aln_pe1" this.jobName = queueLogDir + outSai1 + ".bwa_aln_pe1" } From 4324bd72fdec5b9215ec10a6bd41b60c83135157 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Tue, 25 Sep 2012 10:51:53 -0400 Subject: [PATCH 03/43] Updating Intellij enviroment and adding Scala --- .idea/libraries/GATK_libraries.xml | 1 - .idea/misc.xml | 2 +- .idea/workspace.xml | 221 ++++++++++++++++++++++++----- cmi-gatk.iml | 10 +- 4 files changed, 192 insertions(+), 42 deletions(-) diff --git a/.idea/libraries/GATK_libraries.xml b/.idea/libraries/GATK_libraries.xml index 970d0a3dc..b363bbe6c 100644 --- a/.idea/libraries/GATK_libraries.xml +++ b/.idea/libraries/GATK_libraries.xml @@ -6,7 +6,6 @@ - diff --git a/.idea/misc.xml 
b/.idea/misc.xml index afd7f3778..a79280c52 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -24,7 +24,7 @@ http://www.w3.org/1999/xhtml - + diff --git a/.idea/workspace.xml b/.idea/workspace.xml index 87ab79287..f6d4567fd 100644 --- a/.idea/workspace.xml +++ b/.idea/workspace.xml @@ -1,7 +1,12 @@ - + + + + + + + + + + + + + + - + @@ -112,33 +140,7 @@ - - - - - - - - - - - - - - - - - - - - - + @@ -147,7 +149,7 @@ - + @@ -228,8 +230,9 @@ - + + @@ -286,7 +289,7 @@ + + - + + + + + + + + + + + + + + + + + - + @@ -333,6 +464,18 @@ + + + Detection + + + + + @@ -346,6 +489,7 @@ + 1.6 diff --git a/cmi-gatk.iml b/cmi-gatk.iml index e63aff535..4dbee1336 100644 --- a/cmi-gatk.iml +++ b/cmi-gatk.iml @@ -1,5 +1,13 @@ + + + + + + @@ -17,7 +25,7 @@ - + From 65b100f9b0de9ba03a35f1bb51b1c8e55af92513 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Tue, 25 Sep 2012 12:02:34 -0400 Subject: [PATCH 04/43] Reverting the DPP to the original version, going to create a new simplified version for CMI in private. 
--- .../qscripts/DataProcessingPipeline.scala | 20 ++++++++----------- 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala index c21db30ce..56f6460fb 100755 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala @@ -96,7 +96,6 @@ class DataProcessingPipeline extends QScript { var cleanModelEnum: ConsensusDeterminationModel = ConsensusDeterminationModel.USE_READS - val bwaParameters: String = " -q 5 -l 32 -k 2 -t 4 -o 1 " @@ -166,15 +165,12 @@ class DataProcessingPipeline extends QScript { var realignedBams: Seq[File] = Seq() var index = 1 for (bam <- bams) { - val extension = bam.toString.substring(bam.toString.length - 4) - - - - val saiFile1 = swapExt(bam, extension, "." + index + ".1.sai") - val saiFile2 = swapExt(bam, extension, "." + index + ".2.sai") - val realignedSamFile = swapExt(bam, extension, "." + index + ".realigned.sam") - val realignedBamFile = swapExt(bam, extension, "." + index + ".realigned.bam") - val rgRealignedBamFile = swapExt(bam, extension, "." + index + ".realigned.rg.bam") + // first revert the BAM file to the original qualities + val saiFile1 = swapExt(bam, ".bam", "." + index + ".1.sai") + val saiFile2 = swapExt(bam, ".bam", "." + index + ".2.sai") + val realignedSamFile = swapExt(bam, ".bam", "." + index + ".realigned.sam") + val realignedBamFile = swapExt(bam, ".bam", "." + index + ".realigned.bam") + val rgRealignedBamFile = swapExt(bam, ".bam", "." 
+ index + ".realigned.rg.bam") if (useBWAse) { val revertedBAM = revertBAM(bam, true) @@ -448,7 +444,7 @@ class DataProcessingPipeline extends QScript { case class bwa_aln_se (inBam: File, outSai: File) extends CommandLineFunction with ExternalCommonArgs { @Input(doc="bam file to be aligned") var bam = inBam @Output(doc="output sai file") var sai = outSai - def commandLine = bwaPath + " aln -t " + bwaThreads + bwaParameters + reference + " -b " + bam + " > " + sai + def commandLine = bwaPath + " aln -t " + bwaThreads + " -q 5 " + reference + " -b " + bam + " > " + sai this.analysisName = queueLogDir + outSai + ".bwa_aln_se" this.jobName = queueLogDir + outSai + ".bwa_aln_se" } @@ -456,7 +452,7 @@ class DataProcessingPipeline extends QScript { case class bwa_aln_pe (inBam: File, outSai1: File, index: Int) extends CommandLineFunction with ExternalCommonArgs { @Input(doc="bam file to be aligned") var bam = inBam @Output(doc="output sai file for 1st mating pair") var sai = outSai1 - def commandLine = bwaPath + " aln -t " + bwaThreads + bwaParameters + reference + " -b" + index + " " + bam + " > " + sai + def commandLine = bwaPath + " aln -t " + bwaThreads + " -q 5 " + reference + " -b" + index + " " + bam + " > " + sai this.analysisName = queueLogDir + outSai1 + ".bwa_aln_pe1" this.jobName = queueLogDir + outSai1 + ".bwa_aln_pe1" } From cb8d4c97e119bd76b382dcb5cc69277700456897 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Tue, 25 Sep 2012 17:13:50 -0400 Subject: [PATCH 05/43] First implementation of a generic 'bundled' Data Processing Pipeline for germline and cancer. not ready for prime time yet! 
--- .../src/org/broadinstitute/sting/queue/util/QScriptUtils.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/public/scala/src/org/broadinstitute/sting/queue/util/QScriptUtils.scala b/public/scala/src/org/broadinstitute/sting/queue/util/QScriptUtils.scala index 1529d9951..f684e533f 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/util/QScriptUtils.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/util/QScriptUtils.scala @@ -57,7 +57,8 @@ object QScriptUtils { for (file <- fromFile(in).getLines()) if (!file.startsWith("#") && !file.isEmpty ) list :+= new File(file.trim()) - list.sortWith(_.compareTo(_) < 0) +// list.sortWith(_.compareTo(_) < 0) + list } /** From c9c2682f8688d5978b001d21eee4fd7f111c9350 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Tue, 25 Sep 2012 17:18:44 -0400 Subject: [PATCH 07/43] removing annoying xml from IDEA configuration --- .idea/workspace.xml | 529 -------------------------------------------- 1 file changed, 529 deletions(-) delete mode 100644 .idea/workspace.xml diff --git a/.idea/workspace.xml b/.idea/workspace.xml deleted file mode 100644 index f6d4567fd..000000000 --- a/.idea/workspace.xml +++ /dev/null @@ -1,529 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - localhost - 5050 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Detection - - - - - - - - - - - - - - - 1.6 - - - - - - - - cmi-gatk - - - - - - - - GATK libraries - - - - - - - - - From 3e68fee76489a6667d070210c88aa0e3509ad2a8 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Thu, 27 Sep 2012 11:04:56 -0400 Subject: [PATCH 10/43] Removed the intellij files from the root and 
made an example package for new users. This allows users to start at the same page and then change it as they see fit without interfering with the repo (thanks guillermo!) --- .idea/.name | 1 - .idea/ant.xml | 15 --- .idea/codeStyleSettings.xml | 13 -- .idea/compiler.xml | 21 --- .idea/copyright/profiles_settings.xml | 5 - .idea/encodings.xml | 5 - .idea/highlighting.xml | 8 -- .idea/inspectionProfiles/Project_Default.xml | 11 -- .../inspectionProfiles/profiles_settings.xml | 7 - .idea/libraries/GATK_libraries.xml | 12 -- .idea/misc.xml | 32 ----- .idea/modules.xml | 9 -- .idea/scopes/scope_settings.xml | 5 - .idea/uiDesigner.xml | 125 ------------------ .idea/vcs.xml | 10 -- cmi-gatk.iml | 31 ----- intellij_example.tar.bz2 | Bin 0 -> 7520 bytes 17 files changed, 310 deletions(-) delete mode 100644 .idea/.name delete mode 100644 .idea/ant.xml delete mode 100644 .idea/codeStyleSettings.xml delete mode 100644 .idea/compiler.xml delete mode 100644 .idea/copyright/profiles_settings.xml delete mode 100644 .idea/encodings.xml delete mode 100644 .idea/highlighting.xml delete mode 100644 .idea/inspectionProfiles/Project_Default.xml delete mode 100644 .idea/inspectionProfiles/profiles_settings.xml delete mode 100644 .idea/libraries/GATK_libraries.xml delete mode 100644 .idea/misc.xml delete mode 100644 .idea/modules.xml delete mode 100644 .idea/scopes/scope_settings.xml delete mode 100644 .idea/uiDesigner.xml delete mode 100644 .idea/vcs.xml delete mode 100644 cmi-gatk.iml create mode 100644 intellij_example.tar.bz2 diff --git a/.idea/.name b/.idea/.name deleted file mode 100644 index 7014f65a5..000000000 --- a/.idea/.name +++ /dev/null @@ -1 +0,0 @@ -cmi-gatk \ No newline at end of file diff --git a/.idea/ant.xml b/.idea/ant.xml deleted file mode 100644 index 4674eeac9..000000000 --- a/.idea/ant.xml +++ /dev/null @@ -1,15 +0,0 @@ - - - - - - - - - - - - - - - diff --git a/.idea/codeStyleSettings.xml b/.idea/codeStyleSettings.xml deleted file mode 100644 index 
9178b389f..000000000 --- a/.idea/codeStyleSettings.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - diff --git a/.idea/compiler.xml b/.idea/compiler.xml deleted file mode 100644 index ded2e9a1d..000000000 --- a/.idea/compiler.xml +++ /dev/null @@ -1,21 +0,0 @@ - - - - - - diff --git a/.idea/copyright/profiles_settings.xml b/.idea/copyright/profiles_settings.xml deleted file mode 100644 index 3572571ad..000000000 --- a/.idea/copyright/profiles_settings.xml +++ /dev/null @@ -1,5 +0,0 @@ - - - - - \ No newline at end of file diff --git a/.idea/encodings.xml b/.idea/encodings.xml deleted file mode 100644 index e206d70d8..000000000 --- a/.idea/encodings.xml +++ /dev/null @@ -1,5 +0,0 @@ - - - - - diff --git a/.idea/highlighting.xml b/.idea/highlighting.xml deleted file mode 100644 index f33b64d94..000000000 --- a/.idea/highlighting.xml +++ /dev/null @@ -1,8 +0,0 @@ - - - - - - diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml deleted file mode 100644 index b8c243dbe..000000000 --- a/.idea/inspectionProfiles/Project_Default.xml +++ /dev/null @@ -1,11 +0,0 @@ - - - - \ No newline at end of file diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml deleted file mode 100644 index 3b312839b..000000000 --- a/.idea/inspectionProfiles/profiles_settings.xml +++ /dev/null @@ -1,7 +0,0 @@ - - - - \ No newline at end of file diff --git a/.idea/libraries/GATK_libraries.xml b/.idea/libraries/GATK_libraries.xml deleted file mode 100644 index b363bbe6c..000000000 --- a/.idea/libraries/GATK_libraries.xml +++ /dev/null @@ -1,12 +0,0 @@ - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml deleted file mode 100644 index a79280c52..000000000 --- a/.idea/misc.xml +++ /dev/null @@ -1,32 +0,0 @@ - - - - - - - - - - http://www.w3.org/1999/xhtml - - - - - - - diff --git a/.idea/modules.xml b/.idea/modules.xml deleted file mode 100644 index 
09caa2933..000000000 --- a/.idea/modules.xml +++ /dev/null @@ -1,9 +0,0 @@ - - - - - - - - - diff --git a/.idea/scopes/scope_settings.xml b/.idea/scopes/scope_settings.xml deleted file mode 100644 index 922003b84..000000000 --- a/.idea/scopes/scope_settings.xml +++ /dev/null @@ -1,5 +0,0 @@ - - - - \ No newline at end of file diff --git a/.idea/uiDesigner.xml b/.idea/uiDesigner.xml deleted file mode 100644 index 3b0002030..000000000 --- a/.idea/uiDesigner.xml +++ /dev/null @@ -1,125 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/.idea/vcs.xml b/.idea/vcs.xml deleted file mode 100644 index cbc984988..000000000 --- a/.idea/vcs.xml +++ /dev/null @@ -1,10 +0,0 @@ - - - - - - - - - diff --git a/cmi-gatk.iml b/cmi-gatk.iml deleted file mode 100644 index 4dbee1336..000000000 --- a/cmi-gatk.iml +++ /dev/null @@ -1,31 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/intellij_example.tar.bz2 b/intellij_example.tar.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..bce16045cd1cc476305c5e59d07ff9b94b8e5d73 GIT binary patch literal 7520 zcmV-m9iQStT4*^jL0KkKS)7!cJOD4F|M~yi5CDHu|NsC0|M36+|L{No06+`?06+jh z1{h!`o8{-7h4qB5wR>^y>D#MWyS_S4UPrYW)`~XXKBY)=w?n(T?w=>u4%}F@T^prq zM&}*98+P}1CC>JGY3}jFJo}ytO>BKWz24nKq=%-|-tIdTv1Z%Ou|py0B#;mw z0$?VFLqIe|HdE0LQ`8zCr9V{lAE^M-CYlc@B!L1ZL{xsM>S#SrDYXw!Hq`?_27mwn z0006)At_HHdU~eyLFyU;NYu?W zO%G7?YBUW!LqkS@9-*KBcwhGq&L2>c7{2Bv%KPh~XqovPD`P$wqr0wA{OX>F7ZKmy zDsL?$v;dI_RDWe46~zQU)lvw>NPvKQ{r?Z1(}ww2_)M*fQwCl2HJP1e>Ri?dCZpMN zP=M?SA|wk1NPtvh=J3} zDw+~D0aFk#a8yBk1VvNe{^Q2@ATLqre5hlqO+ zJH6U_g@n*il^HZ-l$Xlw^mBWD_ji=uWfv%71y1==(-ozyjYp|LKHs?f9BLgry=7~; zQzc6vctrO5Ohh{&v$XFjif1`0L{s*Olu^?q8x}#rqzyaoVB#B4g(81Jx};qlYMjj? 
z2dYy`yjW&t6?wYx0N9Uh!RRNLa}rLdMi`DPNP-P&$TYbGk_IqjZvxK0qQ|cbe@$>KA+XQo15<95gH&cGF>(8JUi?q#;jjQhX7>?$D_)jV*fSGa99hISqAgCirHOBRlz z5-9I|sR$Ai`Yfa3X$-2y?KsEHOGX z@HXt%cKK^%iU{@ItVyi`Mvy~M#VuYFejK)2%tD1XPotWR1C6g7^>_3=M{GBTR=x6} zL$ie9VdKIYD#p_7X2Ij5c!ACsYkPJWj&|WWp}5G;)cO~hDhd@kB;Qp{=JQuL+IA<+ z9G6K-0B_4%_wIgYeR?@~V~!zKJsytphBh&$h7$B()y02uDWc?iHKJ3Oou2*+MfMx^=gIf{X3|lWppAz z-I7p3R)QX!Idb;|10w?J`_#t4+9oPM5Ep6hKa7!-%b*Z%Ne=e{WH2)WCKzJBSF^U& zK`^`eM3i5TFVExdQsTLk;G@qq9{>}@N65UeiIk4 z@~k{rAI=Uv%!sgaYY$=o+l0!JKSDr9R$?^eCXzi64~|&0Ce%+awpg_DqW1T;JnT{d zru{VON@j0;c#A;BkSsr#?|iam4$XGUx$RTb)H_pMY<00{Yt{q!{n-}(G#x&{RZq!9 z?#T+_N>%`kP|_a@fmo2-OJrXX!d!y~G&Dn3dYM^vWSZJ!#jw;(nuR5VhC?^k`y8?h4xtX)+j?PU}@iN>ljD+(Qmb z8dniBZ(_g=D1mLVH)}(M5H0vcNC?Cv1(;-)C)j(xXY2nS+gLe%zZ*W^p1!|_zv=O} z_Q!+DzkQ#@*8b@JY?61J{_8LQ7Wa7yo_=DdhrF->(?ZCJE#uL2ybpcSMKqaVBwSv) zNU6k^_VtahV8JC~!U6{d!bq&!4p|_$D-~)<6%s3$x=9Tg-hN~ZTTL>hPndt*gp7cg z+wR+roDg$1Hu?=(-&o%#Ayf15RN!Z%05j!NTZ&r35#?&$A)o_Kq_+aG2Tmv-^Ti}kzPUMz}gW$fs9 zJC4FgjTw|ACp)ypVq7XolvK9GOTU706q(I(#Eu(UtE5jUBYT&==_1{Y?P%VlGrzMb zBE|_o9do|DW~w@PU%$xNzAis=G|wxF)AvA^EC+4@aDj~7z-#+Yu_mAdbYOG_n;S_f z2MF%vrdGcJHml~P2xvLQOIQJlnTe)5Z8JtNk{R2)nVMB@y3m5+0?^4LZPW(|mtd*? 
zDv;N1W_Rb+ea4+?lrAx`155T=*|7;B>>wiy;Ml)7#VrgvNDTg;;hzJ_HNI$RTTqf4 zfyMeU1R+7uY;dsXgq*(jmUH&D>*C?4NtOzLb-!$s%bXb_Sue0mN5$wkpEM3kv8IvGA=_?Zzv1%_C1tsyl^1@RKH{a zciZw1@6q0f3Zxbb;ED{RY}JFbgv`jo;vnjTP++40u_0!9X$&Jll(K*-u?XPE#-34@ z(2*iq168t)2LIl%)odnH8EI_Yl*{iaXWjeIC?DX z@F63H#+Z$15ur99k-Q~?3g|8Zc=x8AR4D_lS&{5(Wb(9OWn~6P1uWA@lOmW&8qFIU z7SBN;v4-l*AW>A(2|!UvMJG5MwJuL6M+i`eC_Z*#ky3?khBR#txTdfRi~`O|X#<>q zay;fL4M@CAa10!#6DOq2oZBZ*`QRkS_ry;K+IvB5vHgUgDqb| z3@cbx6TJE0H(;83Aleb%_>debXaSWl^D1it&Z*F@y=N{NvpYj~%SKvwWbjz}(2QH5 zzP2fmjwex!Q0DDKn@@wk;d+8XInO!CB-!he7tkDYlzvB5io3y3~eaI5w=k}~_n&-pi{RD^=|p|INIK%M7^Ju>U^q=8=L%d;bA7r& zav%4cJ$b-?7%3m*P6<7LL;|e%;!{W=n0z(DI%^o;Bei5F?M_70?60j?x99A# z3qM)=cXkVG2kmpPJ8oP#mdXvhiT3f-MRyJ&15Gf(d7`7!iiTN|-|lq+clXLmg&CS~L$9jF&hP0Bm8xWJJK=$OMFB>@>4P6z#Xn3dcffp+LAC z!`V2YWzCFixsCZ_7L;F_QL9T?&p+$c)*hPKS8gSO~VTFj|ro3G{Iuof@swuwn#16JUnNC5#N!PbpVLE zvff%F#RxrAU($v^EeV!2DVBm~xc6xi-$OudTP+k6K2v02fE5E45$L!=gi;#?e7UiT zMhuXoWDk1Yu>gn#4)=DU3)lONdeFJ7$d8PZ2g9kss?ltvQp=qrImnG5r+V3zF4fd( z=B;z~@$<2{hrSQ|A=lv6Q712!p!qTbzdD`k(bs>{2Pp3?yqCWFJqT zzsdD8a3Kh21F_vQhkQ6nV4J6$yI@VnB-^q@1Poy1V4pzHT>!|B!-$_&1xU~WX+}BS| zI+H9JIpulj0|c`K4h+Jv+i8{#sDpzsY3kK&nf6^uxxRKVpRY}nq>h`5p#2eSW1JCB zoQDl9g0bddzNVPmI`lIIC+n3Z-%)6 z)m1D@3`N7B@Sgj)AR36~a4}*AFBJoK>y5WL$p%MGX_9PAb!p#rxvRNmYjZFf_p^MM zS*F2;S$4gmw~WV5MV$7|80zk2IN^|p!(1CUj&QiGuRFLn?P!*xF?*d9(+V$}IXT1} zTha_h(muFR1txBW5H`^S1RVSVru%s#2xv)Akeo8AEDyF+y-8{s&ujwd5cgA z{vZA1IN}ss`1mWy37bexU#GAM?R5hGixGW*YN1F6 zNe^y$@Ot{b0s=j%3iJAec(;rNi2(`#p%tYeftrrHgl%C`y`dwy83`F7-YoY_ox@e$ zL&B&_2$DC2^*`wAL)aSaJF>S~WkZ7Y-Du;z{ zz=U=xW(E3CCn_nOid4kc4!;tEos&Z9t35>kz>s9yLG>gOM({gKT^#n%!cAcT@WPry3G z2V$fF(Bh#$fe>JI^|(v(OqCu~Gq|s3b?7=j5C#Z=5wJo%cpU|9AqFF9LZ;v@MFUXd zs2N){bdrWWIl=TxEeH;2AWyYeEYN~qcFD}6*o@`j5Unt1JJgj_YN^CRhB!bw5Oy*P z1DcHm8GutysIu}&!*1M~s!zPA&Oi}v=#?bEF!B<|)OQu(7P)!*MrromGcLP%0kRC2-f z-Ma&_=fdV8BrOEsXnl*Tw^mqWLHgJM!Kwz2*{PSy3 zu1y0}WCf==J8l%kErz3IvwaFAt0TVM@-$^1MCp&5TKidOUd1p zYf^#|AzIhOeK`tn48Ym)u!~-}_Q+l&S8a}MP 
zqt<;`1h6{%dl7fl4{{^4=5Z9~GklcT&KW~EA3Cu3&;?mZn=%p-SGX&lf>n(5j&iOY zYQ5cz$?+OcwiV^|gbZ zDl{t&8X}C%;tMPtc~k`|q$FrGfm%YMX(X@VI=uFI0&+ldLE%k6`SmVPkC_HWF>Jjk zUDTu0qvTmpsIKA3Q+lPhpdN}HV1^2u*J2-*FWMiW;5$l`j~ao*gwCP$RM5i#_C_Ex zH)zO<;CO6Q=Wwgery}Pl$N)(r*fN-s03O~z4|h@!W6)yYC(`*Uf$0mj(1PL>cZ3#Z z4TbDihB0%-2YpbP;Si{_nnei&VL3_(rX}Qwue;<9@ZzbMbgFgWaWzMX7*HysutU{s z6$yK$fP%s@0>K!oAjF959~anz@iKxw9tVi2%a|O!4?k>B;`D(%h5u(vcUQi+mcKd#qyCaVfaiWCOtfxqGP5gZ9C@~hnd`7h<>b=e_+%r-y-BvC8wl_K{AeW^0CX@V5&D!>>QsSU^i-+=i>^k-=!j-OGaIbfVpkOo zov0DlGYvzkWdki>W|%{31zZ$_CjdJbXwoesiiTpR&LP}<$G(Ags#wwbgBY)fP=_5T zQ4Xh%0B4=sfb<{^vtZvf!~pRs?Q2_tCyTUQVcvR>^r#ARkO%Guv=4>qj_B@o4jqbM zt7aXvQ8X%2q25s!VhE5U6mV4RggF8x`vC+b>2V@PWF#OH>&BrAM934Jfz;}KW^E!s zf_sHg^L}8s>!NOo>yjx%-@oa32x!;2(Ek5!w#C`ezx9s#*X(t`x1;-X6+ z7Tr_H3_VZlT{4sbK3ug=pGr!XjFT71U-2y+_{;q+T4bJi(b= zap}Jvh}lP|*qy&ou8QcP>pMaQngHZp=&Vk$3eem_f*S=$R8;1V4~ZCnf}#?r8iXVQ z0j{X_oxDt(h>0TcAk-a!;E`5M2WU_pz^PFx0iuHO=~{qY8>t!!C^m$obS{f0N*n5P z90a#3kBG3kQg+Y~XP`YbFs-J>`*$M_gdyA-C_|>(hkfw|+l<+w zf}KDpwJ=nwn0Cr`@CL~cY#kjD!*FzDfO#Xz58x^D4{2FCkP+ji120 z`AK?g`I(q&&XIvsdP8c|-6RwyD9PaUmZHX{h)-f%GL}>`AdodPFC>U>q}W4M$7Yrv zXn_GT_PIBf+CkyE8OEjAE9XU38OMGBuvkEl2#MCGQP6b&cqsd};rC<;+96%VZpibB za5i6A1WKt|DOYLN0p46>D$r5;!~>XE;UL^8L$+U03KV+NtWE_Trs&B5hj{M;05y_z( zIB@{aYt}fs#2_Ui8xH+<1|!JJl0XMS2m}_23jO|IA2?LhL%8il_4O)!cmxPzpqfGY zc__a@$5HV6afV(GYNQppH5ZI_4QL$Cz*M(x41|T~>WW%5dc!&O=BaR(D8S87Uc(Wb z4GAcqP$MD;MhMW2k||&T_-I1Sc7quRcUP6)AC5{qas)meG;bs!BEU|AAWLmX=fy+6 z$p=H}qdcCy^{S?BA;kyA^aH=II~+xSl}Oz`ntcF$U;6-Xk#{x0N-aG@bNDK>cMb(tan literal 0 HcmV?d00001 From a640afa995a7be890d1753085e568b58f9e449d2 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Thu, 27 Sep 2012 11:09:41 -0400 Subject: [PATCH 11/43] adding some directories to gitignore --- .gitignore | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.gitignore b/.gitignore index 8623fa076..927caf98d 100644 --- a/.gitignore +++ b/.gitignore @@ -18,3 +18,8 @@ queueScatterGather /bar* integrationtests/ 
public/testdata/onTheFlyOutputTest.vcf +build/ +dist/ +dump/ +lib/ +out/ From 0afde9906a1c043d3644e47e45603b9bf9e6a382 Mon Sep 17 00:00:00 2001 From: Kristian Cibulskis Date: Wed, 3 Oct 2012 16:25:34 -0400 Subject: [PATCH 15/43] initial cancer pipeline with mutations and partial indel support --- .../queue/extensions/cancer/MuTect.scala | 378 ++++++++++++++++++ 1 file changed, 378 insertions(+) create mode 100644 public/scala/src/org/broadinstitute/sting/queue/extensions/cancer/MuTect.scala diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/cancer/MuTect.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/cancer/MuTect.scala new file mode 100644 index 000000000..623d397d4 --- /dev/null +++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/cancer/MuTect.scala @@ -0,0 +1,378 @@ +package org.broadinstitute.sting.queue.extensions.cancer + +import java.io.File +import org.broadinstitute.sting.commandline.Argument +import org.broadinstitute.sting.commandline.Gather +import org.broadinstitute.sting.commandline.Input +import org.broadinstitute.sting.commandline.Output +import org.broadinstitute.sting.queue.function.scattergather.ScatterGatherableFunction +import org.broadinstitute.sting.queue.extensions.gatk.{LocusScatterFunction, TaggedFile} + +class MuTect extends org.broadinstitute.sting.queue.extensions.gatk.CommandLineGATK with ScatterGatherableFunction { + analysisName = "MuTect" + analysis_type = "MuTect" + scatterClass = classOf[LocusScatterFunction] + + /** used for debugging, basically exit as soon as we get the reads */ + @Argument(fullName="noop", shortName="", doc="used for debugging, basically exit as soon as we get the reads", required=false, exclusiveOf="", validation="") + var noop: Boolean = _ + + /** add many additional columns of statistics to the output file */ + @Argument(fullName="enable_extended_output", shortName="", doc="add many additional columns of statistics to the output file", required=false, 
exclusiveOf="", validation="") + var enable_extended_output: Boolean = _ + + /** used when running the caller on a normal (as if it were a tumor) to detect artifacts */ + @Argument(fullName="artifact_detection_mode", shortName="", doc="used when running the caller on a normal (as if it were a tumor) to detect artifacts", required=false, exclusiveOf="", validation="") + var artifact_detection_mode: Boolean = _ + + /** name to use for tumor in output files */ + @Argument(fullName="tumor_sample_name", shortName="", doc="name to use for tumor in output files", required=false, exclusiveOf="", validation="") + var tumor_sample_name: String = _ + + /** if the tumor bam contains multiple samples, only use read groups with SM equal to this value */ + @Argument(fullName="bam_tumor_sample_name", shortName="", doc="if the tumor bam contains multiple samples, only use read groups with SM equal to this value", required=false, exclusiveOf="", validation="") + var bam_tumor_sample_name: String = _ + + /** name to use for normal in output files */ + @Argument(fullName="normal_sample_name", shortName="", doc="name to use for normal in output files", required=false, exclusiveOf="", validation="") + var normal_sample_name: String = _ + + /** force output for each site */ + @Argument(fullName="force_output", shortName="", doc="force output for each site", required=false, exclusiveOf="", validation="") + var force_output: Boolean = _ + + /** force output for all alleles at each site */ + @Argument(fullName="force_alleles", shortName="", doc="force output for all alleles at each site", required=false, exclusiveOf="", validation="") + var force_alleles: Boolean = _ + + /** Initial LOD threshold for calling tumor variant */ + @Argument(fullName="initial_tumor_lod", shortName="", doc="Initial LOD threshold for calling tumor variant", required=false, exclusiveOf="", validation="") + var initial_tumor_lod: Option[Float] = None + + /** Format string for initial_tumor_lod */ + 
@Argument(fullName="initial_tumor_lodFormat", shortName="", doc="Format string for initial_tumor_lod", required=false, exclusiveOf="", validation="") + var initial_tumor_lodFormat: String = "%s" + + /** LOD threshold for calling tumor variant */ + @Argument(fullName="tumor_lod", shortName="", doc="LOD threshold for calling tumor variant", required=false, exclusiveOf="", validation="") + var tumor_lod: Option[Float] = None + + /** Format string for tumor_lod */ + @Argument(fullName="tumor_lodFormat", shortName="", doc="Format string for tumor_lod", required=false, exclusiveOf="", validation="") + var tumor_lodFormat: String = "%s" + + /** estimate of fraction (0-1) of physical contamination with other unrelated samples */ + @Argument(fullName="fraction_contamination", shortName="", doc="estimate of fraction (0-1) of physical contamination with other unrelated samples", required=false, exclusiveOf="", validation="") + var fraction_contamination: Option[Float] = None + + /** Format string for fraction_contamination */ + @Argument(fullName="fraction_contaminationFormat", shortName="", doc="Format string for fraction_contamination", required=false, exclusiveOf="", validation="") + var fraction_contaminationFormat: String = "%s" + + /** minimum fraction of cells which are presumed to have a mutation, used to handle non-clonality and contamination */ + @Argument(fullName="minimum_mutation_cell_fraction", shortName="", doc="minimum fraction of cells which are presumed to have a mutation, used to handle non-clonality and contamination", required=false, exclusiveOf="", validation="") + var minimum_mutation_cell_fraction: Option[Float] = None + + /** Format string for minimum_mutation_cell_fraction */ + @Argument(fullName="minimum_mutation_cell_fractionFormat", shortName="", doc="Format string for minimum_mutation_cell_fraction", required=false, exclusiveOf="", validation="") + var minimum_mutation_cell_fractionFormat: String = "%s" + + /** LOD threshold for calling normal 
non-germline */ + @Argument(fullName="normal_lod", shortName="", doc="LOD threshold for calling normal non-germline", required=false, exclusiveOf="", validation="") + var normal_lod: Option[Float] = None + + /** Format string for normal_lod */ + @Argument(fullName="normal_lodFormat", shortName="", doc="Format string for normal_lod", required=false, exclusiveOf="", validation="") + var normal_lodFormat: String = "%s" + + /** LOD threshold for calling normal non-variant */ + @Argument(fullName="normal_artifact_lod", shortName="", doc="LOD threshold for calling normal non-variant", required=false, exclusiveOf="", validation="") + var normal_artifact_lod: Option[Float] = None + + /** Format string for normal_artifact_lod */ + @Argument(fullName="normal_artifact_lodFormat", shortName="", doc="Format string for normal_artifact_lod", required=false, exclusiveOf="", validation="") + var normal_artifact_lodFormat: String = "%s" + + /** LOD threshold for calling strand bias */ + @Argument(fullName="strand_artifact_lod", shortName="", doc="LOD threshold for calling strand bias", required=false, exclusiveOf="", validation="") + var strand_artifact_lod: Option[Float] = None + + /** Format string for strand_artifact_lod */ + @Argument(fullName="strand_artifact_lodFormat", shortName="", doc="Format string for strand_artifact_lod", required=false, exclusiveOf="", validation="") + var strand_artifact_lodFormat: String = "%s" + + /** power threshold for calling strand bias */ + @Argument(fullName="strand_artifact_power_threshold", shortName="", doc="power threshold for calling strand bias", required=false, exclusiveOf="", validation="") + var strand_artifact_power_threshold: Option[Float] = None + + /** Format string for strand_artifact_power_threshold */ + @Argument(fullName="strand_artifact_power_thresholdFormat", shortName="", doc="Format string for strand_artifact_power_threshold", required=false, exclusiveOf="", validation="") + var strand_artifact_power_thresholdFormat: String 
= "%s" + + /** LOD threshold for calling normal non-variant at dbsnp sites */ + @Argument(fullName="dbsnp_normal_lod", shortName="", doc="LOD threshold for calling normal non-variant at dbsnp sites", required=false, exclusiveOf="", validation="") + var dbsnp_normal_lod: Option[Float] = None + + /** Format string for dbsnp_normal_lod */ + @Argument(fullName="dbsnp_normal_lodFormat", shortName="", doc="Format string for dbsnp_normal_lod", required=false, exclusiveOf="", validation="") + var dbsnp_normal_lodFormat: String = "%s" + + /** Power threshold for normal to determine germline vs variant */ + @Argument(fullName="somatic_classification_normal_power_threshold", shortName="", doc="Power threshold for normal to determine germline vs variant", required=false, exclusiveOf="", validation="") + var somatic_classification_normal_power_threshold: Option[Float] = None + + /** Format string for somatic_classification_normal_power_threshold */ + @Argument(fullName="somatic_classification_normal_power_thresholdFormat", shortName="", doc="Format string for somatic_classification_normal_power_threshold", required=false, exclusiveOf="", validation="") + var somatic_classification_normal_power_thresholdFormat: String = "%s" + + /** minimum allele fraction to be considered in normal, useful for normal sample contaminated with tumor */ + @Argument(fullName="minimum_normal_allele_fraction", shortName="", doc="minimum allele fraction to be considered in normal, useful for normal sample contaminated with tumor", required=false, exclusiveOf="", validation="") + var minimum_normal_allele_fraction: Option[Float] = None + + /** Format string for minimum_normal_allele_fraction */ + @Argument(fullName="minimum_normal_allele_fractionFormat", shortName="", doc="Format string for minimum_normal_allele_fraction", required=false, exclusiveOf="", validation="") + var minimum_normal_allele_fractionFormat: String = "%s" + + /** for computational efficiency, reject sites with allelic fraction 
below this threshold */ + @Argument(fullName="tumor_f_pretest", shortName="", doc="for computational efficiency, reject sites with allelic fraction below this threshold", required=false, exclusiveOf="", validation="") + var tumor_f_pretest: Option[Float] = None + + /** Format string for tumor_f_pretest */ + @Argument(fullName="tumor_f_pretestFormat", shortName="", doc="Format string for tumor_f_pretest", required=false, exclusiveOf="", validation="") + var tumor_f_pretestFormat: String = "%s" + + /** threshold for minimum base quality score */ + @Argument(fullName="min_qscore", shortName="", doc="threshold for minimum base quality score", required=false, exclusiveOf="", validation="") + var min_qscore: Option[Int] = None + + /** how many gapped events (ins/del) are allowed in proximity to this candidate */ + @Argument(fullName="gap_events_threshold", shortName="", doc="how many gapped events (ins/del) are allowed in proximity to this candidate", required=false, exclusiveOf="", validation="") + var gap_events_threshold: Option[Int] = None + + /** if this fraction or more of the bases in a read are soft/hard clipped, do not use this read for mutation calling */ + @Argument(fullName="heavily_clipped_read_fraction", shortName="", doc="if this fraction or more of the bases in a read are soft/hard clipped, do not use this read for mutation calling", required=false, exclusiveOf="", validation="") + var heavily_clipped_read_fraction: Option[Float] = None + + /** Format string for heavily_clipped_read_fraction */ + @Argument(fullName="heavily_clipped_read_fractionFormat", shortName="", doc="Format string for heavily_clipped_read_fraction", required=false, exclusiveOf="", validation="") + var heavily_clipped_read_fractionFormat: String = "%s" + + /** pvalue threshold for fishers exact test of clipping bias in mutant reads vs ref reads */ + @Argument(fullName="clipping_bias_pvalue_threshold", shortName="", doc="pvalue threshold for fishers exact test of clipping bias in 
mutant reads vs ref reads", required=false, exclusiveOf="", validation="") + var clipping_bias_pvalue_threshold: Option[Float] = None + + /** Format string for clipping_bias_pvalue_threshold */ + @Argument(fullName="clipping_bias_pvalue_thresholdFormat", shortName="", doc="Format string for clipping_bias_pvalue_threshold", required=false, exclusiveOf="", validation="") + var clipping_bias_pvalue_thresholdFormat: String = "%s" + + /** threshold for determining if there is relatedness between the alt and ref allele read piles */ + @Argument(fullName="fraction_mapq0_threshold", shortName="", doc="threshold for determining if there is relatedness between the alt and ref allele read piles", required=false, exclusiveOf="", validation="") + var fraction_mapq0_threshold: Option[Float] = None + + /** Format string for fraction_mapq0_threshold */ + @Argument(fullName="fraction_mapq0_thresholdFormat", shortName="", doc="Format string for fraction_mapq0_threshold", required=false, exclusiveOf="", validation="") + var fraction_mapq0_thresholdFormat: String = "%s" + + /** threshold for clustered read position artifact median */ + @Argument(fullName="pir_median_threshold", shortName="", doc="threshold for clustered read position artifact median", required=false, exclusiveOf="", validation="") + var pir_median_threshold: Option[Double] = None + + /** Format string for pir_median_threshold */ + @Argument(fullName="pir_median_thresholdFormat", shortName="", doc="Format string for pir_median_threshold", required=false, exclusiveOf="", validation="") + var pir_median_thresholdFormat: String = "%s" + + /** threshold for clustered read position artifact MAD */ + @Argument(fullName="pir_mad_threshold", shortName="", doc="threshold for clustered read position artifact MAD", required=false, exclusiveOf="", validation="") + var pir_mad_threshold: Option[Double] = None + + /** Format string for pir_mad_threshold */ + @Argument(fullName="pir_mad_thresholdFormat", shortName="", doc="Format 
string for pir_mad_threshold", required=false, exclusiveOf="", validation="") + var pir_mad_thresholdFormat: String = "%s" + + /** required minimum value for tumor alt allele maximum mapping quality score */ + @Argument(fullName="required_maximum_alt_allele_mapping_quality_score", shortName="", doc="required minimum value for tumor alt allele maximum mapping quality score", required=false, exclusiveOf="", validation="") + var required_maximum_alt_allele_mapping_quality_score: Option[Int] = None + + /** threshold for maximum alternate allele counts in normal */ + @Argument(fullName="max_alt_alleles_in_normal_count", shortName="", doc="threshold for maximum alternate allele counts in normal", required=false, exclusiveOf="", validation="") + var max_alt_alleles_in_normal_count: Option[Int] = None + + /** threshold for maximum alternate allele quality score sum in normal */ + @Argument(fullName="max_alt_alleles_in_normal_qscore_sum", shortName="", doc="threshold for maximum alternate allele quality score sum in normal", required=false, exclusiveOf="", validation="") + var max_alt_alleles_in_normal_qscore_sum: Option[Int] = None + + /** threshold for maximum alternate allele fraction in normal */ + @Argument(fullName="max_alt_allele_in_normal_fraction", shortName="", doc="threshold for maximum alternate allele fraction in normal", required=false, exclusiveOf="", validation="") + var max_alt_allele_in_normal_fraction: Option[Double] = None + + /** Format string for max_alt_allele_in_normal_fraction */ + @Argument(fullName="max_alt_allele_in_normal_fractionFormat", shortName="", doc="Format string for max_alt_allele_in_normal_fraction", required=false, exclusiveOf="", validation="") + var max_alt_allele_in_normal_fractionFormat: String = "%s" + + /** Phred scale quality score constant to use in power calculations */ + @Argument(fullName="power_constant_qscore", shortName="", doc="Phred scale quality score constant to use in power calculations", required=false, 
exclusiveOf="", validation="") + var power_constant_qscore: Option[Int] = None + + /** Absolute Copy Number Data, as defined by Absolute, to use in power calculations */ + @Argument(fullName="absolute_copy_number_data", shortName="", doc="Absolute Copy Number Data, as defined by Absolute, to use in power calculations", required=false, exclusiveOf="", validation="") + var absolute_copy_number_data: File = _ + + /** Allelic fraction constant to use in power calculations */ + @Argument(fullName="power_constant_af", shortName="", doc="Allelic fraction constant to use in power calculations", required=false, exclusiveOf="", validation="") + var power_constant_af: Option[Double] = None + + /** Format string for power_constant_af */ + @Argument(fullName="power_constant_afFormat", shortName="", doc="Format string for power_constant_af", required=false, exclusiveOf="", validation="") + var power_constant_afFormat: String = "%s" + + /** Call-stats output */ + @Output(fullName="out", shortName="o", doc="Call-stats output", required=false, exclusiveOf="", validation="") + @Gather(classOf[org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction]) + var out: File = _ + + /** + * Short name of out + * @return Short name of out + */ + def o = this.out + + /** + * Short name of out + * @param value Short name of out + */ + def o_=(value: File) { this.out = value } + + /** VCF file of DBSNP information */ + @Input(fullName="dbsnp", shortName="dbsnp", doc="VCF file of DBSNP information", required=false, exclusiveOf="", validation="") + var dbsnp: Seq[File] = Nil + + /** Dependencies on any indexes of dbsnp */ + @Input(fullName="dbsnpIndexes", shortName="", doc="Dependencies on any indexes of dbsnp", required=false, exclusiveOf="", validation="") + private var dbsnpIndexes: Seq[File] = Nil + + /** VCF file of COSMIC sites */ + @Input(fullName="cosmic", shortName="cosmic", doc="VCF file of COSMIC sites", required=false, exclusiveOf="", validation="") + var cosmic: 
Seq[File] = Nil + + /** Dependencies on any indexes of cosmic */ + @Input(fullName="cosmicIndexes", shortName="", doc="Dependencies on any indexes of cosmic", required=false, exclusiveOf="", validation="") + private var cosmicIndexes: Seq[File] = Nil + + /** VCF file of sites observed in normal */ + @Input(fullName="normal_panel", shortName="normal_panel", doc="VCF file of sites observed in normal", required=false, exclusiveOf="", validation="") + var normal_panel: Seq[File] = Nil + + /** Dependencies on any indexes of normal_panel */ + @Input(fullName="normal_panelIndexes", shortName="", doc="Dependencies on any indexes of normal_panel", required=false, exclusiveOf="", validation="") + private var normal_panelIndexes: Seq[File] = Nil + + /** write out coverage in WIGGLE format to this file */ + @Output(fullName="coverage_file", shortName="cov", doc="write out coverage in WIGGLE format to this file", required=false, exclusiveOf="", validation="") + @Gather(classOf[org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction]) + var coverage_file: File = _ + + /** + * Short name of coverage_file + * @return Short name of coverage_file + */ + def cov = this.coverage_file + + /** + * Short name of coverage_file + * @param value Short name of coverage_file + */ + def cov_=(value: File) { this.coverage_file = value } + + /** write out 20x of Q20 coverage in WIGGLE format to this file */ + @Output(fullName="coverage_20_q20_file", shortName="cov_q20", doc="write out 20x of Q20 coverage in WIGGLE format to this file", required=false, exclusiveOf="", validation="") + @Gather(classOf[org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction]) + var coverage_20_q20_file: File = _ + + /** + * Short name of coverage_20_q20_file + * @return Short name of coverage_20_q20_file + */ + def cov_q20 = this.coverage_20_q20_file + + /** + * Short name of coverage_20_q20_file + * @param value Short name of coverage_20_q20_file + */ + def 
cov_q20_=(value: File) { this.coverage_20_q20_file = value } + + /** write out power in WIGGLE format to this file */ + @Output(fullName="power_file", shortName="pow", doc="write out power in WIGGLE format to this file", required=false, exclusiveOf="", validation="") + @Gather(classOf[org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction]) + var power_file: File = _ + + /** + * Short name of power_file + * @return Short name of power_file + */ + def pow = this.power_file + + /** + * Short name of power_file + * @param value Short name of power_file + */ + def pow_=(value: File) { this.power_file = value } + + /** write out tumor read depth in WIGGLE format to this file */ + @Output(fullName="tumor_depth_file", shortName="tdf", doc="write out tumor read depth in WIGGLE format to this file", required=false, exclusiveOf="", validation="") + @Gather(classOf[org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction]) + var tumor_depth_file: File = _ + + /** + * Short name of tumor_depth_file + * @return Short name of tumor_depth_file + */ + def tdf = this.tumor_depth_file + + /** + * Short name of tumor_depth_file + * @param value Short name of tumor_depth_file + */ + def tdf_=(value: File) { this.tumor_depth_file = value } + + /** write out normal read depth in WIGGLE format to this file */ + @Output(fullName="normal_depth_file", shortName="ndf", doc="write out normal read depth in WIGGLE format to this file", required=false, exclusiveOf="", validation="") + @Gather(classOf[org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction]) + var normal_depth_file: File = _ + + /** + * Short name of normal_depth_file + * @return Short name of normal_depth_file + */ + def ndf = this.normal_depth_file + + /** + * Short name of normal_depth_file + * @param value Short name of normal_depth_file + */ + def ndf_=(value: File) { this.normal_depth_file = value } + + /** if a read has mismatching number of bases and 
base qualities, filter out the read instead of blowing up. */ + @Argument(fullName="filter_mismatching_base_and_quals", shortName="filterMBQ", doc="if a read has mismatching number of bases and base qualities, filter out the read instead of blowing up.", required=false, exclusiveOf="", validation="") + var filter_mismatching_base_and_quals: Boolean = _ + + /** + * Short name of filter_mismatching_base_and_quals + * @return Short name of filter_mismatching_base_and_quals + */ + def filterMBQ = this.filter_mismatching_base_and_quals + + /** + * Short name of filter_mismatching_base_and_quals + * @param value Short name of filter_mismatching_base_and_quals + */ + def filterMBQ_=(value: Boolean) { this.filter_mismatching_base_and_quals = value } + + override def freezeFieldValues() { + super.freezeFieldValues() + dbsnpIndexes ++= dbsnp.filter(orig => orig != null).map(orig => new File(orig.getPath + ".idx")) + cosmicIndexes ++= cosmic.filter(orig => orig != null).map(orig => new File(orig.getPath + ".idx")) + normal_panelIndexes ++= normal_panel.filter(orig => orig != null).map(orig => new File(orig.getPath + ".idx")) + } + + override def commandLine = super.commandLine + conditional(noop, "--noop", escape=true, format="%s") + conditional(enable_extended_output, "--enable_extended_output", escape=true, format="%s") + conditional(artifact_detection_mode, "--artifact_detection_mode", escape=true, format="%s") + optional("--tumor_sample_name", tumor_sample_name, spaceSeparated=true, escape=true, format="%s") + optional("--bam_tumor_sample_name", bam_tumor_sample_name, spaceSeparated=true, escape=true, format="%s") + optional("--normal_sample_name", normal_sample_name, spaceSeparated=true, escape=true, format="%s") + conditional(force_output, "--force_output", escape=true, format="%s") + conditional(force_alleles, "--force_alleles", escape=true, format="%s") + optional("--initial_tumor_lod", initial_tumor_lod, spaceSeparated=true, escape=true, 
format=initial_tumor_lodFormat) + optional("--tumor_lod", tumor_lod, spaceSeparated=true, escape=true, format=tumor_lodFormat) + optional("--fraction_contamination", fraction_contamination, spaceSeparated=true, escape=true, format=fraction_contaminationFormat) + optional("--minimum_mutation_cell_fraction", minimum_mutation_cell_fraction, spaceSeparated=true, escape=true, format=minimum_mutation_cell_fractionFormat) + optional("--normal_lod", normal_lod, spaceSeparated=true, escape=true, format=normal_lodFormat) + optional("--normal_artifact_lod", normal_artifact_lod, spaceSeparated=true, escape=true, format=normal_artifact_lodFormat) + optional("--strand_artifact_lod", strand_artifact_lod, spaceSeparated=true, escape=true, format=strand_artifact_lodFormat) + optional("--strand_artifact_power_threshold", strand_artifact_power_threshold, spaceSeparated=true, escape=true, format=strand_artifact_power_thresholdFormat) + optional("--dbsnp_normal_lod", dbsnp_normal_lod, spaceSeparated=true, escape=true, format=dbsnp_normal_lodFormat) + optional("--somatic_classification_normal_power_threshold", somatic_classification_normal_power_threshold, spaceSeparated=true, escape=true, format=somatic_classification_normal_power_thresholdFormat) + optional("--minimum_normal_allele_fraction", minimum_normal_allele_fraction, spaceSeparated=true, escape=true, format=minimum_normal_allele_fractionFormat) + optional("--tumor_f_pretest", tumor_f_pretest, spaceSeparated=true, escape=true, format=tumor_f_pretestFormat) + optional("--min_qscore", min_qscore, spaceSeparated=true, escape=true, format="%s") + optional("--gap_events_threshold", gap_events_threshold, spaceSeparated=true, escape=true, format="%s") + optional("--heavily_clipped_read_fraction", heavily_clipped_read_fraction, spaceSeparated=true, escape=true, format=heavily_clipped_read_fractionFormat) + optional("--clipping_bias_pvalue_threshold", clipping_bias_pvalue_threshold, spaceSeparated=true, escape=true, 
format=clipping_bias_pvalue_thresholdFormat) + optional("--fraction_mapq0_threshold", fraction_mapq0_threshold, spaceSeparated=true, escape=true, format=fraction_mapq0_thresholdFormat) + optional("--pir_median_threshold", pir_median_threshold, spaceSeparated=true, escape=true, format=pir_median_thresholdFormat) + optional("--pir_mad_threshold", pir_mad_threshold, spaceSeparated=true, escape=true, format=pir_mad_thresholdFormat) + optional("--required_maximum_alt_allele_mapping_quality_score", required_maximum_alt_allele_mapping_quality_score, spaceSeparated=true, escape=true, format="%s") + optional("--max_alt_alleles_in_normal_count", max_alt_alleles_in_normal_count, spaceSeparated=true, escape=true, format="%s") + optional("--max_alt_alleles_in_normal_qscore_sum", max_alt_alleles_in_normal_qscore_sum, spaceSeparated=true, escape=true, format="%s") + optional("--max_alt_allele_in_normal_fraction", max_alt_allele_in_normal_fraction, spaceSeparated=true, escape=true, format=max_alt_allele_in_normal_fractionFormat) + optional("--power_constant_qscore", power_constant_qscore, spaceSeparated=true, escape=true, format="%s") + optional("--absolute_copy_number_data", absolute_copy_number_data, spaceSeparated=true, escape=true, format="%s") + optional("--power_constant_af", power_constant_af, spaceSeparated=true, escape=true, format=power_constant_afFormat) + optional("-o", out, spaceSeparated=true, escape=true, format="%s") + repeat("-dbsnp", dbsnp, formatPrefix=TaggedFile.formatCommandLineParameter, spaceSeparated=true, escape=true, format="%s") + repeat("-cosmic", cosmic, formatPrefix=TaggedFile.formatCommandLineParameter, spaceSeparated=true, escape=true, format="%s") + repeat("-normal_panel", normal_panel, formatPrefix=TaggedFile.formatCommandLineParameter, spaceSeparated=true, escape=true, format="%s") + optional("-cov", coverage_file, spaceSeparated=true, escape=true, format="%s") + optional("-cov_q20", coverage_20_q20_file, spaceSeparated=true, escape=true, 
format="%s") + optional("-pow", power_file, spaceSeparated=true, escape=true, format="%s") + optional("-tdf", tumor_depth_file, spaceSeparated=true, escape=true, format="%s") + optional("-ndf", normal_depth_file, spaceSeparated=true, escape=true, format="%s") + conditional(filter_mismatching_base_and_quals, "-filterMBQ", escape=true, format="%s") +} From f9095c7ab74d59b35b85750886c99711b44f143c Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Mon, 24 Sep 2012 17:01:17 -0400 Subject: [PATCH 17/43] Generic input file name recognition (still need to implement support to FastQ, but it now can at least accept it) --- .../qscripts/DataProcessingPipeline.scala | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala index 56f6460fb..c21db30ce 100755 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala @@ -96,6 +96,7 @@ class DataProcessingPipeline extends QScript { var cleanModelEnum: ConsensusDeterminationModel = ConsensusDeterminationModel.USE_READS + val bwaParameters: String = " -q 5 -l 32 -k 2 -t 4 -o 1 " @@ -165,12 +166,15 @@ class DataProcessingPipeline extends QScript { var realignedBams: Seq[File] = Seq() var index = 1 for (bam <- bams) { - // first revert the BAM file to the original qualities - val saiFile1 = swapExt(bam, ".bam", "." + index + ".1.sai") - val saiFile2 = swapExt(bam, ".bam", "." + index + ".2.sai") - val realignedSamFile = swapExt(bam, ".bam", "." + index + ".realigned.sam") - val realignedBamFile = swapExt(bam, ".bam", "." + index + ".realigned.bam") - val rgRealignedBamFile = swapExt(bam, ".bam", "." 
+ index + ".realigned.rg.bam") + val extension = bam.toString.substring(bam.toString.length - 4) + + + + val saiFile1 = swapExt(bam, extension, "." + index + ".1.sai") + val saiFile2 = swapExt(bam, extension, "." + index + ".2.sai") + val realignedSamFile = swapExt(bam, extension, "." + index + ".realigned.sam") + val realignedBamFile = swapExt(bam, extension, "." + index + ".realigned.bam") + val rgRealignedBamFile = swapExt(bam, extension, "." + index + ".realigned.rg.bam") if (useBWAse) { val revertedBAM = revertBAM(bam, true) @@ -444,7 +448,7 @@ class DataProcessingPipeline extends QScript { case class bwa_aln_se (inBam: File, outSai: File) extends CommandLineFunction with ExternalCommonArgs { @Input(doc="bam file to be aligned") var bam = inBam @Output(doc="output sai file") var sai = outSai - def commandLine = bwaPath + " aln -t " + bwaThreads + " -q 5 " + reference + " -b " + bam + " > " + sai + def commandLine = bwaPath + " aln -t " + bwaThreads + bwaParameters + reference + " -b " + bam + " > " + sai this.analysisName = queueLogDir + outSai + ".bwa_aln_se" this.jobName = queueLogDir + outSai + ".bwa_aln_se" } @@ -452,7 +456,7 @@ class DataProcessingPipeline extends QScript { case class bwa_aln_pe (inBam: File, outSai1: File, index: Int) extends CommandLineFunction with ExternalCommonArgs { @Input(doc="bam file to be aligned") var bam = inBam @Output(doc="output sai file for 1st mating pair") var sai = outSai1 - def commandLine = bwaPath + " aln -t " + bwaThreads + " -q 5 " + reference + " -b" + index + " " + bam + " > " + sai + def commandLine = bwaPath + " aln -t " + bwaThreads + bwaParameters + reference + " -b" + index + " " + bam + " > " + sai this.analysisName = queueLogDir + outSai1 + ".bwa_aln_pe1" this.jobName = queueLogDir + outSai1 + ".bwa_aln_pe1" } From 08b6d1559c2d072541dec3f960c8978e0b952fba Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Tue, 25 Sep 2012 12:02:34 -0400 Subject: [PATCH 18/43] Reverting the DPP to the original version, 
going to create a new simplified version for CMI in private. --- .../qscripts/DataProcessingPipeline.scala | 20 ++++++++----------- 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala index c21db30ce..56f6460fb 100755 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala @@ -96,7 +96,6 @@ class DataProcessingPipeline extends QScript { var cleanModelEnum: ConsensusDeterminationModel = ConsensusDeterminationModel.USE_READS - val bwaParameters: String = " -q 5 -l 32 -k 2 -t 4 -o 1 " @@ -166,15 +165,12 @@ class DataProcessingPipeline extends QScript { var realignedBams: Seq[File] = Seq() var index = 1 for (bam <- bams) { - val extension = bam.toString.substring(bam.toString.length - 4) - - - - val saiFile1 = swapExt(bam, extension, "." + index + ".1.sai") - val saiFile2 = swapExt(bam, extension, "." + index + ".2.sai") - val realignedSamFile = swapExt(bam, extension, "." + index + ".realigned.sam") - val realignedBamFile = swapExt(bam, extension, "." + index + ".realigned.bam") - val rgRealignedBamFile = swapExt(bam, extension, "." + index + ".realigned.rg.bam") + // first revert the BAM file to the original qualities + val saiFile1 = swapExt(bam, ".bam", "." + index + ".1.sai") + val saiFile2 = swapExt(bam, ".bam", "." + index + ".2.sai") + val realignedSamFile = swapExt(bam, ".bam", "." + index + ".realigned.sam") + val realignedBamFile = swapExt(bam, ".bam", "." + index + ".realigned.bam") + val rgRealignedBamFile = swapExt(bam, ".bam", "." 
+ index + ".realigned.rg.bam") if (useBWAse) { val revertedBAM = revertBAM(bam, true) @@ -448,7 +444,7 @@ class DataProcessingPipeline extends QScript { case class bwa_aln_se (inBam: File, outSai: File) extends CommandLineFunction with ExternalCommonArgs { @Input(doc="bam file to be aligned") var bam = inBam @Output(doc="output sai file") var sai = outSai - def commandLine = bwaPath + " aln -t " + bwaThreads + bwaParameters + reference + " -b " + bam + " > " + sai + def commandLine = bwaPath + " aln -t " + bwaThreads + " -q 5 " + reference + " -b " + bam + " > " + sai this.analysisName = queueLogDir + outSai + ".bwa_aln_se" this.jobName = queueLogDir + outSai + ".bwa_aln_se" } @@ -456,7 +452,7 @@ class DataProcessingPipeline extends QScript { case class bwa_aln_pe (inBam: File, outSai1: File, index: Int) extends CommandLineFunction with ExternalCommonArgs { @Input(doc="bam file to be aligned") var bam = inBam @Output(doc="output sai file for 1st mating pair") var sai = outSai1 - def commandLine = bwaPath + " aln -t " + bwaThreads + bwaParameters + reference + " -b" + index + " " + bam + " > " + sai + def commandLine = bwaPath + " aln -t " + bwaThreads + " -q 5 " + reference + " -b" + index + " " + bam + " > " + sai this.analysisName = queueLogDir + outSai1 + ".bwa_aln_pe1" this.jobName = queueLogDir + outSai1 + ".bwa_aln_pe1" } From 0c177092231c623dca8c0e84fb47a4af94092817 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Tue, 25 Sep 2012 17:13:50 -0400 Subject: [PATCH 19/43] First implementation of a generic 'bundled' Data Processing Pipeline for germline and cancer. not ready for prime time yet! 
--- .../src/org/broadinstitute/sting/queue/util/QScriptUtils.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/public/scala/src/org/broadinstitute/sting/queue/util/QScriptUtils.scala b/public/scala/src/org/broadinstitute/sting/queue/util/QScriptUtils.scala index 1529d9951..f684e533f 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/util/QScriptUtils.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/util/QScriptUtils.scala @@ -57,7 +57,8 @@ object QScriptUtils { for (file <- fromFile(in).getLines()) if (!file.startsWith("#") && !file.isEmpty ) list :+= new File(file.trim()) - list.sortWith(_.compareTo(_) < 0) +// list.sortWith(_.compareTo(_) < 0) + list } /** From 2311606de4addf07c65540735c8b09b1385f30db Mon Sep 17 00:00:00 2001 From: Kristian Cibulskis Date: Wed, 3 Oct 2012 16:25:34 -0400 Subject: [PATCH 26/43] initial cancer pipeline with mutations and partial indel support --- .../queue/extensions/cancer/MuTect.scala | 378 ++++++++++++++++++ 1 file changed, 378 insertions(+) create mode 100644 public/scala/src/org/broadinstitute/sting/queue/extensions/cancer/MuTect.scala diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/cancer/MuTect.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/cancer/MuTect.scala new file mode 100644 index 000000000..623d397d4 --- /dev/null +++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/cancer/MuTect.scala @@ -0,0 +1,378 @@ +package org.broadinstitute.sting.queue.extensions.cancer + +import java.io.File +import org.broadinstitute.sting.commandline.Argument +import org.broadinstitute.sting.commandline.Gather +import org.broadinstitute.sting.commandline.Input +import org.broadinstitute.sting.commandline.Output +import org.broadinstitute.sting.queue.function.scattergather.ScatterGatherableFunction +import org.broadinstitute.sting.queue.extensions.gatk.{LocusScatterFunction, TaggedFile} + +class MuTect extends 
org.broadinstitute.sting.queue.extensions.gatk.CommandLineGATK with ScatterGatherableFunction { + analysisName = "MuTect" + analysis_type = "MuTect" + scatterClass = classOf[LocusScatterFunction] + + /** used for debugging, basically exit as soon as we get the reads */ + @Argument(fullName="noop", shortName="", doc="used for debugging, basically exit as soon as we get the reads", required=false, exclusiveOf="", validation="") + var noop: Boolean = _ + + /** add many additional columns of statistics to the output file */ + @Argument(fullName="enable_extended_output", shortName="", doc="add many additional columns of statistics to the output file", required=false, exclusiveOf="", validation="") + var enable_extended_output: Boolean = _ + + /** used when running the caller on a normal (as if it were a tumor) to detect artifacts */ + @Argument(fullName="artifact_detection_mode", shortName="", doc="used when running the caller on a normal (as if it were a tumor) to detect artifacts", required=false, exclusiveOf="", validation="") + var artifact_detection_mode: Boolean = _ + + /** name to use for tumor in output files */ + @Argument(fullName="tumor_sample_name", shortName="", doc="name to use for tumor in output files", required=false, exclusiveOf="", validation="") + var tumor_sample_name: String = _ + + /** if the tumor bam contains multiple samples, only use read groups with SM equal to this value */ + @Argument(fullName="bam_tumor_sample_name", shortName="", doc="if the tumor bam contains multiple samples, only use read groups with SM equal to this value", required=false, exclusiveOf="", validation="") + var bam_tumor_sample_name: String = _ + + /** name to use for normal in output files */ + @Argument(fullName="normal_sample_name", shortName="", doc="name to use for normal in output files", required=false, exclusiveOf="", validation="") + var normal_sample_name: String = _ + + /** force output for each site */ + @Argument(fullName="force_output", shortName="", 
doc="force output for each site", required=false, exclusiveOf="", validation="") + var force_output: Boolean = _ + + /** force output for all alleles at each site */ + @Argument(fullName="force_alleles", shortName="", doc="force output for all alleles at each site", required=false, exclusiveOf="", validation="") + var force_alleles: Boolean = _ + + /** Initial LOD threshold for calling tumor variant */ + @Argument(fullName="initial_tumor_lod", shortName="", doc="Initial LOD threshold for calling tumor variant", required=false, exclusiveOf="", validation="") + var initial_tumor_lod: Option[Float] = None + + /** Format string for initial_tumor_lod */ + @Argument(fullName="initial_tumor_lodFormat", shortName="", doc="Format string for initial_tumor_lod", required=false, exclusiveOf="", validation="") + var initial_tumor_lodFormat: String = "%s" + + /** LOD threshold for calling tumor variant */ + @Argument(fullName="tumor_lod", shortName="", doc="LOD threshold for calling tumor variant", required=false, exclusiveOf="", validation="") + var tumor_lod: Option[Float] = None + + /** Format string for tumor_lod */ + @Argument(fullName="tumor_lodFormat", shortName="", doc="Format string for tumor_lod", required=false, exclusiveOf="", validation="") + var tumor_lodFormat: String = "%s" + + /** estimate of fraction (0-1) of physical contamination with other unrelated samples */ + @Argument(fullName="fraction_contamination", shortName="", doc="estimate of fraction (0-1) of physical contamination with other unrelated samples", required=false, exclusiveOf="", validation="") + var fraction_contamination: Option[Float] = None + + /** Format string for fraction_contamination */ + @Argument(fullName="fraction_contaminationFormat", shortName="", doc="Format string for fraction_contamination", required=false, exclusiveOf="", validation="") + var fraction_contaminationFormat: String = "%s" + + /** minimum fraction of cells which are presumed to have a mutation, used to handle 
non-clonality and contamination */ + @Argument(fullName="minimum_mutation_cell_fraction", shortName="", doc="minimum fraction of cells which are presumed to have a mutation, used to handle non-clonality and contamination", required=false, exclusiveOf="", validation="") + var minimum_mutation_cell_fraction: Option[Float] = None + + /** Format string for minimum_mutation_cell_fraction */ + @Argument(fullName="minimum_mutation_cell_fractionFormat", shortName="", doc="Format string for minimum_mutation_cell_fraction", required=false, exclusiveOf="", validation="") + var minimum_mutation_cell_fractionFormat: String = "%s" + + /** LOD threshold for calling normal non-germline */ + @Argument(fullName="normal_lod", shortName="", doc="LOD threshold for calling normal non-germline", required=false, exclusiveOf="", validation="") + var normal_lod: Option[Float] = None + + /** Format string for normal_lod */ + @Argument(fullName="normal_lodFormat", shortName="", doc="Format string for normal_lod", required=false, exclusiveOf="", validation="") + var normal_lodFormat: String = "%s" + + /** LOD threshold for calling normal non-variant */ + @Argument(fullName="normal_artifact_lod", shortName="", doc="LOD threshold for calling normal non-variant", required=false, exclusiveOf="", validation="") + var normal_artifact_lod: Option[Float] = None + + /** Format string for normal_artifact_lod */ + @Argument(fullName="normal_artifact_lodFormat", shortName="", doc="Format string for normal_artifact_lod", required=false, exclusiveOf="", validation="") + var normal_artifact_lodFormat: String = "%s" + + /** LOD threshold for calling strand bias */ + @Argument(fullName="strand_artifact_lod", shortName="", doc="LOD threshold for calling strand bias", required=false, exclusiveOf="", validation="") + var strand_artifact_lod: Option[Float] = None + + /** Format string for strand_artifact_lod */ + @Argument(fullName="strand_artifact_lodFormat", shortName="", doc="Format string for 
strand_artifact_lod", required=false, exclusiveOf="", validation="") + var strand_artifact_lodFormat: String = "%s" + + /** power threshold for calling strand bias */ + @Argument(fullName="strand_artifact_power_threshold", shortName="", doc="power threshold for calling strand bias", required=false, exclusiveOf="", validation="") + var strand_artifact_power_threshold: Option[Float] = None + + /** Format string for strand_artifact_power_threshold */ + @Argument(fullName="strand_artifact_power_thresholdFormat", shortName="", doc="Format string for strand_artifact_power_threshold", required=false, exclusiveOf="", validation="") + var strand_artifact_power_thresholdFormat: String = "%s" + + /** LOD threshold for calling normal non-variant at dbsnp sites */ + @Argument(fullName="dbsnp_normal_lod", shortName="", doc="LOD threshold for calling normal non-variant at dbsnp sites", required=false, exclusiveOf="", validation="") + var dbsnp_normal_lod: Option[Float] = None + + /** Format string for dbsnp_normal_lod */ + @Argument(fullName="dbsnp_normal_lodFormat", shortName="", doc="Format string for dbsnp_normal_lod", required=false, exclusiveOf="", validation="") + var dbsnp_normal_lodFormat: String = "%s" + + /** Power threshold for normal to determine germline vs variant */ + @Argument(fullName="somatic_classification_normal_power_threshold", shortName="", doc="Power threshold for normal to determine germline vs variant", required=false, exclusiveOf="", validation="") + var somatic_classification_normal_power_threshold: Option[Float] = None + + /** Format string for somatic_classification_normal_power_threshold */ + @Argument(fullName="somatic_classification_normal_power_thresholdFormat", shortName="", doc="Format string for somatic_classification_normal_power_threshold", required=false, exclusiveOf="", validation="") + var somatic_classification_normal_power_thresholdFormat: String = "%s" + + /** minimum allele fraction to be considered in normal, useful for normal sample 
contaminated with tumor */ + @Argument(fullName="minimum_normal_allele_fraction", shortName="", doc="minimum allele fraction to be considered in normal, useful for normal sample contaminated with tumor", required=false, exclusiveOf="", validation="") + var minimum_normal_allele_fraction: Option[Float] = None + + /** Format string for minimum_normal_allele_fraction */ + @Argument(fullName="minimum_normal_allele_fractionFormat", shortName="", doc="Format string for minimum_normal_allele_fraction", required=false, exclusiveOf="", validation="") + var minimum_normal_allele_fractionFormat: String = "%s" + + /** for computational efficiency, reject sites with allelic fraction below this threshold */ + @Argument(fullName="tumor_f_pretest", shortName="", doc="for computational efficiency, reject sites with allelic fraction below this threshold", required=false, exclusiveOf="", validation="") + var tumor_f_pretest: Option[Float] = None + + /** Format string for tumor_f_pretest */ + @Argument(fullName="tumor_f_pretestFormat", shortName="", doc="Format string for tumor_f_pretest", required=false, exclusiveOf="", validation="") + var tumor_f_pretestFormat: String = "%s" + + /** threshold for minimum base quality score */ + @Argument(fullName="min_qscore", shortName="", doc="threshold for minimum base quality score", required=false, exclusiveOf="", validation="") + var min_qscore: Option[Int] = None + + /** how many gapped events (ins/del) are allowed in proximity to this candidate */ + @Argument(fullName="gap_events_threshold", shortName="", doc="how many gapped events (ins/del) are allowed in proximity to this candidate", required=false, exclusiveOf="", validation="") + var gap_events_threshold: Option[Int] = None + + /** if this fraction or more of the bases in a read are soft/hard clipped, do not use this read for mutation calling */ + @Argument(fullName="heavily_clipped_read_fraction", shortName="", doc="if this fraction or more of the bases in a read are soft/hard 
clipped, do not use this read for mutation calling", required=false, exclusiveOf="", validation="") + var heavily_clipped_read_fraction: Option[Float] = None + + /** Format string for heavily_clipped_read_fraction */ + @Argument(fullName="heavily_clipped_read_fractionFormat", shortName="", doc="Format string for heavily_clipped_read_fraction", required=false, exclusiveOf="", validation="") + var heavily_clipped_read_fractionFormat: String = "%s" + + /** pvalue threshold for fishers exact test of clipping bias in mutant reads vs ref reads */ + @Argument(fullName="clipping_bias_pvalue_threshold", shortName="", doc="pvalue threshold for fishers exact test of clipping bias in mutant reads vs ref reads", required=false, exclusiveOf="", validation="") + var clipping_bias_pvalue_threshold: Option[Float] = None + + /** Format string for clipping_bias_pvalue_threshold */ + @Argument(fullName="clipping_bias_pvalue_thresholdFormat", shortName="", doc="Format string for clipping_bias_pvalue_threshold", required=false, exclusiveOf="", validation="") + var clipping_bias_pvalue_thresholdFormat: String = "%s" + + /** threshold for determining if there is relatedness between the alt and ref allele read piles */ + @Argument(fullName="fraction_mapq0_threshold", shortName="", doc="threshold for determining if there is relatedness between the alt and ref allele read piles", required=false, exclusiveOf="", validation="") + var fraction_mapq0_threshold: Option[Float] = None + + /** Format string for fraction_mapq0_threshold */ + @Argument(fullName="fraction_mapq0_thresholdFormat", shortName="", doc="Format string for fraction_mapq0_threshold", required=false, exclusiveOf="", validation="") + var fraction_mapq0_thresholdFormat: String = "%s" + + /** threshold for clustered read position artifact median */ + @Argument(fullName="pir_median_threshold", shortName="", doc="threshold for clustered read position artifact median", required=false, exclusiveOf="", validation="") + var 
pir_median_threshold: Option[Double] = None + + /** Format string for pir_median_threshold */ + @Argument(fullName="pir_median_thresholdFormat", shortName="", doc="Format string for pir_median_threshold", required=false, exclusiveOf="", validation="") + var pir_median_thresholdFormat: String = "%s" + + /** threshold for clustered read position artifact MAD */ + @Argument(fullName="pir_mad_threshold", shortName="", doc="threshold for clustered read position artifact MAD", required=false, exclusiveOf="", validation="") + var pir_mad_threshold: Option[Double] = None + + /** Format string for pir_mad_threshold */ + @Argument(fullName="pir_mad_thresholdFormat", shortName="", doc="Format string for pir_mad_threshold", required=false, exclusiveOf="", validation="") + var pir_mad_thresholdFormat: String = "%s" + + /** required minimum value for tumor alt allele maximum mapping quality score */ + @Argument(fullName="required_maximum_alt_allele_mapping_quality_score", shortName="", doc="required minimum value for tumor alt allele maximum mapping quality score", required=false, exclusiveOf="", validation="") + var required_maximum_alt_allele_mapping_quality_score: Option[Int] = None + + /** threshold for maximum alternate allele counts in normal */ + @Argument(fullName="max_alt_alleles_in_normal_count", shortName="", doc="threshold for maximum alternate allele counts in normal", required=false, exclusiveOf="", validation="") + var max_alt_alleles_in_normal_count: Option[Int] = None + + /** threshold for maximum alternate allele quality score sum in normal */ + @Argument(fullName="max_alt_alleles_in_normal_qscore_sum", shortName="", doc="threshold for maximum alternate allele quality score sum in normal", required=false, exclusiveOf="", validation="") + var max_alt_alleles_in_normal_qscore_sum: Option[Int] = None + + /** threshold for maximum alternate allele fraction in normal */ + @Argument(fullName="max_alt_allele_in_normal_fraction", shortName="", doc="threshold for 
maximum alternate allele fraction in normal", required=false, exclusiveOf="", validation="") + var max_alt_allele_in_normal_fraction: Option[Double] = None + + /** Format string for max_alt_allele_in_normal_fraction */ + @Argument(fullName="max_alt_allele_in_normal_fractionFormat", shortName="", doc="Format string for max_alt_allele_in_normal_fraction", required=false, exclusiveOf="", validation="") + var max_alt_allele_in_normal_fractionFormat: String = "%s" + + /** Phred scale quality score constant to use in power calculations */ + @Argument(fullName="power_constant_qscore", shortName="", doc="Phred scale quality score constant to use in power calculations", required=false, exclusiveOf="", validation="") + var power_constant_qscore: Option[Int] = None + + /** Absolute Copy Number Data, as defined by Absolute, to use in power calculations */ + @Argument(fullName="absolute_copy_number_data", shortName="", doc="Absolute Copy Number Data, as defined by Absolute, to use in power calculations", required=false, exclusiveOf="", validation="") + var absolute_copy_number_data: File = _ + + /** Allelic fraction constant to use in power calculations */ + @Argument(fullName="power_constant_af", shortName="", doc="Allelic fraction constant to use in power calculations", required=false, exclusiveOf="", validation="") + var power_constant_af: Option[Double] = None + + /** Format string for power_constant_af */ + @Argument(fullName="power_constant_afFormat", shortName="", doc="Format string for power_constant_af", required=false, exclusiveOf="", validation="") + var power_constant_afFormat: String = "%s" + + /** Call-stats output */ + @Output(fullName="out", shortName="o", doc="Call-stats output", required=false, exclusiveOf="", validation="") + @Gather(classOf[org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction]) + var out: File = _ + + /** + * Short name of out + * @return Short name of out + */ + def o = this.out + + /** + * Short name of out + * 
@param value Short name of out + */ + def o_=(value: File) { this.out = value } + + /** VCF file of DBSNP information */ + @Input(fullName="dbsnp", shortName="dbsnp", doc="VCF file of DBSNP information", required=false, exclusiveOf="", validation="") + var dbsnp: Seq[File] = Nil + + /** Dependencies on any indexes of dbsnp */ + @Input(fullName="dbsnpIndexes", shortName="", doc="Dependencies on any indexes of dbsnp", required=false, exclusiveOf="", validation="") + private var dbsnpIndexes: Seq[File] = Nil + + /** VCF file of COSMIC sites */ + @Input(fullName="cosmic", shortName="cosmic", doc="VCF file of COSMIC sites", required=false, exclusiveOf="", validation="") + var cosmic: Seq[File] = Nil + + /** Dependencies on any indexes of cosmic */ + @Input(fullName="cosmicIndexes", shortName="", doc="Dependencies on any indexes of cosmic", required=false, exclusiveOf="", validation="") + private var cosmicIndexes: Seq[File] = Nil + + /** VCF file of sites observed in normal */ + @Input(fullName="normal_panel", shortName="normal_panel", doc="VCF file of sites observed in normal", required=false, exclusiveOf="", validation="") + var normal_panel: Seq[File] = Nil + + /** Dependencies on any indexes of normal_panel */ + @Input(fullName="normal_panelIndexes", shortName="", doc="Dependencies on any indexes of normal_panel", required=false, exclusiveOf="", validation="") + private var normal_panelIndexes: Seq[File] = Nil + + /** write out coverage in WIGGLE format to this file */ + @Output(fullName="coverage_file", shortName="cov", doc="write out coverage in WIGGLE format to this file", required=false, exclusiveOf="", validation="") + @Gather(classOf[org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction]) + var coverage_file: File = _ + + /** + * Short name of coverage_file + * @return Short name of coverage_file + */ + def cov = this.coverage_file + + /** + * Short name of coverage_file + * @param value Short name of coverage_file + */ + def 
cov_=(value: File) { this.coverage_file = value } + + /** write out 20x of Q20 coverage in WIGGLE format to this file */ + @Output(fullName="coverage_20_q20_file", shortName="cov_q20", doc="write out 20x of Q20 coverage in WIGGLE format to this file", required=false, exclusiveOf="", validation="") + @Gather(classOf[org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction]) + var coverage_20_q20_file: File = _ + + /** + * Short name of coverage_20_q20_file + * @return Short name of coverage_20_q20_file + */ + def cov_q20 = this.coverage_20_q20_file + + /** + * Short name of coverage_20_q20_file + * @param value Short name of coverage_20_q20_file + */ + def cov_q20_=(value: File) { this.coverage_20_q20_file = value } + + /** write out power in WIGGLE format to this file */ + @Output(fullName="power_file", shortName="pow", doc="write out power in WIGGLE format to this file", required=false, exclusiveOf="", validation="") + @Gather(classOf[org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction]) + var power_file: File = _ + + /** + * Short name of power_file + * @return Short name of power_file + */ + def pow = this.power_file + + /** + * Short name of power_file + * @param value Short name of power_file + */ + def pow_=(value: File) { this.power_file = value } + + /** write out tumor read depth in WIGGLE format to this file */ + @Output(fullName="tumor_depth_file", shortName="tdf", doc="write out tumor read depth in WIGGLE format to this file", required=false, exclusiveOf="", validation="") + @Gather(classOf[org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction]) + var tumor_depth_file: File = _ + + /** + * Short name of tumor_depth_file + * @return Short name of tumor_depth_file + */ + def tdf = this.tumor_depth_file + + /** + * Short name of tumor_depth_file + * @param value Short name of tumor_depth_file + */ + def tdf_=(value: File) { this.tumor_depth_file = value } + + /** write out normal 
read depth in WIGGLE format to this file */ + @Output(fullName="normal_depth_file", shortName="ndf", doc="write out normal read depth in WIGGLE format to this file", required=false, exclusiveOf="", validation="") + @Gather(classOf[org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction]) + var normal_depth_file: File = _ + + /** + * Short name of normal_depth_file + * @return Short name of normal_depth_file + */ + def ndf = this.normal_depth_file + + /** + * Short name of normal_depth_file + * @param value Short name of normal_depth_file + */ + def ndf_=(value: File) { this.normal_depth_file = value } + + /** if a read has mismatching number of bases and base qualities, filter out the read instead of blowing up. */ + @Argument(fullName="filter_mismatching_base_and_quals", shortName="filterMBQ", doc="if a read has mismatching number of bases and base qualities, filter out the read instead of blowing up.", required=false, exclusiveOf="", validation="") + var filter_mismatching_base_and_quals: Boolean = _ + + /** + * Short name of filter_mismatching_base_and_quals + * @return Short name of filter_mismatching_base_and_quals + */ + def filterMBQ = this.filter_mismatching_base_and_quals + + /** + * Short name of filter_mismatching_base_and_quals + * @param value Short name of filter_mismatching_base_and_quals + */ + def filterMBQ_=(value: Boolean) { this.filter_mismatching_base_and_quals = value } + + override def freezeFieldValues() { + super.freezeFieldValues() + dbsnpIndexes ++= dbsnp.filter(orig => orig != null).map(orig => new File(orig.getPath + ".idx")) + cosmicIndexes ++= cosmic.filter(orig => orig != null).map(orig => new File(orig.getPath + ".idx")) + normal_panelIndexes ++= normal_panel.filter(orig => orig != null).map(orig => new File(orig.getPath + ".idx")) + } + + override def commandLine = super.commandLine + conditional(noop, "--noop", escape=true, format="%s") + conditional(enable_extended_output, "--enable_extended_output", 
escape=true, format="%s") + conditional(artifact_detection_mode, "--artifact_detection_mode", escape=true, format="%s") + optional("--tumor_sample_name", tumor_sample_name, spaceSeparated=true, escape=true, format="%s") + optional("--bam_tumor_sample_name", bam_tumor_sample_name, spaceSeparated=true, escape=true, format="%s") + optional("--normal_sample_name", normal_sample_name, spaceSeparated=true, escape=true, format="%s") + conditional(force_output, "--force_output", escape=true, format="%s") + conditional(force_alleles, "--force_alleles", escape=true, format="%s") + optional("--initial_tumor_lod", initial_tumor_lod, spaceSeparated=true, escape=true, format=initial_tumor_lodFormat) + optional("--tumor_lod", tumor_lod, spaceSeparated=true, escape=true, format=tumor_lodFormat) + optional("--fraction_contamination", fraction_contamination, spaceSeparated=true, escape=true, format=fraction_contaminationFormat) + optional("--minimum_mutation_cell_fraction", minimum_mutation_cell_fraction, spaceSeparated=true, escape=true, format=minimum_mutation_cell_fractionFormat) + optional("--normal_lod", normal_lod, spaceSeparated=true, escape=true, format=normal_lodFormat) + optional("--normal_artifact_lod", normal_artifact_lod, spaceSeparated=true, escape=true, format=normal_artifact_lodFormat) + optional("--strand_artifact_lod", strand_artifact_lod, spaceSeparated=true, escape=true, format=strand_artifact_lodFormat) + optional("--strand_artifact_power_threshold", strand_artifact_power_threshold, spaceSeparated=true, escape=true, format=strand_artifact_power_thresholdFormat) + optional("--dbsnp_normal_lod", dbsnp_normal_lod, spaceSeparated=true, escape=true, format=dbsnp_normal_lodFormat) + optional("--somatic_classification_normal_power_threshold", somatic_classification_normal_power_threshold, spaceSeparated=true, escape=true, format=somatic_classification_normal_power_thresholdFormat) + optional("--minimum_normal_allele_fraction", minimum_normal_allele_fraction, 
spaceSeparated=true, escape=true, format=minimum_normal_allele_fractionFormat) + optional("--tumor_f_pretest", tumor_f_pretest, spaceSeparated=true, escape=true, format=tumor_f_pretestFormat) + optional("--min_qscore", min_qscore, spaceSeparated=true, escape=true, format="%s") + optional("--gap_events_threshold", gap_events_threshold, spaceSeparated=true, escape=true, format="%s") + optional("--heavily_clipped_read_fraction", heavily_clipped_read_fraction, spaceSeparated=true, escape=true, format=heavily_clipped_read_fractionFormat) + optional("--clipping_bias_pvalue_threshold", clipping_bias_pvalue_threshold, spaceSeparated=true, escape=true, format=clipping_bias_pvalue_thresholdFormat) + optional("--fraction_mapq0_threshold", fraction_mapq0_threshold, spaceSeparated=true, escape=true, format=fraction_mapq0_thresholdFormat) + optional("--pir_median_threshold", pir_median_threshold, spaceSeparated=true, escape=true, format=pir_median_thresholdFormat) + optional("--pir_mad_threshold", pir_mad_threshold, spaceSeparated=true, escape=true, format=pir_mad_thresholdFormat) + optional("--required_maximum_alt_allele_mapping_quality_score", required_maximum_alt_allele_mapping_quality_score, spaceSeparated=true, escape=true, format="%s") + optional("--max_alt_alleles_in_normal_count", max_alt_alleles_in_normal_count, spaceSeparated=true, escape=true, format="%s") + optional("--max_alt_alleles_in_normal_qscore_sum", max_alt_alleles_in_normal_qscore_sum, spaceSeparated=true, escape=true, format="%s") + optional("--max_alt_allele_in_normal_fraction", max_alt_allele_in_normal_fraction, spaceSeparated=true, escape=true, format=max_alt_allele_in_normal_fractionFormat) + optional("--power_constant_qscore", power_constant_qscore, spaceSeparated=true, escape=true, format="%s") + optional("--absolute_copy_number_data", absolute_copy_number_data, spaceSeparated=true, escape=true, format="%s") + optional("--power_constant_af", power_constant_af, spaceSeparated=true, escape=true, 
format=power_constant_afFormat) + optional("-o", out, spaceSeparated=true, escape=true, format="%s") + repeat("-dbsnp", dbsnp, formatPrefix=TaggedFile.formatCommandLineParameter, spaceSeparated=true, escape=true, format="%s") + repeat("-cosmic", cosmic, formatPrefix=TaggedFile.formatCommandLineParameter, spaceSeparated=true, escape=true, format="%s") + repeat("-normal_panel", normal_panel, formatPrefix=TaggedFile.formatCommandLineParameter, spaceSeparated=true, escape=true, format="%s") + optional("-cov", coverage_file, spaceSeparated=true, escape=true, format="%s") + optional("-cov_q20", coverage_20_q20_file, spaceSeparated=true, escape=true, format="%s") + optional("-pow", power_file, spaceSeparated=true, escape=true, format="%s") + optional("-tdf", tumor_depth_file, spaceSeparated=true, escape=true, format="%s") + optional("-ndf", normal_depth_file, spaceSeparated=true, escape=true, format="%s") + conditional(filter_mismatching_base_and_quals, "-filterMBQ", escape=true, format="%s") +} From 88297606f019da0e4b9725d5cad58abd1924a2d7 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Wed, 10 Oct 2012 13:20:30 -0400 Subject: [PATCH 28/43] Adding intellij example configuration files --- intellij_example.tar.bz2 | Bin 0 -> 7520 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 intellij_example.tar.bz2 diff --git a/intellij_example.tar.bz2 b/intellij_example.tar.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..bce16045cd1cc476305c5e59d07ff9b94b8e5d73 GIT binary patch literal 7520 zcmV-m9iQStT4*^jL0KkKS)7!cJOD4F|M~yi5CDHu|NsC0|M36+|L{No06+`?06+jh z1{h!`o8{-7h4qB5wR>^y>D#MWyS_S4UPrYW)`~XXKBY)=w?n(T?w=>u4%}F@T^prq zM&}*98+P}1CC>JGY3}jFJo}ytO>BKWz24nKq=%-|-tIdTv1Z%Ou|py0B#;mw z0$?VFLqIe|HdE0LQ`8zCr9V{lAE^M-CYlc@B!L1ZL{xsM>S#SrDYXw!Hq`?_27mwn z0006)At_HHdU~eyLFyU;NYu?W zO%G7?YBUW!LqkS@9-*KBcwhGq&L2>c7{2Bv%KPh~XqovPD`P$wqr0wA{OX>F7ZKmy zDsL?$v;dI_RDWe46~zQU)lvw>NPvKQ{r?Z1(}ww2_)M*fQwCl2HJP1e>Ri?dCZpMN zP=M?SA|wk1NPtvh=J3} 
zDw+~D0aFk#a8yBk1VvNe{^Q2@ATLqre5hlqO+ zJH6U_g@n*il^HZ-l$Xlw^mBWD_ji=uWfv%71y1==(-ozyjYp|LKHs?f9BLgry=7~; zQzc6vctrO5Ohh{&v$XFjif1`0L{s*Olu^?q8x}#rqzyaoVB#B4g(81Jx};qlYMjj? z2dYy`yjW&t6?wYx0N9Uh!RRNLa}rLdMi`DPNP-P&$TYbGk_IqjZvxK0qQ|cbe@$>KA+XQo15<95gH&cGF>(8JUi?q#;jjQhX7>?$D_)jV*fSGa99hISqAgCirHOBRlz z5-9I|sR$Ai`Yfa3X$-2y?KsEHOGX z@HXt%cKK^%iU{@ItVyi`Mvy~M#VuYFejK)2%tD1XPotWR1C6g7^>_3=M{GBTR=x6} zL$ie9VdKIYD#p_7X2Ij5c!ACsYkPJWj&|WWp}5G;)cO~hDhd@kB;Qp{=JQuL+IA<+ z9G6K-0B_4%_wIgYeR?@~V~!zKJsytphBh&$h7$B()y02uDWc?iHKJ3Oou2*+MfMx^=gIf{X3|lWppAz z-I7p3R)QX!Idb;|10w?J`_#t4+9oPM5Ep6hKa7!-%b*Z%Ne=e{WH2)WCKzJBSF^U& zK`^`eM3i5TFVExdQsTLk;G@qq9{>}@N65UeiIk4 z@~k{rAI=Uv%!sgaYY$=o+l0!JKSDr9R$?^eCXzi64~|&0Ce%+awpg_DqW1T;JnT{d zru{VON@j0;c#A;BkSsr#?|iam4$XGUx$RTb)H_pMY<00{Yt{q!{n-}(G#x&{RZq!9 z?#T+_N>%`kP|_a@fmo2-OJrXX!d!y~G&Dn3dYM^vWSZJ!#jw;(nuR5VhC?^k`y8?h4xtX)+j?PU}@iN>ljD+(Qmb z8dniBZ(_g=D1mLVH)}(M5H0vcNC?Cv1(;-)C)j(xXY2nS+gLe%zZ*W^p1!|_zv=O} z_Q!+DzkQ#@*8b@JY?61J{_8LQ7Wa7yo_=DdhrF->(?ZCJE#uL2ybpcSMKqaVBwSv) zNU6k^_VtahV8JC~!U6{d!bq&!4p|_$D-~)<6%s3$x=9Tg-hN~ZTTL>hPndt*gp7cg z+wR+roDg$1Hu?=(-&o%#Ayf15RN!Z%05j!NTZ&r35#?&$A)o_Kq_+aG2Tmv-^Ti}kzPUMz}gW$fs9 zJC4FgjTw|ACp)ypVq7XolvK9GOTU706q(I(#Eu(UtE5jUBYT&==_1{Y?P%VlGrzMb zBE|_o9do|DW~w@PU%$xNzAis=G|wxF)AvA^EC+4@aDj~7z-#+Yu_mAdbYOG_n;S_f z2MF%vrdGcJHml~P2xvLQOIQJlnTe)5Z8JtNk{R2)nVMB@y3m5+0?^4LZPW(|mtd*? 
zDv;N1W_Rb+ea4+?lrAx`155T=*|7;B>>wiy;Ml)7#VrgvNDTg;;hzJ_HNI$RTTqf4 zfyMeU1R+7uY;dsXgq*(jmUH&D>*C?4NtOzLb-!$s%bXb_Sue0mN5$wkpEM3kv8IvGA=_?Zzv1%_C1tsyl^1@RKH{a zciZw1@6q0f3Zxbb;ED{RY}JFbgv`jo;vnjTP++40u_0!9X$&Jll(K*-u?XPE#-34@ z(2*iq168t)2LIl%)odnH8EI_Yl*{iaXWjeIC?DX z@F63H#+Z$15ur99k-Q~?3g|8Zc=x8AR4D_lS&{5(Wb(9OWn~6P1uWA@lOmW&8qFIU z7SBN;v4-l*AW>A(2|!UvMJG5MwJuL6M+i`eC_Z*#ky3?khBR#txTdfRi~`O|X#<>q zay;fL4M@CAa10!#6DOq2oZBZ*`QRkS_ry;K+IvB5vHgUgDqb| z3@cbx6TJE0H(;83Aleb%_>debXaSWl^D1it&Z*F@y=N{NvpYj~%SKvwWbjz}(2QH5 zzP2fmjwex!Q0DDKn@@wk;d+8XInO!CB-!he7tkDYlzvB5io3y3~eaI5w=k}~_n&-pi{RD^=|p|INIK%M7^Ju>U^q=8=L%d;bA7r& zav%4cJ$b-?7%3m*P6<7LL;|e%;!{W=n0z(DI%^o;Bei5F?M_70?60j?x99A# z3qM)=cXkVG2kmpPJ8oP#mdXvhiT3f-MRyJ&15Gf(d7`7!iiTN|-|lq+clXLmg&CS~L$9jF&hP0Bm8xWJJK=$OMFB>@>4P6z#Xn3dcffp+LAC z!`V2YWzCFixsCZ_7L;F_QL9T?&p+$c)*hPKS8gSO~VTFj|ro3G{Iuof@swuwn#16JUnNC5#N!PbpVLE zvff%F#RxrAU($v^EeV!2DVBm~xc6xi-$OudTP+k6K2v02fE5E45$L!=gi;#?e7UiT zMhuXoWDk1Yu>gn#4)=DU3)lONdeFJ7$d8PZ2g9kss?ltvQp=qrImnG5r+V3zF4fd( z=B;z~@$<2{hrSQ|A=lv6Q712!p!qTbzdD`k(bs>{2Pp3?yqCWFJqT zzsdD8a3Kh21F_vQhkQ6nV4J6$yI@VnB-^q@1Poy1V4pzHT>!|B!-$_&1xU~WX+}BS| zI+H9JIpulj0|c`K4h+Jv+i8{#sDpzsY3kK&nf6^uxxRKVpRY}nq>h`5p#2eSW1JCB zoQDl9g0bddzNVPmI`lIIC+n3Z-%)6 z)m1D@3`N7B@Sgj)AR36~a4}*AFBJoK>y5WL$p%MGX_9PAb!p#rxvRNmYjZFf_p^MM zS*F2;S$4gmw~WV5MV$7|80zk2IN^|p!(1CUj&QiGuRFLn?P!*xF?*d9(+V$}IXT1} zTha_h(muFR1txBW5H`^S1RVSVru%s#2xv)Akeo8AEDyF+y-8{s&ujwd5cgA z{vZA1IN}ss`1mWy37bexU#GAM?R5hGixGW*YN1F6 zNe^y$@Ot{b0s=j%3iJAec(;rNi2(`#p%tYeftrrHgl%C`y`dwy83`F7-YoY_ox@e$ zL&B&_2$DC2^*`wAL)aSaJF>S~WkZ7Y-Du;z{ zz=U=xW(E3CCn_nOid4kc4!;tEos&Z9t35>kz>s9yLG>gOM({gKT^#n%!cAcT@WPry3G z2V$fF(Bh#$fe>JI^|(v(OqCu~Gq|s3b?7=j5C#Z=5wJo%cpU|9AqFF9LZ;v@MFUXd zs2N){bdrWWIl=TxEeH;2AWyYeEYN~qcFD}6*o@`j5Unt1JJgj_YN^CRhB!bw5Oy*P z1DcHm8GutysIu}&!*1M~s!zPA&Oi}v=#?bEF!B<|)OQu(7P)!*MrromGcLP%0kRC2-f z-Ma&_=fdV8BrOEsXnl*Tw^mqWLHgJM!Kwz2*{PSy3 zu1y0}WCf==J8l%kErz3IvwaFAt0TVM@-$^1MCp&5TKidOUd1p zYf^#|AzIhOeK`tn48Ym)u!~-}_Q+l&S8a}MP 
zqt<;`1h6{%dl7fl4{{^4=5Z9~GklcT&KW~EA3Cu3&;?mZn=%p-SGX&lf>n(5j&iOY zYQ5cz$?+OcwiV^|gbZ zDl{t&8X}C%;tMPtc~k`|q$FrGfm%YMX(X@VI=uFI0&+ldLE%k6`SmVPkC_HWF>Jjk zUDTu0qvTmpsIKA3Q+lPhpdN}HV1^2u*J2-*FWMiW;5$l`j~ao*gwCP$RM5i#_C_Ex zH)zO<;CO6Q=Wwgery}Pl$N)(r*fN-s03O~z4|h@!W6)yYC(`*Uf$0mj(1PL>cZ3#Z z4TbDihB0%-2YpbP;Si{_nnei&VL3_(rX}Qwue;<9@ZzbMbgFgWaWzMX7*HysutU{s z6$yK$fP%s@0>K!oAjF959~anz@iKxw9tVi2%a|O!4?k>B;`D(%h5u(vcUQi+mcKd#qyCaVfaiWCOtfxqGP5gZ9C@~hnd`7h<>b=e_+%r-y-BvC8wl_K{AeW^0CX@V5&D!>>QsSU^i-+=i>^k-=!j-OGaIbfVpkOo zov0DlGYvzkWdki>W|%{31zZ$_CjdJbXwoesiiTpR&LP}<$G(Ags#wwbgBY)fP=_5T zQ4Xh%0B4=sfb<{^vtZvf!~pRs?Q2_tCyTUQVcvR>^r#ARkO%Guv=4>qj_B@o4jqbM zt7aXvQ8X%2q25s!VhE5U6mV4RggF8x`vC+b>2V@PWF#OH>&BrAM934Jfz;}KW^E!s zf_sHg^L}8s>!NOo>yjx%-@oa32x!;2(Ek5!w#C`ezx9s#*X(t`x1;-X6+ z7Tr_H3_VZlT{4sbK3ug=pGr!XjFT71U-2y+_{;q+T4bJi(b= zap}Jvh}lP|*qy&ou8QcP>pMaQngHZp=&Vk$3eem_f*S=$R8;1V4~ZCnf}#?r8iXVQ z0j{X_oxDt(h>0TcAk-a!;E`5M2WU_pz^PFx0iuHO=~{qY8>t!!C^m$obS{f0N*n5P z90a#3kBG3kQg+Y~XP`YbFs-J>`*$M_gdyA-C_|>(hkfw|+l<+w zf}KDpwJ=nwn0Cr`@CL~cY#kjD!*FzDfO#Xz58x^D4{2FCkP+ji120 z`AK?g`I(q&&XIvsdP8c|-6RwyD9PaUmZHX{h)-f%GL}>`AdodPFC>U>q}W4M$7Yrv zXn_GT_PIBf+CkyE8OEjAE9XU38OMGBuvkEl2#MCGQP6b&cqsd};rC<;+96%VZpibB za5i6A1WKt|DOYLN0p46>D$r5;!~>XE;UL^8L$+U03KV+NtWE_Trs&B5hj{M;05y_z( zIB@{aYt}fs#2_Ui8xH+<1|!JJl0XMS2m}_23jO|IA2?LhL%8il_4O)!cmxPzpqfGY zc__a@$5HV6afV(GYNQppH5ZI_4QL$Cz*M(x41|T~>WW%5dc!&O=BaR(D8S87Uc(Wb z4GAcqP$MD;MhMW2k||&T_-I1Sc7quRcUP6)AC5{qas)meG;bs!BEU|AAWLmX=fy+6 z$p=H}qdcCy^{S?BA;kyA^aH=II~+xSl}Oz`ntcF$U;6-Xk#{x0N-aG@bNDK>cMb(tan literal 0 HcmV?d00001 From f085f5d46a79645606fbfc296cdc5aae73c67ae4 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Mon, 24 Sep 2012 16:15:57 -0400 Subject: [PATCH 29/43] Adding default intellij configuration files --- .idea/.name | 1 + .idea/ant.xml | 15 + .idea/codeStyleSettings.xml | 13 + .idea/compiler.xml | 21 + .idea/copyright/profiles_settings.xml | 5 + .idea/encodings.xml | 5 + .idea/highlighting.xml | 8 + 
.idea/inspectionProfiles/Project_Default.xml | 11 + .../inspectionProfiles/profiles_settings.xml | 7 + .idea/libraries/GATK_libraries.xml | 13 + .idea/misc.xml | 32 ++ .idea/modules.xml | 9 + .idea/scopes/scope_settings.xml | 5 + .idea/uiDesigner.xml | 125 ++++++ .idea/vcs.xml | 10 + .idea/workspace.xml | 386 ++++++++++++++++++ cmi-gatk.iml | 23 ++ 17 files changed, 689 insertions(+) create mode 100644 .idea/.name create mode 100644 .idea/ant.xml create mode 100644 .idea/codeStyleSettings.xml create mode 100644 .idea/compiler.xml create mode 100644 .idea/copyright/profiles_settings.xml create mode 100644 .idea/encodings.xml create mode 100644 .idea/highlighting.xml create mode 100644 .idea/inspectionProfiles/Project_Default.xml create mode 100644 .idea/inspectionProfiles/profiles_settings.xml create mode 100644 .idea/libraries/GATK_libraries.xml create mode 100644 .idea/misc.xml create mode 100644 .idea/modules.xml create mode 100644 .idea/scopes/scope_settings.xml create mode 100644 .idea/uiDesigner.xml create mode 100644 .idea/vcs.xml create mode 100644 .idea/workspace.xml create mode 100644 cmi-gatk.iml diff --git a/.idea/.name b/.idea/.name new file mode 100644 index 000000000..7014f65a5 --- /dev/null +++ b/.idea/.name @@ -0,0 +1 @@ +cmi-gatk \ No newline at end of file diff --git a/.idea/ant.xml b/.idea/ant.xml new file mode 100644 index 000000000..4674eeac9 --- /dev/null +++ b/.idea/ant.xml @@ -0,0 +1,15 @@ + + + + + + + + + + + + + + + diff --git a/.idea/codeStyleSettings.xml b/.idea/codeStyleSettings.xml new file mode 100644 index 000000000..9178b389f --- /dev/null +++ b/.idea/codeStyleSettings.xml @@ -0,0 +1,13 @@ + + + + + + + diff --git a/.idea/compiler.xml b/.idea/compiler.xml new file mode 100644 index 000000000..ded2e9a1d --- /dev/null +++ b/.idea/compiler.xml @@ -0,0 +1,21 @@ + + + + + + diff --git a/.idea/copyright/profiles_settings.xml b/.idea/copyright/profiles_settings.xml new file mode 100644 index 000000000..3572571ad --- /dev/null +++ 
b/.idea/copyright/profiles_settings.xml @@ -0,0 +1,5 @@ + + + + + \ No newline at end of file diff --git a/.idea/encodings.xml b/.idea/encodings.xml new file mode 100644 index 000000000..e206d70d8 --- /dev/null +++ b/.idea/encodings.xml @@ -0,0 +1,5 @@ + + + + + diff --git a/.idea/highlighting.xml b/.idea/highlighting.xml new file mode 100644 index 000000000..f33b64d94 --- /dev/null +++ b/.idea/highlighting.xml @@ -0,0 +1,8 @@ + + + + + + diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml new file mode 100644 index 000000000..b8c243dbe --- /dev/null +++ b/.idea/inspectionProfiles/Project_Default.xml @@ -0,0 +1,11 @@ + + + + \ No newline at end of file diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 000000000..3b312839b --- /dev/null +++ b/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,7 @@ + + + + \ No newline at end of file diff --git a/.idea/libraries/GATK_libraries.xml b/.idea/libraries/GATK_libraries.xml new file mode 100644 index 000000000..970d0a3dc --- /dev/null +++ b/.idea/libraries/GATK_libraries.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 000000000..afd7f3778 --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,32 @@ + + + + + + + + + + http://www.w3.org/1999/xhtml + + + + + + + diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 000000000..09caa2933 --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,9 @@ + + + + + + + + + diff --git a/.idea/scopes/scope_settings.xml b/.idea/scopes/scope_settings.xml new file mode 100644 index 000000000..922003b84 --- /dev/null +++ b/.idea/scopes/scope_settings.xml @@ -0,0 +1,5 @@ + + + + \ No newline at end of file diff --git a/.idea/uiDesigner.xml b/.idea/uiDesigner.xml new file mode 100644 index 000000000..3b0002030 --- /dev/null +++ 
b/.idea/uiDesigner.xml @@ -0,0 +1,125 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 000000000..cbc984988 --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,10 @@ + + + + + + + + + diff --git a/.idea/workspace.xml b/.idea/workspace.xml new file mode 100644 index 000000000..87ab79287 --- /dev/null +++ b/.idea/workspace.xml @@ -0,0 +1,386 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + localhost + 5050 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + cmi-gatk + + + + + + + + GATK libraries + + + + + + + + + diff --git a/cmi-gatk.iml b/cmi-gatk.iml new file mode 100644 index 000000000..e63aff535 --- /dev/null +++ b/cmi-gatk.iml @@ -0,0 +1,23 @@ + + + + + + + + + + + + + + + + + + + + + + + From e29bcab42e9ba75276d20b9402d5d881271ce04d Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Tue, 25 Sep 2012 10:51:53 -0400 Subject: [PATCH 30/43] Updating Intellij enviroment and adding Scala --- .idea/libraries/GATK_libraries.xml | 1 - .idea/misc.xml | 2 +- .idea/workspace.xml | 221 ++++++++++++++++++++++++----- cmi-gatk.iml | 10 +- 4 files changed, 192 insertions(+), 42 deletions(-) diff --git a/.idea/libraries/GATK_libraries.xml b/.idea/libraries/GATK_libraries.xml index 970d0a3dc..b363bbe6c 100644 --- a/.idea/libraries/GATK_libraries.xml +++ b/.idea/libraries/GATK_libraries.xml @@ -6,7 +6,6 @@ - diff --git a/.idea/misc.xml b/.idea/misc.xml index afd7f3778..a79280c52 100644 --- 
a/.idea/misc.xml +++ b/.idea/misc.xml @@ -24,7 +24,7 @@ http://www.w3.org/1999/xhtml - + diff --git a/.idea/workspace.xml b/.idea/workspace.xml index 87ab79287..f6d4567fd 100644 --- a/.idea/workspace.xml +++ b/.idea/workspace.xml @@ -1,7 +1,12 @@ - + + + + + + + + + + + + + + - + @@ -112,33 +140,7 @@ - - - - - - - - - - - - - - - - - - - - - + @@ -147,7 +149,7 @@ - + @@ -228,8 +230,9 @@ - + + @@ -286,7 +289,7 @@ + + - + + + + + + + + + + + + + + + + + - + @@ -333,6 +464,18 @@ + + + Detection + + + + + @@ -346,6 +489,7 @@ + 1.6 diff --git a/cmi-gatk.iml b/cmi-gatk.iml index e63aff535..4dbee1336 100644 --- a/cmi-gatk.iml +++ b/cmi-gatk.iml @@ -1,5 +1,13 @@ + + + + + + @@ -17,7 +25,7 @@ - + From fdf29503fb9bb6906d0e0b7ad41b6045aab2f38f Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Tue, 25 Sep 2012 17:18:44 -0400 Subject: [PATCH 31/43] removing annoying xml from IDEA configuration --- .idea/workspace.xml | 529 -------------------------------------------- 1 file changed, 529 deletions(-) delete mode 100644 .idea/workspace.xml diff --git a/.idea/workspace.xml b/.idea/workspace.xml deleted file mode 100644 index f6d4567fd..000000000 --- a/.idea/workspace.xml +++ /dev/null @@ -1,529 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - localhost - 5050 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Detection - - - - - - - - - - - - - - - 1.6 - - - - - - - - cmi-gatk - - - - - - - - GATK libraries - - - - - - - - - From 29195cd3aab9a47118f71516ce55949b979d9967 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Thu, 27 Sep 2012 11:04:56 -0400 Subject: [PATCH 32/43] Removed the intellij files from the root and made an example 
package for new users. This allows users to start at the same page and then change it as they see fit without interfering with the repo (thanks guillermo!) --- .idea/.name | 1 - .idea/ant.xml | 15 --- .idea/codeStyleSettings.xml | 13 -- .idea/compiler.xml | 21 --- .idea/copyright/profiles_settings.xml | 5 - .idea/encodings.xml | 5 - .idea/highlighting.xml | 8 -- .idea/inspectionProfiles/Project_Default.xml | 11 -- .../inspectionProfiles/profiles_settings.xml | 7 - .idea/libraries/GATK_libraries.xml | 12 -- .idea/misc.xml | 32 ----- .idea/modules.xml | 9 -- .idea/scopes/scope_settings.xml | 5 - .idea/uiDesigner.xml | 125 ------------------ .idea/vcs.xml | 10 -- cmi-gatk.iml | 31 ----- 16 files changed, 310 deletions(-) delete mode 100644 .idea/.name delete mode 100644 .idea/ant.xml delete mode 100644 .idea/codeStyleSettings.xml delete mode 100644 .idea/compiler.xml delete mode 100644 .idea/copyright/profiles_settings.xml delete mode 100644 .idea/encodings.xml delete mode 100644 .idea/highlighting.xml delete mode 100644 .idea/inspectionProfiles/Project_Default.xml delete mode 100644 .idea/inspectionProfiles/profiles_settings.xml delete mode 100644 .idea/libraries/GATK_libraries.xml delete mode 100644 .idea/misc.xml delete mode 100644 .idea/modules.xml delete mode 100644 .idea/scopes/scope_settings.xml delete mode 100644 .idea/uiDesigner.xml delete mode 100644 .idea/vcs.xml delete mode 100644 cmi-gatk.iml diff --git a/.idea/.name b/.idea/.name deleted file mode 100644 index 7014f65a5..000000000 --- a/.idea/.name +++ /dev/null @@ -1 +0,0 @@ -cmi-gatk \ No newline at end of file diff --git a/.idea/ant.xml b/.idea/ant.xml deleted file mode 100644 index 4674eeac9..000000000 --- a/.idea/ant.xml +++ /dev/null @@ -1,15 +0,0 @@ - - - - - - - - - - - - - - - diff --git a/.idea/codeStyleSettings.xml b/.idea/codeStyleSettings.xml deleted file mode 100644 index 9178b389f..000000000 --- a/.idea/codeStyleSettings.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - diff --git 
a/.idea/compiler.xml b/.idea/compiler.xml deleted file mode 100644 index ded2e9a1d..000000000 --- a/.idea/compiler.xml +++ /dev/null @@ -1,21 +0,0 @@ - - - - - - diff --git a/.idea/copyright/profiles_settings.xml b/.idea/copyright/profiles_settings.xml deleted file mode 100644 index 3572571ad..000000000 --- a/.idea/copyright/profiles_settings.xml +++ /dev/null @@ -1,5 +0,0 @@ - - - - - \ No newline at end of file diff --git a/.idea/encodings.xml b/.idea/encodings.xml deleted file mode 100644 index e206d70d8..000000000 --- a/.idea/encodings.xml +++ /dev/null @@ -1,5 +0,0 @@ - - - - - diff --git a/.idea/highlighting.xml b/.idea/highlighting.xml deleted file mode 100644 index f33b64d94..000000000 --- a/.idea/highlighting.xml +++ /dev/null @@ -1,8 +0,0 @@ - - - - - - diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml deleted file mode 100644 index b8c243dbe..000000000 --- a/.idea/inspectionProfiles/Project_Default.xml +++ /dev/null @@ -1,11 +0,0 @@ - - - - \ No newline at end of file diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml deleted file mode 100644 index 3b312839b..000000000 --- a/.idea/inspectionProfiles/profiles_settings.xml +++ /dev/null @@ -1,7 +0,0 @@ - - - - \ No newline at end of file diff --git a/.idea/libraries/GATK_libraries.xml b/.idea/libraries/GATK_libraries.xml deleted file mode 100644 index b363bbe6c..000000000 --- a/.idea/libraries/GATK_libraries.xml +++ /dev/null @@ -1,12 +0,0 @@ - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml deleted file mode 100644 index a79280c52..000000000 --- a/.idea/misc.xml +++ /dev/null @@ -1,32 +0,0 @@ - - - - - - - - - - http://www.w3.org/1999/xhtml - - - - - - - diff --git a/.idea/modules.xml b/.idea/modules.xml deleted file mode 100644 index 09caa2933..000000000 --- a/.idea/modules.xml +++ /dev/null @@ -1,9 +0,0 @@ - - - - - - - - - diff --git 
a/.idea/scopes/scope_settings.xml b/.idea/scopes/scope_settings.xml deleted file mode 100644 index 922003b84..000000000 --- a/.idea/scopes/scope_settings.xml +++ /dev/null @@ -1,5 +0,0 @@ - - - - \ No newline at end of file diff --git a/.idea/uiDesigner.xml b/.idea/uiDesigner.xml deleted file mode 100644 index 3b0002030..000000000 --- a/.idea/uiDesigner.xml +++ /dev/null @@ -1,125 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/.idea/vcs.xml b/.idea/vcs.xml deleted file mode 100644 index cbc984988..000000000 --- a/.idea/vcs.xml +++ /dev/null @@ -1,10 +0,0 @@ - - - - - - - - - diff --git a/cmi-gatk.iml b/cmi-gatk.iml deleted file mode 100644 index 4dbee1336..000000000 --- a/cmi-gatk.iml +++ /dev/null @@ -1,31 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - From e9eaa33c0b3699472da7287a2c6e23cc6b1ac08f Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Thu, 27 Sep 2012 11:09:41 -0400 Subject: [PATCH 33/43] adding some directories to gitignore --- .gitignore | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/.gitignore b/.gitignore index 456794cea..927caf98d 100644 --- a/.gitignore +++ b/.gitignore @@ -18,10 +18,8 @@ queueScatterGather /bar* integrationtests/ public/testdata/onTheFlyOutputTest.vcf -private/testdata/onTheFlyOutputTest.vcf -lib -html -gatkdocs -dist -build -resources +build/ +dist/ +dump/ +lib/ +out/ From af5a6fdaced7814b7af3d6e858897c6b0eadb8ed Mon Sep 17 00:00:00 2001 From: Guillermo del Angel Date: Thu, 11 Oct 2012 11:09:49 -0400 Subject: [PATCH 34/43] Resolve [DEV-7]: add single-sample VCF calling at end of FASTQ-BAM pipeline. 
Initial steps of [DEV-4]: queue extensions for Picard QC metrics --- .../picard/CalculateHsMetrics.scala | 60 +++++++++++++++++++ .../picard/CollectGcBiasMetrics.scala | 32 ++++++++++ .../picard/CollectMultipleMetrics.scala | 36 +++++++++++ 3 files changed, 128 insertions(+) create mode 100644 public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CalculateHsMetrics.scala create mode 100644 public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CollectGcBiasMetrics.scala create mode 100644 public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CollectMultipleMetrics.scala diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CalculateHsMetrics.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CalculateHsMetrics.scala new file mode 100644 index 000000000..75e9300dc --- /dev/null +++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CalculateHsMetrics.scala @@ -0,0 +1,60 @@ +package org.broadinstitute.sting.queue.extensions.picard + +import org.broadinstitute.sting.commandline.{Argument, Output, Input} +import java.io.File + +/** + * Created with IntelliJ IDEA. + * User: delangel + * Date: 10/9/12 + * Time: 5:59 PM + * To change this template use File | Settings | File Templates. + */ +class CalculateHsMetrics extends org.broadinstitute.sting.queue.function.JavaCommandLineFunction with PicardBamFunction { + analysisName = "CalculateHsMetrics" + javaMainClass = "net.sf.picard.sam.CalculateHsMetrics" + + @Input(doc="The input SAM or BAM files to analyze. 
Must be coordinate sorted.", shortName = "input", fullName = "input_bam_files", required = true) + var input: Seq[File] = Nil + + @Output(doc="The output file to write statistics to", shortName = "output", fullName = "output_file", required = true) + var output: File = _ + + @Argument(doc="Interval list with targets", shortName = "targets", fullName = "target_list", required = true) + var targets: File = _ + + @Argument(doc="Interval list with baits", shortName = "baits", fullName = "bait_list", required = true) + var baits: File = _ + + @Argument(doc="Reference file", shortName = "reference", fullName = "reference", required = true) + var reference: File = _ + /* + @Argument(doc = "Maximum number of file handles to keep open when spilling read ends to disk. Set this number a little lower than the per-process maximum number of file that may be open. This number can be found by executing the 'ulimit -n' command on a Unix system.", shortName = "max_file_handles", fullName ="max_file_handles_for_read_ends_maps", required=false) + var MAX_FILE_HANDLES_FOR_READ_ENDS_MAP: Int = -1; + + @Argument(doc = "This number, plus the maximum RAM available to the JVM, determine the memory footprint used by some of the sorting collections. 
If you are running out of memory, try reducing this number.", shortName = "sorting_ratio", fullName = "sorting_collection_size_ratio", required = false) + var SORTING_COLLECTION_SIZE_RATIO: Double = -1 + */ + override def freezeFieldValues() { + super.freezeFieldValues() +// if (outputIndex == null && output != null) + // outputIndex = new File(output.getName.stripSuffix(".bam") + ".bai") + } + + val level = "SAMPLE" + + override def inputBams = input + override def outputBam = output + //this.sortOrder = null + //this.createIndex = Some(true) + override def commandLine = super.commandLine + + required("BAIT_INTERVALS=" + baits) + + required("TARGET_INTERVALS=" + targets) + + required("REFERENCE_SEQUENCE=" + reference) + + optional("METRIC_ACCUMULATION_LEVEL="+level)/*+ + conditional(REMOVE_DUPLICATES, "REMOVE_DUPLICATES=true") + + conditional(MAX_FILE_HANDLES_FOR_READ_ENDS_MAP > 0, "MAX_FILE_HANDLES_FOR_READ_ENDS_MAP=" + MAX_FILE_HANDLES_FOR_READ_ENDS_MAP.toString) + + conditional(SORTING_COLLECTION_SIZE_RATIO > 0, "SORTING_COLLECTION_SIZE_RATIO=" + SORTING_COLLECTION_SIZE_RATIO.toString) */ + + +} diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CollectGcBiasMetrics.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CollectGcBiasMetrics.scala new file mode 100644 index 000000000..de2b0af9e --- /dev/null +++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CollectGcBiasMetrics.scala @@ -0,0 +1,32 @@ +package org.broadinstitute.sting.queue.extensions.picard + +import org.broadinstitute.sting.commandline.{Argument, Output, Input} +import java.io.File + +/** + * Created with IntelliJ IDEA. + * User: delangel + * Date: 10/10/12 + * Time: 10:37 AM + * To change this template use File | Settings | File Templates. 
+ */ +class CollectGcBiasMetrics extends org.broadinstitute.sting.queue.function.JavaCommandLineFunction with PicardBamFunction { + analysisName = "CalculateGcMetrics" + javaMainClass = "net.sf.picard.sam.CalculateGcMetrics" + + @Input(doc="The input SAM or BAM files to analyze. Must be coordinate sorted.", shortName = "input", fullName = "input_bam_files", required = true) + var input: Seq[File] = Nil + + @Output(doc="The output file to write statistics to", shortName = "output", fullName = "output_file", required = true) + var output: File = _ + + @Argument(doc="Reference file", shortName = "reference", fullName = "reference", required = true) + var reference: File = _ + + override def inputBams = input + override def outputBam = output + override def commandLine = super.commandLine + + required("CHART_OUTPUT=" + output+".pdf") + + required("REFERENCE_SEQUENCE=" + reference) + + required("ASSUME_SORTED=true") +} diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CollectMultipleMetrics.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CollectMultipleMetrics.scala new file mode 100644 index 000000000..a9af4e858 --- /dev/null +++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CollectMultipleMetrics.scala @@ -0,0 +1,36 @@ +package org.broadinstitute.sting.queue.extensions.picard + +import org.broadinstitute.sting.commandline.{Argument, Output, Input} +import java.io.File + +/** + * Created with IntelliJ IDEA. + * User: delangel + * Date: 10/10/12 + * Time: 10:37 AM + * To change this template use File | Settings | File Templates. + */ +class CollectMultipleMetrics extends org.broadinstitute.sting.queue.function.JavaCommandLineFunction with PicardBamFunction{ + analysisName = "CalculateMultipleMetrics" + javaMainClass = "net.sf.picard.sam.CalculateMultipleMetrics" + + @Input(doc="The input SAM or BAM files to analyze. 
Must be coordinate sorted.", shortName = "input", fullName = "input_bam_files", required = true) + var input: Seq[File] = Nil + + @Output(doc="The output file to write statistics to", shortName = "output", fullName = "output_file", required = true) + var output: File = _ + + @Argument(doc="Reference file", shortName = "reference", fullName = "reference", required = true) + var reference: File = _ + + override def inputBams = input + override def outputBam = output + override def commandLine = super.commandLine + + required("REFERENCE_SEQUENCE=" + reference) + + required("ASSUME_SORTED=true") + + required("PROGRAM=QualityScoreDistribution") + + required("PROGRAM=MeanQualityByCycle") + + required("PROGRAM=CollectAlignmentSummaryMetrics" ) + + +} From c1706ef0ef42bd6a7986009e7664c453313e8cbc Mon Sep 17 00:00:00 2001 From: Kristian Cibulskis Date: Fri, 12 Oct 2012 14:18:12 -0400 Subject: [PATCH 36/43] upgraded mutation caller with VCF output raw indel calls (non filtered,non vcf) --- .../queue/extensions/cancer/MuTect.scala | 33 +++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/cancer/MuTect.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/cancer/MuTect.scala index 623d397d4..1193e7dec 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/extensions/cancer/MuTect.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/cancer/MuTect.scala @@ -6,7 +6,7 @@ import org.broadinstitute.sting.commandline.Gather import org.broadinstitute.sting.commandline.Input import org.broadinstitute.sting.commandline.Output import org.broadinstitute.sting.queue.function.scattergather.ScatterGatherableFunction -import org.broadinstitute.sting.queue.extensions.gatk.{LocusScatterFunction, TaggedFile} +import org.broadinstitute.sting.queue.extensions.gatk.{TaggedFile, VcfGatherFunction, LocusScatterFunction} class MuTect extends 
org.broadinstitute.sting.queue.extensions.gatk.CommandLineGATK with ScatterGatherableFunction { analysisName = "MuTect" @@ -45,6 +45,10 @@ class MuTect extends org.broadinstitute.sting.queue.extensions.gatk.CommandLineG @Argument(fullName="force_alleles", shortName="", doc="force output for all alleles at each site", required=false, exclusiveOf="", validation="") var force_alleles: Boolean = _ + /** only emit passing calls */ + @Argument(fullName="only_passing_calls", shortName="", doc="only emit passing calls", required=false, exclusiveOf="", validation="") + var only_passing_calls: Boolean = _ + /** Initial LOD threshold for calling tumor variant */ @Argument(fullName="initial_tumor_lod", shortName="", doc="Initial LOD threshold for calling tumor variant", required=false, exclusiveOf="", validation="") var initial_tumor_lod: Option[Float] = None @@ -242,6 +246,28 @@ class MuTect extends org.broadinstitute.sting.queue.extensions.gatk.CommandLineG */ def o_=(value: File) { this.out = value } + /** VCF output of mutation candidates */ + @Output(fullName="vcf", shortName="vcf", doc="VCF output of mutation candidates", required=false, exclusiveOf="", validation="") + @Gather(classOf[VcfGatherFunction]) + var vcf: File = _ + + /** Automatically generated index for vcf */ + @Output(fullName="vcfIndex", shortName="", doc="Automatically generated index for vcf", required=false, exclusiveOf="", validation="") + @Gather(enabled=false) + private var vcfIndex: File = _ + + /** Don't output the usual VCF header tag with the command line. FOR DEBUGGING PURPOSES ONLY. This option is required in order to pass integration tests. */ + @Argument(fullName="no_cmdline_in_header", shortName="no_cmdline_in_header", doc="Don't output the usual VCF header tag with the command line. FOR DEBUGGING PURPOSES ONLY. 
This option is required in order to pass integration tests.", required=false, exclusiveOf="", validation="") + var no_cmdline_in_header: Boolean = _ + + /** Just output sites without genotypes (i.e. only the first 8 columns of the VCF) */ + @Argument(fullName="sites_only", shortName="sites_only", doc="Just output sites without genotypes (i.e. only the first 8 columns of the VCF)", required=false, exclusiveOf="", validation="") + var sites_only: Boolean = _ + + /** force BCF output, regardless of the file's extension */ + @Argument(fullName="bcf", shortName="bcf", doc="force BCF output, regardless of the file's extension", required=false, exclusiveOf="", validation="") + var bcf: Boolean = _ + /** VCF file of DBSNP information */ @Input(fullName="dbsnp", shortName="dbsnp", doc="VCF file of DBSNP information", required=false, exclusiveOf="", validation="") var dbsnp: Seq[File] = Nil @@ -369,10 +395,13 @@ class MuTect extends org.broadinstitute.sting.queue.extensions.gatk.CommandLineG override def freezeFieldValues() { super.freezeFieldValues() + if (vcf != null && !org.broadinstitute.sting.utils.io.IOUtils.isSpecialFile(vcf)) + if (!org.broadinstitute.sting.gatk.io.stubs.VCFWriterArgumentTypeDescriptor.isCompressed(vcf.getPath)) + vcfIndex = new File(vcf.getPath + ".idx") dbsnpIndexes ++= dbsnp.filter(orig => orig != null).map(orig => new File(orig.getPath + ".idx")) cosmicIndexes ++= cosmic.filter(orig => orig != null).map(orig => new File(orig.getPath + ".idx")) normal_panelIndexes ++= normal_panel.filter(orig => orig != null).map(orig => new File(orig.getPath + ".idx")) } - override def commandLine = super.commandLine + conditional(noop, "--noop", escape=true, format="%s") + conditional(enable_extended_output, "--enable_extended_output", escape=true, format="%s") + conditional(artifact_detection_mode, "--artifact_detection_mode", escape=true, format="%s") + optional("--tumor_sample_name", tumor_sample_name, spaceSeparated=true, escape=true, format="%s") + 
optional("--bam_tumor_sample_name", bam_tumor_sample_name, spaceSeparated=true, escape=true, format="%s") + optional("--normal_sample_name", normal_sample_name, spaceSeparated=true, escape=true, format="%s") + conditional(force_output, "--force_output", escape=true, format="%s") + conditional(force_alleles, "--force_alleles", escape=true, format="%s") + optional("--initial_tumor_lod", initial_tumor_lod, spaceSeparated=true, escape=true, format=initial_tumor_lodFormat) + optional("--tumor_lod", tumor_lod, spaceSeparated=true, escape=true, format=tumor_lodFormat) + optional("--fraction_contamination", fraction_contamination, spaceSeparated=true, escape=true, format=fraction_contaminationFormat) + optional("--minimum_mutation_cell_fraction", minimum_mutation_cell_fraction, spaceSeparated=true, escape=true, format=minimum_mutation_cell_fractionFormat) + optional("--normal_lod", normal_lod, spaceSeparated=true, escape=true, format=normal_lodFormat) + optional("--normal_artifact_lod", normal_artifact_lod, spaceSeparated=true, escape=true, format=normal_artifact_lodFormat) + optional("--strand_artifact_lod", strand_artifact_lod, spaceSeparated=true, escape=true, format=strand_artifact_lodFormat) + optional("--strand_artifact_power_threshold", strand_artifact_power_threshold, spaceSeparated=true, escape=true, format=strand_artifact_power_thresholdFormat) + optional("--dbsnp_normal_lod", dbsnp_normal_lod, spaceSeparated=true, escape=true, format=dbsnp_normal_lodFormat) + optional("--somatic_classification_normal_power_threshold", somatic_classification_normal_power_threshold, spaceSeparated=true, escape=true, format=somatic_classification_normal_power_thresholdFormat) + optional("--minimum_normal_allele_fraction", minimum_normal_allele_fraction, spaceSeparated=true, escape=true, format=minimum_normal_allele_fractionFormat) + optional("--tumor_f_pretest", tumor_f_pretest, spaceSeparated=true, escape=true, format=tumor_f_pretestFormat) + optional("--min_qscore", min_qscore, 
spaceSeparated=true, escape=true, format="%s") + optional("--gap_events_threshold", gap_events_threshold, spaceSeparated=true, escape=true, format="%s") + optional("--heavily_clipped_read_fraction", heavily_clipped_read_fraction, spaceSeparated=true, escape=true, format=heavily_clipped_read_fractionFormat) + optional("--clipping_bias_pvalue_threshold", clipping_bias_pvalue_threshold, spaceSeparated=true, escape=true, format=clipping_bias_pvalue_thresholdFormat) + optional("--fraction_mapq0_threshold", fraction_mapq0_threshold, spaceSeparated=true, escape=true, format=fraction_mapq0_thresholdFormat) + optional("--pir_median_threshold", pir_median_threshold, spaceSeparated=true, escape=true, format=pir_median_thresholdFormat) + optional("--pir_mad_threshold", pir_mad_threshold, spaceSeparated=true, escape=true, format=pir_mad_thresholdFormat) + optional("--required_maximum_alt_allele_mapping_quality_score", required_maximum_alt_allele_mapping_quality_score, spaceSeparated=true, escape=true, format="%s") + optional("--max_alt_alleles_in_normal_count", max_alt_alleles_in_normal_count, spaceSeparated=true, escape=true, format="%s") + optional("--max_alt_alleles_in_normal_qscore_sum", max_alt_alleles_in_normal_qscore_sum, spaceSeparated=true, escape=true, format="%s") + optional("--max_alt_allele_in_normal_fraction", max_alt_allele_in_normal_fraction, spaceSeparated=true, escape=true, format=max_alt_allele_in_normal_fractionFormat) + optional("--power_constant_qscore", power_constant_qscore, spaceSeparated=true, escape=true, format="%s") + optional("--absolute_copy_number_data", absolute_copy_number_data, spaceSeparated=true, escape=true, format="%s") + optional("--power_constant_af", power_constant_af, spaceSeparated=true, escape=true, format=power_constant_afFormat) + optional("-o", out, spaceSeparated=true, escape=true, format="%s") + repeat("-dbsnp", dbsnp, formatPrefix=TaggedFile.formatCommandLineParameter, spaceSeparated=true, escape=true, format="%s") + 
repeat("-cosmic", cosmic, formatPrefix=TaggedFile.formatCommandLineParameter, spaceSeparated=true, escape=true, format="%s") + repeat("-normal_panel", normal_panel, formatPrefix=TaggedFile.formatCommandLineParameter, spaceSeparated=true, escape=true, format="%s") + optional("-cov", coverage_file, spaceSeparated=true, escape=true, format="%s") + optional("-cov_q20", coverage_20_q20_file, spaceSeparated=true, escape=true, format="%s") + optional("-pow", power_file, spaceSeparated=true, escape=true, format="%s") + optional("-tdf", tumor_depth_file, spaceSeparated=true, escape=true, format="%s") + optional("-ndf", normal_depth_file, spaceSeparated=true, escape=true, format="%s") + conditional(filter_mismatching_base_and_quals, "-filterMBQ", escape=true, format="%s") + override def commandLine = super.commandLine + conditional(noop, "--noop", escape=true, format="%s") + conditional(enable_extended_output, "--enable_extended_output", escape=true, format="%s") + conditional(artifact_detection_mode, "--artifact_detection_mode", escape=true, format="%s") + optional("--tumor_sample_name", tumor_sample_name, spaceSeparated=true, escape=true, format="%s") + optional("--bam_tumor_sample_name", bam_tumor_sample_name, spaceSeparated=true, escape=true, format="%s") + optional("--normal_sample_name", normal_sample_name, spaceSeparated=true, escape=true, format="%s") + conditional(force_output, "--force_output", escape=true, format="%s") + conditional(force_alleles, "--force_alleles", escape=true, format="%s") + conditional(only_passing_calls, "--only_passing_calls", escape=true, format="%s") + optional("--initial_tumor_lod", initial_tumor_lod, spaceSeparated=true, escape=true, format=initial_tumor_lodFormat) + optional("--tumor_lod", tumor_lod, spaceSeparated=true, escape=true, format=tumor_lodFormat) + optional("--fraction_contamination", fraction_contamination, spaceSeparated=true, escape=true, format=fraction_contaminationFormat) + optional("--minimum_mutation_cell_fraction", 
minimum_mutation_cell_fraction, spaceSeparated=true, escape=true, format=minimum_mutation_cell_fractionFormat) + optional("--normal_lod", normal_lod, spaceSeparated=true, escape=true, format=normal_lodFormat) + optional("--normal_artifact_lod", normal_artifact_lod, spaceSeparated=true, escape=true, format=normal_artifact_lodFormat) + optional("--strand_artifact_lod", strand_artifact_lod, spaceSeparated=true, escape=true, format=strand_artifact_lodFormat) + optional("--strand_artifact_power_threshold", strand_artifact_power_threshold, spaceSeparated=true, escape=true, format=strand_artifact_power_thresholdFormat) + optional("--dbsnp_normal_lod", dbsnp_normal_lod, spaceSeparated=true, escape=true, format=dbsnp_normal_lodFormat) + optional("--somatic_classification_normal_power_threshold", somatic_classification_normal_power_threshold, spaceSeparated=true, escape=true, format=somatic_classification_normal_power_thresholdFormat) + optional("--minimum_normal_allele_fraction", minimum_normal_allele_fraction, spaceSeparated=true, escape=true, format=minimum_normal_allele_fractionFormat) + optional("--tumor_f_pretest", tumor_f_pretest, spaceSeparated=true, escape=true, format=tumor_f_pretestFormat) + optional("--min_qscore", min_qscore, spaceSeparated=true, escape=true, format="%s") + optional("--gap_events_threshold", gap_events_threshold, spaceSeparated=true, escape=true, format="%s") + optional("--heavily_clipped_read_fraction", heavily_clipped_read_fraction, spaceSeparated=true, escape=true, format=heavily_clipped_read_fractionFormat) + optional("--clipping_bias_pvalue_threshold", clipping_bias_pvalue_threshold, spaceSeparated=true, escape=true, format=clipping_bias_pvalue_thresholdFormat) + optional("--fraction_mapq0_threshold", fraction_mapq0_threshold, spaceSeparated=true, escape=true, format=fraction_mapq0_thresholdFormat) + optional("--pir_median_threshold", pir_median_threshold, spaceSeparated=true, escape=true, format=pir_median_thresholdFormat) + 
optional("--pir_mad_threshold", pir_mad_threshold, spaceSeparated=true, escape=true, format=pir_mad_thresholdFormat) + optional("--required_maximum_alt_allele_mapping_quality_score", required_maximum_alt_allele_mapping_quality_score, spaceSeparated=true, escape=true, format="%s") + optional("--max_alt_alleles_in_normal_count", max_alt_alleles_in_normal_count, spaceSeparated=true, escape=true, format="%s") + optional("--max_alt_alleles_in_normal_qscore_sum", max_alt_alleles_in_normal_qscore_sum, spaceSeparated=true, escape=true, format="%s") + optional("--max_alt_allele_in_normal_fraction", max_alt_allele_in_normal_fraction, spaceSeparated=true, escape=true, format=max_alt_allele_in_normal_fractionFormat) + optional("--power_constant_qscore", power_constant_qscore, spaceSeparated=true, escape=true, format="%s") + optional("--absolute_copy_number_data", absolute_copy_number_data, spaceSeparated=true, escape=true, format="%s") + optional("--power_constant_af", power_constant_af, spaceSeparated=true, escape=true, format=power_constant_afFormat) + optional("-o", out, spaceSeparated=true, escape=true, format="%s") + optional("-vcf", vcf, spaceSeparated=true, escape=true, format="%s") + conditional(no_cmdline_in_header, "-no_cmdline_in_header", escape=true, format="%s") + conditional(sites_only, "-sites_only", escape=true, format="%s") + conditional(bcf, "-bcf", escape=true, format="%s") + repeat("-dbsnp", dbsnp, formatPrefix=TaggedFile.formatCommandLineParameter, spaceSeparated=true, escape=true, format="%s") + repeat("-cosmic", cosmic, formatPrefix=TaggedFile.formatCommandLineParameter, spaceSeparated=true, escape=true, format="%s") + repeat("-normal_panel", normal_panel, formatPrefix=TaggedFile.formatCommandLineParameter, spaceSeparated=true, escape=true, format="%s") + optional("-cov", coverage_file, spaceSeparated=true, escape=true, format="%s") + optional("-cov_q20", coverage_20_q20_file, spaceSeparated=true, escape=true, format="%s") + optional("-pow", power_file, 
spaceSeparated=true, escape=true, format="%s") + optional("-tdf", tumor_depth_file, spaceSeparated=true, escape=true, format="%s") + optional("-ndf", normal_depth_file, spaceSeparated=true, escape=true, format="%s") + conditional(filter_mismatching_base_and_quals, "-filterMBQ", escape=true, format="%s") } From 05111eeaef41d3d3d5c2483b16728a76fc8f8a6e Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Wed, 10 Oct 2012 15:00:17 -0400 Subject: [PATCH 38/43] Making nContigs parameter hidden in ReduceReads For now, the het reduction should only be performed for diploids (n=2). We haven't really tested it for other ploidy so it should remain hidden until someone braves it out. --- .../compression/reducereads/ReduceReads.java | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java index 1b3e68647..5810bc94f 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java @@ -181,15 +181,6 @@ public class ReduceReads extends ReadWalker, ReduceRea @Argument(fullName = "minimum_del_proportion_to_trigger_variant", shortName = "mindel", doc = "", required = false) private double minIndelProportionToTriggerVariant = 0.05; - /** - * Minimum proportion of indels in a site to trigger a variant region. Anything below this will be - * considered consensus. - */ - @Argument(fullName = "contigs", shortName = "ctg", doc = "", required = false) - private int nContigs = 2; - - - /** * Downsamples the coverage of a variable region approximately (guarantees the minimum to be equal to this). * A value of 0 turns downsampling off. 
@@ -197,6 +188,14 @@ public class ReduceReads extends ReadWalker, ReduceRea @Argument(fullName = "downsample_coverage", shortName = "ds", doc = "", required = false) private int downsampleCoverage = 250; + /** + * Number of chromossomes in the sample (this is used for the polyploid consensus compression). Only + * tested for humans (or organisms with n=2). Use at your own risk! + */ + @Hidden + @Argument(fullName = "contigs", shortName = "ctg", doc = "", required = false) + private int nContigs = 2; + @Hidden @Argument(fullName = "", shortName = "dl", doc = "", required = false) private int debugLevel = 0; From 274ac4836f3357a9cc0d0d37a0f9c6f98050542f Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Fri, 12 Oct 2012 13:50:10 -0400 Subject: [PATCH 39/43] Allowing the GATK to have non-required outputs Modified the SAMFileWriterArgumentTypeDescriptor to accept output bam files that are null if they're not required (in the @Output annotation). This change enables the nWayOut parameter for the IndelRealigner and ReduceReads to operate optionally while maintaining the original single way out.
[#DEV-10 transition:31 resolution:1] --- .../SAMFileWriterArgumentTypeDescriptor.java | 36 +++++++++---------- .../gatk/walkers/indels/IndelRealigner.java | 4 +-- .../indels/IndelRealignerIntegrationTest.java | 10 ++++++ 3 files changed, 27 insertions(+), 23 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterArgumentTypeDescriptor.java index 8566f6c63..dcf2704f5 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterArgumentTypeDescriptor.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterArgumentTypeDescriptor.java @@ -124,32 +124,28 @@ public class SAMFileWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor // This parser has been passed a null filename and the GATK is not responsible for creating a type default for the object; // therefore, the user must have failed to specify a type default - if(writerFileName == null) { - if(!source.isRequired()) - throw new MissingArgumentValueException(bamArgumentDefinition); - if(generateMD5) + if(writerFileName == null && generateMD5) throw new ArgumentException("MD5 generation specified, but no output file specified. If md5 generation is desired, please specify a BAM output file and an md5 file will be written alongside."); - } // Create the stub and set parameters. 
- SAMFileWriterStub stub; - if ( writerFileName != null ) + SAMFileWriterStub stub = null; // stub = new SAMFileWriterStub(engine, defaultOutputStream); + + if ( writerFileName != null ) { stub = new SAMFileWriterStub(engine, new File(writerFileName)); - else - stub = new SAMFileWriterStub(engine, defaultOutputStream); - if ( compressionLevel != null ) - stub.setCompressionLevel(compressionLevel); - if ( indexOnTheFly ) - stub.setIndexOnTheFly(indexOnTheFly); - if ( generateMD5 ) - stub.setGenerateMD5(generateMD5); - if ( simplifyBAM ) - stub.setSimplifyBAM(simplifyBAM); + if ( compressionLevel != null ) + stub.setCompressionLevel(compressionLevel); + if ( indexOnTheFly ) + stub.setIndexOnTheFly(indexOnTheFly); + if ( generateMD5 ) + stub.setGenerateMD5(generateMD5); + if ( simplifyBAM ) + stub.setSimplifyBAM(simplifyBAM); - // WARNING: Side effects required by engine! - parsingEngine.addTags(stub,getArgumentTags(matches)); - engine.addOutput(stub); + // WARNING: Side effects required by engine! + parsingEngine.addTags(stub,getArgumentTags(matches)); + engine.addOutput(stub); + } return stub; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java index 76d8d85c2..998894fbf 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java @@ -370,8 +370,6 @@ public class IndelRealigner extends ReadWalker { currentInterval = intervals.hasNext() ? 
intervals.next() : null; - writerToUse = writer; - if ( N_WAY_OUT != null ) { boolean createIndex = true; @@ -383,9 +381,9 @@ public class IndelRealigner extends ReadWalker { createIndex, generateMD5s,createProgramRecord(),KEEP_ALL_PG_RECORDS); } } else { - // set up the output writer setupWriter(getToolkit().getSAMFileHeader()); + writerToUse = writer; } manager = new ConstrainedMateFixingManager(writerToUse, getToolkit().getGenomeLocParser(), MAX_ISIZE_FOR_MOVEMENT, MAX_POS_MOVE_ALLOWED, MAX_RECORDS_IN_MEMORY); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerIntegrationTest.java index 040845828..9b464cfec 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerIntegrationTest.java @@ -113,4 +113,14 @@ public class IndelRealignerIntegrationTest extends WalkerTest { executeTest(String.format("realigner [%s]", entry.getKey()), spec); } } + + @Test + public void testNWayOut() { + WalkerTestSpec spec1 = new WalkerTestSpec( + baseCommandPrefix + " -nWayOut .clean.bam ", + 1, + Arrays.asList("d41d8cd98f00b204e9800998ecf8427e")); + executeTest("test realigner nWayOut", spec1); + } + }